
    qi                         d Z ddlmZmZ ddlmZ ddlmZ ddlm	Z	m
Z
  G d dee      Ze G d d	             Z G d
 de      Zy)a  Base text aggregator interface for Pipecat text processing.

This module defines the abstract base class for text aggregators that accumulate
and process text tokens, typically used by TTS services to determine when
aggregated text should be sent for speech synthesis.
    )ABCabstractmethod)	dataclass)Enum)AsyncIteratorOptionalc                   "    e Zd ZdZdZdZdZd Zy)AggregationTypezBuilt-in aggregation strings.sentencetokenwordc                     | j                   S )N)valueselfs    Y/opt/pipecat/venv/lib/python3.12/site-packages/pipecat/utils/text/base_text_aggregator.py__str__zAggregationType.__str__   s    zz    N)__name__
__module____qualname____doc__SENTENCETOKENWORDr    r   r   r
   r
      s    'HEDr   r
   c                   2    e Zd ZU dZeed<   eed<   defdZy)Aggregationa  Data class representing aggregated text and its type.

    An Aggregation object is created whenever a stream of text is aggregated by
    a text aggregator. It contains the aggregated text and a type indicating
    the nature of the aggregation.

    Parameters:
        text: The aggregated text content.
        type: The type of aggregation the text represents (e.g., 'sentence', 'word', 'token',
              'my_custom_aggregation').
    texttypereturnc                 :    d| j                    d| j                   S )zReturn a string representation of the aggregation.

        Returns:
            A descriptive string showing the type and text of the aggregation.
        zAggregation by z: )r    r   r   s    r   r   zAggregation.__str__0   s     !2dii[99r   N)r   r   r   r   str__annotations__r   r   r   r   r   r      s    
 I
I: :r   r   c                       e Zd ZdZej
                  ddefdZedefd       Zee	de
fd              Ze	dedee
   fd	       Ze	dee
   fd
       Ze	d        Ze	d        Zy)BaseTextAggregatora  Base class for text aggregators in the Pipecat framework.

    Text aggregators are usually used by the TTS service to aggregate LLM tokens
    and decide when the aggregated text should be pushed to the TTS service.

    Text aggregators can also be used to manipulate text while it's being
    aggregated (e.g. reasoning blocks can be removed).

    Subclasses must implement all abstract methods to define specific aggregation
    logic, text manipulation behavior, and state management for interruptions.
    )aggregation_typer'   c                $    t        |      | _        y)a(  Initialize the base text aggregator.

        Args:
            aggregation_type: The aggregation strategy to use. SENTENCE buffers
                text until sentence boundaries are detected, TOKEN passes text
                through immediately, and WORD buffers until word boundaries.
        N)r
   _aggregation_type)r   r'   s     r   __init__zBaseTextAggregator.__init__F   s     "11A!Br   r!   c                     | j                   S )zjGet the aggregation type for this aggregator.

        Returns:
            The aggregation type.
        )r)   r   s    r   r'   z#BaseTextAggregator.aggregation_typeP   s     %%%r   c                      y)a  Get the currently aggregated text.

        Subclasses must implement this property to return the text that has
        been accumulated so far in their internal buffer or storage.

        Returns:
            The text that has been accumulated so far.
        Nr   r   s    r   r   zBaseTextAggregator.textY   s     	r   r   c                   K   	 d yw)a  Aggregate the specified text and yield completed aggregations.

        This method processes the input text character-by-character internally
        and yields Aggregation objects as they complete.

        Subclasses should implement their specific logic for:

        - How to process text character-by-character
        - When to consider the aggregated text ready for processing
        - What criteria determine text completion (e.g., sentence boundaries)
        - When a completion occurs, yield an Aggregation object containing the
          aggregated text (stripped of leading/trailing whitespace) and its type

        Args:
            text: The text to be aggregated.

        Yields:
            Aggregation objects as they complete. Each Aggregation consists of
            the aggregated text (stripped of leading/trailing whitespace) and
            a string indicating the type of aggregation (e.g., 'sentence', 'word',
            'token', 'my_custom_aggregation').
        Nr   )r   r   s     r   	aggregatezBaseTextAggregator.aggregatef   s     0 	s   
c                    K   yw)a9  Flush any pending aggregation.

        This method is called at the end of a stream (e.g., when receiving
        LLMFullResponseEndFrame) to return any text that was buffered.

        Returns:
            An Aggregation object if there is pending text, or None if there
            is no pending text.
        Nr   r   s    r   flushzBaseTextAggregator.flush         	   c                    K   yw)a  Handle interruptions in the text aggregation process.

        When an interruption occurs it is possible that we might want to discard
        the aggregated text or do some internal modifications to the aggregated text.

        Subclasses should implement this method to define how they respond to
        interruptions, such as clearing buffers, resetting state, or preserving
        partial content.
        Nr   r   s    r   handle_interruptionz&BaseTextAggregator.handle_interruption   r1   r2   c                    K   yw)a  Clear the internally aggregated text and reset to initial state.

        Subclasses should implement this method to return the aggregator to its
        initial state, discarding any previously accumulated text content and
        resetting any internal tracking variables.
        Nr   r   s    r   resetzBaseTextAggregator.reset   s      	r2   N)r   r   r   r   r
   r   r*   propertyr'   r   r   r   r#   r   r.   r   r0   r4   r6   r   r   r   r&   r&   9   s    
 ?N>V>V CO C &/ & & 	k 	  	 C M+,F  6 
Xk2 
 
 
 
  r   r&   N)r   abcr   r   dataclassesr   enumr   typingr   r   r#   r
   r   r&   r   r   r   <module>r<      sM    $ !  *c4  : : :2k kr   