
    qi                    b   d Z ddlZddlZddlZddlmZ ddlmZ ddlm	Z	 ddl
mZmZmZmZmZmZmZmZmZmZ ddlmZ ddlmZ dd	lmZmZmZmZmZmZm Z m!Z!m"Z"m#Z#m$Z$m%Z%m&Z&m'Z'm(Z(m)Z)m*Z*m+Z+m,Z,m-Z-m.Z.m/Z/ dd
l0m1Z1 ddl2m3Z3 ddl4m5Z5m6Z6 ddl7m8Z8 ddl9m:Z: ddl;m<Z< ddl=m>Z> ddl?m@Z@ ddlAmBZB e G d d             ZC G d deDe	      ZEe G d d             ZF G d de3      ZG G d deG      ZH G d deGe8      ZI G d d eI      ZJ G d! d"eI      ZK G d# d$eJ      ZL G d% d&eI      ZM G d' d(eM      ZNy))z)Base classes for Text-to-speech services.    N)abstractmethod)	dataclass)Enum)
AnyAsyncGeneratorAsyncIterator	AwaitableCallableDictListOptionalSequenceTuple)logger)create_stream_resampler)AggregatedTextFrameAggregationTypeBotStartedSpeakingFrameBotStoppedSpeakingFrameCancelFrameEndFrame
ErrorFrameFrameInterimTranscriptionFrameInterruptionFrame LLMAssistantPushAggregationFrameLLMFullResponseEndFrameLLMFullResponseStartFrame
StartFrame	TextFrameTranscriptionFrameTTSAudioRawFrameTTSSpeakFrameTTSStartedFrameTTSStoppedFrameTTSTextFrameTTSUpdateSettingsFrame)FrameDirection)	AIService)TTSSettingsis_given)WebsocketServiceLanguage)BaseTextAggregator)BaseTextFilter)SimpleTextAggregator)seconds_to_nanosecondsc                   4    e Zd ZU dZdZeed<   dZee   ed<   y)
TTSContexta  Context information for a TTS request.

    Attributes:
        append_to_context: Whether this TTS output should be appended to the
            conversation context after it is spoken.
        push_assistant_aggregation: Whether to push an
            ``LLMAssistantPushAggregationFrame`` after the TTS has finished
            speaking, forcing the assistant aggregator to commit its current
            text buffer to the conversation context.
    Tappend_to_contextFpush_assistant_aggregationN)	__name__
__module____qualname____doc__r5   bool__annotations__r6   r        N/opt/pipecat/venv/lib/python3.12/site-packages/pipecat/services/tts_service.pyr4   r4   B   s#    	 #t"166r>   r4   c                       e Zd ZdZdZdZd Zy)TextAggregationModea  Controls how incoming text is aggregated before TTS synthesis.

    Parameters:
        SENTENCE: Buffer text until sentence boundaries are detected before synthesis.
            Produces more natural speech but adds latency (~200-300ms per sentence).
        TOKEN: Stream text tokens directly to TTS as they arrive.
            Reduces latency but may affect speech quality depending on the TTS provider.
    sentencetokenc                     | j                   S N)valueselfs    r?   __str__zTextAggregationMode.__str__`   s    zzr>   N)r7   r8   r9   r:   SENTENCETOKENrI   r=   r>   r?   rA   rA   S   s     HEr>   rA   c                   0    e Zd ZU dZeed<   eed<   eed<   y)_WordTimestampEntryz?Internal: word timestamp routed through an audio context queue.word	timestamp
context_idN)r7   r8   r9   r:   strr<   floatr=   r>   r?   rM   rM   d   s    I
IOr>   rM   c            '           e Zd ZU dZeed<    e       Zddddddddddddg ddddddddee	   d	ee
   d
e
de
de
dede
dede
de
dee   dee   deee      deeeeez  eeeez  gee   f   f         deee      dee   dee   dee   de
f& fdZede
fd       Zdef fdZ fd Z fd!Zedefd"       Zedefd#       Zd$efd%Zd&efd'Zdefd(Z e!ded)ede"e#df   fd*       Z$d+e%dee   fd,Z&dedefd-Z'ddd)ee   fd.Z(d/e)f fd0Z*d/e+f fd1Z,d/e-f fd2Z.	 ded3eeeez  gee   f   d4eez  fd5Z/	 ded3eeeez  gee   f   d4eez  fd6Z0d7ede1ee2f   f fd8Z3defd9Z4d: Z5d/e#d;e6f fd<Z7e6jp                  fd/e#d;e6f fd=Z9dddd>d?e:e;   d@e
dAee   d)ee   de"e#df   f
dBZ<d/e=d;e6fdCZ>dD Z?dE Z@d/eAfdFZB	 	 	 dfdGeCdHee
   dIee
   dJee
   fdKZDd)edLe"e#dz  df   de
fdMZEdN ZFdO ZGdP ZH	 dddQeeeef      d)ee   fdRZI	 dddQeeeef      d)ee   fdSZJd)efdTZKd)ed/e#fdUZLd)efdVZMde
fdWZNdee   fdXZOdee   fdYZPdZ ZQd[ ZRd)ede
fd\ZSd)efd]ZTd^ ZUd_ ZVd` ZWd)efdaZXd)efdbZYd)efdcZZ xZ[S )g
TTSServicea  Base class for text-to-speech services.

    Provides common functionality for TTS services including text aggregation,
    filtering, audio generation, and frame management. Supports configurable
    sentence aggregation, silence insertion, and frame processing control.

    Event handlers:
        on_connected: Called when connected to the TTS service.
        on_disconnected: Called when disconnected from the TTS service.
        on_connection_error: Called when a connection to the TTS service error occurs.
        on_tts_request: Called before a TTS request is made, with the context ID and text.

    Example::

        @tts.event_handler("on_connected")
        async def on_connected(tts: TTSService):
            logger.debug(f"TTS connected")

        @tts.event_handler("on_disconnected")
        async def on_disconnected(tts: TTSService):
            logger.debug(f"TTS disconnected")

        @tts.event_handler("on_connection_error")
        async def on_connection_error(tts: TTSService, error: str):
            logger.error(f"TTS connection error: {error}")

        @tts.event_handler("on_tts_request")
        async def on_tts_request(tts: TTSService, context_id: str, text: str):
            logger.debug(f"TTS request: {context_id} - {text}")
    	_settingsNTFg       @)text_aggregation_modeaggregate_sentencespush_text_framespush_stop_framespush_start_framestop_frame_timeout_spush_silence_after_stopsilence_time_spause_frame_processingappend_trailing_spacesample_ratetext_aggregatorskip_aggregator_typestext_transformstext_filterstext_filtertransport_destinationsettingsreuse_context_id_within_turnrV   rW   rX   rY   rZ   r[   r\   r]   r^   r_   r`   ra   rb   rc   rd   re   rf   rg   rh   c                   t        |   dd|xs
 t               i| |mddl} |j                         5   |j
                  d        |j                  dt        d       ddd       |"|rt        j                  nt        j                  }|t        j                  }|| _        || _        || _        || _        || _        || _        || _        |	| _        |
| _        || _        d| _        |xs t-        | j                        | _        |rGddl} |j                         5   |j
                  d        |j                  d	t               ddd       |xs g | _        |xs g | _        |xs g | _        || _        |rOddl} |j                         5   |j
                  d        |j                  d
t               ddd       |g| _        t9               | _        d| _        t?        j@                         | _!        d| _"        i | _#        d| _$        d| _%        d| _&        g | _'        d| _(        d| _)        || _*        d| _+        d| _,        i | _-        d| _.        | j_                  d       | j_                  d       | j_                  d       | j_                  d       d| _0        y# 1 sw Y   DxY w# 1 sw Y   fxY w# 1 sw Y   xY w)a  Initialize the TTS service.

        Args:
            text_aggregation_mode: How to aggregate incoming text before synthesis.
                TextAggregationMode.SENTENCE (default) buffers until sentence boundaries,
                TextAggregationMode.TOKEN streams tokens directly for lower latency.
            aggregate_sentences: Whether to aggregate text into sentences before synthesis.

                .. deprecated:: 0.0.104
                    Use ``text_aggregation_mode`` instead. Set to ``TextAggregationMode.SENTENCE``
                    to aggregate text into sentences before synthesis, or
                    ``TextAggregationMode.TOKEN`` to stream tokens directly for lower latency.

            push_text_frames: Whether to push TextFrames and LLMFullResponseEndFrames.
            push_stop_frames: Whether to automatically push TTSStoppedFrames.
            push_start_frame: Whether to automatically create audio contexts and push TTSStartedFrames.
                When True, the base class handles ``create_audio_context`` and yields ``TTSStartedFrame``
                before each synthesis call, so ``run_tts`` implementations do not need to.
            stop_frame_timeout_s: Idle time before pushing TTSStoppedFrame when push_stop_frames is True.
            push_silence_after_stop: Whether to push silence audio after TTSStoppedFrame.
            silence_time_s: Duration of silence to push when push_silence_after_stop is True.
            pause_frame_processing: Whether to pause frame processing during audio generation.
            append_trailing_space: Whether to append a trailing space to text before sending to TTS.
                This helps prevent some TTS services from vocalizing trailing punctuation (e.g., "dot").
            sample_rate: Output sample rate for generated audio.
            text_aggregator: Custom text aggregator for processing incoming text.

                .. deprecated:: 0.0.95
                    Use an LLMTextProcessor before the TTSService for custom text aggregation.

            skip_aggregator_types: List of aggregation types that should not be spoken.
            text_transforms: A list of callables to transform text before just before sending it
                to TTS. Each callable takes the aggregated text and its type, and returns the
                transformed text. To register, provide a list of tuples of
                (aggregation_type | '*', transform_function).

            text_filters: Sequence of text filters to apply after aggregation.
            text_filter: Single text filter (deprecated, use text_filters).

                .. deprecated:: 0.0.59
                    Use `text_filters` instead, which allows multiple filters.

            transport_destination: Destination for generated audio frames.
            settings: The runtime-updatable settings for the TTS service.
            reuse_context_id_within_turn: Whether the service should reuse context IDs within the
                same turn.
            **kwargs: Additional arguments passed to the parent AIService.
        rg   Nr   alwayszParameter 'aggregate_sentences' is deprecated. Use 'text_aggregation_mode=TextAggregationMode.SENTENCE' or 'text_aggregation_mode=TextAggregationMode.TOKEN' instead.   
stacklevel)aggregation_typezuParameter 'text_aggregator' is deprecated. Use an LLMTextProcessor before the TTSService for custom text aggregation.zBParameter 'text_filter' is deprecated, use 'text_filters' instead.F on_connectedon_disconnectedon_connection_erroron_tts_requestr=   )1super__init__r*   warningscatch_warningssimplefilterwarnDeprecationWarningrA   rJ   rK   _text_aggregation_mode_push_text_frames_push_stop_frames_push_start_frame_stop_frame_timeout_s_push_silence_after_stop_silence_time_s_pause_frame_processing_append_trailing_space_init_sample_rate_sample_rater1   _text_aggregator_skip_aggregator_types_text_transforms_text_filters_transport_destinationr   
_resampler_stop_frame_taskasyncioQueue_stop_frame_queue_processing_text_tts_contexts_streamed_text!_text_aggregation_metrics_started_initial_word_timestamp_initial_word_times_word_last_pts_llm_response_started_reuse_context_id_within_turn_playing_context_id_turn_context_id_audio_contexts_audio_context_task_register_event_handler!_is_yielding_frames_synchronously)rH   rV   rW   rX   rY   rZ   r[   r\   r]   r^   r_   r`   ra   rb   rc   rd   re   rf   rg   rh   kwargsrw   	__class__s                         r?   rv   zTTSService.__init__   s   ~ 	 	
  }		

 	
 *(((* %%%h/Q '  %, + (00,22 & !($7$@$@!;P#'7'7'7,@".E%&4-C$,A#!,4C 5
G[!88H
 (((* %%%h/ L& 2G1L"# !r 	 8D7Ir5J#(((* %%%h/X& #.D138<07&+46#%7<. -/$KM  $%+0"3O*" 37 /39;;? $$^4$$%67$$%:;$$%56 27.U H " s#   ,J$#*J1*J>$J.1J;>Kreturnc                 <    | j                   t        j                  k(  S )zNWhether the service is streaming tokens directly without sentence aggregation.)r|   rA   rK   rG   s    r?   _is_streaming_tokenszTTSService._is_streaming_tokensh  s     **.A.G.GGGr>   textc                 Z   K   | j                   ryt        | 	  |       d{    y7 w)zRecord TTS usage metrics.

        When streaming tokens, usage metrics are aggregated and reported at
        flush time instead of per token, so individual calls are skipped.

        Args:
            text: The text being processed by TTS.
        N)r   ru   start_tts_usage_metrics)rH   r   r   s     r?   r   z"TTSService.start_tts_usage_metricsm  s(      $$g-d333s    +)+c                 ~   K   | j                   s| j                  ryd| _        t        |           d{    y7 w)zStart text aggregation metrics if not already started.

        Only starts the metric once per LLM response. Skipped when streaming
        tokens since per-token aggregation time is not meaningful.
        NT)r   r   ru   start_text_aggregation_metricsrH   r   s    r?   r   z)TTSService.start_text_aggregation_metricsz  s6      $$(N(N15.g4666s   2=;=c                 L   K   d| _         t        | 	          d{    y7 w)z9Stop text aggregation metrics and reset the started flag.FN)r   ru   stop_text_aggregation_metricsr   s    r?   r   z(TTSService.stop_text_aggregation_metrics  s      16.g3555s   $"$c                     | j                   S )zkGet the current sample rate for audio output.

        Returns:
            The sample rate in Hz.
        )r   rG   s    r?   r`   zTTSService.sample_rate  s        r>   c                 <    d}t        | j                  |z  dz        S )a  Get the recommended chunk size for audio streaming.

        This property indicates how much audio we download (from TTS services
        that require chunking) before we start pushing the first audio
        frame. This will make sure we download the rest of the audio while audio
        is being played without causing audio glitches (specially at the
        beginning). Of course, this will also depend on how fast the TTS service
        generates bytes.

        Returns:
            The recommended chunk size in bytes.
        g      ?rk   )intr`   )rH   CHUNK_SECONDSs     r?   
chunk_sizezTTSService.chunk_size  s$     4##m3a788r>   modelc                 `  K   t        j                         5  t        j                  d       t        j                  dt        d       ddd       t        j                  d| d       t        | j                        }| j                   ||             d{    y# 1 sw Y   XxY w7 w)	zSet the TTS model to use.

        .. deprecated:: 0.0.104
            Use ``TTSUpdateSettingsFrame(model=...)`` instead.

        Args:
            model: The name of the TTS model.
        rj   zK'set_model' is deprecated, use 'TTSUpdateSettingsFrame(model=...)' instead.rk   rl   NzSwitching TTS model to: [])r   
rw   rx   ry   rz   r{   r   infotyperU   _update_settings)rH   r   settings_clss      r?   	set_modelzTTSService.set_model        $$& 	!!(+MM]"	 	/wa89DNN+##Lu$=>>>	 	 	?)   B.2B 	AB.B,B. B)%B.voicec                 `  K   t        j                         5  t        j                  d       t        j                  dt        d       ddd       t        j                  d| d       t        | j                        }| j                   ||             d{    y# 1 sw Y   XxY w7 w)	zSet the voice for speech synthesis.

        .. deprecated:: 0.0.104
            Use ``TTSUpdateSettingsFrame(voice=...)`` instead.

        Args:
            voice: The voice identifier or name.
        rj   zK'set_voice' is deprecated, use 'TTSUpdateSettingsFrame(voice=...)' instead.rk   rl   NzSwitching TTS voice to: [r   )r   r   )rH   r   r   s      r?   	set_voicezTTSService.set_voice  r   r   c                     | j                   r3| j                  r'| j                  | j                         | j                  S t        t	        j
                               S )zGenerate or reuse a context ID based on concurrent TTS support.

        Returns:
            A context ID string for the TTS request.
        )r   r   _refresh_audio_contextrQ   uuiduuid4rG   s    r?   create_context_idzTTSService.create_context_id  sF     --$2G2G''(=(=>(((4::<  r>   rP   c                    K   yw)ap  Run text-to-speech synthesis on the provided text.

        This method must be implemented by subclasses to provide actual TTS functionality.

        Args:
            text: The text to synthesize into speech.
            context_id: Unique identifier for this TTS context.

        Yields:
            Frame: Audio frames containing the synthesized speech.
        Nr=   )rH   r   rP   s      r?   run_ttszTTSService.run_tts        	   languagec                     t        |      S )zConvert a language to the service-specific language format.

        Args:
            language: The language to convert.

        Returns:
            The service-specific language identifier, or None if not supported.
        r-   )rH   r   s     r?   language_to_service_languagez'TTSService.language_to_service_language  s     !!r>   c                 J    | j                   r|j                  d      s|dz   S |S )zPrepare text for TTS by applying any transformations required by the TTS service.

        Args:
            text: The text to prepare.

        Returns:
            The prepared text with transformations applied.
         )r   endswith)rH   r   s     r?   _prepare_text_for_ttsz TTSService._prepare_text_for_tts  s'     &&t}}S/A#:r>   c                    K   yw)zFlush any buffered audio data.

        Args:
            context_id: The specific context to flush. If None, falls back to the
                currently active context (for non-concurrent services).
        Nr=   rH   rP   s     r?   flush_audiozTTSService.flush_audio  s      	r   framec                   K   t         |   |       d{    | j                  xs |j                  | _        | j
                  r0| j                  s$| j                  | j                               | _        | j                          y7 pw)zwStart the TTS service.

        Args:
            frame: The start frame containing initialization parameters.
        N)
ru   startr   audio_out_sample_rater   r~   r   create_task_stop_frame_handler_create_audio_context_taskrH   r   r   s     r?   r   zTTSService.start  sr      gmE""" 22Qe6Q6Q!!$*?*?$($4$4T5M5M5O$PD!'')	 	#s   B	BA1B	c                 N  K   t         |   |       d{    | j                  r*| j                  | j                         d{    d| _        | j                  r?| j
                  j                  d       d{    | j                   d{    d| _        yy7 7 Y7 %7 w)zOStop the TTS service.

        Args:
            frame: The end frame.
        N)ru   stopr   cancel_taskr   _contexts_queueputr   s     r?   r   zTTSService.stop  s      gl5!!!  ""4#8#8999$(D!## &&**4000****'+D$ $	 	"9
 1*sE   B%B.B%B5B%;B!<B%B#B%B%!B%#B%c                    K   t         |   |       d{    | j                  r*| j                  | j                         d{    d| _        | j	                          d{    y7 S7 &7 	w)zTCancel the TTS service.

        Args:
            frame: The cancel frame.
        N)ru   cancelr   r   _stop_audio_context_taskr   s     r?   r   zTTSService.cancel%  se      gnU###  ""4#8#8999$(D!++---	 	$9-s3   A0A*.A0A,A0$A.%A0,A0.A0transform_functionrn   c                 >    | j                   j                  ||f       y)a  Transform text for a specific aggregation type.

        Args:
            transform_function: The function to apply for transformation. This function should take
                the text and aggregation type as input and return the transformed text.
                Ex.: async def my_transform(text: str, aggregation_type: str) -> str:
            aggregation_type: The type of aggregation to transform. This value defaults to "*" indicating
                the function should handle all text before sending to TTS.
        N)r   append)rH   r   rn   s      r?   add_text_transformerzTTSService.add_text_transformer1  s     	$$&68J%KLr>   c                 p    | j                   D cg c]  \  }}||k(  r||k(  s||f c}}| _         yc c}}w )zRemove a text transformer for a specific aggregation type.

        Args:
            transform_function: The function to remove.
            aggregation_type: The type of aggregation to remove the transformer for.
        N)r   )rH   r   rn   agg_typefuncs        r?   remove_text_transformerz"TTSService.remove_text_transformerA  sD     #'"7"7!
$ 00T=O5O t!
 !
s   2deltac                    K   t        |j                        r>t        |j                  t              r$| j	                  |j                        }|||_        t
        |   |       d{   }|S 7 w)zApply a TTS settings delta.

        Translates language to service-specific value before applying.

        Args:
            delta: A TTS settings delta.

        Returns:
            Dict mapping changed field names to their previous values.
        N)r+   r   
isinstancer.   r   ru   r   )rH   r   	convertedchangedr   s       r?   r   zTTSService._update_settingsR  s_      ENN#
5>>8(L99%..II$!*077 8s   A&A2)A0*A2c                    K   ddl } |j                         5   |j                  d        |j                  dt        d       ddd       | j                  t        |             d{    y# 1 sw Y   ,xY w7 w)zImmediately speak the provided text.

        .. deprecated:: 0.0.79
            Push a `TTSSpeakFrame` instead to ensure frame ordering is maintained.

        Args:
            text: The text to speak.
        r   Nrj   zA`TTSService.say()` is deprecated. Push a `TTSSpeakFrame` instead.rk   rl   )rw   rx   ry   rz   r{   queue_framer#   )rH   r   rw   s      r?   sayzTTSService.sayg  su      	$X$$& 	!H!!(+HMMS"	 }T2333	 	 	4s(   A=,A/%A=)A;*A=/A84A=c                   K   | j                   r| j                  | j                        rg| j                  r8| j	                  | j                  t        | j                               d{    | j                  | j                         d{    | j                  | j                         d{    d| _        y7 S7 27 w)z Handle the completion of a turn.rP   N)r   audio_context_availabler   r~   append_to_audio_contextr%   remove_audio_contextr   rG   s    r?   on_turn_context_completedz$TTSService.on_turn_context_completed|  s      11d6R6R!!7
 %%22))?dF[F[+\   ++D,A,ABBB $*?*?@@@ !% C 	As6   A'C)B=*"CB?#C0C1C?CC	directionc                 
  K   t         |   ||       d{    t        |t        t        t
        f      r'|j                  r| j                  ||       d{    yt        |t              r| j                  |       d{    yt        |t              rRt        |t              sBt        |t              s2| j                          d{    | j                  |       d{    yt        |t              r5| j                  ||       d{    | j                  ||       d{    yt        |t              r7d| _        | j#                         | _        | j                  ||       d{    yt        |t
        t&        f      rU| j)                          d{    | j*                  j-                          d{   }| j/                          d{    |r7| j                  t        |j0                  |j2                               d{    | j4                  rMt7        j8                  |  d| j4                   d       t         | u  | j4                         d{    d| _        d| _        t        |t
              r'| j>                  r5| j                  ||       d{    n| j                  ||       d{    | jA                          d{    yt        |tB              r| j<                  }| j$                  }d| _        | j#                         | _        |jD                  xr | j                    }| j                  t        |j0                  tF        jH                        |jD                  |       d{    | jA                          d{    | j)                          d{    || _        || _        yt        |tJ              r|jL                  $| jO                  |jL                         d{    y|jP                  rtS        jT                         5  tS        jV                  d       tS        jX                  d	tZ        d
       ddd       t3        | j\                        j_                  |jP                        }| jO                  |       d{    yyt        |t`              r3| jc                          d{    | j                  ||       d{    y| j                  ||       d{    y7 7 7 7 G7 17 	7 7 7 7 b7 M7 7 7 7 o7 Z7 7 7 7 @# 1 sw Y   xY w7 7 7 l7 Sw)a  Process frames for text-to-speech conversion.

        Handles TextFrames for synthesis, interruption frames, settings updates,
        and various control frames.

        Args:
            frame: The frame to process.
            direction: The direction of frame processing.
        NTz: Generating TTS [r   ro   F)append_tts_text_to_contextr6   rj   zPassing a dict via TTSUpdateSettingsFrame(settings={...}) is deprecated since 0.0.104, use TTSUpdateSettingsFrame(delta=TTSSettings(...)) instead.rk   rl   )2ru   process_framer   r    r   r   skip_tts
push_framer   _push_tts_framesr   r!   r   _process_text_framer   _handle_interruptionr   r   r   r   _maybe_pause_frame_processingr   flushr   r   r   r   r   debugr   r   r}   r   r#   r5   r   rJ   r'   r   r   rg   rw   rx   ry   rz   r{   rU   from_mappingr   _maybe_resume_frame_processing)	rH   r   r   	remainingprocessing_textsaved_turn_context_idr6   r   r   s	           r?   r   zTTSService.process_frame  s~     g#E9555 uy*CE\]^//%33323''...ui(u&?@u&8955777**511101++E9===//%33389)-D&$($:$:$<D!//%333 7BC 44666 #3399;;I44666++,?	PYP^P^,_``` ""v%78K8K7LANOg5d6I6IJJJ&(# %*D!%!89))//%;;;ooeY77700222}-"33O %)$9$9!$(D!$($:$:$<D!).)@)@)cIcIcEc&''#EJJ0H0HI+0+B+B+E (   
 00222 44666$9D!$3D!56{{&++EKK888,,. ))(3MMe*#$	 T^^,99%..I++E222   6755777//%333//%333} 	6 4. 81=3
 4
 7 <6`
 K <72
 3 7
 9  3733s  UT	A UT)UTAUTU$T%*UTU)T*AU0T1/U T!!!UT$UT'8UT*AU%T-&AU)T0*UT3UT6B'UT9UT<U4T?5AUU%U(2UA
U$U%)UUU(U)UUUUUUUUUU!U$U'U*U-U0U3U6U9U<U?UUU
UUUUc                   K   t        |t              r|j                  r|j                  | j                  v r| j                  |j                     j                  r!| j                  t                      d{    t        j                  |  d|j                          | j                  |j                  = | j                  rzt        |t              rjt        | j                  | j                  z  dz        }t        d|z  | j                  d      }| j                  |_        | j                  |       d{    t        |t         t        t        t"        f      r| j                  |_        t$        |   ||       d{    | j&                  ret        |t(              s0t        |t               s t        |t              st        |t              r$| j*                  j-                  |       d{    yyy7 7 7 {7 w)zPush a frame downstream with TTS-specific handling.

        Args:
            frame: The frame to push.
            direction: The direction to push the frame.
        Nz cleaning up TTS context rk          )audior`   num_channels)r   r%   rP   r   r6   r   r   r   r  r   r   r   r`   r"   r   rf   r$   r&   ru   r~   r   r   r   )rH   r   r   silence_num_bytessilence_framer   s        r?   r   zTTSService.push_frame  s     e_-%2B2B4#5#55%%e&6&67RR//*J*LMMMv%>u?O?O>PQR&&u'7'78((Z-O #D$8$84;K;K$Ka$O P, 11 ,,M
 372M2MM///-000eo@PR^_`*.*E*EE'g 	222!!u/0%1%!12%1((,,U333 2	 "' N 1
 	3 4sK   A4H7G88C H8G;9AHG=A.H0G?1H;H=H?H)strip_wav_headerin_sample_raterP   iteratorr  r  c                  K   t               }||}dt        dt        f fd}|2 3 d{   }|rA|j                  d      r0t        |      dk\  rt        j                  |dd d	      |dd }d
}|j                  |       t        |      dz  }	|	dkD  sq |t        |d|	              d{   }
||	d }t        |
      dkD  st        t        |
       j                  d|      }| 7 7 B6 t        |      dkD  rWt        |      dz  dk(  r|j                  d        |t        |             d{  7  }t        | j                  d       yyw)a  Stream audio frames from an async byte iterator with optional resampling.

        For WAV data, use `strip_wav_header=True` to strip the header and
        auto-detect the source sample rate. For raw PCM data, pass
        `in_sample_rate` directly. Audio is resampled to `self.sample_rate` when
        the source rate differs.

        Args:
            iterator: Async iterator yielding audio bytes.
            strip_wav_header: Strip WAV header and parse source sample rate from it.
            in_sample_rate: Source sample rate for raw PCM data. Overrides
                WAV-detected rate if both are provided.
            context_id: Unique identifier for this TTS context.

        r  r   c                    K   r>j                   k7  r/j                  j                  | j                          d {   S | S 7 wrE   )r`   r   resample)r  rH   source_sample_rates    r?   maybe_resamplezETTSService._stream_audio_frames_from_iterator.<locals>.maybe_resample:  sG     !&8D<L<L&L!__55e=OQUQaQabbbL cs   <AA ANs   RIFF,         littleFr   r  r   rk   r  )		bytearraybytes
startswithlenr   
from_bytesextendr"   r`   )rH   r  r  r  rP   bufferneed_to_strip_wav_headerr  chunkaligned_lengthaligned_chunkr   r  r  s   `            @r?   "_stream_audio_frames_from_iteratorz-TTSService._stream_audio_frames_from_iterator  sp    . +#3 	 	% 	
 $ 	  	 %'E,<,<W,Eu:#(:(B),bh)O&bc
+0( MM%  ![2-N!&4U6/>;R5S&T T0}%),m,d.>.>jE  K+	  !U $. v;?6{Q!#g&(v777E"5$*:*:A>> sQ   $EC5C1C5A(EE0C31E	(E1C53E5AE:D=;!Ec                   K   d| _         | j                  j                          d {    | j                  D ]  }|j                          d {     d| _        d| _        d| _        | j                          d {    | j                          d {    | j                         }|r!|D ]  }| j                  |       d {     | j                          d | _        d| _        | j                          y 7 7 7 7 l7 =w)NFro   r   r   )r   r   handle_interruptionr   r   r   r   reset_word_timestampsr   get_audio_contextson_audio_context_interruptedreset_active_audio_contextr   r   r   )rH   r   r   filteraudio_contextsctx_ids         r?   r  zTTSService._handle_interruption]  s     %##77999(( 	/F,,...	/ &+" 16.((***++---002( K7767JJJK'') $'')# 	:.
 	+- KsW   %DC<&DC>.D=D >DD0DD6D>D DDDc                 t   K   | j                   r&| j                  r| j                          d {    y y y 7 wrE   )r   r   pause_processing_framesrG   s    r?   r  z(TTSService._maybe_pause_frame_processingr  s5       T%A%A..000 &B 0s   ,868c                 Z   K   | j                   r| j                          d {    y y 7 wrE   )r   resume_processing_framesrG   s    r?   r  z)TTSService._maybe_resume_frame_processingv  s)     ''//111 (1s    +)+c                   K   | j                   j                  |j                        2 3 d {   }|j                  t        j
                  k(  r|j                  nd}|j                  t        j
                  k7  r| j                          d {    | j                  t        |j                  |j                        |       d {    7 7 ?7 	6 y w)NF)
r   	aggregater   r   r   rK   includes_inter_frame_spacesr   r   r   )rH   r   r8  r9  s       r?   r   zTTSService._process_text_framez  s     #44>>uzzJ 	 	) >>_%:%:: 11 (
 ~~!6!6688:::''#INNINNCE`  	 ;  KsJ   &CCC
CACC7CCC
CCCC	src_framer9  r   r6   c                 x  K   |j                   }|j                  }|| j                  v r| j                  |       d {    y |j	                  d      }|j                         sy d| _        | j                  r| xj                  |z  c_        | j                  s| j                          d {    | j                  D ]3  }|j                          d {    |j                  |       d {   }5 |j                         s%| j                  s| j                          d {    y | j                         }d|_        ||_        | j                  |       d {    |}	| j"                  D ]!  \  }
}|
|k(  s|
dk(  s ||	|       d {   }	# t%        ||nd|      | j&                  |<   | j)                  |	      }| j+                  d||       d {    | j,                  rf| j/                  |      sU| j1                  |       d {    | j3                          d {    | j5                  |t7        |             d {    | j9                  || j;                  ||             d {    | j                  s| j                          d {    | j<                  rjt?        ||      }||_         ||_        |||_        | j/                  |      r| j5                  ||       d {    y | j                  |       d {    y y 7 7 i7 E7 /7 7 7 7 Q7 7 	7 7 7 7 D7 ,w)	N
TF*)r5   r6   rt   r   aggregated_by)!r?  r   r   r   lstripstripr   r   r   start_processing_metricsr   reset_interruptionr0  stop_processing_metricsr   r5   rP   r   r4   r   r   _call_event_handlerr   r   create_audio_contextstart_ttfb_metricsr   r$   tts_process_generatorr   r}   r&   r9  )rH   r:  r9  r   r6   r   r   r0  rP   transformed_textrn   	transformprepared_textr   s                 r?   r   zTTSService._push_tts_frames  s-     &&~~ 4...//),,, {{4  zz|
 !% $$4'
 ((//111 (( 	-F++---t,,D	- zz|,,22444 ++-
 ',	#)	ooi(((  +/+@+@ 	K'i4'+;s+B)23CT)J#J 	K *4)5 9'A	*
:& 223CD &&'7]SSS!!$*F*Fz*R++J777))+++..z?V`;abbb((T\\-Q[5\]]]((..000!! !T:E0KE-)E)5*D'++J722:uEEEooe,,,% "a -0 2 ., 5 	) $K 	T 8+b] 1$ F,s  ;L:LA.L:,L-&L:LL:,L-5L:"L#7L:L!"L:>L:
L$AL:L'5L:L*L:$L-%#L:L0	)L:2L23#L:L4AL:1L62L:L8L:L:L:L:L:!L:$L:'L:*L:-L:0L:2L:4L:6L:8L:	generatorc                    K   d}|2 3 d{   }|s| j                  ||       d{    t        |t              s7d}:7 57 6 || _        yw)u  Process frames from an async generator, routing them through the audio context.

        All non-None frames yielded by the generator are appended to the audio context
        identified by context_id. The audio context must be created by run_tts (via
        create_audio_context) before the first frame is yielded.

        WebSocket services yield None to signal that audio will arrive via a separate
        receive loop; those services manage context lifetime themselves (via remove_audio_context
        in the receive loop on "done"). HTTP services never yield None and do NOT call
        remove_audio_context in run_tts — the caller (_synthesize_text) closes the context
        after appending any remaining frames (e.g. TTSTextFrame).

        Args:
            context_id: The audio context to route frames to.
            generator: An async generator yielding Frame objects or None.

        FNT)r   r   r"   r   )rH   rP   rL  is_yielding_framesr   s        r?   rH  z TTSService.tts_process_generator  sa     ( #$ 	. 	.%22:uEEEe%56)-&		.E % 2D.s?   AAAAAAAAAAA	Ac                   K   d}d }	 	 t        j                  | j                  j                         | j                         d {   }t        |t              r|j                  }d}nt        |t        t        f      rd}z7 <# t         j                  $ r+ |r&| j                  t        |             d {  7   d}Y @w xY ww)NFTtimeoutr   )r   wait_forr   getr   r   r$   rP   r%   r   TimeoutErrorr   )rH   has_startedrP   r   s       r?   r   zTTSService._stop_frame_handler  s     
(%..**..0$:T:T  e_5!&!1!1J"&K9J'KL"'K  '' (///Z*PQQQ"'K(sF   C<B B;B CB 3C7B:8C?CCCc                 \  K   | j                   dk(  r| j                         j                         }| j                  |kD  r| j                  n|| _         | j                  rJ| j                  j                         }g | _        |D ]#  \  }}}| j                  ||fg|       d{    % yyy7 	w)z5Start tracking word timestamps from the current time.rp   N)r   	get_clockget_timer   r   copy_add_word_timestamps)rH   current_timecachedrN   timestamp_secondsr2  s         r?   start_word_timestampsz TTSService.start_word_timestamps,  s     ''2->>+446L (,':':\'I##| (
 ''11668+-(7= Y3D+V33d<M5N4OQWXXXY ( . Ys   BB, B*!
B,c                    K   d| _         yw)zReset word timestamp tracking.rp   N)r   rG   s    r?   r,  z TTSService.reset_word_timestamps=  s     ')$s   	
word_timesc                    K   |rN| j                  |      r=|D ]7  \  }}| j                  |   j                  t        |||             d{    9 y| j	                  ||       d{    y7 #7 w)a  Add word timestamps for processing.

        When an audio context exists for this context_id, timestamps are routed into the
        per-context audio queue alongside audio frames so they are processed in strict
        playback order by _handle_audio_context. Otherwise they are processed immediately
        via _add_word_timestamps.

        Args:
            word_times: List of (word, timestamp) tuples where timestamp is in seconds.
            context_id: Unique identifier for the TTS context.
        )rN   rO   rP   N)r`  rP   )r   r   r   rM   rZ  )rH   r`  rP   rN   rO   s        r?   add_word_timestampszTTSService.add_word_timestampsA  s      $66zB#- i**:6::'!"+#-   ++zj+YYY Zs$   A	A3A/A3)A1*A31A3c                 (  K   |D ]~  \  }}|dk(  rf|dk(  ra| j                          d{    | j                  s6d| _        t               }| j                  |_        | j                  |       d{    r|dk(  rC|dk(  r>t        |      }| j                  |_        ||_        | j                  |       d{    t        |      }| j                  dk(  r| j                  j                  |||f       t        |t        j                        }| j                  |z   |_        ||_        || j                  v r| j                  |   j                   |_        |j                  | _        | j                  |       d{     y7 _7 7 7 w)	a  Process word timestamps directly, building and pushing frames inline.

        This is the single processing path for all word timestamp events, used both
        from _handle_audio_context (via _WordTimestampEntry) and from services that
        do not use audio contexts. Sentinel entries drive control-frame emission:

        - ("Reset", 0): reset timestamp baseline; emit LLMFullResponseEndFrame if needed.
        - ("TTSStoppedFrame", 0): emit TTSStoppedFrame.
        - Any other entry: emit TTSTextFrame with a PTS relative to the baseline.

        When the baseline (_initial_word_timestamp) is not yet set, regular word entries
        are cached in _initial_word_times and flushed once start_word_timestamps() is
        called (i.e. when the first audio chunk is received).
        Resetr   NFr%   r   rp   r>  )r,  r   r   r   ptsr   r%   rP   r2   r   r   r   r&   r   WORDr   r5   )rH   r`  rP   rN   rO   r   ts_nss          r?   rZ  zTTSService._add_word_timestamps[  sm    "  * 	1OD)w9>00222--16D.35E $ 3 3EI//%000**yA~':> //	#- ooe,,,.y9//25,,33T9j4QR )_=Q=QRE $ < <u DEI'1E$!T%7%77262D2DZ2P2b2b/*/))D'//%0007	12
 1
 - 1sL   'FFF6F1F2AF9F:CF?F 	FFFFc                    K   | j                   j                  |       d{    t        j                         | j                  |<   t        j                  |  d|        y7 @w)zCreate a new audio context for grouping related audio.

        Args:
            context_id: Unique identifier for the audio context.
        Nz created audio context )r   r   r   r   r   r   tracer   s     r?   rF  zTTSService.create_audio_context  sU      ""&&z222+2==?Z(v4ZLAB 	3s   A$A"AA$c                 8  K   |st        j                  |  d       y| j                  |      rDt        j                  |  d| d|        | j                  |   j                  |       d{    y|| j                  k(  rwt        j                  |  d|        | j                  |       d{    t        j                  |  d| d|        | j                  |   j                  |       d{    yt        j                  |  d|        y7 7 e7 $w)zAppend audio or control frame to an existing context.

        Args:
            context_id: The context to append audio to.
            frame: The audio or control frame to append.
        z: unable to append audio to context: no context ID providedNz appending audio z to audio context z recreating audio context z# unable to append audio to context )	r   r  r   ri  r   r   r   rF  warning)rH   rP   r   s      r?   r   z"TTSService.append_to_audio_context  s     LLD6![\]''
3LLD6!25'9KJ<XY&&z266u===4000 LLD6!;J<HI++J777LLD6!25'9KJ<XY&&z266u===NNdV#FzlST >
 8=s8   A+D-D.AD0D1AD3D4!DDDc                    K   | j                  |      rBt        j                  |  d| d       | j                  |   j	                  d       d{    yt        j
                  |  d|        y7  w)zhRemove an existing audio context.

        Args:
            context_id: The context to remove.
        z marking audio context z for deletionNz unable to remove context )r   r   ri  r   r   rk  r   s     r?   r   zTTSService.remove_audio_context  sn      ''
3 LLD6!8MRS&&z266t<<<NNdV#=j\JK =s   AA3A1!A3c                 X    | j                   duxr | j                  | j                         S )zCheck if there is an active audio context.

        Returns:
            True if an active audio context exists, False otherwise.
        N)r   r   rG   s    r?   has_active_audio_contextz#TTSService.has_active_audio_context  s2     ''t3 
8T8T$$9
 	
r>   c                 H    t        | j                  j                               S )z+Get a list of all available audio contexts.)listr   keysrG   s    r?   r-  zTTSService.get_audio_contexts  s    D((--/00r>   c                     | j                   S )zGet the active audio context ID.

        Returns:
            The active context ID, or None if no context is active.
        r   rG   s    r?   get_active_audio_context_idz&TTSService.get_active_audio_context_id  s     '''r>   c                    K   | j                   r4| j                  | j                          d{    | j                          yy7 w)z Remove the active audio context.N)r   r   r/  rG   s    r?   remove_active_audio_contextz&TTSService.remove_active_audio_context  s=     ##++D,D,DEEE++- $Es   +AAAc                     d| _         y)zReset the active audio context.Nrs  rG   s    r?   r/  z%TTSService.reset_active_audio_context  s
    #' r>   c                     || j                   v S )zCheck whether the given audio context is registered.

        Args:
            context_id: The context ID to check.

        Returns:
            True if the context exists and is available.
        )r   r   s     r?   r   z"TTSService.audio_context_available  s     T1111r>   c                     | j                  |      r-| j                  |   j                  t        j                         yy)zESignal that the audio context is still in use, resetting the timeout.N)r   r   
put_nowaitrT   _CONTEXT_KEEPALIVEr   s     r?   r   z!TTSService._refresh_audio_context  s4    ''
3  ,77
8U8UV 4r>   c                     | j                   sEt        j                         | _        i | _        | j                  | j                               | _         y y rE   )r   r   r   r   r   r   _audio_context_task_handlerrG   s    r?   r   z%TTSService._create_audio_context_task  sB    ''29--/D =?D '+'7'78X8X8Z'[D$ (r>   c                 ~   K   | j                   r+| j                  | j                          d {    d | _         y y 7 wrE   )r   r   rG   s    r?   r   z#TTSService._stop_audio_context_task  s9     ##""4#;#;<<<'+D$ $<s   +=;=c                 X  K   d}|r| j                   j                          d{   }|| _        |rQ| j                  |       d{    | j                  |= | j                  |       d{    | j                          nd}| j                   j                          |ryy7 7 _7 :w)z0In this task we process audio contexts in order.TNr   F)r   rS  r   _handle_audio_contextr   on_audio_context_completedr/  	task_done)rH   runningrP   s      r?   r}  z&TTSService._audio_context_task_handler  s     #337799J'1D$ 00<<< ((4555LLL//1  **,! 9 = Ms9   "B*B$!B*B&&B*-B(.3B*"B*&B*(B*c                   K   d}| j                   |   }d}d}|r 	 t        j                  |j                         |       d{   }|t        j
                  u rD|d}nt        |t              r<| j                  |j                  |j                  fg|j                         d{    t        |t              r4|s2| j                          d{    | j                          d{    d}|rCt        |t              r| j!                  |       d{    n| j#                  |       d{    |ryy7 7 7 m7 W7 ,7 # t        j$                  $ r t'        j(                  |  d|        Y yw xY ww)z@Process items from an audio context queue until it is exhausted.g      @TFrP  Nz time out on audio context )r   r   rR  rS  rT   r{  r   rM   rZ  rN   rO   rP   r"   stop_ttfb_metricsr^  r   push_error_framer   rT  r   ri  )rH   rP   AUDIO_CONTEXT_TIMEOUTqueuer  timestamps_startedr   s          r?   r  z TTSService._handle_audio_context  sg     #$$Z0"%..uyy{DYZZJ999]#G':; 33**eoo679I9I   '78-"44666"88:::-1*!%4"33E:::"ooe4447 Z 7:
 ;4'' v%@MNs   E;(E D<E E;AE 'D>(E ,E;-%E E E *E+,E EE 1E2E 6E;:E;<E >E  E E E E -E85E;7E88E;c                    K   yw)ac  Called when an audio context is cancelled due to an interruption.

        Override this in a subclass to perform provider-specific cleanup (e.g.
        sending a cancel/close message over the WebSocket) when the bot is
        interrupted mid-speech.  The audio context task has already been stopped
        and the active context has **not** yet been reset when this is called,
        so ``context_id`` reflects the context that was cut short.

        Args:
            context_id: The ID of the audio context that was interrupted, or
                ``None`` if no context was active at the time.
        Nr=   r   s     r?   r.  z'TTSService.on_audio_context_interrupted4  r   r   c                    K   yw)a  Called after an audio context has finished playing all of its audio.

        Override this in a subclass to perform provider-specific cleanup (e.g.
        sending a close-context message to free server-side resources) once an
        audio context has been fully processed.  The context entry has already
        been removed from the internal context map, and the active context has
        **not** yet been reset when this is called.

        Args:
            context_id: The ID of the audio context that finished processing.
        Nr=   r   s     r?   r  z%TTSService.on_audio_context_completedC  s      	r   rE   )r=  )FTF)\r7   r8   r9   r:   r*   r<   objectr{  r   rA   r;   rR   r   r/   r   rQ   r   r   r
   r	   r   r0   rv   propertyr   r   r   r   r`   r   r   r   r   r   r   r   r   r.   r   r   r   r   r   r   r   r   r   r   r   dictr   r   r   r   r(   r   
DOWNSTREAMr   r   r  r)  r   r  r  r  r    r   r   r   rH  r   r^  r,  rb  rZ  rF  r   r   rn  r-  rt  rv  r/  r   r   r   r   r}  r  r.  r  __classcell__r   s   @r?   rT   rT   m   s   > 
 @D.2 "&!&!&&)(- #', ',%)8<57 ;?04/3*.-1YU7  ((;<U7 &d^	U7 U7 U7 U7 $U7 "&U7" #U7& !%'U7,  $-U70 c]1U74 ""455U78  (S	29U7@ "o+XsC/<Q6RT]^aTb6b-ccd
AU7L x78MU7N n-OU7R  (}SU7T ;'UU7X '+YU7n Hd H H4# 4	76
 !S ! ! 9C 9 9 ?S ?(?S ?(	!3 	! # 3 >%QU+;V  	"X 	"(3- 	"# # HSM 
* 
*, ,"
.+ 
. 36M$c?S+@%A9S>%QRM *C/M& 36
$c?S+@%A9S>%QR
 *C/
"K DcN *4c 4*%&h4 h4> h4T JXIbIb $4e $4 $4T "'(,$(<?&<? 	<?
 !<? SM<? 
t	$<?|*0A *n **12y " 7<595:o-&o- &.d^o- %-TN	o-
 %-TNo-bDD*8t9K*LD	D:(,Y"*
 PTZuS%Z01Z?G}Z6 PT,1uS%Z01,1?G},1dCS CU UE U.LS L
$ 
1DI 1(Xc] (.(	2# 	2$ 	2W W
\,
-*%c %NS 3 r>   rT   c                   "     e Zd ZdZ fdZ xZS )WordTTSServicezDeprecated. Use TTSService directly instead.

    .. deprecated:: 0.0.105
        Word timestamp functionality is now always active in TTSService.
    c                 $    t        |   di | y)zInitialize the Word TTS service.

        Args:
            **kwargs: Additional arguments passed to the parent TTSService.
        Nr=   ru   rv   rH   r   r   s     r?   rv   zWordTTSService.__init__Y       	"6"r>   r7   r8   r9   r:   rv   r  r  s   @r?   r  r  R      # #r>   r  c                   .    e Zd ZdZdddefdZdefdZy)	WebsocketTTSServicea  Base class for websocket-based TTS services.

    Combines TTS functionality with websocket connectivity, providing automatic
    error handling and reconnection capabilities.

    Event handlers:
        on_connection_error: Called when a websocket connection error occurs.

    Example::

        @tts.event_handler("on_connection_error")
        async def on_connection_error(tts: TTSService, error: str):
            logger.error(f"TTS connection error: {error}")
    Treconnect_on_errorr  c                `    t        j                  | fi | t        j                  | fd|i| y)zInitialize the Websocket TTS service.

        Args:
            reconnect_on_error: Whether to automatically reconnect on websocket errors.
            **kwargs: Additional arguments passed to parent classes.
        r  N)rT   rv   r,   )rH   r  r   s      r?   rv   zWebsocketTTSService.__init__r  s1     	D+F+!!$X;MXQWXr>   errorc                    K   | j                  d|j                         d {    | j                  |       d {    y 7 7 w)Nrs   )rE  r  r  )rH   r  s     r?   _report_errorz!WebsocketTTSService._report_error|  s?     &&'<ekkJJJ##E*** 	K*s    AAAAAAN)r7   r8   r9   r:   r;   rv   r   r  r=   r>   r?   r  r  b  s&     6: Yd Y+ +r>   r  c                   J     e Zd ZdZ fdZdedef fdZdedef fdZ	 xZ
S )InterruptibleTTSServicezWebsocket-based TTS service that handles interruptions without word timestamps.

    Designed for TTS services that don't support word timestamps. Handles interruptions
    by reconnecting the websocket when the bot is speaking and gets interrupted.
    c                 2    t        |   di | d| _        y)zInitialize the Interruptible TTS service.

        Args:
            **kwargs: Additional arguments passed to the parent WebsocketTTSService.
        FNr=   )ru   rv   _bot_speakingr  s     r?   rv   z InterruptibleTTSService.__init__  s     	"6"
 #r>   r   r   c                    K   t         |   ||       d {    | j                  r1| j                          d {    | j	                          d {    y y 7 B7  7 
wrE   )ru   r  r  _disconnect_connectrH   r   r   r   s      r?   r  z,InterruptibleTTSService._handle_interruption  sX     g*5)<<<""$$$--/!!  	=$!s1   A A#A AA AA A A c                    K   t         |   ||       d{    t        |t              rd| _        yt        |t
              rd| _        yy7 5w)zProcess frames with bot speaking state tracking.

        Args:
            frame: The frame to process.
            direction: The direction of frame processing.
        NTF)ru   r   r   r   r  r   r  s      r?   r   z%InterruptibleTTSService.process_frame  sN      g#E9555e45!%D67!&D 8	 	6s   AA6A)r7   r8   r9   r:   rv   r   r(   r  r   r   r  r  s   @r?   r  r    s9    #"0A "n "' '> ' 'r>   r  c                   .     e Zd ZdZdddef fdZ xZS )WebsocketWordTTSServicezDeprecated. Use WebsocketTTSService directly instead.

    .. deprecated:: 0.0.105
        Word timestamp functionality is now always active in TTSService.
    Tr  r  c                (    t        |   dd|i| y)zInitialize the Websocket Word TTS service.

        Args:
            reconnect_on_error: Whether to automatically reconnect on websocket errors.
            **kwargs: Additional arguments passed to parent classes.
        r  Nr=   r  )rH   r  r   r   s      r?   rv   z WebsocketWordTTSService.__init__  s     	I,>I&Ir>   r7   r8   r9   r:   r;   rv   r  r  s   @r?   r  r    s!     6: Jd J Jr>   r  c                   "     e Zd ZdZ fdZ xZS )InterruptibleWordTTSServicezDeprecated. Use InterruptibleTTSService directly instead.

    .. deprecated:: 0.0.105
        Word timestamp functionality is now always active in TTSService.
    c                 $    t        |   di | y)zInitialize the Interruptible Word TTS service.

        Args:
            **kwargs: Additional arguments passed to the parent InterruptibleTTSService.
        Nr=   r  r  s     r?   rv   z$InterruptibleWordTTSService.__init__  r  r>   r  r  s   @r?   r  r    r  r>   r  c                   4     e Zd ZdZddddedef fdZ xZS )AudioContextTTSServicea  Deprecated. Inherit from WebsocketTTSService directly instead.

    Audio context management (previously the main purpose of this class) is now
    built into TTSService. This class is kept only for backwards compatibility.

    .. deprecated:: 0.0.105
        Subclass :class:`WebsocketTTSService` directly and pass
        ``reuse_context_id_within_turn`` as
        keyword arguments to its ``__init__``.
    Trh   r  rh   r  c                d    ddl } |j                  dt        d       t        |   d||d| y)a[  Initialize the Audio Context TTS service.

        Args:
            reuse_context_id_within_turn: Whether the service should reuse context IDs within the same turn.
            reconnect_on_error: Whether to automatically reconnect on websocket errors.
            **kwargs: Additional arguments passed to the parent WebsocketTTSService.
        r   NzAudioContextTTSService is deprecated. Inherit from WebsocketTTSService directly and pass reuse_context_id_within_turn as kwargs.rk   rl   r  r=   rw   rz   r{   ru   rv   )rH   rh   r  r   rw   r   s        r?   rv   zAudioContextTTSService.__init__  sE     	?		
 	 	
)E1	
 	
r>   r  r  s   @r?   r  r    s.    	 .2#'	
 '+
 !	
 
r>   r  c                   .     e Zd ZdZdddef fdZ xZS )AudioContextWordTTSServicezDeprecated. Use WebsocketTTSService directly instead.

    .. deprecated:: 0.0.105
        Subclass :class:`WebsocketTTSService` directly.
    Tr  r  c                b    ddl } |j                  dt        d       t        |   dd|i| y)zInitialize the Audio Context Word TTS service.

        Args:
            reconnect_on_error: Whether to automatically reconnect on websocket errors.
            **kwargs: Additional arguments passed to parent classes.
        r   NzTAudioContextWordTTSService is deprecated. Inherit from WebsocketTTSService directly.rk   rl   r  r=   r  )rH   r  r   rw   r   s       r?   rv   z#AudioContextWordTTSService.__init__  s7     	b	

 	I,>I&Ir>   r  r  s   @r?   r  r    s!     6: Jd J Jr>   r  )Or:   r   r   rw   abcr   dataclassesr   enumr   typingr   r   r   r	   r
   r   r   r   r   r   logurur   pipecat.audio.utilsr   pipecat.frames.framesr   r   r   r   r   r   r   r   r   r   r   r   r   r   r    r!   r"   r#   r$   r%   r&   r'   "pipecat.processors.frame_processorr(   pipecat.services.ai_servicer)   pipecat.services.settingsr*   r+   "pipecat.services.websocket_servicer,   pipecat.transcriptions.languager.   'pipecat.utils.text.base_text_aggregatorr/   #pipecat.utils.text.base_text_filterr0   )pipecat.utils.text.simple_text_aggregatorr1   pipecat.utils.timer2   r4   rQ   rA   rM   rT   r  r  r  r  r  r  r  r=   r>   r?   <module>r     s   0     !     7     0 > 1 ; ? 4 F > J 5 7 7 7 #t "   b bJ'#Z # +*&6 +>&'1 &'RJ1 J"#"9 # &
0 &
RJ!7 Jr>   