
    qiC                        d Z ddlZddlZddlZddlZddlmZmZ ddlm	Z	m
Z
mZmZmZmZmZmZmZmZ ddlZddlZddlmZ ddlmZ d e        ZddlmZ dd	lmZmZm Z m!Z! 	 dd
l"m#Z$ ddl%m&Z& ddl+m,Z,m-Z-m.Z.m/Z/m0Z0m1Z1m2Z2m3Z3m4Z4 ddl5m6Z6 ddl7m8Z8m9Z9m:Z: ddl;m<Z< e G d de             Z= G d de9      Z> G d de:      Z?y# e'$ r7Z( ejR                  de(         ejR                  d        e*de(       dZ([(ww xY w)u  Inworld AI Text-to-Speech Service Implementation.

Contains two TTS services:
- InworldTTSService: WebSocket-based TTS service.
- InworldHttpTTSService: HTTP-based TTS service.

Inworld’s text-to-speech (TTS) models offer ultra-realistic, context-aware speech synthesis and precise voice cloning capabilities, enabling developers to build natural and engaging experiences with human-like speech quality at an accessible price point.
    N)	dataclassfield)
AnyAsyncGeneratorClassVarDictListLiteralMappingOptionalSelfTuple)logger)versionzpipecat/)	BaseModel)	NOT_GIVENTTSSettings	_NotGiven_warn_deprecated_param)connect)StatezException: zLIn order to use Inworld WebSocket TTS, you need to `pip install websockets`.zMissing module: )	CancelFrameEndFrame
ErrorFrameFrameInterruptionFrame
StartFrameTTSAudioRawFrameTTSStartedFrameTTSStoppedFrame)FrameDirection)TextAggregationMode
TTSServiceWebsocketTTSService)
traced_ttsc                        e Zd ZU dZ ed       Zeez  ed<    ed       Z	eez  ed<   ddd	Z
eeeef      ed
<   edeeef   def fd       Z xZS )InworldTTSSettingszSettings for InworldTTSService and InworldHttpTTSService.

    Parameters:
        speaking_rate: Speaking rate for speech synthesis.
        temperature: Temperature for speech synthesis.
    c                      t         S Nr        N/opt/pipecat/venv/lib/python3.12/site-packages/pipecat/services/inworld/tts.py<lambda>zInworldTTSSettings.<lambda>N   s    Y r,   )default_factoryspeaking_ratec                      t         S r)   r*   r+   r,   r-   r.   zInworldTTSSettings.<lambda>O   s    9 r,   temperaturevoicemodel)voiceIdmodelId_aliasessettingsreturnc                     t        |      }|j                  dd      }t        |t               r!|j                  d|j	                  d             t
        |   |      S )zRConstruct settings from a plain dict, destructuring legacy nested ``audioConfig``.audioConfigNr0   speakingRate)dictpop
isinstance
setdefaultgetsuperfrom_mapping)clsr8   flatnested	__class__s       r-   rC   zInworldTTSSettings.from_mappingV   sO     H~-.fd#OOOVZZ-GHw#D))r,   )__name__
__module____qualname____doc__r   r0   floatr   __annotations__r2   r7   r   r   strclassmethodr   r   r   rC   __classcell__rG   s   @r-   r'   r'   E   s     (-=N'OM59$O%*;L%MK"M *HhtCH~& 
 *GCH$5 *$ * *r,   r'   c                       e Zd ZU dZeZeed<    G d de      Zddddddddd	d
e	de
j                  dee	   dee	   dedee   de	deed      dee   dee   f fdZdefdZdef fdZej*                  fdedef fdZdee	ef   deeee	ef      ef   fdZede	de	deedf   fd        Z d!e
jB                  de	deedf   fd"Z"d!e
jB                  de	deedf   fd#Z#d$e$de	deedf   fd%Z% xZ&S )&InworldHttpTTSServicezInworld AI HTTP-based TTS service.

    Supports both streaming and non-streaming modes via the `streaming` parameter.
    Outputs LINEAR16 audio at configurable sample rates with word-level timestamps.
    	_settingsc                   T    e Zd ZU dZdZee   ed<   dZee   ed<   dZ	ee
d      ed<   y)!InworldHttpTTSService.InputParamsa  Input parameters for Inworld TTS configuration.

        .. deprecated:: 0.0.105
            Use ``InworldTTSSettings`` directly via the ``settings`` parameter instead.

        Parameters:
            temperature: Temperature for speech synthesis.
            speaking_rate: Speaking rate for speech synthesis.
            timestamp_transport_strategy: The strategy to use for timestamp transport.
        Nr2   r0   ASYNCrW   SYNCtimestamp_transport_strategy)rH   rI   rJ   rK   r2   r   rL   rM   r0   rZ   r
   r+   r,   r-   InputParamsrV   j   s:    		 (,Xe_+)-x-KR$hw/G&HRr,   r[   NTLINEAR16rW   )voice_idr4   	streamingsample_rateencodingrZ   paramsr8   api_keyaiohttp_sessionr]   r4   r^   r_   r`   rZ   rX   ra   r8   c       
   	      <   t        ddddd      }|t        dt         d       ||_        |t        dt         d       ||_        |	dt        dt                |
sR|	j                  |	j                  |_        |	j
                  |	j
                  |_        |	j                  |	j                  }|
|j                  |
       t        | $  dd	d
d
||d| || _
        || _        || _        d| _        |rd| _        nd| _        d| _        || _        d| _        || _        y)a  Initialize the Inworld TTS service.

        Args:
            api_key: Inworld API key.
            aiohttp_session: aiohttp ClientSession for HTTP requests.
            voice_id: ID of the voice to use for synthesis.

                .. deprecated:: 0.0.105
                    Use ``settings=InworldTTSSettings(voice=...)`` instead.

            model: ID of the model to use for synthesis.

                .. deprecated:: 0.0.105
                    Use ``settings=InworldTTSSettings(model=...)`` instead.

            streaming: Whether to use streaming mode.
            sample_rate: Audio sample rate in Hz.
            encoding: Audio encoding format.
            timestamp_transport_strategy: Strategy for timestamp transport
                ("ASYNC" or "SYNC"). Defaults to "ASYNC".
            params: Input parameters for Inworld TTS configuration.

                .. deprecated:: 0.0.105
                    Use ``settings=InworldTTSSettings(...)`` instead.

            settings: Runtime-updatable settings. When provided alongside deprecated
                parameters, ``settings`` values take precedence.
            **kwargs: Additional arguments passed to the parent class.
        inworld-tts-1.5-maxAshleyNr4   r3   languager0   r2   r]   r3   r4   ra   FT)push_text_framespush_stop_framespush_start_framer_   r8   WORDz*https://api.inworld.ai/tts/v1/voice:streamz#https://api.inworld.ai/tts/v1/voice        r   r+   )r'   r   r3   r4   r0   r2   rZ   apply_updaterB   __init___api_key_session
_streaming_timestamp_type	_base_url_cumulative_time_audio_encoding_audio_sample_rate_timestamp_transport_strategy)selfrb   rc   r]   r4   r^   r_   r`   rZ   ra   r8   kwargsdefault_settingsrG   s                r-   ro   zInworldHttpTTSService.__init__z   sQ   Z .'
 ":/A7K%-""7,>H%*" "8-?@''35;5I5I$2%%1393E3E$066B393V3V0 ))(3 	
"!!#%	
 	
  '#%IDNBDN #  ("#-I*r,   r9   c                      y)zCheck if this service can generate processing metrics.

        Returns:
            True, as Inworld TTS service supports metrics generation.
        Tr+   ry   s    r-   can_generate_metricsz*InworldHttpTTSService.can_generate_metrics        r,   framec                 b   K   t         |   |       d{    | j                  | _        y7 w)zZStart the Inworld TTS service.

        Args:
            frame: The start frame.
        N)rB   startr_   rw   ry   r   rG   s     r-   r   zInworldHttpTTSService.start   s.      gmE""""&"2"2 	#s   /-/	directionc                    K   t         |   ||       d{    t        |t        t        f      r3d| _        t        |t              r| j                  dg       d{    yyy7 N7 	w)Push a frame and handle state changes.

        Args:
            frame: The frame to push.
            direction: The direction to push the frame.
        Nrm   Resetr   )rB   
push_framer?   r   r    ru   add_word_timestampsry   r   r   rG   s      r-   r   z InworldHttpTTSService.push_frame   sl      g 	222e/AB$'D!%1..~>>> 2 C 	3 ?s"   A*A&AA*A(A*(A*timestamp_infoc                 r   g }d}|j                  di       }|j                  dg       }|j                  dg       }|j                  dg       }|rf|rdt        |      t        |      k(  rMt        |      D ]*  \  }}	| j                  ||   z   }
|j	                  |	|
f       , |rt        |      dkD  r|d   }||fS )a  Calculate word timestamps from Inworld HTTP API word-level response.

        Note: Inworld HTTP provides timestamps that reset for each request.
        We track cumulative time across requests to maintain continuity.

        Args:
            timestamp_info: The timestamp information from Inworld API.

        Returns:
            Tuple of (word_times, chunk_end_time) where chunk_end_time is the
            end time of the last word in this chunk (not cumulative).
        rm   wordAlignmentwordswordStartTimeSecondswordEndTimeSecondsr   )rA   len	enumerateru   append)ry   r   
word_timeschunk_end_time	alignmentr   start_times	end_timesiword
word_starts              r-   _calculate_word_timesz+InworldHttpTTSService._calculate_word_times   s      /1
"&&;	gr*mm$:B?MM"6;	[SZ3{3C%C$U+ 64!22[^C
!!4"456
 S^a/!*2N++r,   text
context_idc                  K   t        j                  |  d| d| j                   d       | j                  | j                  d}| j
                  j                  | j
                  j                  |d<   || j
                  j                  | j
                  j                  |d}| j
                  j                  | j
                  j                  |d<   | j                  |d	<   | j                  | j                  |d
<   t        t        j                               }d| j                   dt         |d}	 | j"                  j%                  | j&                  ||      4 d{   }|j(                  dk7  ro|j+                          d{   }t        j,                  d| d|        t/        d|        	 ddd      d{    | j1                          d{    y| j                  r!| j3                  ||      2 3 d{   }	|	 | j5                  ||      2 3 d{   }	|	 7 7 7 o7 Y7 36 n7 6 ddd      d{  7   n# 1 d{  7  sw Y   nxY w| j7                  |       d{  7   n5# t8        $ r)}
| j;                  d|
 |
       d{  7   Y d}
~
nd}
~
ww xY w| j1                          d{  7   y# | j1                          d{  7   w xY ww)zGenerate TTS audio for the given text.

        Args:
            text: The text to generate TTS audio for.
            context_id: Unique identifier for this TTS context.

        Returns:
            An asynchronous generator of frames.
        z: Generating TTS [z] (streaming=)audioEncodingsampleRateHertzNr<   )r   r5   r6   r;   r2   timestampTypetimestampTransportStrategyBasic zapplication/json)AuthorizationzContent-TypeX-User-AgentX-Request-Id)jsonheaders   zInworld API error (request_id=z): Inworld API error: errorUnknown error occurred: 	error_msg	exception)r   debugrr   rv   rw   rT   r0   r3   r4   r2   rs   rx   rN   uuiduuid4rp   
USER_AGENTrq   postrt   statusr   r   r   stop_all_metrics_process_streaming_response_process_non_streaming_responsestart_tts_usage_metrics	Exception
push_error)ry   r   r   audio_configpayload
request_idr   response
error_textr   es              r-   run_ttszInworldHttpTTSService.run_tts!  s     	v/v]4??BSSTUV "11#66
 >>''3+/>>+G+GL( ~~++~~++'	
 >>%%1%)^^%?%?GM" $(#7#7 --9484V4VG01&
%dmm_5.&&	
	*}}))Wg *  $ $??c)'/}}!6JLL#A*SQ[P\!]^$-@+MNN$ $, ''))) ??'+'G'GR\'] $ $e#'+'K'KHV`'a $ $e#$ "7	$, *$']$'a$ $ $ $ $  ..t444 	Y//.Fqc,JVW/XXX	Y '')))$'')))sD  D.K,1,I? H.I? !#IH02I7I? H2I? K,H4K, I>H8H6H8IH<#H:$H<'I.I? 0I2I? 4K,6H88I:H<<I=I? I	I? I!II!I? 8I;9I? >K ?	J1J,!J$"J,'K ,J11K 4K,K
K,K)"K%#K))K,r   c                  K   d}d}|j                   j                  d      2 3 d{   }||j                  d      z  }d|v s"|j                  dd      \  }}|j	                         }|s-	 t        j                  |      }d|v rYd	|d   v rR| j                          d{    | j                  t        j                  |d   d	         |      2 3 d{   }	|	 d|v rKd
|d   v rD|d   d
   }
| j                  |
      \  }}|r| j                  ||       d{    t        ||      }d|v r7 7 7 e6 _7  # t
        j                  $ r Y w xY w6 |dkD  r| xj                  |z  c_        yyw)a  Process a streaming response from the Inworld API.

        Args:
            response: The response from the Inworld API.
            context_id: Unique identifier for this TTS context.

        Yields:
            An asynchronous generator of frames.
         rm   i   Nzutf-8
   resultaudioContenttimestampInfor   )contentiter_chunkeddecodesplitstripr   loadsstop_ttfb_metrics_process_audio_chunkbase64	b64decoder   r   maxJSONDecodeErrorru   )ry   r   r   bufferutterance_durationchunklineline_str
chunk_datar   r   r   r   s                r-   r   z1InworldHttpTTSService._process_streaming_responsee  s      #++88> 	 	%ell7++F&.%||D!4f::<!%H!5J:-.JxDX2X"44666+/+D+D",,Z-A.-QRT^, ( (% #(K:-/ZPXEY2Y)3H)=o)N595O5OP^5_2
N%"&":"::z"RRR-01C^-T*/ &.	 7( , S ++ 9 ?B !!!%77! "s    E7ED5EE7(E7-3E  D8!/E D<D:D<AE D>E .E73E75E8E :D<<E >E  EE7E E7c                  K   |j                          d{   }d|vr%t        j                  d       t        d       yd}d|v r7|d   }| j	                  |      \  }}|r| j                  ||       d{    |}t        j                  |d         }t        |      dkD  r|j                  d	      r|dd }| j                  }	t        d
t        |      |	      D ]A  }
||
|
|	z    }|s| j                          d{    t        || j                  d|       C |d
kD  r| xj                  |z  c_        yy7 )7 7 Cw)a
  Process a non-streaming response from the Inworld API.

        Args:
            response: The response from the Inworld API.
            context_id: Unique identifier for this TTS context.

        Returns:
            An asynchronous generator of frames.
        Nr   z'No audioContent in Inworld API responsezNo audioContent in responser   rm   r   ,      RIFFr   r   audior_   num_channelsr   )r   r   r   r   r   r   r   r   r   
startswith
chunk_sizeranger   r   r_   ru   )ry   r   r   response_datar   r   r   r   
audio_datar   r   r   s               r-   r   z5InworldHttpTTSService._process_non_streaming_response  sY     'mmo-.LLBC#@AA m+*?;N)-)C)CN)S&J..z:FFF!/%%mN&CD
z?RJ$9$9'$B#BCJ__
q#j/:6 	Aq1z>2E,,...&T-=-=AZd 		 !!!%77! "= . G /s;   ED?A#E:E;A1E-E E?EEEaudio_chunkc                   K   |sy|}t        |      dkD  r|j                  d      r|dd }|rt        || j                  d|       yyw)zProcess an audio chunk from the Inworld API.

        Args:
            audio_chunk: The audio chunk to process.
            context_id: Unique identifier for this TTS context.

        Returns:
            An asynchronous generator of frames.
        Nr   r   r   r   )r   r   r   r_   )ry   r   r   r   s       r-   r   z*InworldHttpTTSService._process_audio_chunk  s`       
{b [%;%;G%D$RS)J"  ,,%	  s   A
A)'rH   rI   rJ   rK   r'   SettingsrM   r   r[   rN   aiohttpClientSessionr   boolintr
   ro   r~   r   r   r!   
DOWNSTREAMr   r   r   r   r   r	   rL   r   r%   r   r   ClientResponser   r   bytesr   rP   rQ   s   @r-   rS   rS   `   s    "H!!Si S* #'#%)"KR(,15dJ dJ !..	dJ
 3-dJ }dJ dJ c]dJ dJ '/w/G&HdJ %dJ -.dJLd 3 3 JXIbIb ?e ? ?!,S#X!, 
tE#u*%&-	.!,F A*# A*3 A*>%QU+;V A* A*F28..28<?28	t	$28h+8..+8<?+8	t	$+8Z .1	t	$r,   rS   c                   \    e Zd ZU dZeZeed<    G d de      Zddddddddddddd	d
de	de
e	   de
e	   de	de
e   de	de
e   de
e	   de
ed      de
e   de
e   de
e   de
e   dedef fdZdefdZdef fdZdef fd Zdef fd!Zd:d"e
e	   fd#Zej4                  fded$ef fd%Zd&ee	ef   deee	e f      fd'Z!d"e	fd(Z"d"e	fd)Z#d"e	fd*Z$d+ Z% fd,Z& fd-Z'd.e(de)e	ef   f fd/Z*d0 Z+d1 Z,d2 Z-d3 Z.d"e	fd4Z/d"e	d5e	fd6Z0d"e	fd7Z1d"e	fd8Z2e3d5e	d"e	de4edf   fd9       Z5 xZ6S );InworldTTSServicezInworld AI WebSocket-based TTS service.

    Uses bidirectional WebSocket for lower latency streaming. Supports multiple
    independent audio contexts per connection (max 5). Outputs LINEAR16 audio
    with word-level timestamps.
    rT   c                       e Zd ZU dZdZee   ed<   dZee   ed<   dZ	ee
   ed<   dZee   ed<   dZee   ed<   dZee   ed	<   d
Zeed      ed<   y)InworldTTSService.InputParamsa  Input parameters for Inworld WebSocket TTS configuration.

        .. deprecated:: 0.0.105
            Use ``InworldTTSSettings`` directly via the ``settings`` parameter instead.

        Parameters:
            temperature: Temperature for speech synthesis.
            speaking_rate: Speaking rate for speech synthesis.
            apply_text_normalization: Whether to apply text normalization.
            max_buffer_delay_ms: Maximum buffer delay in milliseconds.
            buffer_char_threshold: Buffer character threshold.
            auto_mode: Whether to use auto mode. Recommended when texts are sent
                in full sentences/phrases. When enabled, the server controls
                flushing of buffered text to achieve minimal latency while
                maintaining high quality audio output. If None (default),
                automatically set based on aggregate_sentences.
            timestamp_transport_strategy: The strategy to use for timestamp transport.
        Nr2   r0   apply_text_normalizationmax_buffer_delay_msbuffer_char_thresholdT	auto_moderW   rX   rZ   )rH   rI   rJ   rK   r2   r   rL   rM   r0   r   rN   r   r   r   r   r   rZ   r
   r+   r,   r-   r[   r     su    	& (,Xe_+)-x-26 (3-6-1Xc]1/3x}3$(	8D>(KR$hw/G&HRr,   r[   Nz5wss://api.inworld.ai/tts/v1/voice:streamBidirectionalr\   rW   T)r]   r4   urlr_   r`   r   r   rZ   ra   r8   aggregate_sentencestext_aggregation_modeappend_trailing_spacerb   r]   r4   r   r_   r`   r   r   rZ   rX   ra   r8   r   r  r  rz   c                   ||dn|}t        ddddd      }|t        dt         d       ||_        |t        dt         d       ||_        d}d}|
t        d	t                |s|
j                  |
j                  |_        |
j
                  |
j
                  |_        |
j                  |
j                  }|
j                  |
j                  }	|
j                  |
j                  }|
j                  }|
j                  }||j                  |       t        | 4  dd
dd|||||d| || _        || _        d| _        ||d| _        d| _        d| _        d| _        d| _        || _        d| _        || _        || _        |	| _        y)a  Initialize the Inworld WebSocket TTS service.

        Args:
            api_key: Inworld API key.
            voice_id: ID of the voice to use for synthesis.

                .. deprecated:: 0.0.105
                    Use ``settings=InworldTTSSettings(voice=...)`` instead.

            model: ID of the model to use for synthesis.

                .. deprecated:: 0.0.105
                    Use ``settings=InworldTTSSettings(model=...)`` instead.

            url: URL of the Inworld WebSocket API.
            sample_rate: Audio sample rate in Hz.
            encoding: Audio encoding format.
            auto_mode: Whether to use auto mode. When enabled, the server
                controls flushing of buffered text. If None (default),
                automatically set based on ``aggregate_sentences``.
            apply_text_normalization: Whether to apply text normalization.
            timestamp_transport_strategy: Strategy for timestamp transport
                ("ASYNC" or "SYNC"). Defaults to "ASYNC".
            params: Input parameters for Inworld WebSocket TTS configuration.

                .. deprecated:: 0.0.105
                    Use ``settings=InworldTTSSettings(...)`` instead.

            settings: Runtime-updatable settings. When provided alongside deprecated
                parameters, ``settings`` values take precedence.
            aggregate_sentences: Deprecated. Use text_aggregation_mode instead.

                .. deprecated:: 0.0.104
                    Use ``text_aggregation_mode`` instead.

            text_aggregation_mode: How to aggregate text before synthesis.
            append_trailing_space: Whether to append a trailing space to text before sending to TTS.
            **kwargs: Additional arguments passed to the parent class.
        NTre   rf   rg   r]   r3   r4   ra   F)ri   rj   pause_frame_processingr_   r   r  r  r8   rl   )maxBufferDelayMsbufferCharThresholdrm   r   r+   )r'   r   r3   r4   r0   r2   r   rZ   r   r   r   rn   rB   ro   rp   _urlrs   _buffer_settings_receive_task_keepalive_taskru   _generation_end_timerv   rw   
_auto_mode_apply_text_normalizationrx   )ry   rb   r]   r4   r   r_   r`   r   r   rZ   ra   r8   r   r  r  rz   r{   _buffer_max_delay_ms_buffer_char_thresholdrG   s                      r-   ro   zInworldTTSService.__init__
  s   v  3 ;ATI .'
 ":/A7K%-""7,>H%*"  $!%"8-?@''35;5I5I$2%%1393E3E$022>/5/N/N,66B393V3V0##/ & 0 0I#)#=#= %+%A%A" ))(3 
	
"!#'# 3"7"7%
	
 
	
  	% !5#9!

 "# !$$'!  ("##)A&-I*r,   r9   c                      y)zCheck if this service can generate processing metrics.

        Returns:
            True, as Inworld WebSocket TTS service supports metrics generation.
        Tr+   r}   s    r-   r~   z&InworldTTSService.can_generate_metrics  r   r,   r   c                    K   t         |   |       d{    | j                  | _        | j	                          d{    y7 .7 w)zdStart the Inworld WebSocket TTS service.

        Args:
            frame: The start frame.
        N)rB   r   r_   rw   _connectr   s     r-   r   zInworldTTSService.start  sA      gmE""""&"2"2mmo 	#s    A	A(A	A A	A	c                 t   K   t         |   |       d{    | j                          d{    y7 7 w)zaStop the Inworld WebSocket TTS service.

        Args:
            frame: The end frame.
        N)rB   stop_disconnectr   s     r-   r  zInworldTTSService.stop  s6      gl5!!!    	"    848688c                 t   K   t         |   |       d{    | j                          d{    y7 7 w)zfCancel the Inworld WebSocket TTS service.

        Args:
            frame: The cancel frame.
        N)rB   cancelr  r   s     r-   r  zInworldTTSService.cancel  s6      gnU###    	$ r  r   c                    K   |xs | j                         }|r?| j                  r2t        j                  d|        | j	                  |       d{    yyy7 w)a  Flush any pending audio without closing the context.

        This triggers synthesis of all accumulated text in the buffer while
        keeping the context open for subsequent text. The context is only
        closed on interruption, disconnect, or end of session.
        zFlushing audio for context N)get_active_audio_context_id
_websocketr   trace_send_flush)ry   r   flush_ids      r-   flush_audiozInworldTTSService.flush_audio  sV      C!A!A!CLL6xjAB""8,,, (8,s   AAAAr   c                 Z  K   t         |   ||       d{    t        |t        t        f      rtt        j                  |  dt        |      j                   d| j                          d| _	        d| _
        t        |t              r| j                  dg       d{    yyy7 7 	w)r   Nz&: Resetting timestamp tracking due to z - cumulative_time was rm   r   )rB   r   r?   r    r   r   r  typerH   ru   r  r   r   s      r-   r   zInworldTTSService.push_frame  s      g 	222eo/@ABLL&>tE{?S?S>T U''+'<'<&=? %(D!(+D%%1..~>>> 2 C 	3 ?s"   B+B'BB+B) B+)B+r   c                 .   g }|j                  di       }|j                  dg       }|j                  dg       }|j                  dg       }|r|rt        |      t        |      k(  rt        |      D ]*  \  }}| j                  ||   z   }	|j	                  ||	f       , |r%t        |      dkD  r| j                  |d   z   | _        t        j                  |  d| d| j                   d	|D 
cg c]  \  }
}|	 c}}
 d
| j
                   	       |S c c}}
w )a  Calculate word timestamps from Inworld WebSocket API response.

        Adds cumulative time offset to maintain monotonically increasing timestamps
        across multiple generations within an agent turn. Also tracks the generation
        end time for updating cumulative time on flush.

        Args:
            timestamp_info: The timestamp information from Inworld API.

        Returns:
            List of (word, timestamp) tuples with cumulative offset applied.
        r   r   r   r   r   r   z$: Word timestamps - raw_start_times=z, cumulative_offset=z, adjusted_times=z, generation_end_time=)rA   r   r   ru   r   r  r   r  )ry   r   r   r   r   r   r   r   r   r   _ts               r-   r   z'InworldTTSService._calculate_word_times  s0    /1
"&&;	gr*mm$:B?MM"6;	[SZ3{3C%C$U+ 64!22[^C
!!4"456
 S^a/,0,A,AIbM,Q)LL&<[M J%%)%:%:$; <"1;"<A1"<!= >''+'@'@&AC 	 #=s   ,Dc                   K   |rA| j                   r5t        j                  |  d| d       	 | j                  |       d {    d| _        d| _        y 7 # t        $ r)}| j                  d| |       d {  7   Y d }~>d }~ww xY ww)Nz: Closing context z" due to interruption or completionr   r   rm   )r  r   info_send_close_contextr   r   ru   r  )ry   r   r   s      r-   _close_contextz InworldTTSService._close_context  s     $//KK4& 2:,>`ab]..z::: !$$'!	 ; ]oo2J1#0NZ[o\\\]sL   *BA AA BA 	B	 B9A<:B?BB		Bc                 B   K   | j                  |       d{    y7 w)z<Callback invoked when an audio context has been interrupted.Nr(  ry   r   s     r-   on_audio_context_interruptedz.InworldTTSService.on_audio_context_interrupted	       !!*---   c                 B   K   | j                  |       d{    y7 w)z:Callback invoked when an audio context has been completed.Nr*  r+  s     r-   on_audio_context_completedz,InworldTTSService.on_audio_context_completed  r-  r.  c                 H    | j                   r| j                   S t        d      )znGet the websocket for the Inworld WebSocket TTS service.

        Returns:
            The websocket.
        zWebsocket not connected)r  r   r}   s    r-   _get_websocketz InworldTTSService._get_websocket  s!     ????"122r,   c                 |  K   t         |           d{    | j                          d{    | j                  r;| j                  s/| j                  | j                  | j                              | _        | j                  r2| j                  s%| j                  | j                               | _        yyy7 7 w)cConnect to the Inworld WebSocket TTS service.

        Returns:
            The websocket.
        N)
rB   r  _connect_websocketr  r	  create_task_receive_task_handler_report_errorr
  _keepalive_task_handlerry   rG   s    r-   r  zInworldTTSService._connect  s      g   %%'''??4#5#5!%!1!1$2L2LTM_M_2`!aD??4#7#7#'#3#3D4P4P4R#SD  $8? 	!'s    B<B8B<B:BB<:B<c                 R  K   t         |           d{    | j                  r*| j                  | j                         d{    d| _        | j                  r*| j                  | j                         d{    d| _        | j                          d{    y7 7 \7 (7 w)hDisconnect from the Inworld WebSocket TTS service.

        Returns:
            The websocket.
        N)rB   r  r	  cancel_taskr
  _disconnect_websocketr:  s    r-   r  zInworldTTSService._disconnect+  s      g!###""4#5#5666!%D""4#7#7888#'D ((*** 	$ 7 9 	+sE   B'B.B'B!5B':B#;B'B%B'!B'#B'%B'deltac                    K   t         |   |       d{   }|s|S | j                          d{    | j                          d{    |S 7 :7  7 
w)zgApply a settings delta.

        Settings are stored but not applied to the active connection.
        N)rB   _update_settingsr  r  )ry   r?  changedrG   s      r-   rA  z"InworldTTSService._update_settings=  sX     
 077N   mmo 8
 	!s1   AAAAA
AAAAc                 J  K   	 | j                   r'| j                   j                  t        j                  u ryt	        t        j                               }t        j                  d| d       dd| j                   fdt        fd|fg}t        | j                  |       d{   | _         | j                  d	       d{    y7 #7 # t        $ rL}| j                  d
| |       d{  7   d| _         | j                  d|        d{  7   Y d}~yd}~ww xY ww)r4  Nz0Connecting to Inworld WebSocket TTS (request_id=r   r   r   r   r   )additional_headerson_connectedr   r   on_connection_error)r  stater   OPENrN   r   r   r   r   rp   r   websocket_connectr  _call_event_handlerr   r   )ry   r   r   r   s       r-   r5  z$InworldTTSService._connect_websocketL  s    	J4??#8#8EJJ#FTZZ\*JLLKJ<WXYZ F4==/":;,,G
 %6diiT[$\\DO**>::: ]: 	J//.Fqc,JVW/XXX"DO**+@QCIII	Jsu   D#2C D#A,C #C$C C	C D#C 	C 	D D-C0."DDDD#D  D#c                   K   	 | j                          d{    | j                  r~t        j                  d       | j	                         }|r |D ]  }| j                  |       d{     | j                  j                          d{    t        j                  d       | j                          d{    d| _        d| _
        d| _        | j                  d       d{    y7 7 7 d# t        $ r)}| j                  d| |       d{  7   Y d}~zd}~ww xY w7 n7 B# | j                          d{  7   d| _        d| _
        d| _        | j                  d       d{  7   w xY ww)r<  Nz(Disconnecting from Inworld WebSocket TTSz'Disconnected from Inworld WebSocket TTSr   r   rm   on_disconnected)r   r  r   r   get_audio_contextsr'  closer   r   remove_active_audio_contextru   r  rJ  )ry   audio_contextsctx_idr   s       r-   r>  z'InworldTTSService._disconnect_websocketd  sT    	>'')))GH!%!8!8!:!"0 ?"66v>>>?oo++---FG 22444"DO$'D!(+D%**+<===# * ?- 	Y//.Fqc,JVW/XXX	Y 5 >	 22444"DO$'D!(+D%**+<===s   E9C3 C-AC3 (C/)#C3 C1C3 &E99D(:-E9'D*(E9-C3 /C3 1C3 3	D%<D DD D,  D%%D, (E9*E9,E6 E.E6/E20E66E9c                 t  K   | j                         2 3 d{   }	 t        j                  |      }|j                  di       }|j                  d      xs |j                  d      }dD cg c]  }||v r|
 }}t	        j                  |  d| d|        |j                  d	i       }|j                  d
d      dk7  rv|j                  dd      }|j                  d
      }	|	dk(  r/d|j                         v rt	        j                  |  d| d       | j                  d|        d{    "d|v r(| j                  t        |d                d{    N|rD| j                  |      s3t	        j                  |  d|        | j                  |       d{    |j                  di       }
|
j                  d      }|rt	        j                  |  d|        t        j                  |      }t!        |      dkD  r|j#                  d      r|dd }t%        || j&                  d|      }|r| j)                  ||       d{    |
j                  d      }|r-| j+                  |      }|r| j-                  ||       d{    d|v rt	        j                  |  d|        d |v rBt	        j                  |  d!| j.                   d"| j0                          | j0                  | _        d#|v st	        j                  |  d$|        | j3                          d{    | j-                  d%d&g|       d{    | j5                  |       d{    T7 P# t        j                  $ r t	        j
                  |  d       Y w xY wc c}w 7 s7 J7 7 [7 7 7 j7 S6 yw)'z0Handle incoming WebSocket messages from Inworld.Nz received non-JSON messager   	contextIdr   )contextCreated
audioChunkflushCompletedcontextClosedz: Received message types=z	, ctx_id=r   coder   messagezUnknown error   z	not foundz
: Context z not found.r   )r   r   z0: Recreating audio context for current context: rU  r   z%: Processing audio chunk for context r   r   r   r   r   rT  z: Context created on server: rV  z3: Generation completed - updating cumulative_time: z -> rW  z: Context closed on server: )r    r   r   )r2  r   r   r   r   warningrA   r  lowerr   r   rN   audio_context_availablecreate_audio_contextr   r   r   r   r   r_   append_to_audio_contextr   r   ru   r  r   remove_audio_context)ry   rY  msgr   rQ  k	msg_typesr   r   
error_coder   	audio_b64r   r   r   r   s                   r-   _receive_messagesz#InworldTTSService._receive_messages~  s    !002 Q	8 Q	8'jj)
 WWXr*FZZ,H

<0HF
 ]; I 
 LLD6!:9+YvhWX ZZ"-Fzz&!$)"JJy/B	#ZZ/
 ?{ioo6G'GLLD6F8;!GH oo2Ei[0QoRRR#~ooCL0AoBBB d::6Bv%UV\U]^_//777 !**\26K#7Iv%J6(ST((3u:?u'7'7'@!"#JE(0@0@!PVW66vuEEE )___=N!77G
22:vFFF  6)v%B6(KL  6)fO,,-T$2K2K1LN )-(A(A% &(v%A&JK,,.....0F/UW]^^^//777cQ	8 '' $'ABC( S C 8 F G" /^7c 3s   N8N6M'N6N8M*:N8,N9B5N8.N!/+N8N$AN8"N'#B,N8N*?N8N-A,N8>-N8+N0,N8N2N8 N4!N8'N6*+NN8N	N8$N8'N8*N8-N80N82N84N86N8c                 4  K   d}	 t        j                  |       d{    	 | j                  r| j                  j                  t        j
                  u r| j                         }|r ddi|d}t        j                  d|        ndddii}t        j                  d       | j                  j                  t        j                  |             d{    7 7 # t        j                  $ r$}t        j                  |  d	|        Y d}~yd}~ww xY ww)
zBSend periodic keepalive messages to maintain WebSocket connection.<   Nr   r   	send_textrS  zSending keepalive for context rk  z!Sending keepalive without contextz keepalive error: )asynciosleepr  rG  r   rH  r  r   r  sendr   dumps
websocketsConnectionClosedr\  )ry   KEEPALIVE_SLEEPr   keepalive_messager   s        r-   r9  z)InworldTTSService._keepalive_task_handler  s     --000??t'<'<

'J!%!A!A!CJ!*0")3-) 'Ej\%RS-862,,G)%HI//..tzz:K/LMMM 0 N.. $'9!=>sK   DCDB0C CC DC D1DDDDc                   K   | j                   | j                  d}| j                  j                  | j                  j                  |d<   | j                  j                  | j                  j
                  |d}| j                  j                  | j                  j                  |d<   | j                  | j                  |d<   | j                  | j                  |d<   | j                  | j                  |d<   | j                  d	   xs d
|d	<   | j                  d   xs d|d<   | j                  |d<   ||d}t        j                  |  d|        | j                  t        j                   |      | j"                         d{    y7 w)ztSend a context to the Inworld WebSocket TTS service.

        Args:
            context_id: The context ID.
        r   Nr<   )r5   r6   r;   r2   applyTextNormalizationautoModer   r  i  r     r   )createrS  z: Sending context create: )rv   rw   rT   r0   r3   r4   r2   r  r  rx   r  rs   r   r  send_with_retryr   ro  r8  )ry   r   r   create_configrb  s        r-   _send_contextzInworldTTSService._send_context  sw     "11#66
 >>''3+/>>+G+GL( ~~++~~++')
 >>%%1+/>>+E+EM-())56:6T6TM23??&(,M*%--9:>:\:\M67 -1,A,ABT,U,]Y]()/3/D/DEZ/[/b_b+,)-)=)=o&&Z@v7GH""4::c?D4F4FGGGs   FFFFr   c                    K   d|i|d}| j                  t        j                  |      | j                         d{    y7 w)zSend text to the Inworld WebSocket TTS service.

        Args:
            context_id: The context ID.
            text: The text to send.
        r   rj  Nry  r   ro  r8  )ry   r   r   rb  s       r-   
_send_textzInworldTTSService._send_text  s:      $TND""4::c?D4F4FGGGs   :AAAc                    K   i |d}| j                  t        j                  |      | j                         d{    y7 w)zrSend a flush to the Inworld WebSocket TTS service.

        Args:
            context_id: The context ID.
        )flush_contextrS  Nr}  ry   r   rb  s      r-   r  zInworldTTSService._send_flush  6      !#<""4::c?D4F4FGGG   8AA Ac                    K   i |d}| j                  t        j                  |      | j                         d{    y7 w)zzSend a close context to the Inworld WebSocket TTS service.

        Args:
            context_id: The context ID.
        )close_contextrS  Nr}  r  s      r-   r'  z%InworldTTSService._send_close_context#  r  r  c                  K   t        j                  |  d| d       	 | j                  r&| j                  j                  t        j
                  u r| j                          d{    	 | j                  |      sY| j                  |       d{    | j                          d{    t        |       | j                  |       d{    | j                  ||       d{    | j                  |       d{    d y7 7 7 i7 C7 +7 # t        $ r+}t        d|        t!        |       Y d}~yd}~ww xY w# t        $ r}t        d|        Y d}~yd}~ww xY ww)a  Generate TTS audio for the given text using the Inworld WebSocket TTS service.

        Args:
            text: The text to generate TTS audio for.
            context_id: Unique identifier for this TTS context.

        Returns:
            An asynchronous generator of frames.
        z: Generating WebSocket TTS []Nr[  r   r   )r   r   r  rG  r   CLOSEDr  r^  r_  start_ttfb_metricsr   r{  r~  r   r   r   r    )ry   r   r   r   s       r-   r   zInworldTTSService.run_tts,  sO     	v9$qAB	C??doo&;&;u||&Kmmo%%33J?33J???11333)Z@@,,Z888ooj$777224888 J! & @3878  )A!'EFF%<<
  	C%=aS#ABBB	Cs   E8AE $D%E *%D DD 'D('D DD )D*D DD E E8E D D D D D 	E"!EE E8EE 	E5E0+E80E55E8r)   )7rH   rI   rJ   rK   r'   r   rM   r   r[   rN   r   r   r   r
   r"   r   ro   r~   r   r   r   r  r   r  r  r!   r   r   r   r   r	   r   rL   r   r(  r,  r0  r2  r  r  r   r=   rA  r5  r>  rg  r9  r{  r~  r  r'  r%   r   r   rP   rQ   s   @r-   r   r     s    "H!!Si S@ #'#J%)"$(26KR(,15.2?C&*!KJ KJ 3-	KJ
 }KJ KJ c]KJ KJ D>KJ #+3-KJ '/w/G&HKJ %KJ -.KJ &d^KJ  ((;<KJ   $!KJ" #KJZd  ! !!+ !
-HSM 
- JXIbIb ?e ? ?$$DcN $tERUW\R\L]G^ $L(s (.S ..3 .3T +$K DcN J0>4S8j,%Hc %HNH3 Hc HHC HHC H  C#  C3  C>%QU+;V  C  Cr,   r   )@rK   rl  r   r   r   dataclassesr   r   typingr   r   r   r   r	   r
   r   r   r   r   r   rp  logurur   pipecatr   pipecat_versionr   pydanticr   pipecat.services.settingsr   r   r   r   websockets.asyncio.clientr   rI  websockets.protocolr   ModuleNotFoundErrorr   r   r   pipecat.frames.framesr   r   r   r   r   r   r   r   r    "pipecat.processors.frame_processorr!   pipecat.services.tts_servicer"   r#   r$   (pipecat.utils.tracing.service_decoratorsr%   r'   rS   r   r+   r,   r-   <module>r     s        (      .)*+
  _ _,F)
 
 
 > ] ] ? * * *4@J @Fj	C+ j	Cg  ,FLL;qc"#FLL_`
&qc*
++,s   $C C?2C::C?