
    qi                        d Z ddlZddlZddlZddlmZmZ ddlmZm	Z	m
Z
mZmZmZmZmZmZmZ ddlZddlmZ ddlmZ ddlmZmZmZmZmZmZmZmZm Z m!Z! ddl"m#Z# dd	l$m%Z%m&Z&m'Z'm(Z( dd
l)m*Z*m+Z+m,Z, ddl-m.Z.m/Z/ ddl0m1Z1 	 ddl2Z2ddl3m4Z5 ddl6m7Z7 ed   Z<ddhZ=de.dee>   fdZ?de@de>fdZAdeee>ef   df   deee>eeBeCf   f      fdZD G d de      ZEe G d d e&             ZFe G d! d"e&             ZG	 	 d,d#ee>ef   d$eBd%e>d&eBdeHeee>eBf      e>eBf   f
d'ZI G d( d)e,      ZJ G d* d+e+      ZKy# e8$ r7Z9 ejt                  de9         ejt                  d        e;de9       dZ9[9ww xY w)-zElevenLabs text-to-speech service implementations.

This module provides WebSocket and HTTP-based TTS services using ElevenLabs API
with support for streaming audio, word timestamps, and voice customization.
    N)	dataclassfield)
AnyAsyncGeneratorClassVarDictListLiteralMappingOptionalTupleUnion)logger)	BaseModel)
CancelFrameEndFrame
ErrorFrameFrameInterruptionFrameLLMFullResponseEndFrame
StartFrameTTSAudioRawFrameTTSStartedFrameTTSStoppedFrame)FrameDirection)	NOT_GIVENTTSSettings	_NotGiven_warn_deprecated_param)TextAggregationMode
TTSServiceWebsocketTTSService)Languageresolve_language)
traced_tts)connect)StatezException: zMIn order to use ElevenLabs, you need to `pip install pipecat-ai[elevenlabs]`.zMissing module: )	pcm_16000	pcm_22050	pcm_24000	pcm_44100eleven_flash_v2_5eleven_turbo_v2_5languagereturnc                 H   i t         j                  dt         j                  dt         j                  dt         j                  dt         j
                  dt         j                  dt         j                  dt         j                  dt         j                  d	t         j                  d
t         j                  dt         j                  dt         j                  dt         j                  dt         j                  dt         j                   dt         j"                  dt         j$                  dt         j&                  dt         j(                  dt         j*                  dt         j,                  dt         j.                  dt         j0                  dt         j2                  dt         j4                  dt         j6                  dt         j8                  dt         j:                  dt         j<                  dt         j>                  dt         j@                  d i}tC        | |d!"      S )#zConvert a Language enum to ElevenLabs language code.

    Args:
        language: The Language enum value to convert.

    Returns:
        The corresponding ElevenLabs language code, or None if not supported.
    arbgcsdadeelenesfifilfrhihrhuiditjakomsnlnoplptrorusksvtatrukvizhT)use_base_code)"r#   ARBGCSDADEELENESFIFILFRHIHRHUIDITJAKOMSNLNOPLPTRORUSKSVTATRUKVIZHr$   )r.   LANGUAGE_MAPs     Q/opt/pipecat/venv/lib/python3.12/site-packages/pipecat/services/elevenlabs/tts.pylanguage_to_elevenlabs_languagert   O   s   !T!T! 	T! 	T	!
 	T! 	T! 	T! 	T! 	T! 	e! 	T! 	T! 	T! 	T! 	T!  	T!!" 	T#!$ 	TTTTTTTTTTTTTTTA!LF Hl$GG    sample_ratec                 z    | xdk(  r yxdk(  r yxdk(  r yxdk(  r yd	k(  ry
t        j                  d|  d       y)zGet the appropriate output format string for a given sample rate.

    Args:
        sample_rate: The audio sample rate in Hz.

    Returns:
        The ElevenLabs output format string.
    i@  pcm_8000i>  r(   i"V  r)   i]  r*   iD  r+   z5ElevenLabsTTSService: No output format available for z sample rate)r   warning)rv   s    rs   output_format_from_sample_raterz   ~   sB     
NN
?}LY ru   settingsr   c                     g d}i }|D ]8  }t        | t              rt        | |d      n| j                  |      }|4|||<   : |xs dS )a  Build voice settings dictionary for ElevenLabs based on provided settings.

    Args:
        settings: Dictionary or settings containing voice settings parameters.

    Returns:
        Dictionary of voice settings or None if no valid settings are provided.
    )	stabilitysimilarity_booststyleuse_speaker_boostspeedN)
isinstancer   getattrget)r{   voice_setting_keysvoice_settingskeyvals        rs   build_elevenlabs_voice_settingsr      sf     bN! &,6x,MGHc4(S[S_S_`cSd 	 ?"%N3& !T!ru   c                   &    e Zd ZU dZeed<   eed<   y)PronunciationDictionaryLocatorzLocator for a pronunciation dictionary.

    Parameters:
        pronunciation_dictionary_id: The ID of the pronunciation dictionary.
        version_id: The version ID of the pronunciation dictionary.
    pronunciation_dictionary_id
version_idN)__name__
__module____qualname____doc__str__annotations__ ru   rs   r   r      s     "%$Oru   r   c                   h   e Zd ZU dZ ed       Zedz  ez  ed<    ed       Z	edz  ez  ed<    ed       Z
edz  ez  ed	<    ed
       Zedz  ez  ed<    ed       Zedz  ez  ed<    ed       Zed   dz  ez  ed<    eh d      Zeee      ed<    eh d      Zeee      ed<   y)ElevenLabsTTSSettingsa  Settings for ElevenLabsTTSService.

    Fields that appear in the WebSocket URL (``voice``, ``model``,
    ``language``) require a full reconnect when changed.  Fields that
    affect the voice character (``stability``, ``similarity_boost``,
    ``style``, ``use_speaker_boost``, ``speed``) can be applied by closing
    the current audio context so a new one is opened with updated settings.

    Parameters:
        stability: Voice stability control (0.0 to 1.0).
        similarity_boost: Similarity boost control (0.0 to 1.0).
        style: Style control for voice expression (0.0 to 1.0).
        use_speaker_boost: Whether to use speaker boost enhancement.
        speed: Voice speed control (0.7 to 1.2).
        apply_text_normalization: Text normalization mode ("auto", "on", "off").
    c                      t         S Nr   r   ru   rs   <lambda>zElevenLabsTTSSettings.<lambda>       	 ru   default_factoryNr}   c                      t         S r   r   r   ru   rs   r   zElevenLabsTTSSettings.<lambda>       y ru   r~   c                      t         S r   r   r   ru   rs   r   zElevenLabsTTSSettings.<lambda>       I ru   r   c                      t         S r   r   r   ru   rs   r   zElevenLabsTTSSettings.<lambda>   r   ru   r   c                      t         S r   r   r   ru   rs   r   zElevenLabsTTSSettings.<lambda>   r   ru   r   c                      t         S r   r   r   ru   rs   r   zElevenLabsTTSSettings.<lambda>       	 ru   autoonoffapply_text_normalization>   modelvoicer.   
URL_FIELDS>   r   r   r}   r~   r   VOICE_SETTINGS_FIELDS)r   r   r   r   r   r}   floatr   r   r~   r   r   boolr   r   r
   	frozensetr   r   r   r   r   ru   rs   r   r      s    " +0@Q*RIut|i'R16GX1YedlY.Y&+<M&NE54<)#N16GX1Ytd{Y.Y&+<M&NE54<)#NPU)Qg&9:TAIM 
 ,55S+TJ3(T 7@P78IcN3 ru   r   c                   >   e Zd ZU dZ ed       Zedz  ez  ed<    ed       Z	e
dz  ez  ed<    ed       Ze
dz  ez  ed	<    ed
       Ze
dz  ez  ed<    ed       Zedz  ez  ed<    ed       Ze
dz  ez  ed<    ed       Zed   dz  ez  ed<   y)ElevenLabsHttpTTSSettingsa  Settings for ElevenLabsHttpTTSService.

    Parameters:
        optimize_streaming_latency: Latency optimization level (0-4).
        stability: Voice stability control (0.0 to 1.0).
        similarity_boost: Similarity boost control (0.0 to 1.0).
        style: Style control for voice expression (0.0 to 1.0).
        use_speaker_boost: Whether to use speaker boost enhancement.
        speed: Voice speed control (0.25 to 4.0).
        apply_text_normalization: Text normalization mode ("auto", "on", "off").
    c                      t         S r   r   r   ru   rs   r   z"ElevenLabsHttpTTSSettings.<lambda>   s    W` ru   r   Noptimize_streaming_latencyc                      t         S r   r   r   ru   rs   r   z"ElevenLabsHttpTTSSettings.<lambda>   r   ru   r}   c                      t         S r   r   r   ru   rs   r   z"ElevenLabsHttpTTSSettings.<lambda>   r   ru   r~   c                      t         S r   r   r   ru   rs   r   z"ElevenLabsHttpTTSSettings.<lambda>   r   ru   r   c                      t         S r   r   r   ru   rs   r   z"ElevenLabsHttpTTSSettings.<lambda>   r   ru   r   c                      t         S r   r   r   ru   rs   r   z"ElevenLabsHttpTTSSettings.<lambda>   r   ru   r   c                      t         S r   r   r   ru   rs   r   z"ElevenLabsHttpTTSSettings.<lambda>   r   ru   r   r   )r   r   r   r   r   r   intr   r   r}   r   r~   r   r   r   r   r   r
   r   ru   rs   r   r      s    
 :?O`9ad
Y 6a*/@Q*RIut|i'R16GX1YedlY.Y&+<M&NE54<)#N16GX1Ytd{Y.Y&+<M&NE54<)#NPU)Qg&9:TAIM ru   r   alignment_infocumulative_timepartial_wordpartial_word_start_timec                    | d   }| d   }t        |      t        |      k7  r2t        j                  dt        |       dt        |              g ||fS g }g }|}|r|nd}	t        |      D ]F  \  }
}|dk(  r*|s|j	                  |       |j	                  |	       d}d}	5|	|||
   dz  z   }	||z  }H t        t        ||            }|r|nd}|	|	nd	}|||fS )
a  Calculate word timestamps from character alignment information.

    Args:
        alignment_info: Character alignment data from ElevenLabs API.
        cumulative_time: Base time offset for this chunk.
        partial_word: Partial word carried over from previous chunk.
        partial_word_start_time: Start time of the partial word.

    Returns:
        Tuple of (word_times, new_partial_word, new_partial_word_start_time):
        - word_times: List of (word, timestamp) tuples for complete words
        - new_partial_word: Incomplete word at end of chunk (empty if chunk ends with space)
        - new_partial_word_start_time: Start time of the incomplete word
    charscharStartTimesMsz.calculate_word_times: length mismatch - chars=, times=N       @@        )lenr   error	enumerateappendlistzip)r   r   r   r   r   char_start_times_mswordsword_start_timescurrent_wordword_start_timeichar
word_timesnew_partial_wordnew_partial_word_start_times                  rs   calculate_word_timesr      s*   ( 7#E();<
5zS,--<SZLQTUhQiPjk	
 L"9:: EL1=-4OU# !43;\* ''8!"& &"15H5Kf5T"UD L!  c%!123J (4|5D5P/VY(*EFFru   c                        e Zd ZU dZeZeed<    G d de      Zdddddddddddddd	e	d
e
e	   de
e	   de	de
e   dede
e   de
e   de
ee      de
e   de
e   de
e   de
e   f fdZdefdZdede
e	   fdZd Zdedee	ef   f fdZdef fdZdef fd Zdef fd!Zd3d"e
e	   fd#Ze jB                  fde"d$e f fd%Z# fd&Z$ fd'Z%d( Z&d) Z'd* Z(d"e	fd+Z)d"e	fd,Z*d"e	fd-Z+d. Z,d/ Z-d0e	d"e	fd1Z.e/d0e	d"e	de0e"df   fd2       Z1 xZ2S )4ElevenLabsTTSServicea8  ElevenLabs WebSocket-based TTS service with word timestamps.

    Provides real-time text-to-speech using ElevenLabs' WebSocket streaming API.
    Supports word-level timestamps, audio context management, and various voice
    customization options including stability, similarity boost, and speed controls.
    	_settingsc                       e Zd ZU dZdZee   ed<   dZee	   ed<   dZ
ee	   ed<   dZee	   ed<   dZee   ed<   dZee	   ed<   d	Zee   ed
<   dZee   ed<   dZee   ed<   dZeed      ed<   dZeee      ed<   y) ElevenLabsTTSService.InputParamsa  Input parameters for ElevenLabs TTS configuration.

        .. deprecated:: 0.0.105
            Use ``settings=ElevenLabsTTSSettings(...)`` instead.

        Parameters:
            language: Language to use for synthesis.
            stability: Voice stability control (0.0 to 1.0).
            similarity_boost: Similarity boost control (0.0 to 1.0).
            style: Style control for voice expression (0.0 to 1.0).
            use_speaker_boost: Whether to use speaker boost enhancement.
            speed: Voice speed control (0.7 to 1.2).
            auto_mode: Whether to enable automatic mode optimization.
            enable_ssml_parsing: Whether to parse SSML tags in text.
            enable_logging: Whether to enable ElevenLabs logging.
            apply_text_normalization: Text normalization mode ("auto", "on", "off").
            pronunciation_dictionary_locators: List of pronunciation dictionary locators to use.
        Nr.   r}   r~   r   r   r   T	auto_modeenable_ssml_parsingenable_loggingr   r   !pronunciation_dictionary_locators)r   r   r   r   r.   r   r#   r   r}   r   r~   r   r   r   r   r   r   r   r   r
   r   r	   r   r   ru   rs   InputParamsr   B  s    	& (,(8$+%)	8E?),0(5/0!%x%,08D>0!%x%$(	8D>(.2Xd^2)--KO (73F+G"HO\`)8D9W4X+Y`ru   r   Nzwss://api.elevenlabs.ioT)voice_idr   urlrv   r   r   r   r   paramsr{   text_aggregation_modeaggregate_sentencesapi_keyr   r   r   rv   r   r   r   r   r   r{   r   r   c                L   t        ddddddddd	      }|t        dt         d       ||_        |t        dt         d       ||_        |	}|
Ct        dt                |s0|
j                   | j                  |
j                        |_        |
j                  |
j                  |_        |
j                  |
j                  |_        |
j                  |
j                  |_        |
j                  |
j                  |_	        |
j                  |
j                  |_
        |
j                  |
j                  }|
j                  |
j                  }|
j                  |
j                  }|
j                  |
j                  |_        ||
j                  }||j!                  |       t#        | H  d||dd	d	||d
| || _        || _        || _        || _        || _        d| _        | j3                         | _        || _        d| _        d| _        d| _        d| _        d| _         y)aX  Initialize the ElevenLabs TTS service.

        Args:
            api_key: ElevenLabs API key for authentication.
            voice_id: ID of the voice to use for synthesis.

                .. deprecated:: 0.0.105
                    Use ``settings=ElevenLabsTTSSettings(voice=...)`` instead.

            model: TTS model to use (e.g., "eleven_turbo_v2_5").

                .. deprecated:: 0.0.105
                    Use ``settings=ElevenLabsTTSSettings(model=...)`` instead.

            url: WebSocket URL for ElevenLabs TTS API.
            sample_rate: Audio sample rate. If None, uses default.
            auto_mode: Whether to enable automatic mode optimization.
            enable_ssml_parsing: Whether to parse SSML tags in text.
            enable_logging: Whether to enable ElevenLabs server-side logging.
            pronunciation_dictionary_locators: List of pronunciation dictionary
                locators to use.
            params: Additional input parameters for voice customization.

                .. deprecated:: 0.0.105
                    Use ``settings=ElevenLabsTTSSettings(...)`` instead.

            settings: Runtime-updatable settings. When provided alongside deprecated
                parameters, ``settings`` values take precedence.
            text_aggregation_mode: How to aggregate incoming text before synthesis.
            aggregate_sentences: Whether to aggregate sentences within the TTSService.

                .. deprecated:: 0.0.104
                    Use ``text_aggregation_mode`` instead.

            **kwargs: Additional arguments passed to the parent service.
        r-   N)	r   r   r.   r}   r~   r   r   r   r   r   r   r   r   FT)r   r   push_text_framespush_stop_framespause_frame_processingrv   r{   r   r   r   r   )!r   r   r   r   r.   language_to_service_languager}   r~   r   r   r   r   r   r   r   r   apply_updatesuper__init___api_key_url
_auto_mode_enable_ssml_parsing_enable_logging_output_format_set_voice_settings_voice_settings"_pronunciation_dictionary_locators_cumulative_time_partial_word_partial_word_start_time_receive_task_keepalive_task)selfr   r   r   r   rv   r   r   r   r   r   r{   r   r   kwargsdefault_settingsr   	__class__s                    rs   r   zElevenLabsTTSService.__init__b  sU   P 1%!"%)

 ":/DgN%-""7,A7K%*" .O*"8-BC??.040Q0QRXRaRa0b$-##/171A1A$.**68>8O8O$5<<+-3\\$*++79?9Q9Q$6<<+-3\\$*##/ & 0 0I--9*0*D*D'((4%+%:%:N22>@F@_@_$=5=9?9a9a6 ))(3 		
"7 3"!#'#%		
 		
  	 $$7!- #7792T/ !(+% "#ru   r/   c                      y)zCheck if this service can generate processing metrics.

        Returns:
            True, as ElevenLabs service supports metrics generation.
        Tr   r   s    rs   can_generate_metricsz)ElevenLabsTTSService.can_generate_metrics       ru   r.   c                     t        |      S )zConvert a Language enum to ElevenLabs language format.

        Args:
            language: The language to convert.

        Returns:
            The ElevenLabs-specific language code, or None if not supported.
        rt   r   r.   s     rs   r   z1ElevenLabsTTSService.language_to_service_language       /x88ru   c                 ,    t        | j                        S r   r   r   r  s    rs   r   z(ElevenLabsTTSService._set_voice_settings      .t~~>>ru   deltac                 b  K   t         |   |       d{   }|s|S | j                         | _        t	        |j                         t        j                  z        }t	        |j                         t        j                  z        }|rit        j                  d|j                         t        j                  z   d       | j                          d{    | j                          d{    nl|rjt        j                  d|j                         t        j                  z   d       | j                         }|r |D ]  }| j                  |       d{     |sCt        j                  t        j                  z  }| j                  |j                         |z
         |S 7 7 7 7 Tw)a  Apply a settings delta, reconnecting as needed.

        Uses the declarative ``URL_FIELDS`` and ``VOICE_SETTINGS_FIELDS``
        sets on :class:`ElevenLabsTTSSettings` to decide whether to
        reconnect the WebSocket or close the current audio context.

        Args:
            delta: A :class:`TTSSettings` (or ``ElevenLabsTTSSettings``) delta.

        Returns:
            Dict mapping changed field names to their previous values.
        NzURL-level setting changed (z), reconnecting WebSocketzVoice settings changed (z+), closing current context to apply changes)r   _update_settingsr   r   r   keysr   r   r   r   debug_disconnect_connectget_audio_contexts_close_context _warn_unhandled_updated_settings)	r   r  changedurl_changedvoice_settings_changedaudio_contextsctx_idhandledr  s	           rs   r  z%ElevenLabsTTSService._update_settings  s     077N  $7797<<>,A,L,LLM!%glln7L7b7b&b!cLL-glln?T?_?_._-` a) * ""$$$--/!!#LL*7<<><Q<g<g+g*h i; < "446N, 6F--f5556  ,669N9d9ddG11',,.72JKE 8  %! 6sH   F/F&B>F/F)F/-F+.A*F/F-AF/)F/+F/-F/framec                    K   t         |   |       d{    t        | j                        | _        | j                          d{    y7 77 w)zStart the ElevenLabs TTS service.

        Args:
            frame: The start frame containing initialization parameters.
        N)r   startrz   rv   r   r  r   r  r  s     rs   r!  zElevenLabsTTSService.startE  sF      gmE"""<T=M=MNmmo 	#s!   AA1AA	AAc                 t   K   t         |   |       d{    | j                          d{    y7 7 w)zZStop the ElevenLabs TTS service.

        Args:
            frame: The end frame.
        N)r   stopr  r"  s     rs   r$  zElevenLabsTTSService.stopO  s6      gl5!!!    	"    848688c                 t   K   t         |   |       d{    | j                          d{    y7 7 w)z_Cancel the ElevenLabs TTS service.

        Args:
            frame: The cancel frame.
        N)r   cancelr  r"  s     rs   r'  zElevenLabsTTSService.cancelX  s6      gnU###    	$ r%  
context_idc                    K   |xs | j                         }|r| j                  syt        j                  |  d       |dd}| j                  j	                  t        j                  |             d{    y7 w)zFlush any pending audio and finalize the current context.

        Args:
            context_id: The specific context to flush. If None, falls back to the
                currently active context.
        Nz: flushing audioT)r(  flush)get_active_audio_context_id
_websocketr   tracesendjsondumps)r   r(  flush_idmsgs       rs   flush_audioz ElevenLabsTTSService.flush_audioa  se      C!A!A!Ctv-./%5oo""4::c?333s   A2A<4A:5A<	directionc                    K   t         |   ||       d{    t        |t        t        f      r;t        |t              r*| j                  dg| j                                d{    yyy7 V7 	w)Push a frame and handle state changes.

        Args:
            frame: The frame to push.
            direction: The direction to push the frame.
        NResetr   )r   
push_framer   r   r   add_word_timestampsr+  r   r  r4  r  s      rs   r9  zElevenLabsTTSService.push_frameo  sp      g 	222eo/@AB%1..~t?_?_?abbb 2 C 	3 cs"   A2A.AA2&A0'A20A2c                 |  K   t         |           d {    | j                          d {    | j                  r;| j                  s/| j                  | j                  | j                              | _        | j                  r2| j                  s%| j                  | j                               | _        y y y 7 7 wr   )
r   r  _connect_websocketr,  r   create_task_receive_task_handler_report_errorr   _keepalive_task_handlerr   r  s    rs   r  zElevenLabsTTSService._connect{  s     g   %%'''??4#5#5!%!1!1$2L2LTM_M_2`!aD??4#7#7#'#3#3D4P4P4R#SD  $8? 	!'s    B<B8B<B:BB<:B<c                 R  K   t         |           d {    | j                  r*| j                  | j                         d {    d | _        | j                  r*| j                  | j                         d {    d | _        | j                          d {    y 7 7 \7 (7 wr   )r   r  r   cancel_taskr   _disconnect_websocketrB  s    rs   r  z ElevenLabsTTSService._disconnect  s     g!###""4#5#5666!%D""4#7#7888#'D ((*** 	$ 7 9 	+sE   B'B.B'B!5B':B#;B'B%B'!B'#B'%B'c                   K   	 | j                   r'| j                   j                  t        j                  u ry t	        j
                  d       | j                  j                  }| j                  j                  }| j                  }| j                   d| d| d| dt        | j                        j                          	}| j                  r|d| j                   z  }| j                  r|d| j                   z  }| j                  j                   |d| j                  j                    z  }| j                  j"                  }|t$        v r#|!|d	| z  }t	        j
                  d
|        n9|7t	        j&                  d| ddj)                  t+        t$                            t-        |dd| j.                  i       d {   | _         | j1                  d       d {    y 7 #7 # t2        $ rL}d | _         | j5                  d| |       d {  7   | j1                  d|        d {  7   Y d }~y d }~ww xY ww)NzConnecting to ElevenLabs/v1/text-to-speech/z/multi-stream-input?model_id=z&output_format=z&auto_mode=z&enable_ssml_parsing=z&enable_logging=z&apply_text_normalization=z&language_code=Using language code: Language code [I] not applied. Language codes can only be used with multilingual models: , i   
xi-api-key)max_sizeadditional_headerson_connectedUnknown error occurred: 	error_msg	exceptionon_connection_error)r,  stater'   OPENr   r  r   r   r   r   r   r   r   lowerr   r   r   r.   ELEVENLABS_MULTILINGUAL_MODELSry   joinsortedwebsocket_connectr   _call_event_handler	Exception
push_error)r   r   r   output_formatr   r.   es          rs   r=  z'ElevenLabsTTSService._connect_websocket  st    '	J4??#8#8EJJ#FLL34~~++HNN((E //MYYK28*<YZ_Y``op}o~  J  KN  OS  O^  O^  K_  Ke  Ke  Kg  Jh  iC((.t/H/H.IJJ##)$*>*>)?@@~~66B3DNN4[4[3\]] ~~..H668;O
334XJ?@%%hZ/xy}  zC  zC  DJ  Ki  Dj  zk  yl  m
 %6.LRVR_R_C`% DO **>:::	 ; 	J"DO//.Fqc,JVW/XXX**+@QCIII	Jsu   I2G, IFG, G(G, "G*#G, 'I(G, *G, ,	I5 H<HH<1H42H<7I<IIc                   K   	 | j                          d {    | j                  rt        j                  d       | j                  j	                  t        j                  ddi             d {    | j                  j                          d {    t        j                  d       | j                          d {    d | _        | j                  d       d {    y 7 7 v7 V# t        $ r)}| j                  d| |       d {  7   Y d }~ld }~ww xY w7 `7 B# | j                          d {  7   d | _        | j                  d       d {  7   w xY ww)NzDisconnecting from ElevenLabsclose_socketTzDisconnected from ElevenLabsrP  rQ  on_disconnected)stop_all_metricsr,  r   r  r.  r/  r0  closer]  r^  remove_active_audio_contextr\  )r   r`  s     rs   rE  z*ElevenLabsTTSService._disconnect_websocket  s%    	>'')))<=oo**4::~t6L+MNNNoo++---;< 22444"DO**+<=== * O- 	Y//.Fqc,JVW/XXX	Y 5= 22444"DO**+<===s   E#C+ C%AC+ 0C'1!C+ C)C+ ,E#?D  E#D" E#%C+ 'C+ )C+ +	D4DDDD$ DD$  E#"E#$E 8D;9 E EE  E#c                 H    | j                   r| j                   S t        d      )NzWebsocket not connected)r,  r]  r  s    rs   _get_websocketz#ElevenLabsTTSService._get_websocket  s    ????"122ru   c                 h  K   |r`| j                   rTt        j                  |  d|        	 | j                   j                  t	        j
                  |dd             d {    d| _        d| _	        d| _
        y 7 # t        $ r)}| j                  d| |       d {  7   Y d }~Ed }~ww xY ww)Nz: Closing context T)r(  close_contextrP  rQ  r   r   )r,  r   r-  r.  r/  r0  r]  r^  r   r   r   )r   r(  r`  s      rs   r  z#ElevenLabsTTSService._close_context  s      $//LLD6!3J<@A] oo**JJj4PQ  
 !$(+%  ]oo2J1#0NZ[o\\\]sL   )B24A=  A;!A= %B2;A= =	B/B*B" B*%B2*B//B2c                 B   K   | j                  |       d{    y7 w)z9Close the ElevenLabs context when the bot is interrupted.Nr  r   r(  s     rs   on_audio_context_interruptedz1ElevenLabsTTSService.on_audio_context_interrupted  s     !!*---   c                 B   K   | j                  |       d{    y7 w)a
  Close the ElevenLabs context after all audio has been played.

        ElevenLabs does not send a server-side signal when a context is
        exhausted, so Pipecat must explicitly close it with
        ``close_context: True`` to free server-side resources.
        Nrl  rm  s     rs   on_audio_context_completedz/ElevenLabsTTSService.on_audio_context_completed  s      !!*---ro  c                 T  K   | j                         2 3 d{   }t        j                  |      }|j                  d      }|j                  d      du rt	        j
                  d|        [| j                  |      s^| j                         |k(  r2t	        j                  d|        | j                  |       d{    nt	        j                  d|        |j                  d      rKt        j                  |d         }t        || j                  d	|
      }| j                  ||       d{    |j                  d      s9|d   }t        || j                   | j"                  | j$                        \  }| _        | _        |s|| j'                  ||       d{    |j                  dg       }|j                  dg       }	|r)|	r'|d   |	d   z   }
|
dz  }| xj                   |z  c_        |d   d	   | _        t	        j(                  d       	7 7 a7 7 6 yw)z3Handle incoming WebSocket messages from ElevenLabs.N	contextIdisFinalTz#Received final message for context z4Received a delayed message, recreating the context: z+Ignoring message from unavailable context: audio   r(  	alignmentr   charDurationsMsr   za_receive_messages: using fallback timing method - consider investigating alignment data structure)rh  r/  loadsr   r   r-  audio_context_availabler+  r  create_audio_contextbase64	b64decoder   rv   append_to_audio_contextr   r   r   r   r:  ry   )r   messager2  received_ctx_idru  r  rx  r   r   char_durations_mschunk_end_time_mschunk_end_time_secondss               rs   _receive_messagesz&ElevenLabsTTSService._receive_messages  s'    !002 ;	 ;	'**W%C!ggk2O
 wwy!T)B?BSTU //@335HLLNN_` 33ODDD
 LL#NN_!`awww((W6(0@0@!P_`22?EJJJww{#,	(!--**55	 N
D.0M 22:OOO +4--8JB*O'(16G(L%*/@,?,CFWXZF[,[)1BV1K.--1GG- 1;2q0A-s;	$ E K PW 3sk   H(H&HH&B#H(>H?A5H(4H"5H(AH(H($H$%A7H(H&H("H($H(&H(c                 ,  K   d}	 t        j                  |       d{    	 | j                  r| j                  j                  t        j
                  u r| j                         }|rd|d}t        j                  d|        nddi}t        j                  d       | j                  j                  t        j                  |             d{    7 7 # t        j                  $ r$}t        j                  |  d|        Y d}~yd}~ww xY ww)	zBSend periodic keepalive messages to maintain WebSocket connection.
   Nr   textr(  zSending keepalive for context r  z!Sending keepalive without contextz keepalive error: )asynciosleepr,  rU  r'   rV  r+  r   r-  r.  r/  r0  
websocketsConnectionClosedry   )r   KEEPALIVE_SLEEPr(  keepalive_messager`  s        rs   rA  z,ElevenLabsTTSService._keepalive_task_handler3  s     --000??t'<'<

'J!%!A!A!CJ! %'*4-) 'Ej\%RS
 .4RL)%HI//..tzz:K/LMMM% 0" N.. $'9!=>sK   DCDB,C CC DC D-DDDDr  c                    K   | j                   r?|r<||d}| j                   j                  t        j                  |             d{    yyy7 w)z)Send text to the WebSocket for synthesis.r  N)r,  r.  r/  r0  )r   r  r(  r2  s       rs   
_send_textzElevenLabsTTSService._send_textM  sC     ??zz:C//&&tzz#777  *?7s   AAAAc                F  K   t        j                  |  d| d       	 | j                  r&| j                  j                  t        j
                  u r| j                          d{    	 | j                  |      s| j                  |       d{    | j                          d{    t        |       d| _        d| _        d| _        d|d	}| j                  r| j                  |d
<   | j                  r+| j                  D cg c]  }|j!                          c}|d<   | j                  j#                  t%        j&                  |             d{    t        j(                  d|        | j+                  ||       d{    | j-                  |       d{    d y7 J7 "7 c c}w 7 c7 37 # t.        $ r+}t1        |       t3        d|        Y d}~yd}~ww xY w# t.        $ r}t3        d|        Y d}~yd}~ww xY ww)a(  Generate speech from text using ElevenLabs' streaming WebSocket API.

        Args:
            text: The text to synthesize into speech.
            context_id: The context ID for tracking audio frames.

        Yields:
            Frame: Audio frames containing the synthesized speech.
        : Generating TTS []Nrw  r   r   r   r   r  r   r   zCreated new context rP  r   )r   r  r,  rU  r'   CLOSEDr  r|  r}  start_ttfb_metricsr   r   r   r   r   r   
model_dumpr.  r/  r0  r-  r  start_tts_usage_metricsr]  r   r   )r   r  r(  r2  locatorr`  s         rs   run_ttszElevenLabsTTSService.run_ttsS  s     	v/vQ78!	C??doo&;&;u||&Kmmo%%33J?33J???11333)Z@@,-D))+D&47D1 $'jAC++040D0D,->> ,0+R+RD ' $..0D?@ //..tzz#???LL#7
|!DEoodJ777224888
 J; & @3D @ 88 %<< )A!'EFF
  	C%=aS#ABBB	Cs   H!AG9 $F.%G9 *%G F1G 'F4(A"G 
F7!7G F<1G 
F>G #G $G (G9 -H!.G9 1G 4G 7G >G  G 	G6!G1,G9 0H!1G66G9 9	HHH!HH!r   )3r   r   r   r   r   Settingsr   r   r   r   r   r   r   r	   r   r    r   r  r#   r   r   r   dictr   r  r   r!  r   r$  r   r'  r3  r   
DOWNSTREAMr   r9  r  r  r=  rE  rh  r  rn  rq  r  rA  r  r%   r   r  __classcell__r  s   @rs   r   r   7  sE    %H$$ai aH #'#,%).2)-\`(,48?C.2Z$ Z$ 3-	Z$
 }Z$ Z$ c]Z$ Z$ &d^Z$ !Z$ ,4D9W4X+YZ$ %Z$ 01Z$  ((;<Z$ &d^Z$xd 	9X 	9(3- 	9?/K /DcN /b ! !!+ !4HSM 4 JXIbIb 
ce 
c 
c	T+(JT> 3
,s ,*.S ..3 .=~48S 8c 8 -C# -C3 -C>%QU+;V -C -Cru   r   c                       e Zd ZU dZeZeed<    G d de      Zdddddddddd	de	d	e
e	   d
ej                  de
e	   de	de
e   de
ee      de
e   de
e   de
e   de
e   f fdZdede
e	   fdZdefdZd Zdedee	ef   f fdZd Zdef fdZej>                  fde def fdZ!d e"e	ef   dee#e	e$f      fd!Z%e&d"e	d#e	de'e df   fd$       Z( xZ)S )%ElevenLabsHttpTTSServicea  ElevenLabs HTTP-based TTS service with word timestamps.

    Provides text-to-speech using ElevenLabs' HTTP streaming API for simpler,
    non-WebSocket integration. Suitable for use cases where streaming WebSocket
    connection is not required or desired.
    r   c                       e Zd ZU dZdZee   ed<   dZee	   ed<   dZ
ee   ed<   dZee   ed<   dZee   ed<   dZee   ed<   dZee   ed	<   dZeed
      ed<   dZeee      ed<   y)$ElevenLabsHttpTTSService.InputParamsa5  Input parameters for ElevenLabs HTTP TTS configuration.

        .. deprecated:: 0.0.105
            Use ``settings=ElevenLabsHttpTTSSettings(...)`` instead.

        Parameters:
            language: Language to use for synthesis.
            optimize_streaming_latency: Latency optimization level (0-4).
            stability: Voice stability control (0.0 to 1.0).
            similarity_boost: Similarity boost control (0.0 to 1.0).
            style: Style control for voice expression (0.0 to 1.0).
            use_speaker_boost: Whether to use speaker boost enhancement.
            speed: Voice speed control (0.25 to 4.0).
            apply_text_normalization: Text normalization mode ("auto", "on", "off").
            pronunciation_dictionary_locators: List of pronunciation dictionary locators to use.
        Nr.   r   r}   r~   r   r   r   r   r   r   )r   r   r   r   r.   r   r#   r   r   r   r}   r   r~   r   r   r   r   r   r
   r   r	   r   r   ru   rs   r   r    s    	" (,(8$+48"HSM8%)	8E?),0(5/0!%x%,08D>0!%x%KO (73F+G"HO\`)8D9W4X+Y`ru   r   Nzhttps://api.elevenlabs.io)	r   r   base_urlrv   r   r   r{   r   r   r   r   aiohttp_sessionr   r  rv   r   r   r{   r   r   c                   t        dddddddddd
      }|t        dt         d       ||_        |t        dt         d       ||_        |}|t        dt                |	s|j                   | j                  |j                        |_        |j                  |j                  |_        |j                  |j                  |_        |j                  |j                  |_        |j                  |j                  |_	        |j                  |j                  |_
        |j                  |j                  |_        |j                  |j                  |_        ||j                  }|	|j                  |	       t        | @  d|
|dd	d	||d
| || _        || _        || _        d| _        | j+                         | _        || _        d| _        d| _        d| _        d| _        y)a  Initialize the ElevenLabs HTTP TTS service.

        Args:
            api_key: ElevenLabs API key for authentication.
            voice_id: ID of the voice to use for synthesis.

                .. deprecated:: 0.0.105
                    Use ``settings=ElevenLabsHttpTTSSettings(voice=...)`` instead.

            aiohttp_session: aiohttp ClientSession for HTTP requests.
            model: TTS model to use (e.g., "eleven_turbo_v2_5").

                .. deprecated:: 0.0.105
                    Use ``settings=ElevenLabsHttpTTSSettings(model=...)`` instead.

            base_url: Base URL for ElevenLabs HTTP API.
            sample_rate: Audio sample rate. If None, uses default.
            pronunciation_dictionary_locators: List of pronunciation dictionary
                locators to use.
            params: Additional input parameters for voice customization.

                .. deprecated:: 0.0.105
                    Use ``settings=ElevenLabsHttpTTSSettings(...)`` instead.

            settings: Runtime-updatable settings. When provided alongside deprecated
                parameters, ``settings`` values take precedence.
            text_aggregation_mode: How to aggregate incoming text before synthesis.
            aggregate_sentences: Whether to aggregate sentences within the TTSService.

                .. deprecated:: 0.0.104
                    Use ``text_aggregation_mode`` instead.

            **kwargs: Additional arguments passed to the parent service.
        r-   N)
r   r   r.   r   r}   r~   r   r   r   r   r   r   r   r   FT)r   r   r   r   push_start_framerv   r{   r   r   r   r   )r   r   r   r   r.   r   r   r}   r~   r   r   r   r   r   r   r   r   r   	_base_url_sessionr   r   r   r   r   _previous_textr   r   )r   r   r   r  r   r  rv   r   r   r{   r   r   r  r  r   r  s                  rs   r   z!ElevenLabsHttpTTSService.__init__  s   f 5%'+!"%)
 ":/H'R%-""7,EwO%*" .O*"8-FG??.040Q0QRXRaRa0b$-44@BHBcBc$?##/171A1A$.**68>8O8O$5<<+-3\\$*++79?9Q9Q$6<<+-3\\$*22>@F@_@_$=5=9?9a9a6 ))(3 		
"7 3"!!#%		
 		
  !' #7792T/ !" !  (+%ru   r.   r/   c                     t        |      S )zConvert pipecat Language to ElevenLabs language code.

        Args:
            language: The language to convert.

        Returns:
            The ElevenLabs-specific language code, or None if not supported.
        r	  r
  s     rs   r   z5ElevenLabsHttpTTSService.language_to_service_language,  r  ru   c                      y)zCheck if this service can generate processing metrics.

        Returns:
            True, as ElevenLabs HTTP service supports metrics generation.
        Tr   r  s    rs   r  z-ElevenLabsHttpTTSService.can_generate_metrics7  r  ru   c                 ,    t        | j                        S r   r  r  s    rs   r   z,ElevenLabsHttpTTSService._set_voice_settings?  r  ru   r  c                 p   K   t         |   |       d{   }|r| j                         | _        |S 7 w)zApply a settings delta and rebuild voice settings.

        Args:
            delta: A :class:`TTSSettings` (or ``ElevenLabsHttpTTSSettings``) delta.

        Returns:
            Dict mapping changed field names to their previous values.
        N)r   r  r   r   )r   r  r  r  s      rs   r  z)ElevenLabsHttpTTSService._update_settingsB  s9      077#'#;#;#=D  8s   646c                 l    d| _         d| _        d| _        d| _        t	        j
                  |  d       y)zReset internal state variables.r   r   r   z: Reset internal stateN)r   r  r   r   r   r  r  s    rs   _reset_statez%ElevenLabsHttpTTSService._reset_stateP  s6     ! (+%v345ru   r  c                    K   t         |   |       d{    t        | j                        | _        | j                          y7 /w)zStart the ElevenLabs HTTP TTS service.

        Args:
            frame: The start frame containing initialization parameters.
        N)r   r!  rz   rv   r   r  r"  s     rs   r!  zElevenLabsHttpTTSService.startX  s?      gmE"""<T=M=MN 	#s   AA0Ar4  c                   K   t         |   ||       d{    t        |t        t        f      r<| j                          t        |t              r| j                  dg       d{    yyt        |t              rd| _        yy7 o7 !w)r6  Nr7  r   )	r   r9  r   r   r   r  r:  r   r  r;  s      rs   r9  z#ElevenLabsHttpTTSService.push_frameb  s      g 	222e/AB%1..~>>> 2 67"$D 8 	3 ?s"   BBAB'B	( B	Br   c                 B   |j                  dg       }|j                  dg       }|r|rt        |      t        |      k7  r/t        j                  dt        |       dt        |              g S g }g }| j                  }| j                  r| j
                  nd}t        |      D ]M  \  }}	|	dk(  r*|s|j                  |       |j                  |       d}d}5|| j                  ||   z   }||	z  }O |r|nd| _        ||nd| _        t        t        ||            }
|
S )	a+  Calculate word timing from character alignment data.

        This method handles partial words that may span across multiple alignment chunks.

        Args:
            alignment_info: Character timing data from ElevenLabs.

        Returns:
            List of (word, timestamp) pairs for complete words in this chunk.

        Example input data::

            {
                "characters": [" ", "H", "e", "l", "l", "o", " ", "w", "o", "r", "l", "d"],
                "character_start_times_seconds": [0.0, 0.1, 0.15, 0.2, 0.25, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9],
                "character_end_times_seconds": [0.1, 0.15, 0.2, 0.25, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
            }

        Would produce word times (with cumulative_time=0)::

            [("Hello", 0.1), ("world", 0.5)]
        
characterscharacter_start_times_secondszInvalid alignment data: chars=r   Nr   r   r   )r   r   r   ry   r   r   r   r   r   r   r   )r   r   r   char_start_timesr   r   r   r   r   r   r   s              rs   r   z-ElevenLabsHttpTTSService.calculate_word_timesu  sA   . ""<4)--.MrR,E
cBR>S0SNN0UHSIYEZD[\ I ));?;M;M$77SW ' 	%GAts{LL.$++O<#%L&*O"*&*&;&;>Nq>Q&QO$	% .:\r;J;V\_% #e%567
ru   r  r(  c           
     
  K   t        j                  |  d| d       | j                   d| j                  j                   d}|| j                  j
                  d}| j                  r| j                  |d<   | j                  r| j                  |d<   | j                  r+| j                  D cg c]  }|j                          c}|d<   | j                  j                  | j                  j                  |d
<   | j                  j                  }| j                  j
                  t        v r |r||d<   t        j                  d|        n9|r7t        j                  d| ddj                  t        t                            | j                   dd}d| j"                  i}| j                  j$                  | j                  j$                  |d<   	 | j&                  j)                  ||||      4 d	{   }	|	j*                  dk7  rT|	j-                          d	{   }
t/        d|
        	 d	d	d	      d	{    | j1                          d	{    y	| j3                  |       d	{    d}|	j4                  2 3 d	{   }|j7                  d      j9                         }|s+	 t;        j<                  |      }|rPd|v rL| j1                          d	{    t?        j@                  |d         }tC        || jD                  d|       |r]d|v rY|d   }|rR|jG                  dg       }|r|d   }tI        ||      }| jK                  |      }|r| jM                  ||       d	{    c c}w 7 7 c7 C7 .7 7 7 7 # t:        jN                  $ r#}t        j                  d |        Y d	}~?d	}~wtP        $ r}t/        d!|        Y d	}~cd	}~ww xY w6 | jR                  rB| jR                  | jT                  fg}| jM                  ||       d	{  7   d"| _)        d#| _*        |dkD  r| xjV                  |z  c_+        | j                  r| xj                  d$|z   z  c_        n|| _        d	d	d	      d	{  7   n# 1 d	{  7  sw Y   nxY wn(# tP        $ r}t/        d!|        Y d	}~nd	}~ww xY w| j1                          d	{  7   y	# | j1                          d	{  7   w xY ww)%a  Generate speech from text using ElevenLabs streaming API with timestamps.

        Makes a request to the ElevenLabs API to generate audio and timing data.
        Tracks the duration of each utterance to ensure correct sequencing.
        Includes previous text as context for better prosody continuity.

        Args:
            text: Text to convert to speech.
            context_id: The context ID for tracking audio frames.

        Yields:
            Frame: Audio and control frames containing the synthesized speech.
        r  r  rG  z/stream/with-timestamps)r  model_idprevious_textr   r   Nr   language_coderH  rI  rJ  rK  zapplication/json)rL  zContent-Typer_  r   )r/  headersr      zElevenLabs API error: r  r   zutf-8audio_base64rv  rw  rx  character_end_times_secondsrz  z"Failed to parse JSON from stream: rP  r   r   r   ),r   r  r  r   r   r   r  r   r   r  r   r.   rX  ry   rY  rZ  r   r   r   r  poststatusr  r   stop_ttfb_metricsr  contentdecodestripr/  r{  r~  r  r   rv   r   maxr   r:  JSONDecodeErrorr]  r   r   r   )r   r  r(  r   payloadr  r.   r  r   response
error_textutterance_durationlineline_strdataru  rx  char_end_timeschunk_end_timer   r`  final_word_times                         rs   r  z ElevenLabsHttpTTSService.run_tts  s     	v/vQ78   3DNN4H4H3II`a ,,I
 '+':':GO$(,(<(<G$%22484[4[<)0""$<G78 >>22>26..2Y2YG./>>**>>#AAh'/GO$LL0
;<NN!(+tuyu~u~  @F  Ge  @f  vg  uh  i
 --.
 T00
 >>44@37>>3\3\F/0K	+}}))'76 *  E/ E/??c)'/}}!6J$-CJ<+PQQE/ E/T ((***E 224888 &'""*"2"2 %! %!$#{{7399;H#  !#zz(3  Nd$:"&"8"8":::$*$4$4T.5I$JE"2 %t'7'7z# 
  K4$7(,[(9I(1:?\^`1a#15CB5GN9<=OQ_9`$6 .2-F-Fy-Q
#-*.*B*B:z*Z$Z$ZQ<:E/ "7	E/T +E 9%! ;( %[// !)KA3'OP $ !(1I!/MNN !G #3R %%(,(:(:D<Y<Y'Z&[O22?JOOO)+D&47D1 &)))-??) &&''3:5'*.D'KE/ E/ E/ E/ E/N  	C%=aS#ABBB	C ((***$((***s  B+T-M6DT#R+ 4M;5R+ 8#RM>R3R+ >N?R+ TNTR0N1RO.N
O.#R/.NNBN0N1N5R6T;R+ >RR+ TR
O.NNO+$O<RO+O& R&O++>R)P,*ARR+ RR+ R'RR'#R+ *S- +	S4SS- SS- T&S)'T-TTTT)*r   r   r   r   r   r  r   r   r   r   r   aiohttpClientSessionr   r	   r   r    r   r   r#   r   r  r   r   r  r   r  r  r   r!  r   r  r   r9  r   r   r   r   r%   r   r  r  r  s   @rs   r  r    s    )H((ai a@ #'#3%)\`(,8<?C.2, , 3-	,
 !.., }, , c], ,4D9W4X+Y, %, 45,  ((;<, &d^,B	9X 	9(3- 	9d ?K DcN 6  JXIbIb %e % %&;7383D ;eTWY^T^N_I` ;z G+# G+3 G+>%QU+;V G+ G+ru   r  )r   r   )Lr   r  r~  r/  dataclassesr   r   typingr   r   r   r   r	   r
   r   r   r   r   r  logurur   pydanticr   pipecat.frames.framesr   r   r   r   r   r   r   r   r   r   "pipecat.processors.frame_processorr   pipecat.services.settingsr   r   r   r   pipecat.services.tts_servicer    r!   r"   pipecat.transcriptions.languager#   r$   (pipecat.utils.tracing.service_decoratorsr%   r  websockets.asyncio.clientr&   r[  websockets.protocolr'   ModuleNotFoundErrorr`  r   r]  ElevenLabsOutputFormatrX  r   rt   r   rz   r   r   r   r   r   r   tupler   r   r  r   ru   rs   <module>r     s
      (        > _ _ 
 G ?,F) !!ST  " ,Hh ,H8C= ,H^  4"DcNM12"d3eTk**+,"0	Y 	 "K " "J   6 %(	9GCH%9G9G 9G #	9G
 4c5j!"C./9GxJ	C. J	CZv+z v+O  ,FLL;qc"#FLL`a
&qc*
++,s   D3 3E/82E**E/