
    qi                     R   d Z ddlZddlZddlZddlZddlmZmZ ddlm	Z	 ddl
mZmZmZ ddlZddlmZ ddlmZ ddlmZmZmZmZmZmZmZmZmZ dd	lmZ dd
lm Z m!Z!m"Z"m#Z# ddl$m%Z%m&Z& ddl'm(Z(m)Z) ddl*m+Z+m,Z, ddl-m.Z. ddl/m0Z0 	 ddl1m2Z3 ddl4m5Z5 de+dee:   fdZ; G d de:e	      Z<e G d de!             Z=e G d de!             Z> G d de(      Z?d e@de:fd!ZA G d" d#e)      ZBy# e6$ r7Z7 ejp                  de7         ejp                  d        e9de7       dZ7[7ww xY w)$zElevenLabs speech-to-text service implementation.

This module provides integration with ElevenLabs' Speech-to-Text API for transcription
using segmented audio processing. The service uploads audio files and receives
transcription results directly.
    N)	dataclassfield)Enum)AnyAsyncGeneratorOptional)logger)	BaseModel)	CancelFrameEndFrame
ErrorFrameFrameInterimTranscriptionFrame
StartFrameTranscriptionFrameVADUserStartedSpeakingFrameVADUserStoppedSpeakingFrame)FrameDirection)	NOT_GIVENSTTSettings	_NotGiven_warn_deprecated_param)ELEVENLABS_REALTIME_TTFS_P99ELEVENLABS_TTFS_P99)SegmentedSTTServiceWebsocketSTTService)Languageresolve_language)time_now_iso8601)
traced_stt)connect)StatezException: zZIn order to use ElevenLabs Realtime STT, you need to `pip install pipecat-ai[elevenlabs]`.zMissing module: languagereturnc                 @   i t         j                  dt         j                  dt         j                  dt         j                  dt         j
                  dt         j                  dt         j                  dt         j                  dt         j                  d	t         j                  d
t         j                  dt         j                  dt         j                  dt         j                  dt         j                  dt         j                   dt         j"                  di t         j$                  dt         j&                  dt         j(                  dt         j*                  dt         j,                  dt         j.                  dt         j0                  dt         j2                  dt         j4                  dt         j6                  dt         j8                  dt         j:                  dt         j<                  dt         j>                  dt         j@                  d t         jB                  d!t         jD                  d"i t         jF                  d#t         jH                  d$t         jJ                  d%t         jL                  d&t         jN                  d't         jP                  d(t         jR                  d)t         jT                  d*t         jV                  d+t         jX                  d,t         jZ                  d-t         j\                  d.t         j^                  d/t         j`                  d0t         jb                  d1t         jd                  d2t         jf                  d3i t         jh                  d4t         jj                  d5t         jl                  d6t         jn                  d7t         jp                  d8t         jr                  d9t         jt                  d:t         jv                  d;t         jx                  d<t         jz                  d=t         j|                  d>t         j~                  d?t         j                  d@t         j                  dAt         j                  dBt         j                  dCt         j                  dDi t         j                  dEt         j                  dFt         j                  dGt         j                  dHt         j                  dIt         j                  dJt         j                  dKt         j                  dLt         j                  dMt         j                  dNt         j                  dOt         j                  dPt         j                  dQt         j                  dRt         j                  dSt         j                  dTt         j                  dUt         j                  dVt         j                  dWt         j                  dXt         j                  dYt         j                  dZt         j                  d[t         j                  d\t         j                  d]t         j                  d^t         j                  d_t         j                  d`t         j                  dat         j                  dbt         j                  dci}t        | |dde      S )fa"  Convert a Language enum to ElevenLabs language code.

    Source:
        https://elevenlabs.io/docs/capabilities/speech-to-text

    Args:
        language: The Language enum value to convert.

    Returns:
        The corresponding ElevenLabs language code, or None if not supported.
    aframharahyeasmastazebelbenbosbulmyayuecatcebnyahrvcesdannldengestfilfinfrafulglglugkatdeuellgujhauhebhinhunisliboindgleitajpnjavkeakankazkhmkorkurkirlaolavlinlitluoltzmkdmsamalmltzhomrimarmonnepnsonorocioripusfaspolporpanronrussrpsnasndslkslvsomspaswaswetamtgktelthaturukrumburduzbviecymwolxhozulF)use_base_code)er   AFAMARHYASASTAZBEBNBSBGMYYUECACEBNYHRCSDANLENETFILFIFRFFGLLGKADEELGUHAHEHIHUISIGIDGAITJAJVKEAKNKKKMKOKUKYLOLVLNLTLUOLBMKMSMLMTZHMIMRMNNENSONOOCORPSFAPLPTPARORUSRSNSDSKSLSOESSWSVTATGTETHTRUKUMBURUZVICYWOXHZUr   )r#   LANGUAGE_MAPs     Q/opt/pipecat/venv/lib/python3.12/site-packages/pipecat/services/elevenlabs/stt.pylanguage_to_elevenlabs_languager   8   s   dUdUd 	Ud 	U	d
 	Ud 	ed 	Ud 	Ud 	Ud 	Ud 	Ud 	Ud 	ed 	Ud 	ed  	U!d" 	U#d$ 	U%d& 	U'd( 	U)d* 	U+d, 	U-d. 	e/d0 	U1d2 	U3d4 	U5d6 	U7d8 	U9d: 	U;d< 	U=d> 	U?d@ 	UAdB 	UCdD 	UEdF 	UGdH 	UIdJ 	UKdL 	UMdN 	UOdP 	UQdR 	USdT 	UUdV 	UWdX 	eYdZ 	U[d\ 	U]d^ 	U_d` 	Uadb 	Ucdd 	Uedf 	Ugdh 	Uidj 	Ukdl 	Umdn 	eodp 	Uqdr 	Usdt 	Uudv 	Uwdx 	Uydz 	U{d| 	U}d~ 	Ud@ 	UAdB 	UCdD 	eEdF 	UGdH 	UIdJ 	UKdL 	UMdN 	UOdP 	UQdR 	USdT 	UUdV 	UWdX 	UYdZ 	U[d\ 	U]d^ 	U_d` 	Uadb 	Ucdd 	Uedf 	Ugdh 	Uidj 	Ukdl 	UUUUUUeUUUUUUUGdLL Hl%HH    c                       e Zd ZdZdZdZy)CommitStrategyz.Commit strategies for transcript segmentation.manualvadN)__name__
__module____qualname____doc__MANUALVAD r   r   r   r      s    8F
Cr   r   c                   <    e Zd ZU dZ ed       Zedz  ez  ed<   y)ElevenLabsSTTSettingszSettings for ElevenLabsSTTService.

    Parameters:
        tag_audio_events: Whether to include audio events like (laughter),
            (coughing) in the transcription.
    c                      t         S Nr   r   r   r   <lambda>zElevenLabsSTTSettings.<lambda>   s    i r   default_factoryNtag_audio_events)	r   r   r   r   r   r  boolr   __annotations__r   r   r   r   r      s#     16FW0XdTkI-Xr   r   c                       e Zd ZU dZ ed       Zedz  ez  ed<    ed       Z	edz  ez  ed<    ed       Z
edz  ez  ed	<    ed
       Zedz  ez  ed<   y)ElevenLabsRealtimeSTTSettingsa  Settings for ElevenLabsRealtimeSTTService.

    See ``ElevenLabsRealtimeSTTService.InputParams`` for detailed descriptions.

    Parameters:
        vad_silence_threshold_secs: Seconds of silence before VAD commits (0.3-3.0).
        vad_threshold: VAD sensitivity (0.1-0.9, lower is more sensitive).
        min_speech_duration_ms: Minimum speech duration for VAD (50-2000ms).
        min_silence_duration_ms: Minimum silence duration for VAD (50-2000ms).
    c                      t         S r   r   r   r   r   r  z&ElevenLabsRealtimeSTTSettings.<lambda>   s    Yb r   r  Nvad_silence_threshold_secsc                      t         S r   r   r   r   r   r  z&ElevenLabsRealtimeSTTSettings.<lambda>   s    I r   vad_thresholdc                      t         S r   r   r   r   r   r  z&ElevenLabsRealtimeSTTSettings.<lambda>   s    S\ r   min_speech_duration_msc                      t         S r   r   r   r   r   r  z&ElevenLabsRealtimeSTTSettings.<lambda>   s    T] r   min_silence_duration_ms)r   r   r   r   r   r
  floatr   r  r  r  intr  r   r   r   r  r     sq    	 <AQb;cy 8c.3DU.VM54<)+V5:K\5]C$J2]6;L]6^S4Z)3^r   r  c                        e Zd ZU dZeZeed<    G d de      Zddddde	dde
d	ej                  d
e
dee
   dee   dee   dee   dee   f fdZdefdZdedee
   fdZdedefdZe	 dde
dedee
   fd       Zdedeedf   fdZ xZS )ElevenLabsSTTServiceaW  Speech-to-text service using ElevenLabs' file-based API.

    This service uses ElevenLabs' Speech-to-Text API to perform transcription on audio
    segments. It inherits from SegmentedSTTService to handle audio buffering and speech detection.
    The service uploads audio files to ElevenLabs and receives transcription results directly.
    	_settingsc                   4    e Zd ZU dZdZee   ed<   dZe	ed<   y) ElevenLabsSTTService.InputParamsaZ  Configuration parameters for ElevenLabs STT API.

        .. deprecated:: 0.0.105
            Use ``settings=ElevenLabsSTTSettings(...)`` instead.

        Parameters:
            language: Target language for transcription.
            tag_audio_events: Whether to include audio events like (laughter), (coughing), in the transcription.
        Nr#   Tr  )
r   r   r   r   r#   r   r   r  r  r  r   r   r   InputParamsr     s#    	 (,(8$+!%$%r   r  zhttps://api.elevenlabs.ioN)base_urlmodelsample_rateparamssettingsttfs_p99_latencyapi_keyaiohttp_sessionr  r  r  r  r  r  c                   t        dt        t        j                        d      }
|t	        dt         d       ||
_        |It	        dt                |s7|j                  t        |j                        |
_        |j                  |
_        ||
j                  |       t        | (  d|||
d|	 || _        || _        || _        y)aE  Initialize the ElevenLabs STT service.

        Args:
            api_key: ElevenLabs API key for authentication.
            aiohttp_session: aiohttp ClientSession for HTTP requests.
            base_url: Base URL for ElevenLabs API.
            model: Model ID for transcription.

                .. deprecated:: 0.0.105
                    Use ``settings=ElevenLabsSTTSettings(model=...)`` instead.

            sample_rate: Audio sample rate in Hz. If not provided, uses the pipeline's rate.
            params: Configuration parameters for the STT service.

                .. deprecated:: 0.0.105
                    Use ``settings=ElevenLabsSTTSettings(...)`` instead.

            settings: Runtime-updatable settings. When provided alongside deprecated
                parameters, ``settings`` values take precedence.
            ttfs_p99_latency: P99 latency from speech end to final transcript in seconds.
                Override for your deployment. See https://github.com/pipecat-ai/stt-benchmark
            **kwargs: Additional arguments passed to SegmentedSTTService.
        	scribe_v2N)r  r#   r  r  r  )r  r  r  r   )r   r   r   r   r   r  r#   r  apply_updatesuper__init___api_key	_base_url_session)selfr  r   r  r  r  r  r  r  kwargsdefault_settings	__class__s              r   r%  zElevenLabsSTTService.__init__   s    J 14X[[A!
 "7,A7K%*" "8-BC??.0OPVP_P_0`$-4:4K4K 1 ))(3 	
#-%	
 		
  !'r   r$   c                      y)zCheck if the service can generate processing metrics.

        Returns:
            True, as ElevenLabs STT service supports metrics generation.
        Tr   r)  s    r   can_generate_metricsz)ElevenLabsSTTService.can_generate_metrics3       r   r#   c                     t        |      S )zConvert a Language enum to ElevenLabs service-specific language code.

        Args:
            language: The language to convert.

        Returns:
            The ElevenLabs-specific language code, or None if not supported.
        )r   )r)  r#   s     r   language_to_service_languagez1ElevenLabsSTTService.language_to_service_language;  s     /x88r   
audio_datac                   K   | j                    d}d| j                  i}t        j                         }|j	                  dt        j                  |      dd       |j	                  d| j                  j                         |j	                  d| j                  j                         | j                  j                  =|j	                  d
t        | j                  j                        j                                | j                  j                  |||      4 d	{   }|j                  dk7  rK|j!                          d	{   }t#        j$                  d|        t'        d|j                   d|       |j)                          d	{   }|cd	d	d	      d	{    S 7 7 d7 7 # 1 d	{  7  sw Y   y	xY ww)a  Upload audio data to ElevenLabs and get transcription result.

        Args:
            audio_data: Raw audio bytes in WAV format.

        Returns:
            The transcription result data.

        Raises:
            Exception: If transcription fails or returns an error.
        z/v1/speech-to-text
xi-api-keyfilez	audio.wavzaudio/x-wav)filenamecontent_typemodel_idlanguage_codeNr  )dataheaders   z ElevenLabs transcription error: z!Transcription failed with status z: )r'  r&  aiohttpFormData	add_fieldioBytesIOr  r  r#   r  strlowerr(  poststatustextr	   error	Exceptionjson)r)  r3  urlr<  r;  response
error_textresults           r   _transcribe_audioz&ElevenLabsSTTService._transcribe_audioF  s{       23/ !JJz" &	 	 	
 	z4>>#7#78(?(?@>>**6NN-s4>>3R3R/S/Y/Y/[\==%%cg%F 	 	(#%#+==?2
?
|LM"CHOOCTTVWaVb cdd#==?*F	 	 	2 +	 	 	 	sm   DGF'G!#F/F)A
F/F+F/G!F-"G)F/+F/-G/G5F86G=G
transcriptis_finalc                 @   K   | j                          d{    y7 wz+Handle a transcription result with tracing.N)stop_processing_metricsr)  rP  rQ  r#   s       r   _handle_transcriptionz*ElevenLabsSTTService._handle_transcriptionm  s     
 **,,,   audioc                  K   	 | j                          d{    | j                  |       d{   }|j                  dd      j                         }|rl|j                  dd      }| j	                  |d|       d{    t        j                  d| d       t        || j                  t               ||	       yy7 7 7 H# t        $ r}t        d
|        Y d}~yd}~ww xY ww)a  Transcribe an audio segment using ElevenLabs' STT API.

        Args:
            audio: Raw audio bytes in WAV format (already converted by base class).

        Yields:
            Frame: TranscriptionFrame containing the transcribed text, or ErrorFrame on failure.

        Note:
            The audio is already in WAV format from the SegmentedSTTService.
            Only non-empty transcriptions are yielded.
        NrG   r:  r:   TzTranscription: []rN  zUnknown error occurred: )rH  )start_processing_metricsrO  getstriprV  r	   debugr   _user_idr   rI  r   )r)  rX  rN  rG  detected_languagees         r   run_sttzElevenLabsSTTService.run_sttt  s     	C//111  11%88F ::fb)//1D$*JJ$F!00t=NOOO/vQ78(MM$&%!   2 9 P  	C%=aS#ABBB	Csb   C2C
 CC
 CAC
 ?C AC
 C2C
 C
 C
 
	C/C*%C2*C//C2r   )r   r   r   r   r   Settingsr  r
   r  r   rC  r>  ClientSessionr   r  r  r%  r  r/  r   r2  bytesdictrO  r    rV  r   r   rd  __classcell__r,  s   @r   r  r     s:    %H$$&i && 4#%)(,48,?E( E( !..	E(
 E( }E( c]E( %E( 01E( #5/E(Nd 	9X 	9(3- 	9%% %D %N IM--)--9A#- -%C5 %C^E4K-H %Cr   r  r  c                     | xdk(  r yxdk(  r yxdk(  r yxdk(  r yxd	k(  r y
dk(  ryt        j                  d|  d       y)zGet the appropriate audio format string for a given sample rate.

    Args:
        sample_rate: The audio sample rate in Hz.

    Returns:
        The ElevenLabs audio format string.
    i@  pcm_8000i>  	pcm_16000i"V  	pcm_22050i]  	pcm_24000iD  	pcm_44100i  	pcm_48000z<ElevenLabsRealtimeSTTService: No audio format available for z sample rate, using pcm_16000)r	   warning)r  s    r   audio_format_from_sample_raters    sI     
NN
F{mSpq r   c                       e Zd ZU dZeZeed<    G d de      Zde	j                  ddddddded
d	ed
ede	dee   dee   dedededee   dee   dee   f fdZdefdZdedeeef   f fdZdef fdZdef fdZdef fdZd Zdedef fdZ d e!de"edf   fd!Z# fd"Z$ fd#Z%d$e!fd%Z&d& Z'd' Z(d( Z)d) Z*d*efd+Z+d*efd,Z,e-	 d3d-ed.ed/ee   fd0       Z.d*efd1Z/d*efd2Z0 xZ1S )4ElevenLabsRealtimeSTTServicea  Speech-to-text service using ElevenLabs' Realtime WebSocket API.

    This service uses ElevenLabs' Realtime Speech-to-Text API to perform transcription
    with ultra-low latency. It supports both partial (interim) and committed (final)
    transcripts, and can use either manual commit control or automatic Voice Activity
    Detection (VAD) for segment boundaries.

    By default, uses manual commit strategy where Pipecat's VAD controls when to
    commit transcript segments, providing consistency with other STT services.
    r  c                       e Zd ZU dZdZee   ed<   ej                  Z
eed<   dZee   ed<   dZee   ed<   dZee   ed<   dZee   ed<   d	Zeed
<   d	Zeed<   d	Zeed<   y)(ElevenLabsRealtimeSTTService.InputParamsa&  Configuration parameters for ElevenLabs Realtime STT API.

        .. deprecated:: 0.0.105
            Use ``settings=ElevenLabsRealtimeSTTSettings(...)`` instead.

        Parameters:
            language_code: ISO-639-1 or ISO-639-3 language code. Leave None for auto-detection.
            commit_strategy: How to segment speech - manual (Pipecat VAD) or vad (ElevenLabs VAD).
            vad_silence_threshold_secs: Seconds of silence before VAD commits (0.3-3.0).
                Only used when commit_strategy is VAD. None uses ElevenLabs default.
            vad_threshold: VAD sensitivity (0.1-0.9, lower is more sensitive).
                Only used when commit_strategy is VAD. None uses ElevenLabs default.
            min_speech_duration_ms: Minimum speech duration for VAD (50-2000ms).
                Only used when commit_strategy is VAD. None uses ElevenLabs default.
            min_silence_duration_ms: Minimum silence duration for VAD (50-2000ms).
                Only used when commit_strategy is VAD. None uses ElevenLabs default.
            include_timestamps: Whether to include word-level timestamps in transcripts.
            enable_logging: Whether to enable logging on ElevenLabs' side.
            include_language_detection: Whether to include language detection in transcripts.
        Nr:  commit_strategyr
  r  r  r  Finclude_timestampsenable_logginginclude_language_detection)r   r   r   r   r:  r   rC  r  r   r   rx  r
  r  r  r  r  r  ry  r  rz  r{  r   r   r   r  rw    s    	* (,x}+*8*?*??6:"HUO:)-x-04415#5#(D($$+0"D0r   r  zapi.elevenlabs.ioNF)
r  rx  r  r  ry  rz  r{  r  r  r  r  r  rx  r  r  ry  rz  r{  r  r  r  c          	         t        dddddd      }|t        dt         d       ||_        |	t        dt                |
s|	j                  |_        |	j
                  t        j                  k7  r|	j
                  }|	j                  |_        |	j                  |_	        |	j                  |_
        |	j                  |_        |	j                  }|	j                  }|	j                  }|
|j                  |
       t!        | D  d
||dd|d| || _        || _        d	| _        d| _        || _        || _        || _        || _        t5        j6                         | _        | j8                  j;                          y)u  Initialize the ElevenLabs Realtime STT service.

        Args:
            api_key: ElevenLabs API key for authentication.
            base_url: Base URL for ElevenLabs WebSocket API.
            commit_strategy: How to segment speech — ``CommitStrategy.MANUAL``
                (Pipecat VAD) or ``CommitStrategy.VAD`` (ElevenLabs VAD).
                Defaults to ``CommitStrategy.MANUAL``.
            model: Model ID for transcription.

                .. deprecated:: 0.0.105
                    Use ``settings=ElevenLabsRealtimeSTTSettings(model=...)`` instead.

            sample_rate: Audio sample rate in Hz. If not provided, uses the pipeline's rate.
            include_timestamps: Whether to include word-level timestamps in transcripts.
            enable_logging: Whether to enable logging on ElevenLabs' side.
            include_language_detection: Whether to include language detection in transcripts.
            params: Configuration parameters for the STT service.

                .. deprecated:: 0.0.105
                    Use ``settings=ElevenLabsRealtimeSTTSettings(...)`` instead.

            settings: Runtime-updatable settings. When provided alongside deprecated
                parameters, ``settings`` values take precedence.
            ttfs_p99_latency: P99 latency from speech end to final transcript in seconds.
                Override for your deployment. See https://github.com/pipecat-ai/stt-benchmark
            **kwargs: Additional arguments passed to WebsocketSTTService.
        scribe_v2_realtimeN)r  r#   r
  r  r  r  r  r  
      )r  r  keepalive_timeoutkeepalive_intervalr  rZ  r   )r  r   r  r:  r#   rx  r   r   r
  r  r  r  ry  rz  r{  r#  r$  r%  r&  r'  _audio_format_receive_task_commit_strategy_include_timestamps_enable_logging_include_language_detectionasyncioEvent_connected_eventset)r)  r  r  rx  r  r  ry  rz  r{  r  r  r  r*  r+  r,  s                 r   r%  z%ElevenLabsRealtimeSTTService.__init__  s   Z 9&'+#'$(
 "7,I7S%*" "8-JK,2,@,@ )))^-B-BB&,&<&<O>D>_>_ ;171E1E .:@:W:W 7;A;Y;Y 8%+%>%>"!'!6!6-3-N-N* ))(3 	
#-  %	
 	
  !! !0#5 -+E( '!!#r   r$   c                      y)zCheck if the service can generate processing metrics.

        Returns:
            True, as ElevenLabs Realtime STT service supports metrics generation.
        Tr   r.  s    r   r/  z1ElevenLabsRealtimeSTTService.can_generate_metricsL  r0  r   deltac                    K   t         |   |       d{   }|s|S | j                  r0| j                          d{    | j	                          d{    |S 7 F7  7 
w)a  Apply a settings delta and reconnect if anything changed.

        Args:
            delta: A :class:`STTSettings` (or ``ElevenLabsRealtimeSTTSettings``) delta.

        Returns:
            Dict mapping changed field names to their previous values.
        N)r$  _update_settings
_websocket_disconnect_connect)r)  r  changedr,  s      r   r  z-ElevenLabsRealtimeSTTService._update_settingsT  s_      077N??""$$$--/!! 8 %!s1   A#A'A#AA#A!A#A#!A#framec                    K   t         |   |       d{    t        | j                        | _        | j                          d{    y7 77 w)zStart the STT service and establish WebSocket connection.

        Args:
            frame: Frame indicating service should start.
        N)r$  startrs  r  r  r  r)  r  r,  s     r   r  z"ElevenLabsRealtimeSTTService.starth  sF      gmE""":4;K;KLmmo 	#s!   AA1AA	AAc                 t   K   t         |   |       d{    | j                          d{    y7 7 w)zStop the STT service and close WebSocket connection.

        Args:
            frame: Frame indicating service should stop.
        N)r$  stopr  r  s     r   r  z!ElevenLabsRealtimeSTTService.stopr  s6      gl5!!!    	"    848688c                 t   K   t         |   |       d{    | j                          d{    y7 7 w)zCancel the STT service and close WebSocket connection.

        Args:
            frame: Frame indicating service should be cancelled.
        N)r$  cancelr  r  s     r   r  z#ElevenLabsRealtimeSTTService.cancel{  s6      gnU###    	$ r  c                 @   K   | j                          d{    y7 w)zBStart performance metrics collection for transcription processing.N)r]  r.  s    r   _start_metricsz+ElevenLabsRealtimeSTTService._start_metrics  s     ++---rW  	directionc                 x  K   t         |   ||       d{    t        |t              r| j	                          d{    yt        |t
              r| j                  t        j                  k(  r| j                  r| j                  j                  t        j                  u r^	 ddd| j                  d}| j                  j                  t        j                   |             d{    t#        j$                  d       yyyyy7 7 7 "# t&        $ r"}t#        j(                  d|        Y d}~yd}~ww xY ww)zProcess incoming frames and handle speech events.

        Args:
            frame: The frame to process.
            direction: Direction of frame flow in the pipeline.
        Ninput_audio_chunkrZ  Tmessage_typeaudio_base_64commitr  z Sent manual commit to ElevenLabszFailed to send commit: )r$  process_frame
isinstancer   r  r   r  r   r   r  stater"   OPENr  sendrJ  dumpsr	   tracerI  rr  )r)  r  r  commit_messagerc  r,  s        r   r  z*ElevenLabsRealtimeSTTService.process_frame  s     g#E9555e89%%''':;$$(=(==??t'<'<

'J
F,?-/&*+/+;+;	* #oo224::n3MNNN%GH (K? > < 	6 ( O$ F)@'DEEFs^   D:D'D:D A$D:%AD 'D
(D D:D:
D 	D7D2-D:2D77D:rX  c                  K   | j                   j                          d{    | j                  r&| j                  j                  t        j
                  u r| j                          d{    | j                  r| j                  j                  t        j                  u rl	 t        j                  |      j                  d      }d|d| j                  d}| j                  j                  t        j                  |             d{    d y7 7 7 # t        $ r$}t!        dt#        |              Y d}~4d}~ww xY ww)zProcess audio data for speech-to-text transcription.

        Args:
            audio: Raw audio bytes to transcribe.

        Yields:
            None - transcription results are handled via WebSocket responses.
        Nutf-8r  Fr  zElevenLabs Realtime STT error: )r  waitr  r  r"   CLOSEDr  r  base64	b64encodedecoder  r  rJ  r  rI  r   rC  )r)  rX  audio_base64messagerc  s        r   rd  z$ElevenLabsRealtimeSTTService.run_stt  s     ##((*** $//"7"75<<"G--/!!??t44

BM%//6==gF %8%1##'#3#3	 oo**4::g+>??? 
- 	+ " @ M #B3q6(!KLLLMs_   E	DA	E	*D+6E	"A&D D	D E	E	D 	E"E<E	EE	c                   K   | j                   j                          	 | j                          d{    t        |           d{    | j
                  r;| j                  s/| j                  | j                  | j                              | _        | j                   j                          y7 |7 h# | j                   j                          w xY ww)z:Establish WebSocket connection to ElevenLabs Realtime STT.N)r  clear_connect_websocketr$  r  r  r  create_task_receive_task_handler_report_errorr  r)  r,  s    r   r  z%ElevenLabsRealtimeSTTService._connect  s     ##%
	())+++'"$$$t'9'9%)%5%5..t/A/AB&" !!%%' ,$ !!%%'s?   CB3 B/B3 B1	AB3 C/B3 1B3 3CCc                    K   t         |           d{    | j                  r*| j                  | j                         d{    d| _        | j	                          d{    y7 S7 &7 	w)z-Close WebSocket connection and cleanup tasks.N)r$  r  r  cancel_task_disconnect_websocketr  s    r   r  z(ElevenLabsRealtimeSTTService._disconnect  sf     g!###""4#5#5666!%D((*** 	$ 7 	+s3   A/A).A/A+A/#A-$A/+A/-A/silencec                    K   t        j                  |      j                  d      }d|d| j                  d}| j                  j                  t        j                  |             d{    y7 w)zSend silent audio wrapped in ElevenLabs' JSON protocol.

        Args:
            silence: Silent 16-bit mono PCM audio bytes.
        r  r  Fr  N)r  r  r  r  r  r  rJ  r  )r)  r  r  r  s       r   _send_keepalivez,ElevenLabsRealtimeSTTService._send_keepalive  s_      ''077@/)++	
 oo""4::g#6777s   A'A1)A/*A1c                   K   	 | j                   r'| j                   j                  t        j                  u ryt	        j
                  d       d| j                  j                   g}| j                  j                  r(|j                  d| j                  j                          |j                  d| j                          |j                  d| j                  j                          | j                  r5|j                  dt        | j                        j                                 | j                   r5|j                  dt        | j                         j                                 | j"                  r5|j                  d	t        | j"                        j                                 | j                  t$        j&                  k(  r| j                  j(                  (|j                  d
| j                  j(                          | j                  j*                  (|j                  d| j                  j*                          | j                  j,                  (|j                  d| j                  j,                          | j                  j.                  (|j                  d| j                  j.                          d| j0                   ddj3                  |       }d| j4                  i}t7        ||       d{   | _         | j9                  d       d{    t	        j
                  d       y7 87 # t:        $ r)}| j=                  d| |       d{  7   Y d}~yd}~ww xY ww)z6Connect to ElevenLabs Realtime STT WebSocket endpoint.Nz%Connecting to ElevenLabs Realtime STTz	model_id=zlanguage_code=zaudio_format=zcommit_strategy=zinclude_timestamps=zenable_logging=zinclude_language_detection=zvad_silence_threshold_secs=zvad_threshold=zmin_speech_duration_ms=zmin_silence_duration_ms=zwss://z/v1/speech-to-text/realtime?&r5  )additional_headerson_connectedz$Connected to ElevenLabs Realtime STTz.Unable to connect to ElevenLabs Realtime STT: 	error_msg	exception)r  r  r"   r  r	   r`  r  r  r#   appendr  r  valuer  rC  rD  r  r  r   r   r
  r  r  r  r'  joinr&  websocket_connect_call_event_handlerrI  
push_error)r)  r  ws_urlr<  rc  s        r   r  z/ElevenLabsRealtimeSTTService._connect_websocket  s    4	4??#8#8EJJ#FLL@A "$.."6"6!789F~~&&t~~/F/F.GHIMMM$*<*<)=>?MM,T-B-B-H-H,IJK '' 3C8P8P4Q4W4W4Y3Z[\##D4H4H0I0O0O0Q/RST//1#d6V6V2W2]2]2_1`a
 $$(:(::>><<HMM5dnn6_6_5`a >>//;MMN4>>3O3O2P"QR>>88DMM$;DNN<a<a;b"cd>>99EMM24>>3Y3Y2Z[ dnn--I#((SYJZI[\F#T]]3G$5fQX$YYDO**>:::LL?@ Z: 	//J1#NZ[ "   	si   M32L> M3K
L> L:L> L< L> 9M3:L> <L> >	M0M+ M#!M+&M3+M00M3c                   K   	 | j                   r]| j                   j                  t        j                  u r7t	        j
                  d       | j                   j                          d{    d| _         | j                  d       d{    y7 %# t        $ r)}| j                  d| |       d{  7   Y d}~Pd}~ww xY w7 <# d| _         | j                  d       d{  7   w xY ww)z2Disconnect from ElevenLabs Realtime STT WebSocket.z*Disconnecting from ElevenLabs Realtime STTNzError closing websocket: r  on_disconnected)
r  r  r"   r  r	   r`  closerI  r  r  )r)  rc  s     r   r  z2ElevenLabsRealtimeSTTService._disconnect_websocket&  s     	>4??#8#8EJJ#FIJoo++--- #DO**+<=== . 	Z//.Gs,KWX/YYY	Z > #DO**+<===sw   C-A$B (B)B -C-C	C-B 	CB=2B53B=8C =CC C-C*#C&$C**C-c                 H    | j                   r| j                   S t        d      )zGet the current WebSocket connection.

        Returns:
            The WebSocket connection.

        Raises:
            Exception: If WebSocket is not connected.
        zWebsocket not connected)r  rI  r.  s    r   _get_websocketz+ElevenLabsRealtimeSTTService._get_websocket2  s!     ????"122r   c                 \  K   | j                         2 3 d{   }	 t        j                  |      }| j                  |       d{    97 47 # t        j                  $ r t        j                  d|        Y jt        $ r"}t        j                  d|        Y d}~d}~ww xY w6 yw)z4Continuously receive and process WebSocket messages.NzReceived non-JSON message: zError processing message: )	r  rJ  loads_process_responseJSONDecodeErrorr	   rr  rI  rH  )r)  r  r;  rc  s       r   _receive_messagesz.ElevenLabsRealtimeSTTService._receive_messages?  s     !002 	? 	?'?zz'*,,T222	? 3'' H!<WIFG ?9!=>>? 3sf   B,B*AB*B,)AAAB,B*A+B';B,=B'B"B,"B''B,r;  c                   K   |j                  d      }|dk(  rt        j                  d|        y|dk(  r| j                  |       d{    y|dk(  r| j	                  |       d{    y|dk(  r| j                  |       d{    y|dv r3|j                  d	d
|       }| j                  d|        d{    yt        j                  d|        y7 7 v7 Y7 $w)zpProcess a response message from ElevenLabs.

        Args:
            data: Parsed JSON response data.
        r  session_startedzElevenLabs session started: partial_transcriptNcommitted_transcript$committed_transcript_with_timestamps)rH  
auth_errorquota_exceeded_errortranscriber_errorinput_errorcommit_throttledr  unaccepted_terms_errorrate_limitedqueue_overflowresource_exhaustedsession_time_limit_exceededchunk_size_exceededinsufficient_audio_activityrH  zUnknown error - zError: )r  zUnknown message type: )r^  r	   r`  _on_partial_transcript_on_committed_transcript(_on_committed_transcript_with_timestampsr  )r)  r;  r  r  s       r   r  z.ElevenLabsRealtimeSTTService._process_responseK  s      xx/,,LL7v>?11--d33333//555CC??EEE 
 
  ,<\N*KLI//gi[,A/BBBLL1,@A7 4 6 F& CsH   A	C'CC'*C!+C'	C#
6C' C%C'!C'#C'%C'c           	        K   |j                  dd      j                         }|sy|j                  d      }t        j                  d| d       | j	                  t        || j                  t               ||             d{    y7 w)zoHandle partial transcript (interim results).

        Args:
            data: Partial transcript data.
        rG  rZ  Nr:  zPartial transcript: [r[  r\  )r^  r_  r	   r  
push_framer   ra  r   )r)  r;  rG  r#   s       r   r  z3ElevenLabsRealtimeSTTService._on_partial_transcriptt  s      xx#))+ 88O,,TF!45oo% "
 	
 	
s   BBB
BrP  rQ  r#   c                    K   ywrS  r   rU  s       r   rV  z2ElevenLabsRealtimeSTTService._handle_transcription  s     
 	s   c           
        K   | j                   ry|j                  dd      j                         }|sy| j                          d{    |j                  d      }t	        j
                  d| d       | j                  |d|       d{    | j                  t        j                  k(  }| j                  t        || j                  t               |||             d{    y7 7 ^7 	w)	zqHandle committed transcript (final results).

        Args:
            data: Committed transcript data.
        NrG  rZ  r:  zCommitted transcript: [r[  TrN  	finalized)r  r^  r_  rT  r	   r`  rV  r  r   r   r  r   ra  r   r)  r;  rG  r#   r  s        r   r  z5ElevenLabsRealtimeSTTService._on_committed_transcript  s      ##xx#))+**,,, 88O,.tfA67((tX>>>))^-B-BB	oo "#	
 		
 		
 	- 	?		
s8   AC.C(AC.C*AC."C,#C.*C.,C.c           
        K   |j                  dd      j                         }|sy| j                          d{    |j                  d      }t        j                  d| d       | j                  |d|       d{    | j                  t        j                  k(  }| j                  t        || j                  t               |||             d{    y7 7 ^7 	w)	a  Handle committed transcript with word-level timestamps.

        This message is sent when include_timestamps=true. The result data includes:
        - text: The transcribed text
        - language_code: Detected language (if available)
        - words: Array of word objects with timing information:
            - text: The word text
            - start: Start time in seconds
            - end: End time in seconds
            - type: "word" or "spacing"
            - speaker_id: Speaker identifier (if available)
            - logprob: Log probability score (if available)
            - characters: Array of character strings (if available)

        Args:
            data: Committed transcript data with timestamps.
        rG  rZ  Nr:  z'Committed transcript with timestamps: [r[  Tr  )r^  r_  rT  r	   r`  rV  r  r   r   r  r   ra  r   r  s        r   r  zEElevenLabsRealtimeSTTService._on_committed_transcript_with_timestamps  s     $ xx#))+**,,, 88O,>tfAFG((tX>>>))^-B-BB	 oo "#	
 		
 		
 	- 	?		
s5   7C!CAC!>C?AC!CC!C!C!r   )2r   r   r   r   r  re  r  r
   r  r   r   r   rC  r   r  r  r  r%  r/  r   rh  r   r  r   r  r   r  r   r  r  r   r   r  rg  r   rd  r  r  r  r  r  r  r  r  r  r    rV  r  r  ri  rj  s   @r   ru  ru    s   	 -H,,1i 1H ,*8*?*?#%)#($+0(,<@,Hc$ c$ 	c$
 (c$ }c$ c]c$ !c$ c$ %)c$ %c$ 89c$ #5/c$Jd K DcN ( ! !!+ !.F F> F8 5  ^E4K-H  D(+8U 86p
>3	?'BD 'BR
 
2 IM)-9A# #
4 #
J,
4 ,
r   ru  )Cr   r  r  rA  rJ  dataclassesr   r   enumr   typingr   r   r   r>  logurur	   pydanticr
   pipecat.frames.framesr   r   r   r   r   r   r   r   r   "pipecat.processors.frame_processorr   pipecat.services.settingsr   r   r   r   pipecat.services.stt_latencyr   r   pipecat.services.stt_servicer   r   pipecat.transcriptions.languager   r   pipecat.utils.timer   (pipecat.utils.tracing.service_decoratorsr    websockets.asyncio.clientr!   r  websockets.protocolr"   ModuleNotFoundErrorrc  rH  rI  rC  r   r   r   r  r  r  rs  ru  r   r   r   <module>r     sK     	  (  0 0   
 
 
 > _ _ Z Q F / ?,F)rIh rI8C= rIjS$  YK Y Y _K _ _$FC. FCRs s 8m
#6 m
Q  ,FLL;qc"#FLLd &qc*
++,s   C* *D&/2D!!D&