
    qiX                     Z   d Z ddlZddlZddlmZmZ ddlmZmZm	Z	m
Z
 ddlmZ ddlmZ ddlmZmZmZmZmZmZmZ ddlmZ dd	lmZmZmZmZ dd
lmZ ddl m!Z! ddl"m#Z# ddl$m%Z% ddl&m'Z' 	 ddl(m)Z* ddl+m,Z, dZ1dZ2dZ3dZ4 G d de      Z5 G d de      Z6 G d de      Z7 G d de      Z8d e9d!e:fd"Z;d#e#d!e<fd$Z=d%e
e	e#      d!e
e	e<      fd&Z>e G d' d(e             Z? G d) d*e!      Z@y# e-$ r7Z. ej^                  de.         ej^                  d        e0de.       dZ.[.ww xY w)+z-Soniox speech-to-text service implementation.    N)	dataclassfield)AnyAsyncGeneratorListOptional)logger)	BaseModel)CancelFrameEndFrameFrameInterimTranscriptionFrame
StartFrameTranscriptionFrameVADUserStoppedSpeakingFrame)FrameDirection)	NOT_GIVENSTTSettings	_NotGiven_warn_deprecated_param)SONIOX_TTFS_P99)WebsocketSTTService)Language)time_now_iso8601)
traced_stt)connect)StatezException: zEIn order to use Soniox, you need to `pip install pipecat-ai[soniox]`.zMissing module: z{"type": "keepalive"}z{"type": "finalize"}z<end>z<fin>c                   &    e Zd ZU dZeed<   eed<   y)SonioxContextGeneralItemzGRepresents a key-value pair for structured general context information.keyvalueN__name__
__module____qualname____doc__str__annotations__     M/opt/pipecat/venv/lib/python3.12/site-packages/pipecat/services/soniox/stt.pyr   r   4   s    Q	HJr*   r   c                   &    e Zd ZU dZeed<   eed<   y)SonioxContextTranslationTermzORepresents a custom translation mapping for ambiguous or domain-specific terms.sourcetargetNr"   r)   r*   r+   r-   r-   ;   s    YKKr*   r-   c                   t    e Zd ZU dZdZeee      ed<   dZ	ee
   ed<   dZeee
      ed<   dZeee      ed<   y)SonioxContextObjectzContext object for models with context_version 2, for Soniox stt-rt-v3-preview and higher.

    Learn more about context in the documentation:
    https://soniox.com/docs/stt/concepts/context
    Ngeneraltexttermstranslation_terms)r#   r$   r%   r&   r2   r   r   r   r(   r3   r'   r4   r5   r-   r)   r*   r+   r1   r1   B   sU     9=GXd345<D(3-!%E8DI%FJx%A BCJr*   r1   c                       e Zd ZU dZdZeed<   dZee   ed<   dZ	ee
   ed<   dZeee      ed	<   dZee   ed
<   dZeeez     ed<   dZee   ed<   dZee   ed<   dZee   ed<   y)SonioxInputParamsa\  Real-time transcription settings.

    .. deprecated:: 0.0.105
        Use ``settings=SonioxSTTSettings(...)`` instead.

    See Soniox WebSocket API documentation for more details:
    https://soniox.com/docs/speech-to-text/api-reference/websocket-api#configuration-parameters

    Parameters:
        model: Model to use for transcription.
        audio_format: Audio format to use for transcription.
        num_channels: Number of channels to use for transcription.
        language_hints: List of language hints to use for transcription.
        language_hints_strict: If true, strictly enforce language hints (only transcribe in provided languages).
        context: Customization for transcription. String for models with context_version 1 and ContextObject for models with context_version 2.
        enable_speaker_diarization: Whether to enable speaker diarization. Tokens are annotated with speaker IDs.
        enable_language_identification: Whether to enable language identification. Tokens are annotated with language IDs.
        client_reference_id: Client reference ID to use for transcription.
    	stt-rt-v4model	pcm_s16leaudio_format   num_channelsNlanguage_hintslanguage_hints_strictcontextFenable_speaker_diarizationenable_language_identificationclient_reference_id)r#   r$   r%   r&   r9   r'   r(   r;   r   r=   intr>   r   r   r?   boolr@   r1   rA   rB   rC   r)   r*   r+   r7   r7   O   s    ( E3"-L(3--"#L(3-#/3NHT(^,3,08D>037GX)C/071665:"HTN:)-#-r*   r7   tokenreturnc                 6    | d   t         k(  xs | d   t        k(  S )z%Determine if a token is an end token.r3   )	END_TOKENFINALIZED_TOKENrF   s    r+   is_end_tokenrL   s   s    =I%Iv/)IIr*   languagec                 |    t        | j                        j                         }d|v r|j                  d      d   S |S )zPipecat Language enum uses same ISO 2-letter codes as Soniox, except with added regional variants.

    For a list of all supported languages, see: https://soniox.com/docs/speech-to-text/core-concepts/supported-languages
    -r   )r'   r!   lowersplit)rM   lang_strs     r+   language_to_soniox_languagerS   x   s;    
 8>>"((*H
h~~c"1%%Or*   r>   c                 j    | y | D cg c]  }t        |       }}t        t        |            S c c}w N)rS   listset)r>   langprepared_languagess      r+   _prepare_language_hintsrZ      s>     HVW5d;WW&'(( Xs   0c                      e Zd ZU dZ ed       Zee   dz  ez  e	d<    ed       Z
edz  ez  e	d<    ed       Zeez  dz  ez  e	d	<    ed
       Zedz  ez  e	d<    ed       Zedz  ez  e	d<    ed       Zedz  ez  e	d<   y)SonioxSTTSettingsa]  Settings for SonioxSTTService.

    Parameters:
        language_hints: List of language hints to use for transcription.
        language_hints_strict: If true, strictly enforce language hints.
        context: Customization for transcription. String for models with
            context_version 1 and SonioxContextObject for models with
            context_version 2.
        enable_speaker_diarization: Whether to enable speaker diarization.
        enable_language_identification: Whether to enable language identification.
        client_reference_id: Client reference ID to use for transcription.
    c                      t         S rU   r   r)   r*   r+   <lambda>zSonioxSTTSettings.<lambda>   s    V_ r*   )default_factoryNr>   c                      t         S rU   r^   r)   r*   r+   r_   zSonioxSTTSettings.<lambda>   s    S\ r*   r?   c                      t         S rU   r^   r)   r*   r+   r_   zSonioxSTTSettings.<lambda>   s    Zc r*   r@   c                      t         S rU   r^   r)   r*   r+   r_   zSonioxSTTSettings.<lambda>   s    Xa r*   rA   c                      t         S rU   r^   r)   r*   r+   r_   zSonioxSTTSettings.<lambda>   s    	 r*   rB   c                      t         S rU   r^   r)   r*   r+   r_   zSonioxSTTSettings.<lambda>   s    PY r*   rC   )r#   r$   r%   r&   r   r>   r   r   r   r(   r?   rE   r@   r1   r'   rA   rB   rC   r)   r*   r+   r\   r\      s     9>N_8`NDNT)I5`5:K\5]4$;2]<ARc<dG 3&-	9d:?Pa:bti 7b>C)?"D4K)$;  38HY2Zti/Zr*   r\   c                   ~    e Zd ZU dZeZeed<   dddddddded	d	ed
ede	e
   de	e   dede
de	e   dede	e   de	e   f fdZdefdZdef fdZdedeeef   f fdZdef fdZdef fdZdedeedf   fdZe	 d-deded e	e   fd!       Zded"e f fd#Z!d$ Z" fd%Z# fd&Z$d' Z%d( Z&d) Z'd* Z(d+efd,Z) xZ*S ).SonioxSTTServiceaS  Speech-to-Text service using Soniox's WebSocket API.

    This service connects to Soniox's WebSocket API for real-time transcription
    with support for multiple languages, custom context, speaker diarization,
    and more.

    For complete API documentation, see: https://soniox.com/docs/speech-to-text/api-reference/websocket-api
    	_settingsz,wss://stt-rt.soniox.com/transcribe-websocketNr:   r<   T)	urlsample_rater9   r;   r=   paramsvad_force_turn_endpointsettingsttfs_p99_latencyapi_keyri   rj   r9   r;   r=   rk   rl   rm   rn   c       
   
         t        dddddddd      }|t        dt         d       ||_        |t        dt                |	s|j                  |_        |j                  |j                  }|j                  |j                  }|j
                  |_        |j                  |_        |j                  |_        |j                  |_        |j                  |_	        |j                  |_
        |	|j                  |	       t        | 4  d
||
dd|d	| || _        || _        || _        || _        || _        g | _        d| _        d| _        y)aM  Initialize the Soniox STT service.

        Args:
            api_key: Soniox API key.
            url: Soniox WebSocket API URL.
            sample_rate: Audio sample rate.
            model: Soniox model to use for transcription.

                .. deprecated:: 0.0.105
                    Use ``settings=SonioxSTTSettings(model=...)`` instead.

            audio_format: Audio format for transcription. Defaults to ``"pcm_s16le"``.
            num_channels: Number of audio channels. Defaults to 1.
            params: Additional configuration parameters, such as language hints, context and
                speaker diarization.

                .. deprecated:: 0.0.105
                    Use ``settings=SonioxSTTSettings(...)`` instead.

            vad_force_turn_endpoint: Listen to `VADUserStoppedSpeakingFrame` to send finalize message to Soniox.
                If disabled, Soniox will detect the end of the speech. Defaults to True.
            settings: Runtime-updatable settings. When provided alongside deprecated
                parameters, ``settings`` values take precedence.
            ttfs_p99_latency: P99 latency from speech end to final transcript in seconds.
                Override for your deployment. See https://github.com/pipecat-ai/stt-benchmark
            **kwargs: Additional arguments passed to the STTService.
        r8   NF)r9   rM   r>   r?   r@   rA   rB   rC   r9   rk   r<      )rj   rn   keepalive_timeoutkeepalive_intervalrm   r)   )r\   r   r9   r;   r=   r>   r?   r@   rA   rB   rC   apply_updatesuper__init___api_key_url_vad_force_turn_endpoint_audio_format_num_channels_final_transcription_buffer_last_tokens_received_receive_task)selfro   ri   rj   r9   r;   r=   rk   rl   rm   rn   kwargsdefault_settings	__class__s                r+   rv   zSonioxSTTService.__init__   sw   V -"&',+0 $	
 "7,=wG%*" "8->?)/ &&&2#)#6#6L&&2#)#6#6L282G2G /9?9U9U 6+1>> (>D>_>_ ;99 !? 8>7Q7Q 4 ))(3 	
#- %	
 	
  	(?% *)+-(6:"!r*   rG   c                      y)zCheck if this service can generate processing metrics.

        Returns:
            True, as Soniox STT supports metrics generation.
        Tr)   r   s    r+   can_generate_metricsz%SonioxSTTService.can_generate_metrics  s     r*   framec                 t   K   t         |   |       d{    | j                          d{    y7 7 w)zStart the Soniox STT websocket connection.

        Args:
            frame: The start frame containing initialization parameters.
        N)ru   start_connectr   r   r   s     r+   r   zSonioxSTTService.start#  s3      gmE"""mmo 	#   848688deltac                    K   t         |   |       d{   }|r0| j                          d{    | j                          d{    |S 7 87  7 
w)zApply settings delta and reconnect if anything changed.

        Args:
            delta: A settings delta.

        Returns:
            Dict mapping changed field names to their previous values.
        N)ru   _update_settings_disconnectr   )r   r   changedr   s      r+   r   z!SonioxSTTService._update_settings,  sS      077""$$$--/!! 8 %!s1   AAAAAA	AAAc                    K   t         |   |       d{    | j                          d{    | j                          d{    y7 57 7 	w)a   Stop the Soniox STT websocket connection.

        Stopping waits for the server to close the connection as we might receive
        additional final tokens after sending the stop recording message.

        Args:
            frame: The end frame.
        N)ru   stop_send_stop_recordingr   r   s     r+   r   zSonioxSTTService.stop=  sL      gl5!!!'')))    	") s1   AAAAAAAAAc                 t   K   t         |   |       d{    | j                          d{    y7 7 w)ae  Cancel the Soniox STT websocket connection.

        Compared to stop, this method closes the connection immediately without waiting
        for the server to close it. This is useful when we want to stop the connection
        immediately without waiting for the server to send any final tokens.

        Args:
            frame: The cancel frame.
        N)ru   cancelr   r   s     r+   r   zSonioxSTTService.cancelJ  s6      gnU###    	$ r   audioc                   K   | j                   rI| j                   j                  t        j                  u r#| j                   j	                  |       d{    d y7 
w)zSend audio data to Soniox STT Service.

        Args:
            audio: Raw audio bytes to transcribe.

        Yields:
            Frame: None (transcription results come via WebSocket callbacks).
        N
_websocketstater   OPENsend)r   r   s     r+   run_sttzSonioxSTTService.run_sttW  sF      ??t44

B//&&u---
 .s   AA AA 
transcriptis_finalrM   c                    K   yw)z+Handle a transcription result with tracing.Nr)   )r   r   r   rM   s       r+   _handle_transcriptionz&SonioxSTTService._handle_transcriptione  s     
 	s   	directionc                   K   t         |   ||       d{    t        |t              r| j                  r| j
                  rt| j
                  j                  t        j                  u rM| j
                  j                  t               d{    t        j                  d|j                  d|       yyyyy7 7 0w)zProcesses a frame of audio data, either buffering or transcribing it.

        Args:
            frame: The frame to process.
            direction: The direction of frame processing.
        Nz(Triggered finalize event on: frame.name=z, direction=)ru   process_frame
isinstancer   ry   r   r   r   r   r   FINALIZE_MESSAGEr	   debugname)r   r   r   r   s      r+   r   zSonioxSTTService.process_framel  s      g#E9555e89d>[>[4??#8#8EJJ#Foo**+;<<<HUZZMI<XY $G ?\9 	6
 =s"   B?B;A4B?B=/B?=B?c                    K   | j                   rK| j                   j                  t        j                  u r$| j                   j	                  d       d{    yyy7 w)z&Send stop recording message to Soniox. Nr   r   s    r+   r   z%SonioxSTTService._send_stop_recording{  sE     ??t44

B//&&r***  C?*s   AAAAc                   K   | j                          d{    t        | 	          d{    | j                  r=| j                  s0| j                  | j                  | j                              | _        yyy7 d7 Pw)zyConnect to the Soniox service.

        Establishes websocket connection and starts receive and keepalive tasks.
        N)_connect_websocketru   r   r   r~   create_task_receive_task_handler_report_errorr   r   s    r+   r   zSonioxSTTService._connect  so     
 %%'''g   ??4#5#5!%!1!1$2L2LTM_M_2`!aD $6?	 	( s    B A<B A>AB >B c                    K   t         |           d{    | j                  r*| j                  | j                         d{    d| _        | j	                          d{    y7 S7 &7 	w)zfDisconnect from the Soniox service.

        Cleans up tasks and closes websocket connection.
        N)ru   r   r~   cancel_task_disconnect_websocketr   s    r+   r   zSonioxSTTService._disconnect  sh     
 g!###""4#5#5666!%D((*** 	$ 7 	+s3   A/A).A/A+A/#A-$A/+A/-A/c                 8  K   	 | j                   r'| j                   j                  t        j                  u ryt	        j
                  d       t        | j                         d{   | _         | j                   s?| j                  d| j                          d{    t        d| j                         | j                   }| j                  }|j                  }t        |t              r|j                         }| j                   |j"                  | j$                  | j&                  || j(                  t+        |j,                        |j.                  ||j0                  |j2                  |j4                  d}| j                   j7                  t9        j:                  |             d{    | j=                  d       d{    t	        j
                  d       y7 {7 F7 97 "# t        $ r%}| j                  d| |	       d{  7    d}~ww xY ww)
z-Establish the websocket connection to Soniox.NzConnecting to Soniox STTz#Unable to connect to Soniox API at 	error_msg)ro   r9   r;   r=   enable_endpoint_detectionrj   r>   r?   r@   rA   rB   rC   on_connectedzConnected to Soniox STTzUnable to connect to Soniox: r   	exception)r   r   r   r   r	   r   websocket_connectrx   
push_error	Exceptionry   rh   r@   r   r1   
model_dumprw   r9   rz   r{   rj   rZ   r>   r?   rA   rB   rC   r   jsondumps_call_event_handler)r   r   sr@   configes         r+   r   z#SonioxSTTService._connect_websocket  s    -	4??#8#8EJJ#FLL34$5dii$@@DO??oo2UVZV_V_U`0aobbb"Edii[ QRR -1,I,I(I%AiiG'#67!,,.  == $ 2 2 $ 2 2-F#//"9!:J:J"K)*)@)@"./.J.J232R2R'('<'<F  //&&tzz&'9:::**>:::LL23I A c< ;: 	//.KA3,O[\/]]]	s   H2G) H-G) $G%7G) G"DG) +G%,G) G'G) HG) "G) %G) 'G) )	H2HHHHHc                   K   	 | j                   r7t        j                  d       | j                   j                          d{    d| _         | j                  d       d{    y7 %# t        $ r)}| j                  d| |       d{  7   Y d}~Pd}~ww xY w7 <# d| _         | j                  d       d{  7   w xY ww)z)Close the websocket connection to Soniox.zDisconnecting from Soniox STTNzError closing websocket: r   on_disconnected)r   r	   r   closer   r   r   )r   r   s     r+   r   z&SonioxSTTService._disconnect_websocket  s     	><=oo++--- #DO**+<=== . 	Z//.Gs,KWX/YYY	Z > #DO**+<===sv   C>A* A(A* C"B#C(A* *	B3BBBB! BB! C!C=C >CCc                 H    | j                   r| j                   S t        d      )zGet the current WebSocket connection.

        Returns:
            The WebSocket connection.

        Raises:
            Exception: If WebSocket is not connected.
        zWebsocket not connected)r   r   r   s    r+   _get_websocketzSonioxSTTService._get_websocket  s!     ????"122r*   c           
          K   g  _          fd} j                         2 3 d{   }	 t        j                  |      }|d   }|r7t	        |      dk(  r|d   d   t
        k(  rnt        j                          _        g }|D ]s  }|d   r[t        |      r |        d{    # j                   s j                          d{     j                   j                  |       c|j                  |       u  j                   s|rdj                  t        d	  j                               }dj                  t        d
 |            } j                  t        ||z    j                  t!                j                   |z                d{    |j#                  d      }	|j#                  d      }
|	s|
r/ |        d{     j%                  d|	 d|
        d{    |j#                  d      }|r& |        d{    t'        j(                  d        y7 7 7 b7 7 j7 L7 ,# t        j*                  $ r t'        j,                  d|        Y >t.        $ r#}t'        j,                  d|        Y d}~dd}~ww xY w6 yw)zxReceive and process websocket messages.

        Continuously processes messages from the websocket connection.
        c            	      z  K   j                   rdj                  t        d j                               } j                  t	        | j
                  t               j                   d             d {    j                  | d       d {    j                          d {    g _         y y 7 @7 '7 w)Nr   c                     | d   S Nr3   r)   rK   s    r+   r_   zVSonioxSTTService._receive_messages.<locals>.send_endpoint_transcript.<locals>.<lambda>  s
    v r*   T)r3   user_id	timestampresult	finalized)r   )	r|   joinmap
push_framer   _user_idr   r   stop_processing_metrics)r3   r   s    r+   send_endpoint_transcriptzDSonioxSTTService._receive_messages.<locals>.send_endpoint_transcript  s     //wws#>@`@`ab oo&! $"2"4#??"&   000EEE22444350 0 F4s6   A1B;4B55B;B7B;'B9(B;7B;9B;Ntokensr<   r   r3   r   r   c                     | d   S r   r)   rK   s    r+   r_   z4SonioxSTTService._receive_messages.<locals>.<lambda>  
    %- r*   c                     | d   S r   r)   rK   s    r+   r_   z4SonioxSTTService._receive_messages.<locals>.<lambda>!  r   r*   )r3   r   r   r   
error_codeerror_messagezError: z (_receive_messages) - r   finishedzTranscription finished.zReceived non-JSON message: zError processing message: )r|   r   r   loadslenrJ   timer}   rL   start_processing_metricsappendr   r   r   r   r   r   getr   r	   r   JSONDecodeErrorwarningr   )r   r   messagecontentr   non_final_transcriptionrF   
final_textnon_final_textr   r   r   r   s   `            r+   _receive_messagesz"SonioxSTTService._receive_messages  s     ,.(	6$ "002 D	A D	A'CA**W- *6{a'F1If,=,P 6:YY[2 +-'# >EZ('. #;"<<<#'#C#C&*&C&C&E E E <<CCEJ/66u=> 337N!#79Y9YZ"J &(WW79PQ&N //1 ",n!<$(MM&6&8#'#C#CF]#]	 	 	 %[[6
 'O <2444//$+J<7N}o"^ *    #;;z22444LL!:;	 wD	A, = !F	 5 5 '' H!<WIFG A!;A3?@@AG 3s   JJHJJA4H0H"$H0H%CH0
H(4H0?H* H0H, !H0H.H0JJ"H0%H0(H0*H0,H0.H00+J	JJ	&J>JJ		Jsilencec                 ^   K   | j                   j                  t               d{    y7 w)zSend a Soniox protocol-level keepalive message.

        Args:
            silence: Silent PCM audio bytes (unused, Soniox uses a protocol message).
        N)r   r   KEEPALIVE_MESSAGE)r   r   s     r+   _send_keepalivez SonioxSTTService._send_keepaliveD  s!      oo""#4555s   #-+-rU   )+r#   r$   r%   r&   r\   Settingsr(   r   r'   r   rD   r7   rE   floatrv   r   r   r   dictr   r   r   r   r   r   bytesr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   __classcell__)r   s   @r+   rg   rg      s    !H   B%)#'.2(,04,;e" e" 	e"
 c]e" }e" e" e" *+e" "&e" ,-e" #5/e"Nd  ,= $sCx. "! !!+ !5 ^E4K-H  NR)-9A(9K Z Z> Z+
b+/b
>3^A@6U 6r*   rg   )Ar&   r   r   dataclassesr   r   typingr   r   r   r   logurur	   pydanticr
   pipecat.frames.framesr   r   r   r   r   r   r   "pipecat.processors.frame_processorr   pipecat.services.settingsr   r   r   r   pipecat.services.stt_latencyr   pipecat.services.stt_servicer   pipecat.transcriptions.languager   pipecat.utils.timer   (pipecat.utils.tracing.service_decoratorsr   websockets.asyncio.clientr   r   websockets.protocolr   ModuleNotFoundErrorr   errorr   r   r   rI   rJ   r   r-   r1   r7   r   rE   rL   r'   rS   rZ   r\   rg   r)   r*   r+   <module>r     se   4   ( 6 6     > _ _ 8 < 4 / ?,F) , ) 	y 9 
K) 
K!.	 !.HJ J J
( s )T(^,)d3i) [ [ [0c6* c6E  ,FLL;qc"#FLLXY
&qc*
++,s   .C. .D*32D%%D*