
    qi                      (   d Z ddlZddlZddlZddlmZmZ ddlmZ ddl	m
Z
mZmZ ddlmZ ddlmZ ddlmZ dd	lmZ dd
lmZmZmZmZmZmZmZmZmZm Z m!Z!m"Z" ddl#m$Z$ ddl%m&Z&m'Z'm(Z(m)Z) ddl*m+Z+ ddl,m-Z- ddl.m/Z/m0Z0 ddl1m2Z2 	 ddl3m4Z4m5Z5m6Z6m7Z7m8Z8m9Z9m:Z:m;Z;m<Z<m=Z=m>Z>m?Z?m@Z@  e         G d deEe      ZFe G d de'             ZG G d de-      ZHy# eA$ r7ZB ej                  deB         ej                  d        eDdeB       dZB[Bww xY w)z%Speechmatics STT service integration.    N)	dataclassfield)Enum)AnyAsyncGeneratorClassVar)load_dotenv)logger)	BaseModel)version)BotStartedSpeakingFrameBotStoppedSpeakingFrameCancelFrameEndFrame
ErrorFrameFrameInterimTranscriptionFrame
StartFrameTranscriptionFrameUserStartedSpeakingFrameUserStoppedSpeakingFrameVADUserStoppedSpeakingFrame)FrameDirection)	NOT_GIVENSTTSettings	_NotGiven_warn_deprecated_param)SPEECHMATICS_TTFS_P99)
STTService)Languageresolve_language)
traced_stt)AdditionalVocabEntryAgentClientMessageTypeAgentServerMessageTypeAudioEncodingEndOfUtteranceModeOperatingPointSpeakerFocusConfigSpeakerFocusModeSpeakerIdentifierSpeechSegmentConfigVoiceAgentClientVoiceAgentConfigVoiceAgentConfigPresetzException: zQIn order to use Speechmatics, you need to `pip install pipecat-ai[speechmatics]`.zMissing module: c                        e Zd ZdZdZdZdZdZy)TurnDetectionModea  Endpoint and turn detection handling mode.

    How the STT engine handles the endpointing of speech. If using Pipecat's built-in endpointing,
    then use `TurnDetectionMode.EXTERNAL` (default).

    To use the STT engine's built-in endpointing, then use `TurnDetectionMode.ADAPTIVE` for simple
    voice activity detection or `TurnDetectionMode.SMART_TURN` for more advanced ML-based
    endpointing.
    fixedexternaladaptive
smart_turnN)__name__
__module____qualname____doc__FIXEDEXTERNALADAPTIVE
SMART_TURN     S/opt/pipecat/venv/lib/python3.12/site-packages/pipecat/services/speechmatics/stt.pyr1   r1   E   s     EHHJr?   r1   c                      e Zd ZU dZ ed       Zeez  ed<    ed       Z	e
ez  ed<    ed       Zeez  ed<    ed	       Zeez  ed
<    ed       Zee   ez  ed<    ed       Zee   ez  ed<    ed       Zeez  ed<    ed       Zee   ez  ed<    ed       Zee   ez  ed<    ed       Zeez  ed<    ed       Zeez  ed<    ed       Zeez  ed<    ed       Zeez  ed<    ed       Zeeef   ez  ed<    ed       Ze ez  ed <    ed!       Z!e ez  ed"<    ed#       Z"e ez  ed$<    ed%       Z#eez  ed&<    ed'       Z$e%ez  ed(<    ed)       Z&e ez  ed*<    ed+       Z'eeef   ez  ed,<    e(h d-      Z)e*e(e      ed.<    e(dd
h      Z+e*e(e      ed/<   y0)1SpeechmaticsSTTSettingsa  Settings for SpeechmaticsSTTService.

    See ``SpeechmaticsSTTService.InputParams`` for detailed descriptions of each field.

    Parameters:
        domain: Domain for Speechmatics API.
        turn_detection_mode: Endpoint handling mode.
        speaker_active_format: Formatter for active speaker ID.
        speaker_passive_format: Formatter for passive speaker ID.
        focus_speakers: List of speaker IDs to focus on.
        ignore_speakers: List of speaker IDs to ignore.
        focus_mode: Speaker focus mode for diarization.
        known_speakers: List of known speaker labels and identifiers.
        additional_vocab: List of additional vocabulary entries.
        operating_point: Operating point for accuracy vs. latency.
        max_delay: Maximum delay in seconds for transcription.
        end_of_utterance_silence_trigger: Maximum delay for end of utterance trigger.
        end_of_utterance_max_delay: Maximum delay for end of utterance.
        punctuation_overrides: Punctuation overrides.
        include_partials: Include partial segment fragments.
        split_sentences: Emit finalized sentences mid-turn.
        enable_diarization: Enable speaker diarization.
        speaker_sensitivity: Diarization sensitivity.
        max_speakers: Maximum number of speakers to detect.
        prefer_current_speaker: Prefer current speaker ID.
        extra_params: Extra parameters for the STT engine.
    c                      t         S Nr   r>   r?   r@   <lambda>z SpeechmaticsSTTSettings.<lambda>t   s    I r?   )default_factorydomainc                      t         S rD   rE   r>   r?   r@   rF   z SpeechmaticsSTTSettings.<lambda>u   s    W` r?   turn_detection_modec                      t         S rD   rE   r>   r?   r@   rF   z SpeechmaticsSTTSettings.<lambda>v       9 r?   speaker_active_formatc                      t         S rD   rE   r>   r?   r@   rF   z SpeechmaticsSTTSettings.<lambda>w   s    I r?   speaker_passive_formatc                      t         S rD   rE   r>   r?   r@   rF   z SpeechmaticsSTTSettings.<lambda>x   s    ) r?   focus_speakersc                      t         S rD   rE   r>   r?   r@   rF   z SpeechmaticsSTTSettings.<lambda>y   rL   r?   ignore_speakersc                      t         S rD   rE   r>   r?   r@   rF   z SpeechmaticsSTTSettings.<lambda>z       Y r?   
focus_modec                      t         S rD   rE   r>   r?   r@   rF   z SpeechmaticsSTTSettings.<lambda>{       Xa r?   known_speakersc                      t         S rD   rE   r>   r?   r@   rF   z SpeechmaticsSTTSettings.<lambda>}   s    	 r?   additional_vocabc                      t         S rD   rE   r>   r?   r@   rF   z SpeechmaticsSTTSettings.<lambda>   s    PY r?   operating_pointc                      t         S rD   rE   r>   r?   r@   rF   z SpeechmaticsSTTSettings.<lambda>   s     r?   	max_delayc                      t         S rD   rE   r>   r?   r@   rF   z SpeechmaticsSTTSettings.<lambda>   rX   r?    end_of_utterance_silence_triggerc                      t         S rD   rE   r>   r?   r@   rF   z SpeechmaticsSTTSettings.<lambda>   s    R[ r?   end_of_utterance_max_delayc                      t         S rD   rE   r>   r?   r@   rF   z SpeechmaticsSTTSettings.<lambda>   s    V_ r?   punctuation_overridesc                      t         S rD   rE   r>   r?   r@   rF   z SpeechmaticsSTTSettings.<lambda>   s    y r?   include_partialsc                      t         S rD   rE   r>   r?   r@   rF   z SpeechmaticsSTTSettings.<lambda>   s    i r?   split_sentencesc                      t         S rD   rE   r>   r?   r@   rF   z SpeechmaticsSTTSettings.<lambda>   s     r?   enable_diarizationc                      t         S rD   rE   r>   r?   r@   rF   z SpeechmaticsSTTSettings.<lambda>   rL   r?   speaker_sensitivityc                      t         S rD   rE   r>   r?   r@   rF   z SpeechmaticsSTTSettings.<lambda>   s    ) r?   max_speakersc                      t         S rD   rE   r>   r?   r@   rF   z SpeechmaticsSTTSettings.<lambda>   rU   r?   prefer_current_speakerc                      t         S rD   rE   r>   r?   r@   rF   z SpeechmaticsSTTSettings.<lambda>   rU   r?   extra_params>   rV   rQ   rS   
HOT_FIELDSLOCAL_FIELDSN),r6   r7   r8   r9   r   rH   strr   __annotations__rJ   r1   rM   rO   rQ   listrS   rV   r*   rY   r+   r[   r#   r]   r(   r_   floatra   rc   re   dictr   rg   boolri   rk   rm   ro   intrq   rs   	frozensetrt   r   ru   r>   r?   r@   rB   rB   V   s>   8 $4EFFC)OF9>O`9a*Y6a-2CT-U3?U.3DU.VC)OV,1BS,TNDI	)T-2CT-UOT#Y*U/4EV/WJ 9,W:?Pa:bND*+i7b?D)@d/09<  38HY2ZO^i/Z#(9J#KIuy K:?Pa:b$ei&7b49J[4\	 1\8=N_8`4S>I5`).?P)QdY&Q(->O(POTI%P+0AR+Sy(S-2CT-U*U$):K$LL#	/L/4EV/WD9,W/4EV/WL$sCx.9,W ,5	
,J3(  .7#$	
.L(9S>* r?   rB   c                       e Zd ZU dZeZeed<   eZeZe	Z	e
Z
eZeZeZ G d de      Z G d de      Zdddej"                  ddded	d
edz  dedz  dedz  dededz  dededz  dedz  f fdZdef fdZdedeeef   f fdZdef fdZdef fdZ d9dZ!d9dZ"d9dZ#dede$fdZ%deddfdZ&deeef   ddfd Z'deeef   ddfd!Z(deeef   ddfd"Z)deeef   ddfd#Z*deeef   ddfd$Z+deeef   ddfd%Z,de-d&e.f fd'Z/d:d(e0eeef      d)eddfd*Z1de2ez  d+eddfd,Z3defd-Z4e5d.ed/ed0e6fd1       Z7d2e8de9e-df   fd3Z:d0e6defd4Z;d5ed6e6dedz  fd7Z<d+ededdfd8Z= xZ>S );SpeechmaticsSTTServicea$  Speechmatics STT service implementation.

    This service provides real-time speech-to-text transcription using the Speechmatics API.
    It supports partial and final transcriptions, multiple languages, various audio formats,
    and speaker diarization.

    Event handlers available (in addition to STTService events):

    - on_speakers_result(service, speakers): Speaker diarization results received

    Example::

        @stt.event_handler("on_speakers_result")
        async def on_speakers_result(service, speakers):
            ...
    	_settingsc                      e Zd ZU dZdZedz  ed<   ej                  Z	eez  ed<   e
j                  Ze
ed<   dZedz  ed<   dZedz  ed<   g Zee   ed<   g Zee   ed	<   ej&                  Zeed
<   g Zee   ed<   g Zee   ed<   ej4                  Zeed<   dZedz  ed<   dZedz  ed<   dZ edz  ed<   dZ!edz  ed<   dZ"e#dz  ed<   dZ$e%dz  ed<   dZ&e%dz  ed<   dZ'e%dz  ed<   dZ(edz  ed<   dZ)e*dz  ed<   dZ+e%dz  ed<   dZ,e#dz  ed<   y)"SpeechmaticsSTTService.InputParamsa  Configuration parameters for Speechmatics STT service.

        Parameters:
            domain: Domain for Speechmatics API. Defaults to None.

            language: Language code for transcription. Defaults to `Language.EN`.

            turn_detection_mode: Endpoint handling, one of `TurnDetectionMode.FIXED`,
                `TurnDetectionMode.EXTERNAL`, `TurnDetectionMode.ADAPTIVE` and
                `TurnDetectionMode.SMART_TURN`. Defaults to `TurnDetectionMode.EXTERNAL`.

            speaker_active_format: Formatter for active speaker ID. This formatter is used to format
                the text output for individual speakers and ensures that the context is clear for
                language models further down the pipeline. The attributes `text` and `speaker_id` are
                available. The system instructions for the language model may need to include any
                necessary instructions to handle the formatting.
                Example: `@{speaker_id}: {text}`. Defaults to None.

            speaker_passive_format: Formatter for passive speaker ID. As with the
                speaker_active_format, the attributes `text` and `speaker_id` are available.
                Example: `@{speaker_id} [background]: {text}`. Defaults to None.

            focus_speakers: List of speaker IDs to focus on. When enabled, only these speakers are
                emitted as finalized frames and other speakers are considered passive. Words from
                other speakers are still processed, but only emitted when a focussed speaker has
                also said new words. A list of labels (e.g. `S1`, `S2`) or identifiers of known
                speakers (e.g. `speaker_1`, `speaker_2`) can be used.
                Defaults to [].

            ignore_speakers: List of speaker IDs to ignore. When enabled, these speakers are
                excluded from the transcription and their words are not processed. Their speech
                will not trigger any VAD or end of utterance detection. By default, any speaker
                with a label starting and ending with double underscores will be excluded (e.g.
                `__ASSISTANT__`).
                Defaults to [].

            focus_mode: Speaker focus mode for diarization. When set to `SpeakerFocusMode.RETAIN`,
                the STT engine will retain words spoken by other speakers (not listed in `ignore_speakers`)
                and process them as passive speaker frames. When set to `SpeakerFocusMode.IGNORE`,
                the STT engine will ignore words spoken by other speakers and they will not be processed.
                Defaults to `SpeakerFocusMode.RETAIN`.

            known_speakers: List of known speaker labels and identifiers. If you supply a list of
                labels and identifiers for speakers, then the STT engine will use them to attribute
                any spoken words to that speaker. This is useful when you want to attribute words
                to a specific speaker, such as the assistant or a specific user. Labels and identifiers
                can be obtained from a running STT session and then used in subsequent sessions.
                Identifiers are unique to each Speechmatics account and cannot be used across accounts.
                Refer to our examples on the format of the known_speakers parameter.
                Defaults to [].

            additional_vocab: List of additional vocabulary entries. If you supply a list of
                additional vocabulary entries, the this will increase the weight of the words in the
                vocabulary and help the STT engine to better transcribe the words.
                Defaults to [].

            audio_encoding: Audio encoding format. Defaults to AudioEncoding.PCM_S16LE.

            operating_point: Operating point for transcription accuracy vs. latency tradeoff. It is
                recommended to use OperatingPoint.ENHANCED for most use cases. Default to enhanced.

            max_delay: Maximum delay in seconds for transcription. This forces the STT engine to
                speed up the processing of transcribed words and reduces the interval between partial
                and final results. Lower values can have an impact on accuracy.

            end_of_utterance_silence_trigger: Maximum delay in seconds for end of utterance trigger.
                The delay is used to wait for any further transcribed words before emitting the final
                word frames. The value must be lower than max_delay.

            end_of_utterance_max_delay: Maximum delay in seconds for end of utterance delay.
                The delay is used to wait for any further transcribed words before emitting the final
                word frames. The value must be greater than end_of_utterance_silence_trigger.

            punctuation_overrides: Punctuation overrides. This allows you to override the punctuation
                in the STT engine. This is useful for languages that use different punctuation
                than English. See documentation for more information.

            include_partials: Include partial segment fragments (words) in the output of
                AddPartialSegment messages. Partial fragments from the STT will always be used for
                speaker activity detection. This setting is used only for the formatted text output
                of individual segments.

            split_sentences: Emit finalized sentences mid-turn. When enabled, as soon as a sentence
                is finalized, it will be emitted as a final segment. This is useful for applications
                that need to process sentences as they are finalized. Defaults to False.

            enable_diarization: Enable speaker diarization. When enabled, the STT engine will
                determine and attribute words to unique speakers. The speaker_sensitivity
                parameter can be used to adjust the sensitivity of diarization.

            speaker_sensitivity: Diarization sensitivity. A higher value increases the sensitivity
                of diarization and helps when two or more speakers have similar voices.

            max_speakers: Maximum number of speakers to detect. This forces the STT engine to cluster
                words into a fixed number of speakers. It should not be used to limit the number of
                speakers, unless it is clear that there will only be a known number of speakers.

            prefer_current_speaker: Prefer current speaker ID. When set to true, groups of words close
                together are given extra weight to be identified as the same speaker.

            extra_params: Extra parameters to pass to the STT engine. This is a dictionary of
                additional parameters that can be used to configure the STT engine.
                Default to None.

        NrH   languagerJ   rM   rO   rQ   rS   rV   rY   r[   audio_encodingr]   r_   ra   rc   re   rg   ri   rk   rm   ro   rq   rs   )-r6   r7   r8   r9   rH   rv   rw   r    ENr   r1   r;   rJ   rM   rO   rQ   rx   rS   r*   RETAINrV   rY   r+   r[   r#   r&   	PCM_S16LEr   r]   r(   r_   ry   ra   rc   re   rz   rg   r{   ri   rk   rm   ro   r|   rq   rs   r>   r?   r@   InputParamsr      sz   h	V "d
!#+;;(S.. 2C1K1K.K -1sTz0-1d
1 %'S	&%'c''7'>'>
$>24./4 8:$349 )6(?(?? 26$.5"&	54<&9=(%$,=37"EDL7-1td{1(,$+,'++ +/D4K.,0UT\0#'cDj'.2t2 %)dTk(r?   r   c                   \    e Zd ZU dZg Zee   ed<   g Zee   ed<   e	j                  Ze	ed<   y)#SpeechmaticsSTTService.UpdateParamsaC  Update parameters for Speechmatics STT service.

        .. deprecated:: 0.0.104
            Use ``SpeechmaticsSTTSettings`` with ``STTUpdateSettingsFrame`` instead.

        Parameters:
            focus_speakers: List of speaker IDs to focus on. When enabled, only these speakers are
                emitted as finalized frames and other speakers are considered passive. Words from
                other speakers are still processed, but only emitted when a focussed speaker has
                also said new words. A list of labels (e.g. `S1`, `S2`) or identifiers of known
                speakers (e.g. `speaker_1`, `speaker_2`) can be used.
                Defaults to [].

            ignore_speakers: List of speaker IDs to ignore. When enabled, these speakers are
                excluded from the transcription and their words are not processed. Their speech
                will not trigger any VAD or end of utterance detection. By default, any speaker
                with a label starting and ending with double underscores will be excluded (e.g.
                `__ASSISTANT__`).
                Defaults to [].

            focus_mode: Speaker focus mode for diarization. When set to `SpeakerFocusMode.RETAIN`,
                the STT engine will retain words spoken by other speakers (not listed in `ignore_speakers`)
                and process them as passive speaker frames. When set to `SpeakerFocusMode.IGNORE`,
                the STT engine will ignore words spoken by other speakers and they will not be processed.
                Defaults to `SpeakerFocusMode.RETAIN`.
        rQ   rS   rV   N)r6   r7   r8   r9   rQ   rx   rv   rw   rS   r*   r   rV   r>   r?   r@   UpdateParamsr   V  s6    	6 %'S	&%'c''7'>'>
$>r?   r   NT)api_keybase_urlsample_rateencodingparamsshould_interruptsettingsttfs_p99_latencyr   r   r   r   r   r   r   r   c                   |xs t        j                  d      | _        |xs t        j                  d      xs d| _        | j                  st	        d      | j                  st	        d      || _        |xs t        j                         }
| j                  |	|
       t        d$i dddt        j                  d	dd
t        j                  dddddg dg dt        j                  dg dg dddddddddddddddddddddddd}|t!        dt               |s|
j"                  |_        |
j$                  |_        |
j&                  |_        |
j(                  }||
j*                  rd nd}||_        |
j,                  xs ||_        |
j.                  |_        |
j0                  |_        |
j2                  |_        |
j4                  |_        |
j6                  |_        |
j8                  }|
j:                  |_        |
j<                  |_        |
j>                  |_        |
j@                  |_         |
jB                  |_!        |
jD                  |_"        |
jF                  |_#        |
j*                  |_        |
jH                  |_$        |
jJ                  |_%        |
jL                  |_&        |
jN                  |_'        ||jQ                  |       d| _)        || _*        | jW                  |      | _,        | jX                  j:                  jZ                  |_.        t_        |   d$|||d!|	 tc        jd                         | _3        | jX                  jh                  tj        jl                  tj        j                  fv| _7        tc        jd                         | _8        d| _9        d"| _:        d"| _;        |j*                  r| jy                  d#       yy)%a  Initialize the Speechmatics STT service.

        Args:
            api_key: Speechmatics API key for authentication. Uses environment variable
                `SPEECHMATICS_API_KEY` if not provided.
            base_url: Base URL for Speechmatics API. Uses environment variable `SPEECHMATICS_RT_URL`
                or defaults to `wss://eu2.rt.speechmatics.com/v2`.
            sample_rate: Optional audio sample rate in Hz.
            encoding: Audio encoding format. Defaults to ``AudioEncoding.PCM_S16LE``.
            params: Input parameters for the service.

                .. deprecated:: 0.0.105
                    Use ``settings=SpeechmaticsSTTSettings(...)`` instead.

            should_interrupt: Determine whether the bot should be interrupted when Speechmatics turn_detection_mode is configured to detect user speech.
            settings: Runtime-updatable settings. When provided alongside deprecated
                ``params``, ``settings`` values take precedence.
            ttfs_p99_latency: P99 latency from speech end to final transcript in seconds.
                Override for your deployment. See https://github.com/pipecat-ai/stt-benchmark
            **kwargs: Additional arguments passed to STTService.
        SPEECHMATICS_API_KEYSPEECHMATICS_RT_URLz wss://eu2.rt.speechmatics.com/v2zMissing Speechmatics API keyzMissing Speechmatics base URLmodelNr   rH   rJ   rM   z{text}rO   rQ   rS   rV   rY   r[   r]   r_   ra   rc   re   rg   ri   rk   rm   ro   rq   rs   r   z@{speaker_id}: {text})r   r   r   Fon_speakers_resultr>   )=osgetenv_api_key	_base_url
ValueError_should_interruptr   r   _check_deprecated_argsrB   r    r   r1   r;   r*   r   r   r   rH   rJ   rM   rk   rO   rQ   rS   rV   rY   r[   r   r]   r_   ra   rc   re   rg   ri   rm   ro   rq   rs   apply_update_client_audio_encoding_build_config_configvaluer   super__init__asyncioQueue_outbound_framesend_of_utterance_moder'   r:   _enable_vad_stt_msg_queue_stt_msg_task_is_speaking_bot_speaking_register_event_handler)selfr   r   r   r   r   r   r   r   kwargs_paramsdefault_settingsrM   	__class__s                r@   r   zSpeechmaticsSTTService.__init__v  s.   F %I		2H(I^		"78^<^ 	
 }};<<~~<==!1 @2>>@##FG4 3 

[[
 
 !2 : :	

 #+
 $,
 
 
 (..
 
  
 !
 
 .2
 (,
  #'!
" "#
$ !%
&  $'
( !%)
* +
, $(-
. /
: "8-DE,3,<,< )*1.. '7>7R7R 4(/(E(E%(03:3M3M/S[ * :O 622K6K !7 3:2H2H /3:3J3J 0.5.@.@ +292H2H /4;4L4L 1"113:3J3J 0-4->-> *<< !A ?F>`>` ;9@9V9V 64;4L4L 13:3J3J 06=6P6P 37>7R7R 4070D0D -:A:X:X 7070D0D - ))(3 15')-););<L)M!%!=!=!C!C 	
#-%	
 		
 7>mmo "&!C!C$$''L
 "
 >E]]_26 #(#( ..(()=> /r?   framec                 t   K   t         |   |       d{    | j                          d{    y7 7 w)z#Called when the new session starts.N)r   start_connectr   r   r   s     r@   r   zSpeechmaticsSTTService.start  s1     gmE"""mmo 	#   848688deltareturnc                   K   t         |   |       d{   }|s|S t        j                  t        j                  z  }t        |j                         |z
        }|rzt        j                  |  d|j                                 | j                  | j                        | _        | j                          d{    | j                          d{    |S |j                         t        j                  z  r3t        j                  |  d|j                                 | j                  j                  r| j                  j                  | j                  j                   _        | j                  j"                  | j                  j                   _        | j                  j$                  | j                  j                   _        | j&                  rZ| j&                  j)                  | j                  j                          |S t        j                  |  d|j                          d       |S |j                         t        j                  z  r(t        j                  |  d|j                                 |S 7 b7 7 w)u  Apply settings delta, reconnecting only when necessary.

        Fields are classified into three categories (see
        ``SpeechmaticsSTTSettings``):

        * **HOT_FIELDS** – diarization speaker settings that can be pushed
          to a live Speechmatics connection without reconnecting.
        * **LOCAL_FIELDS** – formatting templates evaluated locally; no
          reconnect or API call needed.
        * Everything else – baked into ``VoiceAgentConfig`` at connection
          time and therefore require a full disconnect / reconnect.

        Args:
            delta: A settings delta.

        Returns:
            Dict mapping changed field names to their previous values.
        Nz% settings update requires reconnect: z applying hot settings update: z3 hot settings updated but diarization not enabled: z. ignoring.z4 local settings update, no special action required: )r   _update_settingsrB   rt   ru   r{   keysr
   debugr   r   r   _disconnectr   rk   rQ   speaker_configrS   rV   r   update_diarization_config)r   r   changedno_reconnectneeds_reconnectr   s        r@   r   z'SpeechmaticsSTTService._update_settings  s    & 077N.99<S<`<``w||~<=LLD6!Fw||~FVWX  --dnn=DL""$$$--/!!. - \\^5@@@LLD6!@@PQR||..=A^^=Z=Z++:>Bnn>\>\++;9=9R9R++6<<LL::4<<;V;VW  fOPWP\P\P^O__jk  \\^5BBBLL&LW\\^L\] K 8 %!s5   JI8B"J9I;:JI>F'J;J>Jc                 t   K   t         |   |       d{    | j                          d{    y7 7 w)zCalled when the session ends.N)r   stopr   r   s     r@   r   zSpeechmaticsSTTService.stopV  s4     gl5!!!    	" r   c                 t   K   t         |   |       d{    | j                          d{    y7 7 w)z%Called when the session is cancelled.N)r   cancelr   r   s     r@   r   zSpeechmaticsSTTService.cancel[  s4     gnU###    	$ r   c                     K   t        j                    d        j                   j                  _        t	         j
                   j                  dt                 j                         _        dt        t        t        f   f fd} j                  j                  t        j                  |        j                  j                  t        j                  |        j                   rT j                  j                  t        j"                  |        j                  j                  t        j$                  |        j                  j&                  r* j                  j                  t        j(                  |        j                  j                  t        j*                  |        j                  j                  t        j,                  |        j                  j                  t        j.                  |        j                  j                  t        j0                  |        j                  j                  t        j2                  |       	  j                  j5                          d{    t        j                    d        j:                  s% j=                   j?                                _        yy7 N# t6        $ r0}d _         j9                  d| |	       d{  7   Y d}~hd}~ww xY ww)
zConnect to the STT service.

        - Create STT client
        - Register handlers for messages
        - Connect to the client
        - Start message processing task
        z' connecting to Speechmatics STT servicezpipecat/)r   urlappconfigmessagec                 <    j                   j                  |        y rD   )r   
put_nowait)r   r   s    r@   add_messagez4SpeechmaticsSTTService._connect.<locals>.add_messagew  s    **73r?   Nz
 connectedz!Error connecting to STT service: 	error_msg	exception) r
   r   r   r   r-   r   r   pipecat_versionr   rz   rv   r   onr%   ADD_PARTIAL_SEGMENTADD_SEGMENTr   START_OF_TURNEND_OF_TURNrk   SPEAKERS_RESULTERRORWARNINGINFOEND_OF_TURN_PREDICTIONEND_OF_UTTERANCEconnect	Exception
push_errorr   create_task_process_stt_messages)r   r   es   `  r@   r   zSpeechmaticsSTTService._connect`  s&     	vDEF $(#3#3  *:MM?,-.<<	*
	4c3h 	4 	.BBKP.::KH LLOO2@@+NLLOO2>>L <<**LLOO2BBKP 	.44kB.66D.33[A.EE{S.??M	b,,&&(((LLD6,- !!!%!1!1$2L2L2N!OD " ) 	bDL//.OPQs,S_`/aaa	bsN   IK>K 1K 2K 2K> K 	K; K6+K.,K61K>6K;;K>c                 `  K   | j                   r*| j                  | j                          d{    d| _         t        j                  |  d       	 | j                  r"| j                  j                          d{    d| _        | j                  d       d{    y7 s7 '# t        j                  $ r t        j                  |  d       Y Rt        $ r)}| j                  d| |       d{  7   Y d}~~d}~ww xY w7 j# d| _        | j                  d       d{  7   w xY ww)zDisconnect from the STT service.

        - Cancel message processing task
        - Disconnect the client
        - Emit on_disconnected event handler for clients
        Nz, disconnecting from Speechmatics STT servicez5 timeout while closing Speechmatics client connectionz#Error closing Speechmatics client: r   on_disconnected)r   cancel_taskr
   r   r   
disconnectr   TimeoutErrorwarningr   r   _call_event_handler)r   r   s     r@   r   z"SpeechmaticsSTTService._disconnect  s     ""4#5#5666!%D 	vIJK		>||ll--///  DL**+<=== 7 0## 	[NNdV#XYZ 	d//.QRSQT,Uab/ccc	d >  DL**+<===s   +D.B!#D.)B% ;B#<B%  D.DD.#B% %+DD DC>3C64C>9D >DD D.D+$D'%D++D.c                    K   	 	 | j                   j                          d{   }| j                  |       d{    <7 7 # t        j                  $ r Y yw xY ww)zProcess messages from the STT client.

        Messages from the STT client are processed in a separate task to avoid blocking the main
        thread. They are handled in strict order in which they are received.
        N)r   get_handle_messager   CancelledErrorr   r   s     r@   r   z,SpeechmaticsSTTService._process_stt_messages  s\     	 $ 3 3 7 7 99**7333 93%% 		s<   AA AA AA A AAAAc                    |}t        j                  |j                  j                        }| j                  |_        |j                  }| j                  |      |_        |j                  |j                  nd|_        | j                  |j                  |      |_
        t        |j                  |j                  ng |j                  |j                  ng |j                  |j                  nt        j                         |_        |j$                  |j$                  ng |_        |j&                  |j&                  ng |_        dD ]  }t)        ||      }|t+        |||         t-        |j.                  t0              r<|j.                  j3                         D ]  \  }}t5        ||      st+        |||       ! |j6                  |j6                  nd}	t9        |	xs d      |_        |S )a8  Build a ``VoiceAgentConfig`` from the given settings.

        Used both at init time (with explicit settings, before
        ``super().__init__`` has run) and before reconnecting so the
        connection always reflects the latest settings.

        Args:
            settings: Settings to build from.
        N)rQ   rS   rV   )r]   r_   ra   rc   re   rg   ri   rk   rm   ro   rq   F)emit_sentences)r/   loadrJ   r   r   r   r   "_language_to_speechmatics_languagerH   _locale_to_speechmatics_localeoutput_localer)   rQ   rS   rV   r*   r   r   rY   r[   getattrsetattr
isinstancers   rz   itemshasattrri   r,   speech_segment_config)
r   r   sr   r   paramvalkeyr   splits
             r@   r   z$SpeechmaticsSTTService._build_config  s     (,,Q-B-B-H-HI !% 4 4 ::AA(K$%HH$8d#BB6??T\] !3/0/?/?/K1++QS121B1B1NA--TV'(||'?q||EUE\E\!

 564D4D4P 0 0VX 9:8J8J8V!"4"4\^
 	,E !U#Cs+	,$ annd+nn224 0
U63'FC/0
 &'%6%6%B!!':%.SX'Y$r?   c                    t        j                         5  t        j                  d       t        j                  dt               ddd       | j
                  j                  st        d      |j                  %|j                  | j
                  j                  _        |j                  %|j                  | j
                  j                  _
        |j                  %|j                  | j
                  j                  _        | j                  r0| j                  j                  | j
                  j                         yy# 1 sw Y   xY w)a  Updates the speaker configuration.

        .. deprecated:: 0.0.104
            Use ``STTUpdateSettingsFrame`` with
            ``SpeechmaticsSTTSettings(...)`` instead.

        This can update the speakers to listen to or ignore during an in-flight
        transcription. Only available if diarization is enabled.

        Args:
            params: Update parameters for the service.
        alwayszdupdate_params() is deprecated. Use STTUpdateSettingsFrame with SpeechmaticsSTTSettings(...) instead.NzDiarization is not enabled)warningscatch_warningssimplefilterwarnDeprecationWarningr   rk   r   rQ   r   rS   rV   r   r   )r   r   s     r@   update_paramsz$SpeechmaticsSTTService.update_params  s      $$& 	!!(+MM8"	 ||..9::   ,9?9N9NDLL''6!!-:@:P:PDLL''7(5;5F5FDLL''2 <<LL224<<3N3NO )	 	s   0D>>Er   c                 J  K   |j                  dd      }|xt        j                  k(  r | j                  |       d{    yxt        j                  k(  r | j                  |       d{    yxt        j                  k(  r | j                  |       d{    yxt        j                  k(  r | j                  |       d{    yt        j                  k(  r| j                  |       d{    y	 t        j                  |  d| d|        y7 7 7 7 V7 ,w)z%Handle a message from the STT client.r    N z -> )r   r%   r   _handle_partial_segmentr   _handle_segmentr   _handle_start_of_turnr   _handle_end_of_turnr   _handle_speakers_resultr
   r   )r   r   events      r@   r   z&SpeechmaticsSTTService._handle_message0  s     Ir* ;';;227;;;3'33**73335'55009993'33..w777'77227;;;vQugT';< <397;sW   <D#D-D#,D--D#D-D#D	+D#4D!5%D#D#D#D#!D#c                 l   K   |j                  dg       }|r| j                  |       d{    yy7 w)a.  Handle AddPartialSegment events.

        AddPartialSegment events are triggered by Speechmatics STT when it detects a
        partial segment of speech. These events provide the partial transcript for
        the current speaking turn.

        Args:
            message: the message payload.
        segmentsNr   _send_framesr   r   r  s      r@   r  z.SpeechmaticsSTTService._handle_partial_segmentC  s7      *1Z)D##H--- -s   )424c                 p   K   |j                  dg       }|r| j                  |d       d{    yy7 w)a  Handle AddSegment events.

        AddSegment events are triggered by Speechmatics STT when it detects a
        final segment of speech. These events provide the final transcript for
        the current speaking turn.

        Args:
            message: the message payload.
        r  T)	finalizedNr  r  s      r@   r  z&SpeechmaticsSTTService._handle_segmentR  s<      *1Z)D##H#=== =s   +646c                    K   t        j                  |  d       | j                  t               d{    | j                  r| j                          d{    yy7 *7 w)a8  Handle StartOfTurn events.

        When Speechmatics STT detects the start of a new speaking turn, a StartOfTurn
        event is triggered. This triggers bot interruption to stop any ongoing speech
        synthesis and signals the start of user speech detection.

        The service will:
        - Send a BotInterruptionFrame upstream to stop bot speech
        - Send a UserStartedSpeakingFrame downstream to notify other components
        - Start metrics collection for measuring response times

        Args:
            message: the message payload.
        z StartOfTurn receivedN)r
   r   broadcast_framer   r   broadcast_interruptionr   s     r@   r  z,SpeechmaticsSTTService._handle_start_of_turna  sZ      	v234""#;<<<!!--/// " 	=/s!   1A"A#A"A A" A"c                 z   K   t        j                  |  d       | j                  t               d{    y7 w)a  Handle EndOfTurn events.

        EndOfTurn events are triggered by Speechmatics STT when it concludes a
        speaking turn. This occurs either due to silence or reaching the
        end-of-turn confidence thresholds. These events provide the final
        transcript for the completed turn.

        The service will:
        - Stop processing metrics collection
        - Send a UserStoppedSpeakingFrame to signal turn completion

        Args:
            message: the message payload.
        z EndOfTurn receivedN)r
   r   r   r   r   s     r@   r  z*SpeechmaticsSTTService._handle_end_of_turnv  s1      	v012""#;<<<s   1;9;c                 t   K   t        j                  |  d       | j                  d|       d{    y7 w)zHandle SpeakersResult events.

        SpeakersResult events are triggered by Speechmatics STT when it provides
        speaker information for the current speaking turn.

        Args:
            message: the message payload.
        z" speakers result received from STTr   N)r
   r   r   r   s     r@   r  z.SpeechmaticsSTTService._handle_speakers_result  s3      	v?@A&&';WEEEs   .868	directionc                   K   t         |   ||       d{    t        |t              rd| _        nt        |t
              rd| _        t        |t              rj| j                  rt        j                  |  d       y| j                  s8| j                  +| j                          | j                  j                          yyyy7 w)zProcess frames for VAD and metrics handling.

        Args:
            frame: Frame to process.
            direction: Direction of frame processing.
        NTFzD VADUserStoppedSpeakingFrame received but internal VAD is being used)r   process_framer   r   r   r   r   r   r
   r   r   request_finalizefinalize)r   r   r$  r   s      r@   r&  z$SpeechmaticsSTTService.process_frame  s      g#E9555 e45!%D67!&D e89f`a %%$,,*B%%'%%' +C% : 	6s   CCB/Cr  r  c                 $   K   |syg }dt         t        t        f   dt         t        t        f   f fd}|rt        d |D              r j                  r j                          ||D cg c]  }t        di  ||       c}z  }dj                  |D cg c]  }|d   	 c}      } j                  |d|d	   d
          d{    t        j                    d|D cg c]  }|j                   c}        nT||D cg c]  }t        di  ||       c}z  }t        j                    d|D cg c]  }|j                   c}        |D ]  }	 j                  |	       d{     yc c}w c c}w 7 c c}w c c}w c c}w 7 "w)zSend frames to the pipeline.

        Args:
            segments: The segments to send.
            finalized: Whether the data is final or partial.
        Nsegmentr   c           
          | j                  dd      rj                  j                  nj                  j                  j                  di | j                  dd      | j                  dd      | j                  d      | j                  d      d	}|| j                  d      xs d| j                  d      | j                  d      | j                  d
g       dS )N	is_activeT
speaker_idUUtextr  	timestampr   )r-  r/  tslangresults)r/  user_idr0  r   resultr>   )r   r   rM   rO   format)r*  r/  r   s     r@   attr_from_segmentz>SpeechmaticsSTTService._send_frames.<locals>.attr_from_segment  s    ;;{D1 44^^::f	 #*++lD"A#KK3!++k2#KK
3	D ";;|4:$[[5#KK
3!++i4 r?   c              3   @   K   | ]  }|j                  d d        yw)is_eouFN)r   ).0r*  s     r@   	<genexpr>z6SpeechmaticsSTTService._send_frames.<locals>.<genexpr>  s     IWGKK%0Is   |r/  Tr   r   )is_finalr   z finalized transcript: z interim transcript: r>   )rz   rv   r   any_finalize_requestedconfirm_finalizer   join_handle_transcriptionr
   r   r/  r   
push_frame)
r   r  r  framesr7  r*  r  finalized_textfr   s
   `         r@   r  z#SpeechmaticsSTTService._send_frames  s      !	tCH~ 	$sCx. 	2  III,,%%' W_`G)G,=g,FG``F !XX(&CQqy&CDN,,J8O -   
 LLD6!8&9QQ!&&9Q8RST
 W_LS)G,=g,FG F
 LLD6!67O17O6PQR  	)E//%(((	)- a 'D
 :R

 8P )s`   A'F*E3FE8"$FE=F!E?4FFF6F		"F+F,F?Fr   c                   K   	 d|i}|j                  |       t        j                  |  d|        | j                  | j                  j                  |             y# t        $ r}t        |  d|       d}~ww xY ww)a  Send a message to the STT service.

        This sends a message to the STT service via the underlying transport. If the session
        is not running, this will raise an exception. Messages in the wrong format will also
        cause an error.

        Args:
            message: Message to send to the STT service.
            **kwargs: Additional arguments passed to the underlying transport.
        r   z sending message to STT: z error sending message to STT: N)updater
   r   r   r   send_messager   RuntimeError)r   r   r   payloadr   s        r@   rI  z#SpeechmaticsSTTService.send_message  s     	L '*GNN6"LLD6!:7)DET\\66w?@ 	L$'FqcJKK	Ls)   A?AA A?	A<'A77A<<A?c                      y)zCheck if this service can generate processing metrics.

        Returns:
            True, as Speechmatics STT supports generation of metrics.
        Tr>   )r   s    r@   can_generate_metricsz+SpeechmaticsSTTService.can_generate_metrics  s     r?   
transcriptr=  r   c                    K   yw)z'Record transcription event for tracing.Nr>   )r   rN  r=  r   s       r@   rB  z,SpeechmaticsSTTService._handle_transcription   s      	s   audioc                   K   	 | j                   r#| j                   j                  |       d{    d y7 
# t        $ r4}t        d|        | j	                          d{  7   Y d}~yd}~ww xY ww)z/Adds audio to the audio buffer and yields None.NzSpeechmatics error: )r   
send_audior   r   r   )r   rP  r   s      r@   run_sttzSpeechmaticsSTTService.run_stt%  sh     	%||ll--e444J 5 	%3A3788""$$$	%sC   A;*; 9	; A;; 	A8$A3(A+)A3.A;3A88A;c                 x   i t         j                  dt         j                  dt         j                  dt         j                  dt         j
                  dt         j                  dt         j                  dt         j                  dt         j                  d	t         j                  d
t         j                  dt         j                  dt         j                  dt         j                  dt         j                  dt         j                   dt         j"                  di t         j$                  dt         j&                  dt         j(                  dt         j*                  dt         j,                  dt         j.                  dt         j0                  dt         j2                  dt         j4                  dt         j6                  dt         j8                  dt         j:                  dt         j<                  dt         j>                  dt         j@                  d t         jB                  d!t         jD                  d"i t         jF                  d#t         jH                  d$t         jJ                  d%t         jL                  d&t         jN                  d't         jP                  d(t         jR                  d)t         jT                  d*t         jV                  d+t         jX                  d,t         jZ                  d-t         j\                  d.t         j^                  d/t         j`                  d0t         jb                  d1t         jd                  d2t         jf                  d3t         jh                  d4t         jj                  d5t         jl                  d6i}to        ||d78      }|stq        d9|       |S ):zConvert a Language enum to a Speechmatics language code.

        Args:
            language: The Language enum to convert.

        Returns:
            str: The Speechmatics language code, if found.
        arbaeubebgbnyuecahrcsdanleneoetfafifrgldeelhehihuitidgajakolvltmsmtcmnmrmnnoplptroruskslessvswtathtrugukurvicyT)use_base_codezUnsupported language: )9r    ARBAEUBEBGBNYUECAHRCSDANLr   EOETFAFIFRGLDEELHEHIHUITIDGAJAKOLVLTMSMTCMNMRMNNOPLPTRORUSKSLESSVSWTATHTRUGUKURVICYr!   r   )r   r   BASE_LANGUAGESr5  s       r@   r   z9SpeechmaticsSTTService._language_to_speechmatics_language3  s   7
KK7
KK7
 KK7
 KK	7

 KK7
 KK7
 LL%7
 KK7
 KK7
 KK7
 KK7
 KK7
 KK7
 KK7
 KK7
  KK!7
" KK#7
$ KK%7
& KK'7
( KK)7
* KK+7
, KK-7
. KK/7
0 KK17
2 KK37
4 KK57
6 KK77
8 KK97
: KK;7
< KK=7
> KK?7
@ KKA7
B KKC7
D LL%E7
F KKG7
H KKI7
J KKK7
L KKM7
N KKO7
P KKQ7
R KKS7
T KKU7
V KKW7
X KKY7
Z KK[7
\ KK]7
^ KK_7
` KKa7
b KKc7
d KKe7
f KKg7
h KKKKKKm7
t "(N$O 5hZ@AA r?   	base_codelocalec                    dt         j                  dt         j                  dt         j                  dii}dt	        |      vs||vry|j                  |      j                  |d      }|st        j                  |  d| d|        |S )	a  Convert a Language enum to a Speechmatics language / locale code.

        Args:
            base_code: The language code.
            locale: The Language enum to convert.

        Returns:
            str: The Speechmatics language code, if found.
        ra  zen-GBzen-USzen-AU-Nz Unsupported output locale: z, defaulting to )r    EN_GBEN_USEN_AUrv   r   r
   r   )r   r  r  LOCALESr5  s        r@   r   z5SpeechmaticsSTTService._locale_to_speechmatics_locale  s     
 c&k!Yg%= Y'++FD9 NNdV#?xGWXaWbcd r?   c                     ddt         dt         dz  ddfd}dddd	d
dddddddhdddddg}|D ];  \  }}||v s |||       |j                  |d      '|j                  |||          = y)a  Check arguments for deprecation and update params if necessary.

        This function will show deprecation warnings for deprecated arguments and
        migrate them to the new location in the params object. If the new location
        is None, the argument is not used.

        Args:
            kwargs: Keyword arguments passed to the constructor.
            params: Input parameters for the service.
        Noldnewr   c                     dd l } |j                         5   |j                  d       |r
d|  d| d}nd|  d} |j                  |t               d d d        y # 1 sw Y   y xY w)Nr   r  `z"` is deprecated, use `InputParams.z` is deprecated and not used)r	  r
  r  r  r  )r  r  r	  r   s       r@   _deprecation_warningzKSpeechmaticsSTTService._check_deprecated_args.<locals>._deprecation_warning  sp    (((* ;%%%h/ !#&HQOG !#&BCGg'9:; ; ;s   <AA$)r   r   )language_coder   )rH   rH   )r   N)output_locale_codeN)enable_partialsN)r_   r_   )
chunk_sizeN)r   r   )ra   ra   enable_speaker_diarizationrk   )text_formatrM   )ro   ro   )transcription_configN)
enable_vadN)r   NrD   )rv   r   __setattr__)r   r   r   r  deprecated_argsr  r  s          r@   r   z-SpeechmaticsSTTService._check_deprecated_args  s    		;c 		;d
 		;d 		; %) #(%& 0T)+?@4,* +!
( ( 	9HCf}$S#.::c4(4&&sF3K8		9r?   )r   N)F)?r6   r7   r8   r9   rB   Settingsrw   r1   r&   r(   r*   r)   r+   r#   r   r   r   r   r   rv   r|   r{   ry   r   r   r   rz   r   r   r   r   r   r   r   r   r   r.   r   r  r   r  r  r  r  r  r   r   r&  rx   r  r$   rI  rM  r"   r    rB  bytesr   rS  r   r   r   __classcell__)r   s   @r@   r   r      s(   " 'H&& *!M#N'+)/V)i V)p?y ?F ##"&"/"9"9%)!%37)>[? t[? *	[?
 4Z[?  [? d"[? [? *D0[?  $,[?B 
8,C 8SRUX 8t! !
!+ !
8Pt>2"?&= ?BR ?B%P%P 
%PV=T#s(^ = =&.T#s(^ . .>T#s(^ > >04S> 0d 0*=c3h =D =&
FT#s(^ 
F 
F ( (> (4H)4S#X+? H)D H)]a H)\L*@3*F LRU LZ^ L.d  c T U]  %5 %^E4K-H %K8 K KZ X RUX\R\ B19T 19; 194 19r?   r   )Ir9   r   r   r	  dataclassesr   r   enumr   typingr   r   r   dotenvr	   logurur
   pydanticr   pipecatr   r   pipecat.frames.framesr   r   r   r   r   r   r   r   r   r   r   r   "pipecat.processors.frame_processorr   pipecat.services.settingsr   r   r   r   pipecat.services.stt_latencyr   pipecat.services.stt_servicer   pipecat.transcriptions.languager    r!   (pipecat.utils.tracing.service_decoratorsr"   speechmatics.voicer#   r$   r%   r&   r'   r(   r)   r*   r+   r,   r-   r.   r/   ModuleNotFoundErrorr   errorr   rv   r1   rB   r   r>   r?   r@   <module>r     s    ,  	  (  0 0    .    > _ _ > 3 F ?,   . T " Fk F FRr9Z r9M  ,FLL;qc"#FLL[ &qc*
++,s   C D2DD