
    qi{                        d Z ddlZddlZddlmZmZ ddlmZmZm	Z	m
Z
mZ ddlmZ ddlmZ ddlmZ ddlmZmZmZmZmZmZmZmZmZmZ dd	lmZ dd
l m!Z!m"Z"m#Z#m$Z$ ddl%m&Z& ddl'm(Z( ddl)m*Z* ddl+m,Z, ddl-m.Z. ddl/m0Z0m1Z1m2Z2m3Z3m4Z4m5Z5 	 ddl6m7Z8 ddl9m:Z: de?de*fdZ@e G d de"             ZA G d de(      ZBy# e;$ r7Z< ejz                  de<         ejz                  d        e>de<       dZ<[<ww xY w)zAssemblyAI speech-to-text service implementation.

This module provides integration with AssemblyAI's real-time speech-to-text
WebSocket API for streaming audio transcription.
    N)	dataclassfield)AnyAsyncGeneratorDictListOptional)	urlencode)logger)version)
CancelFrameEndFrameFrameInterimTranscriptionFrame
StartFrameTranscriptionFrameUserStartedSpeakingFrameUserStoppedSpeakingFrameVADUserStartedSpeakingFrameVADUserStoppedSpeakingFrame)FrameDirection)	NOT_GIVENSTTSettings	_NotGiven_warn_deprecated_param)ASSEMBLYAI_TTFS_P99)WebsocketSTTService)Language)time_now_iso8601)
traced_stt   )AssemblyAIConnectionParamsBaseMessageBeginMessageSpeechStartedMessageTerminationMessageTurnMessage)connect)StatezException: zOIn order to use AssemblyAI, you need to `pip install "pipecat-ai[assemblyai]"`.zMissing module: language_codereturnc                     	 t        | j                               S # t        $ r, t        j                  d|  d       t         j
                  cY S w xY w)a|  Map AssemblyAI language codes to Pipecat Language enum.

    AssemblyAI returns simple language codes like "es", "fr", etc.
    This function maps them to the corresponding Language enum values.

    Args:
        language_code: AssemblyAI language code (e.g., "es", "fr", "de")

    Returns:
        Corresponding Language enum value, defaulting to Language.EN if not found.
    z'Unknown language code from AssemblyAI: z, defaulting to English)r   lower
ValueErrorr   warningEN)r*   s    Q/opt/pipecat/venv/lib/python3.12/site-packages/pipecat/services/assemblyai/stt.pymap_language_from_assemblyair2   <   sO    ++-.. 5m_D[\	
 {{	s    2AAc                      e Zd ZU dZ ed       Zeez  ed<    ed       Z	e
dz  ez  ed<    ed       Zedz  ez  ed	<    ed
       Ze
dz  ez  ed<    ed       Ze
dz  ez  ed<    ed       Zee   dz  ez  ed<    ed       Zedz  ez  ed<    ed       Zedz  ez  ed<    ed       Zeez  ed<    ed       Zedz  ez  ed<    ed       Zedz  ez  ed<   y)AssemblyAISTTSettingsu  Settings for AssemblyAISTTService.

    Parameters:
        formatted_finals: Whether to enable transcript formatting.
        word_finalization_max_wait_time: Maximum time to wait for word
            finalization in milliseconds.
        end_of_turn_confidence_threshold: Confidence threshold for
            end-of-turn detection.
        min_turn_silence: Minimum silence duration when confident about
            end-of-turn.
        max_turn_silence: Maximum silence duration before forcing
            end-of-turn.
        keyterms_prompt: List of key terms to guide transcription.
        prompt: Optional text prompt to guide the transcription. Only
            used when model is "u3-rt-pro".
        language_detection: Enable automatic language detection.
        format_turns: Whether to format transcript turns.
        speaker_labels: Enable speaker diarization.
        vad_threshold: VAD confidence threshold (0.0–1.0) for classifying
            audio frames as silence. Only applicable to u3-rt-pro.
    c                      t         S Nr        r1   <lambda>zAssemblyAISTTSettings.<lambda>j   s    y r9   )default_factoryformatted_finalsc                      t         S r6   r7   r8   r9   r1   r:   zAssemblyAISTTSettings.<lambda>l       	 r9   Nword_finalization_max_wait_timec                      t         S r6   r7   r8   r9   r1   r:   zAssemblyAISTTSettings.<lambda>o   r>   r9    end_of_turn_confidence_thresholdc                      t         S r6   r7   r8   r9   r1   r:   zAssemblyAISTTSettings.<lambda>q       Y r9   min_turn_silencec                      t         S r6   r7   r8   r9   r1   r:   zAssemblyAISTTSettings.<lambda>r   rC   r9   max_turn_silencec                      t         S r6   r7   r8   r9   r1   r:   zAssemblyAISTTSettings.<lambda>s   s    R[ r9   keyterms_promptc                      t         S r6   r7   r8   r9   r1   r:   zAssemblyAISTTSettings.<lambda>t       9 r9   promptc                      t         S r6   r7   r8   r9   r1   r:   zAssemblyAISTTSettings.<lambda>u   s    PY r9   language_detectionc                      t         S r6   r7   r8   r9   r1   r:   zAssemblyAISTTSettings.<lambda>v   rJ   r9   format_turnsc                      t         S r6   r7   r8   r9   r1   r:   zAssemblyAISTTSettings.<lambda>w       I r9   speaker_labelsc                      t         S r6   r7   r8   r9   r1   r:   zAssemblyAISTTSettings.<lambda>x   rQ   r9   vad_threshold)__name__
__module____qualname____doc__r   r<   boolr   __annotations__r?   intrA   floatrD   rF   rH   r   strrK   rM   rO   rR   rT   r8   r9   r1   r4   r4   R   s.   , */?P)QdY&Q>C)?#S4Z)%;  BG)B$edlY&>  05EV/WcDj9,W/4EV/WcDj9,W49J[4\OT#Y%	1\%*;L%MFC$J"M27HY2Zti/Z%*;L%ML$"M.3DU.VND4K)+V.3DU.VM54<)+Vr9   r4   c                       e Zd ZU dZeZeed<   ddddddddded
d	ed
e	e
   dededede	e   dedede	e   de	e   de	e   f fdZdedefdZdefdZdedeeef   f fdZdef fdZdef fdZdef fdZdedeedf   fd Zded!ef fd"Z e!d#ed$ed
e
fd%       Z"defd&Z# fd'Z$ fd(Z%d) Z&d* Z'd+ Z(d, Z)d-e*eef   de+fd.Z,d-e*eef   fd/Z-d-e.fd0Z/d-e0fd1Z1d-e2fd2Z3 xZ4S )3AssemblyAISTTServicea  AssemblyAI real-time speech-to-text service.

    Provides real-time speech transcription using AssemblyAI's WebSocket API.
    Supports both interim and final transcriptions with configurable parameters
    for audio processing and connection management.
    	_settingsNz$wss://streaming.assemblyai.com/v3/wsi>  	pcm_s16leT)
languageapi_endpoint_base_urlsample_rateencodingconnection_paramsvad_force_turn_endpointshould_interruptspeaker_formatsettingsttfs_p99_latencyapi_keyrb   rc   rd   re   rf   rg   rh   ri   rj   rk   c                   t        dt        j                  ddddddddddd      }|t        dt         d       ||_        |t        dt                |
s|j
                  }|j                  }|j                  |_        |j                  |_	        |j                  |_
        |j                  |_        |j                  |_        |j                  |_        |j                  |_        |j                  |_        |j                   |_        |j"                  |_        |j$                  |_        |j&                  |_        |
|j)                  |
       |j                  dk(  }|s|st+        d|j                   d      |j                  |j                  t+        d	      |j                  t-        j.                  d
       |r| j1                  ||       t3        | h  d|||d| || _        || _        || _        || _        |	| _        || _         tC        jD                         | _#        d| _$        d| _%        d| _&        tO               | _(        d| _)        d| _*        d| _+        y)a}
  Initialize the AssemblyAI STT service.

        Args:
            api_key: AssemblyAI API key for authentication.
            language: Language code for transcription. Defaults to English (Language.EN).

                .. deprecated:: 0.0.105
                    Use ``settings=AssemblyAISTTSettings(language=...)`` instead.

            api_endpoint_base_url: WebSocket endpoint URL. Defaults to AssemblyAI's streaming endpoint.
            sample_rate: Audio sample rate in Hz. Defaults to 16000.
            encoding: Audio encoding format. Defaults to "pcm_s16le".
            connection_params: Connection configuration parameters.

                .. deprecated:: 0.0.105
                    Use ``settings=AssemblyAISTTSettings(...)`` instead.

            vad_force_turn_endpoint: Controls turn detection mode.
                When True (Pipecat mode, default): Forces AssemblyAI to return finals ASAP
                so Pipecat's turn detection (e.g., Smart Turn) decides when the user is done.
                - min_turn_silence defaults to 100ms (user can override)
                - max_turn_silence is ALWAYS set equal to min_turn_silence
                - VAD stop sends ForceEndpoint as ceiling
                - No UserStarted/StoppedSpeakingFrame emitted from STT
                When False (AssemblyAI turn detection mode, u3-rt-pro only): AssemblyAI's model
                controls turn endings using built-in turn detection.
                - Uses AssemblyAI API defaults for all parameters (unless user explicitly sets them)
                - Emits UserStarted/StoppedSpeakingFrame from STT
                - No ForceEndpoint on VAD stop
            should_interrupt: Whether to interrupt the bot when the user starts speaking
                in AssemblyAI turn detection mode (vad_force_turn_endpoint=False). Only applies
                when using AssemblyAI's built-in turn detection. Defaults to True.
            speaker_format: Optional format string for speaker labels when diarization is enabled.
                Use {speaker} for speaker label and {text} for transcript text.
                Example: "<{speaker}>{text}</{speaker}>" or "{speaker}: {text}"
                If None, transcript text is not modified. Defaults to None.
            settings: Runtime-updatable settings. When provided alongside deprecated
                parameters, ``settings`` values take precedence.
            ttfs_p99_latency: P99 latency from speech end to final transcript in seconds.
                Override for your deployment. See https://github.com/pipecat-ai/stt-benchmark
            **kwargs: Additional arguments passed to parent STTService class.
        z	u3-rt-proTN)modelrb   r<   r?   rA   rD   rF   rH   rK   rM   rO   rR   rT   rb   rf   zAssemblyAI turn detection mode (vad_force_turn_endpoint=False) requires u3-rt-pro for SpeechStarted support. Either set vad_force_turn_endpoint=True for z, or use model='u3-rt-pro'.a  The prompt and keyterms_prompt parameters cannot be used in the same request. Please choose either one or the other based on your use case. When you use keyterms_prompt, your boosted words are appended to the default prompt automatically. Or to boost within prompt: <prompt> + Make sure to boost the words <keyterms> in the audio. For more info go to: https://www.assemblyai.com/docs/streaming/universal-3-proaC  Custom prompt detected. Prompting is a beta feature. We recommend testing with no prompt first, as this will use our optimized default prompt for voice agents. Bad prompts may lead to bad results. If you'd like to create your own prompt, check out our prompting guide at: https://www.assemblyai.com/docs/streaming/prompting)rd   rk   rj   F2   r   r8   ),r4   r   r0   r   rb   rd   re   speech_modelrn   r<   r?   rA   rD   rF   rH   rK   rM   rO   rR   rT   apply_updater.   r   r/   _configure_pipecat_turn_modesuper__init___api_key_api_endpoint_base_url_vad_force_turn_endpoint_should_interrupt_speaker_format	_encodingasyncioEvent_termination_event_received_termination
_connected_receive_task	bytearray_audio_buffer_chunk_size_ms_chunk_size_bytes_user_speaking)selfrl   rb   rc   rd   re   rf   rg   rh   ri   rj   rk   kwargsdefault_settings	is_u3_pro	__class__s                  r1   rt   zAssemblyAISTTService.__init__   s   v 1[[!,0-1!! #
" ":/DjQ(0% ("#68MN/;;,55):)G)G &4E4V4V 1%EE !@ &FF !A 5F4V4V 14E4V4V 13D3T3T 0*;*B*B '6G6Z6Z 30A0N0N -2C2R2R /1B1P1P . ))(3 %**k9	&y44D4J4J3K L,-  "".3C3S3S3_a  "".NNF #--.>	J 	
#-%	
 		
  &;#(?%!1- "")--/%*"!&[ !"#r9   r   c                     |rQ|j                   }|d}|j                  &t        j                  d|j                   d| d       ||_         ||_        yd|_        d|_         y)a  Configure settings for Pipecat turn detection mode.

        When vad_force_turn_endpoint is enabled, force AssemblyAI to return
        finals as fast as possible so Pipecat's smart turn analyzer can decide
        when the user is done speaking. VAD stop is the absolute ceiling.

        u3-rt-pro:
        - min_turn_silence defaults to 100ms (user can override)
        - max_turn_silence is ALWAYS set equal to min_turn_silence
          to avoid double turn detection (AssemblyAI + Pipecat both analyzing)
        - If user sets max_turn_silence, it's ignored with a warning
        - end_of_turn_confidence_threshold: not set (API default)

        universal-streaming-*:
        - end_of_turn_confidence_threshold=0.0 (disable semantic turn detection)
        - min_turn_silence=160
        - max_turn_silence: not set (API default)

        Args:
            settings: The settings to configure in place.
            is_u3_pro: Whether using u3-rt-pro model.
        Nd   zYour max_turn_silence value (zYms) will be OVERRIDDEN in Pipecat mode (vad_force_turn_endpoint=True). It will be set to zms (matching min_turn_silence) and SENT to AssemblyAI to avoid double turn detection. To use your max_turn_silence as-is, switch to AssemblyAI turn detection mode (vad_force_turn_endpoint=False).g      ?   )rD   rF   r   r/   rA   )r   rj   r   min_silences       r1   rr   z1AssemblyAISTTService._configure_pipecat_turn_mode.  s    . "33K"! ((43H4M4M3N Od"m $`a )4H%(3H% 9<H5(+H%r9   r+   c                      y)zzCheck if the service can generate metrics.

        Returns:
            True if metrics generation is supported.
        Tr8   r   s    r1   can_generate_metricsz)AssemblyAISTTService.can_generate_metrics\  s     r9   deltac                    K   t         |   |       d{   }|s|S | j                          d{    | j                          d{    |S 7 :7  7 
w)zApply a settings delta and reconnect to apply changes.

        Args:
            delta: A settings delta with updated values.

        Returns:
            Dict mapping changed field names to their previous values.
        N)rs   _update_settings_disconnect_connect)r   r   changedr   s      r1   r   z%AssemblyAISTTService._update_settingsd  sZ      077N    mmo 8 	!s1   AAAAA
AAAAframec                    K   t         |   |       d{    t        | j                  | j                  z  dz  dz        | _        | j                          d{    y7 J7 w)zmStart the speech-to-text service.

        Args:
            frame: Start frame to begin processing.
        N   i  )rs   startr[   r   rd   r   r   r   r   r   s     r1   r   zAssemblyAISTTService.startx  s\      gmE"""!$T%8%84;K;K%Ka%ORV%V!Wmmo 	#s"   A%A!AA%A#A%#A%c                 t   K   t         |   |       d{    | j                          d{    y7 7 w)ziStop the speech-to-text service.

        Args:
            frame: End frame to stop processing.
        N)rs   stopr   r   s     r1   r   zAssemblyAISTTService.stop  s6      gl5!!!    	"    848688c                 t   K   t         |   |       d{    | j                          d{    y7 7 w)zoCancel the speech-to-text service.

        Args:
            frame: Cancel frame to abort processing.
        N)rs   cancelr   r   s     r1   r   zAssemblyAISTTService.cancel  s6      gnU###    	$ r   audioc                  K   | j                   j                  |       | j                  r| j                  j                  t        j
                  u rt        | j                         | j                  k\  rt        | j                   d| j                         }| j                   | j                  d | _         | j                  j                  |       d{    t        | j                         | j                  k\  rd y7 -w)zProcess audio data for speech-to-text conversion.

        Args:
            audio: Raw audio bytes to process.

        Yields:
            None (processing handled via WebSocket messages).
        N)
r   extend
_websocketstater)   OPENlenr   bytessend)r   r   chunks      r1   run_sttzAssemblyAISTTService.run_stt  s      	!!%(??t44

Bd(()T-C-CCd001I43I3IJK%)%7%78N8N8P%Q"oo**5111 d(()T-C-CC
 
 2s   CD C>&D 8D 	directionc                   K   t         |   ||       d{    t        |t              ryt        |t              r| j
                  rz| j                  rn| j                  j                  t        j                  u rH| j                          | j                  j                  t        j                  ddi             d{    | j                          d{    yy7 7  7 
w)zProcess frames for VAD and metrics handling.

        Args:
            frame: Frame to process.
            direction: Direction of frame processing.
        NtypeForceEndpoint)rs   process_frame
isinstancer   r   rw   r   r   r)   r   request_finalizer   jsondumpsstart_processing_metrics)r   r   r   r   s      r1   r   z"AssemblyAISTTService.process_frame  s      g#E9555e89:;--OOOO))UZZ7%%'oo**4::v6O+PQQQ//111 < 	6 R1s4   C#CB&C#>C?C#C!C#C#!C#
transcriptis_finalc                    K   yw)z'Record transcription event for tracing.Nr8   )r   r   r   rb   s       r1   _trace_transcriptionz)AssemblyAISTTService._trace_transcription  s      	s   c                    | j                   }i }| j                  |d<   | j                  |d<   |j                  |j                  |d<   |j                  |j
                  |j                  |j                  |j                  |j                  |j                  |j                  |j                  |j                  d
}|j                         D ]:  \  }}|	t        |t               rt#        |      j%                         ||<   6|||<   < |j&                  "t)        j*                  |j&                        |d<   |rt-        |      }| j.                   d| S | j.                  S )zGBuild WebSocket URL with query parameters using urllib.parse.urlencode.rd   re   rp   )
r<   r?   rA   rD   rF   rK   rM   rO   rR   rT   rH   ?)r`   rd   rz   rn   r<   r?   rA   rD   rF   rK   rM   rO   rR   rT   itemsr   rY   r]   r-   rH   r   r   r
   rv   )r   sparamsoptional_fieldskvquery_strings          r1   _build_ws_urlz"AssemblyAISTTService._build_ws_url  sI   NN!# !% 0 0}!^^z 77%&WWF>" !" 2 2/0/P/P010R0R ! 2 2 ! 2 2hh"#"6"6NN..__
 $))+ 	"DAq}a& #AF1I !F1I	" ((,

13D3D(EF$%$V,L112!L>BB***r9   c                   K   t         |           d{    | j                          d{    | j                  r=| j                  s0| j                  | j                  | j                              | _        yyy7 f7 Pw)znConnect to the AssemblyAI service.

        Establishes websocket connection and starts receive task.
        N)rs   r   _connect_websocketr   r   create_task_receive_task_handler_report_error)r   r   s    r1   r   zAssemblyAISTTService._connect  so     
 g   %%'''??4#5#5!%!1!1$2L2LTM_M_2`!aD $6?	 	!'s    B A<B A>AB >B c                 >  K   t         |           d{    | j                  r| j                  sy	 | j                  j                          d| _        | j                  j                  t        j                  u rt        | j                        dkD  rP| j                  j                  t        | j                               d{    | j                  j                          	 | j                  j                  t        j                  ddi             d{    	 t!        j"                  | j                  j%                         d       d{    | j0                  r*| j3                  | j0                         d{    d| _        | j5                          d{    y7 7 7 7 Z# t         j&                  $ r t)        j*                  d       Y w xY w# t,        $ r)}| j/                  d	| |
       d{  7   Y d}~d}~ww xY w# t,        $ r)}| j/                  d	| |
       d{  7   Y d}~d}~ww xY w7 7 # | j0                  r+| j3                  | j0                         d{  7   d| _        | j5                          d{  7   w xY ww)z}Disconnect from the AssemblyAI service.

        Sends termination message, waits for acknowledgment, and cleans up.
        NFr   r   	Terminateg      @)timeoutz5Timed out waiting for termination message from serverUnknown error occurred: 	error_msg	exception)rs   r   r   r   r}   clearr~   r   r)   r   r   r   r   r   r   r   r{   wait_forwaitTimeoutErrorr   r/   	Exception
push_errorr   cancel_task_disconnect_websocket)r   er   s     r1   r   z AssemblyAISTTService._disconnect  s,    
 g!###doo	/##))+).D&$$

2t))*Q.//..uT5G5G/HIII&&,,.	a//..tzz6;:O/PQQQ`%..t/F/F/K/K/MWZ[[[ !!&&t'9'9:::%)",,...G 	$ J
 R \"// `'^_` ! a//6Nqc4R^_/```a  	Y//.Fqc,JVW/XXX	Y
 ; /	 !!&&t'9'9:::%)",,...s  JF#JBH F&H $3G F(G 2F, F*F, *J>I?JIJ&H (G *F, ,(GG GG 	H#H<G?=HH HH 	IH<1H42H<7I <II JJ+J3I64JJJJc                   K   	 | j                   r'| j                   j                  t        j                  u ryt	        j
                  d       | j                         }| j                  dt                dd}t        ||       d{   | _         d| _
        | j                  d       d{    t	        j
                  |  d	       y7 B7 # t        $ r,}d
| _
        | j                  d| |       d{  7    d}~ww xY ww)z1Establish the websocket connection to AssemblyAI.Nz"Connecting to AssemblyAI WebSocketz$AssemblyAI/1.0 (integration=Pipecat/))Authorizationz
User-Agent)additional_headersTon_connectedz" Connected to AssemblyAI WebSocketFz!Unable to connect to AssemblyAI: r   )r   r   r)   r   r   debugr   ru   pipecat_versionwebsocket_connectr   _call_event_handlerr   r   )r   ws_urlheadersr   s       r1   r   z'AssemblyAISTTService._connect_websocket!  s     	4??#8#8EJJ#FLL=>'')F!% D_EVDWWXYG %6#*% DO #DO**>:::LLD6!CDE
 ; 	#DO//.OPQs,S_`/aaa	sc   D2C DAC C
$C ,C-C 	D
C C 	D C>7C:8C>>DDc                   K   	 | j                   r7t        j                  d       | j                   j                          d{    d| _         d| _        | j                  d       d{    y7 ,# t        $ r)}| j                  d| |       d{  7   Y d}~Wd}~ww xY w7 <# d| _         d| _        | j                  d       d{  7   w xY ww)z-Close the websocket connection to AssemblyAI.z'Disconnecting from AssemblyAI WebSocketNzError closing websocket: r   Fon_disconnected)r   r   r   closer   r   r   r   )r   r   s     r1   r   z*AssemblyAISTTService._disconnect_websocket:  s     		>FGoo++--- #DO#DO**+<=== . 	Z//.Gs,KWX/YYY	Z
 > #DO#DO**+<===sv   C>A1 A/A1 "C)B&*C/A1 1	B#:BBBB( B##B( &C(#CCCCc                 H    | j                   r| j                   S t        d      )zGet the current WebSocket connection.

        Returns:
            The WebSocket connection.

        Raises:
            Exception: If WebSocket is not connected.
        zWebsocket not connected)r   r   r   s    r1   _get_websocketz#AssemblyAISTTService._get_websocketG  s!     ????"122r9   c           	        K   | j                         2 3 d{   }	 t        j                  |      }|j                  d      dk(  r/t	        j
                  |  dt        j                  |d              | j                  |       d{    |7 w7 # t        j                  $ r t	        j                  d|        Y w xY w6 yw)zxReceive and process websocket messages.

        Continuously processes messages from the websocket connection.
        Nr   Turnz RAW JSON from AssemblyAI: r   )indentzReceived non-JSON message: )
r   r   loadsgetr   tracer   _handle_messageJSONDecodeErrorr/   )r   messagedatas      r1   _receive_messagesz&AssemblyAISTTService._receive_messagesT  s     
 "002 	H 	H'Hzz'*88F#v-LLD6)DTZZPT]^E_D`!ab**4000	H 1'' H!<WIFGH 3sU   CCBCCA,B	B
BCCB+C>C CCr   c                    |j                  d      }|dk(  rt        j                  |      S |dk(  rt        j                  |      S |dk(  rt	        j                  |      S |dk(  rt        j                  |      S t        d|       )z6Parse a raw message into the appropriate message type.r   Beginr   SpeechStartedTerminationzUnknown message type: )r   r$   model_validater'   r%   r&   r.   )r   r   msg_types      r1   _parse_messagez#AssemblyAISTTService._parse_messagec  s    ;;v&w..w77--g66('66w??&%44W==5hZ@AAr9   c                 $  K   	 | j                  |      }t        |t              r1t        j                  d|j
                   d|j                   d       yt        |t              r| j                  |       d{    yt        |t              r| j                  |       d{    yt        |t              r| j                  |       d{    yy7 Z7 27 
# t        $ r)}| j                  d| |       d{  7   Y d}~yd}~ww xY ww)z%Handle AssemblyAI WebSocket messages.zSession Begin: z (expires at r   Nr   r   )r   r   r$   r   r   id
expires_atr'   _handle_transcriptionr%   _handle_speech_startedr&   _handle_terminationr   r   )r   r   parsed_messager   s       r1   r   z$AssemblyAISTTService._handle_messager  s     	Y!009N.,7%n&7&7%8nF_F_E``ab NK800@@@N,@A11.AAAN,>?..~>>> @ AA> 	Y//.Fqc,JVW/XXX	Ys   DAC D$C :C;C ?D $C $C%C )D*$C CC DC C C 	D$D=D >DDDDc                    K   | j                   ry| j                          d{    | j                  t               d{    | j                  r| j                          d{    d| _        y7 M7 27 w)u  Handle SpeechStarted event — fast barge-in for AssemblyAI turn detection.

        Broadcasts UserStartedSpeakingFrame to signal the start of user
        speech, then pushes an interruption to cancel any bot audio.
        SpeechStarted fires before any transcript arrives, so the turn
        is cleanly started before any transcription frames are pushed.

        Only applies when using AssemblyAI's built-in turn detection. When using
        Pipecat turn detection, VAD + smart turn analyzer handle interruptions.
        NT)rw   r   broadcast_framer   rx   broadcast_interruptionr   r   r   s     r1   r   z+AssemblyAISTTService._handle_speech_started  sk      ((++---""#;<<<!!--///"	 	.</s3   !A7A1A7 A3#A7$A5%A73A75A7c                    K   d| _         | j                  j                          t        j                  d|j
                   d|j                   d       | j                  t                      d{    y7 w)zHandle termination message.Tz#Session Terminated: Audio Duration=zs, Session Duration=r   N)	r~   r}   setr   infoaudio_duration_secondssession_duration_seconds
push_framer   r  s     r1   r  z(AssemblyAISTTService._handle_termination  sj     %)"##%1'2P2P1Q R  ' @ @AD	
 oohj)))s   A.A80A61A8c           
        K   |j                   syt        j                  }|j                  rb|j                  rV|j                  dk\  rt        |j                        }n1t        j                  d|j                  dd|j                   d       | j                  }|j                   }|j                  rI|j                  }| j                  r1| j                  j                  |j                  |j                         }|j                  xr% | j                  j                   xs |j                  }| j                   r|rt#        |j                        }|r| j%                          t        j&                  |  d| d	       | j)                  t+        ||t-               ||             d{    | j/                  |d
|       d{    | j1                          d{    y| j)                  t3        ||t-               ||             d{    y|r| j)                  t+        ||t-               ||d
             d{    | j/                  |d
|       d{    | j1                          d{    | j5                  t6               d{    d| _        y| j)                  t3        ||t-               ||             d{    y7 !7 	7 7 7 7 |7 f7 K7 w)um  Handle transcription results with two turn detection modes.

        Pipecat turn detection (vad_force_turn_endpoint=True):
            - No UserStarted/StoppedSpeakingFrame from STT
            - end_of_turn → TranscriptionFrame (finalized set by base class
              if this is a ForceEndpoint response)
            - else → InterimTranscriptionFrame

        AssemblyAI turn detection (vad_force_turn_endpoint=False):
            - UserStartedSpeakingFrame on first transcript
            - end_of_turn → TranscriptionFrame + UserStoppedSpeakingFrame
            - else → InterimTranscriptionFrame
        Ngffffff?z#Low language detection confidence (z.2fz) for language 'z', falling back to English)speakertextz Transcript: ""T)	finalizedF)r   r   r0   r*   language_confidencer2   r   r/   _user_idr  ry   formatend_of_turnr`   rO   turn_is_formattedrw   rY   confirm_finalizer   r  r   r   r   stop_processing_metricsr   r  r   r   )r   r   rb   
speaker_idtranscript_textis_final_turnfinalize_confirmeds          r1   r   z*AssemblyAISTTService._handle_transcription  s     !! ;;  W%@%@**c178M8MN9':U:UVY9Z [%%,%:%:$;;UW ]]
!,,?? J##"&"6"6"="=#OO'2D2D #> #
  ++ 
+++Hw/H/H 	 (( %)'*C*C%D"%))+v^O3DAFGoo&'"(*    //xPPP22444oo-'"(*    oo&'"(* "&	 	 	 //xPPP22444 **+CDDD&+#oo-'"(*   [ Q4	 Q4 Es   F%K'K(KKKK.K	K
2K<K=KKK/K0KK5KKKKKKKKKKK)5rU   rV   rW   rX   r4   SettingsrZ   r   r]   r	   r   r[   r"   rY   r\   rt   rr   r   dictr   r   r   r   r   r   r   r   r   r   r   r   r   r   r    r   r   r   r   r   r   r   r   r   r#   r   r   r%   r   r&   r  r'   r   __classcell__)r   s   @r1   r_   r_   {   s    %H$$ (,%K #BF(,!%(,48,?f$ f$ 8$	f$
  #f$ f$ f$ $$>?f$ "&f$ f$ !f$ 01f$ #5/f$P,,5J ,,W[ ,,\d ,A d3PS8n ( ! !!+ !5 ^E4K-H &2 2> 2( S D T\  )+s )+V
b(/T2>3HBd38n B BYT#s(^ Y$#4H #(	*1C 	*j; jr9   r_   )CrX   r{   r   dataclassesr   r   typingr   r   r   r   r	   urllib.parser
   logurur   pipecatr   r   pipecat.frames.framesr   r   r   r   r   r   r   r   r   r   "pipecat.processors.frame_processorr   pipecat.services.settingsr   r   r   r   pipecat.services.stt_latencyr   pipecat.services.stt_servicer   pipecat.transcriptions.languager   pipecat.utils.timer   (pipecat.utils.tracing.service_decoratorsr    modelsr"   r#   r$   r%   r&   r'   websockets.asyncio.clientr(   r   websockets.protocolr)   ModuleNotFoundErrorr   errorr   r]   r2   r4   r_   r8   r9   r1   <module>r2     s      ( < < "  .   > _ _ < < 4 / ? ,F)  , %WK %W %WPR
. R
K  ,FLL;qc"#FLLbc
&qc*
++,s   B< <C82C33C8