"""OpenAI Speech-to-Text service implementations.

Provides two STT services:

- ``OpenAISTTService``: REST-based transcription using the Audio API
  (Whisper / GPT-4o).
- ``OpenAIRealtimeSTTService``: WebSocket-based streaming transcription
  using the Realtime API in transcription-only mode.
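
Example (illustrative sketch; the surrounding transport and pipeline wiring
are assumptions, not part of this module)::

    from pipecat.services.openai.stt import OpenAIRealtimeSTTService, OpenAISTTService

    # Segmented REST transcription (pair with a VAD upstream).
    stt = OpenAISTTService(api_key="sk-...")

    # Or low-latency streaming transcription over the Realtime API.
    stt = OpenAIRealtimeSTTService(api_key="sk-...")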
"""

import base64
import json
from dataclasses import dataclass, field
from typing import Any, AsyncGenerator, Literal, Optional, Union

from loguru import logger

from pipecat.audio.utils import create_stream_resampler
from pipecat.frames.frames import (
    CancelFrame,
    EndFrame,
    Frame,
    InterimTranscriptionFrame,
    StartFrame,
    TranscriptionFrame,
    UserStartedSpeakingFrame,
    UserStoppedSpeakingFrame,
    VADUserStartedSpeakingFrame,
    VADUserStoppedSpeakingFrame,
)
from pipecat.processors.frame_processor import FrameDirection
from pipecat.services.settings import (
    NOT_GIVEN,
    STTSettings,
    _NotGiven,
    _warn_deprecated_param,
)
from pipecat.services.stt_latency import OPENAI_REALTIME_TTFS_P99, OPENAI_TTFS_P99
from pipecat.services.stt_service import WebsocketSTTService
from pipecat.services.whisper.base_stt import (
    BaseWhisperSTTService,
    BaseWhisperSTTSettings,
    Transcription,
)
from pipecat.transcriptions.language import Language
from pipecat.utils.time import time_now_iso8601
from pipecat.utils.tracing.service_decorators import traced_stt

try:
    from websockets.asyncio.client import connect as websocket_connect
    from websockets.protocol import State
except ModuleNotFoundError:
    websocket_connect = None
    State = None


@dataclass
class OpenAISTTSettings(BaseWhisperSTTSettings):
    """Settings for the OpenAI STT service."""


class OpenAISTTService(BaseWhisperSTTService):
    """OpenAI Speech-to-Text service that generates text from audio.

    Uses OpenAI's transcription API to convert audio to text. Requires an OpenAI API key
    set via the api_key parameter or OPENAI_API_KEY environment variable.
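
    Example (construction sketch; the ``settings`` values shown are
    illustrative)::

        stt = OpenAISTTService(
            api_key="sk-...",
            settings=OpenAISTTSettings(model="gpt-4o-transcribe"),
        )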
    """

    _settings: OpenAISTTSettings

    def __init__(
        self,
        *,
        model: Optional[str] = None,
        api_key: Optional[str] = None,
        base_url: Optional[str] = None,
        language: Optional[Language] = Language.EN,
        prompt: Optional[str] = None,
        temperature: Optional[float] = None,
        settings: Optional[OpenAISTTSettings] = None,
        ttfs_p99_latency: Optional[float] = None,
        **kwargs,
    ):
        """Initialize OpenAI STT service.

        Args:
            model: Model to use — either gpt-4o or Whisper.

                .. deprecated:: 0.0.105
                    Use ``settings=OpenAISTTSettings(model=...)`` instead.

            api_key: OpenAI API key. Defaults to None.
            base_url: API base URL. Defaults to None.
            language: Language of the audio input. Defaults to English.

                .. deprecated:: 0.0.105
                    Use ``settings=OpenAISTTSettings(language=...)`` instead.

            prompt: Optional text to guide the model's style or continue a previous segment.

                .. deprecated:: 0.0.105
                    Use ``settings=OpenAISTTSettings(prompt=...)`` instead.

            temperature: Optional sampling temperature between 0 and 1. Defaults to 0.0.

                .. deprecated:: 0.0.105
                    Use ``settings=OpenAISTTSettings(temperature=...)`` instead.

            settings: Runtime-updatable settings. When provided alongside deprecated
                parameters, ``settings`` values take precedence.
            ttfs_p99_latency: P99 latency from speech end to final transcript in seconds.
                Override for your deployment. See https://github.com/pipecat-ai/stt-benchmark
            **kwargs: Additional arguments passed to BaseWhisperSTTService.
        """
        i_language = language or Language.EN

        default_settings = OpenAISTTSettings(
            model="gpt-4o-transcribe",
            language=self.language_to_service_language(i_language),
        )

        if model is not None:
            _warn_deprecated_param("model", OpenAISTTSettings, "model")
            default_settings.model = model
        if prompt is not None:
            _warn_deprecated_param("prompt", OpenAISTTSettings, "prompt")
            default_settings.prompt = prompt
        if temperature is not None:
            _warn_deprecated_param("temperature", OpenAISTTSettings, "temperature")
            default_settings.temperature = temperature

        if settings is not None:
            default_settings.apply_update(settings)

        super().__init__(
            api_key=api_key,
            base_url=base_url,
            settings=default_settings,
            ttfs_p99_latency=ttfs_p99_latency or OPENAI_TTFS_P99,
            **kwargs,
        )

    async def _transcribe(self, audio: bytes) -> Transcription:
        assert self._settings.language is not None

        kwargs: dict[str, Any] = {
            "file": ("audio.wav", audio, "audio/wav"),
            "model": self._settings.model,
            "language": self._settings.language,
        }

        if self._include_prob_metrics and self._settings.model in (
            "gpt-4o-transcribe",
            "gpt-4o-mini-transcribe",
        ):
            # GPT-4o transcription models report confidence via logprobs,
            # which requires the plain json response format.
            kwargs["response_format"] = "json"
            kwargs["include"] = ["logprobs"]
        else:
            kwargs["response_format"] = "verbose_json"

        if self._settings.prompt is not None:
            kwargs["prompt"] = self._settings.prompt

        if self._settings.temperature is not None:
            kwargs["temperature"] = self._settings.temperature

        return await self._client.audio.transcriptions.create(**kwargs)


_OPENAI_SAMPLE_RATE = 24000


@dataclass
class OpenAIRealtimeSTTSettings(STTSettings):
    """Settings for OpenAIRealtimeSTTService.

    Parameters:
        prompt: Optional prompt text to guide transcription style.
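
    Example (hypothetical prompt value)::

        delta = OpenAIRealtimeSTTSettings(prompt="Vocabulary: Pipecat, WebRTC")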
    """

    prompt: str | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN)


class OpenAIRealtimeSTTService(WebsocketSTTService):
    """OpenAI Realtime Speech-to-Text service using WebSocket transcription sessions.

    Uses OpenAI's Realtime API in transcription-only mode for real-time streaming
    speech recognition with optional server-side VAD and noise reduction. The model
    does not generate conversational responses — only transcription output.

    This service supports two VAD modes:

    **Local VAD** (default): Disable server-side VAD and use
    a local VAD processor in the pipeline instead. When a
    ``VADUserStoppedSpeakingFrame`` is received, the service commits the
    audio buffer so that the server begins transcription for the completed
    speech segment.

    **Server-side VAD** (``turn_detection=None``): The OpenAI server performs voice-activity
    detection. The service broadcasts ``UserStartedSpeakingFrame`` and
    ``UserStoppedSpeakingFrame`` when the server detects speech boundaries.
    Do **not** use a separate VAD processor in the pipeline in this mode.

    Audio is sent as 24 kHz 16-bit mono PCM as required by the OpenAI Realtime
    API. If the pipeline runs at a different sample rate (e.g. 16 kHz for Silero
    VAD compatibility), audio is automatically upsampled before sending.

    Example::

        stt = OpenAIRealtimeSTTService(
            api_key="sk-...",
            model="gpt-4o-transcribe",
            noise_reduction="near_field",
        )
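
    Or, to use server-side VAD instead of a local VAD processor (sketch; the
    threshold value is illustrative, mirroring the ``turn_detection``
    parameter docs below)::

        stt = OpenAIRealtimeSTTService(
            api_key="sk-...",
            turn_detection={"type": "server_vad", "threshold": 0.5},
        )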
    """

    _settings: OpenAIRealtimeSTTSettings

    def __init__(
        self,
        *,
        api_key: str,
        model: Optional[str] = None,
        base_url: str = "wss://api.openai.com/v1/realtime",
        language: Optional[Language] = Language.EN,
        prompt: Optional[str] = None,
        turn_detection: Optional[Union[dict[str, Any], Literal[False]]] = False,
        noise_reduction: Optional[Literal["near_field", "far_field"]] = None,
        should_interrupt: bool = True,
        settings: Optional[OpenAIRealtimeSTTSettings] = None,
        ttfs_p99_latency: Optional[float] = None,
        **kwargs,
    ):
        """Initialize the OpenAI Realtime STT service.

        Args:
            api_key: OpenAI API key for authentication.
            model: Transcription model. Supported values are
                ``"gpt-4o-transcribe"`` and ``"gpt-4o-mini-transcribe"``.

                .. deprecated:: 0.0.105
                    Use ``settings=OpenAIRealtimeSTTSettings(model=...)`` instead.

            base_url: WebSocket base URL for the Realtime API.
                Defaults to ``"wss://api.openai.com/v1/realtime"``.
            language: Language of the audio input. Defaults to English.

                .. deprecated:: 0.0.105
                    Use ``settings=OpenAIRealtimeSTTSettings(language=...)`` instead.

            prompt: Optional prompt text to guide transcription style
                or provide keyword hints.

                .. deprecated:: 0.0.105
                    Use ``settings=OpenAIRealtimeSTTSettings(prompt=...)`` instead.

            turn_detection: Server-side VAD configuration. Defaults to
                ``False`` (disabled), which relies on a local VAD
                processor in the pipeline. Pass ``None`` to use server
                defaults (``server_vad``), or a dict with custom
                settings (e.g. ``{"type": "server_vad", "threshold": 0.5}``).
            noise_reduction: Noise reduction mode. ``"near_field"`` for
                close microphones, ``"far_field"`` for distant
                microphones, or ``None`` to disable.
            should_interrupt: Whether to interrupt bot output when
                speech is detected by server-side VAD. Only applies when
                turn detection is enabled. Defaults to True.
            settings: Runtime-updatable settings. When provided alongside deprecated
                parameters, ``settings`` values take precedence.
            ttfs_p99_latency: P99 latency from speech end to final transcript in seconds.
                Override for your deployment. See https://github.com/pipecat-ai/stt-benchmark
            **kwargs: Additional arguments passed to parent
                WebsocketSTTService.
        """
        if websocket_connect is None:
            raise ImportError(
                "websockets is required for OpenAIRealtimeSTTService. "
                "Install it with: pip install pipecat-ai[openai]"
            )

        default_settings = OpenAIRealtimeSTTSettings(
            model="gpt-4o-transcribe",
            language=Language.EN,
            prompt=None,
        )

        if model is not None:
            _warn_deprecated_param("model", OpenAIRealtimeSTTSettings, "model")
            default_settings.model = model
        if language is not None and language != Language.EN:
            _warn_deprecated_param("language", OpenAIRealtimeSTTSettings, "language")
            default_settings.language = language
        if prompt is not None:
            _warn_deprecated_param("prompt", OpenAIRealtimeSTTSettings, "prompt")
            default_settings.prompt = prompt

        if settings is not None:
            default_settings.apply_update(settings)

        super().__init__(
            ttfs_p99_latency=ttfs_p99_latency or OPENAI_REALTIME_TTFS_P99,
            settings=default_settings,
            **kwargs,
        )

        self._api_key = api_key
        self._base_url = base_url
        self._turn_detection = turn_detection
        self._noise_reduction = noise_reduction
        self._should_interrupt = should_interrupt
        self._receive_task = None
        self._session_ready = False
        self._resampler = create_stream_resampler()
        self._server_vad_enabled = turn_detection is not False

    @staticmethod
    def _language_to_code(language: Language) -> str:
        """Convert a Language enum value to an ISO-639-1 code.

        Args:
            language: The Language enum value.

        Returns:
            Two-letter ISO-639-1 language code.
        """
        return language.value.split("-")[0].lower()

    def can_generate_metrics(self) -> bool:
        """Check if the service can generate processing metrics.

        Returns:
            True, as this service supports metrics generation.
        """
        return True

    async def _update_settings(self, delta: STTSettings) -> dict[str, Any]:
        """Apply a settings delta and send session update if needed.

        Sends a ``session.update`` to the server when the session is active.

        Args:
            delta: A :class:`STTSettings` (or ``OpenAIRealtimeSTTSettings``) delta.

        Returns:
            Dict mapping changed field names to their previous values.
        """
        changed = await super()._update_settings(delta)
        if changed and self._session_ready:
            await self._send_session_update()
        return changed

    async def start(self, frame: StartFrame):
        """Start the service and establish WebSocket connection.

        Args:
            frame: The start frame triggering service initialization.
        """
        await super().start(frame)
        await self._connect()

    async def stop(self, frame: EndFrame):
        """Stop the service and close WebSocket connection.

        Args:
            frame: The end frame triggering service shutdown.
        """
        await super().stop(frame)
        await self._disconnect()

    async def cancel(self, frame: CancelFrame):
        """Cancel the service and close WebSocket connection.

        Args:
            frame: The cancel frame triggering service cancellation.
        """
        await super().cancel(frame)
        await self._disconnect()

    async def run_stt(self, audio: bytes) -> AsyncGenerator[Frame, None]:
        """Send audio data to the transcription session.

        Audio is streamed over the WebSocket. Transcription results arrive
        asynchronously via the receive task and are pushed as
        ``InterimTranscriptionFrame`` or ``TranscriptionFrame``.

        Args:
            audio: Raw audio bytes (16-bit mono PCM at the pipeline
                sample rate). Automatically resampled to 24 kHz.

        Yields:
            None — results are delivered via the WebSocket receive task.
        """
        await self._send_audio(audio)
        yield None

    async def process_frame(self, frame: Frame, direction: FrameDirection):
        """Process frames from the pipeline.

        Extends the base STT service to handle local VAD events when
        server-side VAD is disabled. On ``VADUserStoppedSpeakingFrame``,
        commits the audio buffer so the server begins transcription for
        the completed speech segment.

        Args:
            frame: The frame to process.
            direction: The direction of frame flow in the pipeline.
        """
        await super().process_frame(frame, direction)

        if not self._server_vad_enabled:
            if isinstance(frame, VADUserStartedSpeakingFrame):
                await self.start_processing_metrics()
            elif isinstance(frame, VADUserStoppedSpeakingFrame):
                await self._commit_audio_buffer()

    async def _connect(self):
        """Connect to the transcription endpoint and start receiving."""
        await super()._connect()
        await self._connect_websocket()
        if self._websocket and not self._receive_task:
            self._receive_task = self.create_task(
                self._receive_task_handler(self._report_error)
            )

    async def _disconnect(self):
        """Disconnect and clean up background tasks."""
        await super()._disconnect()
        if self._receive_task:
            await self.cancel_task(self._receive_task, timeout=1.0)
            self._receive_task = None
        await self._disconnect_websocket()

    async def _connect_websocket(self):
        """Establish the WebSocket connection to the transcription endpoint."""
        try:
            if self._websocket and self._websocket.state is State.OPEN:
                return

            self._session_ready = False

            url = f"{self._base_url}?intent=transcription"
            self._websocket = await websocket_connect(
                uri=url,
                additional_headers={"Authorization": f"Bearer {self._api_key}"},
            )
            await self._call_event_handler("on_connected")
        except Exception as e:
            await self.push_error(
                error_msg=f"Error connecting to OpenAI Realtime STT: {e}", exception=e
            )
            self._websocket = None

    async def _disconnect_websocket(self):
        """Close the WebSocket connection."""
        try:
            self._session_ready = False
            if self._websocket:
                await self._websocket.close()
        except Exception as e:
            await self.push_error(error_msg=f"Error disconnecting: {e}", exception=e)
        finally:
            self._websocket = None
            await self._call_event_handler("on_disconnected")

    async def _ws_send(self, message: dict):
        """Send a JSON message over the WebSocket.

        Args:
            message: The message dict to serialize and send.
        """
        try:
            if not self._disconnecting and self._websocket:
                await self._websocket.send(json.dumps(message))
        except Exception as e:
            if self._disconnecting or not self._websocket:
                return
            await self.push_error(error_msg=f"Error sending message: {e}", exception=e)

    async def _send_session_update(self):
        """Send ``session.update`` to configure the transcription session."""
        transcription: dict[str, Any] = {"model": self._settings.model}

        language_code = (
            self._language_to_code(self._settings.language)
            if self._settings.language
            else None
        )
        if language_code:
            transcription["language"] = language_code

        if self._settings.prompt:
            transcription["prompt"] = self._settings.prompt

        input_audio: dict[str, Any] = {
            "format": {"type": "audio/pcm", "rate": _OPENAI_SAMPLE_RATE},
            "transcription": transcription,
        }

        if self._turn_detection is False:
            # Local VAD mode: explicitly disable server-side turn detection.
            input_audio["turn_detection"] = None
        elif self._turn_detection is not None:
            input_audio["turn_detection"] = self._turn_detection

        if self._noise_reduction:
            input_audio["noise_reduction"] = {"type": self._noise_reduction}

        await self._ws_send(
            {
                "type": "session.update",
                "session": {"audio": {"input": input_audio}},
            }
        )

    async def _send_audio(self, audio: bytes):
        """Send audio data via ``input_audio_buffer.append``.

        Resamples from the pipeline sample rate to 24 kHz if needed.

        Args:
            audio: Raw audio bytes at the pipeline sample rate.
        """
        audio = await self._resampler.resample(audio, self.sample_rate, _OPENAI_SAMPLE_RATE)
        if not audio:
            return

        payload = base64.b64encode(audio).decode("utf-8")
        await self._ws_send({"type": "input_audio_buffer.append", "audio": payload})

    async def _commit_audio_buffer(self):
        """Commit the current audio buffer for transcription."""
        await self._ws_send({"type": "input_audio_buffer.commit"})

    async def _clear_audio_buffer(self):
        """Clear the current audio buffer."""
        await self._ws_send({"type": "input_audio_buffer.clear"})

    async def _receive_messages(self):
        """Receive and dispatch server events from the transcription session.

        Called by ``WebsocketService._receive_task_handler`` which wraps
        this method with automatic reconnection on connection errors.
        """
        async for message in self._websocket:
            try:
                evt = json.loads(message)
            except json.JSONDecodeError:
                logger.warning("Failed to parse WebSocket message")
                continue

            evt_type = evt.get("type", "")

            if evt_type == "session.created":
                await self._handle_session_created(evt)
            elif evt_type == "session.updated":
                await self._handle_session_updated(evt)
            elif evt_type == "conversation.item.input_audio_transcription.delta":
                await self._handle_transcription_delta(evt)
            elif evt_type == "conversation.item.input_audio_transcription.completed":
                await self._handle_transcription_completed(evt)
            elif evt_type == "conversation.item.input_audio_transcription.failed":
                await self._handle_transcription_failed(evt)
            elif evt_type == "input_audio_buffer.speech_started":
                await self._handle_speech_started(evt)
            elif evt_type == "input_audio_buffer.speech_stopped":
                await self._handle_speech_stopped(evt)
            elif evt_type == "input_audio_buffer.committed":
                logger.trace(f"Audio buffer committed: item_id={evt.get('item_id')}")
            elif evt_type == "error":
                await self._handle_error(evt)
            else:
                logger.trace(f"Unhandled event: {evt_type}")

    async def _handle_session_created(self, evt: dict):
        """Handle ``session.created``.

        Sent immediately after connecting. We respond by configuring the
        session with our desired settings.

        Args:
            evt: The session created event from the server.
        """
        logger.debug("Transcription session created, sending configuration")
        await self._send_session_update()

    async def _handle_session_updated(self, evt: dict):
        """Handle ``session.updated``.

        The session is now fully configured and ready to transcribe.

        Args:
            evt: The session updated event from the server.
        """
        logger.debug("Transcription session configured and ready")
        self._session_ready = True

    async def _handle_transcription_delta(self, evt: dict):
        """Handle incremental transcription text.

        For ``gpt-4o-transcribe`` and ``gpt-4o-mini-transcribe``, deltas
        contain streaming partial text. For ``whisper-1``, each delta
        contains the full turn transcript.

        Args:
            evt: The delta event from the server.
        """
        delta = evt.get("delta", "")
        if delta:
            await self.push_frame(
                InterimTranscriptionFrame(
                    delta,
                    self._user_id,
                    time_now_iso8601(),
                    result=evt,
                )
            )

    async def _handle_transcription_completed(self, evt: dict):
        """Handle a completed transcription for a speech segment.

        Pushes a ``TranscriptionFrame`` and records the result for
        tracing.

        Args:
            evt: The completed event containing the full transcript.
        """
        transcript = evt.get("transcript", "")
        if transcript:
            await self.push_frame(
                TranscriptionFrame(
                    transcript,
                    self._user_id,
                    time_now_iso8601(),
                    result=evt,
                )
            )
            await self._handle_transcription(transcript, True)
            await self.stop_processing_metrics()

    @traced_stt
    async def _handle_transcription(
        self, transcript: str, is_final: bool, language: Optional[Language] = None
    ):
        """Record transcription result for tracing.

        Args:
            transcript: The transcribed text.
            is_final: Whether this is a final transcription result.
            language: Optional language of the transcription.
        """
        pass

    async def _handle_speech_started(self, evt: dict):
        """Handle server-side VAD speech start.

        Broadcasts ``UserStartedSpeakingFrame`` and optionally triggers
        interruption of current bot output.

        Args:
            evt: The ``input_audio_buffer.speech_started`` event.
        """
        logger.debug("Server VAD: speech started")
        await self.broadcast_frame(UserStartedSpeakingFrame())
        if self._should_interrupt:
            await self.broadcast_interruption()
        await self.start_processing_metrics()

    async def _handle_speech_stopped(self, evt: dict):
        """Handle server-side VAD speech stop.

        Broadcasts ``UserStoppedSpeakingFrame``. The audio buffer is
        automatically committed by the server when VAD is enabled.

        Args:
            evt: The ``input_audio_buffer.speech_stopped`` event.
        """
        logger.debug("Server VAD: speech stopped")
        await self.broadcast_frame(UserStoppedSpeakingFrame())

    async def _handle_transcription_failed(self, evt: dict):
        """Handle a transcription failure for a speech segment.

        Logs the error but does not treat it as fatal — the session
        remains active for subsequent turns.

        Args:
            evt: The failed event containing error details.
        """
        error = evt.get("error", {})
        message = error.get("message", "Transcription failed")
        await self.push_error(error_msg=f"OpenAI Realtime STT error: {message}")

    async def _handle_error(self, evt: dict):
        """Handle a fatal error from the transcription session.

        Raises an exception so that ``WebsocketService`` can decide
        whether to attempt reconnection.

        Args:
            evt: The error event.
        """
        error = evt.get("error", {})
        message = error.get("message", "Unknown error")
        code = error.get("code", "error")
        msg = f"OpenAI Realtime STT error [{code}]: {message}"
        await self.push_error(msg)
        raise Exception(msg)