
    qit                     4   d Z ddlZddlZddlZddlZddlZddlmZ ddlm	Z	m
Z
mZ ddlmZ ddlmZ ddlmZmZmZmZmZmZmZmZmZmZmZmZ ddlmZ dd	lm Z  dd
l!m"Z"m#Z# ddl$m%Z% ddl&m'Z' ddl(m)Z) dZ* G d de       Z+ G d de+      Z, G d de+e'      Z-y)zRBase classes for Speech-to-Text services with continuous and segmented processing.    N)abstractmethod)AnyAsyncGeneratorOptional)logger)State)AudioRawFrame
ErrorFrameFrameInterruptionFrame#ServiceSwitcherRequestMetadataFrame
StartFrameSTTMetadataFrameSTTMuteFrameSTTUpdateSettingsFrameTranscriptionFrameVADUserStartedSpeakingFrameVADUserStoppedSpeakingFrame)FrameDirection)	AIService)STTSettingsis_given)DEFAULT_TTFS_P99)WebsocketServiceLanguageg?c                       e Zd ZU dZeed<   dddddddddee   d	ed
ee   dee   dedee   f fdZ	e
defd       Zd Zd Ze
defd       ZdefdZdefdZdedee   fdZededeedf   fd       Zdef fdZ fdZdedeeef   f fdZde d e!fd!Z"ded e!f fd"Z#e!jH                  fded e!f fd#Z%d$ Z&d% Z'd& Z(de)fd'Z*de+fd(Z,d) Z-d* Z.d+ Z/d, Z0defd-Z1d.efd/Z2 xZ3S )0
STTServiceaM  Base class for speech-to-text services.

    Provides common functionality for STT services including audio passthrough,
    muting, settings management, and audio processing. Subclasses must implement
    the run_stt method to provide actual speech recognition.

    Includes an optional keepalive mechanism that sends silent audio when no real
    audio has been sent for a configurable timeout, preventing servers from closing
    idle connections (e.g. when behind a ServiceSwitcher). Subclasses that enable
    keepalive must override ``_send_keepalive()`` to deliver the silence in the
    appropriate service-specific protocol.

    Event handlers:
        on_connected: Called when connected to the STT service.
        on_disconnected: Called when disconnected from the STT service.
        on_connection_error: Called when a connection to the STT service error occurs.

    Example::

        @stt.event_handler("on_connected")
        async def on_connected(stt: STTService):
            logger.debug(f"STT connected")

        @stt.event_handler("on_disconnected")
        async def on_disconnected(stt: STTService):
            logger.debug(f"STT disconnected")

        @stt.event_handler("on_connection_error")
        async def on_connection_error(stt: STTService, error: str):
            logger.error(f"STT connection error: {error}")
    	_settingsTNg       @g      @)audio_passthroughsample_ratestt_ttfb_timeoutttfs_p99_latencykeepalive_timeoutkeepalive_intervalsettingsr!   r"   r#   r$   r%   r&   c                   t        	|   d	d|xs
 t               i| || _        || _        d| _        d| _        d| _        || _        || _	        d| _
        d| _        d| _        d| _        d| _        || _        || _        d| _        d| _        | j'                  d       | j'                  d       | j'                  d       y)
u  Initialize the STT service.

        Args:
            audio_passthrough: Whether to pass audio frames downstream after processing.
                Defaults to True.
            sample_rate: The sample rate for audio input. If None, will be determined
                from the start frame.
            stt_ttfb_timeout: Time in seconds to wait after VAD stop before reporting
                TTFB. This delay allows the final transcription to arrive. Defaults to 2.0.
                Note: STT "TTFB" differs from traditional TTFB (which measures from a discrete
                request to first response byte). Since STT receives continuous audio, we measure
                from when the user stops speaking to when the final transcript arrives—capturing
                the latency that matters for voice AI applications.
            ttfs_p99_latency: P99 latency from speech end to final transcript in seconds.
                This is broadcast via STTMetadataFrame at pipeline start for downstream
                processors (e.g., turn strategies) to optimize timing. Subclasses provide
                measured defaults; pass a value here to override for your deployment.
            keepalive_timeout: Seconds of no audio before sending silence to keep the
                connection alive. None disables keepalive. Useful for services that
                close idle connections (e.g. behind a ServiceSwitcher).
            keepalive_interval: Seconds between idle checks when keepalive is enabled.
            settings: The runtime-updatable settings for the STT service.
            **kwargs: Additional arguments passed to the parent AIService.
        r&   r   F Non_connectedon_disconnectedon_connection_error )super__init__r   _audio_passthrough_init_sample_rate_sample_rate_muted_user_id_ttfs_p99_latency_stt_ttfb_timeout_ttfb_timeout_task_user_speaking_finalize_pending_finalize_requested_last_transcript_time_keepalive_timeout_keepalive_interval_keepalive_task_last_audio_time_register_event_handler)
selfr    r!   r"   r#   r$   r%   r&   kwargs	__class__s
            N/opt/pipecat/venv/lib/python3.12/site-packages/pipecat/services/stt_service.pyr.   zSTTService.__init__P   s    H 	 	
  }		

 	
 #4!,!!1 "2:>$)',). ,-" #4#5 7;'($$^4$$%67$$%:;    returnc                     | j                   S )zCheck if the STT service is currently muted.

        Returns:
            True if the service is muted and will not process audio.
        )r2   r@   s    rC   is_mutedzSTTService.is_muted   s     {{rD   c                     d| _         y)a  Mark that a finalize request has been sent, awaiting server confirmation.

        For providers that have explicit server confirmation of finalization
        (e.g., Deepgram's from_finalize field), call this when sending the finalize
        request. Then call confirm_finalize() when the server confirms.

        For providers without server confirmation, don't call this method - just
        send the finalize/flush/commit command and rely on the TTFB timeout.
        TN)r9   rG   s    rC   request_finalizezSTTService.request_finalize   s     $( rD   c                 :    | j                   rd| _        d| _         yy)aF  Confirm that the server has acknowledged the finalize request.

        Call this when the server response confirms finalization (e.g., Deepgram's
        from_finalize=True). The next TranscriptionFrame pushed will be marked
        as finalized.

        Only has effect if request_finalize() was previously called.
        TFN)r9   r8   rG   s    rC   confirm_finalizezSTTService.confirm_finalize   s"     ##%)D"',D$ $rD   c                     | j                   S )zoGet the current sample rate for audio processing.

        Returns:
            The sample rate in Hz.
        )r1   rG   s    rC   r!   zSTTService.sample_rate   s        rD   modelc                 `  K   t        j                         5  t        j                  d       t        j                  dt        d       ddd       t        j                  d| d       t        | j                        }| j                   ||             d{    y# 1 sw Y   XxY w7 w)	zSet the speech recognition model.

        .. deprecated:: 0.0.104
            Use ``STTUpdateSettingsFrame(model=...)`` instead.

        Args:
            model: The name of the model to use for speech recognition.
        alwayszK'set_model' is deprecated, use 'STTUpdateSettingsFrame(model=...)' instead.   
stacklevelNzSwitching STT model to: [])rN   
warningscatch_warningssimplefilterwarnDeprecationWarningr   infotyper   _update_settings)r@   rN   settings_clss      rC   	set_modelzSTTService.set_model   s      $$& 	!!(+MM]"	 	/wa89DNN+##Lu$=>>>	 	 	?)   B.2B 	AB.B,B. B)%B.languagec                 `  K   t        j                         5  t        j                  d       t        j                  dt        d       ddd       t        j                  d| d       t        | j                        }| j                   ||             d{    y# 1 sw Y   XxY w7 w)	zSet the language for speech recognition.

        .. deprecated:: 0.0.104
            Use ``STTUpdateSettingsFrame(language=...)`` instead.

        Args:
            language: The language to use for speech recognition.
        rP   zQ'set_language' is deprecated, use 'STTUpdateSettingsFrame(language=...)' instead.rQ   rR   NzSwitching STT language to: [rT   )ra   rU   )r@   ra   r^   s      rC   set_languagezSTTService.set_language   s      $$& 	!!(+MMc"	 	28*A>?DNN+##L($CDDD	 	 	Er`   c                     t        |      S )zConvert a language to the service-specific language format.

        Args:
            language: The language to convert.

        Returns:
            The service-specific language identifier, or None if not supported.
        r   )r@   ra   s     rC   language_to_service_languagez'STTService.language_to_service_language   s     !!rD   audioc                    K   yw)aN  Run speech-to-text on the provided audio data.

        This method must be implemented by subclasses to provide actual speech
        recognition functionality.

        Args:
            audio: Raw audio bytes to transcribe.

        Yields:
            Frame: Frames containing transcription results (typically TextFrame).
        Nr,   )r@   rf   s     rC   run_sttzSTTService.run_stt   s      	s   framec                 ~   K   t         |   |       d{    | j                  xs |j                  | _        y7 $w)zwStart the STT service.

        Args:
            frame: The start frame containing initialization parameters.
        N)r-   startr0   audio_in_sample_rater1   r@   ri   rB   s     rC   rk   zSTTService.start  s9      gmE""" 22Pe6P6P 	#s   =;%=c                    K   t         |           d{    | j                          d{    | j                          d{    y7 57 7 	w)zClean up STT service resources.N)r-   cleanup_cancel_ttfb_timeout_cancel_keepalive_taskr@   rB   s    rC   ro   zSTTService.cleanup  sH     go'')))))+++ 	 )+s1   AAAAAAAAAdeltac                    K   t        |j                        r>t        |j                  t              r$| j	                  |j                        }|||_        t
        |   |       d{   }|S 7 w)a  Apply an STT settings delta.

        Handles ``model`` (via parent). Translates ``Language`` enum values
        before applying so the stored value is a service-specific string.
        Concrete services should override this method and handle language
        changes (including any reconnect logic) based on the returned
        changed-field dict.

        Args:
            delta: An STT settings delta.

        Returns:
            Dict mapping changed field names to their previous values.
        N)r   ra   
isinstancer   re   r-   r]   )r@   rs   	convertedchangedrB   s       rC   r]   zSTTService._update_settings  s_       ENN#
5>>8(L99%..II$!*077 8s   A&A2)A0*A2	directionc                   K   | j                   ryt        j                         | _        t	        |d      r|j
                  | _        nd| _        |j                  s0t        j                  d| j                   d|j                          y| j                  | j                  |j                               d{    y7 w)a  Process an audio frame for speech recognition.

        If the service is muted, this method does nothing. Otherwise, it
        processes the audio frame and runs speech-to-text on it, yielding
        transcription results. If the frame has a user_id, it is stored
        for later use in transcription.

        Args:
            frame: The audio frame to process.
            direction: The direction of frame processing.
        Nuser_idr(   z,Empty audio frame received for STT service:  )r2   time	monotonicr>   hasattrrz   r3   rf   r   warningname
num_framesprocess_generatorrh   )r@   ri   rx   s      rC   process_audio_framezSTTService.process_audio_frame)  s      ;; $ 0 5)$!MMDM DM{{NN>tyyk5K[K[J\] $$T\\%++%>???s   B5B?7B=8B?c                   K   t         |   ||       d{    t        |t              r3| j	                  ||       d{    | j                          d{    yt        |t              r3| j                          d{    | j	                  ||       d{    yt        |t              rB| j                  ||       d{    | j                  r| j	                  ||       d{    yyt        |t              r4| j                  |       d{    | j	                  ||       d{    yt        |t              r4| j                  |       d{    | j	                  ||       d{    yt        |t              r|j                  $| j!                  |j                         d{    y|j"                  rt%        j&                         5  t%        j(                  d       t%        j*                  dt,        d       ddd       t/        | j0                        j3                  |j"                        }| j!                  |       d{    yyt        |t4              r8|j6                  | _        t;        j<                  d|j6                  rdnd        yt        |t>              r3| jA                          d{    | j	                  ||       d{    y| j	                  ||       d{    y7 7 7 7 7 u7 M7 *7 7 7 7 7 p# 1 sw Y   xY w7 7 m7 U7 <w)	zProcess frames, handling VAD events and audio segmentation.

        Args:
            frame: The frame to process.
            direction: The direction of frame processing.
        NrP   zPassing a dict via STTUpdateSettingsFrame(settings={...}) is deprecated since 0.0.104, use STTUpdateSettingsFrame(delta=STTSettings(...)) instead.rQ   rR   zSTT service mutedunmuted)!r-   process_frameru   r   
push_frame_push_stt_metadatar   r	   r   r/   r   !_handle_vad_user_started_speakingr   !_handle_vad_user_stopped_speakingr   rs   r]   r&   rV   rW   rX   rY   rZ   r\   r   from_mappingr   muter2   r   debugr   _reset_stt_ttfb_state)r@   ri   rx   rs   rB   s       rC   r   zSTTService.process_frameJ  s     g#E9555eZ(//%333))+++BC))+++//%333}- **5)<<<&&ooeY777 ':;88???//%333:;88???//%33356{{&++EKK888,,. ))(3MMe*#$	 T^^,99%..I++E222   |,**DKLL<5::9'MNO01,,...//%333//%333Y 	6 4++3
 =7?3?3 9  3
 /33s1  L>L)L>LL>L(L>LL>L*L>L%L>-L.*L>LL>2L3)L>L L>6L#7?L>6L&7%L>2L)A
L>L6A1L>
L8L>$L:%L>?L< L>L>L>L>L>L>L>L>L> L>#L>&L>)L3.	L>8L>:L><L>c                 H  K   t        |t              rot        j                         | _        | j                  rd|_        d| _        |j
                  r0| j                          d{    | j                          d{    t        | %  ||       d{    y7 57 7 	w)a  Push a frame downstream, tracking TranscriptionFrame timestamps for TTFB.

        Stores the timestamp of each TranscriptionFrame for TTFB calculation.
        If the frame is marked as finalized (via request_finalize/confirm_finalize),
        reports TTFB immediately and cancels any pending timeout. Otherwise, TTFB is
        reported after a timeout.

        Args:
            frame: The frame to push.
            direction: The direction to push the frame.
        TFN)
ru   r   r|   r:   r8   	finalizedstop_ttfb_metricsrp   r-   r   r@   ri   rx   rB   s      rC   r   zSTTService.push_frame  s      e/0)-D& %%"&).& ,,...//111g 	222	 /12s6   A#B"&B'B">B?B"B B"B" B"c                    K   | j                   }|+t        }t        j                  | j                   d| d       | j                  t        | j                  |       d{    y7 w)zJPush STT metadata frame for downstream processors (e.g., turn strategies).Nz*: ttfs_p99_latency not set, using default s)service_namer#   )r4   r   r   r   r   broadcast_framer   )r@   ttfss     rC   r   zSTTService._push_stt_metadata  s]     %%<#DNNdii[(RSWRXXYZ[""#3$))^b"cccs   AA)!A'"A)c                 ~   K   | j                   r+| j                  | j                          d{    d| _         yy7 w)z%Cancel any pending TTFB timeout task.N)r6   cancel_taskrG   s    rC   rp   zSTTService._cancel_ttfb_timeout  s9     """"4#:#:;;;&*D# #;   +=;=c                 @   K   | j                          d{    y7 w)a  Reset STT TTFB measurement state.

        Called when starting a new utterance or on interruption to ensure
        we don't use stale state for TTFB calculations. This specifically guards
        against the case where a TranscriptionFrame is received without corresponding
        VADUserStartedSpeakingFrame and VADUserStoppedSpeakingFrame frames.

        Note: Does not reset _user_speaking since InterruptionFrame can arrive
        while user is still speaking.
        N)rp   rG   s    rC   r   z STTService._reset_stt_ttfb_state  s      '')))s   c                 x   K   | j                          d{    d| _        d| _        d| _        d| _        y7 !w)a[  Handle VAD user started speaking frame to start tracking transcriptions.

        Cancels any pending TTFB timeout, resets TTFB tracking state, and marks user as speaking.
        Also resets finalization state to prevent stale finalization from a previous utterance.

        Args:
            frame: The VAD user started speaking frame.
        NTFr   )r   r7   r9   r8   r:   r@   ri   s     rC   r   z,STTService._handle_vad_user_started_speaking  s?      ((***"#( !&%&"	 	+s   :8":c                    K   d| _         |j                  dk(  ry|j                  |j                  z
  }| j                  |       d{    | j	                  | j                         d      | _        y7 +w)a  Handle VAD user stopped speaking frame.

        Calculates the actual speech end time and starts a timeout task to wait
        for the final transcription before reporting TTFB.

        Args:
            frame: The VAD user stopped speaking frame.
        Fg        N)
start_timer"   r   )r7   	stop_secs	timestampstart_ttfb_metricscreate_task_ttfb_timeout_handlerr6   )r@   ri   speech_end_times      rC   r   z,STTService._handle_vad_user_stopped_speaking  s{      $ ??c!
  //EOO;%%%AAA #'"2"2&&(/A #3 #
	 	Bs   AA6A4	,A6c                 "  K   	 t        j                  | j                         d{    | j                  dkD  r$| j	                  | j                         d{    d| _        y7 ?7 # t         j
                  $ r Y !w xY w# d| _        w xY ww)a{  Wait for timeout then report TTFB using the last transcript timestamp.

        This timeout allows the final transcription to arrive before we calculate
        and report TTFB. Uses _last_transcript_time as the end time so we measure
        to when the transcript actually arrived, not when the timeout fired.
        If no transcription arrived, no TTFB is reported.
        Nr   )end_time)asynciosleepr5   r:   r   CancelledErrorr6   rG   s    rC   r   z STTService._ttfb_timeout_handler  s     	+-- 6 6777))A-,,d6P6P,QQQ
 '+D# 8Q%% 		 '+D#sV   B"A* A&2A* A(A* B&A* (A* *B =B ?B  B 	BBc                     | j                   @t        j                         | _        | j	                  | j                         d      | _        yy)z1Start the keepalive task if keepalive is enabled.N	keepaliver   )r;   r|   r}   r>   r   _keepalive_task_handlerr=   rG   s    rC   _create_keepalive_taskz!STTService._create_keepalive_task  sH    "".$(NN$4D!#'#3#3,,.[ $4 $D  /rD   c                 ~   K   | j                   r+| j                  | j                          d{    d| _         yy7 w)z#Stop the keepalive task if running.N)r=   r   rG   s    rC   rq   z!STTService._cancel_keepalive_task  s9     ""4#7#7888#'D   8r   c                 &  K   	 t        j                  | j                         d{    	 | j                         s:t	        j
                         | j                  z
  }|| j                  k  rkt        | j                  t        z        }d|dz  z  }| j                  |       d{    t	        j
                         | _        t        j                  |  d       7 7 8# t        $ r$}t        j                  |  d|        Y d}~yd}~ww xY ww)a  Send periodic silent audio to prevent the server from closing the connection.

        When keepalive is enabled, this task checks periodically if the connection
        has been idle (no audio sent) for longer than keepalive_timeout seconds.
        If so, it generates silent 16-bit mono PCM audio and passes it to
        _send_keepalive() for service-specific formatting and sending.
        N    rQ   z sent keepalive silencez keepalive error: )r   r   r<   _is_keepalive_readyr|   r}   r>   r;   intr!   _KEEPALIVE_SILENCE_DURATION_send_keepaliver   trace	Exceptionr   )r@   elapsednum_samplessilencees        rC   r   z"STTService._keepalive_task_handler  s      -- 8 8999//1..*T-B-BBT444!$"2"25P"PQ![1_5**7333(,(8%v%<=> 9 4  $'9!=>s`   $DCDC! D0C! -D.8C! &C'5C! DC! !	D*D	D	DDc                      y)zCheck if the service is ready to send keepalive.

        Subclasses should override this to check their connection state.

        Returns:
            True if keepalive can be sent.
        Tr,   rG   s    rC   r   zSTTService._is_keepalive_ready  s     rD   r   c                     K   t        d      w)zSend silent audio to keep the connection alive.

        Subclasses that enable keepalive must override this to deliver silence
        in their service-specific protocol.

        Args:
            silence: Silent 16-bit mono PCM audio bytes.
        z(Subclasses must override _send_keepalive)NotImplementedErrorr@   r   s     rC   r   zSTTService._send_keepalive$  s      ""LMMs   )4__name__
__module____qualname____doc__r   __annotations__r   r   floatr.   propertyboolrH   rJ   rL   r!   strr_   r   rc   re   r   bytesr   r   rh   r   rk   ro   dictr   r]   r	   r   r   r   
DOWNSTREAMr   r   rp   r   r   r   r   r   r   r   rq   r   r   r   __classcell__rB   s   @rC   r   r   -   s   @ 
 %)"%,0-1$'*.C< c]	C<
  C< #5/C< $E?C< "C< ;'C<J $  
(- !S ! !?S ?(E8 E(	"X 	"(3- 	" 5 ^E4K-H  Q Q,K DcN 0@} @ @B34 34> 34j JXIbIb 3e 3 3:d+*'=X '
=X 
6+$(2T 	NU 	NrD   r   c                        e Zd ZdZdddee   f fdZdef fdZe	j                  fdede	f fd	Zdede	f fd
ZdefdZdefdZdede	fdZ xZS )SegmentedSTTServicea  STT service that processes speech in segments using VAD events.

    Uses Voice Activity Detection (VAD) events to detect speech segments and runs
    speech-to-text only on those segments, rather than continuously.

    Requires VAD to be enabled in the pipeline to function properly. Maintains a
    small audio buffer to account for the delay between actual speech start and
    VAD detection.
    N)r!   r!   c                ~    t        |   dd|i| d| _        d| _        t	               | _        d| _        d| _        y)a  Initialize the segmented STT service.

        Args:
            sample_rate: The sample rate for audio input. If None, will be determined
                from the start frame.
            **kwargs: Additional arguments passed to the parent STTService.
        r!   Nr   Fr,   )r-   r.   _content_wave	bytearray_audio_buffer_audio_buffer_size_1sr7   )r@   r!   rA   rB   s      rC   r.   zSegmentedSTTService.__init__;  sB     	;[;F;
&[%&"#rD   ri   c                 h   K   t         |   |       d{    | j                  dz  | _        y7 w)zStart the segmented STT service and initialize audio buffer.

        Args:
            frame: The start frame containing initialization parameters.
        NrQ   )r-   rk   r!   r   rm   s     rC   rk   zSegmentedSTTService.startJ  s3      gmE"""%)%5%5%9" 	#s   202rx   c                 p   K   t        |t              rd|_        t        |   ||       d{    y7 w)af  Push a frame, marking TranscriptionFrames as finalized.

        Segmented STT services process complete speech segments and return a single
        TranscriptionFrame per segment, so every transcription is inherently finalized.

        Args:
            frame: The frame to push.
            direction: The direction of frame flow in the pipeline.
        TN)ru   r   r   r-   r   r   s      rC   r   zSegmentedSTTService.push_frameS  s0      e/0"EOg 	222s   +646c                    K   t         |   ||       d{    t        |t              r| j	                  |       d{    yt        |t
              r| j                  |       d{    yy7 Y7 27 
w)z;Process frames, handling VAD events and audio segmentation.N)r-   r   ru   r   _handle_user_started_speakingr   _handle_user_stopped_speakingr   s      rC   r   z!SegmentedSTTService.process_framea  sn     g#E9555e8944U;;;:;44U;;; <	 	6 <;s3   A7A1(A7 A3)A7*A5+A73A75A7c                    K   d| _         y w)NT)r7   r   s     rC   r   z1SegmentedSTTService._handle_user_started_speakingj  s     "s   	c                   K   d| _         t        j                         }t        j                  |d      }|j                  d       |j                  d       |j                  | j                         |j                  | j                         |j                          |j                  d       | j                  j                          | j                  | j                  |j!                                      d {    y 7 w)NFwbrQ      r   )r7   ioBytesIOwaveopensetsampwidthsetnchannelssetframerater!   writeframesr   closeseekclearr   rh   read)r@   ri   contentwavs       rC   r   z1SegmentedSTTService._handle_user_stopped_speakingm  s     #**,ii&))***+		Q 	  "$$T\\',,.%ABBBs   C6D 8C>9D c                 `  K   t        |d      r|j                  | _        nd| _        | xj                  |j                  z  c_        | j
                  sZt        | j                        | j                  kD  r7t        | j                        | j                  z
  }| j                  |d | _        yyyw)a  Process audio frames by buffering them for segmented transcription.

        Continuously buffers audio, growing the buffer while user is speaking and
        maintaining a small buffer when not speaking to account for VAD delay.

        If the frame has a user_id, it is stored for later use in transcription.

        Args:
            frame: The audio frame to process.
            direction: The direction of frame processing.
        rz   r(   N)r~   rz   r3   r   rf   r7   lenr   )r@   ri   rx   	discardeds       rC   r   z'SegmentedSTTService.process_audio_frame~  s      5)$!MMDM DM 	ekk) ""s4+=+='>A[A['[D../$2L2LLI!%!3!3IJ!?D (\"s   B,B.)r   r   r   r   r   r   r.   r   rk   r   r   r   r   r   r   r   r   r   r	   r   r   r   s   @rC   r   r   0  s     8< $x} $: : JXIbIb 3e 3 3< <> <#9T #C9T C"@} @ @rD   r   c                   v     e Zd ZdZdddefdZ fdZ fdZded	ef fd
Z	d	efdZ
defdZdefdZ xZS )WebsocketSTTServicea#  Base class for websocket-based STT services.

    Combines STT functionality with websocket connectivity, providing automatic
    error handling, reconnection capabilities, and optional silence-based keepalive.

    The keepalive feature (inherited from STTService) sends silent audio when no
    real audio has been sent for a configurable timeout, preventing servers from
    closing idle connections (e.g. when behind a ServiceSwitcher). Subclasses can
    override ``_send_keepalive()`` to wrap the silence in a service-specific protocol.
    T)reconnect_on_errorr   c                `    t        j                  | fi | t        j                  | fd|i| y)a.  Initialize the Websocket STT service.

        Args:
            reconnect_on_error: Whether to automatically reconnect on websocket errors.
            **kwargs: Additional arguments passed to parent classes (including
                keepalive_timeout and keepalive_interval for STTService).
        r   N)r   r.   r   )r@   r   rA   s      rC   r.   zWebsocketSTTService.__init__  s1     	D+F+!!$X;MXQWXrD   c                 ^   K   t         |           d{    | j                          y7 w)z,Connect and start keepalive task if enabled.N)r-   _connectr   rr   s    rC   r   zWebsocketSTTService._connect  s)     g   ##% 	!s   -+-c                 r   K   t         |           d{    | j                          d{    y7 7 w)z%Disconnect and cancel keepalive task.N)r-   _disconnectrq   rr   s    rC   r   zWebsocketSTTService._disconnect  s3     g!###))+++ 	$+s   737577attempt_numberrE   c                    K   t         |   |       d{   }|r(| j                          d{    | j                          |S 7 07 w)zReconnect and restart keepalive task.

        The keepalive task breaks out of its loop on send errors, so it may
        be dead after the websocket failure that triggered this reconnect.
        N)r-   _reconnect_websocketrq   r   )r@   r   resultrB   s      rC   r   z(WebsocketSTTService._reconnect_websocket  sK      w3NCC--///'')	 D/s   AAAA	A	Ac                 n    | j                   duxr& | j                   j                  t        j                  u S )z7Check if the websocket is open and ready for keepalive.N)
_websocketstater   OPENrG   s    rC   r   z'WebsocketSTTService._is_keepalive_ready  s)    d*Rt/D/D

/RRrD   r   c                 V   K   | j                   j                  |       d{    y7 w)a.  Send silent audio over the websocket to keep the connection alive.

        The default implementation sends raw PCM bytes directly. Subclasses
        can override this to wrap the silence in a service-specific protocol.

        Args:
            silence: Silent 16-bit mono PCM audio bytes.
        N)r   sendr   s     rC   r   z#WebsocketSTTService._send_keepalive  s       oo""7+++s   )')errorc                    K   | j                  d|j                         d {    | j                  |       d {    y 7 7 w)Nr+   )_call_event_handlerr  push_error_frame)r@   r  s     rC   _report_errorz!WebsocketSTTService._report_error  s?     &&'<ekkJJJ##E*** 	K*s    AAAAAA)r   r   r   r   r   r.   r   r   r   r   r   r   r   r
   r  r   r   s   @rC   r   r     sa    	 $(Y !Y &
,

 
 
ST S	,U 	,+ +rD   r   ).r   r   r   r|   rV   r   abcr   typingr   r   r   logurur   websockets.protocolr   pipecat.frames.framesr	   r
   r   r   r   r   r   r   r   r   r   r   "pipecat.processors.frame_processorr   pipecat.services.ai_servicer   pipecat.services.settingsr   r   pipecat.services.stt_latencyr   "pipecat.services.websocket_servicer   pipecat.transcriptions.languager   r   r   r   r   r,   rD   rC   <module>r     s    Y  	     0 0  %    > 1 ; 9 ? 4 " @N @NFg@* g@TC+*&6 C+rD   