
    qiK;                        d Z ddlZddlZddlmZ ddlmZmZm	Z	 ddl
mZ ddlmZmZmZmZmZmZmZmZ ddlmZ ddlmZmZ dd	lmZ dd
lmZ ddlmZ ddl m!Z! ddl"m#Z# 	 ddl$m%Z& ddl'm(Z( e G d de             Z- G d d      Z. G d de      Z/y# e)$ r7Z* ejV                  de*         ejV                  d        e,de*       dZ*[*ww xY w)zCartesia Speech-to-Text service implementation.

This module provides a WebSocket-based STT service that integrates with
the Cartesia Live transcription API for real-time speech recognition.
    N)	dataclass)AnyAsyncGeneratorOptional)logger)CancelFrameEndFrameFrameInterimTranscriptionFrame
StartFrameTranscriptionFrameVADUserStartedSpeakingFrameVADUserStoppedSpeakingFrame)FrameDirection)STTSettings_warn_deprecated_param)CARTESIA_TTFS_P99)WebsocketSTTService)Language)time_now_iso8601)
traced_stt)connect)StatezException: zIIn order to use Cartesia, you need to `pip install pipecat-ai[cartesia]`.zMissing module: c                       e Zd ZdZy)CartesiaSTTSettingsz Settings for CartesiaSTTService.N)__name__
__module____qualname____doc__     O/opt/pipecat/venv/lib/python3.12/site-packages/pipecat/services/cartesia/stt.pyr   r   /   s    *r!   r   c            	           e Zd ZdZdej
                  j                  ddddededed	efd
Z	d Z
d ZddZededd fd       Zy)CartesiaLiveOptionszConfiguration options for Cartesia Live STT service.

    .. deprecated:: 0.0.105
        Use ``settings=CartesiaSTTSettings(...)`` for model/language and
        direct ``__init__`` parameters for encoding/sample_rate instead.
    ink-whisper	pcm_s16lei>  modellanguageencodingsample_rater(   r)   r*   r+   c                J    || _         || _        || _        || _        || _        y)a  Initialize CartesiaLiveOptions with default or provided parameters.

        Args:
            model: The transcription model to use. Defaults to "ink-whisper".
            language: Target language for transcription. Defaults to English.
            encoding: Audio encoding format. Defaults to "pcm_s16le".
            sample_rate: Audio sample rate in Hz. Defaults to 16000.
            **kwargs: Additional parameters for the transcription service.
        N)r(   r)   r*   r+   additional_params)selfr(   r)   r*   r+   kwargss         r"   __init__zCartesiaLiveOptions.__init__>   s)    $ 
  &!'r!   c                     | j                   t        | j                  t              r| j                  n| j                  j                  | j
                  t        | j                        d}|S )zConvert options to dictionary format.

        Returns:
            Dictionary containing all configuration parameters.
        r'   )r(   
isinstancer)   strvaluer*   r+   )r.   paramss     r"   to_dictzCartesiaLiveOptions.to_dictV   sN     ZZ)3DMM3)GT]]M`M`t//0	
 r!   c                 >    | j                         j                         S )zGet configuration items as key-value pairs.

        Returns:
            Iterator of (key, value) tuples for all configuration parameters.
        )r6   itemsr.   s    r"   r8   zCartesiaLiveOptions.itemse   s     ||~##%%r!   Nc                 j    t        | |      rt        | |      S | j                  j                  ||      S )zGet a configuration value by key.

        Args:
            key: The configuration parameter name to retrieve.
            default: Default value if key is not found.

        Returns:
            The configuration value or default if not found.
        )hasattrgetattrr-   get)r.   keydefaults      r"   r=   zCartesiaLiveOptions.getm   s4     44%%%%))#w77r!   json_strreturnc                 8     | di t        j                  |      S )zCreate options from JSON string.

        Args:
            json_str: JSON string containing configuration parameters.

        Returns:
            New CartesiaLiveOptions instance with parsed parameters.
        r    )jsonloads)clsr@   s     r"   	from_jsonzCartesiaLiveOptions.from_json{   s     *TZZ)**r!   N)r   r   r   r   r   ENr4   r3   intr0   r6   r8   r=   classmethodrF   r    r!   r"   r$   r$   6   s~     # ))# ( ( 	(
 ( (0&8 	+ 	+)> 	+ 	+r!   r$   c                   f    e Zd ZU dZeZeed<   dddddeddeded	ed
e	e
   de	e   de	e   de	e   f fdZdefdZdef fdZdef fdZdef fdZd Zdedef fdZdedeedf   fdZ fdZ fdZdede ee!f   f fdZ"d Z#d Z$d  Z%d! Z&d" Z'e(	 d(d#ed$ed%e	e)   fd&       Z*d' Z+ xZ,S ))CartesiaSTTServicea+  Speech-to-text service using Cartesia Live API.

    Provides real-time speech transcription through WebSocket connection
    to Cartesia's Live transcription service. Supports both interim and
    final transcriptions with configurable models and languages.

    Cartesia disconnects WebSocket connections after 3 minutes of inactivity.
    The timeout resets with each message (audio data or text command) sent to
    the server. Silence-based keepalive is enabled by default to prevent this.
    See: https://docs.cartesia.ai/api-reference/stt/stt
    	_settings r&   N)base_urlr*   r+   live_optionssettingsttfs_p99_latencyapi_keyrO   r*   r+   rP   rQ   rR   c          	         t        dt        j                  j                        }	|t	        dt                |s|j
                  r||j
                  }|j                  r|j                  }|j                  r|j                  |	_        |j                  r/|j                  }
t        |
t              r|
j                  n|
|	_        ||	j                  |       t        | 0  d	||dd|	d| || _        |xs d| _        d| _        || _        y)
a-  Initialize CartesiaSTTService with API key and options.

        Args:
            api_key: Authentication key for Cartesia API.
            base_url: Custom API endpoint URL. If empty, uses default.
            encoding: Audio encoding format. Defaults to "pcm_s16le".
            sample_rate: Audio sample rate in Hz. If None, uses the pipeline
                sample rate.
            live_options: Configuration options for transcription service.

                .. deprecated:: 0.0.105
                    Use ``settings=CartesiaSTTSettings(...)`` for model/language
                    and direct init parameters for encoding/sample_rate instead.

            settings: Runtime-updatable settings. When provided alongside deprecated
                parameters, ``settings`` values take precedence.
            ttfs_p99_latency: P99 latency from speech end to final transcript in seconds.
                Override for your deployment. See https://github.com/pipecat-ai/stt-benchmark
            **kwargs: Additional arguments passed to parent STTService.
        r%   )r(   r)   NrP   x      )r+   rR   keepalive_timeoutkeepalive_intervalrQ   zapi.cartesia.air    )r   r   rH   r4   r   r+   r*   r(   r)   r2   apply_updatesuperr0   _api_key	_base_url_receive_task	_encoding)r.   rS   rO   r*   r+   rP   rQ   rR   r/   default_settingslang	__class__s              r"   r0   zCartesiaSTTService.__init__   s   B /[[&&
 #">3FG++0C".":":K((+44H%%-9-?-?$*(('00D>Hx>X

^b$- ))(3 	
#-!!%	
 	
  !6%6! "r!   rA   c                      y)zCheck if the service can generate processing metrics.

        Returns:
            True, indicating metrics are supported.
        Tr    r9   s    r"   can_generate_metricsz'CartesiaSTTService.can_generate_metrics   s     r!   framec                 t   K   t         |   |       d{    | j                          d{    y7 7 w)zStart the STT service and establish connection.

        Args:
            frame: Frame indicating service should start.
        N)rZ   start_connectr.   rd   ra   s     r"   rf   zCartesiaSTTService.start   s3      gmE"""mmo 	#   848688c                 t   K   t         |   |       d{    | j                          d{    y7 7 w)z{Stop the STT service and close connection.

        Args:
            frame: Frame indicating service should stop.
        N)rZ   stop_disconnectrh   s     r"   rk   zCartesiaSTTService.stop   s6      gl5!!!    	" ri   c                 t   K   t         |   |       d{    | j                          d{    y7 7 w)zCancel the STT service and close connection.

        Args:
            frame: Frame indicating service should be cancelled.
        N)rZ   cancelrl   rh   s     r"   rn   zCartesiaSTTService.cancel   s6      gnU###    	$ ri   c                 @   K   | j                          d{    y7 w)zBStart performance metrics collection for transcription processing.N)start_processing_metricsr9   s    r"   _start_metricsz!CartesiaSTTService._start_metrics  s     ++---s   	directionc                 l  K   t         |   ||       d{    t        |t              r| j	                          d{    yt        |t
              rX| j                  rK| j                  j                  t        j                  u r$| j                  j                  d       d{    yyyy7 7 p7 w)zProcess incoming frames and handle speech events.

        Args:
            frame: The frame to process.
            direction: Direction of frame flow in the pipeline.
        Nfinalize)rZ   process_framer2   r   rq   r   
_websocketstater   OPENsend)r.   rd   rr   ra   s      r"   ru   z CartesiaSTTService.process_frame  s      g#E9555e89%%''':;4??#8#8EJJ#Foo**:666 $G <	 	6 ( 7s3   B4B.'B4B0 A%B4%B2&	B40B42B4audioc                   K   | j                   r&| j                   j                  t        j                  u r| j	                          d{    | j                   j                  |       d{    d y7 -7 w)zProcess audio data for speech-to-text transcription.

        Args:
            audio: Raw audio bytes to transcribe.

        Yields:
            None - transcription results are handled via WebSocket responses.
        N)rv   rw   r   CLOSEDrg   ry   )r.   rz   s     r"   run_sttzCartesiaSTTService.run_stt  sY      $//"7"75<<"G--/!!oo""5)))
 ")s$   AA:A6	"A:+A8,A:8A:c                   K   | j                          d {    t        | 	          d {    | j                  r=| j                  s0| j                  | j                  | j                              | _        y y y 7 d7 PwrG   )_connect_websocketrZ   rg   rv   r]   create_task_receive_task_handler_report_errorr.   ra   s    r"   rg   zCartesiaSTTService._connect'  sm     %%'''g   ??4#5#5!%!1!1$2L2LTM_M_2`!aD $6?	 	( s    B A<B A>AB >B c                    K   t         |           d {    | j                  r*| j                  | j                         d {    d | _        | j	                          d {    y 7 S7 &7 	wrG   )rZ   rl   r]   cancel_task_disconnect_websocketr   s    r"   rl   zCartesiaSTTService._disconnect/  sf     g!###""4#5#5666!%D((*** 	$ 7 	+s3   A/A).A/A+A/#A-$A/+A/-A/deltac                 d   K   t         |   |       d{   }| j                  |       |S 7 w)zApply a settings delta.

        Args:
            delta: A :class:`STTSettings` (or ``CartesiaSTTSettings``) delta.

        Returns:
            Dict mapping changed field names to their previous values.
        N)rZ   _update_settings _warn_unhandled_updated_settings)r.   r   changedra   s      r"   r   z#CartesiaSTTService._update_settings8  s5      077 	--g6 8s   0.0c                   K   	 | j                   r'| j                   j                  t        j                  u ry t	        j
                  d       | j                  j                  | j                  j                  | j                  t        | j                        d}d| j                   dt        j                  j                  |       }d| j                   d}t#        ||       d {   | _         | j%                  d       d {    y 7 #7 # t&        $ r)}| j)                  d	| |
       d {  7   Y d }~y d }~ww xY ww)NzConnecting to Cartesia STTr'   zwss://z/stt/websocket?z
2025-04-16)zCartesia-Versionz	X-API-Key)additional_headerson_connectedzUnknown error occurred: 	error_msg	exception)rv   rw   r   rx   r   debugrM   r(   r)   r^   r3   r+   r\   urllibparse	urlencoder[   websocket_connect_call_event_handler	Exception
push_error)r.   r5   ws_urlheaderses        r"   r   z%CartesiaSTTService._connect_websocketM  s    	Y4??#8#8EJJ#FLL56 -- NN33 NN"4#3#34	F dnn-_V\\=S=STZ=[<\]F+7dmmTG$5fQX$YYDO**>::: Z: 	Y//.Fqc,JVW/XXX	Ysi   E2D EB/D &D
'D DD 	E
D D 	E D;0D31D;6E;E  Ec                   K   	 | j                   r]| j                   j                  t        j                  u r7t	        j
                  d       | j                   j                          d {    d | _         | j                  d       d {    y 7 %# t        $ r)}| j                  d| |       d {  7   Y d }~Pd }~ww xY w7 <# d | _         | j                  d       d {  7   w xY ww)NzDisconnecting from Cartesia STTzError closing websocket: r   on_disconnected)
rv   rw   r   rx   r   r   closer   r   r   )r.   r   s     r"   r   z(CartesiaSTTService._disconnect_websocketa  s     	>4??#8#8EJJ#F>?oo++--- #DO**+<=== . 	Z//.Gs,KWX/YYY	Z > #DO**+<===sw   C-A$B (B)B -C-C	C-B 	CB=2B53B=8C =CC C-C*#C&$C**C-c                 H    | j                   r| j                   S t        d      )NzWebsocket not connected)rv   r   r9   s    r"   _get_websocketz!CartesiaSTTService._get_websocketl  s    ????"122r!   c                 \  K   | j                         2 3 d{   }	 t        j                  |      }| j                  |       d{    97 47 # t        j                  $ r t        j                  d|        Y jt        $ r"}t        j                  d|        Y d}~d}~ww xY w6 yw)z$Process incoming WebSocket messages.NzReceived non-JSON message: zError processing message: )	r   rC   rD   _process_responseJSONDecodeErrorr   warningr   error)r.   messagedatar   s       r"   _receive_messagesz$CartesiaSTTService._receive_messagesq  s     !002 	? 	?'?zz'*,,T222	? 3'' H!<WIFG ?9!=>>? 3sf   B,B*AB*B,)AAAB,B*A+B';B,=B'B"B,"B''B,c                    K   d|v rX|d   dk(  r| j                  |       d {    y |d   dk(  r-|j                  dd      }| j                  |       d {    y y y 7 <7 	w)Ntype
transcriptr   r   zUnknown error)r   )_on_transcriptr=   r   )r.   r   r   s      r"   r   z$CartesiaSTTService._process_response|  sp     T>F||+))$///f( HHY@	oo	o::: )	 / ;s!   !A$A 4A$A"A$"A$r   is_finalr)   c                    K   yw)z+Handle a transcription result with tracing.Nr    )r.   r   r   r)   s       r"   _handle_transcriptionz(CartesiaSTTService._handle_transcription  s     
 	s   c           	      0  K   d|vry |j                  dd      }|j                  dd      }d }d|v r	 t        |d         }t	        |      dkD  r|rm| j                  t        || j                  t               ||             d {    | j                  |||       d {    | j                          d {    y | j                  t        || j                  t               ||             d {    y y # t        t        f$ r Y w xY w7 7 o7 Y7 !w)NtextrN   r   Fr)   r   )result)r=   r   
ValueErrorKeyErrorlen
push_framer   _user_idr   r   stop_processing_metricsr   )r.   r   r   r   r)   s        r"   r   z!CartesiaSTTService._on_transcript  s-    XXfb)
88J.#D$45 z?Qoo&"(* #   00XxPPP22444 oo-"(* #    ) 
 Q4sk   0DC9 ADDD D!D8D99D2D3D9DD
DDDDDrG   )-r   r   r   r   r   Settings__annotations__r   r3   r   rI   r$   floatr0   boolrc   r   rf   r	   rk   r   rn   rq   r
   r   ru   bytesr   r}   rg   rl   r   dictr   r   r   r   r   r   r   r   r   r   r   __classcell__)ra   s   @r"   rL   rL      sn   
 #H"" #%)6:26,=F" F" 	F"
 F" c]F" 23F" ./F" #5/F"Pd  ! !!+ !.7 7> 7 5 ^E4K-H  b+K DcN *Y(	>3
	?; NR)-9A(9K %r!   rL   )0r   rC   urllib.parser   dataclassesr   typingr   r   r   logurur   pipecat.frames.framesr   r	   r
   r   r   r   r   r   "pipecat.processors.frame_processorr   pipecat.services.settingsr   r   pipecat.services.stt_latencyr   pipecat.services.stt_servicer   pipecat.transcriptions.languager   pipecat.utils.timer   (pipecat.utils.tracing.service_decoratorsr   websockets.asyncio.clientr   r   websockets.protocolr   ModuleNotFoundErrorr   r   r   r   r$   rL   r    r!   r"   <module>r      s      ! 0 0 	 	 	 > I : < 4 / ?,F) 	+ 	 	O+ O+di, i  ,FLL;qc"#FLL\]
&qc*
++,s   "B C2CC