
    qi                        d Z ddlZddlZddlmZmZ ddlmZ ddlm	Z	m
Z
mZ ddlZddlmZ ddlmZ ddlmZmZmZmZmZmZmZ dd	lmZmZmZmZ dd
lmZm Z m!Z! ddl"m#Z#m$Z$ ddl%m&Z& ddl'm(Z( ddl)m*Z* 	 ddl+m,Z- ddl.m/Z/  G d de      Z4de#dee5   fdZ6 G d de5e      Z7e G d de             Z8 G d de!      Z9 G d d e       Z:y# e0$ r7Z1 ejd                  de1         ejd                  d        e3de1       dZ1[1ww xY w)!z0Cartesia text-to-speech service implementations.    N)	dataclassfield)Enum)AsyncGeneratorListOptional)logger)	BaseModel)CancelFrameEndFrame
ErrorFrameFrame
StartFrameTTSAudioRawFrameTTSStoppedFrame)	NOT_GIVENTTSSettings	_NotGiven_warn_deprecated_param)TextAggregationMode
TTSServiceWebsocketTTSService)Languageresolve_language)BaseTextAggregator)SkipTagsAggregator)
traced_tts)connect)StatezException: zIIn order to use Cartesia, you need to `pip install pipecat-ai[cartesia]`.zMissing module: c                   N    e Zd ZU dZdZee   ed<   dZee   ed<   dZ	ee
   ed<   y)GenerationConfiga  Configuration for Cartesia Sonic-3 generation parameters.

    Sonic-3 interprets these parameters as guidance to ensure natural speech.
    Test against your content for best results.

    Parameters:
        volume: Volume multiplier for generated speech. Valid range: [0.5, 2.0]. Default is 1.0.
        speed: Speed multiplier for generated speech. Valid range: [0.6, 1.5]. Default is 1.0.
        emotion: Single emotion string to guide the emotional tone. Examples include neutral,
            angry, excited, content, sad, scared. Over 60 emotions are supported. For best
            results, use with recommended voices: Leo, Jace, Kyle, Gavin, Maya, Tessa, Dana,
            and Marian.
    Nvolumespeedemotion)__name__
__module____qualname____doc__r"   r   float__annotations__r#   r$   str     O/opt/pipecat/venv/lib/python3.12/site-packages/pipecat/services/cartesia/tts.pyr!   r!   -   s3     #FHUO"!E8E?!!GXc]!r-   r!   languagereturnc                    i t         j                  dt         j                  dt         j                  dt         j                  dt         j
                  dt         j                  dt         j                  dt         j                  dt         j                  d	t         j                  d
t         j                  dt         j                  dt         j                  dt         j                  dt         j                  dt         j                   dt         j"                  di t         j$                  dt         j&                  dt         j(                  dt         j*                  dt         j,                  dt         j.                  dt         j0                  dt         j2                  dt         j4                  dt         j6                  dt         j8                  dt         j:                  dt         j<                  dt         j>                  dt         j@                  d t         jB                  d!t         jD                  d"t         jF                  d#t         jH                  d$t         jJ                  d%t         jL                  d&t         jN                  d't         jP                  d(t         jR                  d)t         jT                  d*i}tW        | |d+,      S )-zConvert a Language enum to Cartesia language code.

    Args:
        language: The Language enum value to convert.

    Returns:
        The corresponding Cartesia language code, or None if not supported.
    arbgbncsdadeenelesfifrguhehihrhuiditjakaknkomlmrmsnlnopaplptrorusksvtatethtltrukvizhT)use_base_code),r   ARBGBNCSDADEENELESFIFRGUHEHIHRHUIDITJAKAKNKOMLMRMSNLNOPAPLPTRORUSKSVTATETHTLTRUKVIZHr   )r/   LANGUAGE_MAPs     r.   language_to_cartesia_languager   A   s   +T+T+ 	T+ 	T	+
 	T+ 	T+ 	T+ 	T+ 	T+ 	T+ 	T+ 	T+ 	T+ 	T+ 	T+  	T!+" 	T#+$ 	T%+& 	T'+( 	T)+* 	T++, 	T-+. 	T/+0 	T1+2 	T3+4 	T5+6 	T7+8 	T9+: 	T;+< 	T=+> 	T?+@ 	TA+B 	TC+D 	TE+F 	TTTTTTTTU+LZ Hl$GGr-   c                       e Zd ZdZdZdZdZdZdZdZ	dZ
d	Zd
ZdZdZdZdZdZdZdZdZdZdZdZdZdZdZdZdZdZdZdZdZ dZ!d Z"d!Z#d"Z$d#Z%d$Z&d%Z'd&Z(d'Z)d(Z*d)Z+d*Z,d+Z-d,Z.d-Z/d.Z0d/Z1d0Z2d1Z3d2Z4d3Z5d4Z6d5Z7d6Z8d7Z9d8Z:d9Z;d:Z<d;Z=d<Z>y=)>CartesiaEmotionz*Predefined Emotions supported by Cartesia.neutralangryexcitedcontentsadscaredhappyenthusiasticelatedeuphoric
triumphantamazed	surprisedflirtatiouszjoking/comediccuriouspeacefulserenecalmgratefulaffectionatetrustsympatheticanticipation
mysteriousmadoutraged
frustratedagitated
threatened	disgustedcontemptenvious	sarcasticironicdejectedmelancholicdisappointedhurtguiltyboredtiredrejected	nostalgicwistful
apologetichesitantinsecureconfusedresignedanxiouspanickedalarmedproud	confidentdistant	skepticalcontemplative
determinedN)?r%   r&   r'   r(   NEUTRALANGRYEXCITEDCONTENTSADSCAREDHAPPYENTHUSIASTICELATEDEUPHORIC
TRIUMPHANTAMAZED	SURPRISEDFLIRTATIOUSJOKING_COMEDICCURIOUSPEACEFULSERENECALMGRATEFULAFFECTIONATETRUSTSYMPATHETICANTICIPATION
MYSTERIOUSMADOUTRAGED
FRUSTRATEDAGITATED
THREATENED	DISGUSTEDCONTEMPTENVIOUS	SARCASTICIRONICDEJECTEDMELANCHOLICDISAPPOINTEDHURTGUILTYBOREDTIREDREJECTED	NOSTALGICWISTFUL
APOLOGETICHESITANTINSECURECONFUSEDRESIGNEDANXIOUSPANICKEDALARMEDPROUD	CONFIDENTDISTANT	SKEPTICALCONTEMPLATIVE
DETERMINEDr,   r-   r.   r   r   z   s.   4 GEGG
CFE!LFHJFIK%NGHFDH!LEK!LJ
CHJHJIHGIFHK!LDFEEHIGJHHHHGHGEIGI#MJr-   r   c                   f    e Zd ZU dZ ed       Zedz  ez  ed<    ed       Z	e
dz  ez  ed<   y)CartesiaTTSSettingsa\  Settings for CartesiaTTSService and CartesiaHttpTTSService.

    Parameters:
        generation_config: Generation configuration for Sonic-3 models. Includes volume,
            speed (numeric), and emotion (string) parameters.
        pronunciation_dict_id: The ID of the pronunciation dictionary to use for
            custom pronunciations.
    c                      t         S Nr   r,   r-   r.   <lambda>zCartesiaTTSSettings.<lambda>   s    	 r-   )default_factoryNgeneration_configc                      t         S r  r  r,   r-   r.   r  zCartesiaTTSSettings.<lambda>   s    R[ r-   pronunciation_dict_id)r%   r&   r'   r(   r   r  r!   r   r*   r	  r+   r,   r-   r.   r  r     sF     >C)>'$.:  5:J[4\3:	1\r-   r  c                   T    e Zd ZU dZeZeed<    G d de      Zddddddd	dddddd
de	de
e	   de	de	de
e	   de
e   de	de	de
e   de
e   de
e   de
e   de
e   f fdZdefdZdede
e	   fdZde	de	fdZdede	fd Zd!ede	fd"Zd#ede	fd$Zd%ede	fd&Zde	defd'Zd(ee	   d)ee   deee	ef      fd*Z	 	 	 	 d>de	d+ed,ed-e	fd.Zd/ef fd0Z d/e!f fd1Z"d/e#f fd2Z$ fd3Z% fd4Z&d5 Z'd6 Z(d7 Z)d-e	fd8Z*d-e	fd9Z+d?d-e
e	   fd:Z,d; Z-d< Z.e/de	d-e	de0e1df   fd=       Z2 xZ3S )@CartesiaTTSServicea  Cartesia TTS service with WebSocket streaming and word timestamps.

    Provides text-to-speech using Cartesia's streaming WebSocket API.
    Supports word-level timestamps, audio context management, and various voice
    customization options including generation configuration.
    	_settingsc                   b    e Zd ZU dZej
                  Zee   ed<   dZ	ee
   ed<   dZee   ed<   y)CartesiaTTSService.InputParamsa  Input parameters for Cartesia TTS configuration.

        Parameters:
            language: Language to use for synthesis.
            generation_config: Generation configuration for Sonic-3 models. Includes volume,
                speed (numeric), and emotion (string) parameters.
            pronunciation_dict_id: The ID of the pronunciation dictionary to use for custom pronunciations.
        r/   Nr  r	  r%   r&   r'   r(   r   rc   r/   r   r*   r  r!   r	  r+   r,   r-   r.   InputParamsr     <    	 (0{{(8$28<8$45</3x}3r-   r  Nz
2025-04-16z#wss://api.cartesia.ai/tts/websocket	pcm_s16leraw)voice_idcartesia_versionurlmodelsample_rateencoding	containerparamssettingstext_aggregatortext_aggregation_modeaggregate_sentencesapi_keyr  r  r  r  r  r  r  r  r  r  r  r  c                   t        ddt        t        j                        dd      }|t	        dt         d       ||_        |t	        dt         d       ||_        |	xt	        dt                |
sf|	j                   | j                  |	j                        |_        |	j                  |	j                  |_	        |	j                  |	j                  |_
        |
|j                  |
       t        | 4  d||dd|d	||d
| |st        dg| j                        | _        || _        || _        || _        || _        || _        d| _        d| _        y)ar  Initialize the Cartesia TTS service.

        Args:
            api_key: Cartesia API key for authentication.
            voice_id: ID of the voice to use for synthesis.

                .. deprecated:: 0.0.105
                    Use ``settings=CartesiaTTSSettings(voice=...)`` instead.

            cartesia_version: API version string for Cartesia service.
            url: WebSocket URL for Cartesia TTS API.
            model: TTS model to use (e.g., "sonic-3").

                .. deprecated:: 0.0.105
                    Use ``settings=CartesiaTTSSettings(model=...)`` instead.

            sample_rate: Audio sample rate. If None, uses default.
            encoding: Audio encoding format.
            container: Audio container format.
            params: Additional input parameters for voice customization.

                .. deprecated:: 0.0.105
                    Use ``settings=CartesiaTTSSettings(...)`` instead.

            settings: Runtime-updatable settings. When provided alongside deprecated
                parameters, ``settings`` values take precedence.
            text_aggregator: Custom text aggregator for processing input text.

                .. deprecated:: 0.0.95
                    Use an LLMTextProcessor before the TTSService for custom text aggregation.

            text_aggregation_mode: How to aggregate incoming text before synthesis.
            aggregate_sentences: Whether to aggregate sentences within the TTSService.

                .. deprecated:: 0.0.104
                    Use ``text_aggregation_mode`` instead.

            **kwargs: Additional arguments passed to the parent service.
        sonic-3Nr  voicer/   r  r	  r  r$  r  r  FT)r  r  push_text_framespause_frame_processingr  push_start_framer  r  )<spell></spell>)aggregation_typer   r,   )r  r   r   rc   r   r$  r  r/   language_to_service_languager  r	  apply_updatesuper__init__r   _text_aggregation_mode_text_aggregator_api_key_cartesia_version_url_output_container_output_encoding_output_sample_rate_receive_task)selfr   r  r  r  r  r  r  r  r  r  r  r  r  kwargsdefault_settings	__class__s                   r.   r.  zCartesiaTTSService.__init__   sz   P /28;;?""&
 ":/BGL%-""7,?I%*" "8-@A??.040Q0QRXRaRa0b$-++79?9Q9Q$6//;=C=Y=Y$: ))(3 
	
"7 3"#(#!+%
	
 
	
  %7()D<W<W%D!  !1	 "+ (#$ !r-   r0   c                      y)zCheck if this service can generate processing metrics.

        Returns:
            True, as Cartesia service supports metrics generation.
        Tr,   r8  s    r.   can_generate_metricsz'CartesiaTTSService.can_generate_metricsn       r-   r/   c                     t        |      S zConvert a Language enum to Cartesia language format.

        Args:
            language: The language to convert.

        Returns:
            The Cartesia-specific language code, or None if not supported.
        r   r8  r/   s     r.   r+  z/CartesiaTTSService.language_to_service_languagev       -X66r-   textc                     d|  dS )z Wrap text in Cartesia spell tag.r(  r)  r,   )rE  s    r.   SPELLzCartesiaTTSService.SPELL  s    h''r-   r$   c                     d|  dS )z,Convenience method to create an emotion tag.z<emotion value="" />r,   )r$   s    r.   EMOTION_TAGzCartesiaTTSService.EMOTION_TAG  s    !'$//r-   secondsc                     d|  dS )z)Convenience method to create a pause tag.z<break time="zs" />r,   )rK  s    r.   	PAUSE_TAGzCartesiaTTSService.PAUSE_TAG  s    wiu--r-   r"   c                     d|  dS )z*Convenience method to create a volume tag.z<volume ratio="rI  r,   )r"   s    r.   
VOLUME_TAGzCartesiaTTSService.VOLUME_TAG  s     --r-   r#   c                     d|  dS )z)Convenience method to create a speed tag.z<speed ratio="rI  r,   )r#   s    r.   	SPEED_TAGzCartesiaTTSService.SPEED_TAG  s    wd++r-   c                 V    h d}|j                  d      d   j                         }||v S )zCheck if the given language is CJK (Chinese, Japanese, Korean).

        Args:
            language: The language code to check.

        Returns:
            True if the language is Chinese, Japanese, or Korean.
        >   rD   rG   r[   -r   )splitlower)r8  r/   cjk_languages	base_langs       r.   _is_cjk_languagez#CartesiaTTSService._is_cjk_language  s0     +NN3'*002	M))r-   wordsstartsc                     | j                   j                  }|r2| j                  |      r!|r|rdj                  |      }|d   }||fgS g S t	        t        ||            S )u  Process word timestamps based on the current language.

        For CJK languages, Cartesia groups related characters in the same timestamp message.
        For example, in Japanese a single message might be `['こ', 'ん', 'に', 'ち', 'は', '。']`.
        We combine these into single words so the downstream aggregator can add natural
        spacing between meaningful units rather than individual characters.

        For non-CJK languages, words are already properly separated and are used as-is.

        Args:
            words: List of words/characters from Cartesia.
            starts: List of start timestamps for each word/character.

        Returns:
            List of (word, start_time) tuples processed for the language.
         r   )r  r/   rX  joinlistzip)r8  rY  rZ  current_languagecombined_wordfirst_starts         r.   %_process_word_timestamps_for_languagez8CartesiaTTSService._process_word_timestamps_for_language  sk    &  >>22  5 56F G  "$Qi&455	 E6*++r-   continue_transcriptadd_timestamps
context_idc           	      ^   i }d|d<   | j                   j                  |d<   |||| j                   j                  || j                  | j                  | j
                  d|| j                   j                  dk(  rdndd}| j                   j                  r| j                   j                  |d<   | j                   j                  r)| j                   j                  j                  d	      |d
<   | j                   j                  r| j                   j                  |d<   t        j                  |      S )NrB   moder  r  r  sonicFT)
transcriptcontinuerf  model_idr$  output_formatre  use_original_timestampsr/   exclude_noner  r	  )r  r$  r  r4  r5  r6  r/   r  
model_dumpr	  jsondumps)r8  rE  rd  re  rf  voice_configmsgs          r.   
_build_msgzCartesiaTTSService._build_msg  s    #V!^^11T +$,,!!33 11#77
 -040D0D0OuUY
 >>"""nn55C
O>>++'+~~'G'G'R'R! (S (C#$ >>//+/>>+O+OC'(zz#r-   framec                    K   t         |   |       d{    | j                  | _        | j	                          d{    y7 .7 w)zStart the Cartesia TTS service.

        Args:
            frame: The start frame containing initialization parameters.
        N)r-  startr  r6  _connectr8  rx  r;  s     r.   rz  zCartesiaTTSService.start  sA      gmE"""#'#3#3 mmo 	#s    A	A(A	A A	A	c                 t   K   t         |   |       d{    | j                          d{    y7 7 wzXStop the Cartesia TTS service.

        Args:
            frame: The end frame.
        N)r-  stop_disconnectr|  s     r.   r  zCartesiaTTSService.stop  s6      gl5!!!    	"    848688c                 t   K   t         |   |       d{    | j                          d{    y7 7 wr~  )r-  cancelr  r|  s     r.   r  zCartesiaTTSService.cancel   s6      gnU###    	$ r  c                   K   t         |           d {    | j                          d {    | j                  r=| j                  s0| j                  | j                  | j                              | _        y y y 7 f7 Pwr  )r-  r{  _connect_websocket
_websocketr7  create_task_receive_task_handler_report_errorr8  r;  s    r.   r{  zCartesiaTTSService._connect	  sm     g   %%'''??4#5#5!%!1!1$2L2LTM_M_2`!aD $6?	 	!'s    B A<B A>AB >B c                    K   t         |           d {    | j                  r*| j                  | j                         d {    d | _        | j	                          d {    y 7 S7 &7 	wr  )r-  r  r7  cancel_task_disconnect_websocketr  s    r.   r  zCartesiaTTSService._disconnect  sf     g!###""4#5#5666!%D((*** 	$ 7 	+s3   A/A).A/A+A/#A-$A/+A/-A/c                   K   	 | j                   r'| j                   j                  t        j                  u ry t	        j
                  d       t        | j                   d| j                   d| j                          d {   | _         | j                  d       d {    y 7 #7 # t        $ rL}| j                  d| |       d {  7   d | _         | j                  d|        d {  7   Y d }~y d }~ww xY ww)NzConnecting to Cartesia TTSz	?api_key=z&cartesia_version=on_connectedUnknown error occurred: 	error_msg	exceptionon_connection_error)r  stater   OPENr	   debugwebsocket_connectr3  r1  r2  _call_event_handler	Exception
push_errorr8  es     r.   r  z%CartesiaTTSService._connect_websocket  s     	J4??#8#8EJJ#FLL56$599+Yt}}o5GH^H^G_`% DO **>::: ; 	J//.Fqc,JVW/XXX"DO**+@QCIII	Jsu   D 2B( D A	B(  B$B( B&B( #D $B( &B( (	C=1C8
C"C8-C0.C83D 8C==D c                 ,  K   	 | j                          d {    | j                  r7t        j                  d       | j                  j	                          d {    | j                          d {    d | _        | j                  d       d {    y 7 7 ?# t
        $ r)}| j                  d| |       d {  7   Y d }~jd }~ww xY w7 ^7 @# | j                          d {  7   d | _        | j                  d       d {  7   w xY ww)NzDisconnecting from Cartesiar  r  on_disconnected)	stop_all_metricsr  r	   r  closer  r  remove_active_audio_contextr  r  s     r.   r  z(CartesiaTTSService._disconnect_websocket(  s     	>''))):;oo++--- 22444"DO**+<=== * . 	Y//.Fqc,JVW/XXX	Y 5= 22444"DO**+<===s   DB BAB BB D2C3DCDB B 	C%C	>C?C	C 	CC DDD)C,* D
DDDc                 H    | j                   r| j                   S t        d      )NzWebsocket not connected)r  r  r=  s    r.   _get_websocketz!CartesiaTTSService._get_websocket6  s    ????"122r-   c                    K   | j                          d{    |r@t        j                  |dd      }| j                         j	                  |       d{    yy7 G7 w)z?Cancel the active Cartesia context when the bot is interrupted.NT)rf  r  )r  rs  rt  r  send)r8  rf  
cancel_msgs      r.   on_audio_context_interruptedz/CartesiaTTSService.on_audio_context_interrupted;  s[     ##%%%:$NOJ%%',,Z888  	& 9s"   A"AA A"A A" A"c                    K   yw)zClose the Cartesia context after all audio has been played.

        No close message is needed: the server already considers the context
        done once it has sent its ``done`` message, which is handled in
        ``_process_messages``.
        Nr,   )r8  rf  s     r.   on_audio_context_completedz-CartesiaTTSService.on_audio_context_completedB  s      	s   c                    K   |xs | j                         }|r| j                  syt        j                  |  d       | j	                  dd|      }| j                  j                  |       d{    y7 w)zFlush any pending audio and finalize the current context.

        Args:
            context_id: The specific context to flush. If None, falls back to the
                currently active context.
        Nz: flushing audior\  F)rE  rd  rf  )get_active_audio_context_idr  r	   tracerw  r  )r8  rf  flush_idrv  s       r.   flush_audiozCartesiaTTSService.flush_audioK  sg      C!A!A!Ctv-./oo25XoVoo""3'''s   A.A80A61A8c                   K   | j                         2 3 d {   }t        j                  |      }|r| j                  |d         s5|d   }|d   dk(  rN| j	                          d {    | j                  ddg|       d {    | j                  |       d {    |d   dk(  r9| j                  |d   d   |d   d	         }| j                  ||       d {    |d   d
k(  rKt        t        j                  |d         | j                  d|      }| j                  ||       d {    $|d   dk(  rj| j                  t        |             d {    | j                          d {    | j!                  d|        d {    | j#                          | j!                  d|        d {    7 7 c7 J7 47 7 7 z7 d7 I7 6 y w)Nrf  typedone)r   r   )Resetr   
timestampsword_timestampsrY  rz  chunkdata   audior  num_channelsrf  errorrf  zError: )r  zError, unknown message type: )r  rs  loadsaudio_context_availablestop_ttfb_metricsadd_word_timestampsremove_audio_contextrc  r   base64	b64decoder  append_to_audio_context
push_framer   r  r  reset_active_audio_context)r8  messagerv  ctx_idprocessed_timestampsrx  s         r.   _process_messagesz$CartesiaTTSService._process_messagesY  s    !002 	W 	W'**W%Cd::3|;LM&F6{f$,,.....0F/UW]^^^//777V,'+'Q'Q)*73S9J5KG5T($ ../CVLLLV'( **3v;7 $ 0 0!"%	 2265AAAV'ooo&HIII++---oo'#o@@@//1oo2OPSu0UoVVV;	W /^7 M BI-@ W; 3s   G"G GG AG"(G)G"GG"GA G"GAG"0G1,G"GG"5G6G"G.G"GG"G G"G"G"G"G"G"G"G"G" G"c                    K   	 | j                          d {    t        j                  |  d       | j                          d {    I7 57 w)Nz> Cartesia connection was disconnected (timeout?), reconnecting)r  r	   r  r  r=  s    r.   _receive_messagesz$CartesiaTTSService._receive_messagesy  sQ     ((*** LLD6!_`a))+++ * ,s!   AA/AAAAc                  K   | j                   st        j                  |  d| d       nt        j                  |  d| d       	 | j                  r&| j                  j
                  t        j                  u r| j                          d{    | j                  ||      }	 | j                         j                  |       d{    | j                  |       d{    d y7 ^7 %7 # t        $ r]}t        d|        t        |       | j!                          d{  7   | j                          d{  7   Y d}~yd}~ww xY w# t        $ r}t        d|        Y d}~yd}~ww xY ww)a  Generate speech from text using Cartesia's streaming API.

        Args:
            text: The text to synthesize into speech.
            context_id: The context ID for tracking audio frames.

        Yields:
            Frame: Audio frames containing the synthesized speech.
        : Generating TTS []N)rE  rf  r  r  r  )_is_streaming_tokensr	   r  r  r  r  r   CLOSEDr{  rw  r  r  start_tts_usage_metricsr  r   r   r  )r8  rE  rf  rv  r  s        r.   run_ttszCartesiaTTSService.run_tts  sN     ((LLD6!3D6;<LLD6!3D6;<	C??doo&;&;u||&Kmmo%%//t
/CC))+00555224888 J &
 68  )A!'EFF%<<&&(((mmo%%  	C%=aS#ABBB	Cs   AFAE C+E %"C1 C-C1  C/!C1 %E *F+E -C1 /C1 1	E:4E.D1/EE
EE FEE 	E?#E:5F:E??F)r\  TTr\  r  )4r%   r&   r'   r(   r  Settingsr*   r
   r  r+   r   intr   r   boolr.  r>  r   r+  rG  r   rJ  r)   rM  rO  rQ  rX  r   tuplerc  rw  r   rz  r   r  r   r  r{  r  r  r  r  r  r  r  r  r  r   r   r   r  __classcell__r;  s   @r.   r  r     s    #H""4i 4$ #' ,8#%)#(,268<?C.2F" F" 3-	F"
 F" F" }F" c]F" F" F" %F" ./F" ""45F"  ((;<F" &d^F"Pd 	7X 	7(3- 	7(C (C (0_ 0 0.5 .S ..5 .S ., ,3 ,* * *!,#Y!,(,U!,	eCJ	 !,J $(#%% "% 	%
 %N ! !!+ !b+J>3
9S 93 (HSM (W@,  C#  C3  C>%QU+;V  C  Cr-   r  c                   F    e Zd ZU dZeZeed<    G d de      Zdddddddd	ddd

de	de
e	   de
e	   de	de	de
ej                     de
e   de	de	de
e   de
e   f fdZdefdZdede
e	   fdZdef fdZd Zdef fdZdef fdZed e	d!e	deedf   fd"       Z xZS )#CartesiaHttpTTSServicezCartesia HTTP-based TTS service.

    Provides text-to-speech using Cartesia's HTTP API for simpler, non-streaming
    synthesis. Suitable for use cases where streaming is not required and simpler
    integration is preferred.
    r  c                   b    e Zd ZU dZej
                  Zee   ed<   dZ	ee
   ed<   dZee   ed<   y)"CartesiaHttpTTSService.InputParamsa  Input parameters for Cartesia HTTP TTS configuration.

        Parameters:
            language: Language to use for synthesis.
            generation_config: Generation configuration for Sonic-3 models. Includes volume,
                speed (numeric), and emotion (string) parameters.
            pronunciation_dict_id: The ID of the pronunciation dictionary to use for custom pronunciations.
        r/   Nr  r	  r  r,   r-   r.   r  r    r  r-   r  Nzhttps://api.cartesia.aiz
2026-03-01r  r  )
r  r  base_urlr  aiohttp_sessionr  r  r  r  r  r   r  r  r  r  r  r  r  r  r  r  c                r   t        ddt        t        j                        dd      }|t	        dt         d       ||_        |t	        dt         d       ||_        |
xt	        dt                |sf|
j                   | j                  |
j                        |_        |
j                  |
j                  |_	        |
j                  |
j                  |_
        ||j                  |       t        | 4  d|dd|d	| || _        || _        || _        |	| _        || _        d
| _        || _        |du | _        y)a6  Initialize the Cartesia HTTP TTS service.

        Args:
            api_key: Cartesia API key for authentication.
            voice_id: ID of the voice to use for synthesis.

                .. deprecated:: 0.0.105
                    Use ``settings=CartesiaTTSSettings(voice=...)`` instead.

            model: TTS model to use (e.g., "sonic-3").

                .. deprecated:: 0.0.105
                    Use ``settings=CartesiaTTSSettings(model=...)`` instead.

            base_url: Base URL for Cartesia HTTP API.
            cartesia_version: API version string for Cartesia service.
            aiohttp_session: Optional aiohttp ClientSession for HTTP requests.
                If not provided, a session will be created and managed internally.
            sample_rate: Audio sample rate. If None, uses default.
            encoding: Audio encoding format.
            container: Audio container format.
            params: Additional input parameters for voice customization.

                .. deprecated:: 0.0.105
                    Use ``settings=CartesiaTTSSettings(...)`` instead.

            settings: Runtime-updatable settings. When provided alongside deprecated
                parameters, ``settings`` values take precedence.
            **kwargs: Additional arguments passed to the parent TTSService.
        r"  Nr#  r  r$  r  r  T)r  r'  push_stop_framesr  r   r,   )r  r   r   rc   r   r$  r  r/   r+  r  r	  r,  r-  r.  r1  	_base_urlr2  r4  r5  r6  _session_owns_session)r8  r   r  r  r  r  r  r  r  r  r  r  r9  r:  r;  s                 r.   r.  zCartesiaHttpTTSService.__init__  sX   ^ /28;;?""&
 ":/BGL%-""7,?I%*" "8-@A??.040Q0QRXRaRa0b$-++79?9Q9Q$6//;=C=Y=Y$: ))(3 	
#!!%		

 	
  !!1 "+ (#$ 6E,4r-   r0   c                      y)zCheck if this service can generate processing metrics.

        Returns:
            True, as Cartesia HTTP service supports metrics generation.
        Tr,   r=  s    r.   r>  z+CartesiaHttpTTSService.can_generate_metrics   r?  r-   r/   c                     t        |      S rA  rB  rC  s     r.   r+  z3CartesiaHttpTTSService.language_to_service_language(  rD  r-   rx  c                    K   t         |   |       d{    | j                  | _        | j                  rt        j                         | _        yy7 <w)zStart the Cartesia HTTP TTS service.

        Args:
            frame: The start frame containing initialization parameters.
        N)r-  rz  r  r6  r  aiohttpClientSessionr  r|  s     r.   rz  zCartesiaHttpTTSService.start3  sL      gmE"""#'#3#3 #113DM  	#s   AA=Ac                    K   | j                   r7| j                  r*| j                  j                          d{    d| _        yyy7 w)z$Close the HTTP session if we own it.N)r  r  r  r=  s    r.   _close_sessionz%CartesiaHttpTTSService._close_session>  s=     $----%%''' DM #0's   6A	AA	c                 t   K   t         |   |       d{    | j                          d{    y7 7 w)z]Stop the Cartesia HTTP TTS service.

        Args:
            frame: The end frame.
        N)r-  r  r  r|  s     r.   r  zCartesiaHttpTTSService.stopD  s6      gl5!!!!!### 	"#r  c                 t   K   t         |   |       d{    | j                          d{    y7 7 w)zbCancel the Cartesia HTTP TTS service.

        Args:
            frame: The cancel frame.
        N)r-  r  r  r|  s     r.   r  zCartesiaHttpTTSService.cancelM  s6      gnU###!!### 	$#r  rE  rf  c                2  K   t        j                  |  d| d       	 d| j                  j                  d}| j                  | j
                  | j                  d}| j                  j                  |||d}| j                  j                  r| j                  j                  |d<   | j                  j                  r)| j                  j                  j                  d	      |d
<   | j                  j                  r| j                  j                  |d<   | j                  | j                  dd}| j                   d}| j                  j!                  |||      4 d{   }|j"                  dk7  rE|j%                          d{   }	t'        d|	        t)        d|j"                   d|	       |j+                          d{   }
ddd      d{    | j-                  |       d{    t/        
| j0                  d|      }| | j3                          d{    y7 7 7 h7 Z# 1 d{  7  sw Y   jxY w7 X# t(        $ r}t'        d|        Y d}~Xd}~ww xY w7 L# | j3                          d{  7   w xY ww)a  Generate speech from text using Cartesia's HTTP API.

        Args:
            text: The text to synthesize into speech.
            context_id: The context ID for tracking audio frames.

        Yields:
            Frame: Audio frames containing the synthesized speech.
        r  r  rB   )rh  rB   ri  )rm  rk  r$  rn  r/   Trp  r  r	  zapplication/json)zCartesia-Versionz	X-API-KeyzContent-Typez
/tts/bytes)rs  headersN   zCartesia API error: r  zCartesia API returned status z: r  r  r  )r	   r  r  r$  r4  r5  r6  r  r/   r  rr  r	  r2  r1  r  r  poststatusrE  r   r  readr  r   r  r  )r8  rE  rf  ru  rn  payloadr  r  response
error_text
audio_datarx  r  s                r.   r  zCartesiaHttpTTSService.run_ttsV  so     	v/vQ789	+$(0D0DEL "33 11#77M !NN00"%!.	G ~~&&&*nn&=&=
#~~///3~~/O/O/Z/Z!% 0[ 0+, ~~3337>>3W3W/0 %)$:$:!]] 2G ^^$J/C}}))#GW)M 3 3QY??c)'/}}!6J$-A*+NOO#&CHOOCTTVWaVb$cdd#+==?2
3 3 ..t444$  ,,%	E K
 ((***-3!6 33 3 3 3 5  	C%=aS#ABBB	C +$((***s   JE I H0 I ##H8H2AH8H4H8I H6I 4I5"I J*I7+J0I 2H84H86I 8I
>I?I
I 	I4I/*I9 /I44I9 7J9JJJJ)r%   r&   r'   r(   r  r  r*   r
   r  r+   r   r  r  r  r.  r  r>  r   r+  r   rz  r  r   r  r   r  r   r   r   r  r  r  s   @r.   r  r    sg    #H""4i 4$ #'#1 ,;?%)#(,26`5 `5 3-	`5
 }`5 `5 `5 "'"7"78`5 c]`5 `5 `5 %`5 ./`5Dd 	7X 	7(3- 	7	4 	4!$ $$+ $ E+# E+3 E+>%QU+;V E+ E+r-   r  );r(   r  rs  dataclassesr   r   enumr   typingr   r   r   r  logurur	   pydanticr
   pipecat.frames.framesr   r   r   r   r   r   r   pipecat.services.settingsr   r   r   r   pipecat.services.tts_servicer   r   r   pipecat.transcriptions.languager   r   'pipecat.utils.text.base_text_aggregatorr   'pipecat.utils.text.skip_tags_aggregatorr   (pipecat.utils.tracing.service_decoratorsr   websockets.asyncio.clientr   r  websockets.protocolr   ModuleNotFoundErrorr  r  r  r!   r+   r   r   r  r  r  r,   r-   r.   <module>r	     s   7   (  1 1      ` _ ] ] F F F ?,F)"y "(6HH 6H# 6Hr?c4 ?D ]+ ] ] UC, UCpw+Z w+}  ,FLL;qc"#FLL\]
&qc*
++,s   6C D2DD