
    qin                     t   U d Z ddlZddlZddlZddlmZmZ ddlmZ ddl	m
Z
mZmZmZmZmZmZ ddlZddlmZ ddlmZmZ ddlmZmZmZmZmZmZmZmZ dd	l m!Z! dd
l"m#Z# ddl$m%Z%m&Z&m'Z'm(Z( ddl)m*Z*m+Z+m,Z, ddl-m.Z.m/Z/ ddl0m1Z1 	 ddl2m3Z4 ddl5m6Z6  G d de;e      Z< G d de;e      Z= G d de;e      Z> ed       G d d             Z? e?ddddd d!d e@d" e=D              #       e?dddd$d%d&d e@d' e>D              #       e?dddd$d%d&d e@d( e>D              #      d)ZAee;e?f   eBd*<   d+e;d,ee;   fd-ZCd.e.d,ee;   fd/ZDe G d0 d1e&             ZEe G d2 d3eE             ZF G d4 d5e,      ZG G d6 d7e*      ZHy# e7$ r7Z8 ejr                  de8         ejr                  d        e:de8       dZ8[8ww xY w)8av  Sarvam AI text-to-speech service implementation.

This module provides TTS services using Sarvam AI's API with support for multiple
Indian languages and two model variants:

**Model Variants:**

- **bulbul:v2** (default): Standard TTS model
    - Supports: pitch, loudness, pace (0.3-3.0)
    - Default sample rate: 22050 Hz
    - Speakers: anushka (default), abhilash, manisha, vidya, arya, karun, hitesh

- **bulbul:v3-beta**: Advanced TTS model with temperature control
    - Does NOT support: pitch, loudness
    - Supports: pace (0.5-2.0), temperature (0.01-1.0)
    - Default sample rate: 24000 Hz
    - Preprocessing is always enabled
    - Speakers: aditya (default), ritu, priya, neha, rahul, pooja, rohan, simran,
      kavya, amit, dev, ishita, shreya, ratan, varun, manan, sumit, roopa, kabir,
      aayan, shubh, ashutosh, advait, amelia, sophia

- **bulbul:v3**: Advanced TTS model with temperature control
    - Does NOT support: pitch, loudness
    - Supports: pace (0.5-2.0), temperature (0.01-1.0)
    - Default sample rate: 24000 Hz
    - Preprocessing is always enabled
    - Speakers: aditya (default), ritu, priya, neha, rahul, pooja, rohan, simran,
      kavya, amit, dev, ishita, shreya, ratan, varun, manan, sumit, roopa, kabir,
      aayan, shubh, ashutosh, advait, amelia, sophia

See https://docs.sarvam.ai/api-reference-docs/text-to-speech/stream for full API details.
    N)	dataclassfield)Enum)AnyAsyncGeneratorClassVarDictListOptionalTuple)logger)	BaseModelField)CancelFrameEndFrame
ErrorFrameFrameLLMFullResponseEndFrame
StartFrameTTSAudioRawFrameTTSStoppedFrame)FrameDirection)sdk_headers)	NOT_GIVENTTSSettings	_NotGiven_warn_deprecated_param)InterruptibleTTSServiceTextAggregationMode
TTSService)Languageresolve_language)
traced_tts)connect)StatezException: zEIn order to use Sarvam, you need to `pip install pipecat-ai[sarvam]`.zMissing module: c                       e Zd ZdZdZdZdZy)SarvamTTSModela  Available Sarvam TTS models.

    Attributes:
        BULBUL_V2: Standard TTS model with pitch/loudness control.
            - Supports pitch, loudness, pace (0.3-3.0)
            - Default sample rate: 22050 Hz
        BULBUL_V3_BETA: Advanced model with temperature control.
            - Does NOT support pitch/loudness
            - Pace range: 0.5-2.0
            - Supports temperature parameter
            - Default sample rate: 24000 Hz
            - Preprocessing is always enabled
    	bulbul:v2bulbul:v3-beta	bulbul:v3N)__name__
__module____qualname____doc__	BULBUL_V2BULBUL_V3_BETA	BULBUL_V3     M/opt/pipecat/venv/lib/python3.12/site-packages/pipecat/services/sarvam/tts.pyr'   r'   M   s     I%NIr3   r'   c                   ,    e Zd ZdZdZdZdZdZdZdZ	dZ
y	)
SarvamTTSSpeakerV2zAvailable speakers for bulbul:v2 model.

    Female voices: anushka, manisha, vidya, arya
    Male voices: abhilash, karun, hitesh
    anushkaabhilashmanishavidyaaryakarunhiteshN)r+   r,   r-   r.   ANUSHKAABHILASHMANISHAVIDYAARYAKARUNHITESHr2   r3   r4   r6   r6   a   s,     GHGEDEFr3   r6   c                   t    e Zd ZdZdZdZdZdZdZdZ	dZ
d	Zd
ZdZdZdZdZdZdZdZdZdZdZdZdZdZdZdZdZy)SarvamTTSSpeakerV3zyAvailable speakers for bulbul:v3-beta model.

    Includes a wider variety of voices with different characteristics.
    adityaritupriyaneharahulpoojarohansimrankavyaamitdevishitashreyaratanvarunmanansumitroopakabiraayanshubhashutoshadvaitameliasophiaN)r+   r,   r-   r.   ADITYARITUPRIYANEHARAHULPOOJAROHANSIMRANKAVYAAMITDEVISHITASHREYARATANVARUNMANANSUMITROOPAKABIRAAYANSHUBHASHUTOSHADVAITAMELIASOPHIAr2   r3   r4   rF   rF   q   s    
 FDEDEEEFED
CFFEEEEEEEEHFFFr3   rF   T)frozenc                   v    e Zd ZU dZeed<   eed<   eed<   eed<   eed<   ee	e	f   ed<   eed<   eed	f   ed
<   y)TTSModelConfigab  Immutable configuration for a Sarvam TTS model.

    Attributes:
        supports_pitch: Whether the model accepts pitch parameter.
        supports_loudness: Whether the model accepts loudness parameter.
        supports_temperature: Whether the model accepts temperature parameter.
        default_sample_rate: Default audio sample rate in Hz.
        default_speaker: Default speaker voice ID.
        pace_range: Valid range for pace parameter (min, max).
        preprocessing_always_enabled: Whether preprocessing is always enabled.
        speakers: Tuple of available speaker names for this model.
    supports_pitchsupports_loudnesssupports_temperaturedefault_sample_ratedefault_speaker
pace_rangepreprocessing_always_enabled.speakersN)
r+   r,   r-   r.   bool__annotations__intstrr   floatr2   r3   r4   r{   r{      sJ     eUl##"&&CHor3   r{   Fi"V  r7   )333333?      @c              #   4   K   | ]  }|j                     y wNvalue.0ss     r4   	<genexpr>r           ;1qww;   )r|   r}   r~   r   r   r   r   r   i]  r[   )g      ?g       @c              #   4   K   | ]  }|j                     y wr   r   r   s     r4   r   r      r   r   c              #   4   K   | ]  }|j                     y wr   r   r   s     r4   r   r      r   r   )r(   r)   r*   TTS_MODEL_CONFIGSmodelreturnc                     | t         v rt        t         |    j                        S t        t         d   j                        S )zGet the list of available speakers for a given model.

    Args:
        model: The model name (e.g., "bulbul:v2" or "bulbul:v3-beta").

    Returns:
        List of speaker names available for the model.
    r(   )r   listr   )r   s    r4   get_speakers_for_modelr      s;     !!%e,5566!+.7788r3   languagec                    i t         j                  dt         j                  dt         j                  dt         j                  dt         j
                  dt         j                  dt         j                  dt         j                  dt         j                  dt         j                  dt         j                  dt         j                  dt         j                  dt         j                  dt         j                  dt         j                   dt         j"                  d	t         j$                  d	t         j&                  d
t         j(                  d
t         j*                  dt         j,                  di}t/        | |d      S )zConvert Pipecat Language enum to Sarvam AI language codes.

    Args:
        language: The Language enum value to convert.

    Returns:
        The corresponding Sarvam AI language code, or None if not supported.
    zbn-INen-INzgu-INzhi-INzkn-INzml-INzmr-INzod-INzpa-INzta-INzte-INF)use_base_code)r!   BNBN_INENEN_INGUGU_INHIHI_INKNKN_INMLML_INMRMR_INOROR_INPAPA_INTATA_INTETE_INr"   )r   LANGUAGE_MAPs     r4   language_to_sarvam_languager      sZ   W 	W 		
 	W 	 	W 	 	W 	 	W 	 	W 	 	W  	!" 	W#$ 	WW-L2 Hl%HHr3   c                       e Zd ZU dZ ed       Zedz  ez  ed<    ed       Z	e
dz  ez  ed<    ed       Ze
dz  ez  ed	<    ed
       Ze
dz  ez  ed<    ed       Ze
dz  ez  ed<   y)SarvamHttpTTSSettingsa  Settings for SarvamHttpTTSService.

    Parameters:
        enable_preprocessing: Whether to enable text preprocessing. Defaults to False.
            **Note:** Always enabled for bulbul:v3-beta (cannot be disabled).
        pace: Speech pace multiplier. Defaults to 1.0.
            - bulbul:v2: Range 0.3 to 3.0
            - bulbul:v3-beta: Range 0.5 to 2.0
        pitch: Voice pitch adjustment (-0.75 to 0.75). Defaults to 0.0.
            **Note:** Only supported for bulbul:v2. Ignored for v3 models.
        loudness: Volume multiplier (0.3 to 3.0). Defaults to 1.0.
            **Note:** Only supported for bulbul:v2. Ignored for v3 models.
        temperature: Controls output randomness for bulbul:v3-beta (0.01 to 1.0).
            Lower values = more deterministic, higher = more random. Defaults to 0.6.
            **Note:** Only supported for bulbul:v3-beta. Ignored for v2.
    c                      t         S r   r   r2   r3   r4   <lambda>zSarvamHttpTTSSettings.<lambda>  s    R[ r3   default_factoryNenable_preprocessingc                      t         S r   r   r2   r3   r4   r   zSarvamHttpTTSSettings.<lambda>  s    9 r3   pacec                      t         S r   r   r2   r3   r4   r   zSarvamHttpTTSSettings.<lambda>  s    I r3   pitchc                      t         S r   r   r2   r3   r4   r   zSarvamHttpTTSSettings.<lambda>  s    y r3   loudnessc                      t         S r   r   r2   r3   r4   r   zSarvamHttpTTSSettings.<lambda>  s    ) r3   temperature)r+   r,   r-   r.   r   r   r   r   r   r   r   r   r   r   r2   r3   r4   r   r     s    " 5:J[4\$+	1\%*;L%MD%$,
"M&+<M&NE54<)#N).?P)QHedlY&Q,1BS,TK	)Tr3   r   c                       e Zd ZU dZddiZeeeef      ed<    e	d       Z
edz  ez  ed<    e	d	       Zedz  ez  ed
<   y)SarvamTTSSettingsa  Settings for SarvamTTSService.

    Extends :class:`SarvamHttpTTSSettings` with WebSocket-specific buffering parameters.

    Parameters:
        min_buffer_size: Minimum characters to buffer before generating audio.
            Lower values reduce latency but may affect quality. Defaults to 50.
        max_chunk_length: Maximum characters processed in a single chunk.
            Controls memory usage and processing efficiency. Defaults to 150.
    target_language_coder   _aliasesc                      t         S r   r   r2   r3   r4   r   zSarvamTTSSettings.<lambda>*  s    I r3   r   Nmin_buffer_sizec                      t         S r   r   r2   r3   r4   r   zSarvamTTSSettings.<lambda>+  s    Y r3   max_chunk_length)r+   r,   r-   r.   r   r   r	   r   r   r   r   r   r   r   r2   r3   r4   r   r     sY    	 +A*)MHhtCH~&M.3DU.VOS4Z)+V/4EV/WcDj9,Wr3   r   c                       e Zd ZU dZeZeed<    G d de      Zdddddddde	d	e
j                  d
ee	   dee	   de	dee   dee   dee   f fdZdefdZdedee	   fdZdef fdZede	de	deedf   fd       Z xZS )SarvamHttpTTSServicea  Text-to-Speech service using Sarvam AI's API.

    Converts text to speech using Sarvam AI's TTS models with support for multiple
    Indian languages. Provides control over voice characteristics.

    **Model Differences:**

    - **bulbul:v2** (default):
        - Supports: pitch (-0.75 to 0.75), loudness (0.3 to 3.0), pace (0.3 to 3.0)
        - Default sample rate: 22050 Hz
        - Speakers: anushka, abhilash, manisha, vidya, arya, karun, hitesh

    - **bulbul:v3-beta**:
        - Does NOT support: pitch, loudness (will be ignored)
        - Supports: pace (0.5 to 2.0), temperature (0.01 to 1.0)
        - Default sample rate: 24000 Hz
        - Preprocessing is always enabled
        - Speakers: aditya, ritu, priya, neha, rahul, pooja, rohan, simran, kavya,
          amit, dev, ishita, shreya, ratan, varun, manan, sumit, roopa, kabir,
          aayan, shubh, ashutosh, advait, amelia, sophia

    Example::

        # Using bulbul:v2 (default)
        tts = SarvamHttpTTSService(
            api_key="your-api-key",
            voice_id="anushka",
            model="bulbul:v2",
            aiohttp_session=session,
            params=SarvamHttpTTSService.InputParams(
                language=Language.HI,
                pitch=0.1,
                pace=1.2,
                loudness=1.5
            )
        )

        # Using bulbul:v3-beta with temperature control
        tts_v3 = SarvamHttpTTSService(
            api_key="your-api-key",
            voice_id="aditya",  # Use v3 speaker
            model="bulbul:v3-beta",
            aiohttp_session=session,
            params=SarvamHttpTTSService.InputParams(
                language=Language.HI,
                pace=1.2,  # Range: 0.5-2.0 for v3
                temperature=0.8
            )
        )
    	_settingsc                       e Zd ZU dZej
                  Zee   ed<    e	dddd      Z
ee   ed<    e	d	d
dd      Zee   ed<    e	d	d
dd      Zee   ed<    e	dd      Zee   ed<    e	ddd	d      Zee   ed<   y) SarvamHttpTTSService.InputParamsa  Input parameters for Sarvam TTS configuration.

        .. deprecated:: 0.0.105
            Use ``SarvamHttpTTSSettings`` directly via the ``settings`` parameter instead.

        Parameters:
            language: Language for synthesis. Defaults to English (India).
            pitch: Voice pitch adjustment (-0.75 to 0.75). Defaults to 0.0.
                **Note:** Only supported for bulbul:v2. Ignored for v3 models.
            pace: Speech pace multiplier. Defaults to 1.0.
                - bulbul:v2: Range 0.3 to 3.0
                - bulbul:v3-beta: Range 0.5 to 2.0
            loudness: Volume multiplier (0.3 to 3.0). Defaults to 1.0.
                **Note:** Only supported for bulbul:v2. Ignored for v3 models.
            enable_preprocessing: Whether to enable text preprocessing. Defaults to False.
                **Note:** Always enabled for bulbul:v3-beta (cannot be disabled).
            temperature: Controls output randomness for bulbul:v3-beta (0.01 to 1.0).
                Lower values = more deterministic, higher = more random. Defaults to 0.6.
                **Note:** Only supported for bulbul:v3-beta. Ignored for v2.
        r                       ?+Voice pitch adjustment. Only for bulbul:v2.defaultgeledescriptionr         ?r   r   &Speech pace. v2: 0.3-3.0, v3: 0.5-2.0.r   &Volume multiplier. Only for bulbul:v2.r   Fz<Enable text preprocessing. Always enabled for v3-beta model.r   r   r   333333?{Gz?;Output randomness for bulbul:v3-beta only. Range: 0.01-1.0.r   N)r+   r,   r-   r.   r!   r   r   r   r   r   r   r   r   r   r   r   r   r2   r3   r4   InputParamsr   e  s    	* (0{{(8$2!&E	"
x 	
 !&@	!
huo 	
 %*@	%
(5/ 	
 05V0
htn 	
 (-U	(
Xe_ 	
r3   r   Nzhttps://api.sarvam.ai)voice_idr   base_urlsample_rateparamssettingsapi_keyaiohttp_sessionr   r   r   r   r   r   c          
         t        dddddddd      }
|t        dt         d       ||
_        |t        d	t         d
       ||
_        |t        dt                |s|j                  $| j                  |j                        xs d|
_        |j                  |j                  |
_        |j                  |j                  |
_        |j                  |j                  |
_        |j                  |j                  |
_	        |j                  |j                  |
_
        ||
j                  |       t        |
j                  t              r | j                  |
j                        |
_        |
j                  }|t        vr>dj                  t!        t        j#                                     }t%        d| d| d      t        |   | _        || j&                  j(                  }|/||j                  t*        u r| j&                  j,                  |
_        |
j                  }| j&                  j.                  \  }}|D||k  s||kD  r:t1        j2                  d| d| d| d       t5        |t7        ||            |
_        | j&                  j8                  rd|
_        | j&                  j:                  s-|
j                  dvrt1        j2                  d|        d|
_        | j&                  j<                  s-|
j                  dvrt1        j2                  d|        d|
_	        | j&                  j>                  s-|
j                  dvrt1        j2                  d|        d|
_
        tA        |   d|dd|
d|	 || _"        || _#        || _$        y)aL  Initialize the Sarvam TTS service.

        Args:
            api_key: Sarvam AI API subscription key.
            aiohttp_session: Shared aiohttp session for making requests.
            voice_id: Speaker voice ID. If None, uses model-appropriate default.

                .. deprecated:: 0.0.105
                    Use ``settings=SarvamHttpTTSSettings(voice=...)`` instead.

            model: TTS model to use. Options:
                - "bulbul:v2" (default): Standard model with pitch/loudness support
                - "bulbul:v3-beta": Advanced model with temperature control

                .. deprecated:: 0.0.105
                    Use ``settings=SarvamHttpTTSSettings(model=...)`` instead.

            base_url: Sarvam AI API base URL. Defaults to "https://api.sarvam.ai".
            sample_rate: Audio sample rate in Hz (8000, 16000, 22050, 24000).
                If None, uses model-specific default.
            params: Additional voice and preprocessing parameters. If None, uses defaults.

                .. deprecated:: 0.0.105
                    Use ``settings=SarvamHttpTTSSettings(...)`` instead.

            settings: Runtime-updatable settings. When provided alongside deprecated
                parameters, ``settings`` values take precedence.
            **kwargs: Additional arguments passed to parent TTSService.
        r(   r7   r   Fr   N)r   voicer   r   r   r   r   r   r   r   r   r   , Unsupported model ''. Allowed values: .Pace  is outside model range (-). Clamping.TNr   pitch parameter is ignored for Nr   "loudness parameter is ignored for Nr   %temperature parameter is ignored for )r   push_stop_framespush_start_framer   r2   )%r   r   r   r   r   language_to_service_languager   r   r   r   r   apply_update
isinstancer!   r   joinsortedkeys
ValueError_configr   r   r   r   r   warningmaxminr   r|   r}   r~   super__init___api_key	_base_url_session)selfr   r   r   r   r   r   r   r   kwargsdefault_settingsresolved_modelallowedr   pace_minpace_max	__class__s                   r4   r  zSarvamHttpTTSService.__init__  s~   V 1!&	
 "7,A7K%*"":/DgN%-" "8-BC??.99&//JUg %- ..:<B<W<W$9;;*,2KK$)<<+-3\\$*??.06$-%%1393E3E$0 ))(3 &//:(,(I(IJZJcJc(d% *//!22ii'8'='='? @AG2>2BBUV]U^^_`aa(8 ,,::K !1X^^y5P%)\\%A%A"  $$!\\44(D8ONNU4&(A(1XJVbcd$'#h2E$F! <<44481 ||**/?/E/E[/XNN<^<LMN%)"||--2B2K2KS^2^NN??OPQ(,%||005E5Q5Q Z
 6
 NNB>BRST+/( 	
#!!%		

 	
  !'r3   r   c                      yzCheck if this service can generate processing metrics.

        Returns:
            True, as Sarvam service supports metrics generation.
        Tr2   r  s    r4   can_generate_metricsz)SarvamHttpTTSService.can_generate_metrics'       r3   r   c                     t        |      S zConvert a Language enum to Sarvam AI language format.

        Args:
            language: The language to convert.

        Returns:
            The Sarvam AI-specific language code, or None if not supported.
        r   r  r   s     r4   r   z1SarvamHttpTTSService.language_to_service_language/       +844r3   framec                 @   K   t         |   |       d{    y7 wz~Start the Sarvam TTS service.

        Args:
            frame: The start frame containing initialization parameters.
        N)r  startr  r"  r  s     r4   r%  zSarvamHttpTTSService.start:  s      gmE"""s   text
context_idc                T  K   t        j                  |  d| d       	 || j                  j                  | j                  j                  | j
                  | j                  j                  | j                  j                  | j                  j                  | j                  j                  ndd}| j                  j                  r1| j                  j                  | j                  j                  nd|d<   | j                  j                  r1| j                  j                  | j                  j                  nd|d<   | j                  j                  r1| j                  j                  | j                  j                  nd	|d
<   | j                   ddt#               }| j$                   d}| j&                  j)                  |||      4 d{   }|j*                  dk7  rT|j-                          d{   }t/        d|        	 ddd      d{    | j1                          d{    y|j3                          d{   }ddd      d{    | j5                  |       d{    dvs|d   s)t/        d       	 | j1                          d{    y|d   d   }	t7        j8                  |	      }
t;        |
      dkD  r+|
j=                  d      rt        j                  d       |
dd }
t?        |
| j
                  d|      }| | j1                          d{    y7 h7 D7 $7 7 7 # 1 d{  7  sw Y   xY w7 7 # t@        $ r}t/        d| |       Y d}~cd}~ww xY w7 W# | j1                          d{  7   w xY ww)a  Generate speech from text using Sarvam AI's API.

        Args:
            text: The text to synthesize into speech.
            context_id: The context ID for tracking audio frames.

        Yields:
            Frame: Audio frames containing the synthesized speech.
        z: Generating TTS []Nr   )r'  r   speakerr   r   r   r   r   r   r   r   r   zapplication/json)api-subscription-keyzContent-Typez/text-to-speech)jsonheaders   zSarvam API error: erroraudioszNo audio data receivedr   ,   s   RIFFz+Stripping WAV header from Sarvam audio data   )audior   num_channelsr(  zError generating TTS: )r1  	exception)!r   debugr   r   r   r   r   r   r   r  r|   r   r}   r   r~   r   r  r   r  r  poststatusr'  r   stop_ttfb_metricsr-  start_tts_usage_metricsbase64	b64decodelen
startswithr   	Exception)r  r'  r(  payloadr.  urlresponse
error_textresponse_database64_audio
audio_datar"  es                r4   run_ttszSarvamHttpTTSService.run_ttsB  sK     	v/vQ78D	+ (,(?(?>>//#//(,(K(K--/3~~/B/B/N++TWG ||**;?>>;O;O;[4>>#7#7ad ||--/3~~/F/F/RDNN++X[ 
# ||0026..2L2L2XDNN..^a &
 )- 2 -G ^^$O4C}}))#GW)M 6 6QY??c)'/}}!6J$-?
|+LMM	6 6H ((***= '/mmo 56 6 ..t444 },M(4K '?@@. ((***) )215L)),7J :#
(=(=g(FJK'_
$  ,,%	E K
 ((***I6!66H += !66 6 6 6 58 +  	N%;A3#?1MMM	N +$((***s&  N(F/M L6M #M5L96MM L<M N(0L?1N(6M	M
MM MM 2M3M N(#M$N()A4M N(0N1N(6M 9M<M ?N(MM MMMM N(	N(N ;N
  NN
 N(
N%N!N%%N()r+   r,   r-   r.   r   Settingsr   r   r   r   aiohttpClientSessionr   r   r  r   r  r!   r   r   r%  r#   r   r   rJ  __classcell__r  s   @r4   r   r   .  s   1f %H$$2
i 2
r #'#/%)(,48L( L( !..	L(
 3-L( }L( L( c]L( %L( 01L(\d 	5X 	5(3- 	5# # P+# P+3 P+>%QU+;V P+ P+r3   r   c                       e Zd ZU dZeZeed<    G d de      Zdddddddddde	d	e
e	   d
e
e	   de	de
e   de
e   de
e   de
e   de
e   f fdZdefdZdede
e	   fdZdef fdZdef fdZdef fdZd-de
e	   fdZej4                  fdedef fdZdedef fdZdedee	e f   f fd Z! fd!Z" fd"Z#d# Z$d$ Z%d% Z&d& Z'd' Z(d( Z)d) Z*d*e	fd+Z+e,d*e	de	de-edf   fd,       Z. xZ/S ).SarvamTTSServicea>  WebSocket-based text-to-speech service using Sarvam AI.

    Provides streaming TTS with real-time audio generation for multiple Indian languages.
    Uses WebSocket for low-latency streaming audio synthesis.

    **Model Differences:**

    - **bulbul:v2** (default):
        - Supports: pitch (-0.75 to 0.75), loudness (0.3 to 3.0), pace (0.3 to 3.0)
        - Default sample rate: 22050 Hz
        - Speakers: anushka, abhilash, manisha, vidya, arya, karun, hitesh

    - **bulbul:v3-beta** / **bulbul:v3**:
        - Does NOT support: pitch, loudness (will be ignored)
        - Supports: pace (0.5 to 2.0), temperature (0.01 to 1.0)
        - Default sample rate: 24000 Hz
        - Preprocessing is always enabled
        - Speakers: aditya, ritu, priya, neha, rahul, pooja, rohan, simran, kavya,
          amit, dev, ishita, shreya, ratan, varun, manan, sumit, roopa, kabir,
          aayan, shubh, ashutosh, advait, amelia, sophia

    **WebSocket Protocol:**
    The service uses a WebSocket connection for real-time streaming. Messages include:
    - config: Initial configuration with voice settings
    - text: Text chunks for synthesis
    - flush: Signal to process remaining buffered text
    - ping: Keepalive signal

    Example::

        # Using bulbul:v2 (default)
        tts = SarvamTTSService(
            api_key="your-api-key",
            voice_id="anushka",
            model="bulbul:v2",
            params=SarvamTTSService.InputParams(
                language=Language.HI,
                pitch=0.1,
                pace=1.2,
                loudness=1.5
            )
        )

        # Using bulbul:v3-beta with temperature control
        tts_v3 = SarvamTTSService(
            api_key="your-api-key",
            voice_id="aditya",  # Use v3 speaker
            model="bulbul:v3-beta",
            params=SarvamTTSService.InputParams(
                language=Language.HI,
                pace=1.2,  # Range: 0.5-2.0 for v3
                temperature=0.8
            )
        )

    See https://docs.sarvam.ai/api-reference-docs/text-to-speech/stream for API details.
    r   c                      e Zd ZU dZ edddd      Zee   ed<    edd	d
d      Z	ee   ed<    edd	d
d      Z
ee   ed<    edd      Zee   ed<    edd      Zee   ed<    edd      Zee   ed<    edd      Zee   ed<    edd      Zee   ed<   ej(                  Zee   ed<    ed d!dd"      Zee   ed#<   y$)%SarvamTTSService.InputParamsa]  Configuration parameters for Sarvam TTS WebSocket service.

        .. deprecated:: 0.0.105
            Use ``SarvamTTSSettings`` directly via the ``settings`` parameter instead.

        Parameters:
            pitch: Voice pitch adjustment (-0.75 to 0.75). Defaults to 0.0.
                **Note:** Only supported for bulbul:v2. Ignored for v3 models.
            pace: Speech pace multiplier. Defaults to 1.0.
                - bulbul:v2: Range 0.3 to 3.0
                - bulbul:v3-beta: Range 0.5 to 2.0
            loudness: Volume multiplier (0.3 to 3.0). Defaults to 1.0.
                **Note:** Only supported for bulbul:v2. Ignored for v3 models.
            enable_preprocessing: Enable text preprocessing. Defaults to False.
                **Note:** Always enabled for bulbul:v3-beta.
            min_buffer_size: Minimum characters to buffer before generating audio.
                Lower values reduce latency but may affect quality. Defaults to 50.
            max_chunk_length: Maximum characters processed in a single chunk.
                Controls memory usage and processing efficiency. Defaults to 150.
            output_audio_codec: Audio codec format. Options: linear16, mulaw, alaw,
                opus, flac, aac, wav, mp3. Defaults to "linear16".
            output_audio_bitrate: Audio bitrate (32k, 64k, 96k, 128k, 192k).
                Defaults to "128k".
            language: Target language for synthesis. Supports Indian languages.
            temperature: Controls output randomness for bulbul:v3-beta (0.01 to 1.0).
                Lower = more deterministic, higher = more random. Defaults to 0.6.
                **Note:** Only supported for bulbul:v3-beta. Ignored for v2.

        **Speakers by Model:**

        bulbul:v2:
            - Female: anushka (default), manisha, vidya, arya
            - Male: abhilash, karun, hitesh

        bulbul:v3-beta:
            - aditya (default), ritu, priya, neha, rahul, pooja, rohan, simran,
              kavya, amit, dev, ishita, shreya, ratan, varun, manan, sumit,
              roopa, kabir, aayan, shubh, ashutosh, advait, amelia, sophia
        r   r   r   r   r   r   r   r   r   r   r   r   r   Fz8Enable text preprocessing. Always enabled for v3 models.r   r   2   z3Minimum characters to buffer before TTS processing.r      z&Maximum length for sentence splitting.r   linear16z>Audio codec: linear16, mulaw, alaw, opus, flac, aac, wav, mp3.output_audio_codec128kz)Audio bitrate: 32k, 64k, 96k, 128k, 192k.output_audio_bitrater   r   r   r   r   N)r+   r,   r-   r.   r   r   r   r   r   r   r   r   r   r   r   r   rW  r   rY  r!   r   r   r   r2   r3   r4   r   rS    s<   &	P "'E	"
x 	
 !&@	!
huo 	
 %*@	%
(5/ 	
 05R0
htn 	
 */M*
# 	
 +0@+
(3- 	
 -2X-
HSM 	
 /4C/
hsm 	
 (0{{(8$2',U	(
Xe_ 	
r3   r   Nz%wss://api.sarvam.ai/text-to-speech/ws)r   r   rC  aggregate_sentencestext_aggregation_moder   r   r   r   r   r   rC  rZ  r[  r   r   r   c       	         ^   t        dddddddddd	
      }|t        d
t         d
       ||_        |t        dt         d       ||_        d}d}|>t        dt                |	s+|j                  $| j                  |j                        xs d|_        |j                  |j                  |_        |j                  |j                  |_        |j                  |j                  |_        |j                  |j                  }|j                  |j                  }|j                  |j                  |_        |j                  |j                  |_        |j                  |j                  |_        |j                  |j                  |_        |	|j                  |	       t!        |j                  t"              r | j                  |j                        |_        |j                  }|t$        vr>dj'                  t)        t$        j+                                     }t-        d| d| d      t$        |   | _        || j.                  j0                  }|/|	|	j                  t2        u r| j.                  j4                  |_        |j                  }| j.                  j6                  \  }}|D||k  s||kD  r:t9        j:                  d| d| d| d       t=        |t?        ||            |_        | j.                  j@                  rd|_        | j.                  jB                  s-|j                  dvrt9        j:                  d|        d|_        | j.                  jD                  s-|j                  dvrt9        j:                  d|        d|_        | j.                  jF                  s-|j                  dvrt9        j:                  d|        d|_        tI        |   d!||dddd||d|
 tM        |      | _'        || _(        || _)        | d | | _*        || _+        d| _,        d| _-        y)"ac  Initialize the Sarvam TTS service with voice and transport configuration.

        Args:
            api_key: Sarvam API key for authenticating TTS requests.
            model: TTS model to use. Options:
                - "bulbul:v2" (default): Standard model with pitch/loudness support
                - "bulbul:v3-beta": Advanced model with temperature control

                .. deprecated:: 0.0.105
                    Use ``settings=SarvamTTSSettings(model=...)`` instead.

            voice_id: Speaker voice ID. If None, uses model-appropriate default.

                .. deprecated:: 0.0.105
                    Use ``settings=SarvamTTSSettings(voice=...)`` instead.

            url: WebSocket URL for the TTS backend (default production URL).
            aggregate_sentences: Deprecated. Use text_aggregation_mode instead.

                .. deprecated:: 0.0.104
                    Use ``text_aggregation_mode`` instead.

            text_aggregation_mode: How to aggregate text before synthesis.
            sample_rate: Output audio sample rate in Hz (8000, 16000, 22050, 24000).
                If None, uses model-specific default.
            params: Optional input parameters to override defaults.

                .. deprecated:: 0.0.105
                    Use ``settings=SarvamTTSSettings(...)`` instead.

            settings: Runtime-updatable settings. When provided alongside deprecated
                parameters, ``settings`` values take precedence.
            **kwargs: Arguments forwarded to InterruptibleTTSService.

        See https://docs.sarvam.ai/api-reference-docs/text-to-speech/stream
        r(   r7   r   FrT  rU  r   N)
r   r   r   r   r   r   r   r   r   r   r   r   r   rV  rX  r   r   r   r   r   r   r   r   r   Tr   r   r   r   r   r   )rZ  r[  push_text_framespause_frame_processingr   r   r   r   z?model=r2   ).r   r   r   r   r   r   r   r   r   rW  rY  r   r   r   r   r  r  r!   r   r  r  r  r  r  r   r   r   r   r   r  r	  r
  r   r|   r}   r~   r  r  r   _speech_sample_rate_output_audio_codec_output_audio_bitrate_websocket_urlr  _receive_task_keepalive_task)r  r   r   r   rC  rZ  r[  r   r   r   r  r  rW  rY  r  r  r   r  r  r  s                      r4   r  zSarvamTTSService.__init__+  s;   f -!& 
 "7,=wG%*"":/@'J%-" (% "8->???.99&//JUg %- ..:<B<W<W$9))57=7M7M$4**68>8O8O$5,,8)/)B)B&..:+1+F+F(;;*,2KK$)<<+-3\\$*??.06$-%%1393E3E$0 ))(3 &//:(,(I(IJZJcJc(d% *//!22ii'8'='='? @AG2>2BBUV]U^^_`aa(8 ,,::K !1X^^y5P%)\\%A%A"  $$!\\44(D8ONNU4&(A(1XJVbcd$'#h2E$F! <<44481 ||**/?/E/E[/XNN<^<LMN%)"||--2B2K2KS^2^NN??OPQ(,%||005E5Q5Q Z
 6
 NNB>BRST+/( 	 
	
 3"7!#'!!#%
	
 
	
 $'{#3 #5 %9" "%W^,<=!#r3   r   c                      yr  r2   r  s    r4   r  z%SarvamTTSService.can_generate_metrics  r  r3   r   c                     t        |      S r  r  r   s     r4   r   z-SarvamTTSService.language_to_service_language  r!  r3   r"  c                    K   t         |   |       d{    t        | j                        | _        | j                          d{    y7 77 wr$  )r  r%  r   r   r_  _connectr&  s     r4   r%  zSarvamTTSService.start  sJ      gmE""" $'t'7'7#8 mmo	 	# 	s!   AA1AA	AAc                 t   K   t         |   |       d{    | j                          d{    y7 7 w)zVStop the Sarvam TTS service.

        Args:
            frame: The end frame.
        N)r  stop_disconnectr&  s     r4   rj  zSarvamTTSService.stop  s6      gl5!!!    	"    848688c                 t   K   t         |   |       d{    | j                          d{    y7 7 w)z[Cancel the Sarvam TTS service.

        Args:
            frame: The cancel frame.
        N)r  cancelrk  r&  s     r4   rn  zSarvamTTSService.cancel  s6      gnU###    	$ rl  r(  c                 
  K   	 | j                   r;ddi}| j                   j                  t        j                  |             d{    yy7 # t        $ r)}| j                  d| |       d{  7   Y d}~yd}~ww xY ww)z;Flush any pending audio synthesis by sending flush command.typeflushNzError sending flush to Sarvam: 	error_msgr7  )
_websocketsendr-  dumpsrA  
push_error)r  r(  msgrI  s       r4   flush_audiozSarvamTTSService.flush_audio  su     	`w'oo**4::c?;;; ; 	`//.MaS,Q]^/___	`sM   BAA AA 
BA 	B A;0A31A;6B;B  B	directionc                 B   K   t         |   ||       d{    y7 w)zPush a frame downstream with special handling for stop conditions.

        Args:
            frame: The frame to push.
            direction: The direction to push the frame.
        N)r  
push_framer  r"  rz  r  s      r4   r|  zSarvamTTSService.push_frame  s      g 	222s   c                    K   t         |   ||       d{    t        |t        t        f      r| j                          d{    yy7 47 w)zCProcess a frame and flush audio if it's the end of a full response.N)r  process_framer  r   r   ry  r}  s      r4   r  zSarvamTTSService.process_frame  sQ     g#E9555 e5x@A""$$$ B 	6 %s!   AA-AAAAdeltac                 z   K   t         |   |       d{   }|r| j                          d{    |S 7  7 w)z:Apply a settings delta and resend config if voice changed.N)r  _update_settings_send_config)r  r  changedr  s      r4   r  z!SarvamTTSService._update_settings'  s>     077##%%% 8 &s   ;7;9;;c                 |  K   t         |           d{    | j                          d{    | j                  r;| j                  s/| j                  | j                  | j                              | _        | j                  r2| j                  s%| j                  | j                               | _        yyy7 7 w)z7Connect to Sarvam WebSocket and start background tasks.N)
r  rh  _connect_websocketrt  rc  create_task_receive_task_handler_report_errorrd  _keepalive_task_handlerr  r  s    r4   rh  zSarvamTTSService._connect0  s     g   %%'''??4#5#5!%!1!1$2L2LTM_M_2`!aD??4#7#7#'#3#3,,.$D  $8? 	!'s    B<B8B<B:BB<:B<c                 R  K   t         |           d{    | j                  r*| j                  | j                         d{    d| _        | j                  r*| j                  | j                         d{    d| _        | j                          d{    y7 7 \7 (7 w)z4Disconnect from Sarvam WebSocket and clean up tasks.N)r  rk  rc  cancel_taskrd  _disconnect_websocketr  s    r4   rk  zSarvamTTSService._disconnect>  s     g!###""4#5#5666!%D""4#7#7888#'D ((*** 	$ 7 9 	+sE   B'B.B'B!5B':B#;B'B%B'!B'#B'%B'c                 4  K   	 | j                   r'| j                   j                  t        j                  u ryd| j                  it               }t        | j                  |       d{   | _         t        j                  d       | j                          d{    | j                  d       d{    y7 P7  7 	# t        $ rL}| j                  d| |       d{  7   d| _         | j                  d|        d{  7   Y d}~yd}~ww xY ww)	z-Establish WebSocket connection to Sarvam API.Nr,  )additional_headersz!Connected to Sarvam TTS Websocketon_connectedz*Error connecting to Sarvam TTS Websocket: rr  on_connection_error)rt  stater%   OPENr  r   websocket_connectrb  r   r8  r  _call_event_handlerrA  rw  )r  ws_additional_headersrI  s      r4   r  z#SarvamTTSService._connect_websocketL  s    	J4??#8#8EJJ#F '%-%!
 %6###8% DO LL<=##%%%**>:::
 &: 	J//FqcJVW "    #DO**+@QCIII	Js   D2C  D2C  )B:*1C  B<C  4B>5C  9D:C  <C  >C   	D	D"C%#"DDDDDDc                   K   | j                   st        d      | j                  j                  | j                  j                  | j
                  | j                  j                  | j                  j                  | j                  j                  | j                  | j                  | j                  j                  | j                  j                  d
}| j                  j                  | j                  j                  |d<   | j                  j                  | j                  j                  |d<   | j                  j                  | j                  j                  |d<   t!        j"                  d|        d|d	}	 | j                   j%                  t'        j(                  |             d{    t!        j"                  d
       y7 # t        $ r%}| j+                  d| |       d{  7    d}~ww xY ww)z#Send initial configuration message.zWebSocket not connected)
r   r+  speech_sample_rater   r   r   rW  rY  r   r   Nr   r   r   zConfig being sent is configrp  datazConfiguration sent successfullyUnknown error occurred: rr  )rt  rA  r   r   r   r_  r   r   r   r`  ra  r   r   r   r   r   r   r8  ru  r-  rv  rw  )r  config_dataconfig_messagerI  s       r4   r  zSarvamTTSService._send_configf  s    566 %)NN$;$;~~++"&":":$(NN$G$G#~~== $ ? ?"&":":$($>$>NN''^^))
 >>+#'>>#7#7K >>"".&*nn&=&=K
#>>%%1)-)C)CK&,[M:;"*K@	//&&tzz.'ABBBLL:; C 	//.Fqc,JVW/XXX	sH   E9G;<1G
 -G.G
 G;G
 
	G8G3,G/-G33G88G;c                   K   	 | j                          d{    | j                  r7t        j                  d       | j                  j	                          d{    d| _        | j                  d       d{    y7 h7 '# t
        $ r)}| j                  d| |       d{  7   Y d}~Rd}~ww xY w7 ># d| _        | j                  d       d{  7   w xY ww)z.Close WebSocket connection and clean up state.NzDisconnecting from SarvamzError closing websocket: rr  on_disconnected)stop_all_metricsrt  r   r8  closerA  rw  r  )r  rI  s     r4   r  z&SarvamTTSService._disconnect_websocket  s     
	>'')))89oo++--- #DO**+<=== * . 	Z//.Gs,KWX/YYY	Z > #DO**+<===s   C!B B AB BB C!:B9;C! B B 	B6B1&B)'B1,B; 1B66B; 9C!;CCCC!c                 H    | j                   r| j                   S t        d      )NzWebsocket not connected)rt  rA  r  s    r4   _get_websocketzSarvamTTSService._get_websocket  s    ????"122r3   c                   K   | j                         2 3 d{   }t        |t              st        j                  |      }|j                  d      dk(  rt| j                          d{    t        j                  |d   d         }t        || j                  d| j                               }| j                  |       d{    |j                  d      dk(  s|d   d   }| j                  d	| 
       d{    d|j                         v sd|j                         v rt        j                   d       | j                  t#        d	|              d{    R7 N7 7 7 l7 6 yw)z3Receive and process messages from Sarvam WebSocket.Nrp  r5  r  r4  r(  r1  messagezTTS Error: )rs  ztoo longtimeoutz5Connection timeout detected, service may need restartr0  )r  r  r   r-  loadsgetr;  r=  r>  r   r   get_active_audio_context_idr|  rw  lowerr   r  r   )r  r  rx  r5  r"  rs  s         r4   _receive_messagesz"SarvamTTSService._receive_messages  sR    !002 	W 	W''3'jj)776?g-00222",,S[-ABE,t//t?_?_?aE //%000WWV_/ #FI 6I//k)4M/NNN "Y__%66)yGX:X'^_//*[;T*UVVV'	W
 3
 1 O W' 3sz   E2E0E%E0E2<E2)E(*AE2E*E2 E2?E, AE2E.E2%E0(E2*E2,E2.E20E2c                    K   d}	 t        j                  |       d{    | j                          d{    67 7 w)z;Handle keepalive messages to maintain WebSocket connection.   N)asynciosleep_send_keepalive)r  KEEPALIVE_SLEEPs     r4   r  z(SarvamTTSService._keepalive_task_handler  s>     --000&&((( 0(s   A <A >A A c                    K   | j                   rc| j                   j                  t        j                  k(  r;ddi}| j                   j	                  t        j                  |             d{    yyy7 w)z.Send keepalive message to maintain connection.rp  pingN)rt  r  r%   r  ru  r-  rv  )r  rx  s     r4   r  z SarvamTTSService._send_keepalive  sW     ??t44

B6"C//&&tzz#777  C?7s   A)A5+A3,A5r'  c                   K   | j                   re| j                   j                  t        j                  k(  r>dd|id}| j                   j	                  t        j                  |             d{    yt        j                  d       y7 w)z,Send text to Sarvam WebSocket for synthesis.r'  r  Nz%WebSocket not ready, cannot send text)	rt  r  r%   r  ru  r-  rv  r   r  )r  r'  rx  s      r4   
_send_textzSarvamTTSService._send_text  sb     ??t44

B!FD>:C//&&tzz#777NNBC 8s   A,B.B
/Bc                r  K   t        j                  d| d       	 | j                  r&| j                  j                  t        j
                  u r| j                          d{    	 | j                  |       d{    | j                  |       d{    d y7 =7 %7 # t        $ r]}t        d|        t        |       | j                          d{  7   | j                          d{  7   Y d}~yd}~ww xY w# t        $ r}t        d|        Y d}~yd}~ww xY ww)a  Generate speech audio frames from input text using Sarvam TTS.

        Sends text over WebSocket for synthesis and yields corresponding audio or status frames.

        Args:
            text: The text input to synthesize.
            context_id: The context ID for tracking audio frames.

        Yields:
            Frame objects including TTSStartedFrame, TTSAudioRawFrame(s, context_id=context_id), or TTSStoppedFrame.
        zGenerating TTS: [r*  Nr  r0  r  )r   r8  rt  r  r%   CLOSEDrh  r  r<  rA  r   r   rk  )r  r'  r(  rI  s       r4   rJ  zSarvamTTSService.run_tts  s
     	(a01	C??doo&;&;u||&Kmmo%%ood+++224888 J & ,8  )A!'EFF%<<&&(((mmo%%  	C%=aS#ABBB	Cs   D7AD "B #D (B& <B"=B& B$B& D D7 D "B& $B& &	D/4D#C&$D<C?=DD D7DD 	D4D/*D7/D44D7r   )0r+   r,   r-   r.   r   rK  r   r   r   r   r   r   r   r   r  r  r!   r   r   r%  r   rj  r   rn  ry  r   
DOWNSTREAMr   r|  r  r   dictr   r  rh  rk  r  r  r  r  r  r  r  r  r#   r   rJ  rN  rO  s   @r4   rQ  rQ    s   8t !H  U
i U
v  $"&:.2?C%)(,04o$ o$ }	o$
 3-o$ o$ &d^o$  ((;<o$ c]o$ %o$ ,-o$bd 	5X 	5(3- 	5
 
! !!+ !`HSM ` JXIbIb 3e 3 3% %> %K DcN +J4B>3
W.)8DS D C# C3 C>%QU+;V C Cr3   rQ  )Ir.   r  r=  r-  dataclassesr   r   enumr   typingr   r   r   r	   r
   r   r   rL  logurur   pydanticr   r   pipecat.frames.framesr   r   r   r   r   r   r   r   "pipecat.processors.frame_processorr   pipecat.services.sarvam._sdkr   pipecat.services.settingsr   r   r   r   pipecat.services.tts_servicer   r   r    pipecat.transcriptions.languager!   r"   (pipecat.utils.tracing.service_decoratorsr#   websockets.asyncio.clientr$   r  websockets.protocolr%   ModuleNotFoundErrorrI  r1  rA  r   r'   r6   rF   r{   tupler   r   r   r   r   r   r   rQ  r2   r3   r4   <module>r     s(  B    (  M M M   %	 	 	 > 4 _ _ a a F ?,F)S$ (d  d B $  2  "!!%*;(:;;	 %!!%);(:;;	  !!%);(:;;	+0 4^+, D9# 9$s) 9"I( "Ix} "IJ UK U U2 X- X X$e+: e+PN	C. N	C_  ,FLL;qc"#FLLXY
&qc*
++,s   E; ;F7 2F22F7