
    qiU                        d Z ddlZddlZddlZddlmZ dej                  d<   ddlmZm	Z	 ddl
mZmZmZmZmZ ddlmZ dd	lmZ dd
lmZmZmZmZ ddlmZmZmZmZmZ ddlm Z  ddl!m"Z"m#Z# 	 ddl$m%Z% ddl&m'Z' ddl(m)Z) ddl*m+Z+ ddl,m-Z- de"dee2   fdZ3de"dee2   fdZ4e G d de             Z5e G d de             Z6e6Z7e G d de             Z8 G d  d!e       Z9 G d" d#e       Z: G d$ d%e:      Z; G d& d'e:      Z<y# e.$ r7Z/ ej`                  de/         ej`                  d        e1de/       dZ/[/ww xY w)(al  Google Cloud Text-to-Speech service implementations.

This module provides integration with Google Cloud Text-to-Speech API,
offering both HTTP-based synthesis with SSML support and streaming synthesis
for real-time applications.

It also includes GeminiTTSService which uses Gemini's TTS-specific models
for natural voice control and multi-speaker conversations.
    N)
traced_ttsfalseGRPC_ENABLE_FORK_SUPPORT)	dataclassfield)AnyAsyncGeneratorListLiteralOptional)logger)	BaseModel)
ErrorFrameFrame
StartFrameTTSAudioRawFrame)	NOT_GIVENTTSSettings	_NotGiven_warn_deprecated_paramis_given)
TTSService)Languageresolve_language)ClientOptions)default)GoogleAuthError)texttospeech_v1)service_accountzException: zIn order to use Google AI, you need to `pip install pipecat-ai[google]`. Also, set `GOOGLE_APPLICATION_CREDENTIALS` environment variable.zMissing module: languagereturnc                    i t         j                  dt         j                  dt         j                  dt         j                  dt         j
                  dt         j                  dt         j                  dt         j                  dt         j                  dt         j                  dt         j                  dt         j                  dt         j                  dt         j                  dt         j                  d	t         j                   d	t         j"                  d
i t         j$                  dt         j&                  dt         j(                  dt         j*                  dt         j,                  dt         j.                  dt         j0                  dt         j2                  dt         j4                  dt         j6                  dt         j8                  dt         j:                  dt         j<                  dt         j>                  dt         j@                  dt         jB                  dt         jD                  di t         jF                  dt         jH                  dt         jJ                  dt         jL                  dt         jN                  dt         jP                  dt         jR                  dt         jT                  dt         jV                  dt         jX                  dt         jZ                  dt         j\                  dt         j^                  dt         j`                  dt         jb                  dt         jd                  dt         jf                  di t         jh                  dt         jj                  dt         jl                  dt         jn                  dt         jp                  dt         jr                  d t         jt                  d t         jv                  d!t         jx                  d!t         jz                  d!t         j|                  d"t         j~                  d"t         j                  d#t         j                  d#t         j                  d$t         j                  d$t         j                  d%i t         j                  d%t         j                  d&t         j                  d&t         j                  d't         j                  d't         j                  d(t         j                  d(t         j                  d)t         j                  d)t         j                  d*t         j                  d+t         j                  d+t         j                  d,t         j                  d,t         j                  d-t         j                  d-t         j                  d.t         j                  d.t         j                  d/t         j                  d/t         j                  d0t         j                  d0t         j                  d1t         j                  d1t         j                  d2t         j                  d2t         j                  d3t         j                  d3i}t        | |d45      S )6a#  Convert a Language enum to Google TTS language code.

    Source:
    https://docs.cloud.google.com/text-to-speech/docs/chirp3-hd

    Args:
        language: The Language enum value to convert.

    Returns:
        The corresponding Google TTS language code, or None if not supported.
    zar-XAzbn-INbg-BGhr-HRcs-CZda-DKnl-NLznl-BEen-USen-AUen-GBen-INet-EEfi-FIfr-FRfr-CAde-DEel-GRgu-INhe-ILhi-INhu-HUid-IDit-ITja-JPkn-INko-KRlv-LVlt-LTml-INcmn-CNmr-INnb-NOpl-PLpt-BRro-ROru-RUsr-RSsk-SKsl-SIes-ESzes-USsw-KEsv-SEta-INte-INth-THtr-TRuk-UAzur-INvi-VNFuse_base_code)br   ARBNBN_INBGBG_BGHRHR_HRCSCS_CZDADA_DKNLNL_BENL_NLENEN_USEN_AUEN_GBEN_INETET_EEFIFI_FIFRFR_CAFR_FRDEDE_DEELEL_GRGUGU_INHEHE_ILHIHI_INHUHU_HUIDID_IDITIT_ITJAJA_JPKNKN_INKOKO_KRLVLV_LVLTLT_LTMLML_INZHZH_CNMRMR_INNONBNB_NOPLPL_PLPTPT_BRRORO_RORURU_RUSRSR_RSSKSK_SKSLSL_SIESES_ESES_USSWSW_KESVSV_SETATA_INTETE_INTHTH_THTRTR_TRUKUK_UAURUR_INVIVI_VNr   r    LANGUAGE_MAPs     M/opt/pipecat/venv/lib/python3.12/site-packages/pipecat/services/google/tts.pylanguage_to_google_tts_languager   ?   s   NWN 	W	N
 	N 	WN 	N 	WN 	N 	WN 	N  	W!N" 	#N& 	W'N( 	)N* 	+N. 	W/N0 	1N2 	3N4 	5N6 	7N: 	W;N< 	=N@ 	WANB 	CNF 	WGNH 	INJ 	KNN 	WONP 	QNT 	WUNV 	WNZ 	W[N\ 	]N` 	WaNb 	cNf 	WgNh 	iNl 	WmNn 	oNr 	WsNt 	uNx 	WyNz 	{N~ 	WN@ 	AND 	WENF 	GNJ 	WKNL 	MNP 	WQNR 	SNV 	WWNX 	YN\ 	W]N^ 	_Nb 	XcNd 	eNh 	WiNj 	kNn 	WoNp 	WqNr 	sNv 	WwNx 	yN| 	W}N~ 	NB 	WCND 	ENH 	WINJ 	KNN 	WONP 	QNT 	WUNV 	WNZ 	W[N\ 	]N` 	WaNb 	cNd 	eNh 	WiNj 	kNn 	WoNp 	qNt 	WuNv 	wNz 	W{N| 	WWWWW[NL` Hl%HH    c                    i t         j                  dt         j                  dt         j                  dt         j                  dt         j
                  dt         j                  dt         j                  dt         j                  dt         j                  dt         j                  dt         j                  dt         j                  dt         j                  dt         j                  dt         j                  dt         j                   d	t         j"                  d	i t         j$                  d
t         j&                  d
t         j(                  dt         j*                  dt         j,                  dt         j.                  dt         j0                  dt         j2                  dt         j4                  dt         j6                  dt         j8                  dt         j:                  dt         j<                  dt         j>                  dt         j@                  dt         jB                  dt         jD                  di t         jF                  dt         jH                  dt         jJ                  dt         jL                  dt         jN                  dt         jP                  dt         jR                  dt         jT                  dt         jV                  dt         jX                  dt         jZ                  dt         j\                  dt         j^                  dt         j`                  dt         jb                  dt         jd                  dt         jf                  di t         jh                  dt         jj                  dt         jl                  dt         jn                  dt         jp                  dt         jr                  d t         jt                  d t         jv                  d!t         jx                  d!t         jz                  d"t         j|                  d"t         j~                  d#t         j                  d#t         j                  d$t         j                  d$t         j                  d%t         j                  d%i t         j                  d&t         j                  d&t         j                  d't         j                  d't         j                  d(t         j                  d(t         j                  d)t         j                  d)t         j                  d*t         j                  d*t         j                  d+t         j                  d+t         j                  d,t         j                  d,t         j                  d-t         j                  d-t         j                  d.i t         j                  d.t         j                  d/t         j                  d/t         j                  d0t         j                  d0t         j                  d1t         j                  d1t         j                  d2t         j                  d2t         j                  d3t         j                  d3t         j                  d4t         j                  d4t         j                  d5t         j                  d5t         j                  d6t         j                  d6i t         j                  d7t         j                  d7t         j                  d8t         j                  d8t         j                  d9t         j                  d9t         j                  d:t         j                  d:t         j                  d;t         j                  d;t         j                  d<t         j                  d<t         j                  d<t         j                  d=t         j                  d=t         j                  d>t         j                  d>i t         j                  d?t         j                  d?t         j                  d@t         j                  d@t         j                  dAt         j                  dAt         j                  dBt         j                  dBt         j                   dCt         j                  dDt         j                  dDt         j                  dEt         j                  dEt         j
                  dFt         j                  dFt         j                  dGt         j                  dGi t         j                  dHt         j                  dHt         j                  dIt         j                  dIt         j                  dJt         j                  dJt         j                  dKt         j                   dKt         j"                  dLt         j$                  dLt         j&                  dMt         j(                  dNt         j*                  dOt         j,                  dOt         j.                  dPt         j0                  dPt         j2                  dQt         j4                  dQt         j6                  dRt         j8                  dRt         j:                  dSt         j<                  dSt         j>                  dTt         j@                  dTt         jB                  dUt         jD                  dUt         jF                  dVt         jH                  dVt         jJ                  dWt         jL                  dWi}tO        | |dXY      S )Za8  Convert a Language enum to Gemini TTS language code.

    Source:
    https://docs.cloud.google.com/text-to-speech/docs/gemini-tts#available_languages

    Args:
        language: The Language enum value to convert.

    Returns:
        The corresponding Gemini TTS language code, or None if not supported.
    zaf-ZAzsq-ALzam-ETzar-EGzar-001zhy-AMzaz-AZzeu-ESzbe-BYzbn-BDr#   zmy-MMzca-ESzceb-PHr>   zcmn-TWr$   r%   r&   r'   r(   r)   r*   r+   r,   zfil-PHr-   r.   r/   zgl-ESzka-GEr0   r1   r2   zht-HTr3   r4   r5   zis-ISr6   r7   r8   zjv-JVr9   zkok-INr:   zlo-LAzla-VAr;   r<   zlb-LUzmk-MKzmai-INzmg-MGzms-MYr=   r?   zmn-MNzne-NPr@   znn-NOzor-INzps-AFzfa-IRrA   rB   zpt-PTzpa-INrC   rD   rE   zsd-INzsi-LKrF   rG   rH   zes-419zes-MXrI   rJ   rK   rL   rM   rN   rO   zur-PKrP   FrQ   )r   AFAF_ZASQSQ_ALAMAM_ETrS   AR_EGAR_001HYHY_AMAZAZ_AZEUEU_ESBEBE_BYrT   BN_BDrV   rW   MYMY_MMCACA_ESCEBCEB_PHr   r   ZH_TWrX   rY   rZ   r[   r\   r]   r^   r`   ra   rb   rc   rd   re   rf   rg   FILFIL_PHrh   ri   rj   rl   rk   GLGL_ESKAKA_GErm   rn   ro   rp   rq   rr   HTHT_HTrs   rt   ru   rv   rw   rx   ISIS_ISry   rz   r{   r|   r}   r~   JVJV_JVr   r   KOKKOK_INr   r   LOLO_LALALA_VAr   r   r   r   LBLB_LUMKMK_MKMAIMAI_INMGMG_MGMSMS_MYr   r   r   r   MNMN_MNNENE_NPr   r   r   NNNN_NOOROR_INPSPS_AFFAFA_IRr   r   r   r   PT_PTPAPA_INr   r   r   r   r   r   SDSD_INSISI_LKr   r   r   r   r   r   ES_419ES_MXr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   UR_PKr   r   r   r   s     r   language_to_gemini_tts_languager     s   tWt 	t
 	Wt 	t 	Wt 	t 	Wt 	t 	t 	Wt  	!t$ 	W%t& 	't* 	W+t, 	-t0 	W1t2 	3t6 	W7t8 	9t< 	W=t> 	?tB 	WCtD 	EtH 	WItJ 	KtN 	hOtP 	QtT 	XUtV 	WtX 	Yt\ 	W]t^ 	_tb 	Wctd 	eth 	Witj 	ktn 	Wotp 	qtt 	Wutv 	wtx 	ytz 	{t| 	}t@ 	WAtB 	CtF 	hGtH 	ItL 	WMtN 	OtR 	WStT 	UtV 	WtZ 	W[t\ 	]t` 	Watb 	ctf 	Wgth 	itl 	Wmtn 	otr 	Wstt 	utx 	Wytz 	{t~ 	Wt@ 	AtD 	WEtF 	GtJ 	WKtL 	MtP 	WQtR 	StV 	WWtX 	Yt\ 	W]t^ 	_tb 	Wctd 	eth 	Witj 	ktn 	Wotp 	qtt 	hutv 	wtz 	W{t| 	}t@ 	WAtB 	CtF 	WGtH 	ItL 	WMtN 	OtR 	WStT 	UtX 	WYtZ 	[t^ 	W_t` 	atd 	hetf 	gtj 	Wktl 	mtp 	Wqtr 	stv 	Wwtx 	yt| 	W}t~ 	tB 	WCtD 	EtH 	WItJ 	KtN 	WOtP 	WQtR 	StT 	WUtV 	WtZ 	W[t\ 	]t` 	Watb 	ctf 	Wgth 	itl 	Wmtn 	otr 	Wstt 	utv 	wtz 	W{t| 	}t@ 	WAtB 	CtF 	WGtH 	ItL 	WMtN 	OtR 	WStT 	UtX 	WYtZ 	[t^ 	W_t` 	atd 	Wetf 	gtj 	Wktl 	mtn 	otp 	qtt 	Wutv 	wtz 	W{t| 	}t@ 	WAtB 	WWWWWWgtLl Hl%HHr   c                   J   e Zd ZU dZ ed       Zedz  ez  ed<    ed       Z	edz  ez  ed<    ed       Z
edz  ez  ed	<    ed
       Zedz  ez  ed<    ed       Zed   dz  ez  ed<    ed       Zed   dz  ez  ed<    ed       Zed   dz  ez  ed<   y)GoogleHttpTTSSettingsa1  Settings for GoogleHttpTTSService.

    Parameters:
        pitch: Voice pitch adjustment (e.g., "+2st", "-50%").
        rate: Speaking rate adjustment (e.g., "slow", "fast", "125%"). Used for
            SSML prosody tags (non-Chirp voices).
        speaking_rate: Speaking rate for AudioConfig (Chirp/Journey voices).
            Range [0.25, 2.0].
        volume: Volume adjustment (e.g., "loud", "soft", "+6dB").
        emphasis: Emphasis level for the text.
        gender: Voice gender preference.
        google_style: Google-specific voice style.
    c                      t         S Nr    r   r   <lambda>zGoogleHttpTTSSettings.<lambda>  s    ) r   default_factoryNpitchc                      t         S r  r  r	  r   r   r
  zGoogleHttpTTSSettings.<lambda>  s     r   ratec                      t         S r  r  r	  r   r   r
  zGoogleHttpTTSSettings.<lambda>      I r   speaking_ratec                      t         S r  r  r	  r   r   r
  zGoogleHttpTTSSettings.<lambda>      9 r   volumec                      t         S r  r  r	  r   r   r
  zGoogleHttpTTSSettings.<lambda>      	 r   strongmoderatereducednoneemphasisc                      t         S r  r  r	  r   r   r
  zGoogleHttpTTSSettings.<lambda>  r  r   malefemaleneutralgenderc                      t         S r  r  r	  r   r   r
  zGoogleHttpTTSSettings.<lambda>  s    i r   
apologeticcalm
empatheticfirmlivelygoogle_style)__name__
__module____qualname____doc__r   r  strr   __annotations__r  r  floatr  r  r   r#  r+  r	  r   r   r  r    s     %*:K$LE3:	!L#(9J#KD#*y
 K.3DU.VM54<)+V%*;L%MFC$J"MTY)UHg=>E	Q  GL)GFG/047)C 
 	/0 DELyX1r   r  c                   <    e Zd ZU dZ ed       Zedz  ez  ed<   y)GoogleTTSSettingszxSettings for GoogleTTSService.

    Parameters:
        speaking_rate: The speaking rate, in the range [0.25, 2.0].
    c                      t         S r  r  r	  r   r   r
  zGoogleTTSSettings.<lambda>
  r  r   r  Nr  )	r,  r-  r.  r/  r   r  r2  r   r1  r	  r   r   r4  r4    s"     /4DU.VM54<)+Vr   r4  c                       e Zd ZU dZ ed       Zedz  ez  ed<    ed       Z	e
ez  ed<    ed       Zeeeef      dz  ez  ed	<   y)
GeminiTTSSettingsa  Settings for GeminiTTSService.

    Parameters:
        prompt: Optional style instructions for how to synthesize the content.
        multi_speaker: Whether to enable multi-speaker support.
        speaker_configs: List of speaker configurations for multi-speaker mode.
    c                      t         S r  r  r	  r   r   r
  zGeminiTTSSettings.<lambda>  r  r   r  Npromptc                      t         S r  r  r	  r   r   r
  zGeminiTTSSettings.<lambda>  s    I r   multi_speakerc                      t         S r  r  r	  r   r   r
  zGeminiTTSSettings.<lambda>  r  r   speaker_configs)r,  r-  r.  r/  r   r9  r0  r   r1  r;  boolr=  listdictr   r	  r   r   r7  r7    sc     &+;L%MFC$J"M&+<M&NM4)#N?D)@OT$sCx.)D09< r   r7  c                   N    e Zd ZU dZeZeed<    G d de      Zddddddddde	e
   de	e
   d	e	e
   d
e	e
   de	e   de	e   de	e   f fdZde	e
   de	e
   dej                  fdZdefdZdede	e
   fdZdedee
ef   f fdZde
de
fdZede
de
deedf   fd       Z xZS )GoogleHttpTTSServicea;  Google Cloud Text-to-Speech HTTP service with SSML support.

    Provides text-to-speech synthesis using Google Cloud's HTTP API with
    comprehensive SSML support for voice customization, prosody control,
    and styling options. Ideal for applications requiring fine-grained
    control over speech output.

    Note:
        Requires Google Cloud credentials via service account JSON, credentials file,
        or default application credentials (GOOGLE_APPLICATION_CREDENTIALS).
        Chirp and Journey voices don't support SSML and will use plain text input.
    	_settingsc                       e Zd ZU dZdZee   ed<   dZee   ed<   dZ	ee
   ed<   dZee   ed<   dZeed      ed<   ej                  Zee   ed	<   dZeed
      ed<   dZeed      ed<   y) GoogleHttpTTSService.InputParamsa  Input parameters for Google HTTP TTS voice customization.

        .. deprecated:: 0.0.105
            Use ``GoogleHttpTTSSettings`` directly via the ``settings`` parameter instead.

        Parameters:
            pitch: Voice pitch adjustment (e.g., "+2st", "-50%").
            rate: Speaking rate adjustment (e.g., "slow", "fast", "125%"). Used for SSML prosody tags (non-Chirp voices).
            speaking_rate: Speaking rate for AudioConfig (Chirp/Journey voices). Range [0.25, 2.0].
            volume: Volume adjustment (e.g., "loud", "soft", "+6dB").
            emphasis: Emphasis level for the text.
            language: Language for synthesis. Defaults to English.
            gender: Voice gender preference.
            google_style: Google-specific voice style.
        Nr  r  r  r  r  r  r    r  r#  r%  r+  )r,  r-  r.  r/  r  r   r0  r1  r  r  r2  r  r  r   r   ra   r    r#  r+  r	  r   r   InputParamsrE  4  s    	   $x}#"hsm")-x- $$OS(7#JKLS'/{{(8$2AE!<=>E`dhw'[\]dr   rF  N)credentialscredentials_pathlocationvoice_idsample_rateparamssettingsrG  rH  rI  rJ  rK  rL  rM  c                   t        dddddddddd
      }	|t        dt         d       ||	_        |	t        dt                |s|j                  |j                  |	_        |j                  |j                  |	_        |j
                  |j
                  |	_        |j                  |j                  |	_        |j                  |j                  |	_        |j                   | j                  |j                        |	_        |j                  |j                  |	_
        |j                  |j                  |	_        ||	j                  |       t        
| 8  d
|dd|	d	| || _        | j!                  ||      | _        y)a  Initializes the Google HTTP TTS service.

        Args:
            credentials: JSON string containing Google Cloud service account credentials.
            credentials_path: Path to Google Cloud service account JSON file.
            location: Google Cloud location for regional endpoint (e.g., "us-central1").
            voice_id: Google TTS voice identifier (e.g., "en-US-Standard-A").

                .. deprecated:: 0.0.105
                    Use ``settings=GoogleHttpTTSSettings(voice=...)`` instead.

            sample_rate: Audio sample rate in Hz. If None, uses default.
            params: Voice customization parameters including pitch, rate, volume, etc.

                .. deprecated:: 0.0.105
                    Use ``settings=GoogleHttpTTSSettings(...)`` instead.

            settings: Runtime-updatable settings. When provided alongside deprecated
                parameters, ``settings`` values take precedence.
            **kwargs: Additional arguments passed to parent TTSService.
        Nen-US-Chirp3-HD-Charonr(   )
modelvoicer    r  r  r  r  r  r#  r+  rJ  rQ  rL  TrK  push_start_framepush_stop_framesrM  r	  )r  r   rQ  r  r  r  r  r  r    language_to_service_languager#  r+  apply_updatesuper__init__	_location_create_client_client)selfrG  rH  rI  rJ  rK  rL  rM  kwargsdefault_settings	__class__s             r   rX  zGoogleHttpTTSService.__init__N  s   D 1*
 ":/DgN%-" "8-BC<<+-3\\$*;;*,2KK$)''35;5I5I$2==,.4mm$+??.06$-??.040Q0QRXRaRa0b$-==,.4mm$+&&24:4G4G$1 ))(3 	
#!!%		

 	
 "@D@S@S)A
r   r!   c                    d}|r5t        j                  |      }t        j                  j	                  |      }n3|r t        j                  j                  |      }n	 t        dg      \  }}|st        d      d}| j                  rt        | j                   d      }t        j                  ||      S # t        $ r Y Vw xY wao  Create authenticated Google Text-to-Speech client.

        Args:
            credentials: JSON string with service account credentials.
            credentials_path: Path to service account JSON file.

        Returns:
            Authenticated TextToSpeechAsyncClient instance.

        Raises:
            ValueError: If no valid credentials are provided.
        Nz.https://www.googleapis.com/auth/cloud-platform)scopeszNo valid credentials provided.z-texttospeech.googleapis.com)api_endpoint)rG  client_optionsjsonloadsr   Credentialsfrom_service_account_infofrom_service_account_filer   r   
ValueErrorrY  r   r   TextToSpeechAsyncClientr\  rG  rH  credsjson_account_info
project_idrd  s          r   rZ  z#GoogleHttpTTSService._create_client       8< $

; 7#//IIJ[\E#//IIJZ[E$+LM%!z =>>>>* $//KLN 66n
 	
 #    B8 8	CCc                      y)zCheck if this service can generate processing metrics.

        Returns:
            True, as Google HTTP TTS service supports metrics generation.
        Tr	  r\  s    r   can_generate_metricsz)GoogleHttpTTSService.can_generate_metrics       r   r    c                     t        |      S zConvert a Language enum to Google TTS language format.

        Args:
            language: The language to convert.

        Returns:
            The Google TTS-specific language code, or None if not supported.
        r   r\  r    s     r   rU  z1GoogleHttpTTSService.language_to_service_language       /x88r   deltac                   K   t        |t              r[t        |j                        rFt	        |j                        }d|cxk  rdk  s&n t        j                  d| d       t        |_        t        | %  |       d{   S 7 wzOverride to handle speaking_rate validation.

        Args:
            delta: Settings delta. Can include 'speaking_rate' (float).
        g      ?g       @zInvalid speaking_rate value: z. Must be between 0.25 and 2.0N)

isinstancer  r   r  r2  r   warningr   rW  _update_settingsr\  r|  
rate_valuer_  s      r   r  z%GoogleHttpTTSService._update_settings  sw      e23ATAT8Uu223JJ-#-3J<?]^ '0#W-e4444   A>BBBtextc                 P   d}d| j                   j                   dg}| j                   j                  }|j                  d| d       | j                   j                  r)|j                  d| j                   j                   d       |ddj                  |       dz  }g }| j                   j                  r)|j                  d	| j                   j                   d       | j                   j                  r)|j                  d
| j                   j                   d       | j                   j                  r)|j                  d| j                   j                   d       |r|ddj                  |       dz  }| j                   j                  r|d| j                   j                   dz  }| j                   j                  r|d| j                   j                   dz  }||z  }| j                   j                  r|dz  }| j                   j                  r|dz  }|r|dz  }|dz  }|S )Nz<speak>zname=''z
language='zgender='z<voice  >zpitch='zrate='zvolume='z	<prosody z<emphasis level='z'>z<google:style name='z</google:style>z</emphasis>z
</prosody>z</voice></speak>)rC  rQ  r    appendr#  joinr  r  r  r  r+  )r\  r  ssmlvoice_attrsr    prosody_attrss         r   _construct_ssmlz$GoogleHttpTTSService._construct_ssml  s      4 45Q78>>**Zz34>>  $..*?*?)@BC'#((;/022 >>  74>>+?+?*@!BC>>  6$..*=*=)>a!@A>>    8DNN,A,A+B!!DEi 78::D >>""'(?(?'@CCD >>&&*4>>+F+F*GrJJD >>&&%%D>>""M!DL D""r   
context_idc                  K   t        j                  |  d| d       	 d| j                  j                  j	                         v }d| j                  j                  j	                         v }|s|rt        j                  |      }n'| j                  |      }t        j                  |      }t        j                  | j                  j                  | j                  j                        }t
        j                  j                  | j                  d}|s|r/| j                  j                  | j                  j                  |d
<   t        j                  di |}	t        j                  |||	      }
| j                   j#                  |
       d	{   }| j%                  |       d	{    |j&                  dd	 }| j(                  }t+        dt-        |      |      D ]D  }||||z    }|s y	| j/                          d	{    t1        || j                  d|      }| F y	7 7 7 )# t2        $ r'}dt5        |       }t7        |       Y d	}~y	d	}~ww xY ww)a  Generate speech from text using Google's HTTP TTS API.

        Args:
            text: The text to synthesize into speech.
            context_id: The context ID for tracking audio frames.

        Yields:
            Frame: Audio frames containing the synthesized speech.
        : Generating TTS []chirpjourney)r  )r  language_codenameaudio_encodingsample_rate_hertzNr  )inputrQ  audio_config)request,   r      r  TTS generation error: errorr	  )r   debugrC  rQ  lowerr   SynthesisInputr  VoiceSelectionParamsr    AudioEncodingLINEAR16rK  r  AudioConfigSynthesizeSpeechRequestr[  synthesize_speechstart_tts_usage_metricsaudio_content
chunk_sizerangelenstop_ttfb_metricsr   	Exceptionr0  r   )r\  r  r  is_chirp_voiceis_journey_voicesynthesis_inputr  rQ  audio_config_paramsr  r  responser  
CHUNK_SIZEichunkframeeerror_messages                      r   run_ttszGoogleHttpTTSService.run_tts%  s)     	v/vQ783	2$(<(<(B(B(DDN(DNN,@,@,F,F,HH !1"1"@"@d"K++D1"1"@"@d"K#88"nn55DNN<P<PE
 #2"?"?"H"H%)%5%5# "28T8T8`7;~~7S7S#O4*66M9LML%==%UG "\\;;G;LLH..t444 %22237MJ1c-0*= %a!j.9,,...(0@0@!PZ[ M4 /  	24SVH=M=111	2sr   I"E3H/ H)H/ +H+,AH/ /I"0H/ H-$H/ (I")H/ +H/ -H/ /	I8II"II")r,  r-  r.  r/  r  Settingsr1  r   rF  r   r0  intrX  r   rl  rZ  r>  ru  r   rU  r   r@  r   r  r  r   r	   r   r  __classcell__r_  s   @r   rB  rB  #  s^    %H$$ei e: &**."&"&%)(,48X
 c]X
 #3-	X

 3-X
 3-X
 c]X
 %X
 01X
t+
#C=+
<DSM+
		0	0+
Zd 	9X 	9(3- 	95K 5DcN 5,C ,C ,\ ?2# ?23 ?2>%QU+;V ?2 ?2r   rB  c                       e Zd ZdZdee   dee   dej                  fdZde	fdZ
dedee   fdZ	 dd
ej                  dededee   deed	f   f
dZy	)GoogleBaseTTSServicezBase class for Google Cloud Text-to-Speech streaming services.

    Provides shared streaming synthesis logic for Google TTS services.
    This is an abstract base class. Use GoogleTTSService or GeminiTTSService instead.
    rG  rH  r!   c                    d}|r5t        j                  |      }t        j                  j	                  |      }n3|r t        j                  j                  |      }n	 t        dg      \  }}|st        d      d}| j                  rt        | j                   d      }t        j                  ||      S # t        $ r Y Vw xY wra  re  rm  s          r   rZ  z#GoogleBaseTTSService._create_cliento  rq  rr  c                      y)zCheck if this service can generate processing metrics.

        Returns:
            True, as Google streaming TTS services support metrics generation.
        Tr	  rt  s    r   ru  z)GoogleBaseTTSService.can_generate_metrics  rv  r   r    c                     t        |      S rx  ry  rz  s     r   rU  z1GoogleBaseTTSService.language_to_service_language  r{  r   Nstreaming_configr  r  r9  c                >  K   t        j                  |      fd}| j                  j                   |              d{   }| j	                         d{    d}d}| j
                  }	|2 3 d{   }
|
j                  }|s|s| j                          d{    d}||z  }t        |      |	k\  sH|d|	 }||	d }t        || j                  d|       t        |      |	k\  r5~7 7 7 }7 V6 |rt        || j                  d|       yyw)	a|  Shared streaming synthesis logic.

        Args:
            streaming_config: The streaming configuration.
            text: The text to synthesize.
            context_id: Unique identifier for this TTS context.
            prompt: Optional prompt for style instructions (Gemini only).

        Yields:
            Frame: Audio frames containing the synthesized speech.
        )r  c                    K    di} | d<   t        j                  t        j                  di |        y w)Nr  r9  )r  r	  )r   StreamingSynthesizeRequestStreamingSynthesisInput)synthesis_input_paramsconfig_requestr9  r  s    r   request_generatorz;GoogleBaseTTSService._stream_tts.<locals>.request_generator  sM       &,d^"!39&x0!<<%==W@VW s   >ANr   FTr  r  )r   r  r[  streaming_synthesizer  r  r  r  r  r   rK  )r\  r  r  r  r9  r  streaming_responsesaudio_bufferfirst_chunk_for_ttfbr  r  r  piecer  s     ` `        @r   _stream_ttsz GoogleBaseTTSService._stream_tts  s8    $ )CC-
	 %)LL$E$EFWFY$ZZ**4000$__
1 	Z 	Z(**E',,...'+$E!Ll#z1$[j1+JK8&ud.>.>jYY l#z1% [0	Z / 2 "<1A1A1Q[\\ sf   ADC4DC6 D6C<:C8;C<>%D#C:$D>4D3D6D8C<:D<!Dr  )r,  r-  r.  r/  r   r0  r   rl  rZ  r>  ru  r   rU  StreamingSynthesizeConfigr	   r   r  r	  r   r   r  r  h  s    +
#C=+
<DSM+
		0	0+
Zd 	9X 	9(3- 	9  !%7])CC7] 7] 	7]
 7] 
t	$7]r   r  c                        e Zd ZU dZeZeed<    G d de      Zdddddddddde	e
   de	e
   d	e	e
   d
e	e
   de	e
   de	e   de	e   de	e   f fdZdedee
ef   f fdZede
de
deedf   fd       Z xZS )GoogleTTSServicea*  Google Cloud Text-to-Speech streaming service.

    Provides real-time text-to-speech synthesis using Google Cloud's streaming API
    for low-latency applications. Optimized for Chirp 3 HD and Journey voices
    with continuous audio streaming capabilities.

    Note:
        Requires Google Cloud credentials via service account JSON, file path, or
        default application credentials (GOOGLE_APPLICATION_CREDENTIALS env var).
        Only Chirp 3 HD and Journey voices are supported. Use GoogleHttpTTSService for other voices.

    Example::

        tts = GoogleTTSService(
            credentials_path="/path/to/service-account.json",
            voice_id="en-US-Chirp3-HD-Charon",
            params=GoogleTTSService.InputParams(
                language=Language.EN_US,
            )
        )
    rC  c                   N    e Zd ZU dZej
                  Zee   ed<   dZ	ee
   ed<   y)GoogleTTSService.InputParamsaY  Input parameters for Google streaming TTS configuration.

        .. deprecated:: 0.0.105
            Use ``GoogleTTSSettings`` directly via the ``settings`` parameter instead.

        Parameters:
            language: Language for synthesis. Defaults to English.
            speaking_rate: The speaking rate, in the range [0.25, 2.0].
        r    Nr  )r,  r-  r.  r/  r   ra   r    r   r1  r  r2  r	  r   r   rF  r    s*    	 (0{{(8$2)-x-r   rF  N)rG  rH  rI  rJ  voice_cloning_keyrK  rL  rM  rG  rH  rI  rJ  r  rK  rL  rM  c                   t        dddd      }
|t        dt         d       ||
_        |[t        dt                |sI|j                   | j	                  |j                        |
_        |j
                  |j
                  |
_        ||
j                  |       t        |    d
|dd|
d	|	 || _	        || _
        | j                  ||      | _        y)a,  Initializes the Google streaming TTS service.

        Args:
            credentials: JSON string containing Google Cloud service account credentials.
            credentials_path: Path to Google Cloud service account JSON file.
            location: Google Cloud location for regional endpoint (e.g., "us-central1").
            voice_id: Google TTS voice identifier (e.g., "en-US-Chirp3-HD-Charon").

                .. deprecated:: 0.0.105
                    Use ``settings=GoogleTTSSettings(voice=...)`` instead.

            voice_cloning_key: The voice cloning key for Chirp 3 custom voices.
            sample_rate: Audio sample rate in Hz. If None, uses default.
            params: Language configuration parameters.

                .. deprecated:: 0.0.105
                    Use ``settings=GoogleTTSSettings(...)`` instead.

            settings: Runtime-updatable settings. When provided alongside deprecated
                parameters, ``settings`` values take precedence.
            **kwargs: Additional arguments passed to parent TTSService.
        NrO  r(   )rP  rQ  r    r  rJ  rQ  rL  TrR  r	  )r4  r   rQ  r    rU  r  rV  rW  rX  rY  _voice_cloning_keyrZ  r[  )r\  rG  rH  rI  rJ  r  rK  rL  rM  r]  r^  r_  s              r   rX  zGoogleTTSService.__init__  s    H -*	
 ":/@'J%-" "8->???.040Q0QRXRaRa0b$-''35;5I5I$2 ))(3 	
#!!%		

 	
 ""3@D@S@S)A
r   r|  r!   c                   K   t        |t              r[t        |j                        rFt	        |j                        }d|cxk  rdk  s&n t        j                  d| d       t        |_        t        | %  |       d{   S 7 wr~  )
r  r4  r   r  r2  r   r  r   rW  r  r  s      r   r  z!GoogleTTSService._update_settings\  sw      e./HU=P=P4Qu223JJ-#-3J<?]^ '0#W-e4444r  r  r  c                  K   t        j                  |  d| d       	 | j                  rLt        j                  | j                        }t        j
                  | j                  j                  |      }n?t        j
                  | j                  j                  | j                  j                        }t        j                  |t        j                  t        j                  j                  | j                  | j                  j                              }| j                  |||      2 3 d{   }| 7 
6 y# t         $ r2}| j#                  d	t%        |       |
       d{  7   Y d}~yd}~ww xY ww)a7  Generate streaming speech from text using Google's streaming API.

        Args:
            text: The text to synthesize into speech.
            context_id: The context ID for tracking audio frames.

        Yields:
            Frame: Audio frames containing the synthesized speech as it's generated.
        r  r  )r  )r  voice_cloner  )r  r  r  rQ  streaming_audio_configNr  )	error_msg	exception)r   r  r  r   VoiceCloneParamsr  rC  r    rQ  r  StreamingAudioConfigr  PCMrK  r  r  r  
push_errorr0  )r\  r  r  voice_clone_paramsrQ  r  r  r  s           r   r  zGoogleTTSService.run_ttsk  sF     	v/vQ78	\&&%4%E%E&*&=&=&" (<<"&.."9"9GY (<<"&.."9"9@T@T
  /HH'6'K'K#2#@#@#D#D&*&6&6"&..">">(   $//0@$
S  eS  	\//.DSVH,MYZ/[[[	\s_   E=DD? ,D=0D;1D=4D? ;D==D? >E=?	E:"E5*E-+E50E=5E::E=)r,  r-  r.  r/  r4  r  r1  r   rF  r   r0  r  rX  r   r@  r   r  r   r	   r   r  r  r  s   @r   r  r    s   , !H  .i ." &**."&"&+/%)(,04I
 c]I
 #3-	I

 3-I
 3-I
 $C=I
 c]I
 %I
 ,-I
V5K 5DcN 5 )\# )\3 )\>%QU+;V )\ )\r   r  c                   2    e Zd ZU dZeZeed<   dZg dZ G d de	      Z
dddddddddd	d	ee   d
ee   dee   dee   dee   dee   dee   dee
   dee   f fdZdedee   fdZdef fdZdedeeef   f fdZedededeedf   fd       Z xZS )GeminiTTSServicea  Gemini Text-to-Speech streaming service using Gemini TTS models.

    Provides real-time text-to-speech synthesis using Gemini's TTS-specific models
    (gemini-2.5-flash-tts and gemini-2.5-pro-tts) with support for natural
    voice control, prompts for style instructions, expressive markup tags,
    and multi-speaker conversations.

    Note:
        Requires Google Cloud credentials via service account JSON, credentials file,
        or default application credentials (GOOGLE_APPLICATION_CREDENTIALS).

        Uses the Google Cloud Text-to-Speech streaming API for low-latency synthesis.

    Example::

        tts = GeminiTTSService(
            credentials_path="/path/to/service-account.json",
            model="gemini-2.5-flash-tts",
            voice_id="Kore",
            params=GeminiTTSService.InputParams(
                language=Language.EN_US,
                prompt="Say this in a friendly and helpful tone"
            )
        )
    rC  i]  )AchernarAchirdAlgenibAlgiebaAlnilamAoedeAutonoe	CallirhoeCharonDespina	EnceladusErinomeFenrirGacruxIapetusKore	LaomedeiaLedaOrusPuckPulcherrima
Rasalgethi	Sadachbia
SadaltagerSchedarSulafarUmbrielVindemiatrixZephyrZubenelgenubic                   v    e Zd ZU dZej
                  Zee   ed<   dZ	ee
   ed<   dZeed<   dZeee      ed<   y)GeminiTTSService.InputParamsa  Input parameters for Gemini TTS configuration.

        .. deprecated:: 0.0.105
            Use ``GeminiTTSSettings`` directly via the ``settings`` parameter instead.

        Parameters:
            language: Language for synthesis. Defaults to English.
            prompt: Optional style instructions for how to synthesize the content.
            multi_speaker: Whether to enable multi-speaker support.
            speaker_configs: List of speaker configurations for multi-speaker mode.
        r    Nr9  Fr;  r=  )r,  r-  r.  r/  r   ra   r    r   r1  r9  r0  r;  r>  r=  r
   r@  r	  r   r   rF  r    sG    
	 (0{{(8$2 $$#t#04$t*-4r   rF  N)	api_keyrP  rG  rH  rI  rJ  rK  rL  rM  r  rP  rG  rH  rI  rJ  rK  rL  rM  c       	         h   |t        j                  dt        d       |r5|| j                  k7  r&t	        j
                  d| j                   d| d       t        dd	d
ddd      }|t        dt        d       ||_        |t        dt        d       ||_	        |j                  | j                  vr#t	        j
                  d|j                   d       |t        dt               |	s|j                   | j                  |j                        |_        |j                  |j                  |_        |j                  |j                  |_        |j                  |j                  |_        |	|j!                  |	       t#        | H  d|dd|d|
 || _        | j)                  ||      | _        y)a  Initializes the Gemini TTS service.

        Args:
            api_key:

                .. deprecated:: 0.0.95
                    The `api_key` parameter is deprecated. Use `credentials` or
                    `credentials_path` instead for Google Cloud authentication.

            model: Gemini TTS model to use. Must be a TTS model like
                   "gemini-2.5-flash-tts" or "gemini-2.5-pro-tts".

                .. deprecated:: 0.0.105
                    Use ``settings=GeminiTTSSettings(model=...)`` instead.

            credentials: JSON string containing Google Cloud service account credentials.
            credentials_path: Path to Google Cloud service account JSON file.
            location: Google Cloud location for regional endpoint (e.g., "us-central1").
            voice_id: Voice name from the available Gemini voices.

                .. deprecated:: 0.0.105
                    Use ``settings=GeminiTTSSettings(voice=...)`` instead.

            sample_rate: Audio sample rate in Hz. If None, uses Google's default 24kHz.
            params: TTS configuration parameters.

                .. deprecated:: 0.0.105
                    Use ``settings=GeminiTTSSettings(...)`` instead.

            settings: Runtime-updatable settings. When provided alongside deprecated
                parameters, ``settings`` values take precedence.
            **kwargs: Additional arguments passed to parent TTSService.
        NzThe 'api_key' parameter is deprecated and will be removed in a future version. Use 'credentials' or 'credentials_path' instead for Google Cloud authentication.   )
stacklevelzGoogle TTS only supports  Hz sample rate. Current rate of Hz may cause issues.zgemini-2.5-flash-ttsr  r(   F)rP  rQ  r    r9  r;  r=  rP  rJ  rQ  Voice ')' not in known voices list. Using anyway.rL  TrR  r	  )warningswarnDeprecationWarningGOOGLE_SAMPLE_RATEr   r  r7  r   rP  rQ  AVAILABLE_VOICESr    rU  r9  r;  r=  rV  rW  rX  rY  rZ  r[  )r\  r  rP  rG  rH  rI  rJ  rK  rL  rM  r]  r^  r_  s               r   rX  zGeminiTTSService.__init__  s   ` MMc"	 ;$*A*AANN+D,C,C+D E##.-/CE -( 
 "7,=wG%*"":/@'J%-"!!)>)>>NN*0011Z[
 "8->???.040Q0QRXRaRa0b$-==,.4mm$+''35;5I5I$2))57=7M7M$4 ))(3 	
#!!%		

 	
 "@D@S@S)A
r   r    r!   c                     t        |      S )zConvert a Language enum to Gemini TTS language format.

        Args:
            language: The language to convert.

        Returns:
            The Gemini TTS-specific language code, or None if not supported.
        )r  rz  s     r   rU  z-GeminiTTSService.language_to_service_language_  r{  r   r  c                    K   t         |   |       d{    | j                  | j                  k7  r1t	        j
                  d| j                   d| j                   d       yy7 Ow)z~Start the Gemini TTS service.

        Args:
            frame: The start frame containing initialization parameters.
        NzGoogle TTS requires r  r  )rW  startrK  r  r   r  )r\  r  r_  s     r   r  zGeminiTTSService.startj  sn      gmE"""t666NN&t'>'>&? @##'#3#3"44HJ 7 	#s   A(A&AA(r|  c                    K   t        |j                        r;|j                  | j                  vr#t        j                  d|j                   d       t
        |   |       d{   S 7 w)zApply a settings delta with voice validation.

        Args:
            delta: Settings delta. Can include 'voice', 'prompt', etc.

        Returns:
            Dict mapping changed field names to their previous values.
        r  r  N)r   rQ  r  r   r  rW  r  )r\  r|  r_  s     r   r  z!GeminiTTSService._update_settingsw  sV      EKK U[[8M8M%MNNWU[[M1Z[\W-e4444s   A#A-&A+'A-r  r  c           
     <  K   t        j                  |  d| d       	 | j                  j                  r| j                  j                  rg }| j                  j                  D ]O  }|j                  t        j                  |d   |j                  d| j                  j                                     Q t        j                  |      }t        j                  | j                  j                  | j                  j                  |      }nTt        j                  | j                  j                  | j                  j                  | j                  j                        }t        j                  |t        j                  t        j                   j"                  | j$                  	      
      }| j'                  |||| j                  j(                        2 3 d{   }| 7 
6 y# t*        $ r'}	dt-        |	       }
t/        |
       Y d}	~	yd}	~	ww xY ww)a  Generate streaming speech from text using Gemini TTS models.

        Args:
            text: The text to synthesize into speech.
            context_id: The context ID for tracking audio frames. Can include markup tags
                  like [sigh], [laughing], [whispering] for expressive control.

        Yields:
            Frame: Audio frames containing the synthesized speech as it's generated.
        r  r  speaker_alias
speaker_id)r   r!  )speaker_voice_configs)r  
model_namemulti_speaker_voice_config)r  r  r#  r  r  NzGemini TTS generation error: r  )r   r  rC  r;  r=  r  r   MultispeakerPrebuiltVoicegetrQ  MultiSpeakerVoiceConfigr  r    rP  r  r  r  r  rK  r  r9  r  r0  r   )r\  r  r  r"  speaker_configr$  rQ  r  r  r  r  s              r   r  zGeminiTTSService.run_tts  s     	v/vQ78/	2~~++0N0N(*%&*nn&D&D N)00'AA*8*I'5'9'9,H\H\'] .=-T-T*?.* (<<"&.."9"9#~~33/I (<<"&.."9"9--#~~33  /HH'6'K'K#2#@#@#D#D&*&6&6(   $// $
DNN4I4I   e   
  	2;CF8DM=111	2sS   HF7G) G'G%G'G) %G''G) (H)	H2HHHH)r,  r-  r.  r/  r7  r  r1  r  r  r   rF  r   r0  r  rX  r   rU  r   r  r   r@  r   r  r   r	   r   r  r  r  s   @r   r  r    sO   4 !H  B5i 5* "&#%)*."&"&%)(,04q
 #q
 }	q

 c]q
 #3-q
 3-q
 3-q
 c]q
 %q
 ,-q
f	9X 	9(3- 	9 5K 5DcN 5 <2# <23 <2>%QU+;V <2 <2r   r  )=r/  rf  osr  (pipecat.utils.tracing.service_decoratorsr   environdataclassesr   r   typingr   r	   r
   r   r   logurur   pydanticr   pipecat.frames.framesr   r   r   r   pipecat.services.settingsr   r   r   r   r   pipecat.services.tts_servicer   pipecat.transcriptions.languager   r   google.api_core.client_optionsr   google.authr   google.auth.exceptionsr   google.cloudr   google.oauth2r   ModuleNotFoundErrorr  r  r  r0  r   r  r  r4  GoogleStreamTTSSettingsr7  rB  r  r  r  r	  r   r   <module>r;     s    	  ? *1

% & ( ? ?     4 F,<#6,-\Ih \I8C= \I~BIh BI8C= BIJ 1K 1 1< W W W ,     B2: B2J
~]: ~]Bl\+ l\^j2+ j2C#  ,FLL;qc"#FLL 	T &qc*
++,s   /D D>2D99D>