
    qir                        d Z ddlZddlmZmZ ddlmZmZ ddlm	Z	 ddl
mZ ddlmZmZmZmZmZmZmZmZ ddlmZ dd	lmZ dd
lmZmZmZmZ ddlmZm Z  ddl!m"Z" ddl#m$Z$ 	 ddl%m&Z&m'Z'm(Z(m)Z)m*Z*m+Z+ de0de*fdZ1e G d de             Z2 G d d      Z3 G d de e3      Z4 G d de e3      Z5y# e,$ r7Z- e	j\                  de-         e	j\                  d        e/de-       dZ-[-ww xY w)z@Azure Cognitive Services Text-to-Speech service implementations.    N)	dataclassfield)AsyncGeneratorOptional)logger)	BaseModel)CancelFrameEndFrame
ErrorFrameFrameInterruptionFrame
StartFrameTTSAudioRawFrameTTSStoppedFrame)FrameDirectionlanguage_to_azure_language)	NOT_GIVENTTSSettings	_NotGiven_warn_deprecated_param)TextAggregationMode
TTSService)Language)
traced_tts)CancellationReasonResultReasonServicePropertyChannelSpeechConfigSpeechSynthesisOutputFormatSpeechSynthesizerzException: zCIn order to use Azure, you need to `pip install pipecat-ai[azure]`.zMissing module: sample_ratereturnc                     t         j                  t         j                  t         j                  t         j                  t         j
                  t         j                  d}|j                  | t         j                        S )a	  Convert sample rate to Azure speech synthesis output format.

    Args:
        sample_rate: Sample rate in Hz.

    Returns:
        Corresponding Azure SpeechSynthesisOutputFormat enum value.
        Defaults to Raw24Khz16BitMonoPcm if sample rate not found.
    )@  i>  i"V  i]  iD  i  )r    Raw8Khz16BitMonoPcmRaw16Khz16BitMonoPcmRaw22050Hz16BitMonoPcmRaw24Khz16BitMonoPcmRaw44100Hz16BitMonoPcmRaw48Khz16BitMonoPcmget)r"   sample_rate_maps     L/opt/pipecat/venv/lib/python3.12/site-packages/pipecat/services/azure/tts.pysample_rate_to_output_formatr/   0   s]     *==*??*AA*??*AA*??O {,G,\,\]]    c                      e Zd ZU dZ ed       Zeez  ed<    ed       Z	eez  ed<    ed       Z
eez  ed<    ed	       Zeez  ed
<    ed       Zeez  ed<    ed       Zeez  ed<    ed       Zeez  ed<   y)AzureTTSSettingsa8  Settings for AzureTTSService and AzureHttpTTSService.

    Parameters:
        emphasis: Emphasis level for speech ("strong", "moderate", "reduced").
        pitch: Voice pitch adjustment (e.g., "+10%", "-5Hz", "high").
        rate: Speech rate adjustment (e.g., "1.0", "1.25", "slow", "fast").
        role: Voice role for expression (e.g., "YoungAdultFemale").
        style: Speaking style (e.g., "cheerful", "sad", "excited").
        style_degree: Intensity of the speaking style (0.01 to 2.0).
        volume: Volume level (e.g., "+20%", "loud", "x-soft").
    c                      t         S Nr    r0   r.   <lambda>zAzureTTSSettings.<lambda>S   s    i r0   )default_factoryemphasisc                      t         S r4   r5   r6   r0   r.   r7   zAzureTTSSettings.<lambda>T       9 r0   pitchc                      t         S r4   r5   r6   r0   r.   r7   zAzureTTSSettings.<lambda>U       ) r0   ratec                      t         S r4   r5   r6   r0   r.   r7   zAzureTTSSettings.<lambda>V   r>   r0   rolec                      t         S r4   r5   r6   r0   r.   r7   zAzureTTSSettings.<lambda>W   r;   r0   stylec                      t         S r4   r5   r6   r0   r.   r7   zAzureTTSSettings.<lambda>X   s    ) r0   style_degreec                      t         S r4   r5   r6   r0   r.   r7   zAzureTTSSettings.<lambda>Y   s    I r0   volumeN)__name__
__module____qualname____doc__r   r9   strr   __annotations__r<   r?   rA   rC   rE   rG   r6   r0   r.   r2   r2   E   s    
 !&6G HHcIoH"3DEE3?E!2CDD#	/D!2CDD#	/D"3DEE3?E$):K$LL#	/L#4EFFC)OFr0   r2   c                       e Zd ZU dZeed<   ddddddZ G d	 d
e      Zde	de	fdZ
dedee	   fdZde	de	fdZde	de	fdZy)AzureBaseTTSServicea1  Base mixin class for Azure Cognitive Services text-to-speech implementations.

    Provides common functionality for Azure TTS services including SSML
    construction, voice configuration, and parameter management.
    This is a mixin class and should be used alongside TTSService or its subclasses.
    	_settingsz&amp;z&lt;z&gt;z&quot;z&apos;)&<>"'c                       e Zd ZU dZdZee   ed<   ej                  Z
ee   ed<   dZee   ed<   dZee   ed<   dZee   ed<   dZee   ed<   dZee   ed	<   dZee   ed
<   y)AzureBaseTTSService.InputParamsa  Input parameters for Azure TTS voice configuration.

        .. deprecated:: 0.0.105
            Use ``settings=AzureTTSSettings(...)`` instead.

        Parameters:
            emphasis: Emphasis level for speech ("strong", "moderate", "reduced").
            language: Language for synthesis. Defaults to English (US).
            pitch: Voice pitch adjustment (e.g., "+10%", "-5Hz", "high").
            rate: Speech rate adjustment (e.g., "1.0", "1.25", "slow", "fast").
            role: Voice role for expression (e.g., "YoungAdultFemale").
            style: Speaking style (e.g., "cheerful", "sad", "excited").
            style_degree: Intensity of the speaking style (0.01 to 2.0).
            volume: Volume level (e.g., "+20%", "loud", "x-soft").
        Nr9   languager<   r?   rA   rC   rE   rG   )rH   rI   rJ   rK   r9   r   rL   rM   r   EN_USrX   r<   r?   rA   rC   rE   rG   r6   r0   r.   InputParamsrW   p   s~    	  #'(3-&'/~~(8$5#x}#"hsm""hsm"#x}#&*hsm* $$r0   rZ   api_keyregionc                .    || _         || _        d| _        y)a)  Initialize Azure-specific configuration.

        This method should be called by subclasses after initializing their TTSService parent.

        Args:
            api_key: Azure Cognitive Services subscription key.
            region: Azure region identifier (e.g., "eastus", "westus2").
        N)_api_key_region_speech_synthesizer)selfr[   r\   s      r.   _init_azure_basez$AzureBaseTTSService._init_azure_base   s      #' r0   rX   r#   c                     t        |      S )zConvert a Language enum to Azure language format.

        Args:
            language: The language to convert.

        Returns:
            The Azure-specific language code, or None if not supported.
        r   ra   rX   s     r.   language_to_service_languagez0AzureBaseTTSService.language_to_service_language   s     *(33r0   textc                 p   | j                   j                  }| j                  |      }d| d| j                   j                   d}| j                   j                  r|d| j                   j                   dz  }| j                   j
                  r|d| j                   j
                   dz  }| j                   j                  r|d| j                   j                   dz  }|dz  }g }| j                   j                  r)|j                  d	| j                   j                   d       | j                   j                  r)|j                  d
| j                   j                   d       | j                   j                  r)|j                  d| j                   j                   d       |r|ddj                  |       dz  }| j                   j                  r|d| j                   j                   dz  }||z  }| j                   j                  r|dz  }|r|dz  }| j                   j                  r|dz  }|dz  }|S )Nz<speak version='1.0' xml:lang='zf' xmlns='http://www.w3.org/2001/10/synthesis' xmlns:mstts='http://www.w3.org/2001/mstts'><voice name='z8'><mstts:silence type='Sentenceboundary' value='20ms' />z<mstts:express-as style='rU   z styledegree='z role='rS   zrate='zpitch='zvolume='z	<prosody  z<emphasis level='z'>z</emphasis>z
</prosody>z</mstts:express-as>z</voice></speak>)rP   rX   _escape_textvoicerC   rE   rA   r?   appendr<   rG   joinr9   )ra   rf   rX   escaped_textssmlprosody_attrss         r.   _construct_ssmlz#AzureBaseTTSService._construct_ssml   s   >>** ((. .hZ 8 !NN001 2EE 	 >>/0D0D/EQGGD~~**.)D)D(EQGG~~""'$.."5"5!6a88CKD>>  6$..*=*=)>a!@A>>  74>>+?+?*@!BC>>    8DNN,A,A+B!!DE i 78::D>>""'(?(?'@CCD>>""M!DL D>>))D""r0   c                 z    |}t         j                  j                         D ]  \  }}|j                  ||      } |S )aq  Escapes XML/SSML reserved characters according to Microsoft documentation.

        This method escapes the following characters:
        - & becomes &amp;
        - < becomes &lt;
        - > becomes &gt;
        - " becomes &quot;
        - ' becomes &apos;

        Args:
            text: The text to escape.

        Returns:
            The escaped text.
        )rO   SSML_ESCAPE_CHARSitemsreplace)ra   rf   rm   charescape_codes        r.   ri   z AzureBaseTTSService._escape_text   sF      !4!F!F!L!L!N 	CD+'//kBL	Cr0   N)rH   rI   rJ   rK   r2   rM   rr   r   rZ   rL   rb   r   r   re   rp   ri   r6   r0   r.   rO   rO   \   s      
 %i %4( ( 	($	4X 	4(3- 	42C 2C 2h  r0   rO   c                       e Zd ZdZeZddddddddededee   dee   dee	j                     d	ee   d
ee   dee   f fdZdefdZdef fdZdef fdZdef fdZdefdZdedefdZd Zd ZdedefdZdedefdZd Zd Zd Zd Ze jB                  fde"d e f fd!Z#d" Z$d'd#ee   fd$Z%de&d e f fd%Z'e(ded#ede)e"df   fd&       Z* xZ+S )(AzureTTSServicea:  Azure Cognitive Services streaming TTS service with word timestamps.

    Provides real-time text-to-speech synthesis using Azure's WebSocket-based
    streaming API. Audio chunks and word boundaries are streamed as they become
    available for lower latency playback and accurate word-level synchronization.
    N)rj   r"   paramssettingsaggregate_sentencestext_aggregation_moder[   r\   rj   r"   ry   rz   r{   r|   c                B   t        dddddddddd
      }
|t        dt         d       ||
_        |t        dt                |s|j                  |
_        |j                  r| j                  |j                        nd|
_        |j                  |
_        |j                  |
_        |j                  |
_        |j                  |
_	        |j                  |
_
        |j                  |
_        ||
j                  |       t        | 8  d||dddd||
d	|	 | j                  ||
       d| _        d| _        t%        j&                         | _        t%        j&                         | _        d| _        d| _        d| _        d| _        d| _        d| _        d| _        d| _        y)aW  Initialize the Azure streaming TTS service.

        Args:
            api_key: Azure Cognitive Services subscription key.
            region: Azure region identifier (e.g., "eastus", "westus2").
            voice: Voice name to use for synthesis.

                .. deprecated:: 0.0.105
                    Use ``settings=AzureTTSSettings(voice=...)`` instead.

            sample_rate: Audio sample rate in Hz. If None, uses service default.
            params: Voice and synthesis parameters configuration.

                .. deprecated:: 0.0.105
                    Use ``settings=AzureTTSSettings(...)`` instead.

            settings: Runtime-updatable settings. When provided alongside deprecated
                parameters, ``settings`` values take precedence.
            aggregate_sentences: Deprecated. Use text_aggregation_mode instead.

                .. deprecated:: 0.0.104
                    Use ``text_aggregation_mode`` instead.

            text_aggregation_mode: How to aggregate text before synthesis.
            **kwargs: Additional arguments passed to parent WordTTSService.
        Nen-US-SaraNeuralen-US
modelrj   rX   r9   r<   r?   rA   rC   rE   rG   rj   ry   FT)r{   r|   push_text_framespush_stop_framespush_start_framepause_frame_processingr"   rz   r[   r\           r6   )r2   r   rj   r9   rX   re   r<   r?   rA   rC   rE   rG   apply_updatesuper__init__rb   _speech_configr`   asyncioQueue_audio_queue_word_boundary_queue_word_processor_task_cumulative_audio_offset_current_sentence_base_offset_current_sentence_duration!_current_sentence_max_word_offset
_last_word_last_timestamp_current_context_id)ra   r[   r\   rj   r"   ry   rz   r{   r|   kwargsdefault_settings	__class__s              r.   r   zAzureTTSService.__init__   s   P ,$
 "7,<gF%*" "8-=>,2OO )  55fooF  !)
 *0 &(. %(. %)/ &060C0C -*0-- ' ))(3 
	
 3"7"!!#'#%
	
 
	
 	gf="#' #MMO$+MMO!$(!/2%47*14' 	. *.04 	 r0   r#   c                      yzCheck if this service can generate processing metrics.

        Returns:
            True, as Azure TTS service supports metrics generation.
        Tr6   ra   s    r.   can_generate_metricsz$AzureTTSService.can_generate_metricsj       r0   framec                   K   t         |   |       d{    | j                  ryt        | j                  | j
                        | _        | j                  j                  | j                  _        | j                  j                  t        | j                               | j                  j                  ddt        j                         t        | j                  d      | _        | j                   j"                  j%                  | j&                         | j                   j(                  j%                  | j*                         | j                   j,                  j%                  | j.                         | j                   j0                  j%                  | j2                         | j4                  s%| j7                  | j9                               | _        yy7 w)zStart the Azure TTS service and initialize speech synthesizer.

        Args:
            frame: Start frame containing initialization parameters.
        Nsubscriptionr\   z8synthesizer.synthesis.connection.synthesisConnectionImpl	websocketspeech_configaudio_config)r   startr   r   r^   r_   rP   rX   speech_synthesis_language"set_speech_synthesis_output_formatr/   r"   set_service_propertyr   UriQueryParameterr!   r`   synthesizingconnect_handle_synthesizingsynthesis_completed_handle_completedsynthesis_canceled_handle_canceledsynthesis_word_boundary_handle_word_boundaryr   create_task_word_processor_task_handlerra   r   r   s     r.   r   zAzureTTSService.startr  sr     gmE""" +<<
 9=8O8O5>>()9)9:	
 	00F"44	
 $5--D$
 
 	  --55d6O6OP  44<<T=S=ST  33;;D<Q<QR  88@@A[A[\ (((,(8(89Z9Z9\(]D% )? 	#s   GGG Gc                    K   t         |   |       d{    | j                  | j                         d{    d| _        y7 /7 w)zhStop the Azure TTS service.

        Args:
            frame: End frame signaling service stop.
        N)r   stopcancel_taskr   r   s     r.   r   zAzureTTSService.stop  sF      gl5!!!t88999$(! 	"9   A
A"A
AA
A
c                    K   t         |   |       d{    | j                  | j                         d{    d| _        y7 /7 w)zuCancel the Azure TTS service.

        Args:
            frame: Cancel frame signaling service cancellation.
        N)r   cancelr   r   r   s     r.   r   zAzureTTSService.cancel  sF      gnU###t88999$(! 	$9r   c                     | j                   j                  r| j                   j                  ndj                         }|j                  d      S )zCheck if the configured language is CJK (Chinese, Japanese, Korean).

        Returns:
            True if the language is CJK, False otherwise.
         )zhjakocmnyuewuu)rP   rX   lower
startswithrd   s     r.   _is_cjk_languagez AzureTTSService._is_cjk_language  s<     04~~/F/FDNN++BUUW""#JKKr0   rf   c                 .    |xr t        d |D              S )zCheck if text consists only of punctuation and whitespace.

        Args:
            text: Text to check.

        Returns:
            True if text is only punctuation/whitespace, False otherwise.
        c              3   >   K   | ]  }|j                            y wr4   )isalnum).0cs     r.   	<genexpr>z7AzureTTSService._is_punctuation_only.<locals>.<genexpr>  s     :		O:s   )all)ra   rf   s     r.   _is_punctuation_onlyz$AzureTTSService._is_punctuation_only  s     ::T:::r0   c                     |j                   }|j                  dz  }| j                  |z   }|| j                  kD  r|| _        |sy| j	                         r| j                  ||       y| j                  ||       y)a  Handle word boundary events from Azure SDK.

        Azure sends punctuation as separate word boundaries, and breaks CJK text
        into individual characters/particles. This method routes to language-specific
        handlers to properly merge and emit word boundaries.

        Args:
            evt: SpeechSynthesisWordBoundaryEventArgs from Azure Speech SDK
                containing word text and audio offset timing.
        g    cAN)rf   audio_offsetr   r   r   _handle_cjk_word_boundary_handle_non_cjk_word_boundary)ra   evtwordsentence_relative_secondsabsolute_secondss        r.   r   z%AzureTTSService._handle_word_boundary  s     xx$'$4$4|$C!  ==@YY %t'M'MM5ND2   "**41AB..t5EFr0   c                     | j                   @| j                  j                  | j                   | j                  f       d| _         d| _        yy)z/Emit the currently buffered word if one exists.N)r   r   
put_nowaitr   r   s    r.   _emit_pending_wordz"AzureTTSService._emit_pending_word  sC    ??&%%00$//4CWCW1XY"DO#'D  'r0   r   	timestampc                 6   | j                   || _         || _        y| j                  |      r&| xj                   |z  c_         | j                          y|j	                         |k7  r| j                          || _         || _        y| xj                   |z  c_         y)a  Handle word boundaries for CJK languages (Chinese, Japanese, Korean).

        CJK languages don't use spaces between words, so we merge characters together
        and only emit at natural break points (punctuation or whitespace boundaries).
        Without this logic, we don't get word output for CJK languages.

        Args:
            word: The word/character from Azure.
            timestamp: Timestamp in seconds.
        N)r   r   r   r   stripra   r   r   s      r.   r   z)AzureTTSService._handle_cjk_word_boundary  s     ??""DO#,D  $$T*OOt#O##% ::<4##%"DO#,D  	4r0   c                     | j                  |      r"| j                  | xj                  |z  c_        y| j                  1| j                  j                  | j                  | j                  f       || _        || _        y)a  Handle word boundaries for non-CJK languages.

        Non-CJK languages use spaces between words, so we emit each word separately
        after merging any trailing punctuation.

        Args:
            word: The word from Azure.
            timestamp: Timestamp in seconds.
        N)r   r   r   r   r   r   s      r.   r   z-AzureTTSService._handle_non_cjk_word_boundary  sj     $$T*t/JOOt#O ??&%%00$//4CWCW1XY(r0   c                   K   	 	 | j                   j                          d{   \  }}| j                  r'| j                  ||fg| j                         d{    | j                   j	                          t7 U7 !# t
        j                  $ r Y yt        $ r)}| j                  d| |       d{  7   Y d}~Fd}~ww xY ww)zDProcess word timestamps from the queue and call add_word_timestamps.NUnknown error occurred: 	error_msg	exception)	r   r,   r   add_word_timestamps	task_doner   CancelledError	Exception
push_error)ra   r   timestamp_secondses       r.   r   z,AzureTTSService._word_processor_task_handler#  s     
]040I0I0M0M0O*O''++22 123T5M5M   ))335 *O ))  ]oo2J1#0NZ[o\\\]sh   CA< A85A< A:A< 7C8A< :A< <CCCB=2B53B=8C=CCc                     |j                   rG|j                   j                  r0| j                  j                  |j                   j                         yyy)zsHandle audio chunks as they arrive.

        Args:
            evt: Synthesis event containing audio data.
        N)result
audio_datar   r   ra   r   s     r.   r   z$AzureTTSService._handle_synthesizing2  s<     ::#**//(()>)>? 0:r0   c                 f   | j                   ?| j                  j                  | j                   | j                  f       d| _         d| _        |j                  r?|j                  j
                  r)|j                  j
                  j                         | _        | j                  j                  d       y)zmHandle synthesis completion.

        Args:
            evt: Completion event from Azure Speech SDK.
        N)	r   r   r   r   r   audio_durationtotal_secondsr   r   r   s     r.   r   z!AzureTTSService._handle_completed;  s     ??&%%00$//4CWCW1XY"DO#'D  ::#**33.1jj.G.G.U.U.WD+$$T*r0   c                    |j                   j                  j                  }|t        j                  k(  r4t        j                  |  d       | j                  j                  d       y|j                   j                  }d| }|j                  r|d|j                   z  }| j                  j                  t        |             y)z[Handle synthesis cancellation.

        Args:
            evt: Cancellation event.
        z2: Speech synthesis canceled by user (interruption)NzAzure TTS synthesis canceled: z - )r   cancellation_detailsreasonr   CancelledByUserr   debugr   r   error_detailsr   )ra   r   r   detailsr   s        r.   r   z AzureTTSService._handle_canceledM  s     0077'777LLD6!STU((.jj55G8AI$$s7#8#8"9::	((9)=>r0   	directionc                   K   t         |   ||       d{    t        |t        t        f      rT| j                          t        |t              r3| j                  r&| j                  dg| j                         d{    yyyy7 o7 
w)zPush a frame and handle state changes.

        Args:
            frame: The frame to push.
            direction: The direction to push the frame.
        N)Resetr   )r   
push_frame
isinstancer   r   _reset_stater   r   )ra   r   r   r   s      r.   r   zAzureTTSService.push_frame_  s      g 	222eo/@AB%1d6N6N..~t?W?WXXX 7O1 C 	3 Ys"   BBA&B>B	?	B	Bc                 f    d| _         d| _        d| _        d| _        d| _        d| _        d| _        y)zReset TTS state between turns.r   N)r   r   r   r   r   r   r   r   s    r.   r  zAzureTTSService._reset_statel  s9    (+%-0**-'14.##' r0   
context_idc                 <   K   t        j                  |  d       yw)zFlush any pending audio data.z: flushing audioN)r   trace)ra   r  s     r.   flush_audiozAzureTTSService.flush_audiov  s     v-./s   c                   K   t         |   ||       d{    | j                          d{    | j                  rB	 | j                  j	                         }t        j                  |j                         d{    | j                          | j                  j                         sP	 | j                  j                          | j                  j                          | j                  j                         sP| j                   j                         sQ	 | j                   j                          | j                   j                          | j                   j                         sPyy7 Q7 <7 # t        $ r*}| j                  d| |       d{  7   Y d}~d}~ww xY w# t
        j                  $ r Y w xY w# t
        j                  $ r Y yw xY ww)zHandle interruption by stopping current synthesis.

        Args:
            frame: The interruption frame.
            direction: Frame processing direction.
        Nr   r   )r   _handle_interruptionstop_all_metricsr`   stop_speaking_asyncr   	to_threadr,   r   r   r  r   empty
get_nowaitr   
QueueEmptyr   )ra   r   r   result_futurer   r   s        r.   r  z$AzureTTSService._handle_interruptionz  s     g*5)<<<##%%% ##] !% 8 8 L L N''(9(9:::
 	##))+!!,,.!!++- ##))+ ++113))446))335 ++113/ 	=% ; ]oo2J1#0NZ[o\\\] %%  %% s   GE(GE+G<E0 =E.>E0 *G-4F& !G<G4F? G&G+G.E0 0	F#9FFFGF##G&F<9G;F<<G?GGGGc                h  K   t        j                  |  d| d       | j                  j                         sO| j                  j	                          | j                  j                          | j                  j                         sO	 | j                  y	 || _        | j                  | _	        d| _
        d| _        | j                  |      }| j                  j                  |       | j                  |       d{    	 | j                  j                          d{   }|nHt!        |t"              rt%        t'        |             nt)        || j*                  d|      }| m| j*                  dk(  r | xj                  | j                  z  c_        n | xj                  | j                  z  c_        yy7 7 # t"        $ r;}t%        d	|        t-        |
       | j/                          Y d}~yd}~ww xY w# t"        $ r}t%        d	|        Y d}~yd}~ww xY ww)a!  Generate speech from text using Azure's streaming synthesis.

        Args:
            text: The text to synthesize into speech.
            context_id: The context ID for tracking audio frames.

        Yields:
            Frame: Audio frames containing synthesized speech data.
        : Generating TTS []Nr   error   audior"   num_channelsr  r%   r   )r  )r   r   r   r  r  r   r`   r   r   r   r   r   rp   speak_ssml_asyncstart_tts_usage_metricsr,   r   r   r   rL   r   r"   r   r  )ra   rf   r  rn   chunkr   r   s          r.   run_ttszAzureTTSService.run_tts  s     	v/vQ78
 ##))+((*'') ##))+4	C''/-+5( 6:5R5R225/9<6++D1((99$?224888 "&"3"3"7"7"99E}!%3(s5z::,#$($4$4%&#-	E  K * ##t+11T5[5[[111T5T5TT1 25 9
 :2   )A!'EFF%<<!!#	  	C%=aS#ABBB	Cs   BH2H
 H2A&G <F?="G G BG =H2?G G 	H1H=H
 H2HH
 
	H/H*%H2*H//H2r4   ),rH   rI   rJ   rK   r2   SettingsrL   r   intrO   rZ   boolr   r   r   r   r   r
   r   r	   r   r   r   r   r   floatr   r   r   r   r   r   r   
DOWNSTREAMr   r   r  r  r   r  r   r   r  __classcell__r   s   @r.   rx   rx      s     H  $%)<@/3.2?Cm
 m
 	m

 }m
 c]m
 ,889m
 +,m
 &d^m
  ((;<m
^d &^ &^P) ))+ )L$ L	; 	; 	;!GF( c  e  B)# )% )*]@+$?$ JXIbIb Ye Y Y(0HSM 0#0A #n #J GC# GC3 GC>%QU+;V GC GCr0   rx   c                        e Zd ZdZeZddddddededee   dee   dee	j                     d	ee   f fd
ZdefdZdef fdZedededeedf   fd       Z xZS )AzureHttpTTSServicezAzure Cognitive Services HTTP-based TTS service.

    Provides text-to-speech synthesis using Azure's HTTP API for simpler,
    non-streaming synthesis. Suitable for use cases where streaming is not
    required and simpler integration is preferred.
    N)rj   r"   ry   rz   r[   r\   rj   r"   ry   rz   c                f   t        dddddddddd
      }|t        dt         d       ||_        |t        dt                |s|j                  |_        |j                  r| j                  |j                        nd|_        |j                  |_        |j                  |_        |j                  |_        |j                  |_	        |j                  |_
        |j                  |_        ||j                  |       t        	| 8  d
|dd|d| | j                  ||	       d| _        d| _        y)aN  Initialize the Azure HTTP TTS service.

        Args:
            api_key: Azure Cognitive Services subscription key.
            region: Azure region identifier (e.g., "eastus", "westus2").
            voice: Voice name to use for synthesis.

                .. deprecated:: 0.0.105
                    Use ``settings=AzureTTSSettings(voice=...)`` instead.

            sample_rate: Audio sample rate in Hz. If None, uses service default.
            params: Voice and synthesis parameters configuration.

                .. deprecated:: 0.0.105
                    Use ``settings=AzureTTSSettings(...)`` instead.

            settings: Runtime-updatable settings. When provided alongside deprecated
                parameters, ``settings`` values take precedence.
            **kwargs: Additional arguments passed to parent TTSService.
        Nr~   r   r   rj   ry   T)r"   r   r   rz   r   r6   )r2   r   rj   r9   rX   re   r<   r?   rA   rC   rE   rG   r   r   r   rb   r   r`   )
ra   r[   r\   rj   r"   ry   rz   r   r   r   s
            r.   r   zAzureHttpTTSService.__init__  sM   @ ,$
 "7,<gF%*" "8-=>,2OO )  55fooF  !)
 *0 &(. %(. %)/ &060C0C -*0-- ' ))(3 	
#!!%		

 	
 	gf="#' r0   r#   c                      yr   r6   r   s    r.   r   z(AzureHttpTTSService.can_generate_metricsI  r   r0   r   c                   K   t         |   |       d{    | j                  ryt        | j                  | j
                        | _        | j                  j                  | j                  _        | j                  j                  t        | j                               t        | j                  d      | _        y7 w)zStart the Azure HTTP TTS service and initialize speech synthesizer.

        Args:
            frame: Start frame containing initialization parameters.
        Nr   r   )r   r   r   r   r^   r_   rP   rX   r   r   r/   r"   r!   r`   r   s     r.   r   zAzureHttpTTSService.startQ  s      gmE"""*<<
 9=8O8O5>>()9)9:	
 $5--D$
  	#s   C B>B(C rf   r  c                  K   t        j                  |  d| d       | j                  |      }t        j                  | j
                  j                  |       d{   }|j                  t        j                  k(  r[| j                  |       d{    | j                          d{    t        |j                  dd | j                  d|       y|j                  t        j                  k(  ri|j                   }t        j"                  d|j                          |j                  t$        j&                  k(  rt)        d|j*                   	       yyy7 7 7 w)
a(  Generate speech from text using Azure's HTTP synthesis API.

        Args:
            text: The text to synthesize into speech.
            context_id: The context ID for tracking audio frames.

        Yields:
            Frame: Audio frames containing the complete synthesized speech.
        r  r  N,   r  r  zSpeech synthesis canceled: r   r  )r   r   rp   r   r  r`   
speak_ssmlr   r   SynthesizingAudioCompletedr  stop_ttfb_metricsr   r   r"   Canceledr   warningr   Errorr   r   )ra   rf   r  rn   r   r   s         r.   r  zAzureHttpTTSService.run_ttsh  sB     	v/vQ78##D)(()A)A)L)LdSS==LCCC..t444((***"'', ,,%	  ]]l333#)#>#> NN89M9T9T8UVW#**.@.F.FF 45I5W5W4XY  G 4 T 5*s7   AE'E 5E'E#E'*E%+B6E'#E'%E')rH   rI   rJ   rK   r2   r  rL   r   r  rO   rZ   r   r  r   r   r   r   r   r   r  r"  r#  s   @r.   r%  r%    s      H  $%)<@/3S( S( 	S(
 }S( c]S( ,889S( +,S(jd 
 
.  #  3  >%QU+;V    r0   r%  )6rK   r   dataclassesr   r   typingr   r   logurur   pydanticr   pipecat.frames.framesr	   r
   r   r   r   r   r   r   "pipecat.processors.frame_processorr   pipecat.services.azure.commonr   pipecat.services.settingsr   r   r   r   pipecat.services.tts_servicer   r   pipecat.transcriptions.languager   (pipecat.utils.tracing.service_decoratorsr   azure.cognitiveservices.speechr   r   r   r   r    r!   ModuleNotFoundErrorr   r  r   r  r/   r2   rO   rx   r%  r6   r0   r.   <module>r>     s    G  ( +  	 	 	 > D _ _ H 4 ?, ^c ^6Q ^* G{ G G,R RjvCj"5 vCr_*&9 _A  ,FLL;qc"#FLLVW
&qc*
++,s   $B/ /C+42C&&C+