
    qi#-                        d Z ddlZddlmZ ddlmZmZmZ ddlm	Z	 ddl
mZmZmZmZmZmZmZ ddlmZ ddlmZmZ dd	lmZ dd
lmZ ddlmZ ddlmZ ddlm Z  	 ddl!m"Z"m#Z#m$Z$m%Z% ddl&m'Z'm(Z( ddl)m*Z* e G d de             Z/ G d de      Z0y# e+$ r7Z, e	jZ                  de,         e	jZ                  d        e.de,       dZ,[,ww xY w)zAzure Speech-to-Text service implementation for Pipecat.

This module provides speech-to-text functionality using Azure Cognitive Services
Speech SDK for real-time audio transcription.
    N)	dataclass)AnyAsyncGeneratorOptional)logger)CancelFrameEndFrame
ErrorFrameFrameInterimTranscriptionFrame
StartFrameTranscriptionFramelanguage_to_azure_language)STTSettings_warn_deprecated_param)AZURE_TTFS_P99)
STTService)Language)time_now_iso8601)
traced_stt)CancellationReasonResultReasonSpeechConfigSpeechRecognizer)AudioStreamFormatPushAudioInputStream)AudioConfigzException: zCIn order to use Azure, you need to `pip install pipecat-ai[azure]`.zMissing module: c                       e Zd ZdZy)AzureSTTSettingszSettings for AzureSTTService.N)__name__
__module____qualname____doc__     L/opt/pipecat/venv/lib/python3.12/site-packages/pipecat/services/azure/stt.pyr    r    6   s    'r&   r    c                   t    e Zd ZU dZeZeed<   dej                  dddde	dde
dee
   dee   dee   d	ee
   d
ee
   dee   dee   f fdZdefdZdedee
   fdZdedee
ef   f fdZdedeedf   fdZdef fdZdef fdZdef fdZd Z d Z!e"	 d!de
dedee   fd       Z#d Z$d Z%d  Z& xZ'S )"AzureSTTServicea"  Azure Speech-to-Text service for real-time audio transcription.

    This service uses Azure Cognitive Services Speech SDK to convert speech
    audio into text transcriptions. It supports continuous recognition and
    provides real-time transcription results with timing information.
    	_settingsN)regionlanguagesample_rateprivate_endpointendpoint_idsettingsttfs_p99_latencyapi_keyr+   r,   r-   r.   r/   r0   r1   c                4   t        dt        t        j                              }
|4|t        j                  k7  r!t	        dt         d       t        |      |
_        ||
j                  |       t        |    d	|||
d|	 |
j
                  xs t        t        j                        }|s|st        d      |r+|rt        j                  d       t        |||      | _        nt        |||      | _        |r|| j                  _        d| _        d| _        y)
a  Initialize the Azure STT service.

        Args:
            api_key: Azure Cognitive Services subscription key.
            region: Azure region for the Speech service (e.g., 'eastus').
                Required unless ``private_endpoint`` is provided.
            language: Language for speech recognition. Defaults to English (US).

                .. deprecated:: 0.0.105
                    Use ``settings=AzureSTTSettings(language=...)`` instead.

            sample_rate: Audio sample rate in Hz. If None, uses service default.
            private_endpoint: Private endpoint for STT behind firewall.
                See https://learn.microsoft.com/en-us/azure/ai-services/speech-service/speech-services-private-link?tabs=portal
            endpoint_id: Custom model endpoint id.
            settings: Runtime-updatable settings. When provided alongside deprecated
                parameters, ``settings`` values take precedence.
            ttfs_p99_latency: P99 latency from speech end to final transcript in seconds.
                Override for your deployment. See https://github.com/pipecat-ai/stt-benchmark
            **kwargs: Additional arguments passed to parent STTService.
        N)modelr,   r,   )r-   r1   r0   z7Either 'region' or 'private_endpoint' must be provided.zHBoth 'region' and 'private_endpoint' provided; 'region' will be ignored.)subscriptionendpointspeech_recognition_language)r5   r+   r7   r%   )r    r   r   EN_USr   r,   apply_updatesuper__init__
ValueErrorr   warningr   _speech_configr/   _audio_stream_speech_recognizer)selfr2   r+   r,   r-   r.   r/   r0   r1   kwargsdefault_settingsrecognition_language	__class__s               r'   r;   zAzureSTTService.__init__H   s'   F ,/?
 H$>":/?L(B8(L%
 ))(3 	
#-%	
 		
  088  
<VNN=
 .VWW^ #/$),@#D #/$,@#D .9D+!"&r&   returnc                      y)zCheck if this service can generate performance metrics.

        Returns:
            True as this service supports metrics generation.
        Tr%   rA   s    r'   can_generate_metricsz$AzureSTTService.can_generate_metrics   s     r&   c                     t        |      S )zConvert a Language enum to Azure service-specific language code.

        Args:
            language: The language to convert.

        Returns:
            The Azure-specific language identifier, or None if not supported.
        r   )rA   r,   s     r'   language_to_service_languagez,AzureSTTService.language_to_service_language   s     *(33r&   deltac                 J  K   t         |   |       d{   }d|v r|| j                  j                  xs t	        t
        j                        | j                  _        | j                  r0| j                          d{    | j                          d{    |S 7 7  7 
w)z9Apply a settings delta and reconnect if language changed.Nr,   )r:   _update_settingsr*   r,   r   r   r8   r>   r7   r?   _disconnect_connect)rA   rL   changedrE   s      r'   rN   z AzureSTTService._update_settings   s     077 ''U+Ehnn+U ; !!&&(((mmo%% 8 )%s4   B#BA'B#>B?B#B!B#B#!B#audioc                   K   	 | j                          d{    | j                  r| j                  j                  |       d y7 1# t        $ r}t	        d|        Y d}~yd}~ww xY ww)ao  Process audio data for speech-to-text conversion.

        Feeds audio data to the Azure speech recognizer for processing.
        Recognition results are handled asynchronously through callbacks.

        Args:
            audio: Raw audio bytes to process.

        Yields:
            Frame: Either None for successful processing or ErrorFrame on failure.
        NzUnknown error occurred: )error)start_processing_metricsr?   write	Exceptionr
   )rA   rR   es      r'   run_sttzAzureSTTService.run_stt   sn     	C//111!!""((/J 2  	C%=aS#ABBB	Cs>   A3A A	0A A3	A 	A0A+&A3+A00A3framec                 t   K   t         |   |       d{    | j                          d{    y7 7 w)zzStart the speech recognition service.

        Args:
            frame: Frame indicating the start of processing.
        N)r:   startrP   rA   rZ   rE   s     r'   r\   zAzureSTTService.start   s3      gmE"""mmo 	#   848688c                 t   K   t         |   |       d{    | j                          d{    y7 7 w)zwStop the speech recognition service.

        Args:
            frame: Frame indicating the end of processing.
        N)r:   stoprO   r]   s     r'   r`   zAzureSTTService.stop   s6      gl5!!!    	" r^   c                 t   K   t         |   |       d{    | j                          d{    y7 7 w)zpCancel the speech recognition service.

        Args:
            frame: Frame indicating cancellation.
        N)r:   cancelrO   r]   s     r'   rb   zAzureSTTService.cancel   s6      gnU###    	$ r^   c                   K   | j                   ry	 t        | j                  d      }t        |      | _         t	        | j                         }t        | j                  |      | _        | j                  j                  j                  | j                         | j                  j                  j                  | j                         | j                  j                  j                  | j                         | j                  j                          y# t         $ r)}| j#                  d| |       d{  7   Y d}~yd}~ww xY ww)zHInitialize the Azure speech recognizer and begin continuous recognition.N   )samples_per_secondchannels)stream)speech_configaudio_configz*Uncaught exception during initialization: )	error_msg	exception)r?   r   r-   r   r   r   r>   r@   recognizingconnect_on_handle_recognizing
recognized_on_handle_recognizedcanceled_on_handle_canceled"start_continuous_recognition_asyncrW   
push_error)rA   stream_formatri   rX   s       r'   rP   zAzureSTTService._connect   s
    	-AQAQ\]^M!5m!DD&d.@.@AL&6"11'D# ##//778S8ST##..66t7Q7QR##,,44T5M5MN##FFH 	//FqcJVW "   	s;   ED D E	ED?4D75D?:E?EEc                    K   | j                   r!| j                   j                          d| _         | j                  r"| j                  j                          d| _        yyw)z)Stop recognition and close audio streams.N)r@   !stop_continuous_recognition_asyncr?   closerH   s    r'   rO   zAzureSTTService._disconnect  sQ     ""##EEG&*D#$$&!%D s   AA
transcriptis_finalc                 @   K   | j                          d{    y7 w)z+Handle a transcription result with tracing.N)stop_processing_metrics)rA   ry   rz   r,   s       r'   _handle_transcriptionz%AzureSTTService._handle_transcription  s     
 **,,,s   c                 ^   |j                   j                  t        j                  k(  rt	        |j                   j
                        dkD  rt        |j                   dd       xs | j                  j                  }t        |j                   j
                  | j                  t               ||      }t        j                  | j                  |j                   j
                  d|      | j                                t        j                  | j!                  |      | j                                y y y )Nr   r,   resultT)r   reasonr   RecognizedSpeechlentextgetattrr*   r,   r   _user_idr   asynciorun_coroutine_threadsafer}   get_event_loop
push_framerA   eventr,   rZ   s       r'   rp   z%AzureSTTService._on_handle_recognized  s    <<,"?"??CHYHYDZ]^D^u||Z>Y$..BYBYH&!! "E ,,**5<<+<+<dHMtObObOd ,,T__U-CTEXEXEZ[ E_?r&   c                    |j                   j                  t        j                  k(  rt	        |j                   j
                        dkD  rt        |j                   dd       xs | j                  j                  }t        |j                   j
                  | j                  t               ||      }t        j                  | j                  |      | j                                y y y )Nr   r,   r   )r   r   r   RecognizingSpeechr   r   r   r*   r,   r   r   r   r   r   r   r   r   s       r'   rn   z&AzureSTTService._on_handle_recognizing'  s    <<,"@"@@SIZIZE[^_E_u||Z>Y$..BYBYH-!! "E ,,T__U-CTEXEXEZ[ F`@r&   c                 .   |j                   j                  }|j                  t        j                  k(  rbd|j                   }|j
                  r|d|j
                   z  }t        j                  | j                  |      | j                                y y )Nz Azure STT recognition canceled: z - )rj   )
r   cancellation_detailsr   r   Errorerror_detailsr   r   rt   r   )rA   r   detailsrj   s       r'   rr   z#AzureSTTService._on_handle_canceled3  s    ,,33>>/555:7>>:JKI$$s7#8#8"9::	,,)4d6I6I6K	 6r&   )N)(r!   r"   r#   r$   r    Settings__annotations__r   r8   r   strr   intfloatr;   boolrI   rK   r   dictr   rN   bytesr   r   rY   r   r\   r	   r`   r   rb   rP   rO   r   r}   rp   rn   rr   __classcell__)rE   s   @r'   r)   r)   =   s~     H !%'/~~%)*.%)/3,:V' V' 	V'
 8$V' c]V' #3-V' c]V' +,V' #5/V'pd 	4X 	4(3- 	4K DcN C5 C^E4K-H C( ! !!+ !.& NR--)--9A(9K- -\
\r&   r)   )1r$   r   dataclassesr   typingr   r   r   logurur   pipecat.frames.framesr   r	   r
   r   r   r   r   pipecat.services.azure.commonr   pipecat.services.settingsr   r   pipecat.services.stt_latencyr   pipecat.services.stt_servicer   pipecat.transcriptions.languager   pipecat.utils.timer   (pipecat.utils.tracing.service_decoratorsr   azure.cognitiveservices.speechr   r   r   r   $azure.cognitiveservices.speech.audior   r   %azure.cognitiveservices.speech.dialogr   ModuleNotFoundErrorrX   rT   rW   r    r)   r%   r&   r'   <module>r      s     ! 0 0    E I 7 3 4 / ?,  B 	{ 	 	~j ~  ,FLL;qc"#FLLVW
&qc*
++,s   B C2C		C