
    qia                        d Z ddlZddlZddlZddlmZ ddlmZmZm	Z	 ddl
Z
ddlmZ ddlmZ ddlmZmZmZmZmZmZmZ ddlmZ dd	lmZmZ dd
lmZmZmZ ddl m!Z!m"Z" ddl#m$Z$ 	 ddl%Z%ddl&m'Z( ddl)m*Z* de!de	e/   fdZ0e G d de             Z1 G d de      Z2 G d de      Z3y# e+$ r7Z, ejZ                  de,         ejZ                  d        e.de,       dZ,[,ww xY w)z-Async text-to-speech service implementations.    N)	dataclass)AnyAsyncGeneratorOptional)logger)	BaseModel)CancelFrameEndFrame
ErrorFrameFrame
StartFrameTTSAudioRawFrameTTSStoppedFrame)FrameDirection)TTSSettings_warn_deprecated_param)TextAggregationMode
TTSServiceWebsocketTTSService)Languageresolve_language)
traced_tts)connect)StatezException: zEIn order to use Async, you need to `pip install pipecat-ai[asyncai]`.zMissing module: languagereturnc                 B   i t         j                  dt         j                  dt         j                  dt         j                  dt         j
                  dt         j                  dt         j                  dt         j                  dt         j                  d	t         j                  d
t         j                  dt         j                  dt         j                  dt         j                  dt         j                  dt         j                   d}t#        | |d      S )zConvert a Language enum to Async language code.

    Args:
        language: The Language enum value to convert.

    Returns:
        The corresponding Async language code, or None if not supported.
    enfresdeitptnlarrurojahehytrhizhT)use_base_code)r   ENFRESDEITPTNLARRUROJAHEHYTRHIZHr   )r   LANGUAGE_MAPs     N/opt/pipecat/venv/lib/python3.12/site-packages/pipecat/services/asyncai/tts.pylanguage_to_async_languagerA   ,   s   TT 	T 	T	
 	T 	T 	T 	T 	T 	T 	T 	T 	T 	T 	T  	T!L& Hl$GG    c                       e Zd ZdZy)AsyncAITTSSettingsz9Settings for AsyncAITTSService and AsyncAIHttpTTSService.N)__name__
__module____qualname____doc__ rB   r@   rD   rD   K   s    CrB   rD   c                       e Zd ZU dZeZeed<    G d de      Zddddddd	ddddd
de	de
e	   de	de	de
e	   de
e   de	de	de
e   de
e   de
e   de
e   f fdZdedee	ef   f fdZdefdZdede
e	   fdZd4de	de	d ede	fd!Zd"ef fd#Zd"ef fd$Zd"ef fd%Z fd&Z fd'Zd( Zd) Z d* Z!d5de
e	   fd+Z"e#jH                  fd"e%d,e#f fd-Z&d. Z'd/ Z(de	fd0Z)de	fd1Z*de	fd2Z+e,de	de	de-e%df   fd3       Z. xZ/S )6AsyncAITTSServiceztAsync TTS service with WebSocket streaming.

    Provides text-to-speech using Async's streaming WebSocket API.
    	_settingsc                   &    e Zd ZU dZdZee   ed<   y)AsyncAITTSService.InputParamszInput parameters for Async TTS configuration.

        .. deprecated:: 0.0.105
            Use ``AsyncAITTSSettings`` directly via the ``settings`` parameter instead.

        Parameters:
            language: Language to use for synthesis.
        Nr   rE   rF   rG   rH   r   r   r   __annotations__rI   rB   r@   InputParamsrN   [       	 (,(8$+rB   rQ   Nv1z/wss://api.async.com/text_to_speech/websocket/ws	pcm_s16leraw)voice_idversionurlmodelsample_rateencoding	containerparamssettingsaggregate_sentencestext_aggregation_modeapi_keyrV   rW   rX   rY   rZ   r[   r\   r]   r^   r_   r`   c                   t        ddd      }|t        dt         d       ||_        |t        dt         d       ||_        |	@t        dt                |
s.|	j                  r| j                  |	j                        nd|_        |
|j                  |
       t        |    d||d|dd|d	| || _	        || _
        || _        || _        || _        d
| _        d| _        d| _        y)a  Initialize the Async TTS service.

        Args:
            api_key: Async API key.
            voice_id: UUID of the voice to use for synthesis. See docs for a full list:
                https://docs.async.com/list-voices-16699698e0

                .. deprecated:: 0.0.105
                    Use ``settings=AsyncAITTSSettings(voice=...)`` instead.

            version: Async API version.
            url: WebSocket URL for Async TTS API.
            model: TTS model to use (e.g., "async_flash_v1.0").

                .. deprecated:: 0.0.105
                    Use ``settings=AsyncAITTSSettings(model=...)`` instead.

            sample_rate: Audio sample rate.
            encoding: Audio encoding format.
            container: Audio container format.
            params: Additional input parameters for voice customization.

                .. deprecated:: 0.0.105
                    Use ``settings=AsyncAITTSSettings(...)`` instead.

            settings: Runtime-updatable settings. When provided alongside deprecated
                parameters, ``settings`` values take precedence.
            aggregate_sentences: Deprecated. Use text_aggregation_mode instead.

                .. deprecated:: 0.0.104
                    Use ``text_aggregation_mode`` instead.

            text_aggregation_mode: How to aggregate text before synthesis.
            **kwargs: Additional arguments passed to the parent service.
        async_flash_v1.0NrY   voicer   rV   re   rY   r]   T)r_   r`   pause_frame_processingrZ   push_start_framepush_stop_framesr^   r   rI   )rD   r   re   rY   r   language_to_service_languageapply_updatesuper__init___api_key_api_version_url_output_container_output_encoding_output_sample_rate_receive_task_keepalive_task)selfra   rV   rW   rX   rY   rZ   r[   r\   r]   r^   r_   r`   kwargsdefault_settings	__class__s                  r@   rl   zAsyncAITTSService.__init__g   s   j .$
 ":/A7K%-""7,>H%*" "8-?@JP//D55fooF_c !)
 ))(3 		
 3"7#'#!!%		
 		
  #	 "+ (#$ !#rB   deltar   c                 l   K   t         |   |       d{   }|s|S | j                  |       |S 7 w)zgApply a settings delta.

        Settings are stored but not applied to the active connection.
        N)rk   _update_settings _warn_unhandled_updated_settings)ru   ry   changedrx   s      r@   r{   z"AsyncAITTSService._update_settings   s;     
 077N--g6 8s   424c                      y)zCheck if this service can generate processing metrics.

        Returns:
            True, as Async service supports metrics generation.
        TrI   ru   s    r@   can_generate_metricsz&AsyncAITTSService.can_generate_metrics        rB   r   c                     t        |      S zConvert a Language enum to Async language format.

        Args:
            language: The language to convert.

        Returns:
            The Async-specific language code, or None if not supported.
        rA   ru   r   s     r@   ri   z.AsyncAITTSService.language_to_service_language        *(33rB   text
context_idforcec                 8    |||d}t        j                  |      S )N)
transcriptr   r   )jsondumps)ru   r   r   r   msgs        r@   
_build_msgzAsyncAITTSService._build_msg   s    !eLzz#rB   framec                    K   t         |   |       d{    | j                  | _        | j	                          d{    y7 .7 w)z}Start the Async TTS service.

        Args:
            frame: The start frame containing initialization parameters.
        N)rk   startrZ   rr   _connectru   r   rx   s     r@   r   zAsyncAITTSService.start   sA      gmE"""#'#3#3 mmo 	#s    A	A(A	A A	A	c                 t   K   t         |   |       d{    | j                          d{    y7 7 w)zUStop the Async TTS service.

        Args:
            frame: The end frame.
        N)rk   stop_disconnectr   s     r@   r   zAsyncAITTSService.stop   s6      gl5!!!    	"    848688c                 t   K   t         |   |       d{    | j                          d{    y7 7 w)zZCancel the Async TTS service.

        Args:
            frame: The cancel frame.
        N)rk   cancelr   r   s     r@   r   zAsyncAITTSService.cancel  s6      gnU###    	$ r   c                 |  K   t         |           d {    | j                          d {    | j                  r;| j                  s/| j                  | j                  | j                              | _        | j                  r2| j                  s%| j                  | j                               | _        y y y 7 7 wN)
rk   r   _connect_websocket
_websocketrs   create_task_receive_task_handler_report_errorrt   _keepalive_task_handlerru   rx   s    r@   r   zAsyncAITTSService._connect  s     g   %%'''??4#5#5!%!1!1$2L2LTM_M_2`!aD??4#7#7#'#3#3D4P4P4R#SD  $8? 	!'s    B<B8B<B:BB<:B<c                 R  K   t         |           d {    | j                  r*| j                  | j                         d {    d | _        | j                  r*| j                  | j                         d {    d | _        | j                          d {    y 7 7 \7 (7 wr   )rk   r   rs   cancel_taskrt   _disconnect_websocketr   s    r@   r   zAsyncAITTSService._disconnect  s     g!###""4#5#5666!%D""4#7#7888#'D ((*** 	$ 7 9 	+sE   B'B.B'B!5B':B#;B'B%B'!B'#B'%B'c                 L  K   	 | j                   r'| j                   j                  t        j                  u ry t	        j
                  d       t        | j                   d| j                   d| j                          d {   | _         | j                  j                  d| j                  j                  d| j                  | j                  | j                  d| j                  j                   d}| j#                         j%                  t'        j(                  |             d {    | j+                  d       d {    y 7 7  7 	# t,        $ rL}| j/                  d	| |
       d {  7   d | _         | j+                  d|        d {  7   Y d }~y d }~ww xY ww)NzConnecting to Asyncz	?api_key=z	&version=idmoder   r\   r[   rZ   )model_idre   output_formatr   on_connectedUnknown error occurred: 	error_msg	exceptionon_connection_error)r   stater   OPENr   debugwebsocket_connectro   rm   rn   rL   rY   re   rp   rq   rr   r   _get_websocketsendr   r   _call_event_handler	Exception
push_error)ru   init_msges      r@   r   z$AsyncAITTSService._connect_websocket&  s_    	J4??#8#8EJJ#FLL./$599+Yt}}oYt?P?P>QR% DO !NN00"&dnn.B.BC!%!7!7 $ 5 5#'#;#;"
 !NN33	H %%',,TZZ-ABBB**>:::! C: 	J//.Fqc,JVW/XXX"DO**+@QCIII	Js   F$2E F$A	E  EB&E 'E(E  E
E F$E E 
E 	F!F.E1/"FFFF$F!!F$c                   K   	 | j                          d {    | j                  rt        j                  d       | j	                         r8| j                  j                  t        j                  ddi             d {    | j                  j                          d {    t        j                  d       d | _        | j                          d {    | j                  d       d {    y 7 7 v7 V# t        $ r)}| j                  d| |       d {  7   Y d }~ld }~ww xY w7 Y7 B# d | _        | j                          d {  7   | j                  d       d {  7   w xY ww)NzDisconnecting from Async	terminateTzDisconnected from Asyncr   r   on_disconnected)stop_all_metricsr   r   r   has_active_audio_contextr   r   r   closer   r   remove_active_audio_contextr   )ru   r   s     r@   r   z'AsyncAITTSService._disconnect_websocketA  s1    	>'')))78002//..tzz;:M/NOOOoo++---67 #DO22444**+<=== * P- 	Y//.Fqc,JVW/XXX	Y 5= #DO22444**+<===s   E3C; C5A(C;  C7!C; "C9#C; <E3D0E3/D20E35C; 7C; 9C; ;	D-D(D D(#D4 (D--D4 0E32E34E0EE0)E,*E00E3c                 H    | j                   r| j                   S t        d      )NzWebsocket not connected)r   r   r   s    r@   r   z AsyncAITTSService._get_websocketS  s    ????"122rB   c                    K   |xs | j                         }|r| j                  syt        j                  |  d       | j	                  d|d      }| j                  j                  |       d{    y7 w)zFlush any pending audio.

        Args:
            context_id: The specific context to flush. If None, falls back to the
                currently active context.
        Nz: flushing audio T)r   r   r   )get_active_audio_context_idr   r   tracer   r   )ru   r   flush_idr   s       r@   flush_audiozAsyncAITTSService.flush_audioX  sg      C!A!A!Ctv-./oo384oHoo""3'''s   A.A80A61A8	directionc                 B   K   t         |   ||       d{    y7 w)zPush a frame downstream with special handling for stop conditions.

        Args:
            frame: The frame to push.
            direction: The direction to push the frame.
        N)rk   
push_frame)ru   r   r   rx   s      r@   r   zAsyncAITTSService.push_framef  s      g 	222s   c                   K   | j                         2 3 d {   }t        j                  |      }|s!|j                  d      }|j                  d      du rt	        j
                  d|        ^| j                  |      s^| j                         |k(  r2t	        j                  d|        | j                  |       d {    nt	        j                  d|        |j                  d      s| j                          d {    t        j                  |d         }t        || j                  d|	      }| j                  ||       d {    D7 @7 7 V7 6 y w)
Nr   finalTz#Received final message for context z4Received a delayed message, recreating the context: z+Ignoring message from unavailable context: audio   r   )r   r   loadsgetr   r   audio_context_availabler   r   create_audio_contextstop_ttfb_metricsbase64	b64decoder   rZ   append_to_audio_context)ru   messager   received_ctx_idr   r   s         r@   _receive_messagesz#AsyncAITTSService._receive_messageso  sT    !002 	K 	K'**W%C!ggl3O www4'B?BSTU //@335HLLNN_` 33ODDD
 LL#NN_!`awww,,...((W6(0@0@!P_`22?EJJJ?	K& E / K? 3sc   E"E EE B&E"E/E"2E"EA
E"EE"E E"E"E" E"c                 &  K   d}	 t        j                  |       d{    	 | j                  r| j                  j                  t        j
                  u r|| j                         }|rd|d}t        j                  d       nddi}t        j                  d       | j                  j                  t        j                  |             d{    7 7 # t        j                  $ r$}t        j                  |  d|        Y d}~yd}~ww xY ww)	zBSend periodic keepalive messages to maintain WebSocket connection.
   Nr   )r   r   zSending keepalive messager   z!Sending keepalive without contextz keepalive error: )asynciosleepr   r   r   r   r   r   r   r   r   r   
websocketsConnectionClosedwarning)ru   KEEPALIVE_SLEEPr   keepalive_messager   s        r@   r   z)AsyncAITTSService._keepalive_task_handler  s     --000??t'<'<

'J!%!A!A!CJ!*-*4-) %@A
 .:3,?)%HI//..tzz:K/LMMM# 0  N.. $'9!=>sK   DCDB)C CC DC D*D	D	DDc                   K   |rI| j                   r<	 | j                   j                  t        j                  |ddd             d {    y y y 7 # t        $ r'}t        j                  |  d| d|        Y d }~y d }~ww xY ww)NT )r   close_contextr   z: Error closing context : )r   r   r   r   r   r   error)ru   r   r   s      r@   _close_contextz AsyncAITTSService._close_context  s      $//Qoo**JJj4_abc   *:  Qv%=j\A3OPPQs@   B5A AA BA 	BA<7B<BBc                 B   K   | j                  |       d{    y7 w)z7Close the Async AI context when the bot is interrupted.Nr   ru   r   s     r@   on_audio_context_interruptedz.AsyncAITTSService.on_audio_context_interrupted  s     !!*---   c                 B   K   | j                  |       d{    y7 w)a  Close the Async AI context after all audio has been played.

        Async AI does not send a server-side signal when a context is
        exhausted, so Pipecat must explicitly close it with
        ``close_context: True`` to free server-side resources.
        Nr   r   s     r@   on_audio_context_completedz,AsyncAITTSService.on_audio_context_completed  s      !!*---r   c                V  K   t        j                  |  d| d       	 | j                  r&| j                  j                  t        j
                  u r| j                          d{    	 | j                  |d|      }| j                         j                  |       d{    | j                  |       d{    d y7 _7 %7 # t        $ r+}t        d|        t        |       Y d}~yd}~ww xY w# t        $ r}t        d|        Y d}~yd}~ww xY ww)	a!  Generate speech from text using Async API websocket endpoint.

        Args:
            text: The text to synthesize into speech.
            context_id: The context ID for tracking audio frames.

        Yields:
            Frame: Audio frames containing the synthesized speech.
        : Generating TTS []NT)r   r   r   r   )r   r   )r   r   r   r   r   CLOSEDr   r   r   r   start_tts_usage_metricsr   r   r   )ru   r   r   r   r   s        r@   run_ttszAsyncAITTSService.run_tts  s     	v/vQ78	C??doo&;&;u||&Kmmo%%oo4t
oS))+00555224888 J & 68  )A!'EFF%<<
  	C%=aS#ABBB	Cs   D)AD $C%D *6C
  C!C
 9C:C
 >D D)D C
 C
 
	C>!C94D 8D)9C>>D 	D&
D!D)!D&&D))r   r   Fr   )0rE   rF   rG   rH   rD   SettingsrP   r   rQ   strr   intboolr   rl   r   dictr   r{   r   r   ri   r   r   r   r
   r   r	   r   r   r   r   r   r   r   r   
DOWNSTREAMr   r   r   r   r   r   r   r   r   r   __classcell__rx   s   @r@   rK   rK   R   s)   
 "H!!
,i 
,  #'D#%)#(,15.2?Cd$ d$ 3-	d$
 d$ d$ }d$ c]d$ d$ d$ %d$ -.d$ &d^d$  ((;<d$LK DcN d 	4X 	4(3- 	4s S d WZ  ! !!+ !	T+J6>$3
(HSM ( JXIbIb 3e 3 3 KD2	Qs 	Q.S ..3 . C# C3 C>%QU+;V C CrB   rK   c                       e Zd ZU dZeZeed<    G d de      Zddddddd	ddd
	de	de
e	   dej                  de
e	   de	de	de
e   de	de	de
e   de
e   f fdZdefdZdede
e	   fdZdef fdZede	de	deedf   fd       Z xZS ) AsyncAIHttpTTSServicezHTTP-based Async TTS service.

    Provides text-to-speech using Async's HTTP streaming API for simpler,
    non-WebSocket integration. Suitable for use cases where streaming WebSocket
    connection is not required or desired.
    rL   c                   &    e Zd ZU dZdZee   ed<   y)!AsyncAIHttpTTSService.InputParamszInput parameters for Async API.

        .. deprecated:: 0.0.105
            Use ``AsyncAITTSSettings`` directly via the ``settings`` parameter instead.

        Parameters:
            language: Language to use for synthesis.
        Nr   rO   rI   rB   r@   rQ   r    rR   rB   rQ   Nzhttps://api.async.comrS   rT   rU   )	rV   rY   rX   rW   rZ   r[   r\   r]   r^   ra   rV   aiohttp_sessionrY   rX   rW   rZ   r[   r\   r]   r^   c                   t        ddd      }|t        dt         d       ||_        |t        dt         d       ||_        |
@t        dt                |s.|
j                  r| j                  |
j                        nd|_        ||j                  |       t        |    d|dd|d	| || _	        || _
        || _        |	| _        || _        d
| _        || _        y)a  Initialize the Async TTS service.

        Args:
            api_key: Async API key.
            voice_id: ID of the voice to use for synthesis.

                .. deprecated:: 0.0.105
                    Use ``settings=AsyncAITTSSettings(voice=...)`` instead.

            aiohttp_session: An aiohttp session for making HTTP requests.
            model: TTS model to use (e.g., "async_flash_v1.0").

                .. deprecated:: 0.0.105
                    Use ``settings=AsyncAITTSSettings(model=...)`` instead.

            url: Base URL for Async API.
            version: API version string for Async API.
            sample_rate: Audio sample rate.
            encoding: Audio encoding format.
            container: Audio container format.
            params: Additional input parameters for voice customization.

                .. deprecated:: 0.0.105
                    Use ``settings=AsyncAITTSSettings(...)`` instead.

            settings: Runtime-updatable settings. When provided alongside deprecated
                parameters, ``settings`` values take precedence.
            **kwargs: Additional arguments passed to the parent TTSService.
        rc   Nrd   rV   re   rY   r]   T)rZ   rg   rh   r^   r   rI   )rD   r   re   rY   r   ri   rj   rk   rl   rm   	_base_urlrn   rp   rq   rr   _session)ru   ra   rV   r  rY   rX   rW   rZ   r[   r\   r]   r^   rv   rw   rx   s                 r@   rl   zAsyncAIHttpTTSService.__init__  s   \ .$
 ":/A7K%-""7,>H%*" "8-?@JP//D55fooF_c !)
 ))(3 	
#!!%		

 	
  # "+ (#$ 'rB   r   c                      y)zCheck if this service can generate processing metrics.

        Returns:
            True, as Async HTTP service supports metrics generation.
        TrI   r   s    r@   r   z*AsyncAIHttpTTSService.can_generate_metricsS  r   rB   r   c                     t        |      S r   r   r   s     r@   ri   z2AsyncAIHttpTTSService.language_to_service_language[  r   rB   r   c                 b   K   t         |   |       d{    | j                  | _        y7 w)zStart the Async HTTP TTS service.

        Args:
            frame: The start frame containing initialization parameters.
        N)rk   r   rZ   rr   r   s     r@   r   zAsyncAIHttpTTSService.startf  s.      gmE"""#'#3#3  	#s   /-/r   r   c                  K   t        j                  |  d| d       	 d| j                  j                  d}| j                  j                  ||| j
                  | j                  | j                  d| j                  j                  d}| j                  | j                  dd}| j                   d	}| j                  j                  |||
      4 d{   }|j                  dk7  rP|j                          d{   }| j!                  d|        d{    t#        d|j                   d|       t%               }	|j&                  j)                  d      2 3 d{   }
|
s| j+                          d{    |	j-                  |
       67 7 7 ~7 77 6 t/        |	      }ddd      d{  7   n# 1 d{  7  sw Y   nxY w| j1                  |       d{  7   t3        | j4                  d|      }| n5# t"        $ r)}| j!                  d| |       d{  7   Y d}~nd}~ww xY w| j+                          d{  7   y# | j+                          d{  7   w xY ww)a  Generate speech from text using Async's HTTP streaming API.

        Args:
            text: The text to synthesize into speech.
            context_id: The context ID for tracking audio frames.

        Yields:
            Frame: Audio frames containing the synthesized speech.
        r   r   r   r   r   )r   r   re   r   r   zapplication/json)rW   z	x-api-keyzContent-Typez/text_to_speech/streaming)r   headersN   zAsync API error: )r   zAsync API returned status r   i   r   )r   rZ   num_channelsr   r   r   )r   r   rL   re   rY   rp   rq   rr   r   rn   rm   r
  r  poststatusr   r   r   	bytearraycontentiter_chunkedr   extendbytesr   r   rZ   )ru   r   r   voice_configpayloadr  rX   response
error_textbufferchunk
audio_datar   r   s                 r@   r   zAsyncAIHttpTTSService.run_ttso  sU     	v/vQ783	+$(0D0DEL !NN00"%!%!7!7 $ 5 5#'#;#;"
 !NN33
G  ,,!]] 2G
 ^^$$=>C}}))#GW)M + +QY??c)'/}}!6J//6G
|4T/UUU#&@@QQST^S_$`aa ##+#3#3#@#@#K ) )%  00222MM%(+!6U
) 3 $L
 #6]
+ + + + + ..t444$  ,,%	E K 	Y//.Fqc,JVW/XXX	Y ((***$((***s  I7B6H
 FH
 #F<<F=F<FAF<F"F#F&F<=F>F<H
 F<F<FF<F<*H
 5F86H
 <GGG
H
 %G(&#H
 	I 
	H<H7,H/-H72I 7H<<I ?I7II7I4-I0.I44I7)rE   rF   rG   rH   rD   r   rP   r   rQ   r   r   aiohttpClientSessionr   rl   r   r   r   ri   r   r   r   r   r   r   r  r  s   @r@   r  r    s=    "H!!
,i 
,  #'#*%)#(,15Y( Y( 3-	Y(
 !..Y( }Y( Y( Y( c]Y( Y( Y( %Y( -.Y(vd 	4X 	4(3- 	44 4 ?+# ?+3 ?+>%QU+;V ?+ ?+rB   r  )4rH   r   r   r   dataclassesr   typingr   r   r   r!  logurur   pydanticr   pipecat.frames.framesr	   r
   r   r   r   r   r   "pipecat.processors.frame_processorr   pipecat.services.settingsr   r   pipecat.services.tts_servicer   r   r   pipecat.transcriptions.languager   r   (pipecat.utils.tracing.service_decoratorsr   r   websockets.asyncio.clientr   r   websockets.protocolr   ModuleNotFoundErrorr   r   r   r   rA   rD   rK   r  rI   rB   r@   <module>r0     s    4    ! 0 0      > I ] ] F ?,F)H Hhsm H> 	 	 	LC+ LC^N+J N+w  ,FLL;qc"#FLLXY
&qc*
++,s   (B* *C&/2C!!C&