
    qiK                         d Z ddlZddlmZmZ ddlmZmZmZm	Z	m
Z
 ddlZddlmZ ddlmZ ddlmZmZmZmZ ddlmZmZmZmZ dd	lmZ dd
lmZmZ ddlm Z  dede	e!   fdZ"e G d de             Z# G d de      Z$y)zMiniMax text-to-speech service implementation.

This module provides integration with MiniMax's T2A (Text-to-Audio) API
for streaming text-to-speech synthesis.
    N)	dataclassfield)AnyAsyncGeneratorMappingOptionalSelf)logger)	BaseModel)
ErrorFrameFrame
StartFrameTTSAudioRawFrame)	NOT_GIVENTTSSettings	_NotGiven_warn_deprecated_param)
TTSService)Languageresolve_language)
traced_ttslanguagereturnc                 n   i t         j                  dt         j                  dt         j                  dt         j                  dt         j
                  dt         j                  dt         j                  dt         j                  dt         j                  d	t         j                  d
t         j                  dt         j                  dt         j                  dt         j                  dt         j                  dt         j                   dt         j"                  di t         j$                  dt         j&                  dt         j(                  dt         j*                  dt         j,                  dt         j.                  dt         j0                  dt         j2                  dt         j4                  dt         j6                  dt         j8                  dt         j:                  dt         j<                  dt         j>                  dt         j@                  d t         jB                  d!t         jD                  d"t         jF                  d#t         jH                  d$t         jJ                  d%t         jL                  d&t         jN                  d't         jP                  d(i}tS        | |d)*      S )+zConvert a Language enum to MiniMax language format.

    Args:
        language: The Language enum value to convert.

    Returns:
        The corresponding MiniMax language name, or None if not supported.
    	AfrikaansArabic	BulgarianCatalanCzechDanishGermanGreekEnglishSpanishPersianFinnishFilipinoFrenchHebrewHindiCroatian	Hungarian
IndonesianItalianJapaneseKoreanMalay	NorwegianNynorskDutchPolish
PortugueseRomanianRussianSlovak	SlovenianSwedishTamilThaiTurkish	Ukrainian
VietnamesezChinese,YueChineseF)use_base_code)*r   AFARBGCACSDADEELENESFAFIFILFRHEHIHRHUIDITJAKOMSNBNNNLPLPTRORUSKSLSVTATHTRUKVIYUEZHr   )r   LANGUAGE_MAPs     N/opt/pipecat/venv/lib/python3.12/site-packages/pipecat/services/minimax/tts.pylanguage_to_minimax_languagerm   !   s|   )[)X) 	[) 	Y	)
 	W) 	X) 	X) 	W) 	Y) 	Y) 	Y) 	Y) 	j) 	X) 	X)  	W!)" 	Z#)$ 	[%)& 	\')( 	Y))* 	Z+), 	X-). 	W/)0 	[1)2 	Y3)4 	W5)6 	X7)8 	\9): 	Z;)< 	Y=)> 	X?)@ 	[A)B 	YC)D 	WE)F 	VY[\mYQ)LV Hl%HH    c                   h    e Zd ZU dZ ed       Zedz  ez  ed<    ed       Z	edz  ez  ed<    ed       Z
edz  ez  ed	<    ed
       Zedz  ez  ed<    ed       Zedz  ez  ed<    ed       Zedz  ez  ed<    ed       Zedz  ez  ed<   edeeef   def fd       Z xZS )MiniMaxTTSSettingsa  Settings for MiniMaxHttpTTSService.

    Parameters:
        speed: Speech speed (range: 0.5 to 2.0).
        volume: Speech volume (range: 0 to 10).
        pitch: Pitch adjustment (range: -12 to 12).
        emotion: Emotional tone (options: "happy", "sad", "angry", "fearful",
            "disgusted", "surprised", "calm", "fluent").
        text_normalization: Enable text normalization (Chinese/English).
        latex_read: Enable LaTeX formula reading.
        language_boost: Language boost string for multilingual support.
    c                      t         S Nr    rn   rl   <lambda>zMiniMaxTTSSettings.<lambda>g       I rn   )default_factoryNspeedc                      t         S rr   rs   rt   rn   rl   ru   zMiniMaxTTSSettings.<lambda>h   s    Y rn   volumec                      t         S rr   rs   rt   rn   rl   ru   zMiniMaxTTSSettings.<lambda>i   s    ) rn   pitchc                      t         S rr   rs   rt   rn   rl   ru   zMiniMaxTTSSettings.<lambda>j   rv   rn   emotionc                      t         S rr   rs   rt   rn   rl   ru   zMiniMaxTTSSettings.<lambda>k   s    PY rn   text_normalizationc                      t         S rr   rs   rt   rn   rl   ru   zMiniMaxTTSSettings.<lambda>l   s    	 rn   
latex_readc                      t         S rr   rs   rt   rn   rl   ru   zMiniMaxTTSSettings.<lambda>m   s    9 rn   language_boostsettingsr   c                    t        |      }|j                  dd      }t        |t               r|j                  d|j	                  d             |j                  d|j	                  d             |j                  d|j	                  d             |j                  d|j	                  d             |j                  d|j	                  d             |j                  d	|j	                  d	             t
        |   |      S )
u   Construct settings from a plain dict, destructuring legacy nested dicts.

        Handles ``voice_setting`` (with ``vol`` → ``volume`` rename) and
        ``audio_setting`` (with prefixed field mapping).
        voice_settingNrx   rz   volr|   r~   r   r   )dictpop
isinstance
setdefaultgetsuperfrom_mapping)clsr   flatvoice	__class__s       rl   r   zMiniMaxTTSSettings.from_mappingo   s     H~$/eT"OOGUYYw%78OOHeii&67OOGUYYw%78OOIuyy';<OO0%))<P2QROOL%))L*ABw#D))rn   )__name__
__module____qualname____doc__r   rx   floatr   __annotations__rz   r|   intr~   strr   boolr   r   classmethodr   r   r	   r   __classcell__r   s   @rl   rp   rp   X   s     ',<M&NE54<)#N',=N'OFEDL9$O$):K$LE3:	!L&+<M&NGS4Z)#N27HY2Zti/Z*/@Q*RJti'R-2CT-UNC$J*U*GCH$5 *$ * *rn   rp   c                       e Zd ZU dZeZeed<    G d de      Zddddddddd	e	d
e	de	de
e	   de
e	   dej                  de
e   dede
e   de
e   f fdZdefdZdede
e	   fdZdef fdZede	de	deedf   fd       Z xZS )MiniMaxHttpTTSServicea  Text-to-speech service using MiniMax's T2A (Text-to-Audio) API.

    Provides streaming text-to-speech synthesis using MiniMax's HTTP API
    with support for various voice settings, emotions, and audio configurations.
    Supports real-time audio streaming with configurable voice parameters.

    Platform documentation:
    https://www.minimax.io/platform/document/T2A%20V2?key=66719005a427f0c8a5701643
    	_settingsc                       e Zd ZU dZej
                  Zee   ed<   dZ	ee
   ed<   dZee
   ed<   dZee   ed<   dZee   ed	<   dZee   ed
<   dZee   ed<   dZee   ed<   dZee   ed<   y)!MiniMaxHttpTTSService.InputParamsa  Configuration parameters for MiniMax TTS.

        .. deprecated:: 0.0.105
            Use ``MiniMaxTTSSettings`` directly via the ``settings`` parameter instead.

        Parameters:
            language: Language for TTS generation. Supports 40 languages.
                Note: Filipino, Tamil, and Persian require speech-2.6-* models.
            speed: Speech speed (range: 0.5 to 2.0).
            volume: Speech volume (range: 0 to 10).
            pitch: Pitch adjustment (range: -12 to 12).
            emotion: Emotional tone (options: "happy", "sad", "angry", "fearful",
                "disgusted", "surprised", "calm", "fluent").
            english_normalization: Deprecated; use `text_normalization` instead

                .. deprecated:: 0.0.96
                    The `english_normalization` parameter is deprecated and will be removed in a future version.
                    Use the `text_normalization` parameter instead.

            text_normalization: Enable text normalization (Chinese/English).
            latex_read: Enable LaTeX formula reading.
            exclude_aggregated_audio: Whether to exclude aggregated audio in final chunk.
        r         ?rx   rz   r   r|   Nr~   english_normalizationr   r   exclude_aggregated_audio)r   r   r   r   r   rK   r   r   r   rx   r   rz   r|   r   r~   r   r   r   r   r   r   rt   rn   rl   InputParamsr      s    	0 (0{{(8$2!$x$"%% x} !%#%04x~4-1HTN1%)
HTN)37 (4.7rn   r   z https://api.minimax.io/v1/t2a_v2NT)base_urlmodelvoice_idsample_ratestreamparamsr   api_keyr   group_idr   r   aiohttp_sessionr   r   r   r   c       
         @   t        dddddddddd
      }|t        dt         d       ||_        |t        dt         d	       ||_        |	Zt        d
t                |
sG|	j                  |_        |	j
                  |_        |	j                  |_        |	j                  |_        |	j                  r$| j                  |	j                        }|r||_
        |	j                  rIg d}|	j                  |v r|	j                  |_        n%t        j                  d|	j                   d|        |	j                  Uddl}|j!                         5  |j#                  d       |j%                  dt&               ddd       |	j                  |_        |	j(                  |	j(                  |_        |
|j+                  |
       t-        | \  d|dd|d| || _        || _        || _        | d| | _        || _        d| _        d| _        d| _        d| _         y# 1 sw Y   xY w)au  Initialize the MiniMax TTS service.

        Args:
            api_key: MiniMax API key for authentication.
            base_url: API base URL, defaults to MiniMax's T2A endpoint.
                Global: https://api.minimax.io/v1/t2a_v2
                Mainland China: https://api.minimaxi.chat/v1/t2a_v2
                Western United States: https://api-uw.minimax.io/v1/t2a_v2
            group_id: MiniMax Group ID to identify project.
            model: TTS model name. Defaults to "speech-02-turbo". Options include:
                "speech-2.6-hd", "speech-2.6-turbo" (latest, supports Filipino/Tamil/Persian),
                "speech-02-hd", "speech-02-turbo",
                "speech-01-hd", "speech-01-turbo".

                .. deprecated:: 0.0.105
                    Use ``settings=MiniMaxTTSSettings(model=...)`` instead.

            voice_id: Voice identifier. Defaults to "Calm_Woman".

                .. deprecated:: 0.0.105
                    Use ``settings=MiniMaxTTSSettings(voice=...)`` instead.

            aiohttp_session: aiohttp.ClientSession for API communication.
            sample_rate: Output audio sample rate in Hz. If None, uses pipeline default.
            stream: Whether to use streaming mode. Defaults to True.
            params: Additional configuration parameters.

                .. deprecated:: 0.0.105
                    Use ``settings=MiniMaxTTSSettings(...)`` instead.

            settings: Runtime-updatable settings. When provided alongside deprecated
                parameters, ``settings`` values take precedence.
            **kwargs: Additional arguments passed to parent TTSService.
        zspeech-02-turbo
Calm_WomanNr   r   )
r   r   r   rx   rz   r|   r   r~   r   r   r   r   r   r   )happysadangryfearful	disgusted	surprisedneutralfluentzUnsupported emotion: z. Supported emotions: alwayszzParameter `english_normalization` is deprecated and will be removed in a future version. Use `text_normalization` instead.T)r   push_start_framepush_stop_framesr   z	?GroupId=i  pcm   rt   )!rp   r   r   r   rx   rz   r|   r   r   language_to_service_languager   r~   r
   warningr   warningscatch_warningssimplefilterwarnDeprecationWarningr   apply_updater   __init___api_key	_group_id_stream	_base_url_session_audio_bitrate_audio_format_audio_channel_audio_sample_rate)selfr   r   r   r   r   r   r   r   r   r   kwargsdefault_settingsservice_langsupported_emotionsr   r   s                   rl   r   zMiniMaxHttpTTSService.__init__   sB   d .##
 "7,>H%*"":/A7K%-" "8-?@)/ &*0-- ')/ &.4.?.? + ??#'#D#DV__#UL#:F(7 >>	*& ~~);;39>>(03FNN3CCYZlYmn
 //;#!002  --h7  Y. ;A:V:V$7,,8:@:S:S$7 ))(3 	
#!!%		

 	
  !$:Yxj9' %""#A s   
(HHr   c                      y)zCheck if this service can generate processing metrics.

        Returns:
            True, as MiniMax service supports metrics generation.
        Trt   )r   s    rl   can_generate_metricsz*MiniMaxHttpTTSService.can_generate_metricsD  s     rn   r   c                     t        |      S )zConvert a Language enum to MiniMax service language format.

        Args:
            language: The language to convert.

        Returns:
            The MiniMax-specific language name, or None if not supported.
        )rm   )r   r   s     rl   r   z2MiniMaxHttpTTSService.language_to_service_languageL  s     ,H55rn   framec                    K   t         |   |       d{    | j                  | _        t	        j
                  d| j                          y7 8w)zStart the MiniMax TTS service.

        Args:
            frame: The start frame containing initialization parameters.
        Nz*MiniMax TTS initialized with sample_rate: )r   startr   r   r
   debug)r   r   r   s     rl   r   zMiniMaxHttpTTSService.startW  sI      gmE""""&"2"2A$BRBRASTU 	#s   AA9Atext
context_idc                	  K   t        j                  |  d| d       ddd| j                   d}| j                  j                  | j                  j
                  | j                  j                  | j                  j                  d}| j                  j                  | j                  j                  |d	<   | j                  j                  | j                  j                  |d
<   | j                  j                  | j                  j                  |d<   | j                  | j                  | j                  | j                  d}| j                  ||| j                  j                   |d}| j                  j"                  | j                  j"                  |d<   	 | j$                  j'                  | j(                  ||      4 d{   }|j*                  dk7  rHd|j*                   }t-        |       	 ddd      d{    | j/                          d{    y| j1                  |       d{    t3               }	| j4                  }
|j6                  j9                  |
      2 3 d{   }|s|	j;                  |       d|	v s"|	j=                  d      }|	j=                  d|dz         }|dk(  r|dkD  r|	|d }	X|	|| }|	|d }		 t?        j@                  |dd jC                  d            }d|v rt        j                  d       |jE                  di       }|s|jE                  d      }|stG        dtI        |      |
dz        D ]\  }||||
dz  z    }|s	 tJ        jM                  |      }|r4| j/                          d{    tO        || jP                  d|       ^ 	 d|	v r*N7 7 7 7 7 V7 :# tR        $ r"}t        jT                  d|        Y d}~d}~ww xY w# t>        jV                  $ r)}t        jT                  d | d!|dd"         Y d}~d}~ww xY w6 ddd      d{  7   n# 1 d{  7  sw Y   nxY wn)# tX        $ r}t-        d#| |$       Y d}~nd}~ww xY w| j/                          d{  7   y# | j/                          d{  7   w xY ww)%a  Generate TTS audio from text using MiniMax's streaming API.

        Args:
            text: The text to synthesize into speech.
            context_id: The context ID for tracking audio frames.

        Yields:
            Frame: Audio frames containing the synthesized speech.
        z: Generating TTS []z!application/json, text/plain, */*zapplication/jsonzBearer )acceptzContent-TypeAuthorization)r   rx   r   r|   Nr~   r   r   )bitrateformatchannelr   )r   r   audio_settingr   r   r   )headersjson   zMiniMax TTS error: HTTP )errors   data:   r   zutf-8
extra_infoz$Received final chunk with extra infodataaudio   r   )r   r   num_channelsr   z Error converting hex to binary: zError decoding JSON: z, data: d   zUnknown error occurred: )r   	exception)-r
   r   r   r   r   rx   rz   r|   r~   r   r   r   r   r   r   r   r   r   r   postr   statusr   stop_ttfb_metricsstart_tts_usage_metrics	bytearray
chunk_sizecontentiter_chunkedextendfindr   loadsdecoder   rangelenbytesfromhexr   r   
ValueErrorr   JSONDecodeError	Exception)r   r   r   r   r   r   payloadresponseerror_messagebuffer
CHUNK_SIZEchunkr   
next_start
data_blockr   
chunk_data
audio_datai	hex_chunkaudio_chunkes                         rl   run_ttszMiniMaxHttpTTSService.run_ttsa  s     	v/vQ78 :.&t}}o6
 ,,^^))>>((^^))	
 >>!!-'+~~'='=M)$>>,,826..2S2SM./>>$$0*...*C*CM,' **((**22	
 ll**^^))
 >>((4(,(E(EG$%U	+}}))g *  O% O%??c)&>x>O$PM$=99O% O%h ((***Y 224888 #!__
#+#3#3#@#@#L @% @%%  MM%( #f, &H 5%+[[519%E
%+$qy)/! &,E*%=
!'
!4+%#'::jn.C.CG.L#MD+t3 &-S T ()-&")=J#- ()3)@J#- ( &+1c*ozA~%N -,6q1zA~;M,N	'0$,!-27--	2JK'2.2.D.D.F(F(F.>2=8<8H8H9:7A	/* )*-= #f,-O% O%h +Y 9@%^ )G (2 !-$*LL*J1#(N%& %-	!-  $33 %"LL"7s(:dsCSBT U %	%y $MO% O% O% O% O%b  	P%=aS#AQOOO	P ((***$((***s  FS,Q! N:Q! 
/Q9Q! N=Q! 	SO S"Q6O75Q,P80O1P84QA QA O9QO9$Q%O98Q9*O9$*OO	
 O/O91Q8Q:Q! =Q!  SQP8	O	O6	O1	,O91O6	6O99P5P0*Q0P55Q9Q! QQ! QQQQ!  R$ !	R*R=R$ RR$ 
SR S$R?8R;9R??S)r   r   r   r   rp   Settingsr   r   r   r   r   aiohttpClientSessionr   r   r   r   r   r   r   r   r   r   r   r  r   r   s   @rl   r   r      s4    "H!!!8i !8N ;#"&%)(,15M$ M$ 	M$
 M$ }M$ 3-M$ !..M$ c]M$ M$ %M$ -.M$^d 	6X 	6(3- 	6V V H+# H+3 H+>%QU+;V H+ H+rn   r   )%r   r   dataclassesr   r   typingr   r   r   r   r	   r  logurur
   pydanticr   pipecat.frames.framesr   r   r   r   pipecat.services.settingsr   r   r   r   pipecat.services.tts_servicer   pipecat.transcriptions.languager   r   (pipecat.utils.tracing.service_decoratorsr   r   rm   rp   r   rt   rn   rl   <module>r$     s     ( ? ?     ` _ 3 F ?4I8 4I 4In (* (* (*Vf+J f+rn   