
    qi                         d Z ddlmZmZmZ ddlmZ ddlmZm	Z	m
Z
mZ  G d de      Z G d de      Z G d	 d
e      Z G d de      Z G d de      Z G d de      Zeez  ez  ez  Z G d de      Zy)zAssemblyAI WebSocket API message models and connection parameters.

This module defines Pydantic models for handling AssemblyAI's real-time
transcription WebSocket messages and connection configuration.
    )ListLiteralOptional)logger)	BaseModel
ConfigDictFieldmodel_validatorc                   X    e Zd ZU dZeed<   eed<   eed<   eed<    edd      Z	e
ed<   y	)
Wordax  Represents a single word in a transcription with timing and confidence.

    Parameters:
        start: Start time of the word in milliseconds.
        end: End time of the word in milliseconds.
        text: The transcribed word text.
        confidence: Confidence score for the word (0.0 to 1.0).
        word_is_final: Whether this word is finalized and won't change.
    startendtext
confidence.word_is_final)aliasN)__name__
__module____qualname____doc__int__annotations__strfloatr	   r   bool     T/opt/pipecat/venv/lib/python3.12/site-packages/pipecat/services/assemblyai/models.pyr   r      s/     J	H
I?;M4;r   r   c                       e Zd ZU dZeed<   y)BaseMessagezrBase class for all AssemblyAI WebSocket messages.

    Parameters:
        type: The message type identifier.
    typeN)r   r   r   r   r   r   r   r   r   r    r    %   s     Ir   r    c                   :    e Zd ZU dZdZed   ed<   eed<   eed<   y)BeginMessagezMessage sent when a new session begins.

    Parameters:
        type: Always "Begin" for this message type.
        id: Unique session identifier.
        expires_at: Unix timestamp when the session expires.
    Beginr!   id
expires_atN)	r   r   r   r   r!   r   r   r   r   r   r   r   r#   r#   /   s"     %D''
$GOr   r#   c                       e Zd ZU dZ ed      ZdZed   ed<   e	ed<   e
ed<   e
ed<   eed	<   eed
<   ee   ed<   dZee   ed<   dZee   ed<    edd      Zee   ed<   y)TurnMessagea  Message containing transcription data for a turn of speech.

    Parameters:
        type: Always "Turn" for this message type.
        turn_order: Sequential number of this turn in the session.
        turn_is_formatted: Whether the transcript has been formatted.
        end_of_turn: Whether this marks the end of a speaking turn.
        transcript: The transcribed text for this turn.
        end_of_turn_confidence: Confidence score for end-of-turn detection.
        words: List of individual words with timing and confidence data.
        language_code: Detected language code (e.g., "es", "fr"). Only present with
            complete utterances or when end_of_turn is True.
        language_confidence: Confidence score (0-1) for language detection. Only present
            with complete utterances or when end_of_turn is True.
        speaker: Speaker label (e.g., "A", "B"). Only present when speaker_labels is
            enabled and end_of_turn is True. Maps to 'speaker_label' in JSON response.
    T)populate_by_nameTurnr!   
turn_orderturn_is_formattedend_of_turn
transcriptend_of_turn_confidencewordsNlanguage_codelanguage_confidencespeaker_label)defaultr   speaker)r   r   r   r   r   model_configr!   r   r   r   r   r   r   r   r   r1   r   r2   r	   r5   r   r   r   r(   r(   =   sy    $ t4L"D'&/"OO!!:#'M8C='+/%/"4GGXc]Gr   r(   c                   0    e Zd ZU dZdZed   ed<   eed<   y)SpeechStartedMessagezMessage sent when speech is first detected in the audio stream.

    Parameters:
        type: Always "SpeechStarted" for this message type.
        timestamp: Audio timestamp in milliseconds when speech was detected.
    SpeechStartedr!   	timestampN)r   r   r   r   r!   r   r   r   r   r   r   r8   r8   ^   s     &5D'/
"4Nr   r8   c                   :    e Zd ZU dZdZed   ed<   eed<   eed<   y)TerminationMessagea   Message sent when the session is terminated.

    Parameters:
        type: Always "Termination" for this message type.
        audio_duration_seconds: Total duration of audio processed.
        session_duration_seconds: Total duration of the session.
    Terminationr!   audio_duration_secondssession_duration_secondsN)r   r   r   r   r!   r   r   r   r   r   r   r<   r<   j   s$     $1D'-
 0!!##r   r<   c                   .   e Zd ZU dZdZeed<   dZed   ed<   dZ	e
e   ed<   dZe
e   ed	<   dZe
e   ed
<   dZe
e   ed<   dZe
ee      ed<   dZe
e   ed<   dZed   ed<   dZe
e   ed<   dZeed<   dZe
e   ed<   dZe
e   ed<    ed      d        Zy)AssemblyAIConnectionParamsaO	  Configuration parameters for AssemblyAI WebSocket connection.

    .. deprecated:: 0.0.105
        Use ``settings=AssemblyAISTTSettings(foo=...)`` instead.

    Parameters:
        sample_rate: Audio sample rate in Hz. Defaults to 16000.
        encoding: Audio encoding format. Defaults to "pcm_s16le".
        end_of_turn_confidence_threshold: Confidence threshold for end-of-turn detection.
        min_turn_silence: Minimum silence duration when confident about end-of-turn.
        min_end_of_turn_silence_when_confident: DEPRECATED. Use min_turn_silence instead.
        max_turn_silence: Maximum silence duration before forcing end-of-turn.
        keyterms_prompt: List of key terms to guide transcription. Will be JSON serialized before sending.
        prompt: Optional text prompt to guide the transcription. Only used when speech_model is "u3-rt-pro".
        speech_model: Select between English, multilingual, and u3-rt-pro models. Defaults to "u3-rt-pro".
        language_detection: Enable automatic language detection. Only applicable to
            universal-streaming-multilingual. When enabled, Turn messages include
            language_code and language_confidence fields. Defaults to None (not sent).
        format_turns: Whether to format transcript turns. Only applicable to
            universal-streaming-english and universal-streaming-multilingual models.
            For u3-rt-pro, formatting is automatic and built-in. Defaults to True.
        speaker_labels: Enable speaker diarization. When enabled, final transcripts
            (end_of_turn=True) include a speaker field identifying the speaker
            (e.g., "Speaker A", "Speaker B"). Defaults to None (not sent).
        vad_threshold: Voice activity detection confidence threshold. Only applicable to
            u3-rt-pro. The confidence threshold (0.0 to 1.0) for classifying audio frames
            as silence. Frames with VAD confidence below this value are considered silent.
            Increase for noisy environments to reduce false speech detection. Defaults to
            0.3 (API default). For best performance when using with external VAD (e.g., Silero),
            align this value with your VAD's activation threshold to avoid the "dead zone"
            where AssemblyAI transcribes speech that your VAD hasn't detected yet.
            Defaults to None (not sent).
    i>  sample_rate	pcm_s16le)rC   	pcm_mulawencodingN end_of_turn_confidence_thresholdmin_turn_silence&min_end_of_turn_silence_when_confidentmax_turn_silencekeyterms_promptprompt	u3-rt-pro)zuniversal-streaming-englishz universal-streaming-multilingualrL   speech_modellanguage_detectionTformat_turnsspeaker_labelsvad_thresholdafter)modec                     | j                   2t        j                  d       | j                  | j                   | _        | S )zCHandle deprecated min_end_of_turn_silence_when_confident parameter.zThe 'min_end_of_turn_silence_when_confident' parameter is deprecated and will be removed in a future version. Please use 'min_turn_silence' instead.)rH   r   warningrG   )selfs    r   handle_deprecated_paramz2AssemblyAIConnectionParams.handle_deprecated_param   sC     66BNNV
 $$,(,(S(S%r   )r   r   r   r   rB   r   r   rE   r   rF   r   r   rG   rH   rI   rJ   r   r   rK   rM   rN   r   rO   rP   rQ   r
   rW   r   r   r   rA   rA   |   s     D K2=Hg./=8<$huo<&*hsm*<@*HSM@&*hsm*+/OXd3i(/ FHSM  	 'V  *.-L$%)NHTN)%)M8E?)'"
 #
r   rA   N)r   typingr   r   r   logurur   pydanticr   r   r	   r
   r   r    r#   r(   r8   r<   
AnyMessagerA   r   r   r   <module>r\      s    + *  B B<9 <$) ; H+ HB	; 	$ $ K'*>>ASS
> >r   