
    qi                         d Z ddlZddlmZmZ ddlmZ ddlmZ ddl	m
Z
 ddlmZ ddlmZ dd	lmZmZ d
ZdZdZdZ G d de      Z G d de      Z G d de      Zy)a$  Voice Activity Detection (VAD) analyzer base classes and utilities.

This module provides the abstract base class for VAD analyzers and associated
data structures for voice activity detection in audio streams. Includes state
management, parameter configuration, and audio analysis framework.
    N)ABCabstractmethod)ThreadPoolExecutor)Enum)Optional)logger)	BaseModel)calculate_audio_volumeexp_smoothinggffffff?皙?g333333?c                        e Zd ZdZdZdZdZdZy)VADStatea  Voice Activity Detection states.

    Parameters:
        QUIET: No voice activity detected.
        STARTING: Voice activity beginning, transitioning from quiet.
        SPEAKING: Active voice detected and confirmed.
        STOPPING: Voice activity ending, transitioning to quiet.
                N)__name__
__module____qualname____doc__QUIETSTARTINGSPEAKINGSTOPPING     P/opt/pipecat/venv/lib/python3.12/site-packages/pipecat/audio/vad/vad_analyzer.pyr   r      s     EHHHr   r   c                   J    e Zd ZU dZeZeed<   eZ	eed<   e
Zeed<   eZeed<   y)	VADParamsa`  Configuration parameters for Voice Activity Detection.

    Parameters:
        confidence: Minimum confidence threshold for voice detection.
        start_secs: Duration to wait before confirming voice start.
        stop_secs: Duration to wait before confirming voice stop.
        min_volume: Minimum audio volume threshold for voice detection.
    
confidence
start_secs	stop_secs
min_volumeN)r   r   r   r   VAD_CONFIDENCEr    float__annotations__VAD_START_SECSr!   VAD_STOP_SECSr"   VAD_MIN_VOLUMEr#   r   r   r   r   r   /   s1     'J&&J&$Iu$&J&r   r   c                       e Zd ZdZddddee   dee   fdZedefd       Z	edefd	       Z
edefd
       Zedefd       Zededefd       ZdefdZdefdZdedefdZdedefdZdedefdZy)VADAnalyzera  Abstract base class for Voice Activity Detection analyzers.

    Provides the framework for implementing VAD analysis with configurable
    parameters, state management, and audio processing capabilities.
    Subclasses must implement the core voice confidence calculation.
    N)sample_rateparamsr,   r-   c                    || _         d| _        |xs
 t               | _        d| _        d| _        d| _        d| _        t        d      | _	        y)zInitialize the VAD analyzer.

        Args:
            sample_rate: Audio sample rate in Hz. If None, will be set later.
            params: VAD parameters for detection configuration.
        r   r   r   r   )max_workersN)
_init_sample_rate_sample_rater   _params_num_channels_vad_buffer_smoothing_factor_prev_volumer   	_executor)selfr,   r-   s      r   __init__zVADAnalyzer.__init__G   sR     "-, "% ,:r   returnc                     | j                   S )zdGet the current sample rate.

        Returns:
            Current audio sample rate in Hz.
        )r1   r8   s    r   r,   zVADAnalyzer.sample_rate]   s        r   c                     | j                   S )zvGet the number of audio channels.

        Returns:
            Number of audio channels (always 1 for mono).
        )r3   r<   s    r   num_channelszVADAnalyzer.num_channelsf   s     !!!r   c                     | j                   S )zlGet the current VAD parameters.

        Returns:
            Current VAD configuration parameters.
        )r2   r<   s    r   r-   zVADAnalyzer.paramso   s     ||r   c                      y)zGet the number of audio frames required for analysis.

        Returns:
            Number of frames needed for VAD processing.
        Nr   r<   s    r   num_frames_requiredzVADAnalyzer.num_frames_requiredx   s     	r   bufferc                      y)zCalculate voice activity confidence for the given audio buffer.

        Args:
            buffer: Audio buffer to analyze.

        Returns:
            Voice confidence score between 0.0 and 1.0.
        Nr   )r8   rB   s     r   voice_confidencezVADAnalyzer.voice_confidence   s     	r   c                 d    | j                   xs || _        | j                  | j                         y)zsSet the sample rate for audio processing.

        Args:
            sample_rate: Audio sample rate in Hz.
        N)r0   r1   
set_paramsr2   )r8   r,   s     r   set_sample_ratezVADAnalyzer.set_sample_rate   s'     !22Ak%r   c                    t        j                  d|        || _        | j                         | _        | j                  | j
                  z  dz  | _        | j                  | j                  z  }t        | j                  j                  |z        | _
        t        | j                  j                  |z        | _        d| _        d| _        t        j                   | _        y)zSet VAD parameters and recalculate internal values.

        Args:
            params: VAD parameters for detection configuration.
        zSetting VAD params to: r   r   N)r   debugr2   rA   _vad_framesr3   _vad_frames_num_bytesr,   roundr!   _vad_start_framesr"   _vad_stop_frames_vad_starting_count_vad_stopping_countr   r   
_vad_state)r8   r-   vad_frames_per_secs      r   rF   zVADAnalyzer.set_params   s     	.vh78335%)%5%58J8J%JQ%N"!--0@0@@!&t||'>'>AS'S!T %dll&<&<?Q&Q R#$ #$ $,NNr   audioc                 p    t        || j                        }t        || j                  | j                        S )z<Calculate smoothed audio volume using exponential smoothing.)r
   r,   r   r6   r5   )r8   rS   volumes      r   _get_smoothed_volumez VADAnalyzer._get_smoothed_volume   s/    't/?/?@VT%6%68N8NOOr   c                    K   t        j                         }|j                  | j                  | j                  |       d{   }|S 7 w)a^  Analyze audio buffer and return current VAD state.

        Processes incoming audio data, maintains internal state, and determines
        voice activity status based on confidence and volume thresholds.

        Args:
            buffer: Audio buffer to analyze.

        Returns:
            Current VAD state after processing the buffer.
        N)asyncioget_running_looprun_in_executorr7   _run_analyzer)r8   rB   loopstates       r   analyze_audiozVADAnalyzer.analyze_audio   sB      '')**4>>4;M;MvVV Ws   ?A
AA
c                    | xj                   |z  c_         | j                  }t        | j                         |k  r| j                  S t        | j                         |k\  r| j                   d| }| j                   |d | _         | j	                  |      }| j                  |      }|| _        || j                  j                  k\  xr || j                  j                  k\  }|r| j                  xt        j                  k(  r t        j                  | _        d| _        nxt        j                  k(  r | xj                  dz  c_        nt        j                  k(  rt        j                  | _        d| _        n| j                  xt        j                  k(  r t        j                  | _        d| _        nXxt        j                  k(  r t        j                  | _        d| _        n't        j                  k(  r| xj                   dz  c_        t        | j                         |k\  r| j                  t        j                  k(  r5| j                  | j"                  k\  rt        j                  | _        d| _        | j                  t        j                  k(  r5| j                   | j$                  k\  rt        j                  | _        d| _        | j                  S )z2Analyze audio buffer and return current VAD state.Nr   r   )r4   rK   lenrQ   rD   rV   r6   r2   r    r#   r   r   r   rO   r   r   rP   rM   rN   )r8   rB   num_required_bytesaudio_framesr    rU   speakings          r   r[   zVADAnalyzer._run_analyzer   s&   F"!77t #55??"$""#'99++,?-?@L#//0B0CDD..|<J..|<F &D!T\\%<%<<b4<<KbKbAbHoo'*2*;*;340***00A50!***2*;*;340oo****2..340****2*;*;340!**00A50= $""#'99B OOx000((D,B,BB&//DO'(D$ OOx000((D,A,AA&nnDO'(D$r   )r   r   r   r   r   intr   r9   propertyr,   r>   r-   r   rA   bytesr%   rD   rG   rF   rV   r   r^   r[   r   r   r   r+   r+   ?   s    8<[_ ;x} ;XiEX ;, !S ! ! "c " " 	   S   	u 	 	 	&3 &3 3&P% PE P
% H  6E 6h 6r   r+   )r   rX   abcr   r   concurrent.futuresr   enumr   typingr   logurur   pydanticr	   pipecat.audio.utilsr
   r   r$   r'   r(   r)   r   r   r+   r   r   r   <module>rn      s^     # 1     Et  '	 ' u# ur   