
    qiI                     Z    d Z ddlmZmZmZ ddlmZ ddlmZ ddl	m
Z
mZ  G d de
      Zy)	a  AIC-integrated VAD analyzer that lazily binds to the AIC SDK backend.

This module provides VAD analyzer implementations that query the AIC SDK's
is_speech_detected() and map it to a float confidence (1.0/0.0).

Classes:
    AICVADAnalyzer: For aic-sdk (uses 'aic_sdk' module)
    )AnyCallableOptional)VadParameter)logger)VADAnalyzer	VADParamsc            
            e Zd ZdZddddddeeg ef      dee   dee   dee   f fdZdeg ef   fd	Z	d
 Z
 fdZdef fdZdefdZdedefdZ xZS )AICVADAnalyzera  VAD analyzer that lazily binds to the AIC VadContext via a factory.

    The analyzer can be constructed before the AIC Processor exists. Once the filter has
    started and the Processor is available, the provided factory will succeed and the
    VadContext will be obtained. The context's is_speech_detected() boolean state is
    then mapped to 1.0 (speech) or 0.0 (no speech) to satisfy the VADAnalyzer interface.

    AIC VAD runtime parameters:
      - speech_hold_duration:
          Controls for how long the VAD continues to detect speech after the audio signal
          no longer contains speech (in seconds).
          Range: 0.0 to 100x model window length
          Default (SDK): 0.05s (50ms)
      - minimum_speech_duration:
          Controls for how long speech needs to be present in the audio signal before the
          VAD considers it speech (in seconds).
          Range: 0.0 to 1.0
          Default (SDK): 0.0s
      - sensitivity:
          Controls the sensitivity (energy threshold) of the VAD. This value is used by
          the VAD as the threshold a speech audio signal's energy has to exceed in order
          to be considered speech.
          Range: 1.0 to 15.0
          Formula: Energy threshold = 10 ** (-sensitivity)
          Default (SDK): 6.0
    N)vad_context_factoryspeech_hold_durationminimum_speech_durationsensitivityr   r   r   r   c                    t        dddd      }t        | 	  d|       || _        d| _        || _        || _        || _        y)a  Create an AIC VAD analyzer.

        Args:
            vad_context_factory:
                Zero-arg callable that returns the AIC VadContext.
                This may raise until the filter's Processor has been created; the analyzer
                will retry on set_sample_rate/first use.
            speech_hold_duration:
                Optional override for AIC VAD speech hold duration (in seconds).
                Range: 0.0 to 100x model window length.
                If None, the SDK default (0.05s) is used.
            minimum_speech_duration:
                Optional override for minimum speech duration before VAD reports
                speech detected (in seconds).
                Range: 0.0 to 1.0.
                If None, the SDK default (0.0s) is used.
            sensitivity:
                Optional override for AIC VAD sensitivity (energy threshold).
                Range: 1.0 to 15.0. Energy threshold = 10 ** (-sensitivity).
                If None, the SDK default (6.0) is used.
        g      ?        )
confidence
start_secs	stop_secs
min_volumeN)sample_rateparams)r	   super__init___vad_context_factory_vad_ctx_pending_speech_hold_duration _pending_minimum_speech_duration_pending_sensitivity)selfr   r   r   r   fixed_params	__class__s         K/opt/pipecat/venv/lib/python3.12/site-packages/pipecat/audio/vad/aic_vad.pyr   zAICVADAnalyzer.__init__.   sO    < !CC3[^_T,?$7!'+>R*AX-5@!    c                 2    || _         | j                          y)z0Attach or replace the factory post-construction.N)r   _ensure_vad_context_initialized)r   r   s     r"   bind_vad_context_factoryz'AICVADAnalyzer.bind_vad_context_factoryU   s    $7!,,.r#   c                 
   | j                   t        y	 | j                  4| j                   j                  t        j                  | j                         | j
                  4| j                   j                  t        j                  | j
                         | j                  5| j                   j                  t        j                  | j                         yy# t        $ r"}t        j                  d|        Y d}~yd}~ww xY w)z/Apply optional AIC VAD parameters if available.Nz/AIC VAD parameter application deferred/failed: )r   r   r   set_parameterSpeechHoldDurationr   MinimumSpeechDurationr   Sensitivity	Exceptionr   debug)r   es     r"   _apply_vad_paramsz AICVADAnalyzer._apply_vad_paramsZ   s    == L$8	P11=++ 33T5W5W 44@++ 668]8] ((4++L,D,DdF_F_` 5 	PLLJ1#NOO	Ps   C C 	D C==Dc                 >   | j                   y | j                  sy 	 | j                         | _         | j                          t        |   | j
                         t        j                  d       y # t        $ r"}t        j                  d|        Y d }~y d }~ww xY w)Nz(AIC VAD context initialized in analyzer.z*Deferring AIC VAD context initialization: )	r   r   r/   r   
set_params_paramsr   r-   r,   )r   r.   r!   s     r"   r%   z.AICVADAnalyzer._ensure_vad_context_initializedm   s    ==$((	K 557DM""$Gt||,LLCD 	KLLEaSIJJ	Ks   AA1 1	B:BBr   c                     | j                   xs || _        | j                          	 t        |   | j
                         y# t        $ r Y yw xY w)zsSet the sample rate for audio processing.

        Args:
            sample_rate: Audio sample rate in Hz.
        N)_init_sample_rate_sample_rater%   r   r1   r2   r,   )r   r   r!   s     r"   set_sample_ratezAICVADAnalyzer.set_sample_rate|   sL     !22Ak,,.	Gt||, 		s   A 	AAreturnc                 T    | j                   dkD  rt        | j                   dz        S dS )zGet the number of audio frames required for analysis.

        Returns:
            Number of frames needed for VAD processing.
        r   g{Gz?   )r   int)r   s    r"   num_frames_requiredz"AICVADAnalyzer.num_frames_required   s,     04/?/?!/Cs4##d*+LLr#   bufferc                     | j                          | j                  y	 | j                  j                         }|rdS dS # t        $ r"}t	        j
                  d|        Y d}~yd}~ww xY w)a  Return voice activity detection result for the given audio buffer.

        Note:
            The AIC SDK provides binary speech detection (not a probability score).
            This method returns 1.0 when speech is detected and 0.0 otherwise,
            rather than a true confidence value.

        Args:
            buffer: Audio buffer (unused - AIC VAD state is updated internally
                by the enhancement pipeline).

        Returns:
            1.0 if speech is detected, 0.0 otherwise.
        Nr   g      ?zAIC VAD inference error: )r%   r   is_speech_detectedr,   r   error)r   r<   	is_speechr.   s       r"   voice_confidencezAICVADAnalyzer.voice_confidence   sh      	,,.== 	88:I#3,, 	LL4QC89	s   ? ? 	A*A%%A*)__name__
__module____qualname____doc__r   r   r   floatr   r&   r/   r%   r:   r6   r;   bytesrA   __classcell__)r!   s   @r"   r   r      s    < <@0437'+%A &hr3w&78%A 'uo	%A
 "*%%A e_%AN/HRW<M /
P&K3 MS Mu  r#   r   N)rE   typingr   r   r   aic_sdkr   logurur   pipecat.audio.vad.vad_analyzerr   r	   r    r#   r"   <module>rN      s)    + *    A][ ]r#   