
    qia                     v    d Z ddlZddlmZ ddlmZmZ ddlmZm	Z	m
Z
mZmZ ddlmZ ddlmZ  G d d	e      Zy)
zVoice Activity Detection controller for managing speech state transitions.

This module provides a controller that wraps a VADAnalyzer to track speech state
and emit events when speech starts, stops, or is actively detected.
    N)Type)VADAnalyzerVADState)FrameInputAudioRawFrameSpeechControlParamsFrame
StartFrameVADParamsUpdateFrame)FrameDirection)
BaseObjectc                        e Zd ZdZdddedef fdZdefdZde	fd	Z
defd
ZdededefdZd Zej$                  fdedefdZdee   fdZ xZS )VADControllera(  Manages voice activity detection state and emits speech events.

    Wraps a `VADAnalyzer` to process audio and trigger events based on speech
    state transitions. Tracks whether the user is speaking, quiet, or
    transitioning between states.

    Event handlers available:

    - on_speech_started: Called when speech begins.
    - on_speech_stopped: Called when speech ends.
    - on_speech_activity: Called periodically while speech is detected.
    - on_push_frame: Called when the controller wants to push a frame.
    - on_broadcast_frame: Called when the controller wants to broadcast a frame.

    Example::

        @vad_controller.event_handler("on_speech_started")
        async def on_speech_started(controller):
            ...

        @vad_controller.event_handler("on_speech_stopped")
        async def on_speech_stopped(controller):
            ...

        @vad_controller.event_handler("on_speech_activity")
        async def on_speech_activity(controller):
            ...

        @vad_controller.event_handler("on_push_frame")
        async def on_push_frame(controller, frame: Frame, direction: FrameDirection):
            ...

        @vad_controller.event_handler("on_broadcast_frame")
        async def on_broadcast_frame(controller, frame_cls: Type[Frame], **kwargs):
            ...
    g?)speech_activity_periodvad_analyzerr   c                4   t         |           || _        t        j                  | _        d| _        || _        | j                  dd       | j                  dd       | j                  dd       | j                  dd       | j                  dd       y	)
a  Initialize the VAD controller.

        Args:
            vad_analyzer: The `VADAnalyzer` instance for processing audio.
            speech_activity_period: Minimum interval in seconds between
                `on_speech_activity` events. Defaults to 0.2.
        r   on_speech_startedT)syncon_speech_stoppedon_speech_activityon_push_frameon_broadcast_frameN)	super__init___vad_analyzerr   QUIET
_vad_state_speech_activity_time_speech_activity_period_register_event_handler)selfr   r   	__class__s      R/opt/pipecat/venv/lib/python3.12/site-packages/pipecat/audio/vad/vad_controller.pyr   zVADController.__init__B   s     	)$,NN &'" (>$$$%8t$D$$%8t$D$$%9$E$$_4$@$$%9$E    framec                 ~  K   t        |t              r| j                  |       d{    yt        |t              r| j	                  |       d{    yt        |t
              rO| j                  j                  |j                         | j                  t        |j                         d{    yy7 7 g7 
w)zProcess a frame and handle VAD-related events.

        Handles `StartFrame` to initialize the sample rate and `InputAudioRawFrame`
        to analyze audio for voice activity.

        Args:
            frame: The frame to process.
        N
vad_params)
isinstancer	   _startr   _handle_audior
   r   
set_paramsparamsbroadcast_framer   r    r$   s     r"   process_framezVADController.process_frameZ   s      eZ(++e$$$12$$U+++34))%,,7&&'?ELL&YYY 5 %+ Zs4   %B=B7)B=B9AB=0B;1B=9B=;B=c                    K   | j                   j                  |j                         | j                  t        | j                   j
                         d {    y 7 w)Nr&   )r   set_sample_rateaudio_in_sample_rater-   r   r,   r.   s     r"   r)   zVADController._startk   sF     **5+E+EF""#;HZHZHaHa"bbbs   AAAAc                    K   | j                  |j                  | j                         d{   | _        | j                  t        j                  k(  r| j                  d       d{    yy7 A7 w)a   Process an audio chunk and emit speech events as needed.

        Analyzes the audio for voice activity and triggers `on_speech_started`,
        `on_speech_stopped`, or `on_speech_activity` events based on state changes.

        Args:
            frame: Audio frame to process.
        Nr   )_handle_vadaudior   r   SPEAKING_call_event_handlerr.   s     r"   r*   zVADController._handle_audiop   s`      !% 0 0doo NN??h///**+?@@@ 0 O As!   *A2A.:A2'A0(A20A2r5   	vad_statereturnc                 l  K   | j                   j                  |       d{   }||k7  r|t        j                  k7  rn|t        j                  k7  r[|t        j
                  k(  r| j                  d       d{    n,|t        j                  k(  r| j                  d       d{    |}|S 7 7 77 w)zGHandle Voice Activity Detection results and trigger appropriate events.Nr   r   )r   analyze_audior   STARTINGSTOPPINGr6   r7   r   )r    r5   r8   new_vad_states       r"   r4   zVADController._handle_vad~   s     "00>>uEEY&!2!22!2!22 1 11../BCCC(..0../BCCC%I F DCs4   B4B.AB48B09,B4%B2&	B40B42B4c                    K   t        j                          | j                  z
  }|| j                  k\  r3t        j                          | _        | j                  d       d{    yy7 w)zHandle user speaking frame.r   N)timer   r   r7   )r    	diff_times     r"   _maybe_speech_activityz$VADController._maybe_speech_activity   sU     IIK$"<"<<	444)-D&**+?@@@ 5@s   AA) A'!A)	directionc                 F   K   | j                  d||       d{    y7 w)a-  Request a frame to be pushed through the pipeline.

        This emits an on_push_frame event that must be handled by a processor
        to actually push the frame into the pipeline.

        Args:
            frame: The frame to push.
            direction: The direction to push the frame.
        r   Nr7   )r    r$   rC   s      r"   
push_framezVADController.push_frame   s       &&yIIIs   !!	frame_clsc                 H   K    | j                   d|fi | d{    y7 w)a[  Request a frame to be broadcast upstream and downstream.

        This emits an on_broadcast_frame event that must be handled by a processor
        to actually broadcast the frame in the pipeline.

        Args:
            frame_cls: The class of the frame to broadcast.
            **kwargs: Arguments to pass to the frame constructor.
        r   NrE   )r    rG   kwargss      r"   r-   zVADController.broadcast_frame   s&      'd&&';YQ&QQQs   " ")__name__
__module____qualname____doc__r   floatr   r   r/   r	   r)   r   r*   bytesr   r4   rB   r   
DOWNSTREAMrF   r   r-   __classcell__)r!   s   @r"   r   r      s    #J VY F[ FU F0Z Z"c* c
A); Au  h  A JXIbIb 
Je 
J 
J
RtE{ 
Rr#   r   )rM   r@   typingr   pipecat.audio.vad.vad_analyzerr   r   pipecat.frames.framesr   r   r   r	   r
   "pipecat.processors.frame_processorr   pipecat.utils.base_objectr   r    r#   r"   <module>rX      s7      @  > 0ORJ ORr#   