
    qi$f                         d Z ddlZddlZddlmZ ddlmZ ddlmZm	Z	 ddl
mZmZ ddlmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZm Z m!Z!m"Z" ddl#m$Z$ dd	l%m&Z&m'Z' dd
l(m)Z) dZ* G d de'      Z+y)zBase input transport implementation for Pipecat.

This module provides the BaseInputTransport class which handles audio and video
input processing, including VAD, turn analysis, and interruption management.
    N)Optional)logger)BaseTurnAnalyzerEndOfTurnState)VADAnalyzerVADState)BotStartedSpeakingFrameBotStoppedSpeakingFrameCancelFrameEmulateUserStartedSpeakingFrameEmulateUserStoppedSpeakingFrameEndFrameFilterUpdateSettingsFrameFrameInputAudioRawFrameInputImageRawFrameMetricsFrameSpeechControlParamsFrame
StartFrame	StopFrameSystemFrameUserSpeakingFrameUserStartedSpeakingFrameUserStoppedSpeakingFrameVADParamsUpdateFrameVADUserStartedSpeakingFrameVADUserStoppedSpeakingFrame)MetricsData)FrameDirectionFrameProcessor)TransportParamsg      ?c                       e Zd ZdZdef fdZdeddfdZd Ze	de
fd	       Ze	dee   fd
       Ze	dee   fd       ZdefdZdefdZdefdZdefdZdefdZdefdZdefdZdedef fdZ d Z!d Z"d Z#dede$fdZ%dede$de$fdZ&dede$de$fdZ'd Z(de)fdZ*de+fd Z,	 d+de$d!efd"Z-dede$de$fd#Z.d$ Z/d%e0fd&Z1d'e2fd(Z3dede$d)e$fd*Z4 xZ5S ),BaseInputTransporta)  Base class for input transport implementations.

    Handles audio and video input processing including Voice Activity Detection,
    turn analysis, audio filtering, and user interaction management. Supports
    interruption handling and provides hooks for transport-specific implementations.
    paramsc                 .   t        |   di | || _        d| _        d| _        d| _        d| _        d| _        d| _        d| _	        | j                  j                  rUddl}|j                         5  |j                  d       |j                  dt               ddd       d| j                  _        | j                  j"                  rUddl}|j                         5  |j                  d       |j                  dt               ddd       d| j                  _        | j                  j&                  rlddl}|j                         5  |j                  d       |j                  d	t               ddd       | j                  j&                  | j                  _        | j                  j*                  | j                  _        | j                  j.                  | j                  _        | j                  j2                  | j                  _        | j                  j6                  | j                  _        | j                  j:                  | j                  _        | j                  j>                  | j                  _         | j                  jB                  | j                  _"        | j                  jF                  rDddl}|j                         5  |j                  d       |j                  d
t               ddd       | j                  jH                  rEddl}|j                         5  |j                  d       |j                  dt               ddd       yy# 1 sw Y   xY w# 1 sw Y   axY w# 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   yxY w)zInitialize the base input transport.

        Args:
            params: Transport configuration parameters.
            **kwargs: Additional arguments passed to parent class.
        r   Fg?NalwayszYParameter 'vad_enabled' is deprecated, use 'audio_in_enabled' and 'vad_analyzer' instead.TzParameter 'vad_audio_passthrough' is deprecated, audio passthrough is now always enabled. Use 'audio_in_passthrough' to disable.z<Parameters 'camera_*' are deprecated, use 'video_*' instead.zpParameter 'turn_analyzer' is deprecated, use `LLMUserAggregator`'s new `user_turn_strategies` parameter instead.zgParameter 'vad_analyzer' is deprecated, use `LLMUserAggregator`'s new `vad_analyzer` parameter instead. )%super__init___params_sample_rate_bot_speaking_user_speaking_user_speaking_frame_time_user_speaking_frame_period_audio_task_pausedvad_enabledwarningscatch_warningssimplefilterwarnDeprecationWarningaudio_in_enabledvad_audio_passthroughaudio_in_passthroughcamera_in_enabledvideo_in_enabledcamera_out_enabledvideo_out_enabledcamera_out_is_livevideo_out_is_livecamera_out_widthvideo_out_widthcamera_out_heightvideo_out_heightcamera_out_bitratevideo_out_bitratecamera_out_frameratevideo_out_frameratecamera_out_color_formatvideo_out_color_formatturn_analyzervad_analyzer)selfr$   kwargsr3   	__class__s       O/opt/pipecat/venv/lib/python3.12/site-packages/pipecat/transports/base_input.pyr)   zBaseInputTransport.__init__>   s    	"6"  # $)*& ,/(  
 <<##((* %%h/o& -1DLL)<<--((* %%h/ W& 15DLL-<<))((* %%h/R& -1LL,J,JDLL)-1\\-L-LDLL*-1\\-L-LDLL*+/<<+H+HDLL(,0LL,J,JDLL)-1\\-L-LDLL*/3||/P/PDLL,26,,2V2VDLL/<<%%((* %%h/@& <<$$((* %%h/8&  %a   $  s<   3(M(M%
(M2(M?&(NM"%M/2M<?NNenabledreturnNc                 V    t        j                  d|        || j                  _        y)zEnable or disable audio streaming on transport start.

        Args:
            enabled: Whether to start audio streaming immediately on transport start.
        zEnabling audio on start. N)r   debugr*   audio_in_stream_on_start)rM   rQ   s     rP   enable_audio_in_stream_on_startz2BaseInputTransport.enable_audio_in_stream_on_start   s$     	0	:;07-    c                    K   yw)zvStart audio input streaming.

        Override in subclasses to implement transport-specific audio streaming.
        Nr'   rM   s    rP   start_audio_in_streamingz+BaseInputTransport.start_audio_in_streaming   s     
 	s   c                     | j                   S )z`Get the current audio sample rate.

        Returns:
            The sample rate in Hz.
        )r+   rY   s    rP   sample_ratezBaseInputTransport.sample_rate   s        rW   c                     ddl }|j                         5  |j                  d       |j                  dt               ddd       | j
                  j                  S # 1 sw Y   | j
                  j                  S xY w)aF  Get the Voice Activity Detection analyzer.

        .. deprecated:: 0.0.101
            This method is deprecated and will be removed in a future version.
            Use `LLMUserAggregator`'s new `vad_analyzer` parameter instead.

        Returns:
            The VAD analyzer instance if configured, None otherwise.
        r   Nr&   zdMethod 'vad_analyzer' is deprecated. Use `LLMUserAggregator`'s new `vad_analyzer` parameter instead.)r3   r4   r5   r6   r7   r*   rL   rM   r3   s     rP   rL   zBaseInputTransport.vad_analyzer   si     	$$& 	!!(+MM4"	 ||(((	 ||(((   (AA9c                     ddl }|j                         5  |j                  d       |j                  dt               ddd       | j
                  j                  S # 1 sw Y   | j
                  j                  S xY w)aA  Get the turn-taking analyzer.

        .. deprecated:: 0.0.99
            This method is deprecated and will be removed in a future version.
            Use `LLMUserAggregator`'s new `user_turn_strategies` parameter instead.

        Returns:
            The turn analyzer instance if configured, None otherwise.
        r   Nr&   zmMethod 'turn_analyzer' is deprecated. Use `LLMUserAggregator`'s new `user_turn_strategies` parameter instead.)r3   r4   r5   r6   r7   r*   rK   r^   s     rP   rK   z BaseInputTransport.turn_analyzer   si     	$$& 	!!(+MM<"	 ||)))	 ||)))r_   framec                   K   d| _         d| _        | j                  j                  xs |j                  | _        | j                  j
                  r7| j                  j
                  j                  | j                         d{    | j                  j                  r/| j                  j                  j                  | j                         | j                  j                  r/| j                  j                  j                  | j                         | j                  j                  s| j                  j                  r| j                  j                  r | j                  j                  j                  nd}| j                  j                  r | j                  j                  j                  nd}| j                  t        ||       d{    yy7 M7 	w)zStart the input transport and initialize components.

        Args:
            frame: The start frame containing initialization parameters.
        FN
vad_paramsturn_params)r1   r-   r*   audio_in_sample_rater+   audio_in_filterstartrL   set_sample_raterK   r$   broadcast_framer   )rM   ra   rd   re   s       rP   rh   zBaseInputTransport.start   sX     # LL==[A[A[ <<'',,..44T5F5FGGG <<$$LL%%55d6G6GH <<%%LL&&66t7H7HI<<$$(B(B=A\\=V=V2299\`J?C||?Y?Y$,,44;;_cK&&(Z[ '   	 )C H"s%   B GGEGG	GGc                    K   | j                          d{    | j                  j                  r-| j                  j                  j                          d{    yy7 H7 w)zStop the input transport and cleanup resources.

        Args:
            frame: The end frame signaling transport shutdown.
        N_cancel_audio_taskr*   rg   stoprM   ra   s     rP   rn   zBaseInputTransport.stop  S      %%'''<<'',,..33555 ( 	( 6"   A#AAA#A!A#!A#c                 n   K   d| _         | j                          d{    | j                          y7 w)z{Pause the input transport temporarily.

        Args:
            frame: The stop frame signaling transport pause.
        TN)r1   rm   _create_audio_taskro   s     rP   pausezBaseInputTransport.pause  s2      %%'''! 	(s   535c                    K   | j                          d{    | j                  j                  r-| j                  j                  j                          d{    yy7 H7 w)zCancel the input transport and stop all processing.

        Args:
            frame: The cancel frame signaling immediate cancellation.
        Nrl   ro   s     rP   cancelzBaseInputTransport.cancel   rp   rq   c                 ,   K   | j                          yw)zCalled when the transport is ready to stream.

        Args:
            frame: The start frame containing initialization parameters.
        N)rs   ro   s     rP   set_transport_readyz&BaseInputTransport.set_transport_ready,  s      	!s   c                    K   | j                   j                  r'| j                  s| j                  |       d{    yyy7 w)zPush a video frame downstream if video input is enabled.

        Args:
            frame: The input video frame to process.
        N)r*   r<   r1   
push_framero   s     rP   push_video_framez#BaseInputTransport.push_video_frame5  s8      <<((//%((( 2>((s   7AAAc                    K   | j                   j                  r1| j                  s$| j                  j	                  |       d{    yyy7 w)zPush an audio frame to the processing queue if audio input is enabled.

        Args:
            frame: The input audio frame to process.
        N)r*   r8   r1   _audio_in_queueputro   s     rP   push_audio_framez#BaseInputTransport.push_audio_frame>  s@      <<((&&**5111 2>(1s   AAAA	directionc                   K   t         |   ||       d{    t        |t              r4| j	                  ||       d{    | j                  |       d{    yt        |t              r4| j                  |       d{    | j	                  ||       d{    yt        |t              r4| j                  |       d{    | j	                  ||       d{    yt        |t              r4| j                  |       d{    | j	                  ||       d{    yt        |t              r?t        j                  d       | j                  t         j"                  d       d{    yt        |t$              r?t        j                  d       | j                  t         j&                  d       d{    yt        |t(              r| j	                  ||       d{    yt        |t*              r4| j	                  ||       d{    | j-                  |       d{    yt        |t.              r4| j	                  ||       d{    | j1                  |       d{    yt        |t2              r| j4                  j6                  r| j4                  j6                  j9                  |j:                         | j=                  t>        |j:                  | j4                  j@                  r | j4                  j@                  j:                  nd       d{    yyt        |tB              rD| j4                  jD                  r.| j4                  jD                  j                  |       d{    y| j	                  ||       d{    y7 7 e7 O7 (7 7 7 7 7 7 I7 7 7 7 7 o7 Y7 7 R7 9w)zProcess incoming frames and handle transport-specific logic.

        Args:
            frame: The frame to process.
            direction: The direction of frame flow in the pipeline.
        NzEmulating user started speakingTemulatedzEmulating user stopped speakingrc   )#r(   process_frame
isinstancer   rz   rh   r   rv   r	   '_deprecated_handle_bot_started_speakingr
   '_deprecated_handle_bot_stopped_speakingr   r   rT   $_deprecated_handle_user_interruptionr   SPEAKINGr   QUIETr   r   rn   r   rt   r   r*   rL   
set_paramsr$   rj   r   rK   r   rg   )rM   ra   r   rO   s      rP   r   z BaseInputTransport.process_frameK  s     g#E9555 eZ( //%333**U###{+++e$$$//%33367>>uEEE//%33367>>uEEE//%333>?LL:;;;H<M<MX\;]]]>?LL:;;;HNNUY;ZZZ{+//%333x( //%333))E"""y)//%333**U### 34||(())44U\\B**,$||||11 !% : : A A +    ) 89dll>Z>Z,,..<<UCCC //%333i 	6 4#$3E3E3 ^ [ 4
 4"3# D 4sW  ON#)ON&ON))ON,ON/)ON2	O"N5#)ON8O&N;'AO5N>6AOO*O/O0*OOO3O
4*OOO7O8B5O-O.AOOOOO&O)O,O/O2O5O8O;O>OOOO
OOOOOOc                     | j                   sU| j                  j                  r>t        j                         | _        | j                  | j                               | _         yyy)z;Create the audio processing task if audio input is enabled.N)r0   r*   r8   asyncioQueuer}   create_task_audio_task_handlerrY   s    rP   rs   z%BaseInputTransport._create_audio_task  sJ    DLL$A$A#*==?D #//0H0H0JKD %BrW   c                 ~   K   | j                   r+| j                  | j                          d{    d| _         yy7 w)z-Cancel and cleanup the audio processing task.N)r0   cancel_taskrY   s    rP   rm   z%BaseInputTransport._cancel_audio_task  s9     ""4#3#3444#D 4s   +=;=c                   K   t         j                  }d}	 	 t        j                  | j                  j                         t               d{   }d}| j                  j                  r<| j                  j                  j                  |j                         d{   |_
        |j                  s| j                  j                          |}| j                  j                  r| j                  ||       d{   }| j                  j                  r| j                  |||       d{    | j                  j                  r+|t         j                   k(  r| j#                          d{    | j                  j$                  r| j'                  |       d{    | j                  j                          7 o7 #7 7 7 Y7 ,# t        j(                  $ r |sY | j*                  rt-        j.                  d       t         j                  }| j                  j                  r$| j                  j                  j1                          | j                  j                  r)| j3                  t         j                         d{  7   nf| j                  j                  r*| j                  j                  j4                  j6                  nd}| j'                  t9        |             d{  7   Y =w xY ww)z=Main audio processing task handler for VAD and turn analysis.FT)timeoutNzGForcing VAD user stopped speaking due to timeout receiving audio frame!        	stop_secs)r   r   r   wait_forr}   getAUDIO_INPUT_TIMEOUT_SECSr*   rg   filteraudio	task_donerL   _deprecated_handle_vadrK   _deprecated_run_turn_analyzerr   #_deprecated_user_currently_speakingr:   rz   TimeoutErrorr-   r   warningclearr   r$   r   r   )rM   	vad_stateaudio_receivedra   previous_vad_stater   s         rP   r   z&BaseInputTransport._audio_task_handler  sU    &nn	 <`292B2B((,,.8P3 -
 "& <<//(,(D(D(K(KEKK(X"XEK {{((224 &/"<<,,&*&A&A%&S SI<<--<<UIOabbb<<,,h>O>O1OBBDDD <<44//%000$$..0M - #Y !T c E
 1 '' `% &&NNa !)I||112288:||11"GGWWW  $||88 !LL55<<FF!$ "
 #oo.IT].^___-`s   K;6G
 F<AG
 F?/G
 K;-G
 :G;0G
 +G,A G
 ,G-.G
 GG
 :K;<G
 ?G
 G
 G
 G
 G
 
K8 K;#B$K8J
A&K8.K1/K84K;7K88K;audio_framec                    K   t         j                  }| j                  j                  r7| j                  j                  j	                  |j
                         d{   }|S 7 w)z'Analyze audio frame for voice activity.N)r   r   r*   rL   analyze_audior   )rM   r   states      rP   _deprecated_vad_analyzez*BaseInputTransport._deprecated_vad_analyze  sJ     <<$$,,33AA+BSBSTTE Us   AA$A"A$r   c                   K   | j                  |       d{   }||k7  r|t        j                  k7  r|t        j                  k7  r|t        j                  k(  rf| j
                  j                  r*| j
                  j                  j                  j                  nd}| j                  t        |             d{    nx|t        j                  k(  re| j
                  j                  r*| j
                  j                  j                  j                  nd}| j                  t        |             d{    |}|S 7 '7 7 wHHandle Voice Activity Detection results and generate appropriate frames.Nr   )
start_secsr   )r   r   STARTINGSTOPPINGr   r*   rL   r$   r   rz   r   r   r   r   )rM   r   r   new_vad_stater   r   s         rP   _deprecated_new_handle_vadz-BaseInputTransport._deprecated_new_handle_vad  s     #::;GGY&!2!22!2!22 1 11 ||00 LL--44?? 
 oo&AZ&XYYY(..0BF,,B[B[DLL--44>>ad  oo&AI&VWWW%I) H Z
 Xs5   ED>B$E<E=A8E5E6	EEEc                    K   | j                   j                  s| j                  r| j                  ||       d{   S | j	                  ||       d{   S 7 7 w)r   N)r*   rK   _deprecated_openaillmcontext_deprecated_old_handle_vadr   )rM   r   r   s      rP   r   z)BaseInputTransport._deprecated_handle_vad  sS      <<%%)J)J88iPPP88iPPP QPs!   8AAAAAAc                    K   t        j                          | j                  z
  }|| j                  k\  r7| j                  t               d{    t        j                          | _        yy7 w)zHandle user speaking frame.N)timer.   r/   rj   r   )rM   	diff_times     rP   r   z6BaseInputTransport._deprecated_user_currently_speaking  sU     IIK$"@"@@	888&&'8999-1YY[D* 99s   A	A-A+ A-c                    K   d| _         yw)z3Update bot speaking state when bot starts speaking.TNr,   ro   s     rP   r   z:BaseInputTransport._deprecated_handle_bot_started_speaking  s     !   	c                    K   d| _         yw)z2Update bot speaking state when bot stops speaking.FNr   ro   s     rP   r   z:BaseInputTransport._deprecated_handle_bot_stopped_speaking  s     "r   r   c                 8  K   |t         j                  k(  rt        j                  d       d| _        | j                  t        |       d{    | j                   xs | j                   }|r%| j                  r| j                          d{    y| j                  r#| j                  rt        j                  d       yyy|t         j                  k(  r<t        j                  d       d| _        | j                  t        |       d{    yy7 7 7 
w)z8Handle user interruption events based on speaking state.zUser started speakingTr   NzkUser started speaking while bot is speaking with interruption config - deferring interruption to aggregatorzUser stopped speakingF)r   r   r   rT   r-   rj   r   interruption_strategiesr,   _allow_interruptionsbroadcast_interruptionr   r   )rM   r   r   "should_push_immediate_interruptions       rP   r   z7BaseInputTransport._deprecated_handle_user_interruption  s     )))LL01"&D&&'?(&SSS 000J8J8J4J /
 2d6O6O11333--$2D2D; 3E-
 (..(LL01"'D&&'?(&SSS	 )# T 4 Ts8   A
DDADDA>DDDDDc                   K   | j                  |       d{   }||k7  r|t        j                  k7  r|t        j                  k7  rod}| j                  j
                  du xs! | j                  j
                  j                   }|t        j                  k(  rx| j                  j                  r*| j                  j                  j                  j                  nd}| j                  t        |             d{    |rt        j                  }n|t        j                  k(  rw| j                  j                  r*| j                  j                  j                  j                  nd}| j                  t        |             d{    |rt        j                  }|r| j!                  |       d{    |}|S 7 7 7 :7 wr   )r   r   r   r   r*   rK   speech_triggeredr   rL   r$   r   rz   r   r   r   r   r   )rM   r   r   r   interruption_statecan_create_user_framesr   r   s           rP   r   z-BaseInputTransport._deprecated_old_handle_vad=  s     #::;GGY&!2!22!2!22!% **d2 C||11BBB #  1 11 ||00 LL--44?? 
 oo&AZ&XYYY))1):):&(..0BF,,B[B[DLL--44>>ad  oo&AI&VWWW))1&!??@RSSS%II H* Z X
 TsG   GF;C"G:F>;B
GG ,G2G3	G>G GGc                   K   | j                   j                  rb| j                   j                  j                          d{   \  }}| j                  |       d{    | j	                  |       d{    yy7 ;7 !7 
w)z<Handle end-of-turn analysis and generate prediction results.N)r*   rK   analyze_end_of_turn$_deprecated_handle_prediction_result'_deprecated_handle_end_of_turn_complete)rM   r   
predictions      rP   _deprecated_handle_end_of_turnz1BaseInputTransport._deprecated_handle_end_of_turng  sk     <<%%&*ll&@&@&T&T&V VE:;;JGGG>>uEEE & VGEs5   >B A<BA>B5B 6B>B Br   c                    K   |t         j                  k(  r(| j                  t        j                         d{    yy7 w)z*Handle completion of end-of-turn analysis.N)r   COMPLETEr   r   r   )rM   r   s     rP   r   z:BaseInputTransport._deprecated_handle_end_of_turn_completen  s4     N+++;;HNNKKK ,Ks   6A?Aresultc                 X   K   | j                  t        |g             d{    y7 w)z8Handle a prediction result event from the turn analyzer.)dataN)rz   r   )rM   r   s     rP   r   z7BaseInputTransport._deprecated_handle_prediction_results  s     ool9:::s    *(*r   c                   K   |t         j                  k(  xs |t         j                  k(  }| j                  j                  j                  |j                  |      }|t        j                  k(  r| j                  |       d{    y|t         j                  k(  r||k7  r| j                          d{    yyy7 87 	w)z4Run turn analysis on audio frame and handle results.N)r   r   r   r*   rK   append_audior   r   r   r   r   r   )rM   ra   r   r   	is_speechend_of_turn_states         rP   r   z0BaseInputTransport._deprecated_run_turn_analyzerw  s      !2!22Ti8CTCT6T	 LL66CCEKKQZ[ 7 77>>?PQQQ(..(Y:L-L55777 .M( R 8s$   B B?B;0B?3B=4B?=B?)F)6__name__
__module____qualname____doc__r!   r)   boolrV   rZ   propertyintr\   r   r   rL   r   rK   r   rh   r   rn   r   rt   r   rv   rx   r   r{   r   r   r   r   r   rs   rm   r   r   r   r   r   r   r	   r   r
   r   r   r   r   r   r   r   r   r   __classcell__)rO   s   @rP   r#   r#   6   s   ^ ^@8t 8 8 !S ! ! )h{3 ) ), *x(89 * *,   F
6 
6
" 
"
6+ 
6"z "),> )2,> 2;4 ;4> ;4BL$C`Z9K PX -:B	4Q-Q:BQ	Q9"CZ "#CZ #
 5:T!T-1T>(-(:B(	(TFL> L
; ;8'84<8RZ8rW   r#   ),r   r   r   typingr   logurur   %pipecat.audio.turn.base_turn_analyzerr   r   pipecat.audio.vad.vad_analyzerr   r   pipecat.frames.framesr	   r
   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   pipecat.metrics.metricsr   "pipecat.processors.frame_processorr   r    !pipecat.transports.base_transportr!   r   r#   r'   rW   rP   <module>r      s`        A     . 0 M = L	8 L	8rW   