
    qi58                     .   d Z ddlZddlmZ ddlmZmZ ddlZddlm	Z	 ddl
mZ ddlmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZ ddlmZmZ dd	lm Z  dd
l!m"Z" ddl#m$Z$ ddl%m&Z&m'Z'm(Z(m)Z) ddl*m+Z+ ddl,m-Z- dZ.e G d de+             Z/ G d de       Z0y)a  HeyGen implementation for Pipecat.

This module provides integration with the HeyGen platform for creating conversational
AI applications with avatars. It manages conversation sessions and provides real-time
audio/video streaming capabilities through the HeyGen API.
    N)	dataclass)OptionalUnion)logger)create_stream_resampler)AudioRawFrameBotStartedSpeakingFrameCancelFrameEndFrameFrameImageRawFrameOutputAudioRawFrameOutputImageRawFrameOutputTransportReadyFrameSpeechOutputAudioRawFrame
StartFrameTTSAudioRawFrameTTSStartedFrameUserStartedSpeakingFrameUserStoppedSpeakingFrame)FrameDirectionFrameProcessorSetup)	AIService)NewSessionRequest)LiveAvatarNewSessionRequest)HEY_GEN_SAMPLE_RATEHeyGenCallbacksHeyGenClientServiceType)ServiceSettings)TransportParamsgffffff?c                       e Zd ZdZy)HeyGenVideoSettingsz&Settings for the HeyGen video service.N)__name__
__module____qualname____doc__     O/opt/pipecat/venv/lib/python3.12/site-packages/pipecat/services/heygen/video.pyr#   r#   8   s    0r)   r#   c                   P    e Zd ZU dZeZeed<   dddddedej                  de
eeef      de
e   d	e
e   d
df fdZdef fdZ fdZd ZdefdZdefdZdefdZdefdZdef fdZdef fdZdef fdZde de!f fdZ"d
e#fdZ$d Z%d Z&d  Z'd! Z(de)fd"Z*d# Z+ xZ,S )$HeyGenVideoServicea  A service that integrates HeyGen's interactive avatar capabilities into the pipeline.

    This service manages the lifecycle of a HeyGen avatar session by handling bidirectional
    audio/video streaming, avatar animations, and user interactions. It processes various frame types
    to coordinate the avatar's behavior and maintains synchronization between audio and video streams.

    The service supports:

    - Real-time avatar animation based on audio input
    - Voice activity detection for natural interactions
    - Interrupt handling for more natural conversations
    - Audio resampling for optimal quality
    - Automatic session management

    Args:
        api_key (str): HeyGen API key for authentication
        session (aiohttp.ClientSession): HTTP client session for API requests
        session_request (NewSessionRequest, optional): Configuration for the HeyGen session.
            Defaults to using the "Shawn_Therapist_public" avatar with "v2" version.
    	_settingsN)session_requestservice_typesettingsapi_keysessionr.   r/   r0   returnc                   t        d      }||j                  |       t        |   dd|i| || _        || _        d| _        d| _        t               | _	        d| _
        || _        || _        d| _        d| _        d| _        y)a  Initialize the HeyGen video service.

        Args:
            api_key: HeyGen API key for authentication
            session: HTTP client session for API requests
            session_request: Configuration for the HeyGen session
            service_type: Service type for the avatar session
            settings: Runtime-updatable settings. HeyGen has no model concept, so this
                is primarily used for the ``extra`` dict.
            **kwargs: Additional arguments passed to parent AIService
        N)modelr0   Fr   r(   )r    apply_updatesuper__init___api_key_session_client
_send_taskr   
_resampler_is_interrupting_session_request_service_type_other_participant_has_joined	_event_id_audio_chunk_size)	selfr1   r2   r.   r/   r0   kwargsdefault_settings	__class__s	           r*   r8   zHeyGenVideoService.__init__X   s    * +6))(3="2=f=/32613 % /)-2*!"r)   setupc                   K   t         |   |       d{    t        | j                  | j                  t        ddddt              | j                  | j                  t        | j                  | j                  | j                        d      | _        | j                  j                  |       d{    y7 7 w)a  Set up the HeyGen video service with necessary configuration.

        Initializes the HeyGen client, establishes connections, and prepares the service
        for audio/video processing. This includes setting up audio/video streams,
        configuring callbacks, and initializing the resampler.

        Args:
            setup: Configuration parameters for the frame processor.
        Ni  T)audio_in_sample_rateaudio_in_enabledvideo_in_enabledaudio_out_enabledaudio_out_sample_rate)on_connectedon_participant_connectedon_participant_disconnected)r1   r2   paramsr.   r/   	callbacksconnect_as_user)r7   rH   r   r9   r:   r!   r   r?   r@   r   _on_connected_on_participant_connected_on_participant_disconnectedr;   )rD   rH   rG   s     r*   rH   zHeyGenVideoService.setup~   s      gmE"""#MMMM"%*!%!%"&&9 !11++%!//)-)G)G,0,M,M
 !#
& ll  ''') 	#( 	(s"   B>B:BB>4B<5B><B>c                    K   t         |           d{    | j                  j                          d{    d| _        y7 .7 w)zClean up the service and release resources.

        Terminates the HeyGen client session and cleans up associated resources.
        N)r7   cleanupr;   )rD   rG   s    r*   rY   zHeyGenVideoService.cleanup   s?     
 goll""$$$ 	 $s   AA!AAAAc                 6   K   t        j                  d       yw)z%Handle bot connected to LiveKit room.z$HeyGen bot connected to LiveKit roomNr   inforD   s    r*   rU   z HeyGenVideoService._on_connected   s     :;s   participant_idc                 $  K   t        j                  d|        | j                  sdd| _        | j                  j	                  || j
                         d{    | j                  j                  || j                         d{    yy7 47 w)z$Handle participant connected events.zParticipant connected TN)r   r\   rA   r;   capture_participant_video_on_participant_video_framecapture_participant_audio_on_participant_audio_datarD   r^   s     r*   rV   z,HeyGenVideoService._on_participant_connected   s     ,^,<=>1115D.,,88 @ @   ,,88 ? ?   2s$   ABB-BBBBc                 <   K   t        j                  d|        yw)z'Handle participant disconnected events.zParticipant disconnected Nr[   rd   s     r*   rW   z/HeyGenVideoService._on_participant_disconnected   s     //?@As   video_framec                    K   t        |j                  |j                  |j                        }| j	                  |       d{    y7 w)z/Handle incoming video frames from participants.)imagesizeformatN)r   rh   ri   rj   
push_frame)rD   rf   frames      r*   ra   z.HeyGenVideoService._on_participant_video_frame   s?     ###!!%%

 ooe$$$   AAA	Aaudio_framec                    K   t        |j                  |j                  |j                        }| j	                  |       d{    y7 w)z-Handle incoming audio data from participants.)audiosample_ratenum_channelsN)r   rp   rq   rr   rk   )rD   rn   rl   s      r*   rc   z-HeyGenVideoService._on_participant_audio_data   s?     )###//$11

 ooe$$$rm   rl   c                   K   t         |   |       d{    t        t        dz  dz        | _        | j
                  j                  || j                         d{    | j                          d{    y7 e7 7 	w)a  Start the HeyGen video service and initialize the avatar session.

        Creates necessary tasks for audio/video processing and establishes
        the connection with the HeyGen service.

        Args:
            frame: The start frame containing initialization parameters.
        N      )r7   startintr   rC   r;   _create_send_taskrD   rl   rG   s     r*   rv   zHeyGenVideoService.start   su      gmE"""!$&9A&=%C!Dll  (>(>???$$&&&	 	# 	@&s4   BA<ABA>B6B 7B>B Bc                    K   t         |   |       d{    | j                          d{    | j                          d{    y7 57 7 	w)zStop the HeyGen video service gracefully.

        Performs cleanup by ending the conversation and cancelling ongoing tasks
        in a controlled manner.

        Args:
            frame: The end frame.
        N)r7   stop_end_conversation_cancel_send_taskry   s     r*   r{   zHeyGenVideoService.stop   sL      gl5!!!$$&&&$$&&& 	"&&1   AAAAAAAAAc                    K   t         |   |       d{    | j                          d{    | j                          d{    y7 57 7 	w)zCancel the HeyGen video service.

        Performs an immediate termination of the service, cleaning up resources
        without waiting for ongoing operations to complete.

        Args:
            frame: The cancel frame.
        N)r7   cancelr|   r}   ry   s     r*   r   zHeyGenVideoService.cancel   sL      gnU###$$&&&$$&&& 	$&&r~   	directionc                 ~  K   t         |   ||       d{    t        |t              r3| j	                          d{    | j                  ||       d{    yt        |t              r=| j                  j                          d{    | j                  ||       d{    yt        |t              r5| j                  j                          | j                  ||       d{    yt        |t              r| j                  |       d{    yt        |t              r| j                          d{    yt        |t              r3| j!                          d{    | j                  ||       d{    y| j                  ||       d{    y7 7 f7 O7 7 7 7 7 v7 O7 77 w)a  Process incoming frames and coordinate avatar behavior.

        Handles different types of frames to manage avatar interactions:
        - UserStartedSpeakingFrame: Activates avatar's listening animation
        - UserStoppedSpeakingFrame: Deactivates avatar's listening state
        - TTSAudioRawFrame: Processes audio for avatar speech
        - Other frames: Forwards them through the pipeline

        Args:
            frame: The frame to be processed.
            direction: The direction of frame processing (input/output).
        N)r7   process_frame
isinstancer   _handle_user_started_speakingrk   r   r;   stop_agent_listeningr   transport_readyr   _handle_audio_framer   start_ttfb_metricsr	   stop_ttfb_metrics)rD   rl   r   rG   s      r*   r   z HeyGenVideoService.process_frame   sh     g#E9555e5644666//%33378,,33555//%33389LL((*//%333/0**5111/))+++67 ((***//%333//%333- 	6 7353 41+
 +33s   F=F"'F=F% F=F(2F=F+F=&F.'AF=+F1,)F=F3(F=>F5?(F='F7(F=F9F=F;F=%F=(F=+F=.F=1F=3F=5F=7F=9F=;F=c                      y)zzCheck if the service can generate metrics.

        Returns:
            True if metrics generation is supported.
        Tr(   r]   s    r*   can_generate_metricsz'HeyGenVideoService.can_generate_metrics  s     r)   c                 6  K   d| _         | j                  j                  | j                         d{    | j	                          d{    d| _         | j                          d{    | j                  j                          d{    y7 ^7 H7 +7 w)aS  Handle the event when a user starts speaking.

        Manages the interruption flow by:
        1. Setting the interruption flag
        2. Signaling the client to interrupt current speech
        3. Cancelling ongoing audio sending tasks
        4. Creating a new send task
        5. Activating the avatar's listening animation
        TNF)r>   r;   	interruptrB   r}   rx   start_agent_listeningr]   s    r*   r   z0HeyGenVideoService._handle_user_started_speaking#  s      !%ll$$T^^444$$&&& %$$&&&ll00222	 	5&&2sE   0BBB
BB)B*!BBBBBBc                 b   K   d| _         | j                  j                          d{    y7 w)zEnd the current conversation and reset state.

        Stops the HeyGen client and cleans up conversation-specific resources.
        FN)rA   r;   r{   r]   s    r*   r|   z$HeyGenVideoService._end_conversation4  s&     
 .3*ll!!!s   %/-/c                    K   | j                   s>t        j                         | _        | j	                  | j                               | _         yyw)z2Create the audio sending task if it doesn't exist.N)r<   asyncioQueue_queuecreate_task_send_task_handlerr]   s    r*   rx   z$HeyGenVideoService._create_send_task<  s:     !--/DK"..t/F/F/HIDO s   AAc                 ~   K   | j                   r+| j                  | j                          d{    d| _         yy7 w)z+Cancel the audio sending task if it exists.N)r<   cancel_taskr]   s    r*   r}   z$HeyGenVideoService._cancel_send_taskB  s4     ??""4??333"DO 3s   +=;=c                 V   K   | j                   j                  |       d{    y7 w)zQueue an audio frame for processing.

        Places the audio frame in the processing queue for synchronized
        delivery to the HeyGen service.

        Args:
            frame: The audio frame to process.
        N)r   put)rD   rl   s     r*   r   z&HeyGenVideoService._handle_audio_frameH  s      kkooe$$$s   )')c                   K   | j                   j                  }t               }d| _        	 	 t	        j
                  | j                  j                         t               d{   }| j                  ryt        |t              r| j                  t        |j                        | _        | j                  j                  |j                   |j"                  |       d{   }|j%                  |       t'        |      | j(                  k\  rn|d| j(                   }|| j(                  d }| j                   j+                  t-        |      | j                         d{    t'        |      | j(                  k\  rn| j                  j/                          k7 47 7 ># t        j0                  $ rT | j                  E| j                   j3                  | j                         d{  7   d| _        |j5                          Y ow xY ww)zHandle sending audio frames to the HeyGen client.

        Continuously processes audio frames from the queue and sends them to the
        HeyGen client. Handles timeouts and silence detection for proper audio
        streaming management.
        N)timeout)r;   out_sample_rate	bytearrayrB   r   wait_forr   getAVATAR_VAD_STOP_SECSr>   r   r   stridr=   resamplerp   rq   extendlenrC   agent_speakbytes	task_doneTimeoutErroragent_speak_endclear)rD   rq   audio_bufferrl   rp   chunks         r*   r   z%HeyGenVideoService._send_task_handlerS  s     ll22 {)%..t{{/@J^__((e%56~~-),UXX"&//":":U%6%6# E !''.l+t/E/EE ,-Et/E/E F'3D4J4J4L'M"ll66uU|T^^TTT	 l+t/E/EE
 %%'' _ U'' )>>-,,66t~~FFF%)DN &&()s   )H6F "F#F 3H4A*F FA=F FF :F HF F F AH$G'%HHHH)-r$   r%   r&   r'   r#   Settings__annotations__r   aiohttpClientSessionr   r   r   r   r   r8   r   rH   rY   rU   rV   rW   r   ra   r   rc   r   rv   r   r{   r
   r   r   r   r   boolr   r   r|   rx   r}   r   r   r   __classcell__)rG   s   @r*   r,   r,   ?   s;   * #H"" \`.226$# $# &&	$#
 "%(CEV(V"WX$# {+$# ./$# 
$#L(!4 (@<
c 
B B%] %%M %' '' ''+ '#4 #4> #4Jd 3""J#	%/B 	%$)r)   r,   )1r'   r   dataclassesr   typingr   r   r   logurur   pipecat.audio.utilsr   pipecat.frames.framesr   r	   r
   r   r   r   r   r   r   r   r   r   r   r   r   "pipecat.processors.frame_processorr   r   pipecat.services.ai_servicer   .pipecat.services.heygen.api_interactive_avatarr   &pipecat.services.heygen.api_liveavatarr   pipecat.services.heygen.clientr   r   r   r   pipecat.services.settingsr    !pipecat.transports.base_transportr!   r   r#   r,   r(   r)   r*   <module>r      s     ! "   7    " S 1 L N  6 =   	/ 	 	x) x)r)   