
    qiO6                         d Z ddlZddlZddlZddlZddlZddlZddlZddl	m
Z
mZ dddddddd	d	d
	dede
eee   f   dededededededee   dee   dededefdZ G d d      ZdededefdZdedefdZd  Zy)!zAWS Transcribe utility functions and classes for WebSocket streaming.

This module provides utilities for creating presigned URLs, building event messages,
and handling AWS event stream protocol for real-time transcription services.
    N)DictOptionalpcmi>     ThighF)	media_encodingsample_ratenumber_of_channels$enable_partial_results_stabilizationpartial_results_stabilityvocabulary_namevocabulary_filter_nameshow_speaker_labelenable_channel_identificationregioncredentialslanguage_coder   r	   r
   r   r   r   r   r   r   returnc                     |j                  d      }|j                  d      }|j                  d      }|r|st        d      t        ||||       }|j                  |||||	|
||||
      S )aX  Create a presigned URL for AWS Transcribe streaming.

    Args:
        region: AWS region for the service.
        credentials: Dictionary containing AWS credentials. Must include
            'access_key' and 'secret_key', with optional 'session_token'.
        language_code: Language code for transcription (e.g., "en-US").
        media_encoding: Audio encoding format. Defaults to "pcm".
        sample_rate: Audio sample rate in Hz. Defaults to 16000.
        number_of_channels: Number of audio channels. Defaults to 1.
        enable_partial_results_stabilization: Whether to enable partial result stabilization.
        partial_results_stability: Stability level for partial results.
        vocabulary_name: Custom vocabulary name to use.
        vocabulary_filter_name: Vocabulary filter name to apply.
        show_speaker_label: Whether to include speaker labels.
        enable_channel_identification: Whether to enable channel identification.

    Returns:
        Presigned WebSocket URL for AWS Transcribe streaming.

    Raises:
        ValueError: If required AWS credentials are missing.
    
access_key
secret_keysession_tokenzAWS credentials are required)r   r   r   r   )
r	   r   r   r   r   r   r   r
   r   r   )get
ValueErrorAWSTranscribePresignedURLget_request_url)r   r   r   r   r	   r
   r   r   r   r   r   r   r   r   r   url_generators                   L/opt/pipecat/venv/lib/python3.12/site-packages/pipecat/services/aws/utils.pyget_presigned_urlr      s    L .J.JOOO4MZ788 .*MZ`M
 ((#%'5-&C--Q"; )      c                   t    e Zd ZdZ	 ddedededefdZ	 	 	 	 	 	 	 	 	 ddeded	ed
edededededededefdZy)r   zGenerator for AWS Transcribe presigned WebSocket URLs.

    Handles AWS Signature Version 4 signing process to create authenticated
    WebSocket URLs for streaming transcription requests.
    r   r   r   r   c                 *   || _         || _        || _        d| _        d| _        || _        d| _        d| _        d| _        d| _	        d| _
        d| _        d| _        d| _        d| _        d| _        d| _        d| _        d| _        d| _        d| _        y)a*  Initialize the presigned URL generator.

        Args:
            access_key: AWS access key ID.
            secret_key: AWS secret access key.
            session_token: AWS session token for temporary credentials.
            region: AWS region for the service. Defaults to "us-east-1".
        GET
transcribe z/stream-transcription-websockethostzAWS4-HMAC-SHA256N)r   r   r   methodservicer   endpointr&   amz_date	datestampcanonical_uricanonical_headerssigned_headers	algorithmcredential_scopecanonical_querystringpayload_hashcanonical_requeststring_to_sign	signaturerequest_url)selfr   r   r   r   s        r   __init__z"AWSTranscribePresignedURL.__init___   s     %$*#	>!#$+ "%'"!# r    r	   r   r   r   r   r   r   r
   r   r   r   c                 4   d| j                    d| _        d| j                    d| _        t        j                  j	                         }|j                  d      | _        |j                  d      | _        d| j                   d| _        | j                   d| j                    d| j                   d	| _
        d
| j                  z   | _        | xj                  d| j                  z   dz   | j                  z   z  c_        | xj                  d| j                  z   z  c_        | xj                  dz  c_        | j                  rA| xj                  dt        j                   j#                  | j                  d      z   z  c_        | xj                  d| j$                  z   z  c_        |r| xj                  dz  c_        |	r| xj                  dz  c_        |r| xj                  d|z   z  c_        |r| xj                  d|z   z  c_        |dkD  r!| xj                  dt'        |      z   z  c_        |
r| xj                  d|
z   z  c_        |r!| xj                  dt'        |      z   z  c_        |r| xj                  dz  c_        |r| xj                  d|z   z  c_        |r| xj                  d|z   z  c_        t)        j*                  dj-                  d            j/                         | _        | j2                   d| j4                   d| j                   d| j                   d| j$                   d| j0                   | _        | j                   d| j                    d| j                   d}| j                   d| j                   d| dt)        j*                  | j6                  j-                  d            j/                         z   }t9        j:                  d | j<                   j-                  d      | j                  j-                  d      t(        j*                        j?                         }t9        j:                  || j                   j-                  d      t(        j*                        j?                         }t9        j:                  || j                  j-                  d      t(        j*                        j?                         }t9        j:                  |d!t(        j*                        j?                         }t9        j:                  ||j-                  d      t(        j*                        j/                         | _         | xj                  d"| j@                  z   z  c_        | j                  | j4                  z   d#z   | j                  z   | _!        | jB                  S )$a6  Generate a presigned WebSocket URL for AWS Transcribe.

        Args:
            sample_rate: Audio sample rate in Hz.
            language_code: Language code for transcription.
            media_encoding: Audio encoding format.
            vocabulary_name: Custom vocabulary name.
            vocabulary_filter_name: Vocabulary filter name.
            show_speaker_label: Whether to include speaker labels.
            enable_channel_identification: Whether to enable channel identification.
            number_of_channels: Number of audio channels.
            enable_partial_results_stabilization: Whether to enable partial result stabilization.
            partial_results_stability: Stability level for partial results.

        Returns:
            Presigned WebSocket URL with authentication parameters.
        zwss://transcribestreaming.z.amazonaws.com:8443ztranscribestreaming.z%Y%m%dT%H%M%SZz%Y%m%dzhost:
z%2Fz%2Faws4_requestzX-Amz-Algorithm=z&X-Amz-Credential=z&X-Amz-Date=z&X-Amz-Expires=300z&X-Amz-Security-Token=r%   )safez&X-Amz-SignedHeaders=z#&enable-channel-identification=truez*&enable-partial-results-stabilization=truez&language-code=z&media-encoding=r   z&number-of-channels=z&partial-results-stability=z&sample-rate=z&show-speaker-label=truez&vocabulary-filter-name=z&vocabulary-name=utf-8/z/aws4_requestAWS4s   aws4_requestz&X-Amz-Signature=?)"r   r)   r&   datetimeutcnowstrftimer*   r+   r-   r(   r0   r/   r1   r   r   urllibparsequoter.   strhashlibsha256encode	hexdigestr2   r'   r,   r3   hmacnewr   digestr5   r6   )r7   r	   r   r   r   r   r   r   r
   r   r   nowr0   r4   k_datek_region	k_service	k_signings                     r   r   z)AWSTranscribePresignedURL.get_request_url   s   < 5T[[MATU*4;;-7JK	&&(%56h/#(2!6#'>>"2#dkk]#dll^Sb c &8$..%H""" 4??2U:T=R=RR	
" 	""nt}}&DD"""&::"&&*BV\\EWEW"" FX F + & 	""&=@S@S&SS"(&&*OO&/&&*VV&&&*;m*KK&&&*<~*MM&!&&*@3GYCZ*ZZ&$&&*GJc*cc&&&/C<L*LL&&&*DD&!&&*DG]*]]&&&*=*OO& $NN299W+=>HHJ %)KK=43E3E2FbIcIcHddfgkg}g}f~  A  BF  BU  BU  AV  VX  Y]  Yj  Yj  Xk  "l #nn-Qt{{m1T\\N-X~~br2B1C2FnnT33::7CDNNPQ 	 4??#$++G4dnn6K6KG6TV]VdVd

&( 	 88FDKK$6$6w$?PWWYHHXt||':':7'CW^^T[[]	HHYHOOQ	~,,W5w~~

)+ 	
 	""&9DNN&JJ"  ==4+=+==CdF`F``r    N)z	us-east-1)	r%   r   r%   r%   FFr   Fr%   )	__name__
__module____qualname____doc__rF   r8   intboolr    r    r   r   r   X   s     S^+.?BLOH  #!&(#(.3"#5:)+f f  f  	f 
 f  !$f  !f  (,f   f  /3f  $'f  
f r    r   header_nameheader_valuec                    | j                  d      }t        t        |      g      }t        dg      }|j                  d      }t        j                  dt        |            }t               }|j                  |       |j                  |       |j                  |       |j                  |       |j                  |       |S )zBuild a header following AWS event stream format.

    Args:
        header_name: Name of the header.
        header_value: Value of the header.

    Returns:
        Encoded header as a bytearray following AWS event stream protocol.
    r<      >H)rI   byteslenstructpack	bytearrayextend)rZ   r[   namename_byte_length
value_typevaluevalue_byte_lengthheader_lists           r   get_headersrk      s     g&Dc$i[)sJ(ED#e*5 +K'(tz"()ur    payloadc                 *   t        dd      }t        dd      }t        dd      }t               }|j                  |       |j                  |       |j                  |       t        j                  dt        |      t        |       z   dz         }t        j                  dt        |            }t        d	gd
z        }||dd ||dd t        j                  dt        j                  |      dz        }t               }	|	j                  |       |	j                  |       |	j                  |       |	j                  |        t        |	      }
t        j                  dt        j                  |
      dz        }|	j                  |       t        |	      S )aM  Build an event message for AWS Transcribe streaming.

    Creates a properly formatted AWS event stream message containing audio data
    for real-time transcription. Follows the AWS event stream protocol with
    prelude, headers, payload, and CRC checksums.

    Args:
        payload: Raw audio bytes to include in the event message.

    Returns:
        Complete event message as bytes, ready to send via WebSocket.

    Note:
        Implementation matches AWS sample:
        https://github.com/aws-samples/amazon-transcribe-streaming-python-websockets/blob/main/eventstream.py
    z:content-typezapplication/octet-streamz:event-type
AudioEventz:message-typeevent>I   r      N       )	rk   rc   rd   ra   rb   r`   binasciicrc32r_   )rl   content_type_headerevent_type_headermessage_type_headerheaderstotal_byte_lengthheaders_byte_lengthpreludeprelude_crcmessage_as_listmessagemessage_crcs               r   build_event_messager     sb   $ &o7QR#M<@%ow?kGNN&'NN$%NN&' D#g,W*E*JK ++dCL9 a G#GBQK%GABK ++dHNN7$;j$HIK  kO7#;'7#7# O$G++dHNN7$;j$HIK ;'!!r    c                 ~   | dd }t        j                  d|      \  }}t        j                  d| dd       d   }| dd|z    }| d|z   d }t        j                  d| dd       d   }|t        j                  |      dz  k(  sJ d	       |t        j                  | dd       dz  k(  sJ d
       i }|rv|d   }	|dd|	z    j	                  d      }
|d|	z      }t        j                  d|d|	z   d|	z          d   }|d|	z   d|	z   |z    j	                  d      }|||
<   |d|	z   |z   d }|rv|t        j                  |      fS )a  Decode an AWS event stream message.

    Parses an AWS event stream message to extract headers and payload,
    verifying CRC checksums for data integrity.

    Args:
        message: Raw event stream message bytes received from AWS.

    Returns:
        A tuple of (headers, payload) where:

        - headers: Dictionary of parsed headers
        - payload: Dictionary of parsed JSON payload

    Raises:
        AssertionError: If CRC checksum verification fails.
    Nrr   z>IIrp      r   rt   zPrelude CRC check failedzMessage CRC check failedr   r<   r^      rs   )ra   unpackru   rv   decodejsonloads)r   r}   total_lengthheaders_lengthr~   rz   rl   r   headers_dictname_lenre   rg   	value_lenrh   s                 r   decode_eventr   <  s   & bqkG#)==#@ L.--gam4Q7Kb2./Gb>)B/G--gbcl3A6K (..1J>>Z@ZZ>(.."6CC_E__C L
1:q1x<(//8Q\*
MM$Hq8|(LMaP	Hq8|i'?@GGP"T!h,245  G,,,r    )rV   ru   r@   rG   rK   r   ra   urllib.parserC   typingr   r   rF   rW   rX   r   r   rc   rk   r_   r   r   rY   r    r   <module>r      s          !  15%+%),0$*/>> c8C=()> 	>
 > > > +/>  #> c]> %SM> > $(> 	>BN  N bS  	 46" 6"5 6"r)-r    