
    qi)              
       :   d Z ddlZddlZddlZddlmZ ddlm	Z	 ddl
mZ dZdefdZdefd	Zdefd
ZdededefdZdededefdZd ZdededefdZdedededefdZdedededefdZdedededefdZd ededededef
d!Zdedededefd"Zdedefd#Zy)$zAudio utility functions for Pipecat.

This module provides common audio processing utilities including mixing,
format conversion, volume calculation, and codec transformations for
various audio formats used in Pipecat pipelines.
    N)BaseAudioResamplerSOXRAudioResamplerSOXRStreamAudioResampler   returnc                      ddl }|j                         5  |j                  d       |j                  dt        d       ddd       t        di | S # 1 sw Y   xY w)a  Create a default audio resampler instance.

    .. deprecated:: 0.0.74
        This function is deprecated and will be removed in a future version.
        Use `create_stream_resampler` for real-time processing scenarios or
        `create_file_resampler` for batch processing of complete audio files.

    Args:
        **kwargs: Additional keyword arguments passed to the resampler constructor.

    Returns:
        A configured SOXRAudioResampler instance.
    r   Nalwaysz`create_default_resampler` is deprecated. Use `create_stream_resampler` for real-time processing scenarios or `create_file_resampler` for batch processing of complete audio files.   )
stacklevel )warningscatch_warningssimplefilterwarnDeprecationWarningr   )kwargsr   s     E/opt/pipecat/venv/lib/python3.12/site-packages/pipecat/audio/utils.pycreate_default_resamplerr      se     		 	 	" 
h'T  	 	

 '''
 
s   *AAc                      t        di | S )zCreate an audio resampler instance for batch processing of complete audio files.

    Args:
        **kwargs: Additional keyword arguments passed to the resampler constructor.

    Returns:
        A configured SOXRAudioResampler instance.
    r   r   r   s    r   create_file_resamplerr   8   s     '''    c                      t        di | S )zCreate a stream audio resampler instance.

    Args:
        **kwargs: Additional keyword arguments passed to the resampler constructor.

    Returns:
        A configured SOXRStreamAudioResampler instance.
    r   r   r   s    r   create_stream_resamplerr   D   s     $-f--r   audio1audio2c                    t        j                  | t         j                        }t        j                  |t         j                        }t        t	        |      t	        |            }t        j
                  |d|t	        |      z
  fd      }t        j
                  |d|t	        |      z
  fd      }|j                  t         j                        |j                  t         j                        z   }t        j                  |dd      j                  t         j                        }|j                  t         j                        j                         S )a  Mix two audio streams together by adding their samples.

    Both audio streams are assumed to be 16-bit signed integer PCM data.
    If the streams have different lengths, the shorter one is zero-padded
    to match the longer stream.

    Args:
        audio1: First audio stream as raw bytes (16-bit signed integers).
        audio2: Second audio stream as raw bytes (16-bit signed integers).

    Returns:
        Mixed audio data as raw bytes with samples clipped to 16-bit range.
    dtyper   constant)modei i  )
np
frombufferint16maxlenpadastypeint32cliptobytes)r   r   data1data2
max_lengthpadded1padded2mixed_audios           r   	mix_audior4   P   s     MM&1EMM&1E SZU,J ffUQ
SZ 78zJGffUQ
SZ 78zJG ..*W^^BHH-EEK''+vu5<<RXXFKbhh'//11r   
left_audioright_audioc                 n   t        j                  | t         j                        }t        j                  |t         j                        }t        t	        |      t	        |            }|d| }|d| }t        j
                  ||f      }|j                  t         j                        j                         S )a	  Interleave left and right mono audio channels into stereo audio.

    Takes two mono audio streams and combines them into a single stereo
    stream by interleaving the samples (L, R, L, R, ...). If the channels
    have different lengths, both are truncated to the shorter length.

    Args:
        left_audio: Left channel audio as raw bytes (16-bit signed integers).
        right_audio: Right channel audio as raw bytes (16-bit signed integers).

    Returns:
        Interleaved stereo audio data as raw bytes.
    r    N)r$   r%   r&   minr(   column_stackr*   r-   )r5   r6   leftright
min_lengthstereos         r   interleave_stereo_audior>   o   s     ==2884DMM+RXX6ESYE
+JD+:E__dE]+F=="**,,r   c                 H    | |z
  ||z
  z  }t        dt        d|            }|S )a*  Normalize a value to the range [0, 1] and clamp it to bounds.

    Args:
        value: The value to normalize.
        min_value: The minimum value of the input range.
        max_value: The maximum value of the input range.

    Returns:
        Normalized value clamped to the range [0, 1].
    r      )r'   r8   )value	min_value	max_value
normalizednormalized_clampeds        r   normalize_valuerF      s2     )#	I(=>JQAz 23r   audiosample_ratec                    t        j                  | t         j                        }|j                  t         j                        }|j
                  |z  }t        j                  ||      }|j                  |      }t        |dd      }|S )a  Calculate the loudness level of audio data using EBU R128 standard.

    Uses the pyloudnorm library to calculate integrated loudness according
    to the EBU R128 recommendation, then normalizes the result to [0, 1].

    Args:
        audio: Audio data as raw bytes (16-bit signed integers).
        sample_rate: Sample rate of the audio in Hz.

    Returns:
        Normalized loudness value between 0 (quiet) and 1 (loud).
    r    )
block_sizeiP   )
r$   r%   r&   r*   float64sizepylnMeterintegrated_loudnessrF   )rG   rH   audio_npaudio_floatrJ   meterloudnesss          r   calculate_audio_volumerU      sm     }}U"((3H//"**-K,JJJ{z:E((5H xb1HOr   rA   
prev_valuefactorc                     ||| |z
  z  z   S )a  Apply exponential smoothing to a value.

    Exponential smoothing is used to reduce noise in time-series data by
    giving more weight to recent values while still considering historical data.

    Args:
        value: The new value to incorporate.
        prev_value: The previous smoothed value.
        factor: Smoothing factor between 0 and 1. Higher values give more
                weight to the new value.

    Returns:
        The exponentially smoothed value.
    r   )rA   rV   rW   s      r   exp_smoothingrY      s     %*"4555r   
ulaw_bytesin_rateout_rate	resamplerc                 t   K   t        j                  | d      }|j                  |||       d{   }|S 7 w)u  Convert μ-law encoded audio to PCM and optionally resample.

    Args:
        ulaw_bytes: μ-law encoded audio data as raw bytes.
        in_rate: Original sample rate of the μ-law audio in Hz.
        out_rate: Desired output sample rate in Hz.
        resampler: Audio resampler instance for rate conversion.

    Returns:
        PCM audio data as raw bytes at the specified output rate.
    r   N)audioopulaw2linresample)rZ   r[   r\   r]   in_pcm_bytesout_pcm_bytess         r   ulaw_to_pcmrd      ?      ##J2L $,,\7HMMM N   -868	pcm_bytesc                 t   K   |j                  | ||       d{   }t        j                  |d      }|S 7 w)u  Convert PCM audio to μ-law encoding and optionally resample.

    Args:
        pcm_bytes: PCM audio data as raw bytes (16-bit signed integers).
        in_rate: Original sample rate of the PCM audio in Hz.
        out_rate: Desired output sample rate in Hz.
        resampler: Audio resampler instance for rate conversion.

    Returns:
        μ-law encoded audio data as raw bytes at the specified output rate.
    Nr   )ra   r_   lin2ulaw)rg   r[   r\   r]   rb   out_ulaw_bytess         r   pcm_to_ulawrk      ?      #++IwIIL %%lA6N J   868
alaw_bytesc                 t   K   t        j                  | d      }|j                  |||       d{   }|S 7 w)a  Convert A-law encoded audio to PCM and optionally resample.

    Args:
        alaw_bytes: A-law encoded audio data as raw bytes.
        in_rate: Original sample rate of the A-law audio in Hz.
        out_rate: Desired output sample rate in Hz.
        resampler: Audio resampler instance for rate conversion.

    Returns:
        PCM audio data as raw bytes at the specified output rate.
    r   N)r_   alaw2linra   )rn   r[   r\   r]   rb   rc   s         r   alaw_to_pcmrq      re   rf   c                 t   K   |j                  | ||       d{   }t        j                  |d      }|S 7 w)a  Convert PCM audio to A-law encoding and optionally resample.

    Args:
        pcm_bytes: PCM audio data as raw bytes (16-bit signed integers).
        in_rate: Original sample rate of the PCM audio in Hz.
        out_rate: Desired output sample rate in Hz.
        resampler: Audio resampler instance for rate conversion.

    Returns:
        A-law encoded audio data as raw bytes at the specified output rate.
    Nr   )ra   r_   lin2alaw)rg   r[   r\   r]   rb   out_alaw_bytess         r   pcm_to_alawru   	  rl   rm   c                     t        j                  | t         j                        }t        j                  |      j	                         }|t
        k  S )u-  Determine if an audio sample contains silence by checking amplitude levels.

    This function analyzes raw PCM audio data to detect silence by comparing
    the maximum absolute amplitude against a predefined threshold. The audio
    is expected to be clean speech or complete silence without background noise.

    Args:
        pcm_bytes: Raw PCM audio data as bytes (16-bit signed integers).

    Returns:
        bool: True if the audio sample is considered silence (below threshold),
              False otherwise.

    Note:
        Normal speech typically produces amplitude values between ±500 to ±5000,
        depending on factors like loudness and microphone gain. The threshold
        (SPEAKING_THRESHOLD) is set well below typical speech levels to
        reliably detect silence vs. speech.
    r    )r$   r%   r&   absr'   SPEAKING_THRESHOLD)rg   
audio_datarC   s      r   
is_silencerz     s?    * y9J z"&&(I ***r   )__doc__r_   numpyr$   
pyloudnormrN   -pipecat.audio.resamplers.base_audio_resamplerr   'pipecat.audio.resamplers.soxr_resamplerr   .pipecat.audio.resamplers.soxr_stream_resamplerr   rx   r   r   r   bytesr4   r>   rF   intfloatrU   rY   rd   rk   rq   ru   boolrz   r   r   r   <module>r      sj      L F S  (*< (8	('9 	(	.); 	.2e 2U 2u 2>- -E -e -4 % c e 66 6E 65 6U 6$ #/2?Q.   Pb * #/2?Q
.   Pb *+% +D +r   