
    IuigS              )          d dl Z d dlZd dlZd dlZd dlZd dlmZmZmZm	Z	m
Z
mZ d dlmZmZmZmZmZmZ d Z	 	 	 d6dedee	e      de
ee	e         ded	ed
ee   fdZ	 	 d7dedee	e      dee	e      ded	ed
ee   fdZ	 	 d8dedee	e      ded	ed
ee   f
dZ	 	 d7dedee	e      ded	ed
ee   f
dZ	 d9dddddddd dddddddede	e   de
e	e      dedededededededed ed!e
e	e	e         d"e
eee	e   e	e   f      d#ed$ed
ee   f"d%Z	 	 d:d&d dddddd ddddd'dd(ded)ee	e   e	e	e      f   ded	ed*ed+ededededededed ed!e
e	e	e         d"e
eee	e   e	e   f      d,e
e	e      d-ed.eegef   d
ee   f&d/Z	 	 d:d&d dddddd ddddd'dd(ded)ee	e   e	e	e      f   ded	ed*ed+ededededededed ed!e
e	e	e         d"e
eee	e   e	e   f      d,e
e	e      d-ed.eegef   d
ee   f&d0Z G d1 d2      Zd3 Z d4 Z!d5 Z"y);    N)AsyncIterableCallableIterableListOptionalUnion)GenerationResultGenerationStepResult	GeneratorScoringResultTranslationResult
Translatorc                  *   t        t        dt               t        t        dt               t        t        dt               t        t
        dt               t        t
        dt               t        t
        dt               t        t
        dt               y)z4Registers additional attributes to compiled modules.translate_iterablescore_iterablegenerate_tokensgenerate_iterableasync_generate_tokensN)
setattrr   translator_translate_iterabletranslator_score_iterabletranslator_generate_tokensr   generator_generate_iterablegenerator_score_iterablegenerator_generate_tokensgenerator_async_generate_tokens     H/opt/pipecat/venv/lib/python3.12/site-packages/ctranslate2/extensions.pyregister_extensionsr       sd    J,.KLJ(*CDJ)+EFI*,GHI')ABI(*CDI.0OPr   
translatorsourcetarget_prefixmax_batch_size
batch_typereturnc              +   ~   K   |g}||j                  |       t        | j                  |||fi |E d{    y7 w)a6  Translates an iterable of tokenized examples.

    This method is built on top of :meth:`ctranslate2.Translator.translate_batch`
    to efficiently translate an arbitrarily large stream of data. It enables the
    following optimizations:

    * stream processing (the iterable is not fully materialized in memory)
    * parallel translations (if the translator has multiple workers)
    * asynchronous batch prefetching
    * local sorting by length

    Arguments:
      source: An iterable of tokenized source examples.
      target_prefix: An optional iterable of tokenized target prefixes.
      max_batch_size: The maximum batch size.
      batch_type: Whether :obj:`max_batch_size` is the number of "examples" or "tokens".
      **kwargs: Any translation options accepted by
        :meth:`ctranslate2.Translator.translate_batch`.

    Returns:
      A generator iterator over :class:`ctranslate2.TranslationResult` instances.

    Example:
      This method can be used to efficiently translate text files:

      .. code-block:: python

          # Replace by your own tokenization and detokenization functions.
          tokenize_fn = lambda line: line.strip().split()
          detokenize_fn = lambda tokens: " ".join(tokens)

          with open("input.txt") as input_file:
              source = map(tokenize_fn, input_file)
              results = translator.translate_iterable(source, max_batch_size=64)

              for result in results:
                  tokens = result.hypotheses[0]
                  target = detokenize_fn(tokens)
                  print(target)
    N)append_process_iterabletranslate_batch)r!   r"   r#   r$   r%   kwargs	iterabless          r   r   r      sR     ` I ' ""	
   s   3=;=targetc              +   V   K   t        | j                  ||g||fi |E d{    y7 w)a}  Scores an iterable of tokenized examples.

    This method is built on top of :meth:`ctranslate2.Translator.score_batch`
    to efficiently score an arbitrarily large stream of data. It enables the
    following optimizations:

    * stream processing (the iterable is not fully materialized in memory)
    * parallel scoring (if the translator has multiple workers)
    * asynchronous batch prefetching
    * local sorting by length

    Arguments:
      source: An iterable of tokenized source examples.
      target: An iterable of tokenized target examples.
      max_batch_size: The maximum batch size.
      batch_type: Whether :obj:`max_batch_size` is the number of "examples" or "tokens".
      **kwargs: Any scoring options accepted by
        :meth:`ctranslate2.Translator.score_batch`.

    Returns:
      A generator iterator over :class:`ctranslate2.ScoringResult` instances.
    Nr)   score_batch)r!   r"   r-   r$   r%   r+   s         r   r   r   [   s;     < !		
   s   )')	generatorstart_tokensc              +   T   K   t        | j                  |g||fi |E d{    y7 w)ac  Generates from an iterable of tokenized prompts.

    This method is built on top of :meth:`ctranslate2.Generator.generate_batch`
    to efficiently run generation on an arbitrarily large stream of data. It enables
    the following optimizations:

    * stream processing (the iterable is not fully materialized in memory)
    * parallel generations (if the generator has multiple workers)
    * asynchronous batch prefetching
    * local sorting by length

    Arguments:
      start_tokens: An iterable of tokenized prompts.
      max_batch_size: The maximum batch size.
      batch_type: Whether :obj:`max_batch_size` is the number of "examples" or "tokens".
      **kwargs: Any generation options accepted by
        :meth:`ctranslate2.Generator.generate_batch`.

    Returns:
      A generator iterator over :class:`ctranslate2.GenerationResult` instances.
    N)r)   generate_batch)r1   r2   r$   r%   r+   s        r   r   r      s8     8 !  		
      (&(tokensc              +   T   K   t        | j                  |g||fi |E d{    y7 w)a9  Scores an iterable of tokenized examples.

    This method is built on top of :meth:`ctranslate2.Generator.score_batch`
    to efficiently score an arbitrarily large stream of data. It enables
    the following optimizations:

    * stream processing (the iterable is not fully materialized in memory)
    * parallel scoring (if the generator has multiple workers)
    * asynchronous batch prefetching
    * local sorting by length

    Arguments:
      tokens: An iterable of tokenized examples.
      max_batch_size: The maximum batch size.
      batch_type: Whether :obj:`max_batch_size` is the number of "examples" or "tokens".
      **kwargs: Any score options accepted by
        :meth:`ctranslate2.Generator.score_batch`.

    Returns:
      A generator iterator over :class:`ctranslate2.ScoringResult` instances.
    Nr/   )r1   r6   r$   r%   r+   s        r   r   r      s8     8 !		
   r5         Fi   )max_decoding_lengthmin_decoding_lengthsampling_topksampling_toppsampling_temperaturereturn_log_probrepetition_penaltyno_repeat_ngram_sizedisable_unksuppress_sequences	end_tokenmax_input_lengthuse_vmapr:   r;   r<   r=   r>   r?   r@   rA   rB   rC   rD   rE   rF   c             #   v   K   t        | j                  |g||gnd|	|
|||||||||||      E d{    y7 w)a{  Yields tokens as they are generated by the model.

    Arguments:
      source: Source tokens.
      target_prefix: Optional target prefix tokens.
      max_decoding_length: Maximum prediction length.
      min_decoding_length: Minimum prediction length.
      sampling_topk: Randomly sample predictions from the top K candidates.
      sampling_topp: Keep the most probable tokens whose cumulative probability exceeds this value.
      sampling_temperature: Sampling temperature to generate more random samples.
      return_log_prob: Include the token log probability in the result.
      repetition_penalty: Penalty applied to the score of previously generated tokens
        (set > 1 to penalize).
      no_repeat_ngram_size: Prevent repetitions of ngrams with this size
        (set 0 to disable).
      disable_unk: Disable the generation of the unknown token.
      suppress_sequences: Disable the generation of some sequences of tokens.
      end_token: Stop the decoding on one of these tokens (defaults to the model EOS token).
      max_input_length: Truncate inputs after this many tokens (set 0 to disable).
      use_vmap: Use the vocabulary mapping file saved in this model

    Returns:
      A generator iterator over :class:`ctranslate2.GenerationStepResult` instances.

    Note:
      This generation method is not compatible with beam search which requires a complete decoding.
    N)r@   rA   rB   rC   rD   r:   r;   r<   r=   r>   return_scoresrE   rF   )_generate_tokensr*   )r!   r"   r#   r:   r;   r<   r=   r>   r?   r@   rA   rB   rC   rD   rE   rF   s                   r   r   r      s\     \  ""	(4$-1-//##1%)!  s   /979i   T)
max_length
min_lengthr<   r=   r>   r?   r@   rA   rB   rC   rD   static_promptcache_static_promptcallbackpromptrJ   rK   rL   rM   rN   c             #      K   t        |      dkD  rt        |d   t              r|g}t        | j                  |fi d|d|d|
d|d|d|d|d	|d
|d|d|d|d|	d|d|ddd|E d{    y7 w)a  Yields tokens as they are generated by the model.

    Arguments:
      prompt: Batch of start tokens. If the decoder starts from a
        special start token like <s>, this token should be added to this input.
      max_batch_size: The maximum batch size.
      batch_type: Whether :obj:`max_batch_size` is the number of "examples" or "tokens".
      max_length: Maximum generation length.
      min_length: Minimum generation length.
      sampling_topk: Randomly sample predictions from the top K candidates.
      sampling_topp: Keep the most probable tokens whose cumulative probability exceeds this value.
      sampling_temperature: Sampling temperature to generate more random samples.
      return_log_prob: Include the token log probability in the result.
      repetition_penalty: Penalty applied to the score of previously generated tokens
        (set > 1 to penalize).
      no_repeat_ngram_size: Prevent repetitions of ngrams with this size
        (set 0 to disable).
      disable_unk: Disable the generation of the unknown token.
      suppress_sequences: Disable the generation of some sequences of tokens.
      end_token: Stop the decoding on one these tokens (defaults to the model EOS token).
      static_prompt: If the model expects a static prompt (a.k.a. system prompt)
        it can be set here to simplify the inputs and optionally cache the model
        state for this prompt to accelerate future generations.
      cache_static_prompt: Cache the model state after the static prompt and
        reuse it for future generations using the same static prompt.
      callback: Optional function that is called for each generated token when
        obj:`beam_size` is 1. If the callback function returns ``True``, the
        decoding will stop for this batch index.

    Returns:
      A generator iterator over :class:`ctranslate2.GenerationStepResult` instances.

    Note:
      This generation method is not compatible with beam search which requires a complete decoding.
    r   r$   r%   r@   rA   rB   rC   rD   rJ   rK   r<   r=   r>   rH   rL   rM   include_prompt_in_resultFrN   N)len
isinstancestrrI   r4   )r1   rO   r$   r%   rJ   rK   r<   r=   r>   r?   r@   rA   rB   rC   rD   rL   rM   rN   s                     r   r   r     s     p 6{Q:fQi5   & 	
 . 2   .    $ $ 2 &  $!" 0#$ "'%& '  s   A0A:2A83A:c                 K   t        |      dkD  rt        |d   t              r|g}t        | j                  |fi d|d|d|
d|d|d|d|d	|d
|d|d|d|d|	d|d|ddd|2 3 d{   }| 7 
6 yw)a  Yields tokens asynchronously as they are generated by the model.

    Arguments:
      prompt: Batch of start tokens. If the decoder starts from a
        special start token like <s>, this token should be added to this input.
      max_batch_size: The maximum batch size.
      batch_type: Whether :obj:`max_batch_size` is the number of "examples" or "tokens".
      max_length: Maximum generation length.
      min_length: Minimum generation length.
      sampling_topk: Randomly sample predictions from the top K candidates.
      sampling_topp: Keep the most probable tokens whose cumulative probability exceeds this value.
      sampling_temperature: Sampling temperature to generate more random samples.
      return_log_prob: Include the token log probability in the result.
      repetition_penalty: Penalty applied to the score of previously generated tokens
        (set > 1 to penalize).
      no_repeat_ngram_size: Prevent repetitions of ngrams with this size
        (set 0 to disable).
      disable_unk: Disable the generation of the unknown token.
      suppress_sequences: Disable the generation of some sequences of tokens.
      end_token: Stop the decoding on one of these tokens (defaults to the model EOS token).
      static_prompt: If the model expects a static prompt (a.k.a. system prompt)
        it can be set here to simplify the inputs and optionally cache the model
        state for this prompt to accelerate future generations.
      cache_static_prompt: Cache the model state after the static prompt and
        reuse it for future generations using the same static prompt.
      callback: Optional function that is called for each generated token when
        obj:`beam_size` is 1. If the callback function returns ``True``, the
        decoding will stop for this batch index.

    Returns:
      An async generator iterator over :class:`ctranslate2.GenerationStepResult` instances.

    Note:
      This generation method is not compatible with beam search which requires a complete decoding.
    r   r$   r%   r@   rA   rB   rC   rD   rJ   rK   r<   r=   r>   rH   rL   rM   rQ   FrN   N)rR   rS   rT   AsyncGeneratorr4   )r1   rO   r$   r%   rJ   rK   r<   r=   r>   r?   r@   rA   rB   rC   rD   rL   rM   rN   step_results                      r   r   r   _  s    p 6{Q:fQi5+   & 	
 . 2   .    $ $ 2 &  $!" 0#$ "'%& '  k* + s*   A-B/B 3A>4B 7B>B  Bc                   $    e Zd Zd Zd Zd Zd Zy)rV   c                     t        j                         | _        t        j                         | _        d | _        || _        || _        || _	        y N)
asyncioQueuequeue	threadingEventshutdown_eventiterator_taskprocess_funcargsr+   )selfrb   rc   r+   s       r   __init__zAsyncGenerator.__init__  s<    ]]_
'oo/!(	r   c                 z  K   t        | j                  g| j                  i | j                  D ]^  }| j                  j                  |       d {    t        j                  d       d {    | j                  j                         s^ n | j                  j                  d        d {    y 7 c7 H7 	w)Ng-C6?)
rI   rb   rc   r+   r]   putr[   sleepr`   is_set)rd   rW   s     r   producerzAsyncGenerator.producer  s     +
 $		
-1[[
 	K **..-----'''""))+	 jjnnT""" .' 	#s<   AB;B5B;.B7/B;!B;/B90B;7B;9B;c                 V    t        j                  | j                               | _        | S rZ   )r[   create_taskrj   ra   )rd   s    r   	__aiter__zAsyncGenerator.__aiter__  s     $00Ar   c                 J  K   | j                   j                         rt        	 | j                  j	                          d {   }| | j                   j                          t        |S 7 (# t        j                  $ r! | j                   j                          t        w xY wwrZ   )r`   ri   StopAsyncIterationr]   getsetr[   CancelledError)rd   items     r   	__anext__zAsyncGenerator.__anext__  s     %%'$$	%))D|##'')((K	 *
 %% 	%##%$$	%s.   !B#A, A*'A, )B#*A, ,4B  B#N)__name__
__module____qualname__re   rj   rm   rt   r   r   r   rV   rV     s    
#%r   rV   c              /     	
K   t        j                         	t        j                         |j	                  dd       

d 
	
fd}|j                  dd|d        | |i |	fd}t        j                  |d      }|j                          	 	j	                         }|nt        |t              r|	 | +|j                          y # t        $ r j                          Y ,w xY ww)	NrN   c                      y)NFr   )rW   s    r   <lambda>z"_generate_tokens.<locals>.<lambda>  s    r   c                 ^     |       }j                  |        j                         xs |S rZ   )rg   ri   )rW   user_callback_resultgenerator_closedstep_resultsuser_callbacks     r   	_callbackz#_generate_tokens.<locals>._callback  s1    ,[9%&&(@,@@r   Tr9   )asynchronous	beam_sizerN   c                      	 D ]  } | j                           	 j                  d        y # t        $ r}j                  |       Y d }~1d }~ww xY wrZ   )result	Exceptionrg   )r   easync_resultsr~   s     r   _catch_exceptionz*_generate_tokens.<locals>._catch_exception  sS    	 '    	  	 Q	 s   - 	AAA)r-   daemon)r]   r\   r^   r_   rp   updateThreadstartrS   r   GeneratorExitrq   join)rb   rc   r+   r   r   threadrW   r   r}   r~   r   s          @@@@r   rI   rI     s     ;;=L (JJz40M1A MM !	
 !$1&1M %5dCF
LLN
"&&(k9-	   KKM  	  "	s*   B7C3>C C3C0-C3/C00C3c              +   "  K   |dk  rt        d      t        |      dk(  r|d   }nt        j                  | }|j	                  ||dd       |dkD  r|dz  n|}t        j                         }t        |||      D ]i  } |j                   | |i |       |s|d   j                         s2 |j                         j                          |sV|d   j                         r8k |r% |j                         j                          |r$y y w)Nr9   zmax_batch_size must be >= 1r   T)r$   r%   r      )
ValueErrorrR   	itertoolszip_longestr   collectionsdeque_batch_iteratorextenddonepopleftr   )	rb   r,   r$   r%   r+   iterableread_batch_sizer]   streamss	            r   r)   r)     s    677
9~Q<(()4
MM,$ 	
 .<a-?nr)^OE"8_jI +\75f56a%--/((** a+ emmo$$&& s$   BDD-#DD%'DDc              #     K   d }d}| D ]  }t        |t              s|f}|dk(  r|rqt        |d         |k(  r`| d }nY|dk(  rFt        |t        |d               }|r:t        |d         dz   |z  |kD  r#| d }t        |d         }nt	        d|z        |t        d |D              }t        ||      D ]1  \  }}|t        |      dkD  rt	        d      |j                  |       3  || y y w)Nr   examplesr6   r9   zInvalid batch type %sc              3       K   | ]  }g   y wrZ   r   ).0_s     r   	<genexpr>z"_batch_iterator.<locals>.<genexpr>F  s     01B0s   z+Input iterables do not have the same length)rS   tuplerR   maxr   zipr(   )r   
batch_sizer%   r   rJ   examplebatchelements           r   r   r   -  s    GJ "'5)jG#3wqz?j88#ZWQZ9JC
Oa/:=
J _
 4zABB?000G!'73 	"NE73w<!#3 !NOOLL!	"-"6  s   C3C5)N    r   )@   r   )r   r   rZ   )r   r   )#r[   r   r   r]   r^   typingr   r   r   r   r   r   ctranslate2._extr	   r
   r   r   r   r   r    rT   intr   r   r   r   floatboolr   r   r   rV   rI   r)   r   r   r   r   <module>r      s        K K Q 48 ::T#Y: HT#Y/0: 	:
 :  :B  $$T#Y$ T#Y$ 	$
 $ m$T  	""49%" " 	" "P  	""T#Y" " 	" m"P *.?
  # "#! ! !48<@ #??I? DI&?
 ? ? ? ?  ? ? ? ? ? !d3i1? c49d3i789?  !?" #?$ "#%?J  	N "#! ! !48<@)- $7;'NN$s)T$s)_,-N N 	N N N N N  N N N N N !d3i1N  c49d3i789!N" DI&#N$ %N& ,-t34'N( "#)Nh  	O "#! ! !48<@)- $7;'OO$s)T$s)_,-O O 	O O O O O  O O O O O !d3i1O  c49d3i789!O" DI&#O$ %O& ,-t34'O( '()Od%% %%P3l'< r   