
    qi.                         d dl Z d dlZddlmZ ddlmZmZ ddlm	Z	m
Z
mZ  ej                  e      Z G d de	      Z e ed	
             G d de
             Zy)    N   )TruncationStrategy)add_end_docstringslogging   )ArgumentHandlerChunkPipelinebuild_pipeline_init_argsc                       e Zd ZdZd Zd Zy)%ZeroShotClassificationArgumentHandlerz
    Handles arguments for zero-shot for text classification by turning each possible label into an NLI
    premise/hypothesis pair.
    c                     t        |t              r=|j                  d      D cg c]#  }|j                         s|j                         % }}|S c c}w )N,)
isinstancestrsplitstrip)selflabelslabels      a/opt/pipecat/venv/lib/python3.12/site-packages/transformers/pipelines/zero_shot_classification.py_parse_labelsz3ZeroShotClassificationArgumentHandler._parse_labels   sA    fc"17c1BTekkmekkmTFT Us
   AAc           
      F   t        |      dk(  st        |      dk(  rt        d      |j                  |d         |k(  rt        d| d      t        |t              r|g}g }|D ]2  }|j                  |D cg c]  }||j                  |      g c}       4 ||fS c c}w )Nr   z>You must include at least one label and at least one sequence.z"The provided hypothesis_template "z" was not able to be formatted with the target labels. Make sure the passed template includes formatting syntax such as {} where the label should go.)len
ValueErrorformatr   r   extend)r   	sequencesr   hypothesis_templatesequence_pairssequencer   s          r   __call__z.ZeroShotClassificationArgumentHandler.__call__   s    v;!s9~2]^^%%fQi04GG45H4I Jq q 
 i%"I! 	gH!!^d"eUZH.A.H.H.O#P"ef	g y(( #fs   7B
N)__name__
__module____qualname____doc__r   r!        r   r   r      s    

)r'   r   T)has_tokenizerc                        e Zd ZdZdZdZdZdZ e       f fd	Z	e
d        Zddej                  fdZd Zdeee   z  f fd	Zdd
Zd ZddZ xZS )ZeroShotClassificationPipelinea  
    NLI-based zero-shot classification pipeline using a `ModelForSequenceClassification` trained on NLI (natural
    language inference) tasks. Equivalent of `text-classification` pipelines, but these models don't require a
    hardcoded number of potential classes, they can be chosen at runtime. It usually means it's slower but it is
    **much** more flexible.

    Any combination of sequences and labels can be passed and each combination will be posed as a premise/hypothesis
    pair and passed to the pretrained model. Then, the logit for *entailment* is taken as the logit for the candidate
    label being valid. Any NLI model can be used, but the id of the *entailment* label must be included in the model
    config's :attr:*~transformers.PreTrainedConfig.label2id*.

    Example:

    ```python
    >>> from transformers import pipeline

    >>> oracle = pipeline(model="facebook/bart-large-mnli")
    >>> oracle(
    ...     "I have a problem with my iphone that needs to be resolved asap!!",
    ...     candidate_labels=["urgent", "not urgent", "phone", "tablet", "computer"],
    ... )
    {'sequence': 'I have a problem with my iphone that needs to be resolved asap!!', 'labels': ['urgent', 'phone', 'computer', 'not urgent', 'tablet'], 'scores': [0.504, 0.479, 0.013, 0.003, 0.002]}

    >>> oracle(
    ...     "I have a problem with my iphone that needs to be resolved asap!!",
    ...     candidate_labels=["english", "german"],
    ... )
    {'sequence': 'I have a problem with my iphone that needs to be resolved asap!!', 'labels': ['english', 'german'], 'scores': [0.814, 0.186]}
    ```

    Learn more about the basics of using a pipeline in the [pipeline tutorial](../pipeline_tutorial)

    This NLI pipeline can currently be loaded from [`pipeline`] using the following task identifier:
    `"zero-shot-classification"`.

    The models that this pipeline can use are models that have been fine-tuned on an NLI task. See the up-to-date list
    of available models on [huggingface.co/models](https://huggingface.co/models?search=nli).
    FTc                 |    || _         t        |   di | | j                  dk(  rt        j                  d       y y )NzFailed to determine 'entailment' label id from the label2id mapping in the model config. Setting to -1. Define a descriptive label2id mapping in the model config to ensure correct outputs.r&   )_args_parsersuper__init__entailment_idloggerwarning)r   args_parserkwargs	__class__s      r   r/   z'ZeroShotClassificationPipeline.__init__Y   s@    '"6"#NNk $r'   c                     | j                   j                  j                  j                         D ](  \  }}|j	                         j                  d      s&|c S  y)Nentailr,   )modelconfiglabel2iditemslower
startswith)r   r   inds      r   r0   z,ZeroShotClassificationPipeline.entailment_idb   sL    **++44::< 	JE3{{}''1
	 r'   c                 n   d}| j                   j                  :t        j                  d       | j                   j                  | j                   _        	 | j                  |||||      }|S # t
        $ r?}dt        |      v r%| j                  ||||t        j                        }n|Y d}~|S d}~ww xY w)ze
        Parse arguments and tokenize only_first so that hypothesis (label) is not truncated
        ptNzfTokenizer was not supporting padding necessary for zero-shot, attempting to use  `pad_token=eos_token`)add_special_tokensreturn_tensorspadding
truncationz	too short)		tokenizer	pad_tokenr1   error	eos_token	Exceptionr   r   DO_NOT_TRUNCATE)	r   r   rC   rA   rD   r4   rB   inputses	            r   _parse_and_tokenizez2ZeroShotClassificationPipeline._parse_and_tokenizei   s     >>##+LL) (,~~'?'?DNN$	^^#5-% $ F2 %  	c!f$ "'9#1#1AA (    %	s   A, ,	B454B//B4c                     i }d|v r!| j                   j                  |d         |d<   d|v r|d   |d<   i }d|v r|d   |d<   |i |fS )Ncandidate_labelsr   multi_label)r-   r   )r   r4   preprocess_paramspostprocess_paramss       r   _sanitize_parametersz3ZeroShotClassificationPipeline._sanitize_parameters   sz    '484E4E4S4STZ[mTn4o01 F*7=>S7T34F"06}0E}- "&888r'   r   c                     t        |      dk(  rn)t        |      dk(  rd|vr	|d   |d<   nt        d|       t        |   |fi |S )a  
        Classify the sequence(s) given as inputs. See the [`ZeroShotClassificationPipeline`] documentation for more
        information.

        Args:
            sequences (`str` or `list[str]`):
                The sequence(s) to classify, will be truncated if the model input is too large.
            candidate_labels (`str` or `list[str]`):
                The set of possible class labels to classify each sequence into. Can be a single label, a string of
                comma-separated labels, or a list of labels.
            hypothesis_template (`str`, *optional*, defaults to `"This example is {}."`):
                The template used to turn each label into an NLI-style hypothesis. This template must include a {} or
                similar syntax for the candidate label to be inserted into the template. For example, the default
                template is `"This example is {}."` With the candidate label `"sports"`, this would be fed into the
                model like `"<cls> sequence to classify <sep> This example is sports . <sep>"`. The default template
                works well in many cases, but it may be worthwhile to experiment with different templates depending on
                the task setting.
            multi_label (`bool`, *optional*, defaults to `False`):
                Whether or not multiple candidate labels can be true. If `False`, the scores are normalized such that
                the sum of the label likelihoods for each sequence is 1. If `True`, the labels are considered
                independent and probabilities are normalized for each candidate by doing a softmax of the entailment
                score vs. the contradiction score.

        Return:
            A `dict` or a list of `dict`: Each result comes as a dictionary with the following keys:

            - **sequence** (`str`) -- The sequence for which this is the output.
            - **labels** (`list[str]`) -- The labels sorted by order of likelihood.
            - **scores** (`list[float]`) -- The probabilities for each of the labels.
        r   r   rO   z%Unable to understand extra arguments )r   r   r.   r!   )r   r   argsr4   r5   s       r   r!   z'ZeroShotClassificationPipeline.__call__   s^    H t9>Y!^ 2& @)-aF%&DTFKLLw	4V44r'   c              #      K   | j                  |||      \  }}t        t        ||            D ]6  \  }\  }}| j                  |g      }	||d   |t	        |      dz
  k(  d|	 8 y w)Nr   r   candidate_labelr    is_last)r-   	enumerateziprM   r   )
r   rK   rO   r   r   r   irX   sequence_pairmodel_inputs
             r   
preprocessz)ZeroShotClassificationPipeline.preprocess   s     $($5$5f>NPc$d!	3<SAQSa=b3c 	/A/22M?CK $3%aL$4 5 99 	 	s   A&A(c                 $   |d   }|d   }| j                   j                  D ci c]  }|||   
 }}| j                  j                  }dt	        j
                  |      j                  v rd|d<    | j                  di |}|||d   d|}|S c c}w )NrX   r    	use_cacheFrY   rW   r&   )rE   model_input_namesr8   forwardinspect	signature
parameters)	r   rK   rX   r    kmodel_inputsmodel_forwardoutputsmodel_outputss	            r   _forwardz'ZeroShotClassificationPipeline._forward   s     !23*%.2nn.N.NO6!9OO

**'++M:EEE(-L%$**,|,  / i(
 	
  Ps   Bc                 z   |D cg c]  }|d   	 }}|D cg c]  }|d   	 }}t        j                  |D cg c]#  }|d   j                         j                         % c}      }|j                  d   }t        |      }	||	z  }
|j                  |
|	df      }|st        |      dk(  r`| j                  }|dk(  rdnd}|d||gf   }t        j                  |      t        j                  |      j                  dd	      z  }|d
   }nM|d| j                  f   }t        j                  |      t        j                  |      j                  dd	      z  }t        t        |d   j                                     }|d   |D cg c]  }||   	 c}|d|f   j                         dS c c}w c c}w c c}w c c}w )NrX   r    logitsr   r,   r   .T)keepdims).r   )r    r   scores)npconcatenatefloatnumpyshaper   reshaper0   expsumlistreversedargsorttolist)r   rk   rP   rj   rO   r   outputrn   Nnnum_sequencesreshaped_outputsr0   contradiction_identail_contr_logitsrp   entail_logitstop_indsr\   s                      r   postprocessz*ZeroShotClassificationPipeline.postprocess   s   FST7G$56TT8EFWWZ(F	FP] ^f!1!7!7!9!?!?!A ^_LLO !Q!>>=!R*@A#./14 ..M%2a%7rQ"239I=8Y3Y"ZVV/0266:M3N3R3RSU`d3R3eeFF^F -S$2D2D-DEMVVM*RVVM-B-F-FrTX-F-YYF!2!2!456!!4<=q'*=Q[)002
 	
+ UF ^* >s   F)F.(F3F8)NzThis example is {}.)F)r"   r#   r$   r%   _load_processor_load_image_processor_load_feature_extractor_load_tokenizerr   r/   propertyr0   r   
ONLY_FIRSTrM   rS   r   ry   r!   r_   rl   r   __classcell__)r5   s   @r   r*   r*   +   s    %N O!#O#H#J    '+tPbPmPm(T
9+5c?+5Z$
r'   r*   )rd   rt   rq   tokenization_pythonr   utilsr   r   baser   r	   r
   
get_loggerr"   r1   r   r*   r&   r'   r   <module>r      sa      4 / J J 
		H	%)O )< ,4@AY
] Y
 BY
r'   