
    qixS                     
   d Z ddlZddlZddlmZ ddlmZ ddlmZ ddlm	Z	m
Z
mZ  ej                  e      Zd	Z	 	 	 	 d*d
ee
   dededz  fdZ	 	 	 	 d*d
ee
   dededz  fdZ G d de      Z G d de	      Z G d de	      Z G d de      Z G d de	      Z G d de	      Z G d de	      Z G d de	      Z G d d e	      Z G d! d"e	      Z G d# d$e	      Zd%dd%d%dd%d%d%d%d&	Zeeeeeeeeeed'
Z d(d(d(d(d(d)d(d(d(d(d'
Z!y)+zGLUE processors and helpers    N)Enum   )PreTrainedTokenizer)logging   )DataProcessorInputExampleInputFeaturesa  This {0} will be removed from the library soon, preprocessing should be handled with the Hugging Face Datasets library. You can have a look at this example script for pointers: https://github.com/huggingface/transformers/blob/main/examples/pytorch/text-classification/run_glue.pyexamples	tokenizer
max_lengthc                 ~    t        j                  t        j                  d      t               t        | |||||      S )av  
    Loads a data file into a list of `InputFeatures`

    Args:
        examples: List of `InputExamples` containing the examples.
        tokenizer: Instance of a tokenizer that will tokenize the examples
        max_length: Maximum example length. Defaults to the tokenizer's max_len
        task: GLUE task
        label_list: List of labels. Can be obtained from the processor using the `processor.get_labels()` method
        output_mode: String indicating the output mode. Either `regression` or `classification`

    Returns:
        Will return a list of task-specific `InputFeatures` which can be fed to the model.

    function)r   task
label_listoutput_mode)warningswarnDEPRECATION_WARNINGformatFutureWarning"_glue_convert_examples_to_features)r   r   r   r   r   r   s         S/opt/pipecat/venv/lib/python3.12/site-packages/transformers/data/processors/glue.py!glue_convert_examples_to_featuresr   #   s9    . MM%,,Z8-H-)
*bm     c                    ||j                   }|`t        |          }|+|j                         }t        j	                  d| d|        $t
        |   t        j	                  d d|        t        |      D ci c]  \  }}||
 c}}dt        dt        t        z  d z  ffd}	| D 
cg c]
  }
 |	|
       }}
 || D 
cg c]  }
|
j                  |
j                  f c}
|dd	      }g }t        t        |             D ];  }|D ci c]  }|||   |    }}t        di |d
||   i}|j                  |       = t        | d d       D ]W  \  }}
t        j	                  d       t        j	                  d|
j                           t        j	                  d||           Y |S c c}}w c c}
w c c}
w c c}w )NzUsing label list z
 for task zUsing output mode examplereturnc                     | j                   y dk(  r| j                      S dk(  rt        | j                         S t              )Nclassification
regression)labelfloatKeyError)r   	label_mapr   s    r   label_from_examplez>_glue_convert_examples_to_features.<locals>.label_from_exampleV   sJ    == **W]]++L(''{##r   r   T)r   padding
truncationr"      z*** Example ***zguid: z
features:  )model_max_lengthglue_processors
get_labelsloggerinfoglue_output_modes	enumerater	   intr#   text_atext_brangelenr
   appendguid)r   r   r   r   r   r   	processorir"   r&   r   labelsbatch_encodingfeatureskinputsfeaturer%   s        `           @r   r   r   @   s    //
#D)+	"--/JKK+J<z$HI+D1KKK,[MD6JK*3J*?@ha@I$L $S5[45G $ :BBg )BFB9ABg'..'..	)B	N H3x=! !3ABa!^A&q))BB:&:q	: 	!  !- 0
7%&fW\\N+,j!./0
 OA A C 	C Cs   F23F8
F=Gc                       e Zd ZdZdZy)
OutputModer    r!   N)__name__
__module____qualname__r    r!   r*   r   r   rB   rB   w   s    %NJr   rB   c                   F     e Zd ZdZ fdZd Zd Zd Zd Zd Z	d Z
 xZS )	MrpcProcessorz/Processor for the MRPC data set (GLUE version).c                 ~    t        |   |i | t        j                  t        j                  d      t               y Nr9   super__init__r   r   r   r   r   selfargskwargs	__class__s      r   rL   zMrpcProcessor.__init__   /    $)&))00=}Mr   c           	          t        |d   j                         |d   j                         j                  d      |d   j                         j                  d      t        |d   j                                     S See base class.idx	sentence1utf-8	sentence2r"   r	   numpydecodestrrN   tensor_dicts     r   get_example_from_tensor_dictz*MrpcProcessor.get_example_from_tensor_dict   n    $$&$**,33G<$**,33G<G$**,-	
 	
r   c                     t         j                  dt        j                  j	                  |d              | j                  | j                  t        j                  j	                  |d            d      S )rU   zLOOKING AT 	train.tsvtrain)r.   r/   ospathjoin_create_examples	_read_tsvrN   data_dirs     r   get_train_examplesz MrpcProcessor.get_train_examples   sQ    k"'',,x"E!FGH$$T^^BGGLL;4W%XZabbr   c                     | j                  | j                  t        j                  j	                  |d            d      S rU   zdev.tsvdevrh   ri   re   rf   rg   rj   s     r   get_dev_exampleszMrpcProcessor.get_dev_examples   .    $$T^^BGGLL94U%VX]^^r   c                     | j                  | j                  t        j                  j	                  |d            d      S rU   ztest.tsvtestrp   rj   s     r   get_test_exampleszMrpcProcessor.get_test_examples   .    $$T^^BGGLL:4V%WY_``r   c                 
    ddgS rU   01r*   rN   s    r   r-   zMrpcProcessor.get_labels       Szr   c           	          g }t        |      D ]F  \  }}|dk(  r| d| }|d   }|d   }|dk(  rdn|d   }	|j                  t        ||||	             H |S )5Creates examples for the training, dev and test sets.r   -r      ru   Nr8   r3   r4   r"   r1   r7   r	   
rN   linesset_typer   r:   liner8   r3   r4   r"   s
             r   rh   zMrpcProcessor._create_examples   s~     ' 	`GAtAvZq$D!WF!WF$.DDGEOOLd6&X]^_	` r   rC   rD   rE   __doc__rL   r`   rl   rq   rv   r-   rh   __classcell__rQ   s   @r   rG   rG   |   s-    9N
c
_ar   rG   c                   F     e Zd ZdZ fdZd Zd Zd Zd Zd Z	d Z
 xZS )	MnliProcessorz3Processor for the MultiNLI data set (GLUE version).c                 ~    t        |   |i | t        j                  t        j                  d      t               y rI   rJ   rM   s      r   rL   zMnliProcessor.__init__   rR   r   c           	          t        |d   j                         |d   j                         j                  d      |d   j                         j                  d      t        |d   j                                     S )rU   rV   premiserX   
hypothesisr"   rZ   r^   s     r   r`   z*MnliProcessor.get_example_from_tensor_dict   sn    $$&	"((*11':%++-44W=G$**,-	
 	
r   c                     | j                  | j                  t        j                  j	                  |d            d      S rU   rc   rd   rp   rj   s     r   rl   z MnliProcessor.get_train_examples   .    $$T^^BGGLL;4W%XZabbr   c                     | j                  | j                  t        j                  j	                  |d            d      S )rU   zdev_matched.tsvdev_matchedrp   rj   s     r   rq   zMnliProcessor.get_dev_examples   s/    $$T^^BGGLLK\4]%^`mnnr   c                     | j                  | j                  t        j                  j	                  |d            d      S )rU   ztest_matched.tsvtest_matchedrp   rj   s     r   rv   zMnliProcessor.get_test_examples   s/    $$T^^BGGLLK]4^%_aoppr   c                 
    g dS )rU   )contradiction
entailmentneutralr*   r|   s    r   r-   zMnliProcessor.get_labels   s    99r   c           	          g }t        |      D ]U  \  }}|dk(  r| d|d    }|d   }|d   }|j                  d      rdn|d   }	|j                  t        ||||	             W |S )	r   r   r      	   ru   Nr   )r1   
startswithr7   r	   r   s
             r   rh   zMnliProcessor._create_examples   s     ' 	`GAtAvZqa	*D!WF!WF$//7DT"XEOOLd6&X]^_	` r   r   r   s   @r   r   r      s-    =N
coq:r   r   c                   .     e Zd ZdZ fdZd Zd Z xZS )MnliMismatchedProcessorz>Processor for the MultiNLI Mismatched data set (GLUE version).c                 ~    t        |   |i | t        j                  t        j                  d      t               y rI   rJ   rM   s      r   rL   z MnliMismatchedProcessor.__init__   rR   r   c                     | j                  | j                  t        j                  j	                  |d            d      S )rU   zdev_mismatched.tsvdev_mismatchedrp   rj   s     r   rq   z(MnliMismatchedProcessor.get_dev_examples   s/    $$T^^BGGLLK_4`%acsttr   c                     | j                  | j                  t        j                  j	                  |d            d      S )rU   ztest_mismatched.tsvtest_mismatchedrp   rj   s     r   rv   z)MnliMismatchedProcessor.get_test_examples   s/    $$T^^BGGLLK`4a%bduvvr   )rC   rD   rE   r   rL   rq   rv   r   r   s   @r   r   r      s    HNuwr   r   c                   F     e Zd ZdZ fdZd Zd Zd Zd Zd Z	d Z
 xZS )	ColaProcessorz/Processor for the CoLA data set (GLUE version).c                 ~    t        |   |i | t        j                  t        j                  d      t               y rI   rJ   rM   s      r   rL   zColaProcessor.__init__   rR   r   c           	          t        |d   j                         |d   j                         j                  d      dt        |d   j                                     S rU   rV   sentencerX   Nr"   rZ   r^   s     r   r`   z*ColaProcessor.get_example_from_tensor_dict   U    $$&
#))+227;G$**,-	
 	
r   c                     | j                  | j                  t        j                  j	                  |d            d      S r   rp   rj   s     r   rl   z ColaProcessor.get_train_examples   r   r   c                     | j                  | j                  t        j                  j	                  |d            d      S rn   rp   rj   s     r   rq   zColaProcessor.get_dev_examples   rr   r   c                     | j                  | j                  t        j                  j	                  |d            d      S rt   rp   rj   s     r   rv   zColaProcessor.get_test_examples  rw   r   c                 
    ddgS ry   r*   r|   s    r   r-   zColaProcessor.get_labels  r}   r   c           	          |dk(  }|r|dd }|rdnd}g }t        |      D ]8  \  }}| d| }||   }	|rdn|d   }
|j                  t        ||	d|
             : |S )r   ru   r   Nr   r   r   r   )rN   r   r   	test_mode
text_indexr   r:   r   r8   r3   r"   s              r   rh   zColaProcessor._create_examples	  s    &	!"IE#Q
 ' 	^GAtZq$D*%F%D47EOOLd6$V[\]		^
 r   r   r   s   @r   r   r      s-    9N
c_ar   r   c                   F     e Zd ZdZ fdZd Zd Zd Zd Zd Z	d Z
 xZS )	Sst2Processorz0Processor for the SST-2 data set (GLUE version).c                 ~    t        |   |i | t        j                  t        j                  d      t               y rI   rJ   rM   s      r   rL   zSst2Processor.__init__  rR   r   c           	          t        |d   j                         |d   j                         j                  d      dt        |d   j                                     S r   rZ   r^   s     r   r`   z*Sst2Processor.get_example_from_tensor_dict  r   r   c                     | j                  | j                  t        j                  j	                  |d            d      S r   rp   rj   s     r   rl   z Sst2Processor.get_train_examples(  r   r   c                     | j                  | j                  t        j                  j	                  |d            d      S rn   rp   rj   s     r   rq   zSst2Processor.get_dev_examples,  rr   r   c                     | j                  | j                  t        j                  j	                  |d            d      S rt   rp   rj   s     r   rv   zSst2Processor.get_test_examples0  rw   r   c                 
    ddgS ry   r*   r|   s    r   r-   zSst2Processor.get_labels4  r}   r   c           	          g }|dk(  rdnd}t        |      D ]A  \  }}|dk(  r| d| }||   }|dk(  rdn|d   }	|j                  t        ||d|	             C |S )r   ru   r   r   r   Nr   r   )
rN   r   r   r   r   r:   r   r8   r3   r"   s
             r   rh   zSst2Processor._create_examples8  s    "f,Q!
 ' 	^GAtAvZq$D*%F$.DDGEOOLd6$V[\]	^ r   r   r   s   @r   r   r     s-    :N
c_ar   r   c                   F     e Zd ZdZ fdZd Zd Zd Zd Zd Z	d Z
 xZS )	StsbProcessorz0Processor for the STS-B data set (GLUE version).c                 ~    t        |   |i | t        j                  t        j                  d      t               y rI   rJ   rM   s      r   rL   zStsbProcessor.__init__I  rR   r   c           	          t        |d   j                         |d   j                         j                  d      |d   j                         j                  d      t        |d   j                                     S rT   rZ   r^   s     r   r`   z*StsbProcessor.get_example_from_tensor_dictM  ra   r   c                     | j                  | j                  t        j                  j	                  |d            d      S r   rp   rj   s     r   rl   z StsbProcessor.get_train_examplesV  r   r   c                     | j                  | j                  t        j                  j	                  |d            d      S rn   rp   rj   s     r   rq   zStsbProcessor.get_dev_examplesZ  rr   r   c                     | j                  | j                  t        j                  j	                  |d            d      S rt   rp   rj   s     r   rv   zStsbProcessor.get_test_examples^  rw   r   c                     dgS )rU   Nr*   r|   s    r   r-   zStsbProcessor.get_labelsb  s	    vr   c           	          g }t        |      D ]I  \  }}|dk(  r| d|d    }|d   }|d   }|dk(  rdn|d   }	|j                  t        ||||	             K |S )	r   r   r      r   ru   Nr   r   r   r   s
             r   rh   zStsbProcessor._create_examplesf       ' 	`GAtAvZqa	*D!WF!WF$.DDHEOOLd6&X]^_	` r   r   r   s   @r   r   r   F  s-    :N
c_ar   r   c                   F     e Zd ZdZ fdZd Zd Zd Zd Zd Z	d Z
 xZS )	QqpProcessorz.Processor for the QQP data set (GLUE version).c                 ~    t        |   |i | t        j                  t        j                  d      t               y rI   rJ   rM   s      r   rL   zQqpProcessor.__init__w  rR   r   c           	          t        |d   j                         |d   j                         j                  d      |d   j                         j                  d      t        |d   j                                     S )rU   rV   	question1rX   	question2r"   rZ   r^   s     r   r`   z)QqpProcessor.get_example_from_tensor_dict{  ra   r   c                     | j                  | j                  t        j                  j	                  |d            d      S r   rp   rj   s     r   rl   zQqpProcessor.get_train_examples  r   r   c                     | j                  | j                  t        j                  j	                  |d            d      S rn   rp   rj   s     r   rq   zQqpProcessor.get_dev_examples  rr   r   c                     | j                  | j                  t        j                  j	                  |d            d      S rt   rp   rj   s     r   rv   zQqpProcessor.get_test_examples  rw   r   c                 
    ddgS ry   r*   r|   s    r   r-   zQqpProcessor.get_labels  r}   r   c           	          |dk(  }|rdnd}|rdnd}g }t        |      D ]G  \  }}|dk(  r| d|d    }		 ||   }
||   }|rdn|d	   }|j                  t        |	|
||
             I |S # t        $ r Y Ww xY w)r   ru   r   r      r   r   r   Nr)   r   )r1   
IndexErrorr7   r	   )rN   r   r   r   q1_indexq2_indexr   r:   r   r8   r3   r4   r"   s                r   rh   zQqpProcessor._create_examples  s    &	!1q!1q ' 
	`GAtAvZqa	*Dhh )tAw OOLd6&X]^_
	`   s   A++	A76A7r   r   s   @r   r   r   t  s-    8N
c_ar   r   c                   F     e Zd ZdZ fdZd Zd Zd Zd Zd Z	d Z
 xZS )	QnliProcessorz/Processor for the QNLI data set (GLUE version).c                 ~    t        |   |i | t        j                  t        j                  d      t               y rI   rJ   rM   s      r   rL   zQnliProcessor.__init__  rR   r   c           	          t        |d   j                         |d   j                         j                  d      |d   j                         j                  d      t        |d   j                                     S )rU   rV   questionrX   r   r"   rZ   r^   s     r   r`   z*QnliProcessor.get_example_from_tensor_dict  sn    $$&
#))+227;
#))+227;G$**,-	
 	
r   c                     | j                  | j                  t        j                  j	                  |d            d      S r   rp   rj   s     r   rl   z QnliProcessor.get_train_examples  r   r   c                     | j                  | j                  t        j                  j	                  |d            d      S rn   rp   rj   s     r   rq   zQnliProcessor.get_dev_examples  rr   r   c                     | j                  | j                  t        j                  j	                  |d            d      S rt   rp   rj   s     r   rv   zQnliProcessor.get_test_examples  rw   r   c                 
    ddgS rU   r   not_entailmentr*   r|   s    r   r-   zQnliProcessor.get_labels      .//r   c           	          g }t        |      D ]I  \  }}|dk(  r| d|d    }|d   }|d   }|dk(  rdn|d   }	|j                  t        ||||	             K |S 	r   r   r   r   r   ru   Nr   r   r   r   s
             r   rh   zQnliProcessor._create_examples  r   r   r   r   s   @r   r   r     s-    9N
c_a0r   r   c                   F     e Zd ZdZ fdZd Zd Zd Zd Zd Z	d Z
 xZS )	RteProcessorz.Processor for the RTE data set (GLUE version).c                 ~    t        |   |i | t        j                  t        j                  d      t               y rI   rJ   rM   s      r   rL   zRteProcessor.__init__  rR   r   c           	          t        |d   j                         |d   j                         j                  d      |d   j                         j                  d      t        |d   j                                     S rT   rZ   r^   s     r   r`   z)RteProcessor.get_example_from_tensor_dict  ra   r   c                     | j                  | j                  t        j                  j	                  |d            d      S r   rp   rj   s     r   rl   zRteProcessor.get_train_examples  r   r   c                     | j                  | j                  t        j                  j	                  |d            d      S rn   rp   rj   s     r   rq   zRteProcessor.get_dev_examples  rr   r   c                     | j                  | j                  t        j                  j	                  |d            d      S rt   rp   rj   s     r   rv   zRteProcessor.get_test_examples  rw   r   c                 
    ddgS r   r*   r|   s    r   r-   zRteProcessor.get_labels  r   r   c           	          g }t        |      D ]I  \  }}|dk(  r| d|d    }|d   }|d   }|dk(  rdn|d   }	|j                  t        ||||	             K |S r   r   r   s
             r   rh   zRteProcessor._create_examples  r   r   r   r   s   @r   r   r     s-    8N
c_a0r   r   c                   F     e Zd ZdZ fdZd Zd Zd Zd Zd Z	d Z
 xZS )	WnliProcessorz/Processor for the WNLI data set (GLUE version).c                 ~    t        |   |i | t        j                  t        j                  d      t               y rI   rJ   rM   s      r   rL   zWnliProcessor.__init__  rR   r   c           	          t        |d   j                         |d   j                         j                  d      |d   j                         j                  d      t        |d   j                                     S rT   rZ   r^   s     r   r`   z*WnliProcessor.get_example_from_tensor_dict  ra   r   c                     | j                  | j                  t        j                  j	                  |d            d      S r   rp   rj   s     r   rl   z WnliProcessor.get_train_examples  r   r   c                     | j                  | j                  t        j                  j	                  |d            d      S rn   rp   rj   s     r   rq   zWnliProcessor.get_dev_examples  rr   r   c                     | j                  | j                  t        j                  j	                  |d            d      S rt   rp   rj   s     r   rv   zWnliProcessor.get_test_examples  rw   r   c                 
    ddgS ry   r*   r|   s    r   r-   zWnliProcessor.get_labels   r}   r   c           	          g }t        |      D ]I  \  }}|dk(  r| d|d    }|d   }|d   }|dk(  rdn|d   }	|j                  t        ||||	             K |S r   r   r   s
             r   rh   zWnliProcessor._create_examples$  r   r   r   r   s   @r   r   r     s-    9N
c_ar   r   r   )	colamnlimrpcsst-2sts-bqqpqnlirtewnli)
r   r   zmnli-mmr   r   r   r   r   r  r  r    r!   )NNNN)"r   re   r   enumr   tokenization_pythonr   utilsr   r   r	   r
   
get_loggerrC   r.   r   listr2   r   r   rB   rG   r   r   r   r   r   r   r   r   r   glue_tasks_num_labelsr,   r0   r*   r   r   <module>r	     s   " 	   6  = = 
		H	%m  "	< " d
@ "	4< 4"4 d
4n 
,M ,^+M +\wm w ,M ,^+M +\+M +\1= 1h+M +\+= +\+M +^ 
  &  r   