
    qi                         d Z ddlmZ ddlmZ ddlmZ ddlmZm	Z	m
Z
mZ ddlmZmZ ddlmZ  ej"                  e      Z G d	 d
e
d      Z G d ded      Ze G d de	             ZdgZy)z
Processor class for UDOP.
    )logging   )BatchFeature)
ImageInput)ProcessingKwargsProcessorMixin
TextKwargsUnpack)PreTokenizedInput	TextInput)auto_docstringc                   j    e Zd ZU ee   eee      z  dz  ed<   eee      eeee         z  dz  ed<   y)UdopTextKwargsNword_labelsboxes)__name__
__module____qualname__listint__annotations__     Z/opt/pipecat/venv/lib/python3.12/site-packages/transformers/models/udop/processing_udop.pyr   r      sC    cT$s)_,t33S	?T$tCy/22T99r   r   F)totalc                   4    e Zd ZU eed<   ddddddddddd	iZy)UdopProcessorKwargstext_kwargsTFr   )	add_special_tokenspadding
truncationstridereturn_overflowing_tokensreturn_special_tokens_maskreturn_offsets_mappingreturn_lengthverboseN)r   r   r   r   r   	_defaultsr   r   r   r   r   #   s2    "&).*/&+"

Ir   r   c            
            e Zd ZdZ fdZe	 	 ddedz  deez  e	e   z  e	e   z  de
e   defd       Zd	 Zed
        Z xZS )UdopProcessoras  
    Constructs a UDOP processor which combines a LayoutLMv3 image processor and a UDOP tokenizer into a single processor.

    [`UdopProcessor`] offers all the functionalities you need to prepare data for the model.

    It first uses [`LayoutLMv3ImageProcessor`] to resize, rescale and normalize document images, and optionally applies OCR
    to get words and normalized bounding boxes. These are then provided to [`UdopTokenizer`],
    which turns the words and bounding boxes into token-level `input_ids`, `attention_mask`, `token_type_ids`, `bbox`.
    Optionally, one can provide integer `word_labels`, which are turned into token-level `labels` for token
    classification tasks (such as FUNSD, CORD).

    Additionally, it also supports passing `text_target` and `text_pair_target` to the tokenizer, which can be used to
    prepare labels for language modeling tasks.
    c                 &    t         |   ||       y )N)super__init__)selfimage_processor	tokenizer	__class__s      r   r-   zUdopProcessor.__init__E   s    )4r   Nimagestextkwargsreturnc                 6    | j                   t        fd| j                  j                  i|}|d   j	                  dd       }|d   j	                  dd       }|d   j	                  dd       }|d   j                  dd      }|d   j                  dd      }	|d   j                  d	d       }
| j                  j                  r|t        d
      | j                  j                  r|t        d      |r|	st        d      |
 | j                  di |d   S  | j                  dd|i|d   }|j	                  dd       }|j	                  dd       }|d   j	                  d	d        |d   j	                  dd        ||d   d<   ||n||d   d<   ||d   d<   |3| j                  j                  r|t        |t              r|g}||d   d<    | j                  dd||n|i|d   }|du r| j                  |d   |d         |d<   |j                  |       |S )Ntokenizer_init_kwargsr   r   r   	text_pairr#   Fr%   text_targetzdYou cannot provide bounding boxes if you initialized the image processor with apply_ocr set to True.zaYou cannot provide word labels if you initialized the image processor with apply_ocr set to True.zKYou cannot return overflowing tokens without returning the offsets mapping.r2   images_kwargswordstext_pair_targetr3   Tpixel_valuesoverflow_to_sample_mappingr   )_merge_kwargsr   r0   init_kwargspopgetr/   	apply_ocr
ValueError
isinstancestrget_overflowing_imagesupdate)r.   r2   r3   r4   output_kwargsr   r   r8   r#   r%   r9   featuresfeatures_wordsfeatures_boxesencoded_inputss                  r   __call__zUdopProcessor.__call__H   s    +**
"&.."<"<
 
 m,00$?#M266}dK!-044[$G	$1-$@$D$DE`bg$h!!.}!=!A!ABZ\a!b#M266}dK))u/@v  )){/Fs  %-Cjkk"!4>> .  ,t++\6\]?=[\H%\\'48N%\\'48N-(,,]DA-(,,-?F8AM-(5=B=NETbM-(1:EM-(7 D$8$8$B$ByGXdC( 6D<Jm,[9+T^^ !-T>.N )D0+/+F+F^,n=Y.Z,( OON+Or   c                     g }|D ]  }|j                  ||           t        |      t        |      k7  r#t        dt        |       dt        |             |S )Nz`Expected length of images to be the same as the length of `overflow_to_sample_mapping`, but got z and )appendlenrD   )r.   r2   r>   images_with_overflow
sample_idxs        r   rG   z$UdopProcessor.get_overflowing_images   sy    !4 	<J ''z(:;	< #$,F(GG,-.eC8R4S3TV 
 $#r   c                 ~    | j                   j                  }| j                  j                  }t        ||z   dgz         S )Nbbox)r0   model_input_namesr/   r   )r.   tokenizer_input_namesimage_processor_input_namess      r   rV   zUdopProcessor.model_input_names   s=     $ @ @&*&:&:&L&L#),GG6(RSSr   )NN)r   r   r   __doc__r-   r   r   r   r   r   r
   r   r   rN   rG   propertyrV   __classcell__)r1   s   @r   r*   r*   4   s    5  %)Z^FT!F ++d9o=EV@WWF ,-	F
 
F FR$ T Tr   r*   N)rY   transformersr   image_processing_utilsr   image_utilsr   processing_utilsr   r   r	   r
   tokenization_utils_baser   r   utilsr   
get_loggerr   loggerr   r   r*   __all__r   r   r   <module>re      s~    ! 2 % T T C # 
		H	%:Zu :
*% " pTN pT pTf 
r   