
    qiM                     r    d Z ddlmZmZ ddlmZmZmZ ddlm	Z	 ddl
mZmZmZ e G d de	             ZdgZy	)
z
Processor class for Nougat.
    )OptionalUnion)PreTokenizedInput	TextInputTruncationStrategy   )ProcessorMixin)PaddingStrategy
TensorTypeauto_docstringc            B       *    e Zd Z fdZe	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d'dedz  dedz  deeef   dz  dddedz  d	edz  d
edz  dedz  dee	z  dz  dedz  de	e
e	   z  dz  de	e
e	   z  dz  ded   deedf   dz  deez  e
e   z  e
e   z  dz  deez  e
e   z  e
e   z  dz  deez  e
e   z  e
e   z  dz  dedeez  ez  deez  ez  dz  dedz  dedededz  deez  dz  dedz  dedz  d ed!ed"ed#ed$ef@d%       Zd& Z xZS )(NougatProcessorc                 &    t         |   ||       y )N)super__init__)selfimage_processor	tokenizer	__class__s      ^/opt/pipecat/venv/lib/python3.12/site-packages/transformers/models/nougat/processing_nougat.pyr   zNougatProcessor.__init__   s    )4    Ndo_crop_margin	do_resizesizeresamplePILImageResamplingdo_thumbnaildo_align_long_axisdo_pad
do_rescalerescale_factordo_normalize
image_mean	image_stddata_formatChannelDimensioninput_data_format	text_pairtext_targettext_pair_targetadd_special_tokenspadding
truncation
max_lengthstrideis_split_into_wordspad_to_multiple_ofreturn_tensorsreturn_token_type_idsreturn_attention_maskreturn_overflowing_tokensreturn_special_tokens_maskreturn_offsets_mappingreturn_lengthverbosec#                     ||t        d      |!| j                  ||||||||	|
|||||||      }#|$| j                  ||||||||||||||||| |!|"      }$|#S |$S $d   #d<   |#S )ag  
        do_crop_margin (`bool`, *optional*):
            Whether to automatically crop white margins from document images. When enabled, the processor detects
            and removes white space around the edges of document pages, which is useful for processing scanned
            documents or PDFs with large margins.
        do_thumbnail (`bool`, *optional*):
            Whether to create a thumbnail version of the image. When enabled, a smaller version of the image is
            generated alongside the main processed image, which can be useful for preview or faster processing.
        do_align_long_axis (`bool`, *optional*):
            Whether to automatically align images so that the longer axis is horizontal. When enabled, portrait
            images are rotated to landscape orientation, which is typically better for document processing tasks.
        zBYou need to specify either an `images` or `text` input to process.)r   r   r   r   r   r   r   r    r!   r"   r#   r$   r2   r%   r'   )r(   r)   r*   r+   r,   r-   r.   r/   r0   r1   r2   r3   r4   r5   r6   r7   r8   r9   	input_idslabels)
ValueErrorr   r   )%r   imagestextr   r   r   r   r   r   r   r    r!   r"   r#   r$   r%   r'   r(   r)   r*   r+   r,   r-   r.   r/   r0   r1   r2   r3   r4   r5   r6   r7   r8   r9   inputs	encodingss%                                        r   __call__zNougatProcessor.__call__   s    d >dlabb))-#!)#5%-)%#-'"3! * F$ #'!1#5%%$7#5-&;&;*C+E'=+' ' I, <M^(5F8Mr   c                 :     | j                   j                  |i |S )z
        This method forwards all its arguments to NougatTokenizer's [`~PreTrainedTokenizer.post_process_generation`].
        Please refer to the docstring of this method for more information.
        )r   post_process_generation)r   argskwargss      r   rD   z'NougatProcessor.post_process_generation   s     
 6t~~55tFvFFr   )"NNNNNNNNNNNNNNchannels_firstNNNNTFNNr   FNNNNFFFFT)__name__
__module____qualname__r   r   booldictstrintfloatlistr   r   r   r   r
   r   r   rB   rD   __classcell__)r   s   @r   r   r      s   5  &*!%&*)-$(*.""&-1$(15044DCGfjhlmq#'05=A!%$))-26-1-1*/+0',#Gd t	d
 $;d 38nt#d 'd Tkd !4Kd td 4Kd ed*d Tkd DK'$.d 4;&-d  01!d" !&8!89D@#d$ 004	?BTJ[E\\_cc%d& !22T)_DtL]G^^aee'd( $&77$y/IDQbLccfjj)d* !+d, o--d. 3J!33d:/d0 $J1d2 3d4 "5d6  $J7d8 j(4/9d:  $d{;d<  $d{=d> $(?d@ %)AdB !%CdD EdF Gd dLGr   r   N)__doc__typingr   r   $transformers.tokenization_utils_baser   r   r   processing_utilsr	   utilsr
   r   r   r   __all__ r   r   <module>rY      sJ    # a a . @ @ pGn pG pGf 
r   