
    qi                         d Z ddlZddlmZ ddlmZmZmZ ddlm	Z	m
Z
 ddlmZmZ  G d d	ed
      Z ej                  e      Ze G d de             ZdgZy)z
Processor class for Donut.
    N   )
ImageInput)ProcessingKwargsProcessorMixinUnpack)PreTokenizedInput	TextInput)auto_docstringloggingc                       e Zd Zi Zy)DonutProcessorKwargsN)__name__
__module____qualname__	_defaults     \/opt/pipecat/venv/lib/python3.12/site-packages/transformers/models/donut/processing_donut.pyr   r      s    Ir   r   F)totalc            	            e Zd Zd	 fd	Ze	 	 d	dedz  deee   z  ez  e	z  dz  de
e   fd       Zed        Zd
dZ xZS )DonutProcessorNc                 &    t         |   ||       y )N)super__init__)selfimage_processor	tokenizerkwargs	__class__s       r   r   zDonutProcessor.__init__#   s    )4r   imagestextr   c                 <   ||t        d       | j                  t        fd| j                  j                  i|}| | j
                  |fi |d   }|-||d   j                  dd        | j                  |fi |d   }|S |S d   d<   |d   |d<   |S )	NzBYou need to specify either an `images` or `text` input to process.tokenizer_init_kwargsimages_kwargstext_kwargsadd_special_tokensF	input_idslabels)
ValueError_merge_kwargsr   r   init_kwargsr   
setdefault)r   r    r!   r   output_kwargsinputs	encodingss          r   __call__zDonutProcessor.__call__&   s     >dlabb*** 
"&.."<"<
 
 )T))&SM/4RSF!m,778LeT&tL}]/KLI<M^(5F8"+K"8F;Mr   c                 N    | j                   j                  }t        |ddgz         S )Nr'   r(   )r   model_input_nameslist)r   image_processor_input_namess     r   r2   z DonutProcessor.model_input_namesF   s*    &*&:&:&L&L#/;2IIJJr   c                    || j                   j                         }i }|r[t        j                  d|t        j                        }|n2||j                         d }d|vrn|d|j                  d      dz    }|t        d      t        d        }t        j                  |      }t        j                  d| d|t        j                        }	|	|j                  |d      }n|	j                         }	t        j                  |      }
t        j                  |	      }t        j                  |
 d| |t        j                  t        j                  z        }||j                  d      j                         }d|v r3d|v r/| j                  |d|	      }|rt        |      dk(  r|d
   }|||<   ntg ||<   |j                  d      D ]?  }|j                         }||v r|d
   dk(  r|dd dk(  r|dd }||   j                  |       A t        ||         dk(  r||   d
   ||<   ||j!                  |	      t        |	      z   d j                         }|dd dk(  r|g| j                  |dd d|	      z   S |r[|r|r|gS |S |rg S d|iS )zS
        Convert a (generated) token sequence into an ordered JSON format.
        Nz<s_>   z</s_ z(.*?)T)is_inner_valueadded_vocabr   z<sep/><z/>   text_sequence)r   get_added_vocabresearch
IGNORECASEstartindexlenescapereplacegroupDOTALLstrip
token2jsonsplitappendfind)r   tokensr9   r:   outputpotential_startstart_tokenkeykey_escaped	end_tokenstart_token_escapedend_token_escapedcontentvalueleafs                  r   rK   zDonutProcessor.token2jsonL   s    ..88:K iiFO& !6!6!8!:;K+%%&B(9(9#(>(BCKc%jCH95C))C.K		T+a"8&"--PI R8%OO-	&(ii&<#$&IIi$8!))*+51B0CDfbmm^`^g^gNg &%mmA.446G(W-? $Ze f "5zQ(-a*/F3K&(s$+MM)$< 5D#'::<D#{2tAw#~$rs)W[J['+Abz"3K..t4	5
 vc{+q0*0+a.F3KI 6Y G IJPPR"1:*"8doofQRjQUcno&oooU X -F8969'2Fov-FFr   )NN)FN)r   r   r   r   r
   r   strr3   r	   r   r   r   r0   propertyr2   rK   __classcell__)r   s   @r   r   r   !   s    5  %)GKT! DIo	),==D -.	 > K K
8Gr   r   )__doc__r@   image_utilsr   processing_utilsr   r   r   tokenization_utils_baser   r	   utilsr
   r   r   
get_loggerr   loggerr   __all__r   r   r   <module>rf      sm    
 % H H C ,+5  
		H	% bG^ bG bGJ 
r   