
    qi                        d Z ddlZddlmZ ddlmZmZ ddlmZ ddl	m
Z
mZmZ ddlmZmZmZ dd	lmZmZ dd
lmZmZ erddlmZ  ej0                  e      ZdefdZd Z G d ded      Ze G d de             ZdgZ y)z
Processor class for IDEFICS2.
    N)
accumulate)TYPE_CHECKINGUnion   )BatchFeature)
ImageInputis_valid_image
load_image)ProcessingKwargsProcessorMixinUnpack)
AddedToken	TextInput)auto_docstringlogging)PreTokenizedInputreturnc                 H    t        | t              xr | j                  d      S )Nhttp)
isinstancestr
startswith)vals    b/opt/pipecat/venv/lib/python3.12/site-packages/transformers/models/idefics2/processing_idefics2.pyis_urlr   (   s    c3:CNN6$::    c                 2    t        |       xs t        |       S N)r   r	   )elems    r   is_image_or_image_urlr    ,   s    $</>$//r   c                       e Zd ZdddddiZy)Idefics2ProcessorKwargstext_kwargsTF)add_special_tokenspaddingis_split_into_wordsN)__name__
__module____qualname__	_defaults r   r   r"   r"   0   s    "&#(
Ir   r"   F)totalc                        e Zd Z	 ddededz  f fdZd Ze	 	 ddee	e   z  e	e	e      z  de
ede	e   e	d   f   d	ee   d
efd       Z xZS )Idefics2ProcessorNimage_seq_lenchat_templatec                 $   t        |d      st        ddd      j                  | _        t        ddd      j                  | _        d| j                  | j                  gi}|j                  |       |j                  | j                        | _        n3|j                  | _        |j                  | _        |j                  | _        t        ddd      | _	        |j                  d| j                  gi       || _
        t        | 1  |||	       y
)ap  
        image_seq_len (`int`, *optional*, defaults to 64):
            The length of the image sequence i.e. the number of <image> tokens per image in the input.
            This parameter is used to build the string from the input prompt and image tokens and should match the
            config.perceiver_config.resampler_n_latents value for the model used.
        image_tokenz<fake_token_around_image>FT)
normalizedspecialz<image>additional_special_tokensz<end_of_utterance>)r0   N)hasattrr   contentfake_image_tokenr2   r$   convert_tokens_to_idsimage_token_idimage_boundary_tokenend_of_utterance_tokenr/   super__init__)selfimage_processor	tokenizerr/   r0   kwargstokens_to_add	__class__s          r   r>   zIdefics2Processor.__init__<   s     y-0$./JW\fj$k$s$sD!))tT\\D84;P;PRVRbRb:cdM((7"+"A"A$BRBR"SD$-$B$BD!(44D"+":":D&01ERWae&f#$$&ADD_D_C`%ab*)=Qr   c                     g }|D ]_  }g }|D ]E  }t        |      r|j                  |        t        |      s,|j                  t        |             G |j                  |       a |S r   )r	   appendr   r
   )r?   promptsprompt_imagespromptimagesr   s         r   _extract_images_from_promptsz.Idefics2Processor._extract_images_from_promptsV   sn     	)FF 4!$'MM$'D\MM*T"23	4
   (	) r   rJ   textr   rB   r   c                    ||t        d       | j                  t        fd| j                  j                  i|}|d   j                  dd       }g }i }|it        |t              r|g}n.t        |t              st        |d   t              st        d      | j                  }| j                  }	| |	| j                  z   | }
| j                  j                  r|
dz  }
g }t        j                  t        j                   |       d      }|D ]q  }|j#                  |j%                  |	             |j'                  |	|
      }|j'                  | | |       }|j)                  | d	|      }|j#                  |       s  | j                  |fi |d   }| j+                  ||d
g       |j-                  |       |t/        |      r|gg}nt        |t        t0        f      rt/        |d         r|t3        |      t5        |      k7  r*t        d	 dt3        |       d	|	 dt5        |       d	      dgt        t7        |            z   }t9        t5        |            D cg c]  }|||   ||dz        }}nO|g}nKt        |t        t0        f      s5t        |d   t        t0        f      st/        |d   d         st        d      |D cg c]  }t5        |       }}|||k(  st        d| d| d      |D cg c]  }|D cg c]  }t;        |       c} }}} | j                  |fi |d   }|j-                  |       t=        ||      S c c}w c c}w c c}w c c}}w )Nz+You must provide either `text` or `images`.tokenizer_init_kwargsr#   return_tensorsr   zAInvalid input text. Please provide a string, or a list of strings   z
(?=[^\s<]) image)
modalitieszThe total number of zP tokens in the prompts should be the same as the number of images passed. Found z tokens and z images.   zdInvalid input images. Please provide a single image or a list of images or a list of list of images.z!The number of images in the text z and images  z should be the same.images_kwargs)tensor_type)
ValueError_merge_kwargsr"   rA   init_kwargspopr   r   listr8   r2   r/   r@   do_image_splittingrecompileescaperF   countreplacesub_check_special_mm_tokensupdater    tuplesumlenr   ranger
   r   )r?   rJ   rL   rB   output_kwargsrO   n_images_in_textinputsr8   r2   	image_strprompt_stringsclosing_fake_patternsampletext_inputscumsum_images_in_textin_images_in_imagesimimage_inputss                       r   __call__zIdefics2Processor.__call__b   s    <FNJKK***#
"&.."<"<
 

 '}599:JDQ$$vd+JtAw4L !dee  $44**K+,[4;M;M-M,NO_N`aI##66%M	N#%::"))<L2M1Nj.Y#Z  . ''[(ABY?+;*<=M<N(OTdSeg-115E4Fa2H&Q%%f-. )$..X=;WXK)).+SZR[)\MM+&$V,!(FT5M27LVTUY7W#+,F;(2;- @&&)*:&;%<Ak],WZ[aWbVcckm 
 ./C$zBR7S2T,T) "'s+;'<!= 4Q7:OPQTUPU:VWF 
 %XF ve}5"6!9tUm<-fQil; z  =C!C&#f+!C!C(:>N(N 78H7IWiVjj~ 
 GMMF7"z"~7MFM/4//Y-:XYLMM,'F??7  "D 8Ms$   *MM#	M-M("M-(M-)N@   N)NN)r'   r(   r)   intr   r>   rK   r   r   r[   r   r   r   r"   r   rv   __classcell__)rD   s   @r   r.   r.   :   s     eiR>ARWZ]aWaR4
  JNbfX@T*--T*5E0FFX@ I2DOTJ]E^^_X@ 01	X@
 
X@ X@r   r.   )!__doc__r]   	itertoolsr   typingr   r   feature_extraction_utilsr   image_utilsr   r	   r
   processing_utilsr   r   r   tokenization_utils_baser   r   utilsr   r   r   
get_loggerr'   loggerboolr   r    r"   r.   __all__r+   r   r   <module>r      s    
   ' 4 A A 
 = , < 
		H	%;4 ;0.e  @@ @@ @@F 
r   