
    qi                         d dl ZddlmZ ddlmZmZ ddlmZm	Z	m
Z
 ddlmZmZ ddlmZ  G d d	ed
      Ze G d de	             ZdgZy)    N   )BatchFeature)
ImageInputmake_nested_list_of_images)ProcessingKwargsProcessorMixinUnpack)PreTokenizedInput	TextInput)auto_docstringc                       e Zd ZdddiiZy)Gemma3nProcessorKwargstext_kwargspaddingFN)__name__
__module____qualname__	_defaults     `/opt/pipecat/venv/lib/python3.12/site-packages/transformers/models/gemma3n/processing_gemma3n.pyr   r      s    	5)Ir   r   F)totalc                        e Zd Z	 	 	 ddedef fdZe	 	 	 ddedz  deez  e	e   z  e	e   z  de
j                  e	e   z  e	e
j                     z  e	e	e      z  dz  dee   d	ef
d
       Zed        Z xZS )Gemma3nProcessorNaudio_seq_lengthimage_seq_lengthc                    || _         |j                  | _        |j                  | _        |j                  | _        dj	                  |j                  g|z        }d|j                   | |j
                   d| _        || _        |j                  | _        |j                  | _	        |j                  | _
        dj	                  |j                  g|z        }	d|j                   |	 |j                   d| _        t        
| 8  d||||d| y)a  
        audio_seq_length (int, *optional*, defaults to 188):
            The number of audio soft tokens that will be added to the text prompt
        image_seq_length (int, *optional*, defaults to 256):
            The number of image soft tokens that should be added to
         z

)feature_extractorimage_processor	tokenizerchat_templateNr   )r   audio_token_id	boa_tokenaudio_tokenjoin	eoa_tokenfull_audio_sequencer   image_token_id	boi_tokenimage_token	eoi_tokenfull_image_sequencesuper__init__)selfr   r    r!   r"   r   r   kwargsaudio_tokens_expandedimage_tokens_expanded	__class__s             r   r/   zGemma3nProcessor.__init__!   s!     !1'66",,$00 ")>)>(?BR(R S%))*=*=)>?T>UV_ViViUjjn#o  0'66",,$00 ")>)>(?BR(R S%))*=*=)>?T>UV_ViViUjjn#o  	
/+'		

 	
r   imagestextaudior1   returnc           	         |||t        d       | j                  t        fd| j                  j                  i|}t        |t              r|g}n.t        |t              st        |d   t              st        d      |e | j                  |fi |d   }|s|D cg c]  }| j                   }}|D cg c](  }|j                  | j                  | j                        * }}ni }|| j                  j                  |      }t        |      }	 | j                  |	fi |d   }
|s5|	D cg c]*  }dj!                  | j"                  gt%        |      z        , }}t%        |	      t%        |      k7  r$t        dt%        |	       d	t%        |       d
      |D cg c](  }|j                  | j"                  | j&                        * }}ni }
|d   j)                  dd       } | j                  dd|i|d   ddi}| j+                  ||dg       |d   }t-        j.                  |      }d||| j0                  k(  <   d||| j2                  k(  <   |j5                         D ci c]  \  }}||j7                          }}}|j7                         |d<   t9        i ||
||      S c c}w c c}w c c}w c c}w c c}}w )Nz5Provide at least one of `text`, `images`, or `audio`.tokenizer_init_kwargsr   zAInvalid input text. Please provide a string, or a list of stringsaudio_kwargsimages_kwargs z1Received inconsistently sized batches of images (z) and text (z).r   return_tensorsr6   npimage)
modalities	input_ids   r   token_type_ids)datatensor_typer   )
ValueError_merge_kwargsr   r!   init_kwargs
isinstancestrlist	TypeErrorr   r%   replacer(   r    fetch_imagesr   r&   r+   lenr-   pop_check_special_mm_tokensr?   
zeros_liker)   r#   itemstolistr   )r0   r5   r6   r7   r1   output_kwargsaudio_inputs_promptbatched_imagesimage_inputsr>   text_inputs	array_idsrD   kvs                    r   __call__zGemma3nProcessor.__call__G   s    <FNu}TUU***"
"&.."<"<
 
 dC 6DD$'
47C0H_``1411%Y=;XYL278Q((88 ^bbSYFNN4#3#3T5M5MNbDbL))66v>F7?N/4//a-P_B`aL Q_`v$"2"2!3c&k!AB``>"c$i/ GNH[G\\hilmqirhssuv 
 ^bbSYFNN4#3#3T5M5MNbDbL&}599:JDQ$dnnd$d-2Nd_cd%%dKWI%N  ,	y1;<yD$7$778;<yD$7$7781<1B1B1DEAq!((*}EE(6(=(=(?$%!PK!P<!P<!P^lmmK 9 c a c Fs   J*8-J/./J4-J9&J>c                     | j                   j                  dgz   }| j                  j                  }| j                  j                  }|D cg c]
  }|dk7  s	| }}t	        ||z   |z         S c c}w )NrD   	num_crops)r!   model_input_namesr    r   rL   )r0   tokenizer_input_namesimage_processor_input_namesaudio_processor_input_namesnames        r   rc   z"Gemma3nProcessor.model_input_names   sz     $ @ @DTCU U&*&:&:&L&L#&*&<&<&N&N#8S&kW[_jWjt&k#&k),GGJeeff 'ls   
A-A-)N      )NNN)r   r   r   intr/   r   r   r   r
   rL   r?   ndarrayfloatr	   r   r   r`   propertyrc   __classcell__)r4   s   @r   r   r      s      # #$
 $
 $
L  %)Z^X\	>nT!>n ++d9o=EV@WW>n zzDK'$rzz*::T$u+=NNQUU	>n
 /0>n 
>n >n@ g gr   r   )numpyr?   feature_extraction_utilsr   image_utilsr   r   processing_utilsr   r   r	   tokenization_utils_baser
   r   utilsr   r   r   __all__r   r   r   <module>rv      sW      4 A H H C #-U  ng~ ng ngb 
r   