
    qiq                         d dl mZ d dlZddlmZ ddlmZ ddlmZ ddl	m
Z
mZ dd	lmZ d
dlmZ  ej                   e      Ze G d de             Ze
 G d de             ZdgZy)    )	dataclassN   )Cache)$ImageClassifierOutputWithNoAttention)PreTrainedModel)auto_docstringlogging   )AutoModelForImageTextToText   )ShieldGemma2Configc                   :    e Zd ZU dZdZej                  dz  ed<   y)0ShieldGemma2ImageClassifierOutputWithNoAttentionz^ShieldGemma2 classifies imags as violative or not relative to a specific policy
    Args:
    Nprobabilities)__name__
__module____qualname____doc__r   torchTensor__annotations__     h/opt/pipecat/venv/lib/python3.12/site-packages/transformers/models/shieldgemma2/modeling_shieldgemma2.pyr   r   !   s     *.M5<<$&-r   r   c                        e Zd ZU eed<   dZdddddZdef fdZd	 Zd
 Z	d Z
d Ze	 	 	 	 	 	 	 	 	 	 	 	 	 	 ddej                  dz  dej                  dz  dej                   dz  dej                  dz  dedz  dej                  dz  dej                  dz  dej                  dz  dej                  dz  dedz  dedz  dedz  dedz  deej                   z  defd       Z xZS )"ShieldGemma2ForImageClassificationconfig)imagetextzmodel.model.language_modelzmodel.model.vision_towerz!model.model.multi_modal_projectorzmodel.lm_head)zmodel.language_model.modelzmodel.vision_towerzmodel.multi_modal_projectorzmodel.language_model.lm_headc                     t         |   |       t        |dd      | _        t        |dd      | _        t        j                  |      | _        | j                          y )N)r   yes_token_indexi *  no_token_indexi  )	super__init__getattrr!   r"   r   from_configmodel	post_init)selfr   	__class__s     r   r$   z+ShieldGemma2ForImageClassification.__init__5   sS    '&v/@&I%f.>E0<<FK
r   c                 R    | j                   j                         j                         S N)r'   get_decoderget_input_embeddingsr)   s    r   r.   z7ShieldGemma2ForImageClassification.get_input_embeddings<   s    zz%%'<<>>r   c                 V    | j                   j                         j                  |       y r,   )r'   r-   set_input_embeddings)r)   values     r   r1   z7ShieldGemma2ForImageClassification.set_input_embeddings?   s    

 55e<r   c                 R    | j                   j                         j                         S r,   )r'   r-   get_output_embeddingsr/   s    r   r4   z8ShieldGemma2ForImageClassification.get_output_embeddingsB   s    zz%%'==??r   c                 V    | j                   j                         j                  |       y r,   )r'   r-   set_output_embeddings)r)   new_embeddingss     r   r6   z8ShieldGemma2ForImageClassification.set_output_embeddingsE   s    

 66~Fr   N	input_idspixel_valuesattention_maskposition_idspast_key_valuestoken_type_idscache_positioninputs_embedslabels	use_cacheoutput_attentionsoutput_hidden_statesreturn_dictlogits_to_keepreturnc                      | j                   d|||||||||	|
||||d|}|j                  }|ddd| j                  | j                  gf   }t	        j
                  |d      }t        ||      S )aY  
        Returns:
            A `ShieldGemma2ImageClassifierOutputWithNoAttention` instance containing the logits and probabilities
            associated with the model predicting the `Yes` or `No` token as the response to that prompt, captured in the
            following properties.

                *   `logits` (`torch.Tensor` of shape `(batch_size, 2)`):
                    The first position along dim=1 is the logits for the `Yes` token and the second position along dim=1 is
                    the logits for the `No` token.
                *   `probabilities` (`torch.Tensor` of shape `(batch_size, 2)`):
                    The first position along dim=1 is the probability of predicting the `Yes` token and the second position
                    along dim=1 is the probability of predicting the `No` token.

            ShieldGemma prompts are constructed such that predicting the `Yes` token means the content *does violate* the
            policy as described. If you are only interested in the violative condition, use
            `violated = outputs.probabilities[:, 1]` to extract that slice from the output tensors.

            When used with the `ShieldGemma2Processor`, the `batch_size` will be equal to `len(images) * len(policies)`,
            and the order within the batch will be img1_policy1, ... img1_policyN, ... imgM_policyN.
        )r8   r9   r:   r;   r<   r=   r>   r?   r@   rA   rB   rC   rD   rE   N)dim)logitsr   r   )r'   rJ   r!   r"   r   softmaxr   )r)   r8   r9   r:   r;   r<   r=   r>   r?   r@   rA   rB   rC   rD   rE   	lm_kwargsoutputsrJ   selected_logitsr   s                       r   forwardz*ShieldGemma2ForImageClassification.forwardH   s    N $** 
%)%+))'/!5#)
 
"  B)=)=t?R?R(S!STo2>?"'
 	
r   )NNNNNNNNNNNNNr   )r   r   r   r   r   input_modalities_checkpoint_conversion_mappingr$   r.   r1   r4   r6   r   r   
LongTensorFloatTensorr   r   boolintr   rO   __classcell__)r*   s   @r   r   r   *   s   (&B8'J(7	&"1 ?=@G  .215.204(,262626*.!%)-,0#'-.=
##d*=
 ''$.=
 t+	=

 &&-=
 =
 ((4/=
 ((4/=
 ((4/=
   4'=
 $;=
  $;=
 #Tk=
 D[=
 ell*=
" 
:#=
 =
r   r   )dataclassesr   r   cache_utilsr   modeling_outputsr   modeling_utilsr   utilsr   r	   autor   configuration_shieldgemma2r   
get_loggerr   loggerr   r   __all__r   r   r   <module>ra      s~    "    D - / : 
		H	% .7[ . . [
 [
 [
~ )r   