
    qi?                       d Z ddlmZ ddlmZ ddlmZ ddlZddlmZ ddl	m
Z
 dd	lmZ dd
lmZmZ ddlmZmZmZ ddlmZmZ ddlmZ ddlmZmZmZmZmZ ddl m!Z!m"Z" ddl#m$Z$ ddl%m&Z& ddl'm(Z(m)Z)m*Z*m+Z+ ddl,m-Z- ddl.m/Z/m0Z0m1Z1  e*jd                  e3      Z4dZ5e e)d       G d de                    Z6e e)d       G d de                    Z7 G d dejp                        Z9 G d  d!ejp                        Z: G d" d#ejp                        Z; G d$ d%ejp                        Z< G d& d'ejp                        Z= G d( d)ejp                        Z> G d* d+ejp                        Z? G d, d-ejp                        Z@ G d. d/ejp                        ZA	 	 dcd0ejp                  d1ej                  d2ej                  d3ej                  d4ej                  dz  d5eCdz  d6eCd7e$e(   fd8ZD G d9 d:ejp                        ZE G d; d<ejp                        ZF G d= d>ejp                        ZG G d? d@ejp                        ZH G dA dBe      ZI G dC dDejp                        ZJ G dE dFejp                        ZKe) G dG dHe"             ZL G dI dJeL      ZM e)dK       G dL dMeL             ZN e)dN       G dO dPeL             ZO G dQ dRejp                        ZP G dS dTejp                        ZQ G dU dVejp                        ZR e)dW       G dX dYeL             ZS e)dZ       G d[ d\eL             ZT G d] d^ejp                        ZU e)d_       G d` daeL             ZVg dbZWy)dzPyTorch BridgeTower Model    )OrderedDict)Callable)	dataclassN)nn)CrossEntropyLoss   )initialization)ACT2FNQuickGELUActivation)CacheDynamicCacheEncoderDecoderCache)create_bidirectional_maskcreate_causal_mask)GradientCheckpointingLayer))BaseModelOutputWithPastAndCrossAttentions,BaseModelOutputWithPoolingAndCrossAttentionsMaskedLMOutputModelOutputSequenceClassifierOutput)ALL_ATTENTION_FUNCTIONSPreTrainedModel)Unpack)apply_chunking_to_forward)TransformersKwargsauto_docstringlogging	torch_int)can_return_tuple   )BridgeTowerConfigBridgeTowerTextConfigBridgeTowerVisionConfigRobertaTokenizerz.
    Output type of [`BridgeTowerModel`].
    )custom_introc                       e Zd ZU dZdZej                  dz  ed<   dZej                  dz  ed<   dZ	ej                  dz  ed<   dZ
eej                     dz  ed<   dZeej                     dz  ed<   y)BridgeTowerModelOutputa  
    text_features (`torch.FloatTensor` of shape `(batch_size, text_sequence_length, hidden_size)`):
        Sequence of hidden-states at the text output of the last layer of the model.
    image_features (`torch.FloatTensor` of shape `(batch_size, image_sequence_length, hidden_size)`):
        Sequence of hidden-states at the image output of the last layer of the model.
    pooler_output (`torch.FloatTensor` of shape `(batch_size, hidden_size x 2)`):
        Concatenation of last layer hidden-state of the first token of the text and image sequence (classification
        token), respectively, after further processing through layers used for auxiliary pretraining tasks.
    Ntext_featuresimage_featurespooler_outputhidden_states
attentions)__name__
__module____qualname____doc__r(   torchFloatTensor__annotations__r)   r*   r+   tupler,        f/opt/pipecat/venv/lib/python3.12/site-packages/transformers/models/bridgetower/modeling_bridgetower.pyr'   r'   1   s|     /3M5$$t+2/3NE%%,3.2M5$$t+259M5**+d2926Je''(4/6r6   r'   z>
    Output type of ['BridgeTowerForContrastiveLearning']
    c                   H   e Zd ZU dZdZej                  dz  ed<   dZej                  dz  ed<   dZ	e
ej                     dz  ed<   dZe
ej                     dz  ed<   dZe
ej                     dz  ed<   dZe
ej                     dz  ed<   dZe
ej                     dz  ed	<   y)
BridgeTowerContrastiveOutputa  
    loss (`torch.FloatTensor` of shape `(1,)`, *optional*, returned when `return_loss` is `True`):
        Image-text contrastive loss.
    logits (`torch.FloatTensor` of shape `(batch_size, sequence_length, config.vocab_size)`):
        Prediction scores of the language modeling head (scores for each vocabulary token before SoftMax).
    text_embeds (`torch.FloatTensor)`, *optional*, returned when model is initialized with `with_projection=True`):
        The text embeddings obtained by applying the projection layer to the pooler_output.
    image_embeds (`torch.FloatTensor)`, *optional*, returned when model is initialized with `with_projection=True`):
        The image embeddings obtained by applying the projection layer to the pooler_output.
    cross_embeds (`torch.FloatTensor)`, *optional*, returned when model is initialized with `with_projection=True`):
        The text-image cross-modal embeddings obtained by applying the projection layer to the pooler_output.
    attentions (`tuple(torch.FloatTensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
        Tuple of `torch.FloatTensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
        sequence_length)`.
    Nlosslogitstext_embedsimage_embedscross_embedsr+   r,   )r-   r.   r/   r0   r:   r1   r2   r3   r;   r<   r4   r=   r>   r+   r,   r5   r6   r7   r9   r9   I   s      &*D%

d
")'+FE$+37Ku(()D0748L%))*T1848L%))*T1859M5**+d2926Je''(4/6r6   r9   c                        e Zd Z fdZdej
                  dej
                  fdZddej
                  dej
                  dz  fdZ xZS )BridgeTowerResidualAttentionc                 h   t         |           t        j                  |j                  |j                  dz        | _        t        j                  |j                  |j                        | _        t        j                  t        dt        j                  |j                  |j                  dz        fdt               fdt        j                  |j                  dz  |j                        fg            | _        t        j                  |j                  |j                        | _        d | _        y )N@   epsc_fc   geluc_proj)super__init__r   MultiheadAttentionhidden_sizeattn	LayerNormlayer_norm_epsln_1
ModuleDictr   Linearr   mlpln_2	attn_maskselfconfig	__class__s     r7   rJ   z%BridgeTowerResidualAttention.__init__j   s    ))&*<*<f>P>PTV>VW	LL!3!39N9NO	==RYYv'9'96;M;MPQ;QRS023ryy););a)?ASASTU
 LL!3!39N9NO	r6   hidden_stateattention_maskc                 ,   |+|j                  t        j                  |j                        }| j                  1| j                  j                  |j
                  |j                        nd | _        | j                  |||d| j                  |      d   S )NdtypedeviceF)need_weightsrU   key_padding_maskr   )tor1   boolr_   rU   r^   rM   )rW   rZ   r[   s      r7   	attentionz&BridgeTowerResidualAttention.attention{   s    %+..UZZH[H[.\N ~~) NNL$6$6|?R?RS 	
 yynn+  
  	r6   Nc                     || j                  | j                  |      |      z   }| j                  |      }| j                  j	                         D ]
  } ||      } ||z   }|S N)rd   rP   rT   rS   values)rW   rZ   r[   residual_statelayers        r7   forwardz$BridgeTowerResidualAttention.forward   sc    %tyy7NP^(__yy0XX__& 	/E .L	/%4r6   rf   )	r-   r.   r/   rJ   r1   Tensorrd   rj   __classcell__rY   s   @r7   r@   r@   i   sD    "ell ELL "ELL %,,QUBU r6   r@   c                   ^     e Zd Z fdZddej
                  dej
                  dz  fdZ xZS )BridgeTowerTransformerc                    t         |           |j                  | _        |j                  | _        |j                  rHt        j                  t        | j                  dz
        D cg c]  }t        |       c}      | _	        nDt        j                  t        | j                        D cg c]  }t        |       c}      | _	        |j                  | _
        y c c}w c c}w )Nr    )rI   rJ   rL   num_hidden_layersremove_last_layerr   
ModuleListranger@   	resblocksstop_gradientrW   rX   _rY   s      r7   rJ   zBridgeTowerTransformer.__init__   s    !--!'!9!9##]]?DTE[E[^_E_?`a!-f5aDN  ]]?DTE[E[?\]!-f5]DN $11 b ^s   'C,C!NrZ   r[   c                     g }| j                   D ]H  } |||      }| j                  r |j                  |j                                8|j                  |       J |S rf   )ru   rv   appenddetach)rW   rZ   r[   r+   blocks        r7   rj   zBridgeTowerTransformer.forward   s\    ^^ 	3E ~>L!!$$\%8%8%:;$$\2	3 r6   rf   r-   r.   r/   rJ   r1   rk   rj   rl   rm   s   @r7   ro   ro      s)    2ELL %,,QUBU r6   ro   c                        e Zd Zdef fdZdej                  dededej                  fdZd
dej                  dej                  fd	Z
 xZS )BridgeTowerVisionEmbeddingsrX   c                    t         |           || _        |j                  | _        |j
                  | _        |j                  | _        t        j                  t        j                  | j                              | _        t        j                  |j                  | j                  | j                  | j                  d      | _        | j
                  | j                  z  dz  | _        | j                  dz   | _        t        j"                  | j                   | j                        | _        | j'                  dt        j(                  | j                         j+                  d      d       y )NF)in_channelsout_channelskernel_sizestridebias   r    position_idsr    
persistent)rI   rJ   rX   rL   	embed_dim
image_size
patch_sizer   	Parameterr1   randnclass_embeddingConv2dnum_channelspatch_embeddingnum_patchesnum_positions	Embeddingposition_embeddingregister_bufferarangeexpandrV   s     r7   rJ   z$BridgeTowerVisionEmbeddings.__init__   s	   ++ ++ ++!||EKK,GH!yy++?? 
 !OOt>1D!--1"$,,t/A/A4>>"R^U\\$:L:L-M-T-TU\-]jopr6   
embeddingsheightwidthreturnc                    |j                   d   dz
  }| j                  j                  j                  d      }|j                   d   dz
  }t        j
                  j                         s%||k(  r ||k(  r| j                  | j                        S |ddddf   }|ddddf   }|j                   d   }	|| j                  z  }
|| j                  z  }t        |dz        }|j                  d|||	      }|j                  dddd      }t        j                  j                  ||
|fdd	
      }|j                  dddd      j                  dd|	      }t	        j                   ||fd      S )a   
        This method allows to interpolate the pre-trained position encodings, to be able to use the model on higher resolution
        images. This method is also adapted to support torch.jit tracing.

        Adapted from:
        - https://github.com/facebookresearch/dino/blob/de9ee3df6cf39fac952ab558447af1fa1365362a/vision_transformer.py#L174-L194, and
        - https://github.com/facebookresearch/dinov2/blob/e1277af2ba9496fbadf7aec6eba56e8d882d1e35/dinov2/models/vision_transformer.py#L179-L211
        r    r   Nr         ?r   r   bicubicF)sizemodealign_cornersdim)shaper   weight	unsqueezer1   jit
is_tracingr   r   r   reshapepermuter   
functionalinterpolateviewcat)rW   r   r   r   r   r   r   class_pos_embedpatch_pos_embedr   
new_height	new_widthsqrt_num_positionss                r7   interpolate_pos_encodingz4BridgeTowerVisionEmbeddings.interpolate_pos_encoding   sv    !&&q)A-!44;;EEaH*003a7 yy##%+*F6UZ?**4+<+<==,QU3,QU3r"t.
T__,	&}c'9:)11!5GI[]`a)11!Q1=--33i(	 4 
 *11!Q1=BB1b#Nyy/?;CCr6   pixel_valuesc                 `   |j                   \  }}}}|sJ|| j                  k7  s|| j                  k7  r,t        d| d| d| j                   d| j                   d	      | j                  j                  j
                  }| j                  |j                  |            }|j                  d      j                  dd      }| j                  j                  |dd      }	t        j                  |	|gd	      }
|r|
| j                  |
||      z   }
|
S |
| j                  | j                        z   }
|
S )
NzInput image size (*z) doesn't match model (z).r^   r   r    r   r   )r   r   
ValueErrorr   r   r^   rb   flatten	transposer   r   r1   r   r   r   r   )rW   r   r   
batch_sizerx   r   r   target_dtypepatch_embedsclass_embedsr   s              r7   rj   z#BridgeTowerVisionEmbeddings.forward   s6   '3'9'9$
Avu'Vt-F%SWSbSbJb$VHAeW4KDOOK\\]^b^m^m]nnpq  ++2288++LOO,O,OP#++A.88A>++22:q"EYYl;C
##d&C&CJPVX]&^^J  $d&=&=d>O>O&PPJr6   F)r-   r.   r/   r#   rJ   r1   rk   intr   r2   rj   rl   rm   s   @r7   r   r      se    q6 q,'D5<< 'D 'DUX 'D]b]i]i 'DRE$5$5 Z_ZfZf r6   r   c                        e Zd Z fdZ	 ddej
                  defdZ	 ddej
                  defdZdej
                  fdZ	 xZ
S )	BridgeTowerVisionTransformerc           	      0   t         |           t        |      | _        t	        j
                  |j                  |j                        | _        t        |      | _
        t	        j
                  |j                  |j                        | _        |j                  | _        |j                  set	        j                  t        |j                        D cg c]-  }t	        j
                  |j                  |j                        / c}      | _        y y c c}w NrC   )rI   rJ   r   r   r   rN   rL   rO   ln_prero   transformerln_postshare_layernormrs   rt   rq   ln_separaterw   s      r7   rJ   z%BridgeTowerVisionTransformer.__init__  s    5f=ll6#5#56;P;PQ1&9||F$6$6F<Q<QR%55%%!}}V[\b\t\tVuvQRf00f6K6KLv D &vs   2Dr   r   c                    | j                  ||      }| j                  |      }|j                  ddd      }| j                  ||      }t	        j
                  |d      }|j                  dddd      }| j                  r| j                  |      }|S g }t        || j                        D ]  \  }} ||      }|j                  |         t	        j
                  |d      }|S )Nr    r   r   r   r   )r   r   r   r   r1   stackr   r   zipr   rz   )rW   r   r[   r   r+   hidden_states_stacklns          r7   rj   z$BridgeTowerVisionTransformer.forward  s     6NOM2%--aA6((GMq9%--aAq9 LL7M  #%%(8H8H%I :!r "= 1#**=9: "KK(;CMr6   c                 t    | j                  ||      }| j                  |      }|j                  ddd      }|S )Nr   r    r   r   )r   r   r   )rW   r   r   r+   s       r7   forward_prez(BridgeTowerVisionTransformer.forward_pre,  s?    
 OghM2%--aA6r6   rZ   c                 N    |j                  ddd      }| j                  |      }|S )Nr    r   r   )r   r   )rW   rZ   visual_output_posts      r7   forward_postz)BridgeTowerVisionTransformer.forward_post7  s-    )11!Q:!\\*<=!!r6   r   )r-   r.   r/   rJ   r1   rk   rc   rj   r   r   rl   rm   s   @r7   r   r     sX    " */	ll #'	< */	ll	 #'	" "r6   r   c                   $     e Zd Z fdZd Z xZS )BridgeTowerLinkTowerc                    t         |           |j                  | _        |j                  | _        |j                  dv r|j                  dk(  r.t	        j
                  t        j                  d            | _        n<|j                  dk(  r-t	        j
                  t        j                  d            | _	        t	        j                  | j                  |j                        | _
        y t        d|j                   d      )	N)add
scaled_addr   r   g      ?r   r   rC   link_tower_type  is not implemented)rI   rJ   link_tower_typerL   r   r   r1   tensorscaled_factorbetarN   rO   NotImplementedErrorrV   s     r7   rJ   zBridgeTowerLinkTower.__init__>  s    %55!--!!%II%%5%'\\%,,s2C%D"''=8LLc):;	\\$*:*:@U@UVDN%(89O9O8PPc&deer6   c                 Z   | j                   dk(  r| j                  ||z         S | j                   dk(  r!| j                  || j                  z  |z         S | j                   dk(  r1| j                  |d| j                  z
  z  || j                  z  z         S t	        d| j                    d      )Nr   r   r   r    r   r   )r   rN   r   r   r   )rW   r+   cross_modal_hidden_statesr[   s       r7   rj   zBridgeTowerLinkTower.forwardK  s    5(>>-2K"KLL!!\1>>-$2D2D"DG`"`aa!!]2>>-1tyy="AD]`d`i`iDi"ijj%(89M9M8NNa&bccr6   r-   r.   r/   rJ   rj   rl   rm   s   @r7   r   r   =  s    fdr6   r   c                   n     e Zd Z fdZdej
                  dej
                  dej
                  fdZ xZS )BridgeTowerSelfOutputc                 (   t         |           t        j                  |j                  |j                        | _        t        j                  |j                  |j                        | _        t        j                  |j                        | _
        y r   )rI   rJ   r   rR   rL   denserN   rO   Dropouthidden_dropout_probdropoutrV   s     r7   rJ   zBridgeTowerSelfOutput.__init__X  s`    YYv1163E3EF
f&8&8f>S>STzz&"<"<=r6   r+   input_tensorr   c                 r    | j                  |      }| j                  |      }| j                  ||z         }|S rf   r   r   rN   rW   r+   r   s      r7   rj   zBridgeTowerSelfOutput.forward^  7    

=1]3}|'CDr6   r}   rm   s   @r7   r   r   W  1    >U\\  RWR^R^ r6   r   c                   V     e Zd Z fdZdej
                  dej
                  fdZ xZS )BridgeTowerIntermediatec                    t         |           t        j                  |j                  |j
                        | _        t        |j                  t              rt        |j                     | _        y |j                  | _        y rf   )rI   rJ   r   rR   rL   intermediate_sizer   
isinstance
hidden_actstrr
   intermediate_act_fnrV   s     r7   rJ   z BridgeTowerIntermediate.__init__g  s]    YYv1163K3KL
f''-'-f.?.?'@D$'-'8'8D$r6   r+   r   c                 J    | j                  |      }| j                  |      }|S rf   )r   r   rW   r+   s     r7   rj   zBridgeTowerIntermediate.forwardo  s&    

=100?r6   r}   rm   s   @r7   r   r   f  s#    9U\\ ell r6   r   c                   n     e Zd Z fdZdej
                  dej
                  dej
                  fdZ xZS )BridgeTowerOutputc                 (   t         |           t        j                  |j                  |j
                        | _        t        j                  |j
                  |j                        | _        t        j                  |j                        | _        y r   )rI   rJ   r   rR   r   rL   r   rN   rO   r   r   r   rV   s     r7   rJ   zBridgeTowerOutput.__init__w  s`    YYv779K9KL
f&8&8f>S>STzz&"<"<=r6   r+   r   r   c                 r    | j                  |      }| j                  |      }| j                  ||z         }|S rf   r   r   s      r7   rj   zBridgeTowerOutput.forward}  r   r6   r}   rm   s   @r7   r  r  v  r   r6   r  c                   V     e Zd Z fdZdej
                  dej
                  fdZ xZS )BridgeTowerPoolerc                     t         |           t        j                  |j                  |j                        | _        t        j                         | _        y rf   )rI   rJ   r   rR   rL   r   Tanh
activationrV   s     r7   rJ   zBridgeTowerPooler.__init__  s9    YYv1163E3EF
'')r6   r+   r   c                 \    |d d df   }| j                  |      }| j                  |      }|S )Nr   )r   r  )rW   r+   first_token_tensorpooled_outputs       r7   rj   zBridgeTowerPooler.forward  s6     +1a40

#566r6   r}   rm   s   @r7   r  r    s#    $
U\\ ell r6   r  modulequerykeyvaluer[   scalingr   kwargsc                    ||j                  d      dz  }t        j                  ||j                  dd            |z  }|||z   }t        j
                  j                  |d      }t        j
                  j                  ||| j                        }t        j                  ||      }	|	j                  dd      j                         }	|	|fS )Nr         r   r   r   )ptrainingr    )
r   r1   matmulr   r   r   softmaxr   r  
contiguous)
r  r  r  r  r[   r  r   r  attn_weightsattn_outputs
             r7   eager_attention_forwardr    s     **R.D( <<s}}Q':;gEL!#n4==((2(>L==((6??([L,,|U3K''1-88:K$$r6   c                        e Zd Zd
 fd	Z	 	 	 ddej
                  dej                  dz  dedz  dej
                  dz  dee	   de
ej
                     fd	Z xZS )BridgeTowerSelfAttentionNc                 @   t         |           |j                  |j                  z  dk7  r2t	        |d      s&t        d|j                   d|j                   d      || _        |j                  | _        t        |j                  |j                  z        | _        | j                  | j                  z  | _	        | j                  dz  | _
        t        j                  |j                  | j                        | _        t        j                  |j                  | j                        | _        t        j                  |j                  | j                        | _        t        j                   |j"                        | _        |j&                  | _        || _        || _        y Nr   embedding_sizezThe hidden size (z6) is not a multiple of the number of attention heads ()r  )rI   rJ   rL   num_attention_headshasattrr   rX   r   attention_head_sizeall_head_sizer  r   rR   r  r  r  r   attention_probs_dropout_probr   
is_decoder	is_causal	layer_idxrW   rX   r(  r)  rY   s       r7   rJ   z!BridgeTowerSelfAttention.__init__  sP    : ::a?PVXhHi#F$6$6#7 8 445Q8  #)#=#= #&v'9'9F<V<V'V#W !558P8PP//5YYv1143E3EF
99V//1C1CDYYv1143E3EF
zz&"E"EF ++""r6   r+   r[   past_key_valuescache_positionr  r   c                    |j                   d d }g |d| j                  } | j                  |      j                  | j	                  dd      } | j                  |      j                  | j	                  dd      }	 | j                  |      j                  | j	                  dd      }
|A|}t        |t              r|j                  }|j                  |	|
| j                  d|i      \  }	}
t        j                  | j                  j                  t               } || ||	|
|f| j"                  sdn| j$                  j&                  | j(                  d|\  }} |j*                  g |d j-                         }||fS )Nr   r    r   r,          r   r  )r   r$  r  r   r   r  r  r   r   self_attention_cacheupdater)  r   get_interfacerX   _attn_implementationr  r  r   r  r  r   r  )rW   r+   r[   r+  r,  r  input_shapehidden_shapequery_layer	key_layervalue_layercurrent_past_key_valuesattention_interfacer  r  s                  r7   rj   z BridgeTowerSelfAttention.forward  s    $))#2.CCbC$*B*BC 5djj/44lCMMaQRS0DHH]+00,?II!QO	4djj/44lCMMaQRS&&5#/+>?*9*N*N' &=%C%C!>2	&"I{ )@(M(MKK,,.E)
 %8	%
  $}}C$,,..LL	%
 	%
!\ *k));;;;FFHL((r6   FNNNNr-   r.   r/   rJ   r1   rk   r2   r   r   r   r4   rj   rl   rm   s   @r7   r  r    s}    #6 48(,.2-)||-) ))D0-) 	-)
 t+-) +,-) 
u||	-)r6   r  c                        e Zd Zd
 fd	Z	 	 	 ddej
                  dej                  dz  dej                  dz  dedz  dee	   de
ej
                     fd	Z xZS )BridgeTowerCrossAttentionNc                    t         |           |j                  |j                  z  dk7  r2t	        |d      s&t        d|j                   d|j                   d      || _        |j                  | _        t        |j                  |j                  z        | _        | j                  | j                  z  | _	        | j                  dz  | _
        t        j                  |j                  | j                        | _        t        j                  |j                  | j                        | _        t        j                  |j                  | j                        | _        t        j                   |j"                        | _        || _        || _        y r  )rI   rJ   rL   r"  r#  r   rX   r   r$  r%  r  r   rR   r  r  r  r   r&  r   r(  r)  r*  s       r7   rJ   z"BridgeTowerCrossAttention.__init__  sC    : ::a?PVXhHi#F$6$6#7 8 445Q8  #)#=#= #&v'9'9F<V<V'V#W !558P8PP//5YYv1143E3EF
99V//1C1CDYYv1143E3EF
zz&"E"EF""r6   r+   encoder_hidden_statesr[   r+  r  r   c                 V   |j                   d d \  }}|j                   d   }||d| j                  f}	||d| j                  f}
 | j                  |      j                  |	 j	                  dd      }|%|j
                  j                  | j                        nd}|]|r[|j                  j                  | j                     j                  }|j                  j                  | j                     j                  }n | j                  |      j                  |
 j	                  dd      } | j                  |      j                  |
 j	                  dd      }|C|j                  j                  ||| j                        \  }}d|j
                  | j                  <   t        j                   | j"                  j$                  t&              } || ||||f| j(                  sdn| j*                  j,                  | j.                  d|\  }}|j1                  ||d      j3                         }||fS )Nr   r    r   FTr.  r/  )r   r$  r  r   r   
is_updatedgetr)  cross_attention_cachelayerskeysrg   r  r  r1  r   r2  rX   r3  r  r  r   r  r  r   r  )rW   r+   rA  r[   r+  r  bsztgt_lensrc_lenq_input_shapekv_input_shaper6  rC  r7  r8  r:  r  r  s                     r7   rj   z!BridgeTowerCrossAttention.forward  s    %**3B/W'--a0gr4+C+CDwD,D,DE 5djj/44mDNNqRSTGVGb_//33DNNChm
&:'==DDT^^TYYI)??FFt~~V]]K<!67<<nMWWXY[\]I@$**%:;@@.Q[[\]_`aK*)8)N)N)U)U{DNN*&	; >B**4>>:(?(M(MKK,,.E)
 %8	%
  $}}C$,,..LL	%
 	%
!\ "))#w;FFHL((r6   r;  r<  )r-   r.   r/   rJ   r1   rk   r2   r   r   r   r4   rj   rl   rm   s   @r7   r?  r?    s    #4 ;?376:2)||2)  %00472) ))D0	2)
 -t32) +,2) 
u||	2)r6   r?  c                        e Zd Zd fd	Z	 	 	 	 	 ddej
                  dej                  dz  dej                  dz  dej                  dz  dedz  dej
                  dz  d	ee	   d
e
ej
                     fdZ xZS )BridgeTowerAttentionNc                     t         |           || _        |rt        nt        } ||||      | _        t        |      | _        y )Nr(  r)  )rI   rJ   is_cross_attentionr?  r  rW   r   output)rW   rX   r(  r)  rQ  attention_classrY   s         r7   rJ   zBridgeTowerAttention.__init__K  s=    "47I3Og#Fi9U	+F3r6   r+   r[   rA  encoder_attention_maskr+  r,  r  r   c                     | j                   s|n|} | j                  |f||||d|\  }}	| j                  ||      }||	fS )N)rA  r[   r+  r,  )rQ  rW   rR  )
rW   r+   r[   rA  rT  r+  r,  r  attention_outputr  s
             r7   rj   zBridgeTowerAttention.forwardR  sg     04/F/FLb)2*
"7)+)*
 *
&,  ;;'7G--r6   )FNFNNNNNr=  rm   s   @r7   rN  rN  J  s    4 48:>;?(,.2.||. ))D0.  %0047	.
 !& 1 1D 8. . t+. +,. 
u||	.r6   rN  c                   @     e Zd Zd fd	Z	 	 	 ddee   fdZd Z xZS )BridgeTowerBertCrossLayerc                 $   t         |           |j                  | _        d| _        t	        |d|      | _        |j                  | _        |j                  | _        t	        |d|d      | _        t        |      | _
        t        |      | _        y )Nr    TrP  Fr(  r)  rQ  )rI   rJ   chunk_size_feed_forwardseq_len_dimrN  rd   r'  add_cross_attentioncrossattentionr   intermediater  rR  rW   rX   r)  rY   s      r7   rJ   z"BridgeTowerBertCrossLayer.__init__j  s    '-'E'E$-fPYZ ++#)#=#= 2#	
 4F;'/r6   r  c                      | j                   |f|d d|\  }}|}	 | j                  |	f||||d|\  }
}|
}	t        | j                  | j                  | j
                  |	      }|||fS )N)r[   r+  )r[   rA  rT  r+  )rd   r_  r   feed_forward_chunkr\  r]  )rW   r+   rA  r[   rT  r+  r  self_attention_outputself_attn_weightsrV  cross_attention_outputcross_attn_weightslayer_outputs                r7   rj   z!BridgeTowerBertCrossLayer.forwardz  s     4B4>>4
) 4
 	4
00 15HT5H5H6
)"7#9+6
 6
2 2 20##T%A%A4CSCSUe
 
 	
r6   c                 L    | j                  |      }| j                  ||      }|S rf   r`  rR  rW   rV  intermediate_outputrh  s       r7   rc  z,BridgeTowerBertCrossLayer.feed_forward_chunk  ,    "//0@A{{#68HIr6   rf   r<  )	r-   r.   r/   rJ   r   r   rj   rc  rl   rm   s   @r7   rY  rY  i  s.    0( #"
 +,"
Hr6   rY  c                        e Zd Zd fd	Z	 	 	 	 	 ddej
                  dej                  dz  dej                  dz  dej                  dz  dedz  dej
                  dz  d	ee	   d
e
ej
                     fdZd Z xZS )BridgeTowerTextLayerNc                    t         |           |j                  | _        d| _        t	        ||j
                  |      | _        |j
                  | _        |j                  | _        | j                  r.| j
                  st        |  d      t	        |d|d      | _	        t        |      | _        t        |      | _        y )Nr    rP  z> should be used as a decoder model if cross attention is addedFTr[  )rI   rJ   r\  r]  rN  r'  rd   r^  r   r_  r   r`  r  rR  ra  s      r7   rJ   zBridgeTowerTextLayer.__init__  s    '-'E'E$-f@Q@Q]fg ++#)#=#= ##?? D6)g!hii"6##'	#D 4F;'/r6   r+   r[   rA  rT  r+  r,  r  r   c                 6   d} | j                   ||f||d|\  }	}
|	}| j                  r=|;t        | d      st        d|  d       | j                  |	d ||fd|i|\  }}|}|f}t        | j                  | j                  | j                  |      }|||
fz   S )Nr5   )r+  r,  r_  z'If `encoder_hidden_states` are passed, z` has to be instantiated with cross-attention layers by setting `config.add_cross_attention=True`r+  )	rd   r'  r#  r   r_  r   rc  r\  r]  )rW   r+   r[   rA  rT  r+  r,  r  outputsrd  re  rV  rf  rg  rh  s                  r7   rj   zBridgeTowerTextLayer.forward  s
    3A4>>4
 ,)	4

 4
00 1??4@4!12 =dV DD D 
 :M9L9L%%&	:
 !0: :6"$6  6)+G0##T%A%A4CSCSUe
 
 
 	
r6   c                 L    | j                  |      }| j                  ||      }|S rf   rj  rk  s       r7   rc  z'BridgeTowerTextLayer.feed_forward_chunk  rm  r6   rf   rW  )r-   r.   r/   rJ   r1   rk   r2   r   r   r   r4   rj   rc  rl   rm   s   @r7   ro  ro    s    0. 48:>;?(,.2,
||,
 ))D0,
  %0047	,

 !& 1 1D 8,
 ,
 t+,
 +,,
 
u||	,
\r6   ro  c                       e Zd Z fdZ	 	 	 	 	 	 	 	 ddej
                  dej                  dz  dej                  dz  dej                  dz  dedz  dedz  d	edz  d
edz  dej
                  dz  de	e
   deej
                     ez  fdZ xZS )BridgeTowerTextEncoderc           	          t         |           || _        t        j                  t        |j                        D cg c]  }t        ||       c}      | _        y c c}w )N)r)  )	rI   rJ   rX   r   rs   rt   rq   ro  ri   )rW   rX   irY   s      r7   rJ   zBridgeTowerTextEncoder.__init__  sI    ]]@EfF^F^@_`1!&A6`

`s   ANr+   r[   rA  rT  r+  	use_cacheoutput_attentionsoutput_hidden_statesr,  r  r   c
                 D   |rdnd }|rdnd }|r| j                   j                  rdnd }t        | j                        D ]E  \  }} ||||f|||	d|
}|d   }|s||d   fz   }| j                   j                  s=||d   fz   }G |r||fz   }t	        ||r|nd |||      S )Nr5   )rT  r+  r,  r   r    r   )last_hidden_stater+  r+   r,   cross_attentions)rX   r^  	enumerateri   r   )rW   r+   r[   rA  rT  r+  rx  ry  rz  r,  r  all_hidden_statesall_self_attentionsall_cross_attentionsrw  layer_modulelayer_outputss                    r7   rj   zBridgeTowerTextEncoder.forward  s     #7BD$5b4%64;;;Z;Zr`d(4 	VOA|(% (> /- M *!,M &9]1=M<O&O#;;22+?=QRCSBU+U(	V"   1]4D D8+/8Od+*1
 	
r6   )NNNNNFFN)r-   r.   r/   rJ   r1   rk   r2   r   rc   r   r   r4   r   rj   rl   rm   s   @r7   ru  ru    s    
 48:>;?(,!%).,1.2+
||+
 ))D0+
  %0047	+

 !& 1 1D 8+
 +
 $;+
  $;+
 #Tk+
 t++
 +,+
 
u||	H	H+
r6   ru  c                        e Zd ZdZ fdZ	 	 	 	 	 ddej                  dz  dej                  dz  dej                  dz  dej                  dz  ded	ej                  fd
Z
ed        Zedd       Z xZS )BridgeTowerTextEmbeddingszGConstruct the embeddings from word, position and token_type embeddings.c                 T   t         |           t        j                  |j                  |j
                  |j                        | _        t        j                  |j                  |j
                        | _	        t        j                  |j
                  |j                        | _
        t        j                  |j                        | _        | j                  dt!        j"                  |j$                        j'                  d      d       | j                  dt!        j(                  | j*                  j-                         t         j.                        d       |j                  | _        t        j                  |j$                  |j
                  | j0                        | _        y )	N)padding_idxrC   r   r   Fr   token_type_idsr   )rI   rJ   r   r   
vocab_sizerL   pad_token_idword_embeddingstype_vocab_sizetoken_type_embeddingsrN   rO   r   r   r   r   r1   r   max_position_embeddingsr   zerosr   r   longr  position_embeddingsrV   s     r7   rJ   z"BridgeTowerTextEmbeddings.__init__(  s4   !||F,=,=v?Q?Q_e_r_rs%'\\&2H2H&J\J\%]"f&8&8f>S>STzz&"<"<=ELL)G)GHOOPWXej 	 	
 	ekk$*;*;*@*@*B%**Ubg 	 	
 "..#%<<**F,>,>DL\L\$
 r6   N	input_idsr  r   inputs_embedspast_key_values_lengthr   c                    |<|| j                  || j                  |      }n| j                  || j                        }||j                         }n|j                         d d }|\  }}|t	        | d      rT| j
                  j                  |j                  d   d      }	t        j                  |	d|      }	|	j                  ||      }n:t        j                  |t        j                  | j                  j                        }|| j                  |      }| j                  |      }
||
z   }| j!                  |      }||z   }| j#                  |      }| j%                  |      }|S )Nr   r  r   r    )r   indexr]   )"create_position_ids_from_input_idsr  &create_position_ids_from_inputs_embedsr   r#  r  r   r   r1   gatherr  r  r   r_   r  r  r  rN   r   )rW   r  r  r   r  r  r4  r   
seq_lengthbuffered_token_type_idsr  r   r  s                r7   rj   z!BridgeTowerTextEmbeddings.forward<  sn    $#FFt//1G   $JJ=Z^ZjZjk #..*K',,.s3K!,
J
 !t-.*.*=*=*D*D\EWEWXYEZ\^*_'*/,,7NTU]i*j'!8!?!?
J!W!&[

SWSdSdSkSk!l  00;M $ : :> J"%::
"66|D"55
^^J/
\\*-
r6   c                     | j                         dd }|d   }t        j                  |dz   ||z   dz   t        j                  | j                        }|j                  d      j                  |      S )z
        We are provided embeddings directly. We cannot infer which are padded so just generate sequential position ids.

        Args:
            inputs_embeds: torch.Tensor

        Returns: torch.Tensor
        Nr   r    r]   r   )r   r1   r   r  r_   r   r   )r  r  r4  sequence_lengthr   s        r7   r  z@BridgeTowerTextEmbeddings.create_position_ids_from_inputs_embedsl  sp     $((*3B/%a.||!O_{:Q>ejjYfYmYm
 %%a(//<<r6   c                     | j                  |      j                         }t        j                  |d      j	                  |      |z   |z  }|j                         |z   S )a  
        Replace non-padding symbols with their position numbers. Position numbers begin at padding_idx+1. Padding symbols
        are ignored. This is modified from fairseq's `utils.make_positions`.

        Args:
            x: torch.Tensor x:

        Returns: torch.Tensor
        r    r   )ner   r1   cumsumtype_asr  )r  r  r  maskincremental_indicess        r7   r  z<BridgeTowerTextEmbeddings.create_position_ids_from_input_ids~  sW     ||K(,,.$||Da8@@FI__cgg"'')K77r6   )NNNNr   )r   )r-   r.   r/   r0   rJ   r1   
LongTensorr2   r   rk   rj   staticmethodr  r  rl   rm   s   @r7   r  r  %  s    Q
, .2260426&'.##d*. ((4/. &&-	.
 ((4/. !$. 
.` = =" 8 8r6   r  c                   x    e Zd ZU eed<   dZdZdZddgZdZ	 e
j                         dej                  fd	       Zy
)BridgeTowerPreTrainedModelrX   bridgetower)imagetextFr  r@   r+  r  c                    | j                   j                  }t        |t              r| j                   j                  dz  d| j                   j
                  z  dz  z  }| j                   j                  dz  }d| j                   j                  z  dz  }|j                  j                  D ]  }t        j                  |j                  j                  ||z         t        j                  |j                  j                         t        j                  |j                  j                  j                  ||z         t        j                  |j                   j"                  j                  ||z         t        j                  |j                   j$                  j                  ||z          t        j                  |j&                  j(                  ||z         t        j                  |j&                  j*                  j                  ||z         nt        |t,        j.                  t,        j0                  t,        j2                  f      r't        j                  |j                  dd|z         nt        |t,        j4                        r@t        j                  |j6                         t        j8                  |j                         n*t        |t:              r5t        j<                  |j>                  | j                   j@                         nt        |tB              rMt        jD                  |jF                  tI        jJ                  |jL                        jO                  d             nt        |tP              rxt        jD                  |jF                  tI        jJ                  |jF                  jR                  d         jO                  d             t        j                  |jT                         t        |t,        j.                  tV        f      r-|j6                   t        j                  |j6                         y y y )	Nr  r   )stdr.  g?)meanr  r   r   ),rX   initializer_factorr   r   rL   rq   r   ru   initnormal_rM   in_proj_weightzeros_in_proj_biasout_projr   rS   rE   rH   r   r   r   r   rR   r   r   rN   r   ones_!BridgeTowerForContrastiveLearning	constant_logit_scalelogit_scale_init_valuer   copy_r   r1   r   r   r   r  r   r  BridgeTowerMLMHead)rW   r  r  proj_stdattn_stdfc_stdr|   s          r7   _init_weightsz(BridgeTowerPreTrainedModel._init_weights  s   kk,,f:;//51t{{?\?\;\ae:efH{{..4H$++111d:F++55 JUZZ66HsNKEJJ334UZZ0077X^LUYY^^22EUYY--44(S.IJ LL**::3OLL**==DD(UX.YBIIr|| DELLSdSjA-KK$JJv}}% ABNN6--t{{/Q/QR ;<JJv**ELL9M9M,N,U,UV],^_ 9:JJv**ELL9L9L9R9RSU9V,W,^,^_f,ghKK--.fryy*<=>6;;CZKK$ D[>r6   N)r-   r.   r/   r!   r3   base_model_prefixinput_modalitiessupports_gradient_checkpointing_no_split_modules_skip_keys_device_placementr1   no_gradr   Moduler  r5   r6   r7   r  r    sQ    %(&+#35ST"3U]]_%BII % %r6   r  c                   F     e Zd ZU eed<   dZ fdZed        ZddZ	 xZ
S )BridgeTowerVisionModelrX   )r  c                 d    t         |   |       t        |      | _        | j	                          y rf   )rI   rJ   r   visual	post_initrV   s     r7   rJ   zBridgeTowerVisionModel.__init__  s&     26:r6   c                 j    | j                   j                  j                  j                  j                  S rf   )r  r   r   r   r^   rW   s    r7   r^   zBridgeTowerVisionModel.dtype  s$    {{%%55<<BBBr6   c                 Z    | j                  |j                  | j                        ||      S rf   )r  typer^   )rW   r  
image_maskr   r  s        r7   rj   zBridgeTowerVisionModel.forward  s#    {{5::djj1:?WXXr6   )NF)r-   r.   r/   r#   r3   r  rJ   propertyr^   rj   rl   rm   s   @r7   r  r    s0    ##!
 C CYr6   r  a0  
    The model can behave as an encoder (with only self-attention) as well as a decoder, in which case a layer of
    cross-attention is added between the self-attention layers, following the architecture described in *Attention is
    all you need*_ by Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N. Gomez, Lukasz
    Kaiser and Illia Polosukhin.

    To behave as an decoder the model needs to be initialized with the `is_decoder` argument of the configuration set
    to `True`. To be used in a Seq2Seq model, the model needs to initialized with both `is_decoder` argument and
    `add_cross_attention` set to `True`; an `encoder_hidden_states` is then expected as an input to the forward pass.

    .. _*Attention is all you need*: https://huggingface.co/papers/1706.03762
    c                        e Zd ZU eed<   dZd fd	Zd Zd Ze	e
	 	 	 	 	 	 	 	 	 	 	 	 ddej                  dz  dej                  dz  d	ej                  dz  d
ej                  dz  dej                  dz  dej                  dz  dej                  dz  dedz  dedz  dedz  dedz  dej                  dz  dee   deej                     ez  fd              Zd Z xZS )BridgeTowerTextModelrX   )r  c                     t         |   |       || _        d| _        t	        |      | _        t        |      | _        |rt        |      nd| _	        | j                          y)zv
        add_pooling_layer (bool, *optional*, defaults to `True`):
            Whether to add a pooling layer
        FN)rI   rJ   rX   gradient_checkpointingr  r   ru  encoderr  poolerr  )rW   rX   add_pooling_layerrY   s      r7   rJ   zBridgeTowerTextModel.__init__  sV    
 	 &+#3F;-f53D'/$ 	r6   c                 .    | j                   j                  S rf   r   r  r  s    r7   get_input_embeddingsz)BridgeTowerTextModel.get_input_embeddings  s    ...r6   c                 &    || j                   _        y rf   r  rW   r  s     r7   set_input_embeddingsz)BridgeTowerTextModel.set_input_embeddings  s    */'r6   Nr  r[   r  r   r  rA  rT  r+  rx  ry  rz  r,  r  r   c                    |
|
n| j                   j                  }
||n| j                   j                  }| j                   j                  r|	|	n| j                   j                  }	nd}	| j
                  r%| j                  r|	rt        j                  d       d}	|	r6|4t        t        | j                         t        | j                               }|d u |d uz  rt        d      ||j                  }|j                  }n|j                  }|j                  d d }|d   }||j                         nd}|t        j                   |||z   |      }| j#                  |||||	      }| j%                  ||||||
      \  }} | j&                  |f|||||	|
|||d	|}|d   }| j(                  | j)                  |      nd }t+        |||j,                  |j.                  |j0                  |j2                        S )NFzZ`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...)rX   z:You must specify exactly one of input_ids or inputs_embedsr   r    r   r_   )r  r   r  r  r  )r[   rT  embedding_outputrA  r,  r+  )	r[   rA  rT  r+  rx  ry  rz  r,  r   )r|  r*   r+  r+   r,   r}  )rX   ry  rz  r'  rx  r  r  loggerwarning_oncer   r   r   r_   r   get_seq_lengthr1   r   r   _create_attention_masksr  r  r   r+  r+   r,   r}  )rW   r  r[   r  r   r  rA  rT  r+  rx  ry  rz  r,  r  r_   r4  r  r  r  encoder_outputssequence_outputr  s                         r7   rj   zBridgeTowerTextModel.forward  s=   * 2C1N-TXT_T_TqTq$8$D $++JjJj 	 ;;!!%.%:	@U@UII&&4==##p "	01,dkk2RT`hlhshsTtuO-t";<YZZ %%F#//K"))F'--cr2K ^
ETE`!?!?!Afg!"\\*@BX[eBentuN??%)'#9 + 
 261M1M)#9-"7)+ 2N 2
.. '$,,
)"7#9+/!5)%
 
 *!,8<8OO4UY;-'+;;)77&11,==
 	
r6   c                     | j                   j                  rt        | j                   ||||      }nt        | j                   ||      }|t        | j                   |||      }||fS )N)rX   r  r[   r,  r+  )rX   r  r[   )rX   r  r[   rA  )rX   r'  r   r   )rW   r[   rT  r  rA  r,  r+  s          r7   r  z,BridgeTowerTextModel._create_attention_masksV  sx     ;;!!/{{.-- /N 7{{.-N "-%>{{.5&;	&" 555r6   )T)NNNNNNNNNNNN)r-   r.   r/   r"   r3   r  rJ   r  r  r   r   r1   rk   r   rc   r   r   r4   r   rj   r  rl   rm   s   @r7   r  r    so    "! "/0  *..2.2,0-1596:(,!%)-,0.2[
<<$&[
 t+[
 t+	[

 llT)[
 ||d*[
  %||d2[
 !&t 3[
 [
 $;[
  $;[
 #Tk[
 t+[
 +,[
 
u||	K	K[
	  
[
| 6r6   r  zv
    The bare BridgeTower Model transformer outputting BridgeTowerModelOutput object without any specific head on
    c                       e Zd Z fdZd Zd Ze	 	 	 	 	 	 	 	 	 	 	 	 	 ddej                  dz  dej                  dz  dej                  dz  dej                  dz  d	ej                  dz  d
ej                  dz  dej                  dz  de
dz  dedz  dedz  dedz  dej                  dz  dedeej                     ez  fd       Zd Z xZS )BridgeTowerModelc           	         t         |   |       || _        |j                  }|j                  }|j
                  r_t        j                  |j                  |j                        | _	        t        j                  |j                  |j                        | _
        nt        j                  t        |j                        D cg c],  }t        j                  |j                  |j                        . c}      | _	        t        j                  t        |j                        D cg c],  }t        j                  |j                  |j                        . c}      | _
        t        j                  d|j                        | _        t!        |      | _        t%        |      | _        |j(                  s|j*                  r| j"                  j,                  j.                  D ]  }| j"                  j,                  j0                  j2                  j4                  |j2                  _        | j"                  j,                  j0                  j6                  j4                  |j6                  _         t        j                  t        |j                        D cg c]  }t9        |       c}      | _        t        j                  t        |j                        D cg c]  }t9        |       c}      | _        t?        |      | _         t?        |      | _!        t        jD                  |j                  |jF                        | _$        t        jD                  |j                  |jF                        | _%        |jL                  r!tO        |      | _(        tO        |      | _)        nt        j                  t        |j                  dz
        D cg c]  }tO        |       c}      | _(        t        j                  t        |j                  dz
        D cg c]  }tO        |       c}      | _)        | jU                          y c c}w c c}w c c}w c c}w c c}w c c}w )Nr   rC   r    )+rI   rJ   rX   vision_configtext_config$share_cross_modal_transformer_layersr   rR   rL   cross_modal_text_transformcross_modal_image_transformrs   rt   rq   r   r  r  vision_modelr  
text_modelr   "init_layernorm_from_vision_encoderr  cross_modal_ln_separater   r   datar   rY  cross_modal_image_layerscross_modal_text_layersr  cross_modal_image_poolercross_modal_text_poolerrN   rO   cross_modal_text_layernormcross_modal_image_layernormshare_link_tower_layersr   cross_modal_text_link_towercross_modal_image_link_towerr  )rW   rX   r  r  rx   r   rY   s         r7   rJ   zBridgeTowerModel.__init__  s9    ,,((66.0ii8O8OQWQcQc.dD+/1yy9R9RTZTfTf/gD,.0mmQVW]WoWoQpqA;22F4F4FGq/D+ 02}}SXY_YqYqSrsa=44f6H6HIs0D, &(\\!V5G5G%H"2=A.{;,,1Z1Z''..FF J!%!2!2!9!9!A!A!H!H!M!M		#0077??DDIIJ )+=B6C[C[=\]&{3])
% (*}}=B6C[C[=\]&{3](
$
 ):&(A%'8'@$ +-,,v7I7IvOdOd*e'+-<<8J8JPVPePe+f())/CF/KD,0DV0LD-/1}}7<V=U=UXY=Y7Z[!%f-[0D, 137<V=U=UXY=Y7Z[!%f-[1D- 	W r t ^ ^  \ \s$   1P=$1QQQQQc                 6    | j                   j                         S rf   )r  r  r  s    r7   r  z%BridgeTowerModel.get_input_embeddings  s    3355r6   c                 :    | j                   j                  |       y rf   )r  r  r  s     r7   r  z%BridgeTowerModel.set_input_embeddings  s    ,,U3r6   Nr  r[   r  r   
pixel_maskr  r=   image_token_type_idxry  rz  return_dictlabelsr   r   c                    |	|	n| j                   j                  }	|
|
n| j                   j                  }
|
rdnd}|
rdnd}|
rdnd}|
rdnd}|	rdnd}||t        d      ||n| j                   j                  }|xs d}|j                         }| j                  j                  |      }|
r||fz  }|0t        j                  |t        j                  |j                        }| j                  j                  ||      j                  |j                        }t        | j                  j                  j                         | j                   j"                  z
  dz   }| j                  j                  j                   d| D ]  } |||      d   }|
s||fz  } |K| j$                  j&                  j)                  |j+                  | j$                  j,                        |      }n|j/                  ddd	      }|
r||fz  }| j$                  j&                  j0                  j2                  d| D ]  } ||      }|
s||fz  } | j$                  j&                  j5                  |j+                  | j$                  j,                              }| j7                  |      }| j9                  t        j:                  dt        j                  |j                              j=                  |      }| j?                  ||z         }| jA                  |      }| j9                  t        jB                  d
|t        j                  |j                              j=                  |      }||z   }| jE                  |      }t        j                  |j                  d      |j                  d      ft        j                  |j                        }| j                  j                  ||j                               j                  |j                        } | jF                  d   |||||	      } | d   }! | jH                  d   |||||	      }"|"d   }#|
r||!|#ffz  }|	r|| d   |"d   ffz  }d}$tK        |t        | j                  j                  j                               D ]r  }% | j                  j                  j                   |%   ||      d   } | j$                  j&                  j0                  j2                  |%   |      j+                  | j$                  j,                        }| jA                  | j$                  j&                  j5                  |            |z   }| jL                  |$   }&| jN                  |$   }' |&| j7                  |      |z   |!|      }( |'||#|      }) | jF                  |$dz      |(|)|||	      } | d   }! | jH                  |$dz      |)|(|||	      }"|"d   }#|$dz  }$|
r||fz  }||fz  }||!|#ffz  }|	se|| d   |"d   ffz  }u |!|#}+}*| jQ                  |*|+      },|
r|||f}|stS        d |*|+|,||fD              S tU        |*|+|,||      S )a  
        image_embeds (`torch.FloatTensor` of shape `(batch_size, num_patches, hidden_size)`, *optional*):
            Optionally, instead of passing `pixel_values`, you can choose to directly pass an embedded representation.
            This is useful if you want more control over how to convert `pixel_values` into patch embeddings.
        image_token_type_idx (`int`, *optional*):
            - The token type ids for images.
        output_hidden_states (`bool`, *optional*):
            If set to `True`, hidden states are returned as a list containing the hidden states of text, image, and
            cross-modal components respectively. i.e. `(hidden_states_text, hidden_states_image,
            hidden_states_cross_modal)` where each element is a list of the hidden states of the corresponding
            modality. `hidden_states_txt/img` are a list of tensors corresponding to unimodal hidden states and
            `hidden_states_cross_modal` is a list of tuples containing `cross_modal_text_hidden_states` and
            `cross_modal_image_hidden_states` of each brdige layer.
        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels are currently not supported.

        Examples:

        ```python
        >>> from transformers import BridgeTowerProcessor, BridgeTowerModel
        >>> from PIL import Image
        >>> import httpx
        >>> from io import BytesIO

        >>> # prepare image and text
        >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
        >>> with httpx.stream("GET", url) as response:
        ...     image = Image.open(BytesIO(response.read()))
        >>> text = "hello world"
        >>> processor = BridgeTowerProcessor.from_pretrained("BridgeTower/bridgetower-base")
        >>> model = BridgeTowerModel.from_pretrained("BridgeTower/bridgetower-base")

        >>> inputs = processor(image, text, return_tensors="pt")
        >>> outputs = model(**inputs)
        >>> outputs.keys()
        odict_keys(['text_features', 'image_features', 'pooler_output'])
        ```Nr5   zYBridgeTowerModel does not use `inputs_embeds`.  Make sure to pass in `input_ids` instead.r    )r  r]   r   r   r   r    )r[   rT  ry  c              3   $   K   | ]  }|| 
 y wrf   r5   ).0vs     r7   	<genexpr>z+BridgeTowerModel.forward.<locals>.<genexpr>  s      = s   )r(   r)   r*   r+   r,   )+rX   ry  rz  r   use_return_dictr   r  r   r1   onesr  r_   get_extended_attention_maskrb   lenr  ri   rq   r  r  r   r  r^   r   r   ru   r   r  r  r  	expand_asr  r  fullr  r  r  rt   r  r  get_cls_featuresr4   r'   )-rW   r  r[   r  r   r  r  r=   r  ry  rz  r  r  r   r  all_hidden_states_textall_hidden_states_imageall_hidden_states_crossr  r  r4  r<   extend_text_maskssplit_indexri   r|   image_embeds_with_lncross_modal_texttext_token_type_embeddingsimage_token_type_embeddingscross_modal_imageextend_image_maskslayer_outputs_textcross_text_featureslayer_outputs_imagecross_image_featureslink_layer_indexrw  text_link_towerimage_link_towercross_text_features_cross_image_features_r(   r)   cls_featuress-                                                r7   rj   zBridgeTowerModel.forward  s   n 2C1N-TXT_T_TqTq$8$D $++JjJj 	 (<(<"$(<"$"6BD$5b4$):%k  &1%<k$++B]B]38qnn&oo0090E"{n4"!"ZZ5::iN^N^_N OOGGXcdgg

 $//117784;;;X;XX[\\ __,,22<K@ 	9E->?BK#&;.8&		9 ,,33??!!$"3"3"9"9:Um @ L
 (//1a8L#6# &&--99CCL[Q 	;E .L#'L?:'	;
  $0077DD\EVEVW[WhWhWnWnEop  ::;G%)%?%?KKI4D4DE&

)$
% 	#  ::;KNh;hi#??@TU&*&@&@JJt1IL\L\]'

)(
) 	$  46QQ <<=QRZZ##A&(9(>(>q(AB**##


 "__HHU_UdUdUfgjj
 =T99!<,#5/
 13>d;;A>-#4/
  315#)<>R(S'UU#%7%:<OPQ<R$S#UU {C(?(?(E(E$FG 0	ZA:$//1177:;HYZ[\]KL4,,33??II!L\Z__!!''L 001B1B1I1I1V1VWc1de-. !
 #>>?OPO#@@AQR $3//<?YY#!$ 
 %55IK_as$t! "T!=!=>NQR>R!S$%0'9"3" #5Q"7"U$"?"?@PST@T"U%$1'8"3# $7q#9 !#&;.8&'L?:''-@BV,W+YY' #);A)>@STU@V(W'YY#a0	Zf )<=Q~,,]NK!79PRi j 'GXZmn   &')&+*
 	
r6   c                 x    | j                  |      }| j                  |      }t        j                  ||gd      S )Nr   r   )r  r  r1   r   )rW   r(   r)   cls_features_textcls_features_images        r7   r  z!BridgeTowerModel.get_cls_features  s<     88G!::>Jyy+-?@bIIr6   )NNNNNNNNNNNNF)r-   r.   r/   rJ   r  r  r   r1   r  r2   r   rc   r4   rk   r'   rj   r  rl   rm   s   @r7   r  r  y  sh   6p64  .2372615.22615+/)-,0#'*.).l
##d*l
 ))D0l
 ((4/	l

 ''$.l
 $$t+l
 ((4/l
 ''$.l
 "Djl
  $;l
 #Tkl
 D[l
   4'l
 #'l
  
u||	5	5!l
 l
\Jr6   r  c                   $     e Zd Z fdZd Z xZS )"BridgeTowerPredictionHeadTransformc                 h   t         |           t        j                  |j                  |j                        | _        t        |j                  t              rt        |j                     | _
        n|j                  | _
        t        j                  |j                  |j                        | _        y r   )rI   rJ   r   rR   rL   r   r   r   r   r
   transform_act_fnrN   rO   rV   s     r7   rJ   z+BridgeTowerPredictionHeadTransform.__init__  s{    YYv1163E3EF
f''-$*6+<+<$=D!$*$5$5D!f&8&8f>S>STr6   c                 l    | j                  |      }| j                  |      }| j                  |      }|S rf   )r   r-  rN   r   s     r7   rj   z*BridgeTowerPredictionHeadTransform.forward  s4    

=1--m<}5r6   r   rm   s   @r7   r+  r+    s    Ur6   r+  c                   &     e Zd Zd fd	Zd Z xZS )r  c                 p   t         |           || _        t        |      | _        t        j                  |j                  |j                  j                  d      | _
        t        j                  t        j                  |j                  j                              | _        ||| j                  _        y y )NF)r   )rI   rJ   rX   r+  	transformr   rR   rL   r  r  decoderr   r1   r  r   r   )rW   rX   r   rY   s      r7   rJ   zBridgeTowerMLMHead.__init__  s    ;FCyy!3!3V5G5G5R5RY^_LLV-?-?-J-J!KL	"(DLL r6   c                 d    | j                  |      }| j                  |      | j                  z   }|S rf   )r1  r2  r   )rW   x	mlm_scores      r7   rj   zBridgeTowerMLMHead.forward  s-    NN1%	LL+dii7	r6   rf   r   rm   s   @r7   r  r    s    )r6   r  c                   $     e Zd Z fdZd Z xZS )BridgeTowerITMHeadc                 X    t         |           t        j                  |d      | _        y Nr   rI   rJ   r   rR   fc)rW   rL   rY   s     r7   rJ   zBridgeTowerITMHead.__init__  s     ))K+r6   c                 (    | j                  |      }|S rf   r;  )rW   r4  	itm_scores      r7   rj   zBridgeTowerITMHead.forward  s    GGAJ	r6   r   rm   s   @r7   r7  r7    s    ,r6   r7  z\
    BridgeTower Model with a language modeling head on top as done during pretraining.
    c                       e Zd ZddiZ fdZd Zd Ze	 	 	 	 	 	 	 	 	 	 	 ddej                  dz  dej                  dz  d	ej                  dz  d
ej                  dz  dej                  dz  dej                  dz  dej                  dz  dedz  dedz  dedz  dej                  dz  deeej                     z  fd       Z xZS )BridgeTowerForMaskedLMzmlm_score.decoder.weightz8bridgetower.text_model.embeddings.word_embeddings.weightc                     t         |   |       t        |      | _        t	        |      | _        | j                          y rf   )rI   rJ   r  r  r  r5  r  rV   s     r7   rJ   zBridgeTowerForMaskedLM.__init__  s5     +F3+F3 	r6   c                 .    | j                   j                  S rf   r5  r2  r  s    r7   get_output_embeddingsz,BridgeTowerForMaskedLM.get_output_embeddings  s    ~~%%%r6   c                 &    || j                   _        y rf   rC  )rW   new_embeddingss     r7   set_output_embeddingsz,BridgeTowerForMaskedLM.set_output_embeddings  s    !/r6   Nr  r[   r  r   r  r  r=   ry  rz  r  r  r   c                    |
|
n| j                   j                  }
| j                  |||||||||	|

      }| j                  |
r|j                  n|d         }d}|kt               }|j                  |j                        } ||j                  d| j                   j                  j                        |j                  d            }|
st        |      }||f|z   S |S t        |||j                  |j                        S )a  
        image_embeds (`torch.FloatTensor` of shape `(batch_size, num_patches, hidden_size)`, *optional*):
            Optionally, instead of passing `pixel_values`, you can choose to directly pass an embedded representation.
            This is useful if you want more control over how to convert `pixel_values` into patch embeddings.
        labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,
            config.vocab_size]` (see `input_ids` docstring) Tokens with indices set to `-100` are ignored (masked), the
            loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`

        Examples:

        ```python
        >>> from transformers import BridgeTowerProcessor, BridgeTowerForMaskedLM
        >>> from PIL import Image
        >>> import httpx
        >>> from io import BytesIO

        >>> url = "http://images.cocodataset.org/val2017/000000360943.jpg"
        >>> with httpx.stream("GET", url) as response:
        ...     image = Image.open(BytesIO(response.read())).convert("RGB")
        >>> text = "a <mask> looking out of the window"

        >>> processor = BridgeTowerProcessor.from_pretrained("BridgeTower/bridgetower-base-itm-mlm")
        >>> model = BridgeTowerForMaskedLM.from_pretrained("BridgeTower/bridgetower-base-itm-mlm")

        >>> # prepare inputs
        >>> encoding = processor(image, text, return_tensors="pt")

        >>> # forward pass
        >>> outputs = model(**encoding)

        >>> results = processor.decode(outputs.logits.argmax(dim=-1).squeeze(0).tolist())

        >>> print(results)
        .a cat looking out of the window.
        ```N	r[   r  r   r  r  r=   ry  rz  r  r   r   r:   r;   r+   r,   )rX   r  r  r5  r(   r   rb   r_   r   r  r  r4   r   r+   r,   )rW   r  r[   r  r   r  r  r=   ry  rz  r  r  r  rr  
mlm_logitsmasked_lm_lossloss_fctrR  s                     r7   rj   zBridgeTowerForMaskedLM.forward  s   h &1%<k$++B]B]""))%!'%/!5# # 
 ^^[G$9$9gVWjY
')HYYz001F%joob$++:Q:Q:\:\&]_e_j_jkm_noN:&F3A3M^%.YSYY!//))	
 	
r6   NNNNNNNNNNN)r-   r.   r/   _tied_weights_keysrJ   rD  rG  r   r1   r  r2   rc   r   r4   rj   rl   rm   s   @r7   r@  r@    sO    56pq&0  .2372615.22615)-,0#'*.R
##d*R
 ))D0R
 ((4/	R

 ''$.R
 $$t+R
 ((4/R
 ''$.R
  $;R
 #TkR
 D[R
   4'R
 
% 1 12	2R
 R
r6   r@  z
    BridgeTower Model transformer with a classifier head on top (a linear layer on top of the final hidden state of the
    [CLS] token) for image-to-text matching.
    c                   z    e Zd Z fdZe	 	 	 	 	 	 	 	 	 	 	 ddej                  dz  dej                  dz  dej                  dz  dej                  dz  dej                  dz  dej                  dz  d	ej                  dz  d
edz  dedz  dedz  dej                  dz  de	e
ej                     z  fd       Z xZS )#BridgeTowerForImageAndTextRetrievalc                     t         |   |       t        |      | _        t	        |j
                  dz        | _        | j                          y r9  )rI   rJ   r  r  r7  rL   r>  r  rV   s     r7   rJ   z,BridgeTowerForImageAndTextRetrieval.__init__R  s@     +F3+F,>,>,BC 	r6   Nr  r[   r  r   r  r  r=   ry  rz  r  r  r   c                    |
|
n| j                   j                  }
| j                  |||||||||	|

      }|
r|j                  n|d   }| j	                  |      }d}|.t               }|j                  |j                        } |||      }|
st        |      }||f|z   S |S t        |||j                  |j                        S )a^  
        image_embeds (`torch.FloatTensor` of shape `(batch_size, num_patches, hidden_size)`, *optional*):
            Optionally, instead of passing `pixel_values`, you can choose to directly pass an embedded representation.
            This is useful if you want more control over how to convert `pixel_values` into patch embeddings.
        labels (`torch.LongTensor` of shape `(batch_size, 1)`, *optional*):
            Labels for computing the image-text matching loss. 0 means the pairs don't match and 1 means they match.
            The pairs with 0 will be skipped for calculation.

        Examples:

        ```python
        >>> from transformers import BridgeTowerProcessor, BridgeTowerForImageAndTextRetrieval
        >>> import httpx
        >>> from io import BytesIO
        >>> from PIL import Image

        >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
        >>> with httpx.stream("GET", url) as response:
        ...     image = Image.open(BytesIO(response.read()))
        >>> texts = ["An image of two cats chilling on a couch", "A football player scoring a goal"]

        >>> processor = BridgeTowerProcessor.from_pretrained("BridgeTower/bridgetower-base-itm-mlm")
        >>> model = BridgeTowerForImageAndTextRetrieval.from_pretrained("BridgeTower/bridgetower-base-itm-mlm")

        >>> # forward pass
        >>> scores = dict()
        >>> for text in texts:
        ...     # prepare inputs
        ...     encoding = processor(image, text, return_tensors="pt")
        ...     outputs = model(**encoding)
        ...     scores[text] = outputs.logits[0, 1].item()
        ```NrI  r   rJ  )rX   r  r  r*   r>  r   rb   r_   r4   r   r+   r,   )rW   r  r[   r  r   r  r  r=   ry  rz  r  r  r  rr  r*   r;   itm_lossrM  rR  s                      r7   rj   z+BridgeTowerForImageAndTextRetrieval.forward\  s    ` &1%<k$++B]B]""))%!'%/!5# # 
 2=--'!*.')HYYv}}-F/H6]F-5-AXK&(MvM'!//))	
 	
r6   rN  )r-   r.   r/   rJ   r   r1   r  r2   rc   r   r4   rj   rl   rm   s   @r7   rQ  rQ  K  s7     .2372615.22615)-,0#'*.R
##d*R
 ))D0R
 ((4/	R

 ''$.R
 $$t+R
 ((4/R
 ''$.R
  $;R
 #TkR
 D[R
   4'R
 
"E%*;*;$<	<R
 R
r6   rQ  c                   $     e Zd Z fdZd Z xZS )BridgeTowerContrastiveHeadc                 X    t         |           t        j                  ||      | _        y rf   r:  )rW   rL   
embed_sizerY   s      r7   rJ   z#BridgeTowerContrastiveHead.__init__  s     ))K4r6   c                 (    | j                  |      }|S rf   r=  )rW   r4  s     r7   rj   z"BridgeTowerContrastiveHead.forward  s    GGAJr6   r   rm   s   @r7   rV  rV    s    5r6   rV  zl
    BridgeTower Model with a image-text contrastive head on top computing image-text contrastive loss.
    c                   f    e Zd Z fdZe	 	 	 	 	 	 	 	 	 	 	 ddej                  dz  dej                  dz  dej                  dz  dej                  dz  dej                  dz  dej                  dz  d	ej                  dz  d
edz  dedz  dedz  dedz  de	e
ej                     z  fd       Z xZS )r  c                    t         |   |       t        |      | _        t	        |j
                  |j                        | _        t	        |j
                  |j                        | _        t	        |j
                  dz  |j                        | _	        t        j                  t        j                  | j                  j                              | _        | j#                          y r9  )rI   rJ   r  r  rV  rL   contrastive_hidden_sizeitc_text_headitc_image_headitc_cross_modal_headr   r   r1   r   rX   r  r  r  rV   s     r7   rJ   z*BridgeTowerForContrastiveLearning.__init__  s     +F378J8JFLjLjk89K9KVMkMkl$>v?Q?QTU?UW]WuWu$v!<<T[[5W5W(XYr6   Nr  r[   r  r   r  r  r=   ry  rz  r  return_lossr   c                 F   |
|
n| j                   j                  }
| j                  ||||||||d|

      }|
r|j                  n|d   }|
r|j                  n|d   \  }}}|d   }|d   }| j                  j
                  j                  j                  |      }| j                  j                  t        j                  ddt        j                  | j                  j                  j                  j                  	            j                  |      }| j                  j                  |      |z   }t         j"                  j%                  | j'                  |ddd
ddf         dd      }t         j"                  j%                  | j)                  |ddd
ddf         dd      j+                  |j                        }t         j"                  j%                  | j-                  |      dd      j+                  |j                        }t        j.                  |||gd      }| j0                  j3                         j+                  |j                        }t        j4                  ||j7                               |z  }t        j4                  ||j7                               |z  }t        j4                  ||j7                               |z  }d}|rt        j8                  t;        |      |j                        }t         j"                  j=                  ||      }t         j"                  j=                  ||      }t         j"                  j=                  ||      }||z   |z   dz  }|
s||||f|dd z   } ||f| z   S | S t?        ||||||j                  |j@                        S )a  
        image_embeds (`torch.FloatTensor` of shape `(batch_size, num_patches, hidden_size)`, *optional*):
            Optionally, instead of passing `pixel_values`, you can choose to directly pass an embedded representation.
            This is useful if you want more control over how to convert `pixel_values` into patch embeddings.
        return_loss (`bool`, *optional*):
            Whether or not to return the contrastive loss.

        Examples:

        ```python
        >>> from transformers import BridgeTowerProcessor, BridgeTowerForContrastiveLearning
        >>> import httpx
        >>> from io import BytesIO
        >>> from PIL import Image
        >>> import torch

        >>> image_urls = [
        ...     "https://farm4.staticflickr.com/3395/3428278415_81c3e27f15_z.jpg",
        ...     "http://images.cocodataset.org/val2017/000000039769.jpg",
        ... ]
        >>> texts = ["two dogs in a car", "two cats sleeping on a couch"]

        >>> with httpx.stream("GET", urls[0]) as response:
        ...     image1 = Image.open(BytesIO(response.read()))

        >>> with httpx.stream("GET", urls[1]) as response:
        ...     image2 = Image.open(BytesIO(response.read()))

        >>> images = [image1, image2]

        >>> processor = BridgeTowerProcessor.from_pretrained("BridgeTower/bridgetower-large-itm-mlm-itc")
        >>> model = BridgeTowerForContrastiveLearning.from_pretrained("BridgeTower/bridgetower-large-itm-mlm-itc")

        >>> inputs = processor(images, texts, padding=True, return_tensors="pt")
        >>> loss = model(**inputs, return_loss=True).loss

        >>> inputs = processor(images, texts[::-1], padding=True, return_tensors="pt")
        >>> loss_swapped = model(**inputs, return_loss=True).loss

        >>> print("Loss", round(loss.item(), 4))
        Loss 0.0019

        >>> print("Loss with swapped images", round(loss_swapped.item(), 4))
        Loss with swapped images 2.126
        ```NTrI  r   r   r   r  r    r]   r   )r   r  r  r   g      @)r:   r;   r<   r=   r>   r+   r,   )!rX   r  r  r*   r+   r  r  r   r  r1   r  r  r   r_   r  r  r   r   	normalizer]  r^  rb   r_  r   r  expr  tr   r  cross_entropyr9   r,   )!rW   r  r[   r  r   r  r  r=   ry  rz  r  r`  r  rr  r*   hidden_states_txthidden_states_imghidden_states_cross_modalr<   r  r  r>   r;   r  logits_text_to_imagelogits_text_to_crosslogits_image_to_crossitc_lossr  text_to_image_losstext_to_cross_lossimage_to_cross_lossrR  s!                                    r7   rj   z)BridgeTowerForContrastiveLearning.forward  sw   z &1%<k$++B]B]""))%!'%/!%# # 
 2=--'!*%0G!!gaj 	H,.G (+(,#//<<CCPPQ]^&*&6&6&L&LJJtQejj9I9I9_9_9f9f9m9mn'

)(
) 	$ ''CCDXY\ww mm--d.@.@QPQSTWAU.V\^bc-d}}..t/B/B<PQSTVWPWCX/Y_aef.gjj%% k 
 }}..t/H/H/W]_cd.ehh%% i 
 k<FBO&&**,//{7I7I/J$||K9IJ[X$||K9IJ[X %\<>>;K L{ Z\\#f+fmmDF!#!<!<=QSY!Z!#!<!<=QSY!Z"$--"="=>SU["\*-??BUUY\\Hk<FQRQSTF-5-AXK&(MvM+#%%!//))
 	
r6   )NNNNNNNNTNN)r-   r.   r/   rJ   r   r1   r  r2   rc   r9   r4   rj   rl   rm   s   @r7   r  r    s0     .2372615.22615)-,0#'#'
##d*
 ))D0
 ((4/	

 ''$.
 $$t+
 ((4/
 ''$.
  $;
 #Tk
 D[
 D[
 
&e.?.?(@	@
 
r6   r  )r  rQ  r@  r  r  )Nr.  )Xr0   collectionsr   collections.abcr   dataclassesr   r1   r   torch.nnr    r	   r  activationsr
   r   cache_utilsr   r   r   masking_utilsr   r   modeling_layersr   modeling_outputsr   r   r   r   r   modeling_utilsr   r   processing_utilsr   pytorch_utilsr   utilsr   r   r   r   utils.genericr   configuration_bridgetowerr!   r"   r#   
get_loggerr-   r  _TOKENIZER_FOR_DOCr'   r9   r  r@   ro   r   r   r   r   r   r  r  rk   floatr  r  r?  rN  rY  ro  ru  r  r  r  r  r  r+  r  r7  r@  rQ  rV  r  __all__r5   r6   r7   <module>r     s     # $ !   % & 6 C C J 9  G & 6 K K - h h 
		H	%'  
7[ 7 7$ 
7; 7 74)299 )XRYY 6P")) Pf7"299 7"td299 d4BII bii  		 		 , !%II%<<% 
% <<	%
 LL4'% T\% % '(%:F)ryy F)TJ)		 J)\.299 .>8		 8vF5 FT3
RYY 3
ng8		 g8T &% &% &%RY7 Y" ^65 ^6^6B 
qJ1 qJ
qJj	 "    
e
7 e

e
P ^
*D ^
^
B  
N
(B N

N
br6   