
    qiC                     @   d Z ddlZddlZddlmZ ddlmZmZmZ ddlm	Z
 ddlmZ ddlmZ dd	lmZmZmZmZmZ dd
lmZ ddlmZ ddlmZmZ ddlmZ  ej<                  e      Z  G d dejB                        Z" G d dejB                        Z# G d dejB                        Z$ G d dejB                        Z% G d dejB                        Z& G d dejB                        Z' G d dejB                        Z( G d de      Z) G d d ejB                        Z* G d! d"ejB                        Z+e G d# d$e             Z,e G d% d&e,             Z- ed'(       G d) d*e,             Z.e G d+ d,e,             Z/ G d- d.ejB                        Z0e G d/ d0e,             Z1g d1Z2y)2zPyTorch LiLT model.    N)nn)BCEWithLogitsLossCrossEntropyLossMSELoss   )initialization)ACT2FN)GradientCheckpointingLayer)BaseModelOutputBaseModelOutputWithPoolingQuestionAnsweringModelOutputSequenceClassifierOutputTokenClassifierOutput)PreTrainedModel)apply_chunking_to_forward)auto_docstringlogging   )
LiltConfigc                   :     e Zd Z fdZ	 	 	 	 ddZd Zd Z xZS )LiltTextEmbeddingsc                    t         |           t        j                  |j                  |j
                  |j                        | _        t        j                  |j                  |j
                        | _	        t        j                  |j                  |j
                        | _        t        j                  |j
                  |j                        | _        t        j                  |j                        | _        | j#                  dt%        j&                  |j                        j)                  d      d       |j                  | _        t        j                  |j                  |j
                  | j*                        | _	        y )Npadding_idxepsposition_idsr   F)
persistent)super__init__r   	Embedding
vocab_sizehidden_sizepad_token_idword_embeddingsmax_position_embeddingsposition_embeddingstype_vocab_sizetoken_type_embeddings	LayerNormlayer_norm_epsDropouthidden_dropout_probdropoutregister_buffertorcharangeexpandr   selfconfig	__class__s     X/opt/pipecat/venv/lib/python3.12/site-packages/transformers/models/lilt/modeling_lilt.pyr"   zLiltTextEmbeddings.__init__*   s   !||F,=,=v?Q?Q_e_r_rs#%<<0N0NPVPbPb#c %'\\&2H2H&J\J\%]"f&8&8f>S>STzz&"<"<= 	ELL)G)GHOOPWXej 	 	

 "..#%<<**F,>,>DL\L\$
     c                 &   |I|6| j                  || j                        j                  |j                        }n| j	                  |      }||j                         }n|j                         d d }|:t        j                  |t        j                  | j                  j                        }|| j                  |      }| j                  |      }||z   }| j                  |      }||z  }| j                  |      }| j                  |      }||fS )Nr   dtypedevice)"create_position_ids_from_input_idsr   tor>   &create_position_ids_from_inputs_embedssizer2   zeroslongr   r'   r+   r)   r,   r0   )	r6   	input_idstoken_type_idsr   inputs_embedsinput_shaper+   
embeddingsr)   s	            r9   forwardzLiltTextEmbeddings.forward>   s    $#FFyRVRbRbcff$$   $JJ=Y #..*K',,.s3K!"[[EJJtO`O`OgOghN  00;M $ : :> J"%::
"66|D))
^^J/
\\*-
<''r:   c                     |j                  |      j                         }t        j                  |d      j	                  |      |z  }|j                         |z   S )a  
        Args:
        Replace non-padding symbols with their position numbers. Position numbers begin at padding_idx+1. Padding
        symbols are ignored. This is modified from fairseq's `utils.make_positions`.
            x: torch.Tensor x:
        Returns: torch.Tensor
        r   dim)neintr2   cumsumtype_asrD   )r6   rE   r   maskincremental_indicess        r9   r?   z5LiltTextEmbeddings.create_position_ids_from_input_idsb   sP     ||K(,,.$||Da8@@F$N"'')K77r:   c                    |j                         dd }|d   }t        j                  | j                  dz   || j                  z   dz   t        j                  |j
                        }|j                  d      j                  |      S )z
        Args:
        We are provided embeddings directly. We cannot infer which are padded so just generate sequential position ids.:
            inputs_embeds: torch.Tensor
        Returns: torch.Tensor
        Nr   r   r<   r   )rB   r2   r3   r   rD   r>   	unsqueezer4   )r6   rG   rH   sequence_lengthr   s        r9   rA   z9LiltTextEmbeddings.create_position_ids_from_inputs_embedso   s     $((*3B/%a.||q /D4D4D"Dq"HPUPZPZcpcwcw
 %%a(//<<r:   )NNNN)__name__
__module____qualname__r"   rJ   r?   rA   __classcell__r8   s   @r9   r   r   )   s&    
, "(H8=r:   r   c                   &     e Zd Z fdZddZ xZS )LiltLayoutEmbeddingsc                    t         |           t        j                  |j                  |j
                  dz        | _        t        j                  |j                  |j
                  dz        | _        t        j                  |j                  |j
                  dz        | _        t        j                  |j                  |j
                  dz        | _	        |j                  | _        t        j                  |j                  |j
                  |j                  z  | j                        | _        t        j                  |j
                  |j
                  |j                  z        | _        t        j"                  |j
                  |j                  z  |j$                        | _        t        j&                  |j(                        | _        y )N   r   )in_featuresout_featuresr   )r!   r"   r   r#   max_2d_position_embeddingsr%   x_position_embeddingsy_position_embeddingsh_position_embeddingsw_position_embeddingsr&   r   r(   channel_shrink_ratiobox_position_embeddingsLinearbox_linear_embeddingsr,   r-   r.   r/   r0   r5   s     r9   r"   zLiltLayoutEmbeddings.__init__   s^    &(\\&2S2SU[UgUgklUl%m"%'\\&2S2SU[UgUgklUl%m"%'\\&2S2SU[UgUgklUl%m"%'\\&2S2SU[UgUgklUl%m"!..')||**&"="==(((
$
 &(YY**9K9KvOjOj9j&
" f&8&8F<W<W&W]c]r]rszz&"<"<=r:   c                    	 | j                  |d d d d df         }| j                  |d d d d df         }| j                  |d d d d df         }| j                  |d d d d df         }| j                  |d d d d df   |d d d d df   z
        }| j	                  |d d d d df   |d d d d df   z
        }	t        j                  ||||||	gd      }
| j                  |
      }
| j                  |      }|
|z   }
| j                  |
      }
| j                  |
      }
|
S # t        $ r}t        d      |d }~ww xY w)Nr   r      r   z;The `bbox` coordinate values should be within 0-1000 range.r   rL   )rc   rd   
IndexErrorre   rf   r2   catrj   rh   r,   r0   )r6   bboxr   left_position_embeddingsupper_position_embeddingsright_position_embeddingslower_position_embeddingsere   rf   spatial_position_embeddingsrh   s               r9   rJ   zLiltLayoutEmbeddings.forward   sw   	c'+'A'A$q!Qw-'P$(,(B(B41a=(Q%(,(B(B41a=(Q%(,(B(B41a=(Q% !% : :41a=4PQSTVWPW=;X Y $ : :41a=4PQSTVWPW=;X Y&+ii()))%% 
'
# '+&@&@A\&]#"&">">|"L&AD[&[#&*nn5P&Q#&*ll3N&O#**3  	cZ[abb	cs   A,D& &	E /D;;E )NN)rW   rX   rY   r"   rJ   rZ   r[   s   @r9   r]   r]      s    >*+r:   r]   c                   4     e Zd Zd fd	ZddZ	 	 ddZ xZS )LiltSelfAttentionc                    t         |           |j                  |j                  z  dk7  r2t	        |d      s&t        d|j                   d|j                   d      |j                  | _        t        |j                  |j                  z        | _        | j                  | j                  z  | _        t        j                  |j                  | j                        | _        t        j                  |j                  | j                        | _        t        j                  |j                  | j                        | _        t        j                  |j                  |j                  z  | j                  |j                  z        | _        t        j                  |j                  |j                  z  | j                  |j                  z        | _        t        j                  |j                  |j                  z  | j                  |j                  z        | _        t        j$                  |j&                        | _        |j                  | _        || _        y )Nr   embedding_sizezThe hidden size (z6) is not a multiple of the number of attention heads ())r!   r"   r%   num_attention_headshasattr
ValueErrorrO   attention_head_sizeall_head_sizer   ri   querykeyvaluerg   layout_query
layout_keylayout_valuer.   attention_probs_dropout_probr0   	layer_idx)r6   r7   r   r8   s      r9   r"   zLiltSelfAttention.__init__   s    : ::a?PVXhHi#F$6$6#7 8 445Q8 
 $*#=#= #&v'9'9F<V<V'V#W !558P8PPYYv1143E3EF
99V//1C1CDYYv1143E3EF
II&"="==t?Q?QU[UpUp?p
 ))&"="==t?Q?QU[UpUp?p
 II&"="==t?Q?QU[UpUp?p
 zz&"E"EF$*$?$?!"r:   c                     |j                         d d | j                  | j                  |z  fz   } |j                  | }|j	                  dddd      S )Nr   r   rl   r   r   )rB   r{   r~   viewpermute)r6   xrnew_x_shapes       r9   transpose_for_scoresz&LiltSelfAttention.transpose_for_scores   sT    ffhsmt'?'?AYAY]^A^&__AFFK yyAq!$$r:   c                    | j                  | j                  |      | j                        }| j                  | j                  |      | j                        }| j                  | j	                  |      | j                        }| j                  |      }| j                  | j                  |            }	| j                  | j                  |            }
| j                  |      }t        j                  ||	j                  dd            }t        j                  ||j                  dd            }|t        j                  | j                        z  }|t        j                  | j                  | j                  z        z  }||z   }||z   }|||z   } t        j                  d      |      }| j!                  |      }t        j                  ||      }|j#                  dddd      j%                         }|j'                         d d | j(                  | j                  z  fz   } |j*                  | }|||z   } t        j                  d      |      }| j!                  |      }t        j                  ||
      }|j#                  dddd      j%                         }|j'                         d d | j(                  fz   } |j*                  | }||f}|r||fz   }|S )	N)r   r   rL   r   rl   r   r   )r   r   rg   r   r   r   r   r   r2   matmul	transposemathsqrtr~   r   Softmaxr0   r   
contiguousrB   r   r   )r6   hidden_stateslayout_inputsattention_maskoutput_attentionslayout_value_layerlayout_key_layerlayout_query_layermixed_query_layer	key_layervalue_layerquery_layerattention_scoreslayout_attention_scorestmp_attention_scorestmp_layout_attention_scoreslayout_attention_probslayout_context_layernew_context_layer_shapeattention_probscontext_layeroutputss                         r9   rJ   zLiltSelfAttention.forward   s    "66t7H7H7W[_[t[t6u44T__]5SW[WpWp4q!66t7H7H7W[_[t[t6u JJ}5--dhh}.EF	//

=0IJ//0AB <<Y5H5HR5PQ"',,/ACSC]C]^`bdCe"f/$))D<T<T2UU&=		$$(A(AAA
 '
# 02MM"=@T"T%&=&N# "4!34K!L "&.D!E$||,BDVW3;;Aq!QGRRT"6";";"=cr"BdFXFX\`\u\uFuEw"w8388:QR%/.@ -"**,-=> ,,7_kB%--aAq9DDF"/"4"4"6s";t?Q?Q>S"S***,CD "67 22Gr:   N)r   NF)rW   rX   rY   r"   r   rJ   rZ   r[   s   @r9   rw   rw      s    #>% Ar:   rw   c                   n     e Zd Z fdZdej
                  dej
                  dej
                  fdZ xZS )LiltSelfOutputc                 (   t         |           t        j                  |j                  |j                        | _        t        j                  |j                  |j                        | _        t        j                  |j                        | _
        y Nr   )r!   r"   r   ri   r%   denser,   r-   r.   r/   r0   r5   s     r9   r"   zLiltSelfOutput.__init__"  s`    YYv1163E3EF
f&8&8f>S>STzz&"<"<=r:   r   input_tensorreturnc                 r    | j                  |      }| j                  |      }| j                  ||z         }|S r   r   r0   r,   r6   r   r   s      r9   rJ   zLiltSelfOutput.forward(  7    

=1]3}|'CDr:   rW   rX   rY   r"   r2   TensorrJ   rZ   r[   s   @r9   r   r   !  1    >U\\  RWR^R^ r:   r   c                        e Zd Zd	 fd	Z	 	 d
dej
                  dej
                  dej                  dz  dedz  deej
                     f
dZ	 xZ
S )LiltAttentionNc                     t         |           t        ||      | _        t	        |      | _        |j                  }|j                  |j                  z  |_        t	        |      | _        ||_        y )Nr   )	r!   r"   rw   r6   r   outputr%   rg   layout_output)r6   r7   r   ori_hidden_sizer8   s       r9   r"   zLiltAttention.__init__0  sa    %f	B	$V, ,,#//63N3NN+F3,r:   r   r   r   r   r   c                     | j                  ||||      }| j                  |d   |      }| j                  |d   |      }||f|dd  z   }|S )Nr   r   rl   )r6   r   r   )	r6   r   r   r   r   self_outputsattention_outputlayout_attention_outputr   s	            r9   rJ   zLiltAttention.forward:  sh     yy	
  ;;|AF"&"4"4\!_m"T#%<=QR@PPr:   r   r   )rW   rX   rY   r"   r2   r   FloatTensorbooltuplerJ   rZ   r[   s   @r9   r   r   /  se    - 48).|| || ))D0	
  $; 
u||	r:   r   c                   V     e Zd Z fdZdej
                  dej
                  fdZ xZS )LiltIntermediatec                    t         |           t        j                  |j                  |j
                        | _        t        |j                  t              rt        |j                     | _        y |j                  | _        y r   )r!   r"   r   ri   r%   intermediate_sizer   
isinstance
hidden_actstrr	   intermediate_act_fnr5   s     r9   r"   zLiltIntermediate.__init__O  s]    YYv1163K3KL
f''-'-f.?.?'@D$'-'8'8D$r:   r   r   c                 J    | j                  |      }| j                  |      }|S r   )r   r   )r6   r   s     r9   rJ   zLiltIntermediate.forwardW  s&    

=100?r:   r   r[   s   @r9   r   r   N  s#    9U\\ ell r:   r   c                   n     e Zd Z fdZdej
                  dej
                  dej
                  fdZ xZS )
LiltOutputc                 (   t         |           t        j                  |j                  |j
                        | _        t        j                  |j
                  |j                        | _        t        j                  |j                        | _        y r   )r!   r"   r   ri   r   r%   r   r,   r-   r.   r/   r0   r5   s     r9   r"   zLiltOutput.__init___  s`    YYv779K9KL
f&8&8f>S>STzz&"<"<=r:   r   r   r   c                 r    | j                  |      }| j                  |      }| j                  ||z         }|S r   r   r   s      r9   rJ   zLiltOutput.forwarde  r   r:   r   r[   s   @r9   r   r   ^  r   r:   r   c                        e Zd Zd fd	Z	 	 ddej
                  dej
                  dej                  dz  dedz  deej
                     f
dZ	d	 Z
d
 Z xZS )	LiltLayerNc                    t         |           |j                  | _        d| _        t	        ||      | _        t        |      | _        t        |      | _	        |j                  }|j                  }|j                  |j                  z  |_
        |j                  |j                  z  |_        t        |      | _        t        |      | _        ||_
        ||_        y )Nr   r   )r!   r"   chunk_size_feed_forwardseq_len_dimr   	attentionr   intermediater   r   r%   r   rg   layout_intermediater   )r6   r7   r   r   ori_intermediate_sizer8   s        r9   r"   zLiltLayer.__init__m  s    '-'E'E$&vC,V4 ( ,, & 8 8#//63N3NN#)#;#;v?Z?Z#Z #3F#; '/,#8 r:   r   r   r   r   r   c                    | j                  ||||      }|d   }|d   }|dd  }t        | j                  | j                  | j                  |      }	t        | j
                  | j                  | j                  |      }
|	|
f|z   }|S )N)r   r   r   rl   )r   r   feed_forward_chunkr   r   layout_feed_forward_chunk)r6   r   r   r   r   self_attention_outputsr   r   r   layer_outputlayout_layer_outputs              r9   rJ   zLiltLayer.forward~  s     "&/	 "0 "
 2!4"8";(,0##T%A%A4CSCSUe
 8**D,H,H$JZJZ\s
  !45?r:   c                 L    | j                  |      }| j                  ||      }|S r   )r   r   r6   r   intermediate_outputr   s       r9   r   zLiltLayer.feed_forward_chunk  s,    "//0@A{{#68HIr:   c                 L    | j                  |      }| j                  ||      }|S r   )r   r   r   s       r9   r   z#LiltLayer.layout_feed_forward_chunk  s.    "667GH))*=?OPr:   r   r   )rW   rX   rY   r"   r2   r   r   r   r   rJ   r   r   rZ   r[   s   @r9   r   r   l  so    9* 48).|| || ))D0	
  $; 
u||	:
r:   r   c                        e Zd Z fdZ	 	 	 	 ddej
                  dej
                  dej                  dz  dedz  dedz  dedz  d	eej
                     e	z  fd
Z
 xZS )LiltEncoderc                     t         |           || _        t        j                  t        |j                        D cg c]  }t        |       c}      | _        y c c}w r   )	r!   r"   r7   r   
ModuleListrangenum_hidden_layersr   layer)r6   r7   _r8   s      r9   r"   zLiltEncoder.__init__  sC    ]]uVE]E]?^#_!If$5#_`
#_s   ANr   r   r   r   output_hidden_statesreturn_dictr   c                     |rdnd }|rdnd }t        | j                        D ].  \  }	}
|r||fz   } |
||||      }|d   }|d   }|s&||d   fz   }0 |r||fz   }|st        d |||fD              S t        |||      S )N r   r   rl   c              3   $   K   | ]  }|| 
 y wr   r   ).0vs     r9   	<genexpr>z&LiltEncoder.forward.<locals>.<genexpr>  s      
 = s   )last_hidden_stater   
attentions)	enumerater   r   r   )r6   r   r   r   r   r   r   all_hidden_statesall_self_attentionsilayer_modulelayer_outputss               r9   rJ   zLiltEncoder.forward  s     #7BD$5b4(4 	POA|#$58H$H!(!	M *!,M)!,M &9]1=M<O&O#	P"   1]4D D  "%'   ++*
 	
r:   )NFFT)rW   rX   rY   r"   r2   r   r   r   r   r   rJ   rZ   r[   s   @r9   r   r     s    a 48).,1#'.
||.
 ||.
 ))D0	.

  $;.
 #Tk.
 D[.
 
u||		..
r:   r   c                   V     e Zd Z fdZdej
                  dej
                  fdZ xZS )
LiltPoolerc                     t         |           t        j                  |j                  |j                        | _        t        j                         | _        y r   )r!   r"   r   ri   r%   r   Tanh
activationr5   s     r9   r"   zLiltPooler.__init__  s9    YYv1163E3EF
'')r:   r   r   c                 \    |d d df   }| j                  |      }| j                  |      }|S Nr   )r   r  )r6   r   first_token_tensorpooled_outputs       r9   rJ   zLiltPooler.forward  s6     +1a40

#566r:   r   r[   s   @r9   r  r    s#    $
U\\ ell r:   r  c                   6     e Zd ZU eed<   dZdZg Z fdZ xZ	S )LiltPreTrainedModelr7   liltTc                     t         |   |       t        |t              rZt	        j
                  |j                  t        j                  |j                  j                  d         j                  d             y y )Nr   r   )r!   _init_weightsr   r   initcopy_r   r2   r3   shaper4   )r6   moduler8   s     r9   r  z!LiltPreTrainedModel._init_weights  s[    f%f01JJv**ELL9L9L9R9RSU9V,W,^,^_f,gh 2r:   )
rW   rX   rY   r   __annotations__base_model_prefixsupports_gradient_checkpointing_no_split_modulesr  rZ   r[   s   @r9   r  r    s'    &*#i ir:   r  c                   H    e Zd Zd fd	Zd Zd Ze	 	 	 	 	 	 	 	 	 ddej                  dz  dej                  dz  dej                  dz  dej                  dz  d	ej                  dz  d
ej                  dz  de	dz  de	dz  de	dz  de
ej                     ez  fd       Z xZS )	LiltModelc                     t         |   |       || _        t        |      | _        t        |      | _        t        |      | _        |rt        |      nd| _
        | j                          y)zv
        add_pooling_layer (bool, *optional*, defaults to `True`):
            Whether to add a pooling layer
        N)r!   r"   r7   r   rI   r]   layout_embeddingsr   encoderr  pooler	post_init)r6   r7   add_pooling_layerr8   s      r9   r"   zLiltModel.__init__  sZ    
 	 ,V4!5f!="6*,=j(4 	r:   c                 .    | j                   j                  S r   rI   r'   )r6   s    r9   get_input_embeddingszLiltModel.get_input_embeddings  s    ...r:   c                 &    || j                   _        y r   r!  )r6   r   s     r9   set_input_embeddingszLiltModel.set_input_embeddings  s    */'r:   NrE   ro   r   rF   r   rG   r   r   r   r   c
                 D   ||n| j                   j                  }||n| j                   j                  }|	|	n| j                   j                  }	||t	        d      |#| j                  ||       |j                         }n!||j                         dd }nt	        d      |\  }}||j                  n|j                  }|)t        j                  |dz   t        j                  |      }|t        j                  ||f|      }|pt        | j                  d      r4| j                  j                  ddd|f   }|j                  ||      }|}n&t        j                  |t        j                  |      }| j!                  ||      }| j                  ||||	      \  }}| j#                  ||
      }| j%                  ||||||	      }|d   }| j&                  | j'                  |      nd}|	s
||f|dd z   S t)        |||j*                  |j,                        S )a  
        bbox (`torch.LongTensor` of shape `(batch_size, sequence_length, 4)`, *optional*):
            Bounding boxes of each input sequence tokens. Selected in the range `[0,
            config.max_2d_position_embeddings-1]`. Each bounding box should be a normalized version in (x0, y0, x1, y1)
            format, where (x0, y0) corresponds to the position of the upper left corner in the bounding box, and (x1,
            y1) represents the position of the lower right corner. See [Overview](#Overview) for normalization.

        Examples:

        ```python
        >>> from transformers import AutoTokenizer, AutoModel
        >>> from datasets import load_dataset

        >>> tokenizer = AutoTokenizer.from_pretrained("SCUT-DLVCLab/lilt-roberta-en-base")
        >>> model = AutoModel.from_pretrained("SCUT-DLVCLab/lilt-roberta-en-base")

        >>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train")
        >>> example = dataset[0]
        >>> words = example["tokens"]
        >>> boxes = example["bboxes"]

        >>> encoding = tokenizer(words, boxes=boxes, return_tensors="pt")

        >>> outputs = model(**encoding)
        >>> last_hidden_states = outputs.last_hidden_state
        ```NzDYou cannot specify both input_ids and inputs_embeds at the same timer   z5You have to specify either input_ids or inputs_embeds)   r<   )r>   rF   )rE   r   rF   rG   )ro   r   )r   r   r   r   r   r   )r   pooler_outputr   r   )r7   r   r   use_return_dictr}   %warn_if_padding_and_no_attention_maskrB   r>   r2   rC   rD   onesr|   rI   rF   r4   get_extended_attention_maskr  r  r  r   r   r   )r6   rE   ro   r   rF   r   rG   r   r   r   kwargsrH   
batch_size
seq_lengthr>   buffered_token_type_ids buffered_token_type_ids_expandedextended_attention_maskembedding_outputlayout_embedding_outputencoder_outputssequence_outputr
  s                          r9   rJ   zLiltModel.forward  sR   P 2C1N-TXT_T_TqTq$8$D $++JjJj 	 &1%<k$++B]B] ]%>cdd"66y.Q#..*K&',,.s3KTUU!,
J%.%:!!@T@T<;;{T1FSD!"ZZ*j)A6RN!t(89*.//*H*HKZK*X'3J3Q3QR\^h3i0!A!&[

SY!Z 150P0PQ_al0m)-%)'	 *9 *
&, #'"8"8dQ]"8"^,,#2/!5# ' 
 *!,8<8OO4UY#]3oab6III)-')77&11	
 	
r:   )T)	NNNNNNNNN)rW   rX   rY   r"   r"  r$  r   r2   r   r   r   r   rJ   rZ   r[   s   @r9   r  r    s   "/0  *.$(.2.2,0-1)-,0#'h
<<$&h
 llT!h
 t+	h

 t+h
 llT)h
 ||d*h
  $;h
 #Tkh
 D[h
 
u||	9	9h
 h
r:   r  z
    LiLT Model transformer with a sequence classification/regression head on top (a linear layer on top of the pooled
    output) e.g. for GLUE tasks.
    )custom_introc                   Z    e Zd Z fdZe	 	 	 	 	 	 	 	 	 	 ddej                  dz  dej                  dz  dej                  dz  dej                  dz  dej                  dz  dej                  dz  d	ej                  dz  d
e	dz  de	dz  de	dz  de
ej                     ez  fd       Z xZS )LiltForSequenceClassificationc                     t         |   |       |j                  | _        || _        t	        |d      | _        t        |      | _        | j                          y NF)r  )	r!   r"   
num_labelsr7   r  r  LiltClassificationHead
classifierr  r5   s     r9   r"   z&LiltForSequenceClassification.__init__  sJ      ++f>	08 	r:   NrE   ro   r   rF   r   rG   labelsr   r   r   r   c                 T   |
|
n| j                   j                  }
| j                  ||||||||	|
	      }|d   }| j                  |      }d}||j	                  |j
                        }| j                   j                  | j                  dk(  rd| j                   _        nl| j                  dkD  rL|j                  t        j                  k(  s|j                  t        j                  k(  rd| j                   _        nd| j                   _        | j                   j                  dk(  rIt               }| j                  dk(  r& ||j                         |j                               }n |||      }n| j                   j                  dk(  r=t               } ||j                  d| j                        |j                  d            }n,| j                   j                  dk(  rt!               } |||      }|
s|f|d	d z   }||f|z   S |S t#        |||j$                  |j&                  
      S )a  
        bbox (`torch.LongTensor` of shape `(batch_size, sequence_length, 4)`, *optional*):
            Bounding boxes of each input sequence tokens. Selected in the range `[0,
            config.max_2d_position_embeddings-1]`. Each bounding box should be a normalized version in (x0, y0, x1, y1)
            format, where (x0, y0) corresponds to the position of the upper left corner in the bounding box, and (x1,
            y1) represents the position of the lower right corner. See [Overview](#Overview) for normalization.
        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
            config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
            `config.num_labels > 1` a classification loss is computed (Cross-Entropy).

        Examples:

        ```python
        >>> from transformers import AutoTokenizer, AutoModelForSequenceClassification
        >>> from datasets import load_dataset

        >>> tokenizer = AutoTokenizer.from_pretrained("SCUT-DLVCLab/lilt-roberta-en-base")
        >>> model = AutoModelForSequenceClassification.from_pretrained("SCUT-DLVCLab/lilt-roberta-en-base")

        >>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train")
        >>> example = dataset[0]
        >>> words = example["tokens"]
        >>> boxes = example["bboxes"]

        >>> encoding = tokenizer(words, boxes=boxes, return_tensors="pt")

        >>> outputs = model(**encoding)
        >>> predicted_class_idx = outputs.logits.argmax(-1).item()
        >>> predicted_class = model.config.id2label[predicted_class_idx]
        ```Nro   r   rF   r   rG   r   r   r   r   r   
regressionsingle_label_classificationmulti_label_classificationr   rl   losslogitsr   r   )r7   r(  r  r=  r@   r>   problem_typer;  r=   r2   rD   rO   r   squeezer   r   r   r   r   r   r6   rE   ro   r   rF   r   rG   r>  r   r   r   r,  r   r5  rF  rE  loss_fctr   s                     r9   rJ   z%LiltForSequenceClassification.forward  s   \ &1%<k$++B]B]))))%'/!5#  

 "!*1YYv}}-F{{''/??a'/;DKK,__q(fllejj.HFLL\a\e\eLe/LDKK,/KDKK,{{''<7"9??a'#FNN$4fnn6FGD#FF3D))-JJ+-B @&++b/R))-II,./Y,F)-)9TGf$EvE'!//))	
 	
r:   
NNNNNNNNNN)rW   rX   rY   r"   r   r2   
LongTensorr   r   r   r   r   rJ   rZ   r[   s   @r9   r8  r8    s   	  .2$(37260426*.)-,0#'_
##d*_
 llT!_
 ))D0	_

 ((4/_
 &&-_
 ((4/_
   4'_
  $;_
 #Tk_
 D[_
 
u||	7	7_
 _
r:   r8  c                   Z    e Zd Z fdZe	 	 	 	 	 	 	 	 	 	 ddej                  dz  dej                  dz  dej                  dz  dej                  dz  dej                  dz  dej                  dz  d	ej                  dz  d
edz  dedz  dedz  de	ej                     ez  fd       Z xZS )LiltForTokenClassificationc                 d   t         |   |       |j                  | _        t        |d      | _        |j
                  |j
                  n|j                  }t        j                  |      | _	        t        j                  |j                  |j                        | _        | j                          y r:  )r!   r"   r;  r  r  classifier_dropoutr/   r   r.   r0   ri   r%   r=  r  r6   r7   rP  r8   s      r9   r"   z#LiltForTokenClassification.__init__  s      ++f>	)/)B)B)NF%%TZTnTn 	 zz"45))F$6$68I8IJ 	r:   NrE   ro   r   rF   r   rG   r>  r   r   r   r   c                    |
|
n| j                   j                  }
| j                  ||||||||	|
	      }|d   }| j                  |      }| j	                  |      }d}|W|j                  |j                        }t               } ||j                  d| j                        |j                  d            }|
s|f|dd z   }||f|z   S |S t        |||j                  |j                        S )a  
        bbox (`torch.LongTensor` of shape `(batch_size, sequence_length, 4)`, *optional*):
            Bounding boxes of each input sequence tokens. Selected in the range `[0,
            config.max_2d_position_embeddings-1]`. Each bounding box should be a normalized version in (x0, y0, x1, y1)
            format, where (x0, y0) corresponds to the position of the upper left corner in the bounding box, and (x1,
            y1) represents the position of the lower right corner. See [Overview](#Overview) for normalization.
        labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`.

        Examples:

        ```python
        >>> from transformers import AutoTokenizer, AutoModelForTokenClassification
        >>> from datasets import load_dataset

        >>> tokenizer = AutoTokenizer.from_pretrained("SCUT-DLVCLab/lilt-roberta-en-base")
        >>> model = AutoModelForTokenClassification.from_pretrained("SCUT-DLVCLab/lilt-roberta-en-base")

        >>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train")
        >>> example = dataset[0]
        >>> words = example["tokens"]
        >>> boxes = example["bboxes"]

        >>> encoding = tokenizer(words, boxes=boxes, return_tensors="pt")

        >>> outputs = model(**encoding)
        >>> predicted_class_indices = outputs.logits.argmax(-1)
        ```Nr@  r   r   rl   rD  )r7   r(  r  r0   r=  r@   r>   r   r   r;  r   r   r   rI  s                     r9   rJ   z"LiltForTokenClassification.forward  s	   V &1%<k$++B]B]))))%'/!5#  

 "!*,,71YYv}}-F')HFKKDOO<fkk"oNDY,F)-)9TGf$EvE$!//))	
 	
r:   rK  )rW   rX   rY   r"   r   r2   rL  r   r   r   r   r   rJ   rZ   r[   s   @r9   rN  rN    s     .2(,37260426*.)-,0#'M
##d*M
 %M
 ))D0	M

 ((4/M
 &&-M
 ((4/M
   4'M
  $;M
 #TkM
 D[M
 
u||	4	4M
 M
r:   rN  c                   (     e Zd ZdZ fdZd Z xZS )r<  z-Head for sentence-level classification tasks.c                 Z   t         |           t        j                  |j                  |j                        | _        |j                  |j                  n|j                  }t        j                  |      | _	        t        j                  |j                  |j                        | _        y r   )r!   r"   r   ri   r%   r   rP  r/   r.   r0   r;  out_projrQ  s      r9   r"   zLiltClassificationHead.__init__[  s    YYv1163E3EF
)/)B)B)NF%%TZTnTn 	 zz"45		&"4"4f6G6GHr:   c                     |d d dd d f   }| j                  |      }| j                  |      }t        j                  |      }| j                  |      }| j	                  |      }|S r  )r0   r   r2   tanhrU  )r6   featuresr,  r   s       r9   rJ   zLiltClassificationHead.forwardd  sY    Q1WLLOJJqMJJqMLLOMM!r:   )rW   rX   rY   __doc__r"   rJ   rZ   r[   s   @r9   r<  r<  X  s    7Ir:   r<  c                   z    e Zd Z fdZe	 	 	 	 	 	 	 	 	 	 	 ddej                  dz  dej                  dz  dej                  dz  dej                  dz  dej                  dz  dej                  dz  d	ej                  dz  d
ej                  dz  dedz  dedz  dedz  de	ej                     ez  fd       Z xZS )LiltForQuestionAnsweringc                     t         |   |       |j                  | _        t        |d      | _        t        j                  |j                  |j                        | _        | j                          y r:  )
r!   r"   r;  r  r  r   ri   r%   
qa_outputsr  r5   s     r9   r"   z!LiltForQuestionAnswering.__init__q  sU      ++f>	))F$6$68I8IJ 	r:   NrE   ro   r   rF   r   rG   start_positionsend_positionsr   r   r   r   c                 (   ||n| j                   j                  }| j                  |||||||	|
|	      }|d   }| j                  |      }|j	                  dd      \  }}|j                  d      j                         }|j                  d      j                         }d}||t        |j                               dkD  r|j                  d      }t        |j                               dkD  r|j                  d      }|j                  d      }|j                  d|      }|j                  d|      }t        |      } |||      } |||      }||z   dz  }|s||f|dd z   }||f|z   S |S t        ||||j                  |j                  	      S )
a  
        bbox (`torch.LongTensor` of shape `(batch_size, sequence_length, 4)`, *optional*):
            Bounding boxes of each input sequence tokens. Selected in the range `[0,
            config.max_2d_position_embeddings-1]`. Each bounding box should be a normalized version in (x0, y0, x1, y1)
            format, where (x0, y0) corresponds to the position of the upper left corner in the bounding box, and (x1,
            y1) represents the position of the lower right corner. See [Overview](#Overview) for normalization.

        Examples:

        ```python
        >>> from transformers import AutoTokenizer, AutoModelForQuestionAnswering
        >>> from datasets import load_dataset

        >>> tokenizer = AutoTokenizer.from_pretrained("SCUT-DLVCLab/lilt-roberta-en-base")
        >>> model = AutoModelForQuestionAnswering.from_pretrained("SCUT-DLVCLab/lilt-roberta-en-base")

        >>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train")
        >>> example = dataset[0]
        >>> words = example["tokens"]
        >>> boxes = example["bboxes"]

        >>> encoding = tokenizer(words, boxes=boxes, return_tensors="pt")

        >>> outputs = model(**encoding)

        >>> answer_start_index = outputs.start_logits.argmax()
        >>> answer_end_index = outputs.end_logits.argmax()

        >>> predict_answer_tokens = encoding.input_ids[0, answer_start_index : answer_end_index + 1]
        >>> predicted_answer = tokenizer.decode(predict_answer_tokens)
        ```Nr@  r   r   r   rL   )ignore_indexrl   )rE  start_logits
end_logitsr   r   )r7   r(  r  r]  splitrH  r   lenrB   clampr   r   r   r   )r6   rE   ro   r   rF   r   rG   r^  r_  r   r   r   r,  r   r5  rF  rb  rc  
total_lossignored_indexrJ  
start_lossend_lossr   s                           r9   rJ   z LiltForQuestionAnswering.forward{  s   ^ &1%<k$++B]B]))))%'/!5#  

 "!*1#)<<r<#: j#++B/::<''+668

&=+D?'')*Q."1"9"9""==%%'(1, - 5 5b 9(--a0M-33A}EO)//=AM']CH!,@J
M:H$x/14J"J/'!"+=F/9/EZMF*Q6Q+%!!//))
 	
r:   )NNNNNNNNNNN)rW   rX   rY   r"   r   r2   rL  r   r   r   r   r   rJ   rZ   r[   s   @r9   r[  r[  n  s6     .2(,372604263715)-,0#'^
##d*^
 %^
 ))D0	^

 ((4/^
 &&-^
 ((4/^
 ))D0^
 ''$.^
  $;^
 #Tk^
 D[^
 
u||	;	;^
 ^
r:   r[  )r[  r8  rN  r  r  )3rY  r   r2   r   torch.nnr   r   r    r   r  activationsr	   modeling_layersr
   modeling_outputsr   r   r   r   r   modeling_utilsr   pytorch_utilsr   utilsr   r   configuration_liltr   
get_loggerrW   loggerModuler   r]   rw   r   r   r   r   r   r   r  r  r  r8  rN  r<  r[  __all__r   r:   r9   <module>rx     s       A A & ! 9  . 6 , * 
		H	%S= S=l5+299 5+pf		 fTRYY BII >ryy   7* 7t4
")) 4
p  	i/ 	i 	i A
# A
 A
H m
$7 m
m
` ^
!4 ^
 ^
DRYY , k
2 k
 k
\r:   