
    Z jԟ                     r   S r SSKJr  SSKrSSKJr  SSKJrJrJr  SSK	J
r  SSKJr  SS	KJr  SS
KJrJrJrJrJrJr  SSKJrJr  SSKJr  SSKJr  SSKJrJ r J!r!J"r"  SSK#J$r$  SSK%J&r&  SSK'J(r(  \"RR                  " \*5      r+\RX                  r- " S S\R\                  5      r/ SCS\R\                  S\R`                  S\R`                  S\R`                  S\R`                  S-  S\1S\14S jjr2 " S S\R\                  5      r3 " S S \R\                  5      r4 " S! S"\R\                  5      r5 " S# S$\R\                  5      r6 " S% S&\R\                  5      r7 " S' S(\5      r8 " S) S*\R\                  5      r9 " S+ S,\R\                  5      r: " S- S.\R\                  5      r; " S/ S0\R\                  5      r< " S1 S2\R\                  5      r=\  " S3 S4\5      5       r>\  " S5 S6\>5      5       r?\  " S7 S8\>5      5       r@\ " S9S:9 " S; S<\>5      5       rA\ " S=S:9 " S> S?\>5      5       rB\  " S@ SA\>5      5       rC/ SBQrDg)DzPyTorch LayoutLM model.    )CallableN)nn)BCEWithLogitsLossCrossEntropyLossMSELoss   )initialization)ACT2FN)GradientCheckpointingLayer)BaseModelOutputBaseModelOutputWithPoolingMaskedLMOutputQuestionAnsweringModelOutputSequenceClassifierOutputTokenClassifierOutput)ALL_ATTENTION_FUNCTIONSPreTrainedModel)Unpack)apply_chunking_to_forward)TransformersKwargsauto_docstringcan_return_tuplelogging)merge_with_config_defaults)capture_outputs   )LayoutLMConfigc                   @   ^  \ rS rSrSrU 4S jr     SS jrSrU =r$ )LayoutLMEmbeddings0   zGConstruct the embeddings from word, position and token_type embeddings.c                   > [         TU ]  5         [        R                  " UR                  UR
                  UR                  S9U l        [        R                  " UR                  UR
                  5      U l	        [        R                  " UR                  UR
                  5      U l        [        R                  " UR                  UR
                  5      U l        [        R                  " UR                  UR
                  5      U l        [        R                  " UR                  UR
                  5      U l        [        R                  " UR                  UR
                  5      U l        [#        UR
                  UR$                  S9U l        [        R(                  " UR*                  5      U l        U R/                  S[0        R2                  " UR                  5      R5                  S5      SS9  g )N)padding_idxepsposition_idsr   F)
persistent)super__init__r   	Embedding
vocab_sizehidden_sizepad_token_idword_embeddingsmax_position_embeddingsposition_embeddingsmax_2d_position_embeddingsx_position_embeddingsy_position_embeddingsh_position_embeddingsw_position_embeddingstype_vocab_sizetoken_type_embeddingsLayoutLMLayerNormlayer_norm_eps	LayerNormDropouthidden_dropout_probdropoutregister_buffertorcharangeexpandselfconfig	__class__s     /root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/models/layoutlm/modeling_layoutlm.pyr*   LayoutLMEmbeddings.__init__3   s[   !||F,=,=v?Q?Q_e_r_rs#%<<0N0NPVPbPb#c %'\\&2S2SU[UgUg%h"%'\\&2S2SU[UgUg%h"%'\\&2S2SU[UgUg%h"%'\\&2S2SU[UgUg%h"%'\\&2H2H&J\J\%]"*6+=+=6CXCXYzz&"<"<=ELL)G)GHOOPWXej 	 	
    c                    Ub  UR                  5       nOUR                  5       S S nUS   nUb  UR                  OUR                  nUc  U R                  S S 2S U24   nUc$  [        R                  " U[        R
                  US9nUc  U R                  U5      nUn	U R                  U5      n
 U R                  US S 2S S 2S4   5      nU R                  US S 2S S 2S4   5      nU R                  US S 2S S 2S4   5      nU R                  US S 2S S 2S4   5      nU R                  US S 2S S 2S4   US S 2S S 2S4   -
  5      nU R                  US S 2S S 2S4   US S 2S S 2S4   -
  5      nU R                  U5      nU	U
-   U-   U-   U-   U-   U-   U-   U-   nU R                  U5      nU R                  U5      nU$ ! [         a  n[        S5      UeS nAff = f)Nr'   r   dtypedevicer      r   z:The `bbox`coordinate values should be within 0-1000 range.)sizerM   r%   r@   zeroslongr/   r1   r3   r4   
IndexErrorr5   r6   r8   r;   r>   )rD   	input_idsbboxtoken_type_idsr%   inputs_embedsinput_shape
seq_lengthrM   words_embeddingsr1   left_position_embeddingsupper_position_embeddingsright_position_embeddingslower_position_embeddingser5   r6   r8   
embeddingss                       rG   forwardLayoutLMEmbeddings.forwardD   s1     #..*K',,.s3K ^
%.%:!!@T@T,,Q^<L!"[[EJJvVN  00;M("66|D	b'+'A'A$q!Qw-'P$(,(B(B41a=(Q%(,(B(B41a=(Q%(,(B(B41a=(Q% !% : :41a=4PQSTVWPW=;X Y $ : :41a=4PQSTVWPW=;X Y $ : :> J !"&' (( (	(
 (( $$ $$ $$ 	 ^^J/
\\*-
)  	bYZ`aa	bs   /A,F: :
GGG)	r;   r>   r5   r1   r8   r6   r/   r3   r4   )NNNNN)	__name__
__module____qualname____firstlineno____doc__r*   r`   __static_attributes____classcell__rF   s   @rG   r   r   0   s&    Q
& 5 5rI   r   modulequerykeyvalueattention_maskscalingr>   c                    [         R                  " XR                  SS5      5      U-  nUb  X-   n[        R                  R                  US[         R                  S9R                  UR                  5      n[        R                  R                  XU R                  S9n[         R                  " X5      n	U	R                  SS5      R                  5       n	X4$ )NrN   r   r'   )dimrL   )ptrainingr   )r@   matmul	transposer   
functionalsoftmaxfloat32torL   r>   rs   
contiguous)
rj   rk   rl   rm   rn   ro   r>   kwargsattn_weightsattn_outputs
             rG   eager_attention_forwardr~   }   s     <<}}Q':;gEL!#4==((2U]](SVVW\WbWbcL==((6??([L,,|3K''1-88:K$$rI   c                      ^  \ rS rSrU 4S jr S
S\R                  S\R                  S-  S\\	   S\
\R                  \R                  S-  4   4S jjrS	rU =r$ )LayoutLMSelfAttention   c                 6  > [         TU ]  5         UR                  UR                  -  S:w  a7  [	        US5      (       d&  [        SUR                   SUR                   S35      eXl        UR                  U l        [        UR                  UR                  -  5      U l        U R                  U R                  -  U l	        [        R                  " UR                  U R                  5      U l        [        R                  " UR                  U R                  5      U l        [        R                  " UR                  U R                  5      U l        [        R                  " UR                   5      U l        UR                   U l        U R                  S-  U l        g )Nr   embedding_sizezThe hidden size (z6) is not a multiple of the number of attention heads ()g      )r)   r*   r-   num_attention_headshasattr
ValueErrorrE   intattention_head_sizeall_head_sizer   Linearrk   rl   rm   r<   attention_probs_dropout_probr>   attention_dropoutro   rC   s     rG   r*   LayoutLMSelfAttention.__init__   sD    : ::a?PVXhHiHi#F$6$6#7 8 445Q8 
 #)#=#= #&v'9'9F<V<V'V#W !558P8PPYYv1143E3EF
99V//1C1CDYYv1143E3EF
zz&"E"EF!'!D!D//5rI   Nhidden_statesrn   r{   returnc                    UR                   S S n/ UQSPU R                  P7nU R                  U5      R                  U5      R	                  SS5      nU R                  U5      R                  U5      R	                  SS5      nU R                  U5      R                  U5      R	                  SS5      n[        R                  " U R                  R                  [        5      n	U	" U UUUU4U R                  (       d  SOU R                  U R                  S.UD6u  pU
R                  " / UQSP76 R!                  5       n
X4$ )Nr'   r   rN           )r>   ro   )shaper   rk   viewru   rl   rm   r   get_interfacerE   _attn_implementationr~   rs   r   ro   reshaperz   )rD   r   rn   r{   rW   hidden_shapequery_states
key_statesvalue_statesattention_interfacer}   r|   s               rG   r`   LayoutLMSelfAttention.forward   s8    $))#2.CCbC$*B*BCzz-055lCMMaQRSXXm,11,?II!QO
zz-055lCMMaQRS(?(M(MKK,,.E)
 %8	%
  $}}C$2H2HLL	%
 	%
! "));;;;FFH((rI   )
r   r   r   rE   r>   rl   r   rk   ro   rm   N)rb   rc   rd   re   r*   r@   TensorFloatTensorr   r   tupler`   rg   rh   ri   s   @rG   r   r      si    60 48)||) ))D0) +,	)
 
u||U\\D00	1) )rI   r   c                   z   ^  \ rS rSrU 4S jrS\R                  S\R                  S\R                  4S jrSrU =r	$ )LayoutLMSelfOutput   c                 (  > [         TU ]  5         [        R                  " UR                  UR                  5      U l        [        R                  " UR                  UR                  S9U l        [        R                  " UR                  5      U l
        g Nr#   )r)   r*   r   r   r-   denser;   r:   r<   r=   r>   rC   s     rG   r*   LayoutLMSelfOutput.__init__   s`    YYv1163E3EF
f&8&8f>S>STzz&"<"<=rI   r   input_tensorr   c                 p    U R                  U5      nU R                  U5      nU R                  X-   5      nU$ r   r   r>   r;   rD   r   r   s      rG   r`   LayoutLMSelfOutput.forward   5    

=1]3}'CDrI   r;   r   r>   
rb   rc   rd   re   r*   r@   r   r`   rg   rh   ri   s   @rG   r   r      6    >U\\  RWR^R^  rI   r   c            	          ^  \ rS rSrU 4S jr S
S\R                  S\R                  S-  S\\	   S\R                  4S jjr
S	rU =r$ )LayoutLMAttention   c                 b   > [         TU ]  5         [        U5      U l        [	        U5      U l        g r   )r)   r*   r   rD   r   outputrC   s     rG   r*   LayoutLMAttention.__init__   s&    )&1	(0rI   Nr   rn   r{   r   c                 Z    UnU R                   " U4SU0UD6u  pU R                  X5      nU$ Nrn   )rD   r   )rD   r   rn   r{   residual_s         rG   r`   LayoutLMAttention.forward   sE     !99
)
 

 M<rI   )r   rD   r   )rb   rc   rd   re   r*   r@   r   r   r   r   r`   rg   rh   ri   s   @rG   r   r      sV    1 48|| ))D0 +,	
 
 rI   r   c                   b   ^  \ rS rSrU 4S jrS\R                  S\R                  4S jrSrU =r	$ )LayoutLMIntermediate   c                   > [         TU ]  5         [        R                  " UR                  UR
                  5      U l        [        UR                  [        5      (       a  [        UR                     U l        g UR                  U l        g r   )r)   r*   r   r   r-   intermediate_sizer   
isinstance
hidden_actstrr
   intermediate_act_fnrC   s     rG   r*   LayoutLMIntermediate.__init__   s`    YYv1163K3KL
f''--'-f.?.?'@D$'-'8'8D$rI   r   r   c                 J    U R                  U5      nU R                  U5      nU$ r   r   r   rD   r   s     rG   r`   LayoutLMIntermediate.forward   s&    

=100?rI   r   r   ri   s   @rG   r   r      s(    9U\\ ell  rI   r   c                   z   ^  \ rS rSrU 4S jrS\R                  S\R                  S\R                  4S jrSrU =r	$ )LayoutLMOutputi  c                 (  > [         TU ]  5         [        R                  " UR                  UR
                  5      U l        [        R                  " UR
                  UR                  S9U l        [        R                  " UR                  5      U l        g r   )r)   r*   r   r   r   r-   r   r;   r:   r<   r=   r>   rC   s     rG   r*   LayoutLMOutput.__init__  s`    YYv779K9KL
f&8&8f>S>STzz&"<"<=rI   r   r   r   c                 p    U R                  U5      nU R                  U5      nU R                  X-   5      nU$ r   r   r   s      rG   r`   LayoutLMOutput.forward  r   rI   r   r   ri   s   @rG   r   r     r   rI   r   c            	          ^  \ rS rSrU 4S jr SS\R                  S\R                  S-  S\\	   S\R                  4S jjr
S	 rS
rU =r$ )LayoutLMLayeri  c                    > [         TU ]  5         UR                  U l        SU l        [	        U5      U l        [        U5      U l        [        U5      U l	        g )Nr   )
r)   r*   chunk_size_feed_forwardseq_len_dimr   	attentionr   intermediater   r   rC   s     rG   r*   LayoutLMLayer.__init__  sI    '-'E'E$*6208$V,rI   Nr   rn   r{   r   c                     U R                   " U4SU0UD6n[        U R                  U R                  U R                  U5      nU$ r   )r   r   feed_forward_chunkr   r   )rD   r   rn   r{   s       rG   r`   LayoutLMLayer.forward  sW     
)
 
 2##T%A%A4CSCSUb
 rI   c                 J    U R                  U5      nU R                  X!5      nU$ r   )r   r   )rD   attention_outputintermediate_outputlayer_outputs       rG   r    LayoutLMLayer.feed_forward_chunk+  s)    "//0@A{{#6IrI   )r   r   r   r   r   r   )rb   rc   rd   re   r*   r@   r   r   r   r   r`   r   rg   rh   ri   s   @rG   r   r     s[    - 48|| ))D0 +,	
 
$ rI   r   c            	       |   ^  \ rS rSrU 4S jr S
S\R                  S\R                  S-  S\\	   S\
4S jjrS	rU =r$ )LayoutLMEncoderi2  c                    > [         TU ]  5         Xl        [        R                  " [        UR                  5       Vs/ s H  n[        U5      PM     sn5      U l        SU l	        g s  snf )NF)
r)   r*   rE   r   
ModuleListrangenum_hidden_layersr   layergradient_checkpointing)rD   rE   irF   s      rG   r*   LayoutLMEncoder.__init__3  sR    ]]5IaIaCb#cCbaM&$9Cb#cd
&+# $ds   A&Nr   rn   r{   r   c                 N    U R                    H  nU" UU40 UD6nM     [        US9$ )N)last_hidden_state)r   r   )rD   r   rn   r{   layer_modules        rG   r`   LayoutLMEncoder.forward9  s>     !JJL( M ' +
 	
rI   )rE   r   r   r   )rb   rc   rd   re   r*   r@   r   r   r   r   r   r`   rg   rh   ri   s   @rG   r   r   2  sR    , 48
||
 ))D0
 +,	

 

 
rI   r   c                   b   ^  \ rS rSrU 4S jrS\R                  S\R                  4S jrSrU =r	$ )LayoutLMPooleriL  c                    > [         TU ]  5         [        R                  " UR                  UR                  5      U l        [        R                  " 5       U l        g r   )r)   r*   r   r   r-   r   Tanh
activationrC   s     rG   r*   LayoutLMPooler.__init__M  s9    YYv1163E3EF
'')rI   r   r   c                 \    US S 2S4   nU R                  U5      nU R                  U5      nU$ )Nr   )r   r   )rD   r   first_token_tensorpooled_outputs       rG   r`   LayoutLMPooler.forwardR  s6     +1a40

#566rI   )r   r   r   ri   s   @rG   r   r   L  s(    $
U\\ ell  rI   r   c                   b   ^  \ rS rSrU 4S jrS\R                  S\R                  4S jrSrU =r	$ )LayoutLMPredictionHeadTransformi\  c                 p  > [         TU ]  5         [        R                  " UR                  UR                  5      U l        [        UR                  [        5      (       a  [        UR                     U l
        OUR                  U l
        [        R                  " UR                  UR                  S9U l        g r   )r)   r*   r   r   r-   r   r   r   r   r
   transform_act_fnr;   r:   rC   s     rG   r*   (LayoutLMPredictionHeadTransform.__init__]  s~    YYv1163E3EF
f''--$*6+<+<$=D!$*$5$5D!f&8&8f>S>STrI   r   r   c                 l    U R                  U5      nU R                  U5      nU R                  U5      nU$ r   )r   r   r;   r   s     rG   r`   'LayoutLMPredictionHeadTransform.forwardf  s4    

=1--m<}5rI   )r;   r   r   r   ri   s   @rG   r   r   \  s)    UU\\ ell  rI   r   c                   .   ^  \ rS rSrU 4S jrS rSrU =r$ )LayoutLMLMPredictionHeadin  c                   > [         TU ]  5         [        U5      U l        [        R
                  " UR                  UR                  SS9U l        [        R                  " [        R                  " UR                  5      5      U l        g )NT)bias)r)   r*   r   	transformr   r   r-   r,   decoder	Parameterr@   rP   r  rC   s     rG   r*   !LayoutLMLMPredictionHead.__init__o  s[    8@ yy!3!3V5F5FTRLLV->->!?@	rI   c                 J    U R                  U5      nU R                  U5      nU$ r   )r  r  r   s     rG   r`    LayoutLMLMPredictionHead.forwardx  s$    }5]3rI   )r  r  r  )rb   rc   rd   re   r*   r`   rg   rh   ri   s   @rG   r  r  n  s    A rI   r  c                   b   ^  \ rS rSrU 4S jrS\R                  S\R                  4S jrSrU =r	$ )LayoutLMOnlyMLMHeadi  c                 B   > [         TU ]  5         [        U5      U l        g r   )r)   r*   r  predictionsrC   s     rG   r*   LayoutLMOnlyMLMHead.__init__  s    3F;rI   sequence_outputr   c                 (    U R                  U5      nU$ r   r  )rD   r  prediction_scoress      rG   r`   LayoutLMOnlyMLMHead.forward  s     ,,_=  rI   r  r   ri   s   @rG   r  r    s(    <!u|| ! ! !rI   r  c                   n   ^  \ rS rSr% \\S'   SrSr\\	S.r
\R                  " 5       U 4S j5       rSrU =r$ )LayoutLMPreTrainedModeli  rE   layoutlmT)r   
attentionsc                 r  > [         TU ]  U5        [        U[        5      (       a!  [        R
                  " UR                  5        g[        U[        5      (       a\  [        R                  " UR                  [        R                  " UR                  R                  S   5      R                  S5      5        gg)zInitialize the weightsr'   r&   N)r)   _init_weightsr   r  initzeros_r  r   copy_r%   r@   rA   r   rB   )rD   rj   rF   s     rG   r  %LayoutLMPreTrainedModel._init_weights  s~     	f%f677KK$ 233JJv**ELL9L9L9R9RSU9V,W,^,^_f,gh 4rI    )rb   rc   rd   re   r   __annotations__base_model_prefixsupports_gradient_checkpointingr   r   _can_record_outputsr@   no_gradr  rg   rh   ri   s   @rG   r  r    s?    "&*#&+
 ]]_i irI   r  c                   4  ^  \ rS rSrU 4S jrS rS r\\\	      SS\
R                  S-  S\
R                  S-  S\
R                  S-  S	\
R                  S-  S
\
R                  S-  S\
R                  S-  S\\   S\\-  4S jj5       5       5       rSrU =r$ )LayoutLMModeli  c                    > [         TU ]  U5        Xl        [        U5      U l        [        U5      U l        [        U5      U l        U R                  5         g r   )
r)   r*   rE   r   r_   r   encoderr   pooler	post_initrC   s     rG   r*   LayoutLMModel.__init__  sE     ,V4&v.$V, 	rI   c                 .    U R                   R                  $ r   r_   r/   rD   s    rG   get_input_embeddings"LayoutLMModel.get_input_embeddings  s    ...rI   c                 $    XR                   l        g r   r,  )rD   rm   s     rG   set_input_embeddings"LayoutLMModel.set_input_embeddings  s    */'rI   NrS   rT   rn   rU   r%   rV   r{   r   c                    Ub  Ub  [        S5      eUb"  U R                  X5        UR                  5       nO"Ub  UR                  5       SS nO[        S5      eUb  UR                  OUR                  n	Uc  [        R
                  " XS9nUc$  [        R                  " U[        R                  U	S9nUc'  [        R                  " US-   [        R                  U	S9nUR                  S5      R                  S	5      n
U
R                  U R                  S
9n
SU
-
  [        R                  " U R                  5      R                  -  n
U R                  UUUUUS9nU R                  " UU
40 UD6nUS   nU R                  U5      n[!        UUS9$ )a  
bbox (`torch.LongTensor` of shape `(batch_size, sequence_length, 4)`, *optional*):
    Bounding boxes of each input sequence tokens. Selected in the range `[0,
    config.max_2d_position_embeddings-1]`. Each bounding box should be a normalized version in (x0, y0, x1, y1)
    format, where (x0, y0) corresponds to the position of the upper left corner in the bounding box, and (x1,
    y1) represents the position of the lower right corner. See [Overview](#Overview) for normalization.

Examples:

```python
>>> from transformers import AutoTokenizer, LayoutLMModel
>>> import torch

>>> tokenizer = AutoTokenizer.from_pretrained("microsoft/layoutlm-base-uncased")
>>> model = LayoutLMModel.from_pretrained("microsoft/layoutlm-base-uncased")

>>> words = ["Hello", "world"]
>>> normalized_word_boxes = [637, 773, 693, 782], [698, 773, 733, 782]

>>> token_boxes = []
>>> for word, box in zip(words, normalized_word_boxes):
...     word_tokens = tokenizer.tokenize(word)
...     token_boxes.extend([box] * len(word_tokens))
>>> # add bounding boxes of cls + sep tokens
>>> token_boxes = [[0, 0, 0, 0]] + token_boxes + [[1000, 1000, 1000, 1000]]

>>> encoding = tokenizer(" ".join(words), return_tensors="pt")
>>> input_ids = encoding["input_ids"]
>>> attention_mask = encoding["attention_mask"]
>>> token_type_ids = encoding["token_type_ids"]
>>> bbox = torch.tensor([token_boxes])

>>> outputs = model(
...     input_ids=input_ids, bbox=bbox, attention_mask=attention_mask, token_type_ids=token_type_ids
... )

>>> last_hidden_states = outputs.last_hidden_state
```NzDYou cannot specify both input_ids and inputs_embeds at the same timer'   z5You have to specify either input_ids or inputs_embeds)rM   rK   )   r   rN   )rL   g      ?)rS   rT   r%   rU   rV   r   )r   pooler_output)r   %warn_if_padding_and_no_attention_maskrO   rM   r@   onesrP   rQ   	unsqueezery   rL   finfominr_   r'  r(  r   )rD   rS   rT   rn   rU   r%   rV   r{   rW   rM   extended_attention_maskembedding_outputencoder_outputsr  r   s                  rG   r`   LayoutLMModel.forward  s   f  ]%>cdd"66yQ#..*K&',,.s3KTUU%.%:!!@T@T!"ZZCN!"[[EJJvVN<;;{T1FSD"0":":1"="G"G"J"9"<"<4::"<"N#&)@#@EKKPTPZPZD[D_D_"_??%)' + 
 ,,#
 

 *!,O4)-'
 	
rI   )rE   r_   r'  r(  )NNNNNN)rb   rc   rd   re   r*   r.  r1  r   r   r   r@   
LongTensorr   r   r   r   r   r`   rg   rh   ri   s   @rG   r%  r%    s    	/0   .2(,37260426[
##d*[
 %[
 ))D0	[

 ((4/[
 &&-[
 ((4/[
 +,[
 
+	+[
    [
rI   r%  c                   Z  ^  \ rS rSrSSS.rU 4S jrS rS rS r\	\
       SS
\R                  S	-  S\R                  S	-  S\R                  S	-  S\R                  S	-  S\R                  S	-  S\R                  S	-  S\R                  S	-  S\\   S\\-  4S jj5       5       rSrU =r$ )LayoutLMForMaskedLMi  zcls.predictions.biasz*layoutlm.embeddings.word_embeddings.weight)zcls.predictions.decoder.biaszcls.predictions.decoder.weightc                    > [         TU ]  U5        [        U5      U l        [	        U5      U l        U R                  5         g r   )r)   r*   r%  r  r  clsr)  rC   s     rG   r*   LayoutLMForMaskedLM.__init__  s4     %f-&v. 	rI   c                 B    U R                   R                  R                  $ r   r  r_   r/   r-  s    rG   r.  (LayoutLMForMaskedLM.get_input_embeddings!      }}''777rI   c                 B    U R                   R                  R                  $ r   )rC  r  r  r-  s    rG   get_output_embeddings)LayoutLMForMaskedLM.get_output_embeddings$  s    xx##+++rI   c                     XR                   R                  l        UR                  U R                   R                  l        g r   )rC  r  r  r  )rD   new_embeddingss     rG   set_output_embeddings)LayoutLMForMaskedLM.set_output_embeddings'  s*    '5$$2$7$7!rI   NrS   rT   rn   rU   r%   rV   labelsr{   r   c                 6   U R                   " UU4UUUUS.UD6n	U	S   n
U R                  U
5      nSnUbF  [        5       nU" UR                  SU R                  R
                  5      UR                  S5      5      n[        UUU	R                  U	R                  S9$ )a  
bbox (`torch.LongTensor` of shape `(batch_size, sequence_length, 4)`, *optional*):
    Bounding boxes of each input sequence tokens. Selected in the range `[0,
    config.max_2d_position_embeddings-1]`. Each bounding box should be a normalized version in (x0, y0, x1, y1)
    format, where (x0, y0) corresponds to the position of the upper left corner in the bounding box, and (x1,
    y1) represents the position of the lower right corner. See [Overview](#Overview) for normalization.
labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
    Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,
    config.vocab_size]` (see `input_ids` docstring) Tokens with indices set to `-100` are ignored (masked), the
    loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`

Examples:

```python
>>> from transformers import AutoTokenizer, LayoutLMForMaskedLM
>>> import torch

>>> tokenizer = AutoTokenizer.from_pretrained("microsoft/layoutlm-base-uncased")
>>> model = LayoutLMForMaskedLM.from_pretrained("microsoft/layoutlm-base-uncased")

>>> words = ["Hello", "[MASK]"]
>>> normalized_word_boxes = [637, 773, 693, 782], [698, 773, 733, 782]

>>> token_boxes = []
>>> for word, box in zip(words, normalized_word_boxes):
...     word_tokens = tokenizer.tokenize(word)
...     token_boxes.extend([box] * len(word_tokens))
>>> # add bounding boxes of cls + sep tokens
>>> token_boxes = [[0, 0, 0, 0]] + token_boxes + [[1000, 1000, 1000, 1000]]

>>> encoding = tokenizer(" ".join(words), return_tensors="pt")
>>> input_ids = encoding["input_ids"]
>>> attention_mask = encoding["attention_mask"]
>>> token_type_ids = encoding["token_type_ids"]
>>> bbox = torch.tensor([token_boxes])

>>> labels = tokenizer("Hello world", return_tensors="pt")["input_ids"]

>>> outputs = model(
...     input_ids=input_ids,
...     bbox=bbox,
...     attention_mask=attention_mask,
...     token_type_ids=token_type_ids,
...     labels=labels,
... )

>>> loss = outputs.loss
```)rn   rU   r%   rV   r   Nr'   losslogitsr   r  )	r  rC  r   r   rE   r,   r   r   r  )rD   rS   rT   rn   rU   r%   rV   rP  r{   outputsr  r  masked_lm_lossloss_fcts                 rG   r`   LayoutLMForMaskedLM.forward+  s    z --
 *)%'
 
 "!* HH_5')H%!&&r4;;+A+ABBN
 $!//))	
 	
rI   )rC  r  NNNNNNN)rb   rc   rd   re   _tied_weights_keysr*   r.  rJ  rN  r   r   r@   r?  r   r   r   r   r   r`   rg   rh   ri   s   @rG   rA  rA    s    )?*V
8,8  .2(,37260426*.U
##d*U
 %U
 ))D0	U

 ((4/U
 &&-U
 ((4/U
   4'U
 +,U
 
	U
  U
rI   rA  z
    LayoutLM Model with a sequence classification head on top (a linear layer on top of the pooled output) e.g. for
    document image classification tasks such as the [RVL-CDIP](https://www.cs.cmu.edu/~aharley/rvl-cdip/) dataset.
    )custom_introc                   D  ^  \ rS rSrU 4S jrS r\\       SS\R                  S-  S\R                  S-  S\R                  S-  S\R                  S-  S	\R                  S-  S
\R                  S-  S\R                  S-  S\\   S\\-  4S jj5       5       rSrU =r$ )!LayoutLMForSequenceClassificationi  c                 0  > [         TU ]  U5        UR                  U l        [        U5      U l        [
        R                  " UR                  5      U l        [
        R                  " UR                  UR                  5      U l        U R                  5         g r   r)   r*   
num_labelsr%  r  r   r<   r=   r>   r   r-   
classifierr)  rC   s     rG   r*   *LayoutLMForSequenceClassification.__init__  i      ++%f-zz&"<"<=))F$6$68I8IJ 	rI   c                 B    U R                   R                  R                  $ r   rF  r-  s    rG   r.  6LayoutLMForSequenceClassification.get_input_embeddings  rH  rI   NrS   rT   rn   rU   r%   rV   rP  r{   r   c           
         U R                   " S	UUUUUUS.UD6n	U	S   n
U R                  U
5      n
U R                  U
5      nSnUGb  U R                  R                  c  U R
                  S:X  a  SU R                  l        OoU R
                  S:  aN  UR                  [        R                  :X  d  UR                  [        R                  :X  a  SU R                  l        OSU R                  l        U R                  R                  S:X  aI  [        5       nU R
                  S:X  a&  U" UR                  5       UR                  5       5      nOU" X5      nOU R                  R                  S:X  a=  [        5       nU" UR                  SU R
                  5      UR                  S5      5      nO,U R                  R                  S:X  a  [        5       nU" X5      n[        UUU	R                   U	R"                  S9$ )
a
  
bbox (`torch.LongTensor` of shape `(batch_size, sequence_length, 4)`, *optional*):
    Bounding boxes of each input sequence tokens. Selected in the range `[0,
    config.max_2d_position_embeddings-1]`. Each bounding box should be a normalized version in (x0, y0, x1, y1)
    format, where (x0, y0) corresponds to the position of the upper left corner in the bounding box, and (x1,
    y1) represents the position of the lower right corner. See [Overview](#Overview) for normalization.
labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
    Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
    config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
    `config.num_labels > 1` a classification loss is computed (Cross-Entropy).

Examples:

```python
>>> from transformers import AutoTokenizer, LayoutLMForSequenceClassification
>>> import torch

>>> tokenizer = AutoTokenizer.from_pretrained("microsoft/layoutlm-base-uncased")
>>> model = LayoutLMForSequenceClassification.from_pretrained("microsoft/layoutlm-base-uncased")

>>> words = ["Hello", "world"]
>>> normalized_word_boxes = [637, 773, 693, 782], [698, 773, 733, 782]

>>> token_boxes = []
>>> for word, box in zip(words, normalized_word_boxes):
...     word_tokens = tokenizer.tokenize(word)
...     token_boxes.extend([box] * len(word_tokens))
>>> # add bounding boxes of cls + sep tokens
>>> token_boxes = [[0, 0, 0, 0]] + token_boxes + [[1000, 1000, 1000, 1000]]

>>> encoding = tokenizer(" ".join(words), return_tensors="pt")
>>> input_ids = encoding["input_ids"]
>>> attention_mask = encoding["attention_mask"]
>>> token_type_ids = encoding["token_type_ids"]
>>> bbox = torch.tensor([token_boxes])
>>> sequence_label = torch.tensor([1])

>>> outputs = model(
...     input_ids=input_ids,
...     bbox=bbox,
...     attention_mask=attention_mask,
...     token_type_ids=token_type_ids,
...     labels=sequence_label,
... )

>>> loss = outputs.loss
>>> logits = outputs.logits
```rS   rT   rn   rU   r%   rV   r   N
regressionsingle_label_classificationmulti_label_classificationr'   rR  r  )r  r>   ra  rE   problem_typer`  rL   r@   rQ   r   r   squeezer   r   r   r   r   r  )rD   rS   rT   rn   rU   r%   rV   rP  r{   rU  r   rT  rS  rW  s                 rG   r`   )LayoutLMForSequenceClassification.forward  s   z -- 
))%'
 
  
]3/{{''/??a'/;DKK,__q(fllejj.HFLL\a\e\eLe/LDKK,/KDKK,{{''<7"9??a'#FNN$4fnn6FGD#F3D))-JJ+-B @&++b/R))-II,./'!//))	
 	
rI   ra  r>   r  r`  rY  )rb   rc   rd   re   r*   r.  r   r   r@   r?  r   r   r   r   r   r`   rg   rh   ri   s   @rG   r]  r]    s    8  .2(,37260426*.f
##d*f
 %f
 ))D0	f

 ((4/f
 &&-f
 ((4/f
   4'f
 +,f
 
)	)f
  f
rI   r]  a3  
    LayoutLM Model with a token classification head on top (a linear layer on top of the hidden-states output) e.g. for
    sequence labeling (information extraction) tasks such as the [FUNSD](https://guillaumejaume.github.io/FUNSD/)
    dataset and the [SROIE](https://rrc.cvc.uab.es/?ch=13) dataset.
    c                   D  ^  \ rS rSrU 4S jrS r\\       SS\R                  S-  S\R                  S-  S\R                  S-  S\R                  S-  S	\R                  S-  S
\R                  S-  S\R                  S-  S\\   S\\-  4S jj5       5       rSrU =r$ )LayoutLMForTokenClassificationi  c                 0  > [         TU ]  U5        UR                  U l        [        U5      U l        [
        R                  " UR                  5      U l        [
        R                  " UR                  UR                  5      U l        U R                  5         g r   r_  rC   s     rG   r*   'LayoutLMForTokenClassification.__init__  rc  rI   c                 B    U R                   R                  R                  $ r   rF  r-  s    rG   r.  3LayoutLMForTokenClassification.get_input_embeddings  rH  rI   NrS   rT   rn   rU   r%   rV   rP  r{   r   c           
      D   U R                   " SUUUUUUS.UD6n	U	S   n
U R                  U
5      n
U R                  U
5      nSnUb<  [        5       nU" UR	                  SU R
                  5      UR	                  S5      5      n[        UUU	R                  U	R                  S9$ )a  
bbox (`torch.LongTensor` of shape `(batch_size, sequence_length, 4)`, *optional*):
    Bounding boxes of each input sequence tokens. Selected in the range `[0,
    config.max_2d_position_embeddings-1]`. Each bounding box should be a normalized version in (x0, y0, x1, y1)
    format, where (x0, y0) corresponds to the position of the upper left corner in the bounding box, and (x1,
    y1) represents the position of the lower right corner. See [Overview](#Overview) for normalization.
labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
    Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`.

Examples:

```python
>>> from transformers import AutoTokenizer, LayoutLMForTokenClassification
>>> import torch

>>> tokenizer = AutoTokenizer.from_pretrained("microsoft/layoutlm-base-uncased")
>>> model = LayoutLMForTokenClassification.from_pretrained("microsoft/layoutlm-base-uncased")

>>> words = ["Hello", "world"]
>>> normalized_word_boxes = [637, 773, 693, 782], [698, 773, 733, 782]

>>> token_boxes = []
>>> for word, box in zip(words, normalized_word_boxes):
...     word_tokens = tokenizer.tokenize(word)
...     token_boxes.extend([box] * len(word_tokens))
>>> # add bounding boxes of cls + sep tokens
>>> token_boxes = [[0, 0, 0, 0]] + token_boxes + [[1000, 1000, 1000, 1000]]

>>> encoding = tokenizer(" ".join(words), return_tensors="pt")
>>> input_ids = encoding["input_ids"]
>>> attention_mask = encoding["attention_mask"]
>>> token_type_ids = encoding["token_type_ids"]
>>> bbox = torch.tensor([token_boxes])
>>> token_labels = torch.tensor([1, 1, 0, 0]).unsqueeze(0)  # batch size of 1

>>> outputs = model(
...     input_ids=input_ids,
...     bbox=bbox,
...     attention_mask=attention_mask,
...     token_type_ids=token_type_ids,
...     labels=token_labels,
... )

>>> loss = outputs.loss
>>> logits = outputs.logits
```rg  r   Nr'   rR  r  )	r  r>   ra  r   r   r`  r   r   r  )rD   rS   rT   rn   rU   r%   rV   rP  r{   rU  r  rT  rS  rW  s                 rG   r`   &LayoutLMForTokenClassification.forward  s    v -- 
))%'
 
 "!*,,71')HFKKDOO<fkk"oND$!//))	
 	
rI   rn  rY  )rb   rc   rd   re   r*   r.  r   r   r@   r?  r   r   r   r   r   r`   rg   rh   ri   s   @rG   rp  rp    s    8  .2(,37260426*.R
##d*R
 %R
 ))D0	R

 ((4/R
 &&-R
 ((4/R
   4'R
 +,R
 
&	&R
  R
rI   rp  c                   h  ^  \ rS rSrSU 4S jjrS r\\        SS\R                  S-  S\R                  S-  S\R                  S-  S\R                  S-  S	\R                  S-  S
\R                  S-  S\R                  S-  S\R                  S-  S\\   S\\-  4S jj5       5       rSrU =r$ )LayoutLMForQuestionAnsweringip  c                    > [         TU ]  U5        UR                  U l        [        U5      U l        [
        R                  " UR                  UR                  5      U l        U R                  5         g)z}
has_visual_segment_embedding (`bool`, *optional*, defaults to `True`):
    Whether or not to add visual segment embeddings.
N)
r)   r*   r`  r%  r  r   r   r-   
qa_outputsr)  )rD   rE   has_visual_segment_embeddingrF   s      rG   r*   %LayoutLMForQuestionAnswering.__init__r  sU    
 	  ++%f-))F$6$68I8IJ 	rI   c                 B    U R                   R                  R                  $ r   rF  r-  s    rG   r.  1LayoutLMForQuestionAnswering.get_input_embeddings  rH  rI   NrS   rT   rn   rU   r%   rV   start_positionsend_positionsr{   r   c	           
         U R                   " S
UUUUUUS.U	D6n
U
S   nU R                  U5      nUR                  SSS9u  pUR                  S5      R	                  5       nUR                  S5      R	                  5       nSnUb  Ub  [        UR                  5       5      S:  a  UR                  S5      n[        UR                  5       5      S:  a  UR                  S5      nUR                  S5      nUR                  SU5      nUR                  SU5      n[        US9nU" X5      nU" X5      nUU-   S-  n[        UUUU
R                  U
R                  S	9$ )a  
bbox (`torch.LongTensor` of shape `(batch_size, sequence_length, 4)`, *optional*):
    Bounding boxes of each input sequence tokens. Selected in the range `[0,
    config.max_2d_position_embeddings-1]`. Each bounding box should be a normalized version in (x0, y0, x1, y1)
    format, where (x0, y0) corresponds to the position of the upper left corner in the bounding box, and (x1,
    y1) represents the position of the lower right corner. See [Overview](#Overview) for normalization.

Example:

In the example below, we prepare a question + context pair for the LayoutLM model. It will give us a prediction
of what it thinks the answer is (the span of the answer within the texts parsed from the image).

```python
>>> from transformers import AutoTokenizer, LayoutLMForQuestionAnswering
>>> from datasets import load_dataset
>>> import torch

>>> tokenizer = AutoTokenizer.from_pretrained("impira/layoutlm-document-qa", add_prefix_space=True)
>>> model = LayoutLMForQuestionAnswering.from_pretrained("impira/layoutlm-document-qa", revision="1e3ebac")

>>> dataset = load_dataset("nielsr/funsd", split="train")
>>> example = dataset[0]
>>> question = "what's his name?"
>>> words = example["words"]
>>> boxes = example["bboxes"]

>>> encoding = tokenizer(
...     question.split(), words, is_split_into_words=True, return_token_type_ids=True, return_tensors="pt"
... )
>>> bbox = []
>>> for i, s, w in zip(encoding.input_ids[0], encoding.sequence_ids(0), encoding.word_ids(0)):
...     if s == 1:
...         bbox.append(boxes[w])
...     elif i == tokenizer.sep_token_id:
...         bbox.append([1000] * 4)
...     else:
...         bbox.append([0] * 4)
>>> encoding["bbox"] = torch.tensor([bbox])

>>> word_ids = encoding.word_ids(0)
>>> outputs = model(**encoding)
>>> loss = outputs.loss
>>> start_scores = outputs.start_logits
>>> end_scores = outputs.end_logits
>>> start, end = word_ids[start_scores.argmax(-1)], word_ids[end_scores.argmax(-1)]
>>> print(" ".join(words[start : end + 1]))
M. Hamann P. Harper, P. Martinez
```rg  r   r   r'   )rq   N)ignore_indexrN   )rS  start_logits
end_logitsr   r  r  )r  rz  splitrl  rz   lenrO   clampr   r   r   r  )rD   rS   rT   rn   rU   r%   rV   r  r  r{   rU  r  rT  r  r  
total_lossignored_indexrW  
start_lossend_losss                       rG   r`   $LayoutLMForQuestionAnswering.forward  sy   ~ -- 
))%'
 
 "!*1#)<<r<#: #++B/::<''+668

&=+D?'')*Q."1"9"9""==%%'(1, - 5 5b 9(--a0M-33A}EO)//=AM']CH!,@J
:H$x/14J+%!!//))
 	
rI   )r  r`  rz  )T)NNNNNNNN)rb   rc   rd   re   r*   r.  r   r   r@   r?  r   r   r   r   r   r`   rg   rh   ri   s   @rG   rx  rx  p  s   8  .2(,372604263715e
##d*e
 %e
 ))D0	e

 ((4/e
 &&-e
 ((4/e
 ))D0e
 ''$.e
 +,e
 
-	-e
  e
rI   rx  )rA  r]  rp  rx  r%  r  )r   )Erf   collections.abcr   r@   r   torch.nnr   r   r    r	   r  activationsr
   modeling_layersr   modeling_outputsr   r   r   r   r   r   modeling_utilsr   r   processing_utilsr   pytorch_utilsr   utilsr   r   r   r   utils.genericr   utils.output_capturingr   configuration_layoutlmr   
get_loggerrb   loggerr;   r9   Moduler   r   floatr~   r   r   r   r   r   r   r   r   r   r  r  r  r%  rA  r]  rp  rx  __all__r  rI   rG   <module>r     se    $   A A & ! 9  G & 6 R R 7 5 2 
		H	% LL I Ih %II%<<% 
% <<	%
 LL4'% % %.3)BII 3)n 		 .299  RYY . D
bii 
4RYY  bii $ryy "!")) ! io i i& p
+ p
 p
f p
1 p
 p
f v
(? v
v
r b
%< b
b
J y
#: y
 y
xrI   