
    Z j                     d   S r SSKJr  SSKrSSKJr  SSKJrJrJr  SSK	J
r  SSKJr  SS	KJr  SS
KJrJrJrJrJrJr  SSKJrJr  SSKJr  SSKJr  SSKJrJ r J!r!J"r"  SSK#J$r$  SSK%J&r&  SSK'J(r(  \"RR                  " \*5      r+ " S S\RX                  5      r- " S S\RX                  5      r. " S S\RX                  5      r/ " S S\RX                  5      r0 " S S\RX                  5      r1 " S S\RX                  5      r2 " S S \RX                  5      r3 " S! S"\RX                  5      r4 " S# S$\RX                  5      r5 SCS%\RX                  S&\Rl                  S'\Rl                  S(\Rl                  S)\Rl                  S-  S*\7S+\74S, jjr8 " S- S.\RX                  5      r9 " S/ S0\RX                  5      r: " S1 S2\5      r; " S3 S4\RX                  5      r<\  " S5 S6\5      5       r=\  " S7 S8\=5      5       r>\  " S9 S:\=5      5       r?\ " S;S<9 " S= S>\=5      5       r@\ " S?S<9 " S@ SA\=5      5       rA/ SBQrBg)DzPyTorch MarkupLM model.    )CallableN)nn)BCEWithLogitsLossCrossEntropyLossMSELoss   )initialization)ACT2FN)GradientCheckpointingLayer)BaseModelOutputBaseModelOutputWithPoolingMaskedLMOutputQuestionAnsweringModelOutputSequenceClassifierOutputTokenClassifierOutput)ALL_ATTENTION_FUNCTIONSPreTrainedModel)Unpack)apply_chunking_to_forward)TransformersKwargsauto_docstringcan_return_tuplelogging)merge_with_config_defaults)capture_outputs   )MarkupLMConfigc                   6   ^  \ rS rSrSrU 4S jrSS jrSrU =r$ )XPathEmbeddings-   zConstruct the embeddings from xpath tags and subscripts.

We drop tree-id in this version, as its info can be covered by xpath.
c           	        > [         TU ]  5         UR                  U l        [        R                  " UR
                  U R                  -  UR                  5      U l        [        R                  " UR                  5      U l
        [        R                  " 5       U l        [        R                  " UR
                  U R                  -  SUR                  -  5      U l        [        R                  " SUR                  -  UR                  5      U l        [        R                  " [!        U R                  5       Vs/ s H.  n[        R"                  " UR$                  UR
                  5      PM0     sn5      U l        [        R                  " [!        U R                  5       Vs/ s H.  n[        R"                  " UR(                  UR
                  5      PM0     sn5      U l        g s  snf s  snf )N   )super__init__	max_depthr   Linearxpath_unit_hidden_sizehidden_sizexpath_unitseq2_embeddingsDropouthidden_dropout_probdropoutReLU
activationxpath_unitseq2_inner	inner2emb
ModuleListrange	Embeddingmax_xpath_tag_unit_embeddingsxpath_tag_sub_embeddingsmax_xpath_subs_unit_embeddingsxpath_subs_sub_embeddings)selfconfig_	__class__s      /root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/models/markuplm/modeling_markuplm.pyr$   XPathEmbeddings.__init__3   sd   )))+63P3PSWSaSa3acicucu)v&zz&"<"<='')$&IIf.K.Kdnn.\^_bhbtbt^t$u!1v'9'9#96;M;MN(* t~~..A VAA6C`C`a.)
% *, t~~..A VBBFDaDab.*
&s   75G!5G&c           	         / n/ n[        U R                  5       H_  nUR                  U R                  U   " US S 2S S 2U4   5      5        UR                  U R                  U   " US S 2S S 2U4   5      5        Ma     [
        R                  " USS9n[
        R                  " USS9nX4-   nU R                  U R                  U R                  U R                  U5      5      5      5      nU$ )Ndim)r2   r%   appendr5   r7   torchcatr0   r,   r.   r/   )r8   xpath_tags_seqxpath_subs_seqxpath_tags_embeddingsxpath_subs_embeddingsixpath_embeddingss          r<   forwardXPathEmbeddings.forwardM   s     " "t~~&A!(()F)Fq)I.YZ\]_`Y`Ja)bc!(()G)G)J>Z[]^`aZaKb)cd ' !&		*?R H %		*?R H0H>>$,,ttG`G`aqGr7s*tu    )r.   r,   r0   r%   r7   r5   r)   r/   )NN)	__name__
__module____qualname____firstlineno____doc__r$   rK   __static_attributes____classcell__r;   s   @r<   r   r   -   s    

4   rM   r   c                   f   ^  \ rS rSrSrU 4S jr\S 5       r\SS j5       r      S	S jr	Sr
U =r$ )
MarkupLMEmbeddings_   zGConstruct the embeddings from word, position and token_type embeddings.c                 X  > [         TU ]  5         Xl        [        R                  " UR
                  UR                  UR                  S9U l        [        R                  " UR                  UR                  5      U l
        UR                  U l        [        U5      U l        [        R                  " UR                  UR                  5      U l        [        R                   " UR                  UR"                  S9U l        [        R$                  " UR&                  5      U l        U R+                  S[,        R.                  " UR                  5      R1                  S5      SS9  UR                  U l        [        R                  " UR                  UR                  U R2                  S9U l
        g )N)padding_idxepsposition_idsr   r?   F)
persistent)r#   r$   r9   r   r3   
vocab_sizer(   pad_token_idword_embeddingsmax_position_embeddingsposition_embeddingsr%   r   rJ   type_vocab_sizetoken_type_embeddings	LayerNormlayer_norm_epsr*   r+   r,   register_bufferrC   arangeexpandrZ   r8   r9   r;   s     r<   r$   MarkupLMEmbeddings.__init__b   s8   !||F,=,=v?Q?Q_e_r_rs#%<<0N0NPVPbPb#c )) / 7%'\\&2H2H&J\J\%]"f&8&8f>S>STzz&"<"<=ELL)G)GHOOPWXej 	 	
 "..#%<<**F,>,>DL\L\$
 rM   c                     U R                  5       SS nUS   n[        R                  " US-   X1-   S-   [        R                  U R                  S9nUR                  S5      R                  U5      $ )z
We are provided embeddings directly. We cannot infer which are padded so just generate sequential position ids.

Args:
    inputs_embeds: torch.Tensor

Returns: torch.Tensor
Nr?   r   dtypedevicer   )sizerC   rj   longrq   	unsqueezerk   )inputs_embedsrZ   input_shapesequence_lengthr]   s        r<   &create_position_ids_from_inputs_embeds9MarkupLMEmbeddings.create_position_ids_from_inputs_embedsz   sn     $((*3B/%a.||!O_:Q>ejjYfYmYm
 %%a(//<<rM   c                     U R                  U5      R                  5       n[        R                  " USS9R	                  U5      U-   U-  nUR                  5       U-   $ )z
Replace non-padding symbols with their position numbers. Position numbers begin at padding_idx+1. Padding symbols
are ignored. This is modified from fairseq's `utils.make_positions`.

Args:
    x: torch.Tensor x:

Returns: torch.Tensor
r   r@   )neintrC   cumsumtype_asrs   )	input_idsrZ   past_key_values_lengthmaskincremental_indicess        r<   "create_position_ids_from_input_ids5MarkupLMEmbeddings.create_position_ids_from_input_ids   sW     ||K(,,.$||Da8@@FI__cgg"'')K77rM   c                    Ub  UR                  5       nOUR                  5       S S nUb  UR                  OUR                  nUc:  Ub  U R                  XR                  5      nOU R	                  X`R                  5      nUc$  [
        R                  " U[
        R                  US9nUc  U R                  U5      nUc[  U R                  R                  [
        R                  " [        [        U5      U R                  /-   5      [
        R                  US9-  nUc[  U R                  R                  [
        R                  " [        [        U5      U R                  /-   5      [
        R                  US9-  nUn	U R!                  U5      n
U R#                  U5      nU R%                  X#5      nX-   U-   U-   nU R'                  U5      nU R)                  U5      nU$ )Nr?   ro   )rr   rq   r   rZ   rx   rC   zerosrs   rb   r9   
tag_pad_idonestuplelistr%   subs_pad_idrd   rf   rJ   rg   r,   )r8   r   rE   rF   token_type_idsr]   ru   rv   rq   words_embeddingsrd   rf   rJ   
embeddingss                 r<   rK   MarkupLMEmbeddings.forward   s     #..*K',,.s3K%.%:!!@T@T$#FFyRbRbc#JJ=ZjZjk!"[[EJJvVN  00;M !![[33ejjd;'4>>*::;5::V\7 N !![[44uzzd;'4>>*::;5::V\8 N )"66|D $ : :> J00P%;>SSVff
^^J/
\\*-
rM   )	rg   r9   r,   r%   rZ   rd   rf   rb   rJ   )r   )NNNNNN)rN   rO   rP   rQ   rR   r$   staticmethodrx   r   rK   rS   rT   rU   s   @r<   rW   rW   _   sQ    Q
0 = =" 8 8" 1 1rM   rW   c                   z   ^  \ rS rSrU 4S jrS\R                  S\R                  S\R                  4S jrSrU =r	$ )MarkupLMSelfOutput   c                 (  > [         TU ]  5         [        R                  " UR                  UR                  5      U l        [        R                  " UR                  UR                  S9U l        [        R                  " UR                  5      U l
        g Nr[   )r#   r$   r   r&   r(   denserg   rh   r*   r+   r,   rl   s     r<   r$   MarkupLMSelfOutput.__init__   s`    YYv1163E3EF
f&8&8f>S>STzz&"<"<=rM   hidden_statesinput_tensorreturnc                 p    U R                  U5      nU R                  U5      nU R                  X-   5      nU$ Nr   r,   rg   r8   r   r   s      r<   rK   MarkupLMSelfOutput.forward   5    

=1]3}'CDrM   rg   r   r,   
rN   rO   rP   rQ   r$   rC   TensorrK   rS   rT   rU   s   @r<   r   r      6    >U\\  RWR^R^  rM   r   c                   b   ^  \ rS rSrU 4S jrS\R                  S\R                  4S jrSrU =r	$ )MarkupLMIntermediate   c                   > [         TU ]  5         [        R                  " UR                  UR
                  5      U l        [        UR                  [        5      (       a  [        UR                     U l        g UR                  U l        g r   )r#   r$   r   r&   r(   intermediate_sizer   
isinstance
hidden_actstrr
   intermediate_act_fnrl   s     r<   r$   MarkupLMIntermediate.__init__   s`    YYv1163K3KL
f''--'-f.?.?'@D$'-'8'8D$rM   r   r   c                 J    U R                  U5      nU R                  U5      nU$ r   r   r   r8   r   s     r<   rK   MarkupLMIntermediate.forward   s&    

=100?rM   r   r   rU   s   @r<   r   r      s(    9U\\ ell  rM   r   c                   z   ^  \ rS rSrU 4S jrS\R                  S\R                  S\R                  4S jrSrU =r	$ )MarkupLMOutput   c                 (  > [         TU ]  5         [        R                  " UR                  UR
                  5      U l        [        R                  " UR
                  UR                  S9U l        [        R                  " UR                  5      U l        g r   )r#   r$   r   r&   r   r(   r   rg   rh   r*   r+   r,   rl   s     r<   r$   MarkupLMOutput.__init__   s`    YYv779K9KL
f&8&8f>S>STzz&"<"<=rM   r   r   r   c                 p    U R                  U5      nU R                  U5      nU R                  X-   5      nU$ r   r   r   s      r<   rK   MarkupLMOutput.forward   r   rM   r   r   rU   s   @r<   r   r      r   rM   r   c                   b   ^  \ rS rSrU 4S jrS\R                  S\R                  4S jrSrU =r	$ )MarkupLMPooleri  c                    > [         TU ]  5         [        R                  " UR                  UR                  5      U l        [        R                  " 5       U l        g r   )r#   r$   r   r&   r(   r   Tanhr.   rl   s     r<   r$   MarkupLMPooler.__init__  s9    YYv1163E3EF
'')rM   r   r   c                 \    US S 2S4   nU R                  U5      nU R                  U5      nU$ )Nr   )r   r.   )r8   r   first_token_tensorpooled_outputs       r<   rK   MarkupLMPooler.forward  s6     +1a40

#566rM   )r.   r   r   rU   s   @r<   r   r     s(    $
U\\ ell  rM   r   c                   b   ^  \ rS rSrU 4S jrS\R                  S\R                  4S jrSrU =r	$ )MarkupLMPredictionHeadTransformi  c                 p  > [         TU ]  5         [        R                  " UR                  UR                  5      U l        [        UR                  [        5      (       a  [        UR                     U l
        OUR                  U l
        [        R                  " UR                  UR                  S9U l        g r   )r#   r$   r   r&   r(   r   r   r   r   r
   transform_act_fnrg   rh   rl   s     r<   r$   (MarkupLMPredictionHeadTransform.__init__  s~    YYv1163E3EF
f''--$*6+<+<$=D!$*$5$5D!f&8&8f>S>STrM   r   r   c                 l    U R                  U5      nU R                  U5      nU R                  U5      nU$ r   )r   r   rg   r   s     r<   rK   'MarkupLMPredictionHeadTransform.forward  s4    

=1--m<}5rM   )rg   r   r   r   rU   s   @r<   r   r     s)    UU\\ ell  rM   r   c                   .   ^  \ rS rSrU 4S jrS rSrU =r$ )MarkupLMLMPredictionHeadi#  c                   > [         TU ]  5         [        U5      U l        [        R
                  " UR                  UR                  SS9U l        [        R                  " [        R                  " UR                  5      5      U l        g )NT)bias)r#   r$   r   	transformr   r&   r(   r`   decoder	ParameterrC   r   r   rl   s     r<   r$   !MarkupLMLMPredictionHead.__init__$  s[    8@ yy!3!3V5F5FTRLLV->->!?@	rM   c                 J    U R                  U5      nU R                  U5      nU$ r   )r   r   r   s     r<   rK    MarkupLMLMPredictionHead.forward-  s$    }5]3rM   )r   r   r   )rN   rO   rP   rQ   r$   rK   rS   rT   rU   s   @r<   r   r   #  s    A rM   r   c                   b   ^  \ rS rSrU 4S jrS\R                  S\R                  4S jrSrU =r	$ )MarkupLMOnlyMLMHeadi4  c                 B   > [         TU ]  5         [        U5      U l        g r   )r#   r$   r   predictionsrl   s     r<   r$   MarkupLMOnlyMLMHead.__init__5  s    3F;rM   sequence_outputr   c                 (    U R                  U5      nU$ r   r   )r8   r   prediction_scoress      r<   rK   MarkupLMOnlyMLMHead.forward9  s     ,,_=  rM   r   r   rU   s   @r<   r   r   4  s(    <!u|| ! ! !rM   r   modulequerykeyvalueattention_maskscalingr,   c                    [         R                  " XR                  SS5      5      U-  nUb  X-   n[        R                  R                  US[         R                  S9R                  UR                  5      n[        R                  R                  XU R                  S9n[         R                  " X5      n	U	R                  SS5      R                  5       n	X4$ )N   r   r?   )rA   rp   )ptrainingr   )rC   matmul	transposer   
functionalsoftmaxfloat32torp   r,   r   
contiguous)
r   r   r   r   r   r   r,   kwargsattn_weightsattn_outputs
             r<   eager_attention_forwardr   ?  s     <<}}Q':;gEL!#4==((2U]](SVVW\WbWbcL==((6??([L,,|3K''1-88:K$$rM   c                      ^  \ rS rSrU 4S jr S
S\R                  S\R                  S-  S\\	   S\
\R                  \R                  S-  4   4S jjrS	rU =r$ )MarkupLMSelfAttentioniV  c                 6  > [         TU ]  5         UR                  UR                  -  S:w  a7  [	        US5      (       d&  [        SUR                   SUR                   S35      eXl        UR                  U l        [        UR                  UR                  -  5      U l        U R                  U R                  -  U l	        [        R                  " UR                  U R                  5      U l        [        R                  " UR                  U R                  5      U l        [        R                  " UR                  U R                  5      U l        [        R                  " UR                   5      U l        UR                   U l        U R                  S-  U l        g )Nr   embedding_sizezThe hidden size (z6) is not a multiple of the number of attention heads ()g      )r#   r$   r(   num_attention_headshasattr
ValueErrorr9   r|   attention_head_sizeall_head_sizer   r&   r   r   r   r*   attention_probs_dropout_probr,   attention_dropoutr   rl   s     r<   r$   MarkupLMSelfAttention.__init__W  sD    : ::a?PVXhHiHi#F$6$6#7 8 445Q8 
 #)#=#= #&v'9'9F<V<V'V#W !558P8PPYYv1143E3EF
99V//1C1CDYYv1143E3EF
zz&"E"EF!'!D!D//5rM   Nr   r   r   r   c                    UR                   S S n/ UQSPU R                  P7nU R                  U5      R                  U5      R	                  SS5      nU R                  U5      R                  U5      R	                  SS5      nU R                  U5      R                  U5      R	                  SS5      n[        R                  " U R                  R                  [        5      n	U	" U UUUU4U R                  (       d  SOU R                  U R                  S.UD6u  pU
R                  " / UQSP76 R!                  5       n
X4$ )Nr?   r   r           )r,   r   )shaper   r   viewr   r   r   r   get_interfacer9   _attn_implementationr   r   r   r   reshaper   )r8   r   r   r   rv   hidden_shapequery_states
key_statesvalue_statesattention_interfacer   r   s               r<   rK   MarkupLMSelfAttention.forwardl  s8    $))#2.CCbC$*B*BCzz-055lCMMaQRSXXm,11,?II!QO
zz-055lCMMaQRS(?(M(MKK,,.E)
 %8	%
  $}}C$2H2HLL	%
 	%
! "));;;;FFH((rM   )
r   r   r   r9   r,   r   r   r   r   r   r   )rN   rO   rP   rQ   r$   rC   r   FloatTensorr   r   r   rK   rS   rT   rU   s   @r<   r   r   V  si    60 48)||) ))D0) +,	)
 
u||U\\D00	1) )rM   r   c            	          ^  \ rS rSrU 4S jr S
S\R                  S\R                  S-  S\\	   S\R                  4S jjr
S	rU =r$ )MarkupLMAttentioni  c                 b   > [         TU ]  5         [        U5      U l        [	        U5      U l        g r   )r#   r$   r   r8   r   outputrl   s     r<   r$   MarkupLMAttention.__init__  s&    )&1	(0rM   Nr   r   r   r   c                 Z    UnU R                   " U4SU0UD6u  pU R                  X5      nU$ Nr   )r8   r  )r8   r   r   r   residualr:   s         r<   rK   MarkupLMAttention.forward  sE     !99
)
 

 M<rM   )r  r8   r   )rN   rO   rP   rQ   r$   rC   r   r  r   r   rK   rS   rT   rU   s   @r<   r  r    sV    1 48|| ))D0 +,	
 
 rM   r  c            	          ^  \ rS rSrU 4S jr SS\R                  S\R                  S-  S\\	   S\R                  4S jjr
S	 rS
rU =r$ )MarkupLMLayeri  c                    > [         TU ]  5         UR                  U l        SU l        [	        U5      U l        [        U5      U l        [        U5      U l	        g )Nr   )
r#   r$   chunk_size_feed_forwardseq_len_dimr  	attentionr   intermediater   r  rl   s     r<   r$   MarkupLMLayer.__init__  sI    '-'E'E$*6208$V,rM   Nr   r   r   r   c                     U R                   " U4SU0UD6n[        U R                  U R                  U R                  U5      nU$ r  )r  r   feed_forward_chunkr  r  )r8   r   r   r   s       r<   rK   MarkupLMLayer.forward  sW     
)
 
 2##T%A%A4CSCSUb
 rM   c                 J    U R                  U5      nU R                  X!5      nU$ r   )r  r  )r8   attention_outputintermediate_outputlayer_outputs       r<   r    MarkupLMLayer.feed_forward_chunk  s)    "//0@A{{#6IrM   )r  r  r  r  r  r   )rN   rO   rP   rQ   r$   rC   r   r  r   r   rK   r   rS   rT   rU   s   @r<   r  r    s[    - 48|| ))D0 +,	
 
$ rM   r  c            	       |   ^  \ rS rSrU 4S jr S
S\R                  S\R                  S-  S\\	   S\
4S jjrS	rU =r$ )MarkupLMEncoderi  c                    > [         TU ]  5         Xl        [        R                  " [        UR                  5       Vs/ s H  n[        U5      PM     sn5      U l        SU l	        g s  snf )NF)
r#   r$   r9   r   r1   r2   num_hidden_layersr  layergradient_checkpointing)r8   r9   rI   r;   s      r<   r$   MarkupLMEncoder.__init__  sR    ]]5IaIaCb#cCbaM&$9Cb#cd
&+# $ds   A&Nr   r   r   r   c                 N    U R                    H  nU" UU40 UD6nM     [        US9$ )N)last_hidden_state)r+  r   )r8   r   r   r   layer_modules        r<   rK   MarkupLMEncoder.forward  s>     !JJL( M ' +
 	
rM   )r9   r,  r+  r   )rN   rO   rP   rQ   r$   rC   r   r  r   r   r   rK   rS   rT   rU   s   @r<   r(  r(    sR    , 48
||
 ))D0
 +,	

 

 
rM   r(  c                   j   ^  \ rS rSr% \\S'   Sr\\S.r	\
R                  " 5       U 4S j5       rSrU =r$ )MarkupLMPreTrainedModeli  r9   markuplm)r   
attentionsc                 r  > [         TU ]  U5        [        U[        5      (       a!  [        R
                  " UR                  5        g[        U[        5      (       a\  [        R                  " UR                  [        R                  " UR                  R                  S   5      R                  S5      5        gg)zInitialize the weightsr?   r^   N)r#   _init_weightsr   r   initzeros_r   rW   copy_r]   rC   rj   r  rk   )r8   r   r;   s     r<   r7  %MarkupLMPreTrainedModel._init_weights  s~     	f%f677KK$ 233JJv**ELL9L9L9R9RSU9V,W,^,^_f,gh 4rM    )rN   rO   rP   rQ   r   __annotations__base_model_prefixr  r   _can_record_outputsrC   no_gradr7  rS   rT   rU   s   @r<   r3  r3    s9    "&+
 ]]_i irM   r3  c                   X  ^  \ rS rSrSU 4S jjrS rS r\\\	       SS\
R                  S-  S\
R                  S-  S\
R                  S-  S	\
R                  S-  S
\
R                  S-  S\
R                  S-  S\
R                  S-  S\\   S\\-  4S jj5       5       5       rSrU =r$ )MarkupLMModeli  c                    > [         TU ]  U5        Xl        [        U5      U l        [        U5      U l        U(       a  [        U5      OSU l        U R                  5         g)z^
add_pooling_layer (bool, *optional*, defaults to `True`):
    Whether to add a pooling layer
N)
r#   r$   r9   rW   r   r(  encoderr   pooler	post_init)r8   r9   add_pooling_layerr;   s      r<   r$   MarkupLMModel.__init__  sK    
 	 ,V4&v.0AnV,t 	rM   c                 .    U R                   R                  $ r   r   rb   )r8   s    r<   get_input_embeddings"MarkupLMModel.get_input_embeddings  s    ...rM   c                 $    XR                   l        g r   rJ  )r8   r   s     r<   set_input_embeddings"MarkupLMModel.set_input_embeddings  s    */'rM   Nr   rE   rF   r   r   r]   ru   r   r   c           	         Ub  Ub  [        S5      eUb"  U R                  X5        UR                  5       n	O"Ub  UR                  5       SS n	O[        S5      eUb  UR                  OUR                  n
Uc  [        R
                  " XS9nUc$  [        R                  " U	[        R                  U
S9nUR                  S5      R                  S5      nUR                  U R                  S	9nS
U-
  S-  nU R                  UUUUUUS9nU R                  " UU40 UD6nUS   nU R                  b  U R                  U5      OSn[        UUS9$ )ao  
xpath_tags_seq (`torch.LongTensor` of shape `(batch_size, sequence_length, config.max_depth)`, *optional*):
    Tag IDs for each token in the input sequence, padded up to config.max_depth.
xpath_subs_seq (`torch.LongTensor` of shape `(batch_size, sequence_length, config.max_depth)`, *optional*):
    Subscript IDs for each token in the input sequence, padded up to config.max_depth.

Examples:

```python
>>> from transformers import AutoProcessor, MarkupLMModel

>>> processor = AutoProcessor.from_pretrained("microsoft/markuplm-base")
>>> model = MarkupLMModel.from_pretrained("microsoft/markuplm-base")

>>> html_string = "<html> <head> <title>Page Title</title> </head> </html>"

>>> encoding = processor(html_string, return_tensors="pt")

>>> outputs = model(**encoding)
>>> last_hidden_states = outputs.last_hidden_state
>>> list(last_hidden_states.shape)
[1, 4, 768]
```NzDYou cannot specify both input_ids and inputs_embeds at the same timer?   z5You have to specify either input_ids or inputs_embeds)rq   ro   r   r   )rp   g      ?g     )r   rE   rF   r]   r   ru   r   )r/  pooler_output)r   %warn_if_padding_and_no_attention_maskrr   rq   rC   r   r   rs   rt   r   rp   r   rD  rE  r   )r8   r   rE   rF   r   r   r]   ru   r   rv   rq   extended_attention_maskembedding_outputencoder_outputsr   r   s                   r<   rK   MarkupLMModel.forward  so   J  ]%>cdd"66yQ#..*K&',,.s3KTUU%.%:!!@T@T!"ZZCN!"[[EJJvVN"0":":1"="G"G"J"9"<"<4::"<"N#&)@#@H"L??))%)' + 
 ,,#
 

 *!,8<8OO4UY)-'
 	
rM   )r9   r   rD  rE  )T)NNNNNNN)rN   rO   rP   rQ   r$   rK  rN  r   r   r   rC   
LongTensorr  r   r   r   r   rK   rS   rT   rU   s   @r<   rB  rB    s	    /0   .2262637260426K
##d*K
 ((4/K
 ((4/	K

 ))D0K
 ((4/K
 &&-K
 ((4/K
 +,K
 
+	+K
    K
rM   rB  c                     ^  \ rS rSrU 4S jr\\         SS\R                  S-  S\R                  S-  S\R                  S-  S\R                  S-  S\R                  S-  S	\R                  S-  S
\R                  S-  S\R                  S-  S\R                  S-  S\	\
   S\\R                     \-  4S jj5       5       rSrU =r$ )MarkupLMForQuestionAnsweringi\  c                    > [         TU ]  U5        UR                  U l        [        USS9U l        [
        R                  " UR                  UR                  5      U l        U R                  5         g NF)rG  )
r#   r$   
num_labelsrB  r4  r   r&   r(   
qa_outputsrF  rl   s     r<   r$   %MarkupLMForQuestionAnswering.__init___  sU      ++%fF))F$6$68I8IJ 	rM   Nr   rE   rF   r   r   r]   ru   start_positionsend_positionsr   r   c
           
         U R                   " U4UUUUUUS.U
D6nUS   nU R                  U5      nUR                  SSS9u  pUR                  S5      R	                  5       nUR                  S5      R	                  5       nSnUb  U	b  [        UR                  5       5      S:  a  UR                  S5      n[        U	R                  5       5      S:  a  U	R                  S5      n	UR                  S5      nUR                  SU5        U	R                  SU5        [        US9nU" X5      nU" X5      nUU-   S-  n[        UUUUR                  UR                  S	9$ )
a  
xpath_tags_seq (`torch.LongTensor` of shape `(batch_size, sequence_length, config.max_depth)`, *optional*):
    Tag IDs for each token in the input sequence, padded up to config.max_depth.
xpath_subs_seq (`torch.LongTensor` of shape `(batch_size, sequence_length, config.max_depth)`, *optional*):
    Subscript IDs for each token in the input sequence, padded up to config.max_depth.

Examples:

```python
>>> from transformers import AutoProcessor, MarkupLMForQuestionAnswering
>>> import torch

>>> processor = AutoProcessor.from_pretrained("microsoft/markuplm-base-finetuned-websrc")
>>> model = MarkupLMForQuestionAnswering.from_pretrained("microsoft/markuplm-base-finetuned-websrc")

>>> html_string = "<html> <head> <title>My name is Niels</title> </head> </html>"
>>> question = "What's his name?"

>>> encoding = processor(html_string, questions=question, return_tensors="pt")

>>> with torch.no_grad():
...     outputs = model(**encoding)

>>> answer_start_index = outputs.start_logits.argmax()
>>> answer_end_index = outputs.end_logits.argmax()

>>> predict_answer_tokens = encoding.input_ids[0, answer_start_index : answer_end_index + 1]
>>> processor.decode(predict_answer_tokens).strip()
'Niels'
```rE   rF   r   r   r]   ru   r   r   r?   r@   N)ignore_indexr   )lossstart_logits
end_logitsr   r5  )r4  r]  splitsqueezer   lenrr   clamp_r   r   r   r5  )r8   r   rE   rF   r   r   r]   ru   r_  r`  r   outputsr   logitsre  rf  
total_lossignored_indexloss_fct
start_lossend_losss                        r<   rK   $MarkupLMForQuestionAnswering.forwardi  sx   Z --	
))))%'	
 	
 "!*1#)<<r<#: #++B/::<''+668

&=+D?'')*Q."1"9"9""==%%'(1, - 5 5b 9(--a0M""1m4  M2']CH!,@J
:H$x/14J+%!!//))
 	
rM   )r4  r\  r]  )	NNNNNNNNN)rN   rO   rP   rQ   r$   r   r   rC   r   r   r   r   r   rK   rS   rT   rU   s   @r<   rY  rY  \  s      *..2.2.2.2,0-1/3-1T
<<$&T
 t+T
 t+	T

 t+T
 t+T
 llT)T
 ||d*T
 ,T
 ||d*T
 +,T
 
u||	;	;T
  T
rM   rY  zC
    MarkupLM Model with a `token_classification` head on top.
    )custom_introc                   x  ^  \ rS rSrU 4S jr\\        SS\R                  S-  S\R                  S-  S\R                  S-  S\R                  S-  S\R                  S-  S	\R                  S-  S
\R                  S-  S\R                  S-  S\	\
   S\\R                     \-  4S jj5       5       rSrU =r$ )MarkupLMForTokenClassificationi  c                 d  > [         TU ]  U5        UR                  U l        [        USS9U l        UR
                  b  UR
                  OUR                  n[        R                  " U5      U l	        [        R                  " UR                  UR                  5      U l        U R                  5         g r[  )r#   r$   r\  rB  r4  classifier_dropoutr+   r   r*   r,   r&   r(   
classifierrF  r8   r9   rw  r;   s      r<   r$   'MarkupLMForTokenClassification.__init__  s      ++%fF)/)B)B)NF%%TZTnTn 	 zz"45))F$6$68I8IJ 	rM   Nr   rE   rF   r   r   r]   ru   labelsr   r   c	           
      8   U R                   " U4UUUUUUS.U	D6n
U
S   nU R                  U5      nSnUbF  [        5       nU" UR                  SU R                  R
                  5      UR                  S5      5      n[        UUU
R                  U
R                  S9$ )a  
xpath_tags_seq (`torch.LongTensor` of shape `(batch_size, sequence_length, config.max_depth)`, *optional*):
    Tag IDs for each token in the input sequence, padded up to config.max_depth.
xpath_subs_seq (`torch.LongTensor` of shape `(batch_size, sequence_length, config.max_depth)`, *optional*):
    Subscript IDs for each token in the input sequence, padded up to config.max_depth.
labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
    Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`.

Examples:

```python
>>> from transformers import AutoProcessor, AutoModelForTokenClassification
>>> import torch

>>> processor = AutoProcessor.from_pretrained("microsoft/markuplm-base")
>>> processor.parse_html = False
>>> model = AutoModelForTokenClassification.from_pretrained("microsoft/markuplm-base", num_labels=7)

>>> nodes = ["hello", "world"]
>>> xpaths = ["/html/body/div/li[1]/div/span", "/html/body/div/li[1]/div/span"]
>>> node_labels = [1, 2]
>>> encoding = processor(nodes=nodes, xpaths=xpaths, node_labels=node_labels, return_tensors="pt")

>>> with torch.no_grad():
...     outputs = model(**encoding)

>>> loss = outputs.loss
>>> logits = outputs.logits
```rb  r   Nr?   rd  rl  r   r5  )	r4  rx  r   r  r9   r\  r   r   r5  )r8   r   rE   rF   r   r   r]   ru   r{  r   rk  r   r   rd  ro  s                  r<   rK   &MarkupLMForTokenClassification.forward  s    V --	
))))%'	
 	
 "!* OOO<')H!&&r4;;+A+ABBD
 %$!//))	
 	
rM   )rx  r,   r4  r\  NNNNNNNN)rN   rO   rP   rQ   r$   r   r   rC   r   r   r   r   r   rK   rS   rT   rU   s   @r<   ru  ru    s     *..2.2.2.2,0-1&*D
<<$&D
 t+D
 t+	D

 t+D
 t+D
 llT)D
 ||d*D
 t#D
 +,D
 
u||	~	-D
  D
rM   ru  z
    MarkupLM Model transformer with a sequence classification/regression head on top (a linear layer on top of the
    pooled output) e.g. for GLUE tasks.
    c                   x  ^  \ rS rSrU 4S jr\\        SS\R                  S-  S\R                  S-  S\R                  S-  S\R                  S-  S\R                  S-  S	\R                  S-  S
\R                  S-  S\R                  S-  S\	\
   S\\R                     \-  4S jj5       5       rSrU =r$ )!MarkupLMForSequenceClassificationi   c                 r  > [         TU ]  U5        UR                  U l        Xl        [	        U5      U l        UR                  b  UR                  OUR                  n[        R                  " U5      U l
        [        R                  " UR                  UR                  5      U l        U R                  5         g r   )r#   r$   r\  r9   rB  r4  rw  r+   r   r*   r,   r&   r(   rx  rF  ry  s      r<   r$   *MarkupLMForSequenceClassification.__init__(  s      ++%f-)/)B)B)NF%%TZTnTn 	 zz"45))F$6$68I8IJ 	rM   Nr   rE   rF   r   r   r]   ru   r{  r   r   c	           
         U R                   " U4UUUUUUS.U	D6n
U
S   nU R                  U5      nU R                  U5      nSnUGb  U R                  R                  c  U R
                  S:X  a  SU R                  l        OoU R
                  S:  aN  UR                  [        R                  :X  d  UR                  [        R                  :X  a  SU R                  l        OSU R                  l        U R                  R                  S:X  aI  [        5       nU R
                  S:X  a&  U" UR                  5       UR                  5       5      nOU" X5      nOU R                  R                  S:X  a=  [        5       nU" UR                  SU R
                  5      UR                  S5      5      nO,U R                  R                  S:X  a  [        5       nU" X5      n[        UUU
R                   U
R"                  S9$ )	a  
xpath_tags_seq (`torch.LongTensor` of shape `(batch_size, sequence_length, config.max_depth)`, *optional*):
    Tag IDs for each token in the input sequence, padded up to config.max_depth.
xpath_subs_seq (`torch.LongTensor` of shape `(batch_size, sequence_length, config.max_depth)`, *optional*):
    Subscript IDs for each token in the input sequence, padded up to config.max_depth.
labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
    Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
    config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
    `config.num_labels > 1` a classification loss is computed (Cross-Entropy).

Examples:

```python
>>> from transformers import AutoProcessor, AutoModelForSequenceClassification
>>> import torch

>>> processor = AutoProcessor.from_pretrained("microsoft/markuplm-base")
>>> model = AutoModelForSequenceClassification.from_pretrained("microsoft/markuplm-base", num_labels=7)

>>> html_string = "<html> <head> <title>Page Title</title> </head> </html>"
>>> encoding = processor(html_string, return_tensors="pt")

>>> with torch.no_grad():
...     outputs = model(**encoding)

>>> loss = outputs.loss
>>> logits = outputs.logits
```rb  r   N
regressionsingle_label_classificationmulti_label_classificationr?   r}  )r4  r,   rx  r9   problem_typer\  rp   rC   rs   r|   r   rh  r   r  r   r   r   r5  )r8   r   rE   rF   r   r   r]   ru   r{  r   rk  r   rl  rd  ro  s                  r<   rK   )MarkupLMForSequenceClassification.forward7  s   T --	
))))%'	
 	
  
]3/{{''/??a'/;DKK,__q(fllejj.HFLL\a\e\eLe/LDKK,/KDKK,{{''<7"9??a'#FNN$4fnn6FGD#F3D))-JJ+-B @&++b/R))-II,./'!//))	
 	
rM   )rx  r9   r,   r4  r\  r  )rN   rO   rP   rQ   r$   r   r   rC   r   r   r   r   r   rK   rS   rT   rU   s   @r<   r  r     s	     *..2.2.2.2,0-1&*T
<<$&T
 t+T
 t+	T

 t+T
 t+T
 llT)T
 ||d*T
 t#T
 +,T
 
u||	7	7T
  T
rM   r  )rY  r  ru  rB  r3  )r  )CrR   collections.abcr   rC   r   torch.nnr   r   r    r	   r8  activationsr
   modeling_layersr   modeling_outputsr   r   r   r   r   r   modeling_utilsr   r   processing_utilsr   pytorch_utilsr   utilsr   r   r   r   utils.genericr   utils.output_capturingr   configuration_markuplmr   
get_loggerrN   loggerModuler   rW   r   r   r   r   r   r   r   r   floatr   r   r  r  r(  r3  rB  rY  ru  r  __all__r<  rM   r<   <module>r     sJ    $   A A & ! 9  G & 6 R R 7 5 2 
		H	%/ bii / dp ph 299  RYY RYY  bii $ryy "!")) !$ %II%<<% 
% <<	%
 LL4'% % %.3)BII 3)n		 .. D
bii 
2 io i i$ f
+ f
 f
R b
#: b
 b
J 
V
%< V

V
r g
(? g
g
TrM   