
    Z j                        S SK Jr  S SKJr  S SKrS SKJr  S SKJrJrJ	r	  SSK
Jr  SSKJr  SS	KJr  SS
KJr  SSKJrJrJrJrJrJrJrJr  SSKJrJr  SSKJ r   SSK!J"r"J#r#J$r$J%r%  SSK&J'r'J(r(  SSK)J*r*  SSK+J,r,  \%RZ                  " \.5      r/ " S S\R`                  5      r1\Rd                  \1S.r3 " S S\R`                  5      r4  S_S\R`                  S\Rj                  S\Rj                  S\Rj                  S\Rj                  S-  S\6S-  S\6S\ \#   4S  jjr7 " S! S"\R`                  5      r8 " S# S$\R`                  5      r9 " S% S&\R`                  5      r: " S' S(\R`                  5      r; " S) S*\R`                  5      r< " S+ S,\R`                  5      r= " S- S.\R`                  5      r> " S/ S0\R`                  5      r? " S1 S2\R`                  5      r@ " S3 S4\R`                  5      rA " S5 S6\5      rB " S7 S8\R`                  5      rC " S9 S:\R`                  5      rD " S; S<\R`                  5      rE " S= S>\R`                  5      rF " S? S@\R`                  5      rG " SA SB\R`                  5      rH\$ " SC SD\5      5       rI\$" SESF9\ " SG SH\"5      5       5       rJ\$ " SI SJ\I5      5       rK\$" SKSF9 " SL SM\I5      5       rL\$ " SN SO\I5      5       rM " SP SQ\R`                  5      rN\$" SRSF9 " SS ST\I5      5       rO\$" SUSF9 " SV SW\I5      5       rP\$ " SX SY\I5      5       rQ\$ " SZ S[\I5      5       rR\$ " S\ S]\I5      5       rS/ S^QrTg)`    )Callable)	dataclassN)nn)BCEWithLogitsLossCrossEntropyLossMSELoss   )initialization)ACT2FN)create_bidirectional_mask)GradientCheckpointingLayer)BaseModelOutputBaseModelOutputWithPoolingMaskedLMOutputMultipleChoiceModelOutputNextSentencePredictorOutputQuestionAnsweringModelOutputSequenceClassifierOutputTokenClassifierOutput)ALL_ATTENTION_FUNCTIONSPreTrainedModel)Unpack)ModelOutputTransformersKwargsauto_docstringlogging)can_return_tuplemerge_with_config_defaults)capture_outputs   )MobileBertConfigc                   f   ^  \ rS rSrSU 4S jjrS\R                  S\R                  4S jrSrU =r	$ )NoNorm7   c                    > [         TU ]  5         [        R                  " [        R
                  " U5      5      U l        [        R                  " [        R                  " U5      5      U l        g N)	super__init__r   	Parametertorchzerosbiasonesweight)self	feat_sizeeps	__class__s      ڃ/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/models/mobilebert/modeling_mobilebert.pyr(   NoNorm.__init__8   s@    LLY!78	ll5::i#89    input_tensorreturnc                 8    XR                   -  U R                  -   $ r&   )r.   r,   )r/   r6   s     r3   forwardNoNorm.forward=   s    kk)DII55r5   )r,   r.   r&   
__name__
__module____qualname____firstlineno__r(   r*   Tensorr9   __static_attributes____classcell__r2   s   @r3   r#   r#   7   s(    :
6ELL 6U\\ 6 6r5   r#   )
layer_normno_normc                      ^  \ rS rSrSrU 4S jr    SS\R                  S-  S\R                  S-  S\R                  S-  S\R                  S-  S	\R                  4
S
 jjr
SrU =r$ )MobileBertEmbeddingsD   zGConstruct the embeddings from word, position and token_type embeddings.c                 b  > [         TU ]  5         UR                  U l        UR                  U l        UR                  U l        [
        R                  " UR                  UR                  UR                  S9U l	        [
        R                  " UR                  UR                  5      U l        [
        R                  " UR                  UR                  5      U l        U R                  (       a  SOSnU R                  U-  n[
        R                  " X1R                  5      U l        [         UR"                     " UR                  5      U l        [
        R&                  " UR(                  5      U l        U R-                  S[.        R0                  " UR                  5      R3                  S5      SS9  g )N)padding_idxr	   r    position_idsr    F)
persistent)r'   r(   trigram_inputembedding_sizehidden_sizer   	Embedding
vocab_sizepad_token_idword_embeddingsmax_position_embeddingsposition_embeddingstype_vocab_sizetoken_type_embeddingsLinearembedding_transformationNORM2FNnormalization_type	LayerNormDropouthidden_dropout_probdropoutregister_bufferr*   arangeexpand)r/   configembed_dim_multiplierembedded_input_sizer2   s       r3   r(   MobileBertEmbeddings.__init__G   sD   #11$33!--!||F,=,=v?T?Tbhbubuv#%<<0N0NPVPbPb#c %'\\&2H2H&J\J\%]"$($6$6qA"114HH(*		2EGYGY(Z% !:!:;F<N<NOzz&"<"<= 	ELL)G)GHOOPWXej 	 	
r5   N	input_idstoken_type_idsrK   inputs_embedsr7   c           
      ,   Ub  UR                  5       nOUR                  5       S S nUS   nUc  U R                  S S 2S U24   nUc8  [        R                  " U[        R                  U R                  R
                  S9nUc  U R                  U5      nU R                  (       ah  [        R                  " [        R                  R                  US S 2SS 24   / SQSS9U[        R                  R                  US S 2S S24   / SQSS9/SS	9nU R                  (       d  U R                  U R                  :w  a  U R                  U5      nU R                  U5      nU R!                  U5      nXG-   U-   n	U R#                  U	5      n	U R%                  U	5      n	U	$ )
NrM   r    )dtypedevice)r   r   r   r    r   r           )value)r   r   r    r   r   r      dim)sizerK   r*   r+   longrn   rU   rO   catr   
functionalpadrP   rQ   r[   rW   rY   r^   ra   )
r/   ri   rj   rK   rk   input_shape
seq_lengthrW   rY   
embeddingss
             r3   r9   MobileBertEmbeddings.forward]   s     #..*K',,.s3K ^
,,Q^<L!"[[EJJtO`O`OgOghN  00;M "IIMM%%mAqrE&:<NVY%Z!MM%%mAssF&;=OWZ%[
 M !4!48H8H!H 99-HM #66|D $ : :> J"8;PP
^^J/
\\*-
r5   )	r^   ra   rP   r[   rQ   rW   rY   rO   rU   )NNNN)r<   r=   r>   r?   __doc__r(   r*   
LongTensorFloatTensorr@   r9   rA   rB   rC   s   @r3   rG   rG   D   s    Q
0 .22604260##d*0 ((4/0 &&-	0
 ((4/0 
0 0r5   rG   modulequerykeyrp   attention_maskscalingra   kwargsc                    Uc  UR                  S5      S-  n[        R                  " XR                  SS5      5      U-  nUb  X-   n[        R
                  R                  USS9n[        R
                  R                  XU R                  S9n[        R                  " X5      n	U	R                  SS5      R                  5       n	X4$ )NrM         rq   r	   rr   )ptrainingr    )
rt   r*   matmul	transposer   rw   softmaxra   r   
contiguous)
r   r   r   rp   r   r   ra   r   attn_weightsattn_outputs
             r3   eager_attention_forwardr      s     **R.D( <<}}Q':;gEL!#4==((2(>L==((6??([L,,|3K''1-88:K$$r5   c                      ^  \ rS rSrU 4S jr SS\R                  S\R                  S\R                  S\R                  S-  S\\	   S	\
\R                     4S
 jjrSrU =r$ )MobileBertSelfAttention   c                   > [         TU ]  5         Xl        UR                  U l        [	        UR
                  UR                  -  5      U l        U R                  U R                  -  U l        U R                  S-  U l        [        R                  " UR
                  U R                  5      U l        [        R                  " UR
                  U R                  5      U l        [        R                  " UR                  (       a  UR
                  OUR                  U R                  5      U l        [        R                   " UR"                  5      U l        SU l        g )Nr   F)r'   r(   re   num_attention_headsinttrue_hidden_sizeattention_head_sizeall_head_sizer   r   rZ   r   r   use_bottleneck_attentionrQ   rp   r_   attention_probs_dropout_probra   	is_causalr/   re   r2   s     r3   r(    MobileBertSelfAttention.__init__   s    #)#=#= #&v'>'>A[A['[#\ !558P8PP//5YYv668J8JK
99V44d6H6HIYY'-'F'FF##FL^L^`d`r`r

 zz&"E"EFr5   Nquery_tensor
key_tensorvalue_tensorr   r   r7   c                    UR                   S S n/ UQSPU R                  P7nU R                  U5      R                  " U6 R	                  SS5      nU R                  U5      R                  " U6 R	                  SS5      n	U R                  U5      R                  " U6 R	                  SS5      n
[        R                  " U R                  R                  [        5      nU" U UU	U
U4U R                  (       d  SOU R                  R                  U R                  S.UD6u  pUR                   " / UQSP76 R#                  5       nX4$ )NrM   r    rq   ro   )ra   r   )shaper   r   viewr   r   rp   r   get_interfacere   _attn_implementationr   r   ra   r   r   reshaper   )r/   r   r   r   r   r   ry   hidden_shapequery_layer	key_layervalue_layerattention_interfacer   r   s                 r3   r9   MobileBertSelfAttention.forward   s<    #(("-CCbC$*B*BC jj.33\BLLQPQRHHZ(--|<FFq!L	jj.33\BLLQPQR(?(M(MKK,,.E)
 %8	%
  $}}C$,,..LL	%
 	%
! "));;;;FFH((r5   )
r   r   re   ra   r   r   r   r   r   rp   r&   r<   r=   r>   r?   r(   r*   r@   r   r   r   tupler9   rA   rB   rC   s   @r3   r   r      sw    , 48)ll) LL) ll	)
 ))D0) +,) 
u||	) )r5   r   c                   z   ^  \ rS rSrU 4S jrS\R                  S\R                  S\R                  4S jrSrU =r	$ )MobileBertSelfOutput   c                 t  > [         TU ]  5         UR                  U l        [        R                  " UR
                  UR
                  5      U l        [        UR                     " UR
                  UR                  S9U l
        U R                  (       d&  [        R                  " UR                  5      U l        g g Nr1   )r'   r(   use_bottleneckr   rZ   r   denser\   r]   layer_norm_epsr^   r_   r`   ra   r   s     r3   r(   MobileBertSelfOutput.__init__   s    $33YYv668O8OP
 !:!:;F<S<SY_YnYno""::f&@&@ADL #r5   hidden_statesresidual_tensorr7   c                     U R                  U5      nU R                  (       d  U R                  U5      nU R                  X2-   5      nU$ r&   )r   r   ra   r^   r/   r   r   layer_outputss       r3   r9   MobileBertSelfOutput.forward   s>    

=1"" LL7M}'FGr5   )r^   r   ra   r   r;   rC   s   @r3   r   r      s7    BU\\ ELL UZUaUa  r5   r   c                      ^  \ rS rSrU 4S jr SS\R                  S\R                  S\R                  S\R                  S\R                  S-  S	\\	   S
\
\R                     4S jjrSrU =r$ )MobileBertAttention   c                 b   > [         TU ]  5         [        U5      U l        [	        U5      U l        g r&   )r'   r(   r   r/   r   outputr   s     r3   r(   MobileBertAttention.__init__   s&    +F3	*62r5   Nr   r   r   layer_inputr   r   r7   c                 Z    U R                   " UUUU40 UD6u  pxU R                  Xt5      nXx4$ r&   )r/   r   )	r/   r   r   r   r   r   r   attention_outputr   s	            r3   r9   MobileBertAttention.forward   sG     *.	*

 *
&  ;;'7E--r5   )r   r/   r&   r   rC   s   @r3   r   r      s    3 48.ll. LL. ll	.
 \\. ))D0. +,. 
u||	. .r5   r   c                   b   ^  \ rS rSrU 4S jrS\R                  S\R                  4S jrSrU =r	$ )MobileBertIntermediatei  c                   > [         TU ]  5         [        R                  " UR                  UR
                  5      U l        [        UR                  [        5      (       a  [        UR                     U l        g UR                  U l        g r&   )r'   r(   r   rZ   r   intermediate_sizer   
isinstance
hidden_actstrr   intermediate_act_fnr   s     r3   r(   MobileBertIntermediate.__init__  s`    YYv668P8PQ
f''--'-f.?.?'@D$'-'8'8D$r5   r   r7   c                 J    U R                  U5      nU R                  U5      nU$ r&   r   r   r/   r   s     r3   r9   MobileBertIntermediate.forward  s&    

=100?r5   r   r;   rC   s   @r3   r   r     s(    9U\\ ell  r5   r   c                   z   ^  \ rS rSrU 4S jrS\R                  S\R                  S\R                  4S jrSrU =r	$ )OutputBottlenecki  c                 .  > [         TU ]  5         [        R                  " UR                  UR
                  5      U l        [        UR                     " UR
                  UR                  S9U l
        [        R                  " UR                  5      U l        g r   )r'   r(   r   rZ   r   rQ   r   r\   r]   r   r^   r_   r`   ra   r   s     r3   r(   OutputBottleneck.__init__  sh    YYv668J8JK
 !:!:;F<N<NTZTiTijzz&"<"<=r5   r   r   r7   c                 p    U R                  U5      nU R                  U5      nU R                  X2-   5      nU$ r&   )r   ra   r^   r   s       r3   r9   OutputBottleneck.forward$  s5    

=1]3}'FGr5   )r^   r   ra   r;   rC   s   @r3   r   r     s6    >U\\ ELL UZUaUa  r5   r   c                      ^  \ rS rSrU 4S jrS\R                  S\R                  S\R                  S\R                  4S jrSrU =r	$ )	MobileBertOutputi+  c                   > [         TU ]  5         UR                  U l        [        R                  " UR
                  UR                  5      U l        [        UR                     " UR                  5      U l
        U R                  (       d&  [        R                  " UR                  5      U l        g [        U5      U l        g r&   )r'   r(   r   r   rZ   r   r   r   r\   r]   r^   r_   r`   ra   r   
bottleneckr   s     r3   r(   MobileBertOutput.__init__,  s    $33YYv779P9PQ
 !:!:;F<S<ST""::f&@&@ADL.v6DOr5   intermediate_statesresidual_tensor_1residual_tensor_2r7   c                     U R                  U5      nU R                  (       d&  U R                  U5      nU R                  XB-   5      nU$ U R                  XB-   5      nU R	                  XC5      nU$ r&   )r   r   ra   r^   r   )r/   r   r   r   layer_outputs        r3   r9   MobileBertOutput.forward6  sj     zz"56""<<5L>>,*JKL   >>,*JKL??<KLr5   )r^   r   r   ra   r   r;   rC   s   @r3   r   r   +  sD    7
#(<<
DILL
ejeqeq
	
 
r5   r   c                   b   ^  \ rS rSrU 4S jrS\R                  S\R                  4S jrSrU =r	$ )BottleneckLayeriC  c                    > [         TU ]  5         [        R                  " UR                  UR
                  5      U l        [        UR                     " UR
                  UR                  S9U l
        g r   )r'   r(   r   rZ   rQ   intra_bottleneck_sizer   r\   r]   r   r^   r   s     r3   r(   BottleneckLayer.__init__D  sR    YYv1163O3OP
 !:!:;F<X<X^d^s^str5   r   r7   c                 J    U R                  U5      nU R                  U5      nU$ r&   r   r^   )r/   r   r   s      r3   r9   BottleneckLayer.forwardI  s$    jj/nn[1r5   r^   r   r;   rC   s   @r3   r   r   C  s)    u
U\\ ell  r5   r   c                   h   ^  \ rS rSrU 4S jrS\R                  S\\R                     4S jrSr	U =r
$ )
BottleneckiO  c                    > [         TU ]  5         UR                  U l        UR                  U l        [	        U5      U l        U R                  (       a  [	        U5      U l        g g r&   )r'   r(   key_query_shared_bottleneckr   r   input	attentionr   s     r3   r(   Bottleneck.__init__P  sP    +1+M+M((.(G(G%$V,
++,V4DN ,r5   r   r7   c                     U R                  U5      nU R                  (       a  U4S-  $ U R                  (       a  U R                  U5      nX3X4$ XX4$ )N   )r   r   r   r   )r/   r   bottlenecked_hidden_statesshared_attention_inputs       r3   r9   Bottleneck.forwardX  sX    " &*ZZ%>"((.0144--%)^^M%B"*Mnn!-\\r5   )r   r   r   r   r<   r=   r>   r?   r(   r*   r@   r   r9   rA   rB   rC   s   @r3   r   r   O  s1    5]U\\ ]eELL6I ] ]r5   r   c                   z   ^  \ rS rSrU 4S jrS\R                  S\R                  S\R                  4S jrSrU =r	$ )	FFNOutputis  c                    > [         TU ]  5         [        R                  " UR                  UR
                  5      U l        [        UR                     " UR
                  UR                  S9U l
        g r   )r'   r(   r   rZ   r   r   r   r\   r]   r   r^   r   s     r3   r(   FFNOutput.__init__t  sR    YYv779P9PQ
 !:!:;F<S<SY_YnYnor5   r   r   r7   c                 N    U R                  U5      nU R                  X2-   5      nU$ r&   r   r   s       r3   r9   FFNOutput.forwardy  s'    

=1}'FGr5   r   r;   rC   s   @r3   r   r   s  s7    p
U\\ ELL UZUaUa  r5   r   c                   b   ^  \ rS rSrU 4S jrS\R                  S\R                  4S jrSrU =r	$ )FFNLayeri  c                 b   > [         TU ]  5         [        U5      U l        [	        U5      U l        g r&   )r'   r(   r   intermediater   r   r   s     r3   r(   FFNLayer.__init__  s'    26:'r5   r   r7   c                 J    U R                  U5      nU R                  X!5      nU$ r&   r  r   )r/   r   intermediate_outputr   s       r3   r9   FFNLayer.forward  s(    "//>$7Gr5   r
  r;   rC   s   @r3   r  r    s(    (
U\\ ell  r5   r  c            	          ^  \ rS rSrU 4S jr S
S\R                  S\R                  S-  S\\	   S\R                  4S jjr
S	rU =r$ )MobileBertLayeri  c                   > [         TU ]  5         UR                  U l        UR                  U l        [	        U5      U l        [        U5      U l        [        U5      U l	        U R                  (       a  [        U5      U l        UR                  S:  aL  [        R                  " [        UR                  S-
  5       Vs/ s H  n[        U5      PM     sn5      U l        g g s  snf Nr    )r'   r(   r   num_feedforward_networksr   r   r   r  r   r   r   r   r   
ModuleListranger  ffnr/   re   _r2   s      r3   r(   MobileBertLayer.__init__  s    $33(.(G(G%,V426:&v.(0DO**Q.}}fFeFehiFi@j%k@j1hv&6@j%klDH /%ks   =C Nr   r   r   r7   c                 6   U R                   (       a  U R                  U5      u  pEpgO	U/S-  u  pEpgU R                  " UUUUU40 UD6u  pUn
U R                  S:w  a  U R                   H  nU" U
5      n
M     U R                  U
5      nU R                  XU5      nU$ )Nr   r    )r   r   r   r  r  r  r   )r/   r   r   r   r   r   r   r   self_attention_outputr  r   
ffn_moduler  r   s                 r3   r9   MobileBertLayer.forward  s     BF//R_B`?LlKCP/TUBU?Ll#'>>$
 $
  1((A-"hh
#-.>#?  ' #//0@A{{#6-Xr5   )r   r   r  r  r  r   r   r&   )r<   r=   r>   r?   r(   r*   r@   r   r   r   r9   rA   rB   rC   s   @r3   r  r    sW    m  48|| ))D0 +,	
 
 r5   r  c            
          ^  \ rS rSrU 4S jr S
S\R                  S\R                  S-  S\\	   S\
\-  4S jjrS	rU =r$ )MobileBertEncoderi  c                    > [         TU ]  5         [        R                  " [	        UR
                  5       Vs/ s H  n[        U5      PM     sn5      U l        g s  snf r&   )r'   r(   r   r  r  num_hidden_layersr  layerr  s      r3   r(   MobileBertEncoder.__init__  sB    ]]U6KcKcEd#eEdOF$;Ed#ef
#es   ANr   r   r   r7   c                 d    [        U R                  5       H  u  pEU" UU40 UD6nM     [        US9$ )N)last_hidden_state)	enumerater   r   )r/   r   r   r   ilayer_modules         r3   r9   MobileBertEncoder.forward  s@      )4OA( M  5 ??r5   )r   r&   )r<   r=   r>   r?   r(   r*   r@   r   r   r   r   r   r9   rA   rB   rC   s   @r3   r  r    s^    g 48@||@ ))D0@ +,	@
 
	 @ @r5   r  c                   b   ^  \ rS rSrU 4S jrS\R                  S\R                  4S jrSrU =r	$ )MobileBertPooleri  c                    > [         TU ]  5         UR                  U l        U R                  (       a1  [        R
                  " UR                  UR                  5      U l        g g r&   )r'   r(   classifier_activationdo_activater   rZ   rQ   r   r   s     r3   r(   MobileBertPooler.__init__  sH    !776#5#5v7I7IJDJ r5   r   r7   c                     US S 2S4   nU R                   (       d  U$ U R                  U5      n[        R                  " U5      nU$ )Nr   )r,  r   r*   tanh)r/   r   first_token_tensorpooled_outputs       r3   r9   MobileBertPooler.forward  sE     +1a40%% JJ'9:M!JJ}5M  r5   )r   r,  r;   rC   s   @r3   r)  r)    s)    K	!U\\ 	!ell 	! 	!r5   r)  c                   b   ^  \ rS rSrU 4S jrS\R                  S\R                  4S jrSrU =r	$ )!MobileBertPredictionHeadTransformi  c                 b  > [         TU ]  5         [        R                  " UR                  UR                  5      U l        [        UR                  [        5      (       a  [        UR                     U l
        OUR                  U l
        [        S   " UR                  UR                  S9U l        g )NrD   r   )r'   r(   r   rZ   rQ   r   r   r   r   r   transform_act_fnr\   r   r^   r   s     r3   r(   *MobileBertPredictionHeadTransform.__init__  s    YYv1163E3EF
f''--$*6+<+<$=D!$*$5$5D! .v/A/AvG\G\]r5   r   r7   c                 l    U R                  U5      nU R                  U5      nU R                  U5      nU$ r&   )r   r6  r^   r   s     r3   r9   )MobileBertPredictionHeadTransform.forward  s4    

=1--m<}5r5   )r^   r   r6  r;   rC   s   @r3   r4  r4    s)    ^U\\ ell  r5   r4  c                   b   ^  \ rS rSrU 4S jrS\R                  S\R                  4S jrSrU =r	$ )MobileBertLMPredictionHeadi  c                   > [         TU ]  5         [        U5      U l        [        R
                  " UR                  UR                  UR                  -
  SS9U l	        [        R
                  " UR                  UR                  SS9U l
        [        R                  " [        R                  " UR                  5      5      U l        g )NF)r,   T)r'   r(   r4  	transformr   rZ   rS   rQ   rP   r   decoderr)   r*   r+   r,   r   s     r3   r(   #MobileBertLMPredictionHead.__init__  s    :6B YYv00&2D2DvG\G\2\chi
yy!6!68I8IPTULLV->->!?@	r5   r   r7   c                    U R                  U5      nUR                  [        R                  " U R                  R
                  R                  5       U R                  R
                  /SS95      nXR                  R                  -  nU$ )Nr   rr   )	r=  r   r*   rv   r>  r.   tr   r,   r   s     r3   r9   "MobileBertLMPredictionHead.forward  si    }5%,,UYY8K8K8M8M8OQUQ[Q[QbQb7cij-kl***r5   )r,   r>  r   r=  r;   rC   s   @r3   r;  r;    s)    AU\\ ell  r5   r;  c                   b   ^  \ rS rSrU 4S jrS\R                  S\R                  4S jrSrU =r	$ )MobileBertOnlyMLMHeadi   c                 B   > [         TU ]  5         [        U5      U l        g r&   )r'   r(   r;  predictionsr   s     r3   r(   MobileBertOnlyMLMHead.__init__  s    5f=r5   sequence_outputr7   c                 (    U R                  U5      nU$ r&   rF  )r/   rH  prediction_scoress      r3   r9   MobileBertOnlyMLMHead.forward  s     ,,_=  r5   rJ  r;   rC   s   @r3   rD  rD     s(    >!u|| ! ! !r5   rD  c                      ^  \ rS rSrU 4S jrS\R                  S\R                  S\\R                     4S jrSr	U =r
$ )MobileBertPreTrainingHeadsi
  c                    > [         TU ]  5         [        U5      U l        [        R
                  " UR                  S5      U l        g Nrq   )r'   r(   r;  rF  r   rZ   rQ   seq_relationshipr   s     r3   r(   #MobileBertPreTrainingHeads.__init__  s4    5f= "		&*<*<a @r5   rH  r1  r7   c                 L    U R                  U5      nU R                  U5      nX44$ r&   rF  rQ  )r/   rH  r1  rK  seq_relationship_scores        r3   r9   "MobileBertPreTrainingHeads.forward  s-     ,,_=!%!6!6}!E 88r5   rT  r   rC   s   @r3   rN  rN  
  s=    A
9u|| 9ELL 9UZ[`[g[gUh 9 9r5   rN  c                   ~   ^  \ rS rSr% \\S'   SrSrSrSr	Sr
Sr\\S.r\R                   " 5       U 4S j5       rSrU =r$ )MobileBertPreTrainedModeli  re   
mobilebertT)r   
attentionsc                   > [         TU ]  U5        [        U[        5      (       aA  [        R
                  " UR                  5        [        R                  " UR                  5        g[        U[        5      (       a!  [        R
                  " UR                  5        g[        U[        5      (       a\  [        R                  " UR                  [        R                  " UR                  R                  S   5      R!                  S5      5        gg)zInitialize the weightsrM   rL   N)r'   _init_weightsr   r#   initzeros_r,   ones_r.   r;  rG   copy_rK   r*   rc   r   rd   )r/   r   r2   s     r3   r\  'MobileBertPreTrainedModel._init_weights$  s     	f%ff%%KK$JJv}}% :;;KK$ 455JJv**ELL9L9L9R9RSU9V,W,^,^_f,gh 6r5    )r<   r=   r>   r?   r!   __annotations__base_model_prefixsupports_gradient_checkpointing_supports_flash_attn_supports_sdpa_supports_flex_attn_supports_attention_backendr  r   _can_record_outputsr*   no_gradr\  rA   rB   rC   s   @r3   rX  rX    sV    $&*#N"&(-
 ]]_	i 	ir5   rX  z6
    Output type of [`MobileBertForPreTraining`].
    )custom_introc                       \ rS rSr% SrSr\R                  S-  \S'   Sr	\R                  S-  \S'   Sr
\R                  S-  \S'   Sr\\R                     S-  \S'   Sr\\R                     S-  \S'   S	rg)
MobileBertForPreTrainingOutputi1  ar  
loss (*optional*, returned when `labels` is provided, `torch.FloatTensor` of shape `(1,)`):
    Total loss as the sum of the masked language modeling loss and the next sequence prediction
    (classification) loss.
prediction_logits (`torch.FloatTensor` of shape `(batch_size, sequence_length, config.vocab_size)`):
    Prediction scores of the language modeling head (scores for each vocabulary token before SoftMax).
seq_relationship_logits (`torch.FloatTensor` of shape `(batch_size, 2)`):
    Prediction scores of the next sequence prediction (classification) head (scores of True/False continuation
    before SoftMax).
Nlossprediction_logitsseq_relationship_logitsr   rZ  rb  )r<   r=   r>   r?   r}   ro  r*   r   rc  rp  rq  r   r   rZ  rA   rb  r5   r3   rn  rn  1  s~    	 &*D%

d
")26u((4/68<U..5<59M5**+d2926Je''(4/6r5   rn  c                     ^  \ rS rSrSrSU 4S jjrS rS r\\	\
     SS\R                  S-  S\R                  S-  S	\R                  S-  S
\R                  S-  S\R                  S-  S\\   S\\-  4S jj5       5       5       rSrU =r$ )MobileBertModeliJ  z*
https://huggingface.co/papers/2004.02984
c                    > [         TU ]  U5        Xl        SU l        [	        U5      U l        [        U5      U l        U(       a  [        U5      OSU l	        U R                  5         g)z^
add_pooling_layer (bool, *optional*, defaults to `True`):
    Whether to add a pooling layer
FN)r'   r(   re   gradient_checkpointingrG   r{   r  encoderr)  pooler	post_init)r/   re   add_pooling_layerr2   s      r3   r(   MobileBertModel.__init__P  sT    
 	 &+#.v6(02C&v. 	r5   c                 .    U R                   R                  $ r&   r{   rU   r/   s    r3   get_input_embeddings$MobileBertModel.get_input_embeddingsa  s    ...r5   c                 $    XR                   l        g r&   r|  )r/   rp   s     r3   set_input_embeddings$MobileBertModel.set_input_embeddingsd  s    */'r5   Nri   r   rj   rK   rk   r   r7   c                 
   US L US L-  (       a  [        S5      eU R                  UUUUS9n[        U R                  UUS9nU R                  " U4SU0UD6nUS   n	U R
                  b  U R                  U	5      OS n
[        U	U
S9$ )Nz:You must specify exactly one of input_ids or inputs_embeds)ri   rK   rj   rk   )re   rk   r   r   r   )r#  pooler_output)
ValueErrorr{   r   re   rv  rw  r   )r/   ri   r   rj   rK   rk   r   embedding_outputencoder_outputsrH  r1  s              r3   r9   MobileBertModel.forwardg  s     -t";<YZZ??%)'	 + 
 3;;*)
 ,,
)
 

 *!,8<8OO4UY)-'
 	
r5   )re   r{   rv  ru  rw  )T)NNNNN)r<   r=   r>   r?   r}   r(   r~  r  r   r   r   r*   r~   r   r   r   r   r   r9   rA   rB   rC   s   @r3   rs  rs  J  s    "/0   .237260426$
##d*$
 ))D0$
 ((4/	$

 &&-$
 ((4/$
 +,$
 
+	+$
    $
r5   rs  z
    MobileBert Model with two heads on top as done during the pretraining: a `masked language modeling` head and a
    `next sentence prediction (classification)` head.
    c                     ^  \ rS rSrSSS.rU 4S jrS rS rSS	\S-  S
\	R                  4U 4S jjjr\\       SS\R                  S-  S\R                   S-  S\R                  S-  S\R                  S-  S\R                   S-  S\R                  S-  S\R                  S-  S\\   S
\\-  4S jj5       5       rSrU =r$ )MobileBertForPreTrainingi  cls.predictions.bias,mobilebert.embeddings.word_embeddings.weightzcls.predictions.decoder.biaszcls.predictions.decoder.weightc                    > [         TU ]  U5        [        U5      U l        [	        U5      U l        U R                  5         g r&   )r'   r(   rs  rY  rN  clsrx  r   s     r3   r(   !MobileBertForPreTraining.__init__  s4     )&1-f5 	r5   c                 B    U R                   R                  R                  $ r&   r  rF  r>  r}  s    r3   get_output_embeddings.MobileBertForPreTraining.get_output_embeddings      xx##+++r5   c                     XR                   R                  l        UR                  U R                   R                  l        g r&   r  rF  r>  r,   r/   new_embeddingss     r3   set_output_embeddings.MobileBertForPreTraining.set_output_embeddings  *    '5$$2$7$7!r5   Nnew_num_tokensr7   c                    > U R                  U R                  R                  R                  USS9U R                  R                  l        [        TU ]  US9$ NT)r  
transposed)r  _get_resized_lm_headr  rF  r   r'   resize_token_embeddingsr/   r  r2   s     r3   r  0MobileBertForPreTraining.resize_token_embeddings  sR    %)%>%>HH  &&~RV &? &
" w.n.MMr5   ri   r   rj   rK   rk   labelsnext_sentence_labelr   c           	         U R                   " U4UUUUSS.UD6n	U	SS u  pU R                  X5      u  pSnUbv  Ubs  [        5       nU" UR                  SU R                  R
                  5      UR                  S5      5      nU" UR                  SS5      UR                  S5      5      nUU-   n[        UUUU	R                  U	R                  S9$ )a  
labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
    Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,
    config.vocab_size]` (see `input_ids` docstring) Tokens with indices set to `-100` are ignored (masked), the
    loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`
next_sentence_label (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
    Labels for computing the next sequence prediction (classification) loss. Input should be a sequence pair
    (see `input_ids` docstring) Indices should be in `[0, 1]`:

    - 0 indicates sequence B is a continuation of sequence A,
    - 1 indicates sequence B is a random sequence.

Examples:

```python
>>> from transformers import AutoTokenizer, MobileBertForPreTraining
>>> import torch

>>> tokenizer = AutoTokenizer.from_pretrained("google/mobilebert-uncased")
>>> model = MobileBertForPreTraining.from_pretrained("google/mobilebert-uncased")

>>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0)
>>> # Batch size 1
>>> outputs = model(input_ids)

>>> prediction_logits = outputs.prediction_logits
>>> seq_relationship_logits = outputs.seq_relationship_logits
```Tr   rj   rK   rk   return_dictNrq   rM   )ro  rp  rq  r   rZ  )	rY  r  r   r   re   rS   rn  r   rZ  )r/   ri   r   rj   rK   rk   r  r  r   outputsrH  r1  rK  rU  
total_lossloss_fctmasked_lm_lossnext_sentence_losss                     r3   r9    MobileBertForPreTraining.forward  s    R //
))%'
 
 *1!&48HH_4\1
"5"A')H%&7&<&<RAWAW&XZ`ZeZefhZijN!)*@*E*Eb!*LNaNfNfgiNj!k'*<<J-/$:!//))
 	
r5   r  rY  r&   NNNNNNN)r<   r=   r>   r?   _tied_weights_keysr(   r  r  r   r   rR   r  r   r   r*   r~   r   r   r   r   rn  r9   rA   rB   rC   s   @r3   r  r    s5    )?*X
,8NcDj NBLL N N  .237260426*.7;@
##d*@
 ))D0@
 ((4/	@

 &&-@
 ((4/@
   4'@
 #--4@
 +,@
 
/	/@
  @
r5   r  c                   j  ^  \ rS rSrSSS.rU 4S jrS rS rSS	\S-  S
\	R                  4U 4S jjjr\\      SS\R                  S-  S\R                   S-  S\R                  S-  S\R                  S-  S\R                   S-  S\R                  S-  S\\   S
\\-  4S jj5       5       rSrU =r$ )MobileBertForMaskedLMi  r  r  r  c                    > [         TU ]  U5        [        USS9U l        [	        U5      U l        Xl        U R                  5         g NF)ry  )r'   r(   rs  rY  rD  r  re   rx  r   s     r3   r(   MobileBertForMaskedLM.__init__   s;     )&EJ(0 	r5   c                 B    U R                   R                  R                  $ r&   r  r}  s    r3   r  +MobileBertForMaskedLM.get_output_embeddings	  r  r5   c                     XR                   R                  l        UR                  U R                   R                  l        g r&   r  r  s     r3   r  +MobileBertForMaskedLM.set_output_embeddings  r  r5   Nr  r7   c                    > U R                  U R                  R                  R                  USS9U R                  R                  l        [        TU ]  US9$ r  r  r  s     r3   r  -MobileBertForMaskedLM.resize_token_embeddings  sR    %)%>%>HH  &&~RV &? &
" w.n.MMr5   ri   r   rj   rK   rk   r  r   c           	      6   U R                   " U4UUUUSS.UD6nUS   n	U R                  U	5      n
SnUbF  [        5       nU" U
R                  SU R                  R
                  5      UR                  S5      5      n[        UU
UR                  UR                  S9$ )az  
labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
    Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,
    config.vocab_size]` (see `input_ids` docstring) Tokens with indices set to `-100` are ignored (masked), the
    loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`
Tr  r   NrM   ro  logitsr   rZ  )	rY  r  r   r   re   rS   r   r   rZ  )r/   ri   r   rj   rK   rk   r  r   r  rH  rK  r  r  s                r3   r9   MobileBertForMaskedLM.forward  s    $ //
))%'
 
 "!* HH_5')H%&7&<&<RAWAW&XZ`ZeZefhZijN$!//))	
 	
r5   )r  re   rY  r&   NNNNNN)r<   r=   r>   r?   r  r(   r  r  r   r   rR   r  r   r   r*   r~   r   r   r   r   r   r9   rA   rB   rC   s   @r3   r  r    s    )?*X
,8NcDj NBLL N N  .237260426*.'
##d*'
 ))D0'
 ((4/	'

 &&-'
 ((4/'
   4''
 +,'
 
	'
  '
r5   r  c                   b   ^  \ rS rSrU 4S jrS\R                  S\R                  4S jrSrU =r	$ )MobileBertOnlyNSPHeadiC  c                 n   > [         TU ]  5         [        R                  " UR                  S5      U l        g rP  )r'   r(   r   rZ   rQ   rQ  r   s     r3   r(   MobileBertOnlyNSPHead.__init__D  s'     "		&*<*<a @r5   r1  r7   c                 (    U R                  U5      nU$ r&   rQ  )r/   r1  rU  s      r3   r9   MobileBertOnlyNSPHead.forwardH  s    !%!6!6}!E%%r5   r  r;   rC   s   @r3   r  r  C  s)    A&U\\ &ell & &r5   r  zZ
    MobileBert Model with a `next sentence prediction (classification)` head on top.
    c                     ^  \ rS rSrU 4S jr\\      SS\R                  S-  S\R                  S-  S\R                  S-  S\R                  S-  S\R                  S-  S	\R                  S-  S
\
\   S\\-  4S jj5       5       rSrU =r$ )#MobileBertForNextSentencePredictioniM  c                    > [         TU ]  U5        [        U5      U l        [	        U5      U l        U R                  5         g r&   )r'   r(   rs  rY  r  r  rx  r   s     r3   r(   ,MobileBertForNextSentencePrediction.__init__S  s4     )&1(0 	r5   Nri   r   rj   rK   rk   r  r   r7   c           	         U R                   " U4UUUUSS.UD6nUS   n	U R                  U	5      n
SnUb2  [        5       nU" U
R                  SS5      UR                  S5      5      n[	        UU
UR
                  UR                  S9$ )a  
labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
    Labels for computing the next sequence prediction (classification) loss. Input should be a sequence pair
    (see `input_ids` docstring) Indices should be in `[0, 1]`.

    - 0 indicates sequence B is a continuation of sequence A,
    - 1 indicates sequence B is a random sequence.

Examples:

```python
>>> from transformers import AutoTokenizer, MobileBertForNextSentencePrediction
>>> import torch

>>> tokenizer = AutoTokenizer.from_pretrained("google/mobilebert-uncased")
>>> model = MobileBertForNextSentencePrediction.from_pretrained("google/mobilebert-uncased")

>>> prompt = "In Italy, pizza served in formal settings, such as at a restaurant, is presented unsliced."
>>> next_sentence = "The sky is blue due to the shorter wavelength of blue light."
>>> encoding = tokenizer(prompt, next_sentence, return_tensors="pt")

>>> outputs = model(**encoding, labels=torch.LongTensor([1]))
>>> loss = outputs.loss
>>> logits = outputs.logits
```Tr  r    NrM   rq   r  )rY  r  r   r   r   r   rZ  )r/   ri   r   rj   rK   rk   r  r   r  r1  rU  r  r  s                r3   r9   +MobileBertForNextSentencePrediction.forward\  s    L //
))%'
 
  
!%-!8!')H!)*@*E*Eb!*LfkkZ\o!^*#)!//))	
 	
r5   r  r  )r<   r=   r>   r?   r(   r   r   r*   r~   r   r   r   r   r   r9   rA   rB   rC   s   @r3   r  r  M  s      .237260426*.;
##d*;
 ))D0;
 ((4/	;

 &&-;
 ((4/;
   4';
 +,;
 
,	,;
  ;
r5   r  z
    MobileBert Model transformer with a sequence classification/regression head on top (a linear layer on top of the
    pooled output) e.g. for GLUE tasks.
    c                   8  ^  \ rS rSrU 4S jr\\      SS\R                  S-  S\R                  S-  S\R                  S-  S\R                  S-  S\R                  S-  S	\R                  S-  S
\	\
   S\\R                     \-  4S jj5       5       rSrU =r$ )#MobileBertForSequenceClassificationi  c                 r  > [         TU ]  U5        UR                  U l        Xl        [	        U5      U l        UR                  b  UR                  OUR                  n[        R                  " U5      U l
        [        R                  " UR                  UR                  5      U l        U R                  5         g r&   )r'   r(   
num_labelsre   rs  rY  classifier_dropoutr`   r   r_   ra   rZ   rQ   
classifierrx  r/   re   r  r2   s      r3   r(   ,MobileBertForSequenceClassification.__init__  s      ++)&1)/)B)B)NF%%TZTnTn 	 zz"45))F$6$68I8IJ 	r5   Nri   r   rj   rK   rk   r  r   r7   c           	         U R                   " U4UUUUSS.UD6nUS   n	U R                  U	5      n	U R                  U	5      n
SnUGb  U R                  R                  c  U R
                  S:X  a  SU R                  l        OoU R
                  S:  aN  UR                  [        R                  :X  d  UR                  [        R                  :X  a  SU R                  l        OSU R                  l        U R                  R                  S:X  aI  [        5       nU R
                  S:X  a&  U" U
R                  5       UR                  5       5      nOU" X5      nOU R                  R                  S:X  a=  [        5       nU" U
R                  SU R
                  5      UR                  S5      5      nO,U R                  R                  S:X  a  [        5       nU" X5      n[        UU
UR                   UR"                  S	9$ )
ae  
labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
    Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
    config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
    `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
Tr  r    N
regressionsingle_label_classificationmulti_label_classificationrM   r  )rY  ra   r  re   problem_typer  rm   r*   ru   r   r   squeezer   r   r   r   r   rZ  )r/   ri   r   rj   rK   rk   r  r   r  r1  r  ro  r  s                r3   r9   +MobileBertForSequenceClassification.forward  s   $ //
))%'
 
  
]3/{{''/??a'/;DKK,__q(fllejj.HFLL\a\e\eLe/LDKK,/KDKK,{{''<7"9??a'#FNN$4fnn6FGD#F3D))-JJ+-B @&++b/R))-II,./'!//))	
 	
r5   )r  re   ra   rY  r  r  )r<   r=   r>   r?   r(   r   r   r*   r@   r   r   r   r   r9   rA   rB   rC   s   @r3   r  r    s      *..2.2,0-1&*;
<<$&;
 t+;
 t+	;

 llT);
 ||d*;
 t#;
 +,;
 
u||	7	7;
  ;
r5   r  c                   X  ^  \ rS rSrU 4S jr\\       SS\R                  S-  S\R                  S-  S\R                  S-  S\R                  S-  S\R                  S-  S	\R                  S-  S
\R                  S-  S\	\
   S\\R                     \-  4S jj5       5       rSrU =r$ )MobileBertForQuestionAnsweringi  c                    > [         TU ]  U5        UR                  U l        [        USS9U l        [
        R                  " UR                  UR                  5      U l        U R                  5         g r  )
r'   r(   r  rs  rY  r   rZ   rQ   
qa_outputsrx  r   s     r3   r(   'MobileBertForQuestionAnswering.__init__  sU      ++)&EJ))F$6$68I8IJ 	r5   Nri   r   rj   rK   rk   start_positionsend_positionsr   r7   c           	         U R                   " U4UUUUSS.UD6n	U	S   n
U R                  U
5      nUR                  SSS9u  pUR                  S5      R	                  5       nUR                  S5      R	                  5       nS nUb  Ub  [        UR                  5       5      S:  a  UR                  S5      n[        UR                  5       5      S:  a  UR                  S5      nUR                  S5      nUR                  SU5      nUR                  SU5      n[        US9nU" X5      nU" X5      nUU-   S-  n[        UUUU	R                  U	R                  S	9$ )
NTr  r   r    rM   rr   )ignore_indexrq   )ro  start_logits
end_logitsr   rZ  )rY  r  splitr  r   lenrt   clampr   r   r   rZ  )r/   ri   r   rj   rK   rk   r  r  r   r  rH  r  r  r  r  ignored_indexr  
start_lossend_losss                      r3   r9   &MobileBertForQuestionAnswering.forward   sx    //
))%'
 
 "!*1#)<<r<#: #++B/::<''+668

&=+D?'')*Q."1"9"9""==%%'(1, - 5 5b 9(--a0M-33A}EO)//=AM']CH!,@J
:H$x/14J+%!!//))
 	
r5   )rY  r  r  r  )r<   r=   r>   r?   r(   r   r   r*   r@   r   r   r   r   r9   rA   rB   rC   s   @r3   r  r    s      *..2.2,0-1/3-13
<<$&3
 t+3
 t+	3

 llT)3
 ||d*3
 ,3
 ||d*3
 +,3
 
u||	;	;3
  3
r5   r  c                   8  ^  \ rS rSrU 4S jr\\      SS\R                  S-  S\R                  S-  S\R                  S-  S\R                  S-  S\R                  S-  S	\R                  S-  S
\	\
   S\\R                     \-  4S jj5       5       rSrU =r$ )MobileBertForMultipleChoicei8  c                 0  > [         TU ]  U5        [        U5      U l        UR                  b  UR                  OUR
                  n[        R                  " U5      U l        [        R                  " UR                  S5      U l        U R                  5         g r  )r'   r(   rs  rY  r  r`   r   r_   ra   rZ   rQ   r  rx  r  s      r3   r(   $MobileBertForMultipleChoice.__init__;  su     )&1)/)B)B)NF%%TZTnTn 	 zz"45))F$6$6: 	r5   Nri   r   rj   rK   rk   r  r   r7   c           	         Ub  UR                   S   OUR                   S   nUb!  UR                  SUR                  S5      5      OSnUb!  UR                  SUR                  S5      5      OSnUb!  UR                  SUR                  S5      5      OSnUb!  UR                  SUR                  S5      5      OSnUb1  UR                  SUR                  S5      UR                  S5      5      OSnU R                  " U4UUUUSS.UD6n	U	S   n
U R	                  U
5      n
U R                  U
5      nUR                  SU5      nSnUb  [        5       nU" X5      n[        UUU	R                  U	R                  S9$ )a  
input_ids (`torch.LongTensor` of shape `(batch_size, num_choices, sequence_length)`):
    Indices of input sequence tokens in the vocabulary.

    Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and
    [`PreTrainedTokenizer.__call__`] for details.

    [What are input IDs?](../glossary#input-ids)
token_type_ids (`torch.LongTensor` of shape `(batch_size, num_choices, sequence_length)`, *optional*):
    Segment token indices to indicate first and second portions of the inputs. Indices are selected in `[0,
    1]`:

    - 0 corresponds to a *sentence A* token,
    - 1 corresponds to a *sentence B* token.

    [What are token type IDs?](../glossary#token-type-ids)
position_ids (`torch.LongTensor` of shape `(batch_size, num_choices, sequence_length)`, *optional*):
    Indices of positions of each input sequence tokens in the position embeddings. Selected in the range `[0,
    config.max_position_embeddings - 1]`.

    [What are position IDs?](../glossary#position-ids)
inputs_embeds (`torch.FloatTensor` of shape `(batch_size, num_choices, sequence_length, hidden_size)`, *optional*):
    Optionally, instead of passing `input_ids` you can choose to directly pass an embedded representation. This
    is useful if you want more control over how to convert `input_ids` indices into associated vectors than the
    model's internal embedding lookup matrix.
labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
    Labels for computing the multiple choice classification loss. Indices should be in `[0, ...,
    num_choices-1]` where `num_choices` is the size of the second dimension of the input tensors. (See
    `input_ids` above)
Nr    rM   Tr  r  )
r   r   rt   rY  ra   r  r   r   r   rZ  )r/   ri   r   rj   rK   rk   r  r   num_choicesr  r1  r  reshaped_logitsro  r  s                  r3   r9   #MobileBertForMultipleChoice.forwardH  s   T -6,Aiooa(}GZGZ[\G]>G>SINN2y~~b'9:Y]	M[Mg,,R1D1DR1HImqM[Mg,,R1D1DR1HImqGSG_|((\->->r-BCei ( r=#5#5b#9=;M;Mb;QR 	 //
))%'
 
  
]3/ ++b+6')HO4D("!//))	
 	
r5   )r  ra   rY  r  )r<   r=   r>   r?   r(   r   r   r*   r@   r   r   r   r   r9   rA   rB   rC   s   @r3   r  r  8  s      *..2.2,0-1&*N
<<$&N
 t+N
 t+	N

 llT)N
 ||d*N
 t#N
 +,N
 
u||	8	8N
  N
r5   r  c                   8  ^  \ rS rSrU 4S jr\\      SS\R                  S-  S\R                  S-  S\R                  S-  S\R                  S-  S\R                  S-  S	\R                  S-  S
\	\
   S\\R                     \-  4S jj5       5       rSrU =r$ ) MobileBertForTokenClassificationi  c                 d  > [         TU ]  U5        UR                  U l        [        USS9U l        UR
                  b  UR
                  OUR                  n[        R                  " U5      U l	        [        R                  " UR                  UR                  5      U l        U R                  5         g r  )r'   r(   r  rs  rY  r  r`   r   r_   ra   rZ   rQ   r  rx  r  s      r3   r(   )MobileBertForTokenClassification.__init__  s      ++)&EJ)/)B)B)NF%%TZTnTn 	 zz"45))F$6$68I8IJ 	r5   Nri   r   rj   rK   rk   r  r   r7   c           	      D   U R                   " U4UUUUSS.UD6nUS   n	U R                  U	5      n	U R                  U	5      n
SnUb<  [        5       nU" U
R	                  SU R
                  5      UR	                  S5      5      n[        UU
UR                  UR                  S9$ )z
labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
    Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`.
Tr  r   NrM   r  )	rY  ra   r  r   r   r  r   r   rZ  )r/   ri   r   rj   rK   rk   r  r   r  rH  r  ro  r  s                r3   r9   (MobileBertForTokenClassification.forward  s      //
))%'
 
 "!*,,71')HFKKDOO<fkk"oND$!//))	
 	
r5   )r  ra   rY  r  r  )r<   r=   r>   r?   r(   r   r   r*   r@   r   r   r   r   r9   rA   rB   rC   s   @r3   r  r    s      *..2.2,0-1&*'
<<$&'
 t+'
 t+	'

 llT)'
 ||d*'
 t#'
 +,'
 
u||	4	4'
  '
r5   r  )
r  r  r  r  r  r  r  r  rs  rX  )Nro   )Ucollections.abcr   dataclassesr   r*   r   torch.nnr   r   r    r
   r]  activationsr   masking_utilsr   modeling_layersr   modeling_outputsr   r   r   r   r   r   r   r   modeling_utilsr   r   processing_utilsr   utilsr   r   r   r   utils.genericr   r   utils.output_capturingr   configuration_mobilebertr!   
get_loggerr<   loggerModuler#   r^   r\   rG   r@   floatr   r   r   r   r   r   r   r   r   r   r  r  r  r)  r4  r;  rD  rN  rX  rn  rs  r  r  r  r  r  r  r  r  __all__rb  r5   r3   <module>r     s  . % !   A A & ! 6 9	 	 	 G & M M I 5 6 
		H	%6RYY 6 &
9I299 If !%II%<<% 
% <<	%
 LL4'% T\% % '(%81)bii 1)h299 ".")) .8RYY ryy ryy 0	bii 	!] !]H			 		ryy 	)0 )X@		 @(!ryy !&		 " "!BII !	9 	9 i i i4 
 7[ 7 7& C
/ C
 C
L _
8 _
_
D F
5 F
 F
R&BII & 
G
*C G

G
T M
*C M
M
` @
%> @
 @
F ^
"; ^
 ^
B 8
'@ 8
 8
vr5   