
    Z j                     T   S r SSKrSSKrSSKJr  SSKJrJrJr  SSKJ	r
  SSKJrJr  SSKJrJrJrJrJrJrJr  SS	KJr  SS
KJrJr  SSKJr  \R:                  " \5      r\ " S S\5      5       r  " S S\RB                  5      r" " S S\RB                  5      r# " S S\RB                  5      r$ " S S\RB                  5      r% " S S\RB                  5      r& " S S\RB                  5      r' " S S\RB                  5      r( " S S\RB                  5      r)\ " S S \ 5      5       r* " S! S"\ 5      r+ " S# S$\RB                  5      r,\" S%S&9 " S' S(\ 5      5       r-\ " S) S*\ 5      5       r.\ " S+ S,\ 5      5       r/ " S- S.\RB                  5      r0\ " S/ S0\ 5      5       r1S1 r2/ S2Qr3g)3zPyTorch MPNet model.    N)nn)BCEWithLogitsLossCrossEntropyLossMSELoss   )initialization)ACT2FNgelu)BaseModelOutputBaseModelOutputWithPoolingMaskedLMOutputMultipleChoiceModelOutputQuestionAnsweringModelOutputSequenceClassifierOutputTokenClassifierOutput)PreTrainedModel)auto_docstringlogging   )MPNetConfigc                   `   ^  \ rS rSr% \\S'   Sr\R                  " 5       U 4S j5       r	Sr
U =r$ )MPNetPreTrainedModel*   configmpnetc                 r  > [         TU ]  U5        [        U[        5      (       a!  [        R
                  " UR                  5        g[        U[        5      (       a\  [        R                  " UR                  [        R                  " UR                  R                  S   5      R                  S5      5        gg)zInitialize the weightsr   r   N)super_init_weights
isinstanceMPNetLMHeadinitzeros_biasMPNetEmbeddingscopy_position_idstorcharangeshapeexpand)selfmodule	__class__s     y/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/models/mpnet/modeling_mpnet.pyr    "MPNetPreTrainedModel._init_weights/   s|     	f%fk**KK$00JJv**ELL9L9L9R9RSU9V,W,^,^_f,gh 1     )__name__
__module____qualname____firstlineno__r   __annotations__base_model_prefixr)   no_gradr    __static_attributes____classcell__r/   s   @r0   r   r   *   s)    
]]_i ir2   r   c                   8   ^  \ rS rSrU 4S jrSS jrS rSrU =r$ )r&   9   c                 6  > [         TU ]  5         SU l        [        R                  " UR
                  UR                  U R                  S9U l        [        R                  " UR                  UR                  U R                  S9U l	        [        R                  " UR                  UR                  S9U l
        [        R                  " UR                  5      U l        U R                  S[         R"                  " UR                  5      R%                  S5      SS9  g )Nr   )padding_idxepsr(   r   F)
persistent)r   __init__rA   r   	Embedding
vocab_sizehidden_sizeword_embeddingsmax_position_embeddingsposition_embeddings	LayerNormlayer_norm_epsDropouthidden_dropout_probdropoutregister_bufferr)   r*   r,   r-   r   r/   s     r0   rE   MPNetEmbeddings.__init__:   s    !||F,=,=v?Q?Q_c_o_op#%<<**F,>,>DL\L\$
  f&8&8f>S>STzz&"<"<=ELL)G)GHOOPWXej 	 	
r2   c                 ~   Uc*  Ub  [        XR                  5      nOU R                  U5      nUb  UR                  5       nOUR                  5       S S nUS   nUc  U R                  S S 2S U24   nUc  U R                  U5      nU R                  U5      nX7-   nU R                  U5      nU R                  U5      nU$ )Nr   r   )	"create_position_ids_from_input_idsrA   &create_position_ids_from_inputs_embedssizer(   rI   rK   rL   rP   )	r-   	input_idsr(   inputs_embedskwargsinput_shape
seq_lengthrK   
embeddingss	            r0   forwardMPNetEmbeddings.forwardH   s    $A)M]M]^#JJ=Y #..*K',,.s3K ^
,,Q^<L  00;M"66|D"8
^^J/
\\*-
r2   c                    UR                  5       SS nUS   n[        R                  " U R                  S-   X0R                  -   S-   [        R                  UR
                  S9nUR                  S5      R                  U5      $ )z
We are provided embeddings directly. We cannot infer which are padded so just generate sequential position ids.

Args:
    inputs_embeds: torch.Tensor

Returns: torch.Tensor
Nr   r   )dtypedevicer   )rW   r)   r*   rA   longrb   	unsqueezer,   )r-   rY   r[   sequence_lengthr(   s        r0   rV   6MPNetEmbeddings.create_position_ids_from_inputs_embedsb   s~     $((*3B/%a.||q /4D4D"Dq"HPUPZPZcpcwcw
 %%a(//<<r2   )rL   rP   rA   rK   rI   )NNN)	r4   r5   r6   r7   rE   r^   rV   r;   r<   r=   s   @r0   r&   r&   9   s    
4= =r2   r&   c                   8   ^  \ rS rSrU 4S jr   SS jrSrU =r$ )MPNetSelfAttentiont   c                 @  > [         TU ]  5         UR                  UR                  -  S:w  a7  [	        US5      (       d&  [        SUR                   SUR                   S35      eUR                  U l        [        UR                  UR                  -  5      U l        U R                  U R                  -  U l        [        R                  " UR                  U R                  5      U l        [        R                  " UR                  U R                  5      U l        [        R                  " UR                  U R                  5      U l        [        R                  " UR                  UR                  5      U l        [        R                  " UR                   5      U l        g )Nr   embedding_sizezThe hidden size (z6) is not a multiple of the number of attention heads ())r   rE   rH   num_attention_headshasattr
ValueErrorintattention_head_sizeall_head_sizer   LinearqkvorN   attention_probs_dropout_probrP   rR   s     r0   rE   MPNetSelfAttention.__init__u   s=    : ::a?PVXhHiHi#F$6$6#7 8 445Q8 
 $*#=#= #&v'9'9F<V<V'V#W !558P8PP6--t/A/AB6--t/A/AB6--t/A/AB6--v/A/ABzz&"E"EFr2   c                    UR                   S S n/ UQSPU R                  P7nU R                  U5      R                  U5      R	                  SS5      nU R                  U5      R                  U5      R	                  SS5      n	U R                  U5      R                  U5      R	                  SS5      n
[        R                  " XR	                  SS5      5      nU[        R                  " U R                  5      -  nUb  X-  nUb  X-   n[        R                  R                  USS9nU R                  U5      n[        R                  " X5      nUR                  SSSS5      R!                  5       nUR#                  5       S S U R$                  4-   nUR                  " U6 nU R'                  U5      nU(       a  X4nU$ U4nU$ )Nr   r      dimr   r   )r+   rq   rt   view	transposeru   rv   r)   matmulmathsqrtr   
functionalsoftmaxrP   permute
contiguousrW   rr   rw   )r-   hidden_statesattention_maskposition_biasoutput_attentionsrZ   r[   hidden_shapert   ru   rv   attention_scoresattention_probscnew_c_shaperw   outputss                    r0   r^   MPNetSelfAttention.forward   s    $))#2.CCbC$*B*BCFF=!&&|4>>q!DFF=!&&|4>>q!DFF=!&&|4>>q!D !<<;;r2+>?+dii8P8P.QQ $-%/@ --//0@b/I,,7LL,IIaAq!,,.ffhsmt'9'9&;;FFK FF1I*;1& CDr2   )rr   rq   rP   ru   rm   rw   rt   rv   NNFr4   r5   r6   r7   rE   r^   r;   r<   r=   s   @r0   rh   rh   t   s    G, ' 'r2   rh   c                   8   ^  \ rS rSrU 4S jr   SS jrSrU =r$ )MPNetAttention   c                    > [         TU ]  5         [        U5      U l        [        R
                  " UR                  UR                  S9U l        [        R                  " UR                  5      U l
        g NrB   )r   rE   rh   attnr   rL   rH   rM   rN   rO   rP   rR   s     r0   rE   MPNetAttention.__init__   sN    &v.	f&8&8f>S>STzz&"<"<=r2   c                     U R                  UUUUS9nU R                  U R                  US   5      U-   5      nU4USS  -   nU$ )N)r   r   r   )r   rL   rP   )	r-   r   r   r   r   rZ   self_outputsattention_outputr   s	            r0   r^   MPNetAttention.forward   s_     yy/	 ! 
  >>$,,|A*G-*WX#%QR(88r2   )rL   r   rP   r   r   r=   s   @r0   r   r      s    >  r2   r   c                   b   ^  \ rS rSrU 4S jrS\R                  S\R                  4S jrSrU =r	$ )MPNetIntermediate   c                   > [         TU ]  5         [        R                  " UR                  UR
                  5      U l        [        UR                  [        5      (       a  [        UR                     U l        g UR                  U l        g N)r   rE   r   rs   rH   intermediate_sizedenser!   
hidden_actstrr	   intermediate_act_fnrR   s     r0   rE   MPNetIntermediate.__init__   s`    YYv1163K3KL
f''--'-f.?.?'@D$'-'8'8D$r2   r   returnc                 J    U R                  U5      nU R                  U5      nU$ r   r   r   )r-   r   s     r0   r^   MPNetIntermediate.forward   s&    

=100?r2   r   
r4   r5   r6   r7   rE   r)   Tensorr^   r;   r<   r=   s   @r0   r   r      s(    9U\\ ell  r2   r   c                   z   ^  \ rS rSrU 4S jrS\R                  S\R                  S\R                  4S jrSrU =r	$ )MPNetOutput   c                 (  > [         TU ]  5         [        R                  " UR                  UR
                  5      U l        [        R                  " UR
                  UR                  S9U l        [        R                  " UR                  5      U l        g r   )r   rE   r   rs   r   rH   r   rL   rM   rN   rO   rP   rR   s     r0   rE   MPNetOutput.__init__   s`    YYv779K9KL
f&8&8f>S>STzz&"<"<=r2   r   input_tensorr   c                 p    U R                  U5      nU R                  U5      nU R                  X-   5      nU$ r   )r   rP   rL   )r-   r   r   s      r0   r^   MPNetOutput.forward   s5    

=1]3}'CDr2   )rL   r   rP   r   r=   s   @r0   r   r      s6    >U\\  RWR^R^  r2   r   c                   8   ^  \ rS rSrU 4S jr   SS jrSrU =r$ )
MPNetLayer   c                    > [         TU ]  5         [        U5      U l        [	        U5      U l        [        U5      U l        g r   )r   rE   r   	attentionr   intermediater   outputrR   s     r0   rE   MPNetLayer.__init__   s3    '/-f5!&)r2   c                     U R                  UUUUS9nUS   nUSS  nU R                  U5      n	U R                  X5      n
U
4U-   nU$ )N)r   r   r   r   r   r   r   )r-   r   r   r   r   rZ   self_attention_outputsr   r   intermediate_outputlayer_outputs              r0   r^   MPNetLayer.forward   so     "&'/	 "0 "
 2!4(,"//0@A{{#6I/G+r2   r   r   r   r=   s   @r0   r   r      s    *  r2   r   c                      ^  \ rS rSrU 4S jr    SS\R                  S\R                  S-  S\S\S\4
S	 jjrSS
 jr	\
SS j5       rSrU =r$ )MPNetEncoderi	  c                 J  > [         TU ]  5         Xl        UR                  U l        [
        R                  " [        UR                  5       Vs/ s H  n[        U5      PM     sn5      U l
        [
        R                  " UR                  U R                  5      U l        g s  snf r   )r   rE   r   rm   n_headsr   
ModuleListrangenum_hidden_layersr   layerrF   relative_attention_num_bucketsrelative_attention_bias)r-   r   _r/   s      r0   rE   MPNetEncoder.__init__
  sw    11]]fF^F^@_#`@_1Jv$6@_#`a
')||F4Y4Y[_[g[g'h$ $as   B Nr   r   r   output_hidden_statesreturn_dictc                 N   U R                  U5      nU(       a  SOS nU(       a  SOS n	[        U R                  5       H4  u  pU(       a  X4-   nU" UUU4SU0UD6nUS   nU(       d  M,  XS   4-   n	M6     U(       a  X4-   nU(       d  [        S XU	4 5       5      $ [	        UUU	S9$ )Nr3   r   r   r   c              3   .   #    U  H  oc  M  Uv   M     g 7fr   r3   ).0rv   s     r0   	<genexpr>'MPNetEncoder.forward.<locals>.<genexpr>2  s     h$Vq$Vs   	)last_hidden_stater   
attentions)compute_position_bias	enumerater   tupler   )r-   r   r   r   r   r   rZ   r   all_hidden_statesall_attentionsilayer_modulelayer_outputss                r0   r^   MPNetEncoder.forward  s     22=A"6BD0d(4OA#$58H$H!( #4	
 M *!,M  !/3C2E!E  5"   14D Dh]~$Vhhh++%
 	
r2   c                 P   UR                  S5      UR                  S5      UR                  S5      penUb  US S 2S S 2S 4   nUS S 2S S S 24   nOT[        R                  " U[        R                  S9S S 2S 4   n[        R                  " U[        R                  S9S S S 24   nX-
  n	U R	                  XS9n
U
R                  UR                  5      n
U R                  U
5      nUR                  / SQ5      R                  S5      nUR                  USXV45      R                  5       nU$ )Nr   r   )ra   )num_buckets)r{   r   r   r   )rW   r)   r*   rc   relative_position_buckettorb   r   r   rd   r,   r   )r-   xr(   r   bszqlenklencontext_positionmemory_positionrelative_position	rp_bucketvaluess               r0   r   "MPNetEncoder.compute_position_bias9  s   &&)QVVAYq	4#+Aq$J7*1dA:6O$||D

CAtGL#ll4uzzB47KO+>112C1]	LL*	--i8	*44Q7R45@@Br2   c                     SnU * nUS-  nX4S:  R                  [        R                  5      U-  -  n[        R                  " U5      nUS-  nXE:  nU[        R                  " UR                  5       U-  5      [        R                  " X%-  5      -  X-
  -  R                  [        R                  5      -   n[        R                  " U[        R                  " XqS-
  5      5      nU[        R                  " XdU5      -  nU$ )Nr   r{   r   )
r   r)   rc   abslogfloatr   min	full_likewhere)r   r   max_distanceretn	max_exactis_smallval_if_larges           r0   r   %MPNetEncoder.relative_position_bucketK  s    Azz%**%33IIaL1$	= IIaggi)+,txx8P/QQU`Ulm
"UZZ. yyu|[\_/]^u{{855
r2   )r   r   r   r   )NFFF)N    )r     )r4   r5   r6   r7   rE   r)   r   boolr^   r   staticmethodr   r;   r<   r=   s   @r0   r   r   	  ss    i /3"'%*!&
||&
 t+&
  	&

 #&
 &
P$  r2   r   c                   b   ^  \ rS rSrU 4S jrS\R                  S\R                  4S jrSrU =r	$ )MPNetPooleria  c                    > [         TU ]  5         [        R                  " UR                  UR                  5      U l        [        R                  " 5       U l        g r   )r   rE   r   rs   rH   r   Tanh
activationrR   s     r0   rE   MPNetPooler.__init__b  s9    YYv1163E3EF
'')r2   r   r   c                 \    US S 2S4   nU R                  U5      nU R                  U5      nU$ Nr   )r   r	  )r-   r   first_token_tensorpooled_outputs       r0   r^   MPNetPooler.forwardg  s6     +1a40

#566r2   )r	  r   r   r=   s   @r0   r  r  a  s(    $
U\\ ell  r2   r  c                     ^  \ rS rSrSU 4S jjrS rS r\       SS\R                  S-  S\R                  S-  S\R                  S-  S	\R                  S-  S
\S-  S\S-  S\S-  S\\R                     \-  4S jj5       rSrU =r$ )
MPNetModelip  c                    > [         TU ]  U5        Xl        [        U5      U l        [        U5      U l        U(       a  [        U5      OSU l        U R                  5         g)z^
add_pooling_layer (bool, *optional*, defaults to `True`):
    Whether to add a pooling layer
N)
r   rE   r   r&   r]   r   encoderr  pooler	post_init)r-   r   add_pooling_layerr/   s      r0   rE   MPNetModel.__init__r  sK    
 	 )&1#F+->k&)D 	r2   c                 .    U R                   R                  $ r   r]   rI   r-   s    r0   get_input_embeddingsMPNetModel.get_input_embeddings  s    ...r2   c                 $    XR                   l        g r   r  )r-   values     r0   set_input_embeddingsMPNetModel.set_input_embeddings  s    */'r2   NrX   r   r(   rY   r   r   r   r   c                    Ub  UOU R                   R                  nUb  UOU R                   R                  nUb  UOU R                   R                  nUb  Ub  [	        S5      eUb"  U R                  X5        UR                  5       n	O"Ub  UR                  5       S S n	O[	        S5      eUb  UR                  OUR                  n
Uc  [        R                  " XS9nU R                  X)5      nU R                  XUS9nU R                  UUUUUS9nUS   nU R                  b  U R                  U5      OS nU(       d	  X4USS  -   $ [        UUUR                  UR                   S	9$ )
NzDYou cannot specify both input_ids and inputs_embeds at the same timer   z5You have to specify either input_ids or inputs_embeds)rb   )rX   r(   rY   )r   r   r   r   r   r   )r   pooler_outputr   r   )r   r   r   r   ro   %warn_if_padding_and_no_attention_maskrW   rb   r)   onesget_extended_attention_maskr]   r  r  r   r   r   )r-   rX   r   r(   rY   r   r   r   rZ   r[   rb   extended_attention_maskembedding_outputencoder_outputssequence_outputr  s                   r0   r^   MPNetModel.forward  s    2C1N-TXT_T_TqTq$8$D $++JjJj 	 &1%<k$++BYBY ]%>cdd"66yQ#..*K&',,.s3KTUU%.%:!!@T@T!"ZZCN040P0PQ_0m??Yiv?w,,2/!5# ' 
 *!,8<8OO4UY#3oab6III)-')77&11	
 	
r2   )r   r]   r  r  )T)NNNNNNN)r4   r5   r6   r7   rE   r  r  r   r)   
LongTensorFloatTensorr  r   r   r   r^   r;   r<   r=   s   @r0   r  r  p  s    /0  .2370426)-,0#'4
##d*4
 ))D04
 &&-	4

 ((4/4
  $;4
 #Tk4
 D[4
 
u||	9	94
 4
r2   r  c                   >  ^  \ rS rSrSSS.rU 4S jrS rS r\        SS	\	R                  S-  S
\	R                  S-  S\	R                  S-  S\	R                  S-  S\	R                  S-  S\S-  S\S-  S\S-  S\\	R                     \-  4S jj5       rSrU =r$ )MPNetForMaskedLMi  z'mpnet.embeddings.word_embeddings.weightzlm_head.bias)zlm_head.decoder.weightzlm_head.decoder.biasc                    > [         TU ]  U5        [        USS9U l        [	        U5      U l        U R                  5         g NF)r  )r   rE   r  r   r"   lm_headr  rR   s     r0   rE   MPNetForMaskedLM.__init__  s6     %@
"6* 	r2   c                 .    U R                   R                  $ r   )r1  decoderr  s    r0   get_output_embeddings&MPNetForMaskedLM.get_output_embeddings  s    ||###r2   c                 Z    XR                   l        UR                  U R                   l        g r   )r1  r4  r%   )r-   new_embeddingss     r0   set_output_embeddings&MPNetForMaskedLM.set_output_embeddings  s    -*//r2   NrX   r   r(   rY   labelsr   r   r   r   c	           
         Ub  UOU R                   R                  nU R                  UUUUUUUS9n
U
S   nU R                  U5      nSnUbF  [	        5       nU" UR                  SU R                   R                  5      UR                  S5      5      nU(       d  U4U
SS -   nUb  U4U-   $ U$ [        UUU
R                  U
R                  S9$ )az  
labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
    Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,
    config.vocab_size]` (see `input_ids` docstring) Tokens with indices set to `-100` are ignored (masked), the
    loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`
Nr   r(   rY   r   r   r   r   r   r{   losslogitsr   r   )
r   r   r   r1  r   r   rG   r   r   r   )r-   rX   r   r(   rY   r;  r   r   r   rZ   r   r)  prediction_scoresmasked_lm_lossloss_fctr   s                   r0   r^   MPNetForMaskedLM.forward  s    & &1%<k$++BYBY**)%'/!5#  
 "!* LL9')H%&7&<&<RAWAW&XZ`ZeZefhZijN')GABK7F3A3M^%.YSYY$!//))	
 	
r2   )r1  r   NNNNNNNN)r4   r5   r6   r7   _tied_weights_keysrE   r5  r9  r   r)   r+  r,  r  r   r   r   r^   r;   r<   r=   s   @r0   r.  r.    s    "K .
$0  .2370426*.)-,0#'/
##d*/
 ))D0/
 &&-	/

 ((4//
   4'/
  $;/
 #Tk/
 D[/
 
u||	~	-/
 /
r2   r.  c                   2   ^  \ rS rSrSrU 4S jrS rSrU =r$ )r"   i  z5MPNet Head for masked and permuted language modeling.c                   > [         TU ]  5         [        R                  " UR                  UR                  5      U l        [        R                  " UR                  UR                  S9U l        [        R                  " UR                  UR                  SS9U l
        [        R                  " [        R                  " UR                  5      5      U l        g )NrB   T)r%   )r   rE   r   rs   rH   r   rL   rM   
layer_normrG   r4  	Parameterr)   zerosr%   rR   s     r0   rE   MPNetLMHead.__init__  s    YYv1163E3EF
,,v'9'9v?T?TUyy!3!3V5F5FTRLLV->->!?@	r2   c                     U R                  U5      n[        U5      nU R                  U5      nU R                  U5      nU$ r   )r   r
   rI  r4  r-   featuresrZ   r   s       r0   r^   MPNetLMHead.forward  s;    JJx GOOA LLOr2   )r%   r4  r   rI  	r4   r5   r6   r7   __doc__rE   r^   r;   r<   r=   s   @r0   r"   r"     s    ?A r2   r"   z
    MPNet Model transformer with a sequence classification/regression head on top (a linear layer on top of the pooled
    output) e.g. for GLUE tasks.
    )custom_introc                   (  ^  \ rS rSrU 4S jr\        SS\R                  S-  S\R                  S-  S\R                  S-  S\R                  S-  S\R                  S-  S	\	S-  S
\	S-  S\	S-  S\
\R                     \-  4S jj5       rSrU =r$ )MPNetForSequenceClassificationi  c                    > [         TU ]  U5        UR                  U l        [        USS9U l        [        U5      U l        U R                  5         g r0  )r   rE   
num_labelsr  r   MPNetClassificationHead
classifierr  rR   s     r0   rE   'MPNetForSequenceClassification.__init__%  sC      ++%@
1&9 	r2   NrX   r   r(   rY   r;  r   r   r   r   c	           
      ,   Ub  UOU R                   R                  nU R                  UUUUUUUS9n
U
S   nU R                  U5      nSnUGb  U R                   R                  c  U R
                  S:X  a  SU R                   l        OoU R
                  S:  aN  UR                  [        R                  :X  d  UR                  [        R                  :X  a  SU R                   l        OSU R                   l        U R                   R                  S:X  aI  [        5       nU R
                  S:X  a&  U" UR                  5       UR                  5       5      nOU" X5      nOU R                   R                  S:X  a=  [        5       nU" UR                  SU R
                  5      UR                  S5      5      nO,U R                   R                  S:X  a  [        5       nU" X5      nU(       d  U4U
S	S -   nUb  U4U-   $ U$ [        UUU
R                   U
R"                  S
9$ )ae  
labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
    Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
    config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
    `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
Nr=  r   r   
regressionsingle_label_classificationmulti_label_classificationr   r{   r>  )r   r   r   rY  problem_typerW  ra   r)   rc   rp   r   squeezer   r   r   r   r   r   r-   rX   r   r(   rY   r;  r   r   r   rZ   r   r)  r@  r?  rC  r   s                   r0   r^   &MPNetForSequenceClassification.forward/  s   ( &1%<k$++BYBY**)%'/!5#  
 "!*1{{''/??a'/;DKK,__q(fllejj.HFLL\a\e\eLe/LDKK,/KDKK,{{''<7"9??a'#FNN$4fnn6FGD#F3D))-JJ+-B @&++b/R))-II,./Y,F)-)9TGf$EvE'!//))	
 	
r2   )rY  r   rW  rE  )r4   r5   r6   r7   rE   r   r)   r+  r,  r  r   r   r   r^   r;   r<   r=   s   @r0   rU  rU    s      .2370426*.)-,0#'@
##d*@
 ))D0@
 &&-	@

 ((4/@
   4'@
  $;@
 #Tk@
 D[@
 
u||	7	7@
 @
r2   rU  c                   (  ^  \ rS rSrU 4S jr\        SS\R                  S-  S\R                  S-  S\R                  S-  S\R                  S-  S\R                  S-  S	\	S-  S
\	S-  S\	S-  S\
\R                     \-  4S jj5       rSrU =r$ )MPNetForMultipleChoiceis  c                    > [         TU ]  U5        [        U5      U l        [        R
                  " UR                  5      U l        [        R                  " UR                  S5      U l
        U R                  5         g )Nr   )r   rE   r  r   r   rN   rO   rP   rs   rH   rY  r  rR   s     r0   rE   MPNetForMultipleChoice.__init__u  sV     '
zz&"<"<=))F$6$6: 	r2   NrX   r   r(   rY   r;  r   r   r   r   c	           
         Ub  UOU R                   R                  nUb  UR                  S   OUR                  S   n
Ub!  UR                  SUR	                  S5      5      OSnUb!  UR                  SUR	                  S5      5      OSnUb!  UR                  SUR	                  S5      5      OSnUb1  UR                  SUR	                  S5      UR	                  S5      5      OSnU R                  UUUUUUUS9nUS   nU R                  U5      nU R                  U5      nUR                  SU
5      nSnUb  [        5       nU" UU5      nU(       d  U4USS -   nUb  U4U-   $ U$ [        UUUR                  UR                  S9$ )a(  
input_ids (`torch.LongTensor` of shape `(batch_size, num_choices, sequence_length)`):
    Indices of input sequence tokens in the vocabulary.

    Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and
    [`PreTrainedTokenizer.__call__`] for details.

    [What are input IDs?](../glossary#input-ids)
position_ids (`torch.LongTensor` of shape `(batch_size, num_choices, sequence_length)`, *optional*):
    Indices of positions of each input sequence tokens in the position embeddings. Selected in the range `[0,
    config.max_position_embeddings - 1]`.

    [What are position IDs?](../glossary#position-ids)
inputs_embeds (`torch.FloatTensor` of shape `(batch_size, num_choices, sequence_length, hidden_size)`, *optional*):
    Optionally, instead of passing `input_ids` you can choose to directly pass an embedded representation. This
    is useful if you want more control over how to convert *input_ids* indices into associated vectors than the
    model's internal embedding lookup matrix.
labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
    Labels for computing the multiple choice classification loss. Indices should be in `[0, ...,
    num_choices-1]` where `num_choices` is the size of the second dimension of the input tensors. (See
    `input_ids` above)
Nr   r   r|   )r(   r   rY   r   r   r   r{   r>  )r   r   r+   r   rW   r   rP   rY  r   r   r   r   )r-   rX   r   r(   rY   r;  r   r   r   rZ   num_choicesflat_input_idsflat_position_idsflat_attention_maskflat_inputs_embedsr   r  r@  reshaped_logitsr?  rC  r   s                         r0   r^   MPNetForMultipleChoice.forward  s   H &1%<k$++BYBY,5,Aiooa(}GZGZ[\G]CLCXINN2,>?^bLXLdL--b,2C2CB2GHjnR`Rln11"n6I6I"6MNrv ( r=#5#5b#9=;M;Mb;QR 	 ***.,/!5#  
  
]3/ ++b+6')HOV4D%''!"+5F)-)9TGf$EvE("!//))	
 	
r2   )rY  rP   r   rE  )r4   r5   r6   r7   rE   r   r)   r+  r,  r  r   r   r   r^   r;   r<   r=   s   @r0   rd  rd  s  s      .2370426*.)-,0#'L
##d*L
 ))D0L
 &&-	L

 ((4/L
   4'L
  $;L
 #TkL
 D[L
 
u||	8	8L
 L
r2   rd  c                   (  ^  \ rS rSrU 4S jr\        SS\R                  S-  S\R                  S-  S\R                  S-  S\R                  S-  S\R                  S-  S	\	S-  S
\	S-  S\	S-  S\
\R                     \-  4S jj5       rSrU =r$ )MPNetForTokenClassificationi  c                 .  > [         TU ]  U5        UR                  U l        [        USS9U l        [
        R                  " UR                  5      U l        [
        R                  " UR                  UR                  5      U l        U R                  5         g r0  )r   rE   rW  r  r   r   rN   rO   rP   rs   rH   rY  r  rR   s     r0   rE   $MPNetForTokenClassification.__init__  sk      ++%@
zz&"<"<=))F$6$68I8IJ 	r2   NrX   r   r(   rY   r;  r   r   r   r   c	           
         Ub  UOU R                   R                  nU R                  UUUUUUUS9n
U
S   nU R                  U5      nU R	                  U5      nSnUb<  [        5       nU" UR                  SU R                  5      UR                  S5      5      nU(       d  U4U
SS -   nUb  U4U-   $ U$ [        UUU
R                  U
R                  S9$ )z
labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
    Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`.
Nr=  r   r   r{   r>  )r   r   r   rP   rY  r   r   rW  r   r   r   ra  s                   r0   r^   #MPNetForTokenClassification.forward  s    $ &1%<k$++BYBY**)%'/!5#  
 "!*,,71')HFKKDOO<fkk"oNDY,F)-)9TGf$EvE$!//))	
 	
r2   )rY  rP   r   rW  rE  )r4   r5   r6   r7   rE   r   r)   r+  r,  r  r   r   r   r^   r;   r<   r=   s   @r0   rp  rp    s    	  .2370426*.)-,0#'0
##d*0
 ))D00
 &&-	0

 ((4/0
   4'0
  $;0
 #Tk0
 D[0
 
u||	4	40
 0
r2   rp  c                   2   ^  \ rS rSrSrU 4S jrS rSrU =r$ )rX  i  z-Head for sentence-level classification tasks.c                 ,  > [         TU ]  5         [        R                  " UR                  UR                  5      U l        [        R                  " UR                  5      U l        [        R                  " UR                  UR                  5      U l
        g r   )r   rE   r   rs   rH   r   rN   rO   rP   rW  out_projrR   s     r0   rE    MPNetClassificationHead.__init__  s`    YYv1163E3EF
zz&"<"<=		&"4"4f6G6GHr2   c                     US S 2SS S 24   nU R                  U5      nU R                  U5      n[        R                  " U5      nU R                  U5      nU R	                  U5      nU$ r  )rP   r   r)   tanhrw  rN  s       r0   r^   MPNetClassificationHead.forward  sY    Q1WLLOJJqMJJqMLLOMM!r2   )r   rP   rw  rQ  r=   s   @r0   rX  rX    s    7I r2   rX  c                   H  ^  \ rS rSrU 4S jr\         SS\R                  S-  S\R                  S-  S\R                  S-  S\R                  S-  S\R                  S-  S	\R                  S-  S
\	S-  S\	S-  S\	S-  S\
\R                     \-  4S jj5       rSrU =r$ )MPNetForQuestionAnsweringi#  c                    > [         TU ]  U5        UR                  U l        [        USS9U l        [
        R                  " UR                  UR                  5      U l        U R                  5         g r0  )
r   rE   rW  r  r   r   rs   rH   
qa_outputsr  rR   s     r0   rE   "MPNetForQuestionAnswering.__init__%  sU      ++%@
))F$6$68I8IJ 	r2   NrX   r   r(   rY   start_positionsend_positionsr   r   r   r   c
           
         U	b  U	OU R                   R                  n	U R                  UUUUUUU	S9nUS   nU R                  U5      nUR	                  SSS9u  pUR                  S5      R                  5       nUR                  S5      R                  5       nS nUb  Ub  [        UR                  5       5      S:  a  UR                  S5      n[        UR                  5       5      S:  a  UR                  S5      nUR                  S5      nUR                  SU5      nUR                  SU5      n[        US9nU" X5      nU" X5      nUU-   S-  nU	(       d  X4USS  -   nUb  U4U-   $ U$ [        UUUUR                  UR                  S9$ )	Nr=  r   r   r   r}   )ignore_indexr{   )r?  start_logits
end_logitsr   r   )r   r   r   r  splitr`  r   lenrW   clampr   r   r   r   )r-   rX   r   r(   rY   r  r  r   r   r   rZ   r   r)  r@  r  r  
total_lossignored_indexrC  
start_lossend_lossr   s                         r0   r^   !MPNetForQuestionAnswering.forward/  s    &1%<k$++BYBY**)%'/!5#  
 "!*1#)<<r<#: #++B/::<''+668

&=+D?'')*Q."1"9"9""==%%'(1, - 5 5b 9(--a0M-33A}EO)//=AM']CH!,@J
:H$x/14J"/'!"+=F/9/EZMF*Q6Q+%!!//))
 	
r2   )r   rW  r  )	NNNNNNNNN)r4   r5   r6   r7   rE   r   r)   r+  r,  r  r   r   r   r^   r;   r<   r=   s   @r0   r}  r}  #  s      .23704263715)-,0#';
##d*;
 ))D0;
 &&-	;

 ((4/;
 ))D0;
 ''$.;
  $;;
 #Tk;
 D[;
 
u||	;	;;
 ;
r2   r}  c                     U R                  U5      R                  5       n[        R                  " USS9R	                  U5      U-  nUR                  5       U-   $ )z
Replace non-padding symbols with their position numbers. Position numbers begin at padding_idx+1. Padding symbols
are ignored. This is modified from fairseq's `utils.make_positions`. :param torch.Tensor x: :return torch.Tensor:
r   r}   )nerp   r)   cumsumtype_asrc   )rX   rA   maskincremental_indicess       r0   rU   rU   n  sP     <<$((*D,,t3;;DADH##%33r2   )r.  rd  r}  rU  rp  r   r  r   )4rR  r   r)   r   torch.nnr   r   r    r   r#   activationsr	   r
   modeling_outputsr   r   r   r   r   r   r   modeling_utilsr   utilsr   r   configuration_mpnetr   
get_loggerr4   loggerr   Moduler&   rh   r   r   r   r   r   r  r  r.  r"   rU  rd  rp  rX  r}  rU   __all__r3   r2   r0   <module>r     s       A A & '   . , , 
		H	% i? i i8=bii 8=v; ;|RYY 6		  "))  <T299 Tp"))  K
% K
 K
\F
+ F
R")) , L
%9 L
L
^ X
1 X
 X
v =
"6 =
 =
@bii & G
 4 G
 G
T4	r2   