
    Z j                        S SK Jr  S SKrS SKJr  S SKJrJrJr  SSKJ	r
  SSKJrJr  SSKJrJrJr  SSKJr  SS	KJrJr  SS
KJr  SSKJrJrJrJrJrJrJ r J!r!  SSK"J#r#J$r$  SSK%J&r&  SSK'J(r(  SSK)J*r*J+r+J,r,  SSK-J.r.J/r/  SSK0J1r1  SSK2J3r3  \,Rh                  " \55      r6 " S S\Rn                  5      r8  SIS\Rn                  S\Rr                  S\Rr                  S\Rr                  S\Rr                  S-  S\:S-  S\:S\&\*   4S jjr; " S S \Rn                  5      r< " S! S"\Rn                  5      r= " S# S$\Rn                  5      r> " S% S&\Rn                  5      r? " S' S(\Rn                  5      r@ " S) S*\Rn                  5      rA " S+ S,\5      rB\+ " S- S.\$5      5       rC " S/ S0\Rn                  5      rD " S1 S2\Rn                  5      rE\+ " S3 S4\C5      5       rF " S5 S6\Rn                  5      rG " S7 S8\Rn                  5      rH\+" S9S:9 " S; S<\C\5      5       rI\+ " S= S>\C5      5       rJ\+" S?S:9 " S@ SA\C5      5       rK\+ " SB SC\C5      5       rL\+ " SD SE\C5      5       rM\+ " SF SG\C5      5       rN/ SHQrOg)J    )CallableN)BCEWithLogitsLossCrossEntropyLossMSELoss   )initialization)ACT2FNgelu)CacheDynamicCacheEncoderDecoderCache)GenerationMixin)create_bidirectional_maskcreate_causal_mask)GradientCheckpointingLayer))BaseModelOutputWithPastAndCrossAttentions,BaseModelOutputWithPoolingAndCrossAttentions!CausalLMOutputWithCrossAttentionsMaskedLMOutputMultipleChoiceModelOutputQuestionAnsweringModelOutputSequenceClassifierOutputTokenClassifierOutput)ALL_ATTENTION_FUNCTIONSPreTrainedModel)Unpack)apply_chunking_to_forward)TransformersKwargsauto_docstringlogging)can_return_tuplemerge_with_config_defaults)capture_outputs   )Data2VecTextConfigc                      ^  \ rS rSrSrU 4S jr     SS\R                  S-  S\R                  S-  S\R                  S-  S\R                  S-  S	\	S
\R                  4S jjr\S 5       r\SS j5       rSrU =r$ )Data2VecTextEmbeddings7   zGConstruct the embeddings from word, position and token_type embeddings.c                 >  > [         TU ]  5         [        R                  " UR                  UR
                  UR                  S9U l        [        R                  " UR                  UR
                  5      U l	        [        R                  " UR
                  UR                  S9U l
        [        R                  " UR                  5      U l        U R                  S[         R"                  " UR$                  5      R'                  S5      SS9  U R                  S[         R(                  " U R*                  R-                  5       [         R.                  S9SS9  UR                  U l        [        R                  " UR$                  UR
                  U R0                  S9U l        g )	N)padding_idxepsposition_idsr$   F)
persistenttoken_type_ids)dtype)super__init__nn	Embedding
vocab_sizehidden_sizepad_token_idword_embeddingstype_vocab_sizetoken_type_embeddings	LayerNormlayer_norm_epsDropouthidden_dropout_probdropoutregister_buffertorcharangemax_position_embeddingsexpandzerosr-   sizelongr*   position_embeddingsselfconfig	__class__s     ڄ/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/models/data2vec/modeling_data2vec_text.pyr4   Data2VecTextEmbeddings.__init__:   s4   !||F,=,=v?Q?Q_e_r_rs%'\\&2H2H&J\J\%]"f&8&8f>S>STzz&"<"<=ELL)G)GHOOPWXej 	 	
 	ekk$*;*;*@*@*B%**Ubg 	 	
 "..#%<<**F,>,>DL\L\$
     N	input_idsr1   r-   inputs_embedspast_key_values_lengthreturnc                    Uc;  Ub  U R                  XR                  U5      nOU R                  X@R                  5      nUb  UR                  5       nOUR                  5       S S nUu  pxUc  [	        U S5      (       aQ  U R
                  R                  UR                  S   S5      n	[        R                  " U	SUS9n	U	R                  Xx5      nO8[        R                  " U[        R                  U R                  R                  S9nUc  U R                  U5      nU R                  U5      n
XJ-   nU R!                  U5      nX-   nU R#                  U5      nU R%                  U5      nU$ )Nr/   r1   r   r$   )dimindexr2   device)"create_position_ids_from_input_idsr*   &create_position_ids_from_inputs_embedsrH   hasattrr1   rF   shaperC   gatherrG   rI   r-   rZ   r:   r<   rJ   r=   rA   )rL   rR   r1   r-   rS   rT   input_shape
batch_size
seq_lengthbuffered_token_type_idsr<   
embeddingsrJ   s                rO   forwardData2VecTextEmbeddings.forwardN   sb    $#FF//1G   $JJ=ZjZjk #..*K',,.s3K!,

 !t-..*.*=*=*D*D\EWEWXYEZ\^*_'*/,,7NTU]i*j'!8!?!?
!W!&[

SWSdSdSkSk!l  00;M $ : :> J":
"66|D5
^^J/
\\*-
rQ   c                     U R                  5       SS nUS   n[        R                  " US-   X1-   S-   [        R                  U R                  S9nUR                  S5      R                  U5      $ )z
We are provided embeddings directly. We cannot infer which are padded so just generate sequential position ids.

Args:
    inputs_embeds: torch.Tensor

Returns: torch.Tensor
Nr/   r$   rY   r   )rH   rC   rD   rI   rZ   	unsqueezerF   )rS   r*   r`   sequence_lengthr-   s        rO   r\   =Data2VecTextEmbeddings.create_position_ids_from_inputs_embeds~   sn     $((*3B/%a.||!O_:Q>ejjYfYmYm
 %%a(//<<rQ   c                     U R                  U5      R                  5       n[        R                  " USS9R	                  U5      U-   U-  nUR                  5       U-   $ )z
Replace non-padding symbols with their position numbers. Position numbers begin at padding_idx+1. Padding symbols
are ignored. This is modified from fairseq's `utils.make_positions`.

Args:
    x: torch.Tensor x:

Returns: torch.Tensor
r$   rW   )neintrC   cumsumtype_asrI   )rR   r*   rT   maskincremental_indicess        rO   r[   9Data2VecTextEmbeddings.create_position_ids_from_input_ids   sW     ||K(,,.$||Da8@@FI__cgg"'')K77rQ   )r=   rA   r*   rJ   r<   r:   )NNNNr   )r   )__name__
__module____qualname____firstlineno____doc__r4   rC   
LongTensorFloatTensorrn   Tensorre   staticmethodr\   r[   __static_attributes____classcell__rN   s   @rO   r'   r'   7   s    Q
, .2260426&'.##d*. ((4/. &&-	.
 ((4/. !$. 
.` = =" 8 8rQ   r'   modulequerykeyvalueattention_maskscalingrA   kwargsc                    Uc  UR                  S5      S-  n[        R                  " XR                  SS5      5      U-  nUb  X-   n[        R
                  R                  USS9n[        R
                  R                  XU R                  S9n[        R                  " X5      n	U	R                  SS5      R                  5       n	X4$ )Nr/            r   rl   )ptrainingr$   )
rH   rC   matmul	transposer5   
functionalsoftmaxrA   r   
contiguous)
r   r   r   r   r   r   rA   r   attn_weightsattn_outputs
             rO   eager_attention_forwardr      s     **R.D( <<}}Q':;gEL!#4==((2(>L==((6??([L,,|3K''1-88:K$$rQ   c                      ^  \ rS rSrSU 4S jjr  SS\R                  S\R                  S-  S\S-  S\	\
   S\\R                     4
S	 jjrS
rU =r$ )Data2VecTextSelfAttention   Nc                 N  > [         TU ]  5         UR                  UR                  -  S:w  a7  [	        US5      (       d&  [        SUR                   SUR                   S35      eXl        UR                  U l        [        UR                  UR                  -  5      U l        U R                  U R                  -  U l	        U R                  S-  U l
        [        R                  " UR                  U R                  5      U l        [        R                  " UR                  U R                  5      U l        [        R                  " UR                  U R                  5      U l        [        R                   " UR"                  5      U l        UR&                  U l        X l        X0l        g Nr   embedding_sizezThe hidden size (z6) is not a multiple of the number of attention heads ()r   )r3   r4   r8   num_attention_headsr]   
ValueErrorrM   rn   attention_head_sizeall_head_sizer   r5   Linearr   r   r   r?   attention_probs_dropout_probrA   
is_decoder	is_causal	layer_idxrL   rM   r   r   rN   s       rO   r4   "Data2VecTextSelfAttention.__init__   sM    : ::a?PVXhHiHi#F$6$6#7 8 445Q8  #)#=#= #&v'9'9F<V<V'V#W !558P8PP//5YYv1143E3EF
99V//1C1CDYYv1143E3EF
zz&"E"EF ++""rQ   hidden_statesr   past_key_valuesr   rU   c                    UR                   S S n/ UQSPU R                  P7nU R                  U5      R                  " U6 R	                  SS5      nU R                  U5      R                  " U6 R	                  SS5      nU R                  U5      R                  " U6 R	                  SS5      n	UbA  Un
[        U[        5      (       a  UR                  n
U
R                  XU R                  5      u  p[        R                  " U R                  R                  [         5      nU" U UUU	U4U R"                  (       d  SOU R$                  R&                  U R(                  S.UD6u  pUR*                  " / UQSP76 R-                  5       nX4$ )Nr/   r$   r           rA   r   )r^   r   r   viewr   r   r   
isinstancer   self_attention_cacheupdater   r   get_interfacerM   _attn_implementationr   r   rA   r   r   reshaper   )rL   r   r   r   r   r`   hidden_shapequery_layer	key_layervalue_layercurrent_past_key_valuesattention_interfacer   r   s                 rO   re   !Data2VecTextSelfAttention.forward   s    $))#2.CCbC$*B*BC jj/44lCMMaQRSHH]+00,?II!QO	jj/44lCMMaQRS&&5#/+>??*9*N*N' &=%C%CI\`\j\j%k"I(?(M(MKK,,.E)
 %8	%
  $}}C$,,..LL	%
 	%
! "));;;;FFH((rQ   )r   r   rM   rA   r   r   r   r   r   r   r   r   FN)NNrt   ru   rv   rw   r4   rC   r{   rz   r   r   r   tuplere   r}   r~   r   s   @rO   r   r      sl    #6 48(,	')||') ))D0') 	')
 +,') 
u||	') ')rQ   r   c                      ^  \ rS rSrSU 4S jjr   SS\R                  S\R                  S-  S\R                  S-  S\S-  S\	\
   S	\\R                     4S
 jjrSrU =r$ )Data2VecTextCrossAttention   Nc                 ,  > [         TU ]  5         UR                  UR                  -  S:w  a7  [	        US5      (       d&  [        SUR                   SUR                   S35      eXl        UR                  U l        [        UR                  UR                  -  5      U l        U R                  U R                  -  U l	        U R                  S-  U l
        [        R                  " UR                  U R                  5      U l        [        R                  " UR                  U R                  5      U l        [        R                  " UR                  U R                  5      U l        [        R                   " UR"                  5      U l        X l        X0l        g r   )r3   r4   r8   r   r]   r   rM   rn   r   r   r   r5   r   r   r   r   r?   r   rA   r   r   r   s       rO   r4   #Data2VecTextCrossAttention.__init__  s@    : ::a?PVXhHiHi#F$6$6#7 8 445Q8  #)#=#= #&v'9'9F<V<V'V#W !558P8PP//5YYv1143E3EF
99V//1C1CDYYv1143E3EF
zz&"E"EF""rQ   r   encoder_hidden_statesr   r   r   rU   c                 z   UR                   S S n/ UQSPU R                  P7nU R                  U5      R                  U5      R	                  SS5      nUb%  UR
                  R                  U R                  5      OSn	Ubb  U	(       a[  UR                  R                  U R                     R                  n
UR                  R                  U R                     R                  nO/ UR                   S S QSPU R                  P7nU R                  U5      R                  U5      R	                  SS5      n
U R                  U5      R                  U5      R	                  SS5      nUbA  UR                  R                  XU R                  5      u  pSUR
                  U R                  '   [        R                   " U R"                  R$                  [&        5      nU" U UU
UU4U R(                  (       d  SOU R*                  R,                  U R.                  S.UD6u  pUR0                  " / UQSP76 R3                  5       nX4$ )Nr/   r$   r   FTr   r   )r^   r   r   r   r   
is_updatedgetr   cross_attention_cachelayerskeysvaluesr   r   r   r   r   rM   r   r   r   rA   r   r   r   r   )rL   r   r   r   r   r   r`   r   r   r   r   r   kv_shaper   r   r   s                   rO   re   "Data2VecTextCrossAttention.forward  s    $))#2.CCbC$*B*BC jj/44\BLLQPQRGVGb_//33DNNChm
&:'==DDT^^TYYI)??FFt~~V]]KX.44Sb9X2Xt?W?WXH!67<<XFPPQRTUVI**%:;@@JTTUVXYZK*)8)N)N)U)UDNN*&	 >B**4>>:(?(M(MKK,,.E)
 %8	%
  $}}C$,,..LL	%
 	%
! "));;;;FFH((rQ   )r   r   rM   rA   r   r   r   r   r   r   r   r   )NNN)rt   ru   rv   rw   r4   rC   r{   rz   r   r   r   r   re   r}   r~   r   s   @rO   r   r      s    #4 ;?376:1)||1)  %00471) ))D0	1)
 -t31) +,1) 
u||	1) 1)rQ   r   c                   z   ^  \ rS rSrU 4S jrS\R                  S\R                  S\R                  4S jrSrU =r	$ )Data2VecTextSelfOutputiL  c                 (  > [         TU ]  5         [        R                  " UR                  UR                  5      U l        [        R                  " UR                  UR                  S9U l        [        R                  " UR                  5      U l
        g Nr+   )r3   r4   r5   r   r8   denser=   r>   r?   r@   rA   rK   s     rO   r4   Data2VecTextSelfOutput.__init__M  s`    YYv1163E3EF
f&8&8f>S>STzz&"<"<=rQ   r   input_tensorrU   c                 p    U R                  U5      nU R                  U5      nU R                  X-   5      nU$ Nr   rA   r=   rL   r   r   s      rO   re   Data2VecTextSelfOutput.forwardS  5    

=1]3}'CDrQ   r=   r   rA   
rt   ru   rv   rw   r4   rC   r{   re   r}   r~   r   s   @rO   r   r   L  6    >U\\  RWR^R^  rQ   r   c                      ^  \ rS rSrSU 4S jjr    SS\R                  S\R                  S-  S\R                  S-  S\R                  S-  S\S-  S	\	\
   S
\\R                     4S jjrSrU =r$ )Data2VecTextAttentioniZ  Nc                    > [         TU ]  5         X@l        U(       a  [        O[        nU" XUS9U l        [        U5      U l        g )Nr   r   )r3   r4   is_cross_attentionr   r   rL   r   output)rL   rM   r   r   r   attention_classrN   s         rO   r4   Data2VecTextAttention.__init__[  s9    "48J4Pi#F9U	,V4rQ   r   r   r   encoder_attention_maskr   r   rU   c                     U R                   (       d  UOUnU R                  " U4UUUS.UD6u  pxU R                  Xq5      nXx4$ )N)r   r   r   )r   rL   r   )	rL   r   r   r   r   r   r   attention_outputr   s	            rO   re   Data2VecTextAttention.forwardb  s\     04/F/FLb)-*
"7)+	*

 *
&  ;;'7G--rQ   )r   r   rL   )FNFNNNNr   r   s   @rO   r   r   Z  s    5 48:>;?(,.||. ))D0.  %0047	.
 !& 1 1D 8. . +,. 
u||	. .rQ   r   c                   b   ^  \ rS rSrU 4S jrS\R                  S\R                  4S jrSrU =r	$ )Data2VecTextIntermediateiw  c                   > [         TU ]  5         [        R                  " UR                  UR
                  5      U l        [        UR                  [        5      (       a  [        UR                     U l        g UR                  U l        g r   )r3   r4   r5   r   r8   intermediate_sizer   r   
hidden_actstrr	   intermediate_act_fnrK   s     rO   r4   !Data2VecTextIntermediate.__init__x  s`    YYv1163K3KL
f''--'-f.?.?'@D$'-'8'8D$rQ   r   rU   c                 J    U R                  U5      nU R                  U5      nU$ r   r   r   )rL   r   s     rO   re    Data2VecTextIntermediate.forward  s&    

=100?rQ   r   r   r   s   @rO   r   r   w  s(    9U\\ ell  rQ   r   c                   z   ^  \ rS rSrU 4S jrS\R                  S\R                  S\R                  4S jrSrU =r	$ )Data2VecTextOutputi  c                 (  > [         TU ]  5         [        R                  " UR                  UR
                  5      U l        [        R                  " UR
                  UR                  S9U l        [        R                  " UR                  5      U l        g r   )r3   r4   r5   r   r   r8   r   r=   r>   r?   r@   rA   rK   s     rO   r4   Data2VecTextOutput.__init__  s`    YYv779K9KL
f&8&8f>S>STzz&"<"<=rQ   r   r   rU   c                 p    U R                  U5      nU R                  U5      nU R                  X-   5      nU$ r   r   r   s      rO   re   Data2VecTextOutput.forward  r   rQ   r   r   r   s   @rO   r   r     r   rQ   r   c                      ^  \ rS rSrSU 4S jjr    SS\R                  S\R                  S-  S\R                  S-  S\R                  S-  S\S-  S	\	\
   S
\R                  4S jjrS rSrU =r$ )Data2VecTextLayeri  Nc                   > [         TU ]  5         UR                  U l        SU l        [	        XR
                  US9U l        UR
                  U l        UR                  U l        U R                  (       a0  U R
                  (       d  [        U  S35      e[	        USUSS9U l	        [        U5      U l        [        U5      U l        g )Nr$   r   z> should be used as a decoder model if cross attention is addedFT)r   r   r   )r3   r4   chunk_size_feed_forwardseq_len_dimr   r   	attentionadd_cross_attentionr   crossattentionr   intermediater   r   )rL   rM   r   rN   s      rO   r4   Data2VecTextLayer.__init__  s    '-'E'E$.vARAR^gh ++#)#=#= ##?? D6)g!hii"7##'	#D 5V<(0rQ   r   r   r   r   r   r   rU   c                 2   U R                   " UU4SU0UD6u  pxUn	U R                  (       a?  Ub<  [        U S5      (       d  [        SU  S35      eU R                  " US UU4SU0UD6u  pU
n	[        U R                  U R                  U R                  U	5      nU$ )Nr   r   z'If `encoder_hidden_states` are passed, z` has to be instantiated with cross-attention layers by setting `config.add_cross_attention=True`)	r   r   r]   r   r   r   feed_forward_chunkr   r   )rL   r   r   r   r   r   r   self_attention_output_r   cross_attention_outputlayer_outputs               rO   re   Data2VecTextLayer.forward  s     $(>>$
 ,$
 	$
  1??4@4!122 =dV DD D 
 )-(;(;%%&	)
 !0) )%"  60##T%A%A4CSCSUe
 rQ   c                 J    U R                  U5      nU R                  X!5      nU$ r   )r  r   )rL   r   intermediate_outputr  s       rO   r  $Data2VecTextLayer.feed_forward_chunk  s)    "//0@A{{#6IrQ   )r   r   r   r   r  r   r   r   r   r   )rt   ru   rv   rw   r4   rC   r{   rz   r   r   r   re   r  r}   r~   r   s   @rO   r   r     s    1, 48:>;?(,%||% ))D0%  %0047	%
 !& 1 1D 8% % +,% 
%N rQ   r   c                   X   ^  \ rS rSr\rSrSrSS/rSr	Sr
SrSr\\\S.rU 4S jrSrU =r$ )	Data2VecTextPreTrainedModeli  data2vec_textTData2VecTextForTextEmbeddingsr   )r   
attentionscross_attentionsc                 F  > [         TU ]  U5        [        U[        5      (       a|  [        R
                  " UR                  [        R                  " UR                  R                  S   5      R                  S5      5        [        R                  " UR                  5        g g )Nr/   r.   )r3   _init_weightsr   r'   initcopy_r-   rC   rD   r^   rF   zeros_r1   )rL   r   rN   s     rO   r  )Data2VecTextPreTrainedModel._init_weights  sp    f%f455JJv**ELL9L9L9R9RSU9V,W,^,^_f,ghKK--. 6rQ    )rt   ru   rv   rw   r%   config_classbase_model_prefixsupports_gradient_checkpointing_no_split_modules_supports_flash_attn_supports_sdpa_supports_flex_attn_supports_attention_backendr   r   r   _can_record_outputsr  r}   r~   r   s   @rO   r  r    sR    %L'&*#8:MNN"&*/6/ /rQ   r  c                      ^  \ rS rSrU 4S jr     SS\R                  S\R                  S-  S\R                  S-  S\R                  S-  S\S-  S	\	S-  S
\
\   S\\R                     \-  4S jjrSrU =r$ )Data2VecTextEncoderi  c           
         > [         TU ]  5         Xl        [        R                  " [        UR                  5       Vs/ s H  n[        XS9PM     sn5      U l        g s  snf )N)r   )	r3   r4   rM   r5   
ModuleListrangenum_hidden_layersr   layer)rL   rM   irN   s      rO   r4   Data2VecTextEncoder.__init__  sJ    ]]TYZ`ZrZrTs#tTsq$5f$JTs#tu
#ts   ANr   r   r   r   r   	use_cacher   rU   c                     [        U R                  5       H  u  pU	" UUU4UUS.UD6nM     [        UU(       a  US9$ S S9$ )N)r   r   )last_hidden_stater   )	enumerater)  r   )
rL   r   r   r   r   r   r,  r   r*  layer_modules
             rO   re   Data2VecTextEncoder.forward  sg      )4OA(% (> / M  5 9+/8O
 	
>B
 	
rQ   )rM   r)  )NNNNN)rt   ru   rv   rw   r4   rC   r{   rz   r   boolr   r   r   r   re   r}   r~   r   s   @rO   r$  r$    s    v 48:>;?(,!%
||
 ))D0
  %0047	

 !& 1 1D 8
 
 $;
 +,
 
u||	H	H
 
rQ   r$  c                   b   ^  \ rS rSrU 4S jrS\R                  S\R                  4S jrSrU =r	$ )Data2VecTextPooleri  c                    > [         TU ]  5         [        R                  " UR                  UR                  5      U l        [        R                  " 5       U l        g r   )r3   r4   r5   r   r8   r   Tanh
activationrK   s     rO   r4   Data2VecTextPooler.__init__  s9    YYv1163E3EF
'')rQ   r   rU   c                 \    US S 2S4   nU R                  U5      nU R                  U5      nU$ Nr   )r   r7  )rL   r   first_token_tensorpooled_outputs       rO   re   Data2VecTextPooler.forward  s6     +1a40

#566rQ   )r7  r   r   r   s   @rO   r4  r4    s(    $
U\\ ell  rQ   r4  c                     ^  \ rS rSrSS/rSU 4S jjrS rS r\\	\
         SS\R                  S-  S	\R                  S-  S
\R                  S-  S\R                  S-  S\R                  S-  S\R                  S-  S\R                  S-  S\S-  S\S-  S\\   S\\R                     \-  4S jj5       5       5       rS rSrU =r$ )Data2VecTextModeli  r'   r   c                    > [         TU ]  U5        Xl        SU l        [	        U5      U l        [        U5      U l        U(       a  [        U5      OSU l	        U R                  5         g)z^
add_pooling_layer (bool, *optional*, defaults to `True`):
    Whether to add a pooling layer
FN)r3   r4   rM   gradient_checkpointingr'   rd   r$  encoderr4  pooler	post_init)rL   rM   add_pooling_layerrN   s      rO   r4   Data2VecTextModel.__init__  sT    
 	 &+#08*624E(04 	rQ   c                 .    U R                   R                  $ r   rd   r:   rL   s    rO   get_input_embeddings&Data2VecTextModel.get_input_embeddings0  s    ...rQ   c                 $    XR                   l        g r   rH  )rL   r   s     rO   set_input_embeddings&Data2VecTextModel.set_input_embeddings3  s    */'rQ   NrR   r   r1   r-   rS   r   r   r   r,  r   rU   c
           
         US L US L-  (       a  [        S5      eU R                  R                  (       a  U	b  U	OU R                  R                  n	OSn	U	(       ab  Uc_  Uc  U R                  R                  (       a.  [        [        U R                  S9[        U R                  S95      O[        U R                  S9nUb  UR                  5       OSnU R                  UUUUUS9nU R                  UUUUUS9u  p'U R                  " U4UUUUU	US.U
D6nUR                  nU R                  b  U R                  U5      OS n[        UUUR                  S9$ )	Nz:You must specify exactly one of input_ids or inputs_embedsF)rM   r   )rR   r-   r1   rS   rT   )r   r   embedding_outputr   r   )r   r   r   r   r,  r-   )r.  pooler_outputr   )r   rM   r   r,  is_encoder_decoderr   r   get_seq_lengthrd   _create_attention_masksrB  r.  rC  r   r   )rL   rR   r   r1   r-   rS   r   r   r   r,  r   rT   rP  encoder_outputssequence_outputr<  s                   rO   re   Data2VecTextModel.forward6  sw     -t";<YZZ;;!!%.%:	@U@UII0 )48V8V $L$DlZ^ZeZeFfg!5  FUE`!?!?!Afg??%)'#9 + 
 261M1M)#9-"7+ 2N 2
. ,,	
)"7#9+%	
 	
 *;;8<8OO4UY;-'+;;
 	
rQ   c                     U R                   R                  (       a  [        U R                   UUUS9nO[        U R                   UUS9nUb  [        U R                   UUUS9nX4$ )N)rM   rS   r   r   )rM   rS   r   )rM   rS   r   r   )rM   r   r   r   )rL   r   r   rP  r   r   s         rO   rT  )Data2VecTextModel._create_attention_masksz  sr     ;;!!/{{.- /	N 7{{.-N "-%>{{.5&;	&" 55rQ   )rM   rd   rB  rA  rC  )T)	NNNNNNNNN)rt   ru   rv   rw   r  r4   rJ  rM  r"   r#   r   rC   r{   r   r2  r   r   r   r   re   rT  r}   r~   r   s   @rO   r?  r?    s2   13FG"/0   *..2.2,0-1596:(,!%?
<<$&?
 t+?
 t+	?

 llT)?
 ||d*?
  %||d2?
 !&t 3?
 ?
 $;?
 +,?
 
u||	K	K?
    ?
B6 6rQ   r?  c                   2   ^  \ rS rSrSrU 4S jrS rSrU =r$ )Data2VecTextLMHeadi  z/Data2VecText Head for masked language modeling.c                   > [         TU ]  5         [        R                  " UR                  UR                  5      U l        [        R                  " UR                  UR                  S9U l        [        R                  " UR                  UR                  5      U l
        [        R                  " [        R                  " UR                  5      5      U l        g r   )r3   r4   r5   r   r8   r   r=   r>   
layer_normr7   decoder	ParameterrC   rG   biasrK   s     rO   r4   Data2VecTextLMHead.__init__  s    YYv1163E3EF
,,v'9'9v?T?TUyy!3!3V5F5FGLLV->->!?@	rQ   c                     U R                  U5      n[        U5      nU R                  U5      nU R                  U5      nU$ r   )r   r
   r]  r^  rL   featuresr   xs       rO   re   Data2VecTextLMHead.forward  s;    JJx GOOA LLOrQ   )r`  r^  r   r]  	rt   ru   rv   rw   rx   r4   re   r}   r~   r   s   @rO   r[  r[    s    9A rQ   r[  c                   2   ^  \ rS rSrSrU 4S jrS rSrU =r$ )Data2VecTextClassificationHeadi  z-Head for sentence-level classification tasks.c                 b  > [         TU ]  5         [        R                  " UR                  UR                  5      U l        UR                  b  UR                  OUR                  n[        R                  " U5      U l	        [        R                  " UR                  UR                  5      U l        g r   )r3   r4   r5   r   r8   r   classifier_dropoutr@   r?   rA   
num_labelsout_projrL   rM   rk  rN   s      rO   r4   'Data2VecTextClassificationHead.__init__  s    YYv1163E3EF
)/)B)B)NF%%TZTnTn 	 zz"45		&"4"4f6G6GHrQ   c                     US S 2SS S 24   nU R                  U5      nU R                  U5      n[        R                  " U5      nU R                  U5      nU R	                  U5      nU$ r:  )rA   r   rC   tanhrm  rc  s       rO   re   &Data2VecTextClassificationHead.forward  sY    Q1WLLOJJqMJJqMLLOMM!rQ   )r   rA   rm  rg  r   s   @rO   ri  ri    s    7I rQ   ri  zX
    Data2VecText Model with a `language modeling` head on top for CLM fine-tuning.
    )custom_introc                     ^  \ rS rSrSSS.rU 4S jrS rS r\\	           SS	\
R                  S-  S
\
R                  S-  S\
R                  S-  S\
R                  S-  S\
R                  S-  S\
R                  S-  S\
R                  S-  S\
R                  S-  S\\\
R                        S-  S\S-  S\\
R                   -  S\\   S\\-  4S jj5       5       rSrU =r$ )Data2VecTextForCausalLMi  /data2vec_text.embeddings.word_embeddings.weightlm_head.biaszlm_head.decoder.weightzlm_head.decoder.biasc                    > [         TU ]  U5        UR                  (       d  [        R	                  S5        [        USS9U l        [        U5      U l        U R                  5         g )NzTIf you want to use `Data2VecTextLMHeadModel` as a standalone, add `is_decoder=True.`FrE  
r3   r4   r   loggerwarningr?  r  r[  lm_headrD  rK   s     rO   r4    Data2VecTextForCausalLM.__init__  sM       NNqr.vO)&1 	rQ   c                 .    U R                   R                  $ r   r~  r^  rI  s    rO   get_output_embeddings-Data2VecTextForCausalLM.get_output_embeddings      ||###rQ   c                 $    XR                   l        g r   r  rL   new_embeddingss     rO   set_output_embeddings-Data2VecTextForCausalLM.set_output_embeddings      -rQ   NrR   r   r1   r-   rS   r   r   labelsr   r,  logits_to_keepr   rU   c                    Ub  Sn
U R                   " U4UUUUUUU	U
SS.	UD6nUR                  n[        U[        5      (       a  [	        U* S5      OUnU R                  USS2USS24   5      nSnUb)  U R                  " SUXR                  R                  S.UD6n[        UUUR                  UR                  UR                  UR                  S9$ )a  
labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
    Labels for computing the left-to-right language modeling loss (next word prediction). Indices should be in
    `[-100, 0, ..., config.vocab_size]` (see `input_ids` docstring) Tokens with indices set to `-100` are
    ignored (masked), the loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`

Example:

```python
>>> from transformers import AutoTokenizer, Data2VecTextForCausalLM, Data2VecTextConfig
>>> import torch

>>> tokenizer = AutoTokenizer.from_pretrained("facebook/data2vec-text-base")
>>> config = Data2VecTextConfig.from_pretrained("facebook/data2vec-text-base")
>>> config.is_decoder = True
>>> model = Data2VecTextForCausalLM.from_pretrained("facebook/data2vec-text-base", config=config)

>>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
>>> outputs = model(**inputs)

>>> prediction_logits = outputs.logits
```NFT)	r   r1   r-   rS   r   r   r   r,  return_dict)logitsr  r7   )lossr  r   r   r  r  r  )r  r.  r   rn   slicer~  loss_functionrM   r7   r   r   r   r  r  )rL   rR   r   r1   r-   rS   r   r   r  r   r,  r  r   outputsr   slice_indicesr  r  s                     rO   re   Data2VecTextForCausalLM.forward  s    N I@D@R@RA
))%'"7#9+A
 A
  118B>SV8W8W~ot4]kmA}a,?@A%%pVF{{OeOepiopD0#33!//))$55
 	
rQ   r  r~  )NNNNNNNNNNr   )rt   ru   rv   rw   _tied_weights_keysr4   r  r  r!   r   rC   ry   rz   r   r2  rn   r{   r   r   r   re   r}   r~   r   s   @rO   ru  ru    sw    #T .

$.  .237260426:>;?*.BF!%-.F
##d*F
 ))D0F
 ((4/	F

 &&-F
 ((4/F
  %0047F
 !& 1 1D 8F
   4'F
 uU%6%6784?F
 $;F
 ell*F
 +,F
 
2	2F
  F
rQ   ru  c                   t  ^  \ rS rSrSSS.rU 4S jrS rS r\\	        SS	\
R                  S-  S
\
R                  S-  S\
R                  S-  S\
R                  S-  S\
R                  S-  S\
R                  S-  S\
R                  S-  S\
R                  S-  S\\   S\\-  4S jj5       5       rSrU =r$ )Data2VecTextForMaskedLMi/  rv  rw  rx  c                    > [         TU ]  U5        UR                  (       a  [        R	                  S5        [        USS9U l        [        U5      U l        U R                  5         g )NzsIf you want to use `Data2VecTextForMaskedLM` make sure `config.is_decoder=False` for bi-directional self-attention.Frz  r{  rK   s     rO   r4    Data2VecTextForMaskedLM.__init__6  sS     NN1
 /vO)&1 	rQ   c                 .    U R                   R                  $ r   r  rI  s    rO   r  -Data2VecTextForMaskedLM.get_output_embeddingsE  r  rQ   c                 $    XR                   l        g r   r  r  s     rO   r  -Data2VecTextForMaskedLM.set_output_embeddingsH  r  rQ   NrR   r   r1   r-   rS   r   r   r  r   rU   c	                 p   U R                   " U4UUUUUUSS.U	D6n
U
S   nU R                  U5      nSnUba  [        5       nUR                  UR                  5      nU" UR                  SU R                  R                  5      UR                  S5      5      n[        UUU
R                  U
R                  S9$ )az  
labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
    Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,
    config.vocab_size]` (see `input_ids` docstring) Tokens with indices set to `-100` are ignored (masked), the
    loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`
T)r   r1   r-   rS   r   r   r  r   Nr/   r  r  r   r  )r  r~  r   torZ   r   rM   r7   r   r   r  )rL   rR   r   r1   r-   rS   r   r   r  r   r  rV  prediction_scoresmasked_lm_lossloss_fcts                  rO   re   Data2VecTextForMaskedLM.forwardK  s    ( $$

))%'"7#9

 

 "!* LL9')HYY0778F%&7&<&<RAWAW&XZ`ZeZefhZijN$!//))	
 	
rQ   r  )NNNNNNNN)rt   ru   rv   rw   r  r4   r  r  r!   r   rC   ry   rz   r   r   r   r   re   r}   r~   r   s   @rO   r  r  /  s    #T .
$.  .237260426:>;?*.,
##d*,
 ))D0,
 ((4/	,

 &&-,
 ((4/,
  %0047,
 !& 1 1D 8,
   4',
 +,,
 
	,
  ,
rQ   r  z
    Data2VecText Model transformer with a sequence classification/regression head on top (a linear layer on top of the
    pooled output) e.g. for GLUE tasks.
    c                     ^  \ rS rSrU 4S jr\\      SS\R                  S-  S\R                  S-  S\R                  S-  S\R                  S-  S\R                  S-  S	\R                  S-  S
\
\   S\\-  4S jj5       5       rSrU =r$ )%Data2VecTextForSequenceClassificationi|  c                    > [         TU ]  U5        UR                  U l        Xl        [	        USS9U l        [        U5      U l        U R                  5         g NFrz  )	r3   r4   rl  rM   r?  r  ri  
classifierrD  rK   s     rO   r4   .Data2VecTextForSequenceClassification.__init__  sI      ++.vO8@ 	rQ   NrR   r   r1   r-   rS   r  r   rU   c           	         U R                   " U4UUUUSS.UD6nUS   n	U R                  U	5      n
SnUGb  UR                  U
R                  5      nU R                  R
                  c  U R                  S:X  a  SU R                  l        OoU R                  S:  aN  UR                  [        R                  :X  d  UR                  [        R                  :X  a  SU R                  l        OSU R                  l        U R                  R
                  S:X  aI  [        5       nU R                  S:X  a&  U" U
R                  5       UR                  5       5      nOU" X5      nOU R                  R
                  S:X  a=  [        5       nU" U
R                  S	U R                  5      UR                  S	5      5      nO,U R                  R
                  S:X  a  [        5       nU" X5      n[!        UU
UR"                  UR$                  S
9$ )ae  
labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
    Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
    config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
    `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
Tr   r1   r-   rS   r  r   Nr$   
regressionsingle_label_classificationmulti_label_classificationr/   r  )r  r  r  rZ   rM   problem_typerl  r2   rC   rI   rn   r   squeezer   r   r   r   r   r  rL   rR   r   r1   r-   rS   r  r   r  rV  r  r  r  s                rO   re   -Data2VecTextForSequenceClassification.forward  s   $ $$
))%'
 
 "!*1YYv}}-F{{''/??a'/;DKK,__q(fllejj.HFLL\a\e\eLe/LDKK,/KDKK,{{''<7"9??a'#FNN$4fnn6FGD#F3D))-JJ+-B @&++b/R))-II,./'!//))	
 	
rQ   )r  rM   r  rl  NNNNNN)rt   ru   rv   rw   r4   r!   r   rC   ry   rz   r   r   r   r   re   r}   r~   r   s   @rO   r  r  |  s    	  .237260426*.:
##d*:
 ))D0:
 ((4/	:

 &&-:
 ((4/:
   4':
 +,:
 
)	):
  :
rQ   r  c                     ^  \ rS rSrU 4S jr\\      SS\R                  S-  S\R                  S-  S\R                  S-  S\R                  S-  S\R                  S-  S	\R                  S-  S
\
\   S\\-  4S jj5       5       rSrU =r$ )Data2VecTextForMultipleChoicei  c                    > [         TU ]  U5        [        U5      U l        [        R
                  " UR                  5      U l        [        R                  " UR                  S5      U l
        U R                  5         g )Nr$   )r3   r4   r?  r  r5   r?   r@   rA   r   r8   r  rD  rK   s     rO   r4   &Data2VecTextForMultipleChoice.__init__  sW     .v6zz&"<"<=))F$6$6: 	rQ   NrR   r1   r   r  r-   rS   r   rU   c           	      *   Ub  UR                   S   OUR                   S   nUb!  UR                  SUR                  S5      5      OSn	Ub!  UR                  SUR                  S5      5      OSn
Ub!  UR                  SUR                  S5      5      OSnUb!  UR                  SUR                  S5      5      OSnUb1  UR                  SUR                  S5      UR                  S5      5      OSnU R                  " U	4U
UUUSS.UD6nUS   nU R	                  U5      nU R                  U5      nUR                  SU5      nSnUb.  [        5       nUR                  UR                  5      nU" UU5      n[        UUUR                  UR                  S9$ )a  
input_ids (`torch.LongTensor` of shape `(batch_size, num_choices, sequence_length)`):
    Indices of input sequence tokens in the vocabulary.

    Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and
    [`PreTrainedTokenizer.__call__`] for details.

    [What are input IDs?](../glossary#input-ids)
token_type_ids (`torch.LongTensor` of shape `(batch_size, num_choices, sequence_length)`, *optional*):
    Segment token indices to indicate first and second portions of the inputs. Indices are selected in `[0,
    1]`:

    - 0 corresponds to a *sentence A* token,
    - 1 corresponds to a *sentence B* token.

    [What are token type IDs?](../glossary#token-type-ids)
labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
    Labels for computing the multiple choice classification loss. Indices should be in `[0, ...,
    num_choices-1]` where `num_choices` is the size of the second dimension of the input tensors. (See
    `input_ids` above)
position_ids (`torch.LongTensor` of shape `(batch_size, num_choices, sequence_length)`, *optional*):
    Indices of positions of each input sequence tokens in the position embeddings. Selected in the range `[0,
    config.max_position_embeddings - 1]`.

    [What are position IDs?](../glossary#position-ids)
inputs_embeds (`torch.FloatTensor` of shape `(batch_size, num_choices, sequence_length, hidden_size)`, *optional*):
    Optionally, instead of passing `input_ids` you can choose to directly pass an embedded representation. This
    is useful if you want more control over how to convert `input_ids` indices into associated vectors than the
    model's internal embedding lookup matrix.
Nr$   r/   T)r-   r1   r   rS   r  r  )r^   r   rH   r  rA   r  r   r  rZ   r   r   r  )rL   rR   r1   r   r  r-   rS   r   num_choicesflat_input_idsflat_position_idsflat_token_type_idsflat_attention_maskflat_inputs_embedsr  r<  r  reshaped_logitsr  r  s                       rO   re   %Data2VecTextForMultipleChoice.forward  s   T -6,Aiooa(}GZGZ[\G]CLCXINN2,>?^bLXLdL--b,2C2CB2GHjnR`Rln11"n6I6I"6MNrvR`Rln11"n6I6I"6MNrv ( r=#5#5b#9=;M;Mb;QR 	 $$
*..,
 
  
]3/ ++b+6')HYY556FOV4D("!//))	
 	
rQ   )r  r  rA   r  )rt   ru   rv   rw   r4   r!   r   rC   ry   rz   r   r   r   r   re   r}   r~   r   s   @rO   r  r    s      .22637*.0426O
##d*O
 ((4/O
 ))D0	O

   4'O
 &&-O
 ((4/O
 +,O
 
*	*O
  O
rQ   r  c                     ^  \ rS rSrU 4S jr\\      SS\R                  S-  S\R                  S-  S\R                  S-  S\R                  S-  S\R                  S-  S	\R                  S-  S
\
\   S\\-  4S jj5       5       rSrU =r$ )"Data2VecTextForTokenClassificationi-  c                 d  > [         TU ]  U5        UR                  U l        [        USS9U l        UR
                  b  UR
                  OUR                  n[        R                  " U5      U l	        [        R                  " UR                  UR                  5      U l        U R                  5         g r  )r3   r4   rl  r?  r  rk  r@   r5   r?   rA   r   r8   r  rD  rn  s      rO   r4   +Data2VecTextForTokenClassification.__init__/  s      ++.vO)/)B)B)NF%%TZTnTn 	 zz"45))F$6$68I8IJ 	rQ   NrR   r   r1   r-   rS   r  r   rU   c           	      z   U R                   " U4UUUUSS.UD6nUS   n	U R                  U	5      n	U R                  U	5      n
SnUbW  [        5       nUR	                  U
R
                  5      nU" U
R                  SU R                  5      UR                  S5      5      n[        UU
UR                  UR                  S9$ )z
labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
    Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`.
Tr  r   Nr/   r  )r  rA   r  r   r  rZ   r   rl  r   r   r  r  s                rO   re   *Data2VecTextForTokenClassification.forward=  s      $$
))%'
 
 "!*,,71')HYYv}}-FFKKDOO<fkk"oND$!//))	
 	
rQ   )r  r  rA   rl  r  )rt   ru   rv   rw   r4   r!   r   rC   ry   rz   r   r   r   r   re   r}   r~   r   s   @rO   r  r  -  s      .237260426*.)
##d*)
 ))D0)
 ((4/	)

 &&-)
 ((4/)
   4')
 +,)
 
&	&)
  )
rQ   r  c                   >  ^  \ rS rSrU 4S jr\\       SS\R                  S-  S\R                  S-  S\R                  S-  S\R                  S-  S\R                  S-  S	\R                  S-  S
\R                  S-  S\
\   S\\-  4S jj5       5       rSrU =r$ ) Data2VecTextForQuestionAnsweringik  c                    > [         TU ]  U5        UR                  U l        [        USS9U l        [
        R                  " UR                  UR                  5      U l        U R                  5         g r  )
r3   r4   rl  r?  r  r5   r   r8   
qa_outputsrD  rK   s     rO   r4   )Data2VecTextForQuestionAnswering.__init__m  sV      ++.vO))F$6$68I8IJ 	rQ   NrR   r   r1   r-   rS   start_positionsend_positionsr   rU   c           	         U R                   " U4UUUUSS.UD6n	U	S   n
U R                  U
5      nUR                  SSS9u  pUR                  S5      R	                  5       nUR                  S5      R	                  5       nS nUb  Ub  [        UR                  5       5      S:  a  UR                  S5      n[        UR                  5       5      S:  a  UR                  S5      nUR                  S5      nUR                  SU5      nUR                  SU5      n[        US9nU" X5      nU" X5      nUU-   S-  n[        UUUU	R                  U	R                  S	9$ )
NTr  r   r$   r/   rl   )ignore_indexr   )r  start_logits
end_logitsr   r  )r  r  splitr  r   lenrH   clampr   r   r   r  )rL   rR   r   r1   r-   rS   r  r  r   r  rV  r  r  r  
total_lossignored_indexr  
start_lossend_losss                      rO   re   (Data2VecTextForQuestionAnswering.forwardw  sz    $$
))%'
 
 "!*1#)<<r<#: #++B/::<''+668

&=+D?'')*Q."1"9"9""==%%'(1, - 5 5b 9(--a0M-33A}EO)//=AM']CH!,@J
:H$x/14J+%!!//))
 	
rQ   )r  rl  r  )NNNNNNN)rt   ru   rv   rw   r4   r!   r   rC   ry   rz   r   r   r   r   re   r}   r~   r   s   @rO   r  r  k  s      .23726042637153
##d*3
 ))D03
 ((4/	3

 &&-3
 ((4/3
 ))D03
 ''$.3
 +,3
 
-	-3
  3
rQ   r  )ru  r  r  r  r  r  r?  r  )Nr   )Pcollections.abcr   rC   torch.nnr5   r   r   r    r   r  activationsr	   r
   cache_utilsr   r   r   
generationr   masking_utilsr   r   modeling_layersr   modeling_outputsr   r   r   r   r   r   r   r   modeling_utilsr   r   processing_utilsr   pytorch_utilsr   utilsr   r   r    utils.genericr!   r"   utils.output_capturingr#   configuration_data2vec_textr%   
get_loggerrt   r|  Moduler'   r{   floatr   r   r   r   r   r   r   r   r  r$  r4  r?  r[  ri  ru  r  r  r  r  r  __all__r  rQ   rO   <module>r     s  * %   A A & ' C C ) J 9	 	 	 G & 6 @ @ I 5 ; 
		H	%g8RYY g8` !%II%<<% 
% <<	%
 LL4'% T\% % '(%8@)		 @)FI) I)XRYY .BII .:ryy  >2 >B // / /,
")) 
@  |63 |6 |6~ ,RYY , 
`
9? `

`
F I
9 I
 I
X H
,G H
H
V \
$? \
 \
~ :
)D :
 :
z @
'B @
 @
F	rQ   