"""PyTorch Data2VecText model."""

import torch
from torch import nn
from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss

from ... import initialization as init
from ...generation import GenerationMixin
from ...modeling_outputs import (
    BaseModelOutputWithPoolingAndCrossAttentions,
    CausalLMOutputWithCrossAttentions,
    MaskedLMOutput,
    MultipleChoiceModelOutput,
    QuestionAnsweringModelOutput,
    SequenceClassifierOutput,
    TokenClassifierOutput,
)
from ...modeling_utils import PreTrainedModel
from ...processing_utils import Unpack
from ...utils import TransformersKwargs, auto_docstring, logging
from ...utils.generic import can_return_tuple
from ..roberta.modeling_roberta import (
    RobertaClassificationHead,
    RobertaCrossAttention,
    RobertaEmbeddings,
    RobertaLayer,
    RobertaLMHead,
    RobertaModel,
    RobertaSelfAttention,
)
from .configuration_data2vec_text import Data2VecTextConfig


logger = logging.get_logger(__name__)


class Data2VecTextEmbeddings(RobertaEmbeddings):
    pass


class Data2VecTextSelfAttention(RobertaSelfAttention):
    pass


class Data2VecTextCrossAttention(RobertaCrossAttention):
    pass


class Data2VecTextLayer(RobertaLayer):
    pass


@auto_docstring
class Data2VecTextPreTrainedModel(PreTrainedModel):
    config: Data2VecTextConfig
    base_model_prefix = "data2vec_text"
    supports_gradient_checkpointing = True
    _no_split_modules = ["Data2VecTextForTextEmbeddings", "Data2VecTextLayer"]
    _supports_flash_attn = True
    _supports_sdpa = True
    _supports_flex_attn = True
    _supports_attention_backend = True

    _can_record_outputs = {
        "hidden_states": Data2VecTextLayer,
        "attentions": Data2VecTextSelfAttention,
        "cross_attentions": Data2VecTextCrossAttention,
    }

    def _init_weights(self, module):
        super()._init_weights(module)
        if isinstance(module, Data2VecTextEmbeddings):
            init.copy_(module.position_ids, torch.arange(module.position_ids.shape[-1]).expand((1, -1)))
            init.zeros_(module.token_type_ids)


@auto_docstring
class Data2VecTextModel(RobertaModel):
    pass


class Data2VecTextLMHead(RobertaLMHead):
    pass


class Data2VecTextClassificationHead(RobertaClassificationHead):
    pass

@auto_docstring(
    custom_intro="""
    Data2VecText Model with a `language modeling` head on top for CLM fine-tuning.
    """
)
class Data2VecTextForCausalLM(Data2VecTextPreTrainedModel, GenerationMixin):
    _tied_weights_keys = {
        "lm_head.decoder.weight": "data2vec_text.embeddings.word_embeddings.weight",
        "lm_head.decoder.bias": "lm_head.bias",
    }

    def __init__(self, config):
        super().__init__(config)

        if not config.is_decoder:
            logger.warning("If you want to use `Data2VecTextLMHeadModel` as a standalone, add `is_decoder=True.`")

        self.data2vec_text = Data2VecTextModel(config, add_pooling_layer=False)
        self.lm_head = Data2VecTextLMHead(config)

        # Initialize weights and apply final processing
        self.post_init()

    def get_output_embeddings(self):
        return self.lm_head.decoder

    def set_output_embeddings(self, new_embeddings):
        self.lm_head.decoder = new_embeddings

    @can_return_tuple
    @auto_docstring
    def forward(
        self,
        input_ids: torch.LongTensor | None = None,
        attention_mask: torch.FloatTensor | None = None,
        token_type_ids: torch.LongTensor | None = None,
        position_ids: torch.LongTensor | None = None,
        inputs_embeds: torch.FloatTensor | None = None,
        encoder_hidden_states: torch.FloatTensor | None = None,
        encoder_attention_mask: torch.FloatTensor | None = None,
        labels: torch.LongTensor | None = None,
        past_key_values: tuple[tuple[torch.FloatTensor]] | None = None,
        use_cache: bool | None = None,
        logits_to_keep: int | torch.Tensor = 0,
        **kwargs: Unpack[TransformersKwargs],
    ) -> tuple | CausalLMOutputWithCrossAttentions:
        r"""
        labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the left-to-right language modeling loss (next word prediction). Indices should be in
            `[-100, 0, ..., config.vocab_size]` (see `input_ids` docstring) Tokens with indices set to `-100` are
            ignored (masked), the loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`

        Example:

        ```python
        >>> from transformers import AutoTokenizer, Data2VecTextForCausalLM, Data2VecTextConfig
        >>> import torch

        >>> tokenizer = AutoTokenizer.from_pretrained("facebook/data2vec-text-base")
        >>> config = Data2VecTextConfig.from_pretrained("facebook/data2vec-text-base")
        >>> config.is_decoder = True
        >>> model = Data2VecTextForCausalLM.from_pretrained("facebook/data2vec-text-base", config=config)

        >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
        >>> outputs = model(**inputs)

        >>> prediction_logits = outputs.logits
        ```"""
        if labels is not None:
            use_cache = False

        outputs = self.data2vec_text(
            input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            position_ids=position_ids,
            inputs_embeds=inputs_embeds,
            encoder_hidden_states=encoder_hidden_states,
            encoder_attention_mask=encoder_attention_mask,
            past_key_values=past_key_values,
            use_cache=use_cache,
            return_dict=True,
            **kwargs,
        )

        hidden_states = outputs.last_hidden_state
        slice_indices = slice(-logits_to_keep, None) if isinstance(logits_to_keep, int) else logits_to_keep
        logits = self.lm_head(hidden_states[:, slice_indices, :])

        loss = None
        if labels is not None:
            loss = self.loss_function(logits=logits, labels=labels, vocab_size=self.config.vocab_size, **kwargs)

        return CausalLMOutputWithCrossAttentions(
            loss=loss,
            logits=logits,
            past_key_values=outputs.past_key_values,
            hidden_states=outputs.hidden_states,
            attentions=outputs.attentions,
            cross_attentions=outputs.cross_attentions,
        )


@auto_docstring
class Data2VecTextForMaskedLM(Data2VecTextPreTrainedModel):
    _tied_weights_keys = {
        "lm_head.decoder.weight": "data2vec_text.embeddings.word_embeddings.weight",
        "lm_head.decoder.bias": "lm_head.bias",
    }

    def __init__(self, config):
        super().__init__(config)

        if config.is_decoder:
            logger.warning(
                "If you want to use `Data2VecTextForMaskedLM` make sure `config.is_decoder=False` for "
                "bi-directional self-attention."
            )

        self.data2vec_text = Data2VecTextModel(config, add_pooling_layer=False)
        self.lm_head = Data2VecTextLMHead(config)

        # Initialize weights and apply final processing
        self.post_init()

    def get_output_embeddings(self):
        return self.lm_head.decoder

    def set_output_embeddings(self, new_embeddings):
        self.lm_head.decoder = new_embeddings

    @can_return_tuple
    @auto_docstring
    def forward(
        self,
        input_ids: torch.LongTensor | None = None,
        attention_mask: torch.FloatTensor | None = None,
        token_type_ids: torch.LongTensor | None = None,
        position_ids: torch.LongTensor | None = None,
        inputs_embeds: torch.FloatTensor | None = None,
        encoder_hidden_states: torch.FloatTensor | None = None,
        encoder_attention_mask: torch.FloatTensor | None = None,
        labels: torch.LongTensor | None = None,
        **kwargs: Unpack[TransformersKwargs],
    ) -> tuple | MaskedLMOutput:
        r"""
        labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,
            config.vocab_size]` (see `input_ids` docstring) Tokens with indices set to `-100` are ignored (masked), the
            loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`
        """
        outputs = self.data2vec_text(
            input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            position_ids=position_ids,
            inputs_embeds=inputs_embeds,
            encoder_hidden_states=encoder_hidden_states,
            encoder_attention_mask=encoder_attention_mask,
            return_dict=True,
            **kwargs,
        )
        sequence_output = outputs[0]
        prediction_scores = self.lm_head(sequence_output)

        masked_lm_loss = None
        if labels is not None:
            loss_fct = CrossEntropyLoss()
            # move labels to the correct device to enable model parallelism
            labels = labels.to(prediction_scores.device)
            masked_lm_loss = loss_fct(prediction_scores.view(-1, self.config.vocab_size), labels.view(-1))

        return MaskedLMOutput(
            loss=masked_lm_loss,
            logits=prediction_scores,
            hidden_states=outputs.hidden_states,
            attentions=outputs.attentions,
        )

@auto_docstring(
    custom_intro="""
    Data2VecText Model transformer with a sequence classification/regression head on top (a linear layer on top of the
    pooled output) e.g. for GLUE tasks.
    """
)
class Data2VecTextForSequenceClassification(Data2VecTextPreTrainedModel):
    def __init__(self, config):
        super().__init__(config)
        self.num_labels = config.num_labels
        self.config = config

        self.data2vec_text = Data2VecTextModel(config, add_pooling_layer=False)
        self.classifier = Data2VecTextClassificationHead(config)

        # Initialize weights and apply final processing
        self.post_init()

    @can_return_tuple
    @auto_docstring
    def forward(
        self,
        input_ids: torch.LongTensor | None = None,
        attention_mask: torch.FloatTensor | None = None,
        token_type_ids: torch.LongTensor | None = None,
        position_ids: torch.LongTensor | None = None,
        inputs_embeds: torch.FloatTensor | None = None,
        labels: torch.LongTensor | None = None,
        **kwargs: Unpack[TransformersKwargs],
    ) -> tuple | SequenceClassifierOutput:
        r"""
        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
            config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
            `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
        """
        outputs = self.data2vec_text(
            input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            position_ids=position_ids,
            inputs_embeds=inputs_embeds,
            return_dict=True,
            **kwargs,
        )
        sequence_output = outputs[0]
        logits = self.classifier(sequence_output)

        loss = None
        if labels is not None:
            # move labels to the correct device to enable model parallelism
            labels = labels.to(logits.device)
            if self.config.problem_type is None:
                if self.num_labels == 1:
                    self.config.problem_type = "regression"
                elif self.num_labels > 1 and (labels.dtype == torch.long or labels.dtype == torch.int):
                    self.config.problem_type = "single_label_classification"
                else:
                    self.config.problem_type = "multi_label_classification"

            if self.config.problem_type == "regression":
                loss_fct = MSELoss()
                if self.num_labels == 1:
                    loss = loss_fct(logits.squeeze(), labels.squeeze())
                else:
                    loss = loss_fct(logits, labels)
            elif self.config.problem_type == "single_label_classification":
                loss_fct = CrossEntropyLoss()
                loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
            elif self.config.problem_type == "multi_label_classification":
                loss_fct = BCEWithLogitsLoss()
                loss = loss_fct(logits, labels)

        return SequenceClassifierOutput(
            loss=loss,
            logits=logits,
            hidden_states=outputs.hidden_states,
            attentions=outputs.attentions,
        )


@auto_docstring
class Data2VecTextForMultipleChoice(Data2VecTextPreTrainedModel):
    def __init__(self, config):
        super().__init__(config)

        self.data2vec_text = Data2VecTextModel(config)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
        self.classifier = nn.Linear(config.hidden_size, 1)

        # Initialize weights and apply final processing
        self.post_init()

    @can_return_tuple
    @auto_docstring
    def forward(
        self,
        input_ids: torch.LongTensor | None = None,
        token_type_ids: torch.LongTensor | None = None,
        attention_mask: torch.FloatTensor | None = None,
        labels: torch.LongTensor | None = None,
        position_ids: torch.LongTensor | None = None,
        inputs_embeds: torch.FloatTensor | None = None,
        **kwargs: Unpack[TransformersKwargs],
    ) -> tuple | MultipleChoiceModelOutput:
        r"""
        input_ids (`torch.LongTensor` of shape `(batch_size, num_choices, sequence_length)`):
            Indices of input sequence tokens in the vocabulary.

            Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and
            [`PreTrainedTokenizer.__call__`] for details.

            [What are input IDs?](../glossary#input-ids)
        token_type_ids (`torch.LongTensor` of shape `(batch_size, num_choices, sequence_length)`, *optional*):
            Segment token indices to indicate first and second portions of the inputs. Indices are selected in `[0,
            1]`:

            - 0 corresponds to a *sentence A* token,
            - 1 corresponds to a *sentence B* token.

            [What are token type IDs?](../glossary#token-type-ids)
        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the multiple choice classification loss. Indices should be in `[0, ...,
            num_choices-1]` where `num_choices` is the size of the second dimension of the input tensors. (See
            `input_ids` above)
        position_ids (`torch.LongTensor` of shape `(batch_size, num_choices, sequence_length)`, *optional*):
            Indices of positions of each input sequence tokens in the position embeddings. Selected in the range `[0,
            config.max_position_embeddings - 1]`.

            [What are position IDs?](../glossary#position-ids)
        inputs_embeds (`torch.FloatTensor` of shape `(batch_size, num_choices, sequence_length, hidden_size)`, *optional*):
            Optionally, instead of passing `input_ids` you can choose to directly pass an embedded representation. This
            is useful if you want more control over how to convert `input_ids` indices into associated vectors than the
            model's internal embedding lookup matrix.
        """
        num_choices = input_ids.shape[1] if input_ids is not None else inputs_embeds.shape[1]

        flat_input_ids = input_ids.view(-1, input_ids.size(-1)) if input_ids is not None else None
        flat_position_ids = position_ids.view(-1, position_ids.size(-1)) if position_ids is not None else None
        flat_token_type_ids = token_type_ids.view(-1, token_type_ids.size(-1)) if token_type_ids is not None else None
        flat_attention_mask = attention_mask.view(-1, attention_mask.size(-1)) if attention_mask is not None else None
        flat_inputs_embeds = (
            inputs_embeds.view(-1, inputs_embeds.size(-2), inputs_embeds.size(-1))
            if inputs_embeds is not None
            else None
        )

        outputs = self.data2vec_text(
            flat_input_ids,
            position_ids=flat_position_ids,
            token_type_ids=flat_token_type_ids,
            attention_mask=flat_attention_mask,
            inputs_embeds=flat_inputs_embeds,
            return_dict=True,
            **kwargs,
        )
        pooled_output = outputs[1]

        pooled_output = self.dropout(pooled_output)
        logits = self.classifier(pooled_output)
        reshaped_logits = logits.view(-1, num_choices)

        loss = None
        if labels is not None:
            loss_fct = CrossEntropyLoss()
            # move labels to the correct device to enable model parallelism
            labels = labels.to(reshaped_logits.device)
            loss = loss_fct(reshaped_logits, labels)

        return MultipleChoiceModelOutput(
            loss=loss,
            logits=reshaped_logits,
            hidden_states=outputs.hidden_states,
            attentions=outputs.attentions,
        )


@auto_docstring
class Data2VecTextForTokenClassification(Data2VecTextPreTrainedModel):
    def __init__(self, config):
        super().__init__(config)
        self.num_labels = config.num_labels

        self.data2vec_text = Data2VecTextModel(config, add_pooling_layer=False)
        classifier_dropout = (
            config.classifier_dropout if config.classifier_dropout is not None else config.hidden_dropout_prob
        )
        self.dropout = nn.Dropout(classifier_dropout)
        self.classifier = nn.Linear(config.hidden_size, config.num_labels)

        # Initialize weights and apply final processing
        self.post_init()

    @can_return_tuple
    @auto_docstring
    def forward(
        self,
        input_ids: torch.LongTensor | None = None,
        attention_mask: torch.FloatTensor | None = None,
        token_type_ids: torch.LongTensor | None = None,
        position_ids: torch.LongTensor | None = None,
        inputs_embeds: torch.FloatTensor | None = None,
        labels: torch.LongTensor | None = None,
        **kwargs: Unpack[TransformersKwargs],
    ) -> tuple | TokenClassifierOutput:
        r"""
        labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`.
        """
        outputs = self.data2vec_text(
            input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            position_ids=position_ids,
            inputs_embeds=inputs_embeds,
            return_dict=True,
            **kwargs,
        )
        sequence_output = outputs[0]

        sequence_output = self.dropout(sequence_output)
        logits = self.classifier(sequence_output)

        loss = None
        if labels is not None:
            loss_fct = CrossEntropyLoss()
            # move labels to the correct device to enable model parallelism
            labels = labels.to(logits.device)
            loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))

        return TokenClassifierOutput(
            loss=loss,
            logits=logits,
            hidden_states=outputs.hidden_states,
            attentions=outputs.attentions,
        )


@auto_docstring
class Data2VecTextForQuestionAnswering(Data2VecTextPreTrainedModel):
    def __init__(self, config):
        super().__init__(config)
        self.num_labels = config.num_labels

        self.data2vec_text = Data2VecTextModel(config, add_pooling_layer=False)
        self.qa_outputs = nn.Linear(config.hidden_size, config.num_labels)

        # Initialize weights and apply final processing
        self.post_init()

    @can_return_tuple
    @auto_docstring
    def forward(
        self,
        input_ids: torch.LongTensor | None = None,
        attention_mask: torch.FloatTensor | None = None,
        token_type_ids: torch.LongTensor | None = None,
        position_ids: torch.LongTensor | None = None,
        inputs_embeds: torch.FloatTensor | None = None,
        start_positions: torch.LongTensor | None = None,
        end_positions: torch.LongTensor | None = None,
        **kwargs: Unpack[TransformersKwargs],
    ) -> tuple | QuestionAnsweringModelOutput:
        outputs = self.data2vec_text(
            input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            position_ids=position_ids,
            inputs_embeds=inputs_embeds,
            return_dict=True,
            **kwargs,
        )
        sequence_output = outputs[0]

        logits = self.qa_outputs(sequence_output)
        start_logits, end_logits = logits.split(1, dim=-1)
        start_logits = start_logits.squeeze(-1).contiguous()
        end_logits = end_logits.squeeze(-1).contiguous()

        total_loss = None
        if start_positions is not None and end_positions is not None:
            # If we are on multi-GPU, split add a dimension
            if len(start_positions.size()) > 1:
                start_positions = start_positions.squeeze(-1)
            if len(end_positions.size()) > 1:
                end_positions = end_positions.squeeze(-1)
            # sometimes the start/end positions are outside our model inputs, we ignore these terms
            ignored_index = start_logits.size(1)
            start_positions = start_positions.clamp(0, ignored_index)
            end_positions = end_positions.clamp(0, ignored_index)

            loss_fct = CrossEntropyLoss(ignore_index=ignored_index)
            start_loss = loss_fct(start_logits, start_positions)
            end_loss = loss_fct(end_logits, end_positions)
            total_loss = (start_loss + end_loss) / 2

        return QuestionAnsweringModelOutput(
            loss=total_loss,
            start_logits=start_logits,
            end_logits=end_logits,
            hidden_states=outputs.hidden_states,
            attentions=outputs.attentions,
        )


__all__ = [
    "Data2VecTextForCausalLM",
    "Data2VecTextForMaskedLM",
    "Data2VecTextForMultipleChoice",
    "Data2VecTextForQuestionAnswering",
    "Data2VecTextForSequenceClassification",
    "Data2VecTextForTokenClassification",
    "Data2VecTextModel",
    "Data2VecTextPreTrainedModel",
]
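
# ---------------------------------------------------------------------------------------------
# Illustrative usage sketch appended to this edited copy; it is NOT part of the upstream
# `transformers` source. It demonstrates two behaviors documented in the forward docstrings
# above: the `-100` label-masking convention of `Data2VecTextForMaskedLM`, and the automatic
# `problem_type` inference of `Data2VecTextForSequenceClassification`. The tiny config sizes
# are arbitrary assumptions chosen so the sketch runs offline on randomly initialized weights.
# Because this modular file uses relative imports, it cannot be executed directly; the guarded
# block below is meant to be copied into a standalone script, which is why it re-imports the
# public classes from the installed `transformers` package.
# ---------------------------------------------------------------------------------------------
if __name__ == "__main__":
    import torch

    from transformers import (
        Data2VecTextConfig,
        Data2VecTextForMaskedLM,
        Data2VecTextForSequenceClassification,
    )

    config = Data2VecTextConfig(
        vocab_size=128,
        hidden_size=32,
        num_hidden_layers=2,
        num_attention_heads=2,
        intermediate_size=64,
        max_position_embeddings=64,
    )

    # Masked LM: every label set to -100 is ignored by the loss; here only position 3 counts.
    mlm_model = Data2VecTextForMaskedLM(config).eval()
    input_ids = torch.randint(0, config.vocab_size, (1, 10))
    labels = torch.full_like(input_ids, -100)
    labels[0, 3] = input_ids[0, 3]
    with torch.no_grad():
        mlm_out = mlm_model(input_ids=input_ids, labels=labels)
    print(mlm_out.loss, mlm_out.logits.shape)  # scalar loss, (1, 10, vocab_size)

    # Sequence classification: with num_labels == 1 and problem_type unset, the head switches
    # to regression mode (MSE loss), as described in the forward docstring.
    reg_config = Data2VecTextConfig(
        vocab_size=128,
        hidden_size=32,
        num_hidden_layers=2,
        num_attention_heads=2,
        intermediate_size=64,
        max_position_embeddings=64,
        num_labels=1,
    )
    reg_model = Data2VecTextForSequenceClassification(reg_config).eval()
    with torch.no_grad():
        reg_out = reg_model(input_ids=input_ids, labels=torch.tensor([0.5]))
    print(reg_config.problem_type, reg_out.loss)  # "regression", MSE loss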