
    Z j4                        S SK r S SK Jr  S SKJrJrJr  SSKJr  SSKJ	r	  SSK
JrJrJrJr  SSKJr  SS	KJr  SS
KJr  SSKJrJr  SSKJr  SSKJrJrJrJr  \" SS9\ " S S\5      5       5       r " S S\5      r  " S S\5      r! " S S\5      r" " S S\5      r#\ " S S\"5      5       r$\ " S S\"5      5       r%\ " S S \"5      5       r&/ S!Qr'g)"    N)nn)BCEWithLogitsLossCrossEntropyLossMSELoss   )strict)create_bidirectional_mask)BaseModelOutputMaskedLMOutputSequenceClassifierOutputTokenClassifierOutput)RopeParameters)Unpack)auto_docstring)TransformersKwargscan_return_tuple   )LlamaConfig)LlamaAttention
LlamaModelLlamaPreTrainedModelLlamaRMSNormzEuroBERT/EuroBERT-210m)
checkpointc                     ^  \ rS rSr% SrSrSr\\S'   Sr	\\S'   Sr
\\S	'   S
r\\S'   S
r\\S'   Sr\S-  \S'   Sr\\S'   Sr\\S'   Sr\\S'   Sr\\S'   Sr\S-  \S'   Sr\\\   -  S-  \S'   Sr\S-  \S'   Sr\\S'   Sr\\S'   S r\\S!'   Sr\\-  S-  \S"'   S r\\S#'   S$r \\-  \S%'   S r!\\S&'   Sr"\S-  \S''   S(r#\\S)'   U 4S* jr$S+r%U =r&$ ),EuroBertConfig   aM  
mask_token_id (`int`, *optional*, defaults to 128002):
    Mask token id.
classifier_pooling (`str`, *optional*, defaults to `"late"`):
    The pooling strategy to use for the classifier. Can be one of ['bos', 'mean', 'late'].

```python
>>> from transformers import EuroBertModel, EuroBertConfig

>>> # Initializing a EuroBert eurobert-base style configuration
>>> configuration = EuroBertConfig()

>>> # Initializing a model from the eurobert-base style configuration
>>> model = EuroBertModel(configuration)

>>> # Accessing the model configuration
>>> configuration = model.config
```euroberti  
vocab_sizei   hidden_sizei   intermediate_size   num_hidden_layersnum_attention_headsNnum_key_value_headssilu
hidden_acti    max_position_embeddingsg{Gz?initializer_rangeh㈵>rms_norm_epsi  bos_token_idi eos_token_idpad_token_idi mask_token_id   pretraining_tpFtie_word_embeddingsrope_parametersattention_biasg        attention_dropoutmlp_biashead_dimlateclassifier_poolingc                 b   > U R                   c  U R                  U l         [        TU ]  " S0 UD6  g )N )r$   r#   super__post_init__)selfkwargs	__class__s     ~/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/models/eurobert/modular_eurobert.pyr<   EuroBertConfig.__post_init__N   s-    ##+'+'?'?D$''    )r$   )'__name__
__module____qualname____firstlineno____doc__
model_typer   int__annotations__r   r    r"   r#   r$   r&   strr'   r(   floatr*   r+   r,   listr-   r.   r0   r1   boolr2   r   dictr3   r4   r5   r6   r8   r<   __static_attributes____classcell__r?   s   @r@   r   r      s)   & JJK!s!s!!&*t*J#'S'#u#L%%L#*%+1L#S	/D(1%L#*%M3NC %%48O^d*T18 ND %(sU{(HdHcDj$$( (rB   r   c                   ,   ^  \ rS rSrSU 4S jjrSrU =r$ )EuroBertRMSNormT   c                 $   > [         TU ]  X5        g N)r;   __init__)r=   r   epsr?   s      r@   rX   EuroBertRMSNorm.__init__U   s    *rB   r:   )r)   )rC   rD   rE   rF   rX   rP   rQ   rR   s   @r@   rT   rT   T   s    + +rB   rT   c                   4   ^  \ rS rSrS\S\4U 4S jjrSrU =r$ )EuroBertAttentionY   config	layer_idxc                 2   > [         TU ]  X5        SU l        g )NF)r;   rX   	is_causal)r=   r^   r_   r?   s      r@   rX   EuroBertAttention.__init__Z   s    +rB   )ra   )	rC   rD   rE   rF   r   rI   rX   rP   rQ   rR   s   @r@   r\   r\   Y   s    ~ #  rB   r\   c                       \ rS rSrSrg)EuroBertPreTrainedModel_   r:   N)rC   rD   rE   rF   rP   r:   rB   r@   rd   rd   _   s    rB   rd   c                       \ rS rSr    SS\R
                  S\R                  S-  S\R
                  S-  S\R                  S-  S\\	   S\
\-  4S	 jjrS
rg)EuroBertModelc   N	input_idsattention_maskposition_idsinputs_embedsr>   returnc                    US L US L-  (       a  [        S5      eUc  U R                  U5      nUc;  [        R                  " UR                  S   UR
                  S9R                  S5      n[        U R                  UUS9nUnU R                  XsS9nU R                  S U R                  R                    H  n	U	" U4UUUS.UD6nM     U R                  U5      n[        US9$ )	Nz:You must specify exactly one of input_ids or inputs_embedsr/   )devicer   )r^   rl   rj   )rk   )rj   position_embeddingsrk   )last_hidden_state)
ValueErrorembed_tokenstorcharangeshapero   	unsqueezer	   r^   
rotary_emblayersr"   normr
   )
r=   ri   rj   rk   rl   r>   bidirectional_maskhidden_statesrp   encoder_layers
             r@   forwardEuroBertModel.forwardd   s    -t";<YZZ *.*;*;I*FM <<(;(;A(>}G[G[\ffghiL6;;')
 &"oomoW![[)H4;;+H+HIM)1$7)	
 M J 		-0+
 	
rB   r:   )NNNN)rC   rD   rE   rF   rt   
LongTensorTensorFloatTensorr   r   tupler
   r~   rP   r:   rB   r@   rg   rg   c   s     '+.20426&
##&
 t+&
 &&-	&

 ((4/&
 +,&
 
	 &
 &
rB   rg   c                   @  ^  \ rS rSrSS0rSS0rSS/S/40rS\4U 4S	 jjr\	\
     SS\R                  S
-  S\R                  S
-  S\R                  S
-  S\R                  S
-  S\R                  S
-  S\\   S\\R                     \-  4S jj5       5       rSrU =r$ )EuroBertForMaskedLM   zlm_head.weightzmodel.embed_tokens.weightlm_headcolwise_gather_outputr|   logitsr^   c                    > [         TU ]  U5        [        U5      U l        [        R
                  " UR                  UR                  UR                  5      U l	        U R                  5         g rW   )r;   rX   rg   modelr   Linearr   r   r5   r   	post_initr=   r^   r?   s     r@   rX   EuroBertForMaskedLM.__init__   sL     "6*
yy!3!3V5F5FX 	rB   Nri   rj   rk   rl   labelsr>   rm   c                    U R                   " SUUUUS.UD6nU R                  UR                  5      nSn	Ub)  U R                  " SXU R                  R
                  S.UD6n	[        U	UUR                  UR                  S9$ )a  
Example:

```python
>>> from transformers import AutoTokenizer, EuroBertForMaskedLM

>>> model = EuroBertForMaskedLM.from_pretrained("EuroBERT/EuroBERT-210m")
>>> tokenizer = AutoTokenizer.from_pretrained("EuroBERT/EuroBERT-210m")

>>> text = "The capital of France is <|mask|>."
>>> inputs = tokenizer(text, return_tensors="pt")
>>> outputs = model(**inputs)

>>> # To get predictions for the mask:
>>> masked_index = inputs["input_ids"][0].tolist().index(tokenizer.mask_token_id)
>>> predicted_token_id = outputs.logits[0, masked_index].argmax(axis=-1)
>>> predicted_token = tokenizer.decode(predicted_token_id)
>>> print("Predicted token:", predicted_token)
Predicted token:  Paris
```)ri   rj   rk   rl   N)r   r   r   lossr   r|   
attentionsr:   )	r   r   rq   loss_functionr^   r   r   r|   r   )
r=   ri   rj   rk   rl   r   r>   outputsr   r   s
             r@   r~   EuroBertForMaskedLM.forward   s    > $(:: $
)%'	$

 $
 g778%%pVt{{OeOepiopD!//))	
 	
rB   )r   r   NNNNN)rC   rD   rE   rF   _tied_weights_keys_tp_plan_pp_planr   rX   r   r   rt   r   r   r   r   r   r   r   r~   rP   rQ   rR   s   @r@   r   r      s    *,GH23H_-z:;H~   .2.20426*./
##d*/
 t+/
 &&-	/

 ((4//
   4'/
 +,/
 
u||	~	-/
  /
rB   r   c                      ^  \ rS rSrS\4U 4S jjr\\     SS\R                  S-  S\R                  S-  S\R                  S-  S\R                  S-  S	\R                  S-  S
\\   S\\R                     \-  4S jj5       5       rSrU =r$ )!EuroBertForSequenceClassification   r^   c                   > [         TU ]  U5        UR                  U l        UR                  U l        [	        U5      U l        [        R                  " UR                  UR                  5      U l	        [        R                  " 5       U l        [        R                  " UR                  U R                  5      U l        U R                  5         g rW   )r;   rX   
num_labelsr8   rg   r   r   r   r   denseGELU
activation
classifierr   r   s     r@   rX   *EuroBertForSequenceClassification.__init__   s      ++"(";";"6*
YYv1163E3EF
'')))F$6$6HrB   Nri   rj   rk   rl   r   r>   rm   c                    U R                   " U4UUUS.UD6nUS   nU R                  S;   a  U R                  S:X  a
  US S 2S4   n	OpU R                  S:X  a`  Uc  UR                  SS9n	OMUR                  UR                  5      nXR                  S5      -  R                  SS9n	XR                  SS	S
9-  n	U R                  W	5      n	U R                  U	5      n	U R                  U	5      n
OU R                  S:X  a  U R                  U5      nU R                  U5      nU R                  U5      n
Uc  U
R                  SS9n
OMUR                  U
R                  5      nXR                  S5      -  R                  SS9n
XR                  SS	S
9-  n
S nUGb  UR                  W
R                  5      nU R                  R                  c  U R                  S:X  a  SU R                  l        OoU R                  S:  aN  UR                  [        R                  :X  d  UR                  [        R                   :X  a  SU R                  l        OSU R                  l        U R                  R                  S:X  aI  [#        5       nU R                  S:X  a&  U" U
R%                  5       UR%                  5       5      nOU" X5      nOU R                  R                  S:X  a=  ['        5       nU" U
R)                  SU R                  5      UR)                  S5      5      nO,U R                  R                  S:X  a  [+        5       nU" X5      n[-        UW
UR.                  UR0                  S9$ )Nrj   rk   rl   r   )bosmeanr   r   r/   )dimT)r   keepdimr7   
regressionsingle_label_classificationmulti_label_classificationr   )r   r8   r   toro   rw   sumr   r   r   r^   problem_typer   dtypert   longrI   r   squeezer   viewr   r   r|   r   )r=   ri   rj   rk   rl   r   r>   encoder_outputrq   pooled_outputr   xr   loss_fcts                 r@   r~   )EuroBertForSequenceClassification.forward   s    
)%'	

 
 +1-""o5&&%/ 1!Q$ 7((F2!)$5$:$:q$:$AM%3%6%67H7O7O%PN%69Q9QRT9U%U$Z$Z_`$Z$aM!%7%7At%7%LLM JJ}5M OOM:M__]3F$$.

,-A"A__Q'F%+!/!2!26==!A #;#;B#??DDDK,,D,AAYYv}}-F{{''/??a'/;DKK,__q(fllejj.HFLL\a\e\eLe/LDKK,/KDKK,{{''<7"9??a'#FNN$4fnn6FGD#F3D))-JJ+-B @&++b/R))-II,./'(66%00	
 	
rB   )r   r   r8   r   r   r   r   )rC   rD   rE   rF   r   rX   r   r   rt   r   r   r   r   r   r   r   r~   rP   rQ   rR   s   @r@   r   r      s    	~ 	  .2.20426*.J
##d*J
 t+J
 &&-	J

 ((4/J
   4'J
 +,J
 
u||	7	7J
  J
rB   r   c                     ^  \ rS rSrS\4U 4S jjrS rS r\\	     SS\
R                  S-  S\
R                  S-  S	\
R                  S-  S
\
R                  S-  S\
R                  S-  S\\   S\\-  4S jj5       5       rSrU =r$ )EuroBertForTokenClassificationi+  r^   c                    > [         TU ]  U5        UR                  U l        [        U5      U l        [
        R                  " UR                  UR                  5      U l        U R                  5         g rW   )
r;   rX   r   rg   r   r   r   r   r   r   r   s     r@   rX   'EuroBertForTokenClassification.__init__-  sQ      ++"6*
))F$6$68I8IJrB   c                 .    U R                   R                  $ rW   r   rs   )r=   s    r@   get_input_embeddings3EuroBertForTokenClassification.get_input_embeddings5  s    zz&&&rB   c                 $    XR                   l        g rW   r   )r=   values     r@   set_input_embeddings3EuroBertForTokenClassification.set_input_embeddings8  s    "'

rB   Nri   rj   rk   rl   r   r>   rm   c                    U R                   " U4UUUS.UD6nUS   nU R                  U5      n	Sn
Ub<  [        5       nU" U	R                  SU R                  5      UR                  S5      5      n
[        U
U	UR                  UR                  S9$ )ae  
labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
    Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
    config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
    `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
r   r   Nr   r   )r   r   r   r   r   r   r|   r   )r=   ri   rj   rk   rl   r   r>   r   sequence_outputr   r   r   s               r@   r~   &EuroBertForTokenClassification.forward;  s    " **
)%'	

 
 "!*1')HFKKDOO<fkk"oND$!//))	
 	
rB   )r   r   r   r   )rC   rD   rE   rF   r   rX   r   r   r   r   rt   r   r   r   r   r   r   r   r~   rP   rQ   rR   s   @r@   r   r   +  s    ~ '(  .2.20426*.#
##d*#
 t+#
 &&-	#

 ((4/#
   4'#
 +,#
 
&	&#
  #
rB   r   )r   rd   rg   r   r   r   )(rt   r   torch.nnr   r   r   configuration_utilsr   masking_utilsr	   modeling_outputsr
   r   r   r   modeling_rope_utilsr   processing_utilsr   utilsr   utils.genericr   r   llama.configuration_llamar   llama.modeling_llamar   r   r   r   r   rT   r\   rd   rg   r   r   r   __all__r:   rB   r@   <module>r      s       A A ) 6 p p 1 & # A 3 a a 340([ 0(  50(f+l +
 	2 	'
J '
T >
1 >
 >
B X
(? X
 X
v 4
%< 4
 4
nrB   