
    Z j                     d    S SK JrJr  S SKJr  S SKJr  \" SS9\ " S S\5      5       5       rS/rg)	   )PreTrainedConfigstrict)RopeParameters)auto_docstringzEuroBERT/EuroBERT-210m)
checkpointc                     ^  \ rS rSr% SrSrS/rSSSSSSSS.rS/S	/4S
S/S
/4S
/S
/4S.rSr	\
\S'   Sr\
\S'   Sr\
\S'   Sr\
\S'   Sr\
\S'   Sr\
S-  \S'   Sr\\S'   Sr\
\S'   Sr\\S'   Sr\\S'   S r\\S!'   S"r\
S-  \S#'   S$r\
S-  \S%'   S"r\
\\
   -  S-  \S&'   S'r\
\S('   S)r\\S*'   Sr\ \!-  S-  \S+'   S)r"\\S,'   S-r#\
\-  \S.'   S)r$\\S/'   Sr%\
S-  \S0'   S1r&\
\S2'   S3r'\\S4'   U 4S5 jr(S6 r)S7r*U =r+$ )8EuroBertConfig   aM  
mask_token_id (`int`, *optional*, defaults to 128002):
    Mask token id.
classifier_pooling (`str`, *optional*, defaults to `"late"`):
    The pooling strategy to use for the classifier. Can be one of ['bos', 'mean', 'late'].

```python
>>> from transformers import EuroBertModel, EuroBertConfig

>>> # Initializing a EuroBert eurobert-base style configuration
>>> configuration = EuroBertConfig()

>>> # Initializing a model from the eurobert-base style configuration
>>> model = EuroBertModel(configuration)

>>> # Accessing the model configuration
>>> configuration = model.config
```eurobertpast_key_valuescolwiserowwise)zlayers.*.self_attn.q_projzlayers.*.self_attn.k_projzlayers.*.self_attn.v_projzlayers.*.self_attn.o_projzlayers.*.mlp.gate_projzlayers.*.mlp.up_projzlayers.*.mlp.down_proj	input_idsinputs_embedshidden_statesattention_mask)embed_tokenslayersnormi  
vocab_sizei   hidden_sizei   intermediate_size   num_hidden_layersnum_attention_headsNnum_key_value_headssilu
hidden_acti    max_position_embeddingsg{Gz?initializer_rangegh㈵>rms_norm_epsT	use_cachei pad_token_idi  bos_token_ideos_token_id   pretraining_tpFtie_word_embeddingsrope_parametersattention_biasg        attention_dropoutmlp_biashead_dimi mask_token_idlateclassifier_poolingc                    > U R                   c  U R                  U l         U R                  c  U R                  U R                  -  U l        U R                   c  U R                  U l         [        TU ]  " S0 UD6  g )N )r   r   r-   r   super__post_init__)selfkwargs	__class__s     ڄ/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/models/eurobert/configuration_eurobert.pyr4   EuroBertConfig.__post_init__\   si    ##+'+'?'?D$==  ,,0H0HHDM##+'+'?'?D$''    c                     U R                   U R                  -  S:w  a&  [        SU R                    SU R                   S35      eg)zOPart of `@strict`-powered validation. Validates the architecture of the config.    zThe hidden size (z6) is not a multiple of the number of attention heads (z).N)r   r   
ValueError)r5   s    r8   validate_architecture$EuroBertConfig.validate_architecturef   sS    d666!;#D$4$4#5 622327  <r:   )r-   r   ),__name__
__module____qualname____firstlineno____doc__
model_typekeys_to_ignore_at_inferencebase_model_tp_planbase_model_pp_planr   int__annotations__r   r   r   r   r   r   strr   r    floatr!   r"   boolr#   r$   r%   listr'   r(   r)   r   dictr*   r+   r,   r-   r.   r0   r4   r>   __static_attributes____classcell__)r7   s   @r8   r	   r	      s   & J#4"5 &/%.%.%."+ )"+ &(9:#%568IJ!"_$56 JK!s!s!!&*t*J#'S'#u#L%It%L#*%%L#*%+1L#S	/D(1NC %%48O^d*T18 ND %(sU{(HdHcDjM3$$( r:   r	   N)	configuration_utilsr   r   modeling_rope_utilsr   utilsr   r	   __all__r2   r:   r8   <module>rV      sH   . < 1 # 34N% N  5Nb 
r:   