
    Z jK                     d    S r SSKJr  SSKJr  SSKJr  \" SS9\ " S S	\5      5       5       rS	/rg
)zXLM configuration    )strict   )PreTrainedConfig)auto_docstringzFacebookAI/xlm-mlm-en-2048)
checkpointc                   .   \ rS rSr% SrSrSSSSSS	S
S.rSr\\	S'   Sr
\\	S'   Sr\\	S'   Sr\\	S'   Sr\\-  \	S'   Sr\\-  \	S'   Sr\\	S'   Sr\\	S'   Sr\\	S'   Sr\\	S'   Sr\\	S'   Sr\\	S'   Sr\\	S'   Sr\\	S'   S r\\	S!'   S"r\\	S#'   S$r\\	S%'   S&r\\	S''   Sr\\	S('   S)r\\	S*'   Sr \\	S+'   S,r!\S,-  \	S-'   Sr"\\	S.'   Sr#\\-  \	S/'   S&r$\\	S0'   S&r%\\	S1'   S2r&\S,-  \	S3'   S2r'\\	S4'   S5r(\S,-  \	S
'   S2r)\S,-  \	S'   Sr*\\+\   -  S,-  \	S	'   Sr,\\	S6'   S7r-g,)8	XLMConfig   a  
gelu_activation (`bool`, *optional*, defaults to `True`):
    Whether or not to use *gelu* for the activations instead of *relu*.
sinusoidal_embeddings (`bool`, *optional*, defaults to `False`):
    Whether or not to use sinusoidal positional embeddings instead of absolute positional embeddings.
causal (`bool`, *optional*, defaults to `False`):
    Whether or not the model should behave in a causal manner. Causal models use a triangular attention mask in
    order to only attend to the left-side context instead if a bidirectional context.
asm (`bool`, *optional*, defaults to `False`):
    Whether or not to use an adaptive log softmax projection layer instead of a linear layer for the prediction
    layer.
n_langs (`int`, *optional*, defaults to 1):
    The number of languages the model handles. Set to 1 for monolingual models.
use_lang_emb (`bool`, *optional*, defaults to `True`):
    Whether to use language embeddings. Some models use additional language embeddings, see [the multilingual
    models page](http://huggingface.co/transformers/multilingual.html#xlm-language-embeddings) for information
    on how to use them.
embed_init_std (`float`, *optional*, defaults to 2048^-0.5):
    The standard deviation of the truncated_normal_initializer for initializing the embedding matrices.
unk_index (`int`, *optional*, defaults to 3):
    The index of the unknown token in the vocabulary.
mask_index (`int`, *optional*, defaults to 5):
    The index of the masking token in the vocabulary.
is_encoder (`bool`, *optional*, defaults to `True`):
    Whether or not the initialized model should be a transformer encoder or decoder as seen in Vaswani et al.
summary_type (`string`, *optional*, defaults to "first"):
    Argument used when doing sequence summary. Used in the sequence classification and multiple choice models.
    Has to be one of the following options:
        - `"last"`: Take the last token hidden state (like XLNet).
        - `"first"`: Take the first token hidden state (like BERT).
        - `"mean"`: Take the mean of all tokens hidden states.
        - `"cls_index"`: Supply a Tensor of classification token position (like GPT/GPT-2).
        - `"attn"`: Not implemented now, use multi-head attention.
summary_use_proj (`bool`, *optional*, defaults to `True`):
    Argument used when doing sequence summary. Used in the sequence classification and multiple choice models.
    Whether or not to add a projection after the vector extraction.
summary_activation (`str`, *optional*):
    Argument used when doing sequence summary. Used in the sequence classification and multiple choice models.
    Pass `"tanh"` for a tanh activation to the output, any other value will result in no activation.
summary_proj_to_labels (`bool`, *optional*, defaults to `True`):
    Used in the sequence classification and multiple choice models.
    Whether the projection outputs should have `config.num_labels` or `config.hidden_size` classes.
summary_first_dropout (`float`, *optional*, defaults to 0.1):
    Used in the sequence classification and multiple choice models.
    The dropout ratio to be used after the projection and activation.
start_n_top (`int`, *optional*, defaults to 5):
    Used in the SQuAD evaluation script.
end_n_top (`int`, *optional*, defaults to 5):
    Used in the SQuAD evaluation script.
mask_token_id (`int`, *optional*, defaults to 0):
    Model agnostic parameter to identify masked tokens when generating text in an MLM context.
lang_id (`int`, *optional*, defaults to 1):
    The ID of the language used by the model. This parameter is used when generating text in a given language.

Examples:

```python
>>> from transformers import XLMConfig, XLMModel

>>> # Initializing a XLM configuration
>>> configuration = XLMConfig()

>>> # Initializing a model (with random weights) from the configuration
>>> model = XLMModel(configuration)

>>> # Accessing the model configuration
>>> configuration = model.config
```xlmemb_dimn_headsn_layers
vocab_sizebos_token_ideos_token_idpad_token_id)hidden_sizenum_attention_headsnum_hidden_layersn_words	bos_index	eos_index	pad_indexiu  i         g?dropoutattention_dropoutTgelu_activationFsinusoidal_embeddingscausalasm   n_langsuse_lang_embi   max_position_embeddingsg;f?embed_init_stdg-q=layer_norm_epsg{Gz?init_stdr   	unk_index   
mask_index
is_encoderfirstsummary_typesummary_use_projNsummary_activationsummary_proj_to_labelssummary_first_dropoutstart_n_top	end_n_topr   mask_token_idlang_id   tie_word_embeddings ).__name__
__module____qualname____firstlineno____doc__
model_typeattribute_mapr   int__annotations__r   r   r   r   floatr   r   boolr   r    r!   r#   r$   r%   r&   r'   r(   r)   r+   r,   r.   strr/   r0   r1   r2   r3   r4   r5   r6   r   r   r   listr8   __static_attributes__r9       z/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/models/xlm/configuration_xlm.pyr	   r	      s   CJ J ('###M JGSHcGSGUS[%(us{( OT "'4'FDCGSL$#&S&&NE&!NE!HeIsJJL#!d!%)d
)#'D'),53;,KIs !M3:!GS L#*  L#* +,L#S	/D(, $$rH   r	   N)	r>   huggingface_hub.dataclassesr   configuration_utilsr   utilsr   r	   __all__r9   rH   rI   <module>rN      sJ     . 3 # 78p%  p%  9p%f -rH   