
    Z jE                     d    S r SSKJr  SSKJr  SSKJr  \" SS9\ " S S	\5      5       5       rS	/rg
)zFlaubert configuration    )strict   )PreTrainedConfig)auto_docstringzflaubert/flaubert_base_uncased)
checkpointc                   t   \ rS rSr% SrSrSSSSSS	S
S.rSr\\	S'   Sr
\\-  \	S'   Sr\\	S'   Sr\\	S'   Sr\\	S'   Sr\\	S'   Sr\\-  \	S'   Sr\\-  \	S'   Sr\\	S'   Sr\\	S'   Sr\\	S'   Sr\\	S'   Sr\\	S'   Sr\\	S'   Sr\\	S '   S!r\\	S"'   S#r\\	S$'   S%r\\	S&'   S'r\\	S('   Sr\\	S)'   S*r\\	S+'   S,r \\	S-'   S.r!\\	S/'   Sr"\\	S0'   S1r#\$\	S2'   Sr%\\	S3'   S4r&\$S4-  \	S5'   Sr'\\	S6'   Sr(\\-  \	S7'   S.r)\\	S8'   S.r*\\	S9'   S'r+\\	S:'   S'r,\\	S;'   S*r-\S4-  \	S
'   S'r.\S4-  \	S'   Sr/\\0\   -  S4-  \	S	'   Sr1\\	S<'   S=r2g4)>FlaubertConfig   a  
pre_norm (`bool`, *optional*, defaults to `False`):
    Whether to apply the layer normalization before or after the feed forward layer following the attention in
    each layer (Vaswani et al., Tensor2Tensor for Neural Machine Translation. 2018)
emb_dim (`int`, *optional*, defaults to 2048):
    The dimensionality of the embedding layer and the encoder layers (aliased as `hidden_size`).
gelu_activation (`bool`, *optional*, defaults to `True`):
    Whether to use a GELU activation function instead of ReLU.
sinusoidal_embeddings (`bool`, *optional*, defaults to `False`):
    Whether or not to use sinusoidal positional embeddings instead of absolute positional embeddings.
causal (`bool`, *optional*, defaults to `False`):
    Whether or not the model should behave in a causal manner. Causal models use a triangular attention mask in
    order to only attend to the left-side context instead of a bidirectional context.
asm (`bool`, *optional*, defaults to `False`):
    Whether or not to use an adaptive log softmax projection layer instead of a linear layer for the prediction
    layer.
n_langs (`int`, *optional*, defaults to 1):
    The number of languages the model handles. Set to 1 for monolingual models.
use_lang_emb (`bool`, *optional*, defaults to `True`):
    Whether to use language embeddings. Some models use additional language embeddings, see [the multilingual
    models page](http://huggingface.co/transformers/multilingual.html#xlm-language-embeddings) for information
    on how to use them.
embed_init_std (`float`, *optional*, defaults to `2048**-0.5`):
    The standard deviation of the truncated_normal_initializer for initializing the embedding matrices.
bos_index (`int`, *optional*, defaults to 0):
    The index of the beginning of sentence token in the vocabulary.
eos_index (`int`, *optional*, defaults to 1):
    The index of the end of sentence token in the vocabulary.
pad_index (`int`, *optional*, defaults to 2):
    The index of the padding token in the vocabulary.
unk_index (`int`, *optional*, defaults to 3):
    The index of the unknown token in the vocabulary.
mask_index (`int`, *optional*, defaults to 5):
    The index of the masking token in the vocabulary.
is_encoder (`bool`, *optional*, defaults to `True`):
    Whether the model is used as an encoder.
summary_type (`str`, *optional*, defaults to `"first"`):
    Argument used when doing sequence summary. Used in the sequence classification and multiple choice models.
    Has to be one of the following options:
        - `"last"`: Take the last token hidden state (like XLNet).
        - `"first"`: Take the first token hidden state (like BERT).
        - `"mean"`: Take the mean of all tokens hidden states.
        - `"cls_index"`: Supply a Tensor of classification token position (like GPT/GPT-2).
        - `"attn"`: Not implemented now, use multi-head attention.
summary_use_proj (`bool`, *optional*, defaults to `True`):
    Argument used when doing sequence summary. Used in the sequence classification and multiple choice models.
    Whether or not to add a projection after the vector extraction.
summary_activation (`str`, *optional*):
    Argument used when doing sequence summary. Used in the sequence classification and multiple choice models.
    Pass `"tanh"` for a tanh activation to the output, any other value will result in no activation.
summary_proj_to_labels (`bool`, *optional*, defaults to `True`):
    Used in the sequence classification and multiple choice models.
    Whether the projection outputs should have `config.num_labels` or `config.hidden_size` classes.
summary_first_dropout (`float`, *optional*, defaults to 0.1):
    Used in the sequence classification and multiple choice models.
    The dropout ratio to be used after the projection and activation.
start_n_top (`int`, *optional*, defaults to 5):
    Used in the SQuAD evaluation script.
end_n_top (`int`, *optional*, defaults to 5):
    Used in the SQuAD evaluation script.
mask_token_id (`int`, *optional*, defaults to 0):
    Model agnostic parameter to identify masked tokens when generating text in an MLM context.
lang_id (`int`, *optional*, defaults to 1):
    The ID of the language used by the model. This parameter is used when generating text in a given language.
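
Example (a minimal usage sketch showing the standard config-to-model flow; the model is randomly
initialized from the configuration, not pretrained):

```python
>>> from transformers import FlaubertConfig, FlaubertModel

>>> # Initializing a FlauBERT configuration with the default (flaubert/flaubert_base_uncased-style) values
>>> configuration = FlaubertConfig()

>>> # Initializing a model (with random weights) from that configuration
>>> model = FlaubertModel(configuration)

>>> # Accessing the model configuration
>>> configuration = model.config
```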
flaubertemb_dimn_headsn_layers
vocab_sizebos_token_ideos_token_idpad_token_id)hidden_sizenum_attention_headsnum_hidden_layersn_words	bos_index	eos_index	pad_indexFpre_normg        	layerdropiu  i         g?dropoutattention_dropoutTgelu_activationsinusoidal_embeddingscausalasm   n_langsuse_lang_embi   max_position_embeddingsg;f?embed_init_stdg-q=layer_norm_epsg{Gz?init_stdr   r   r      r   r   	unk_index   
mask_index
is_encoderfirstsummary_typesummary_use_projNsummary_activationsummary_proj_to_labelssummary_first_dropoutstart_n_top	end_n_topmask_token_idlang_idtie_word_embeddings )3__name__
__module____qualname____firstlineno____doc__
model_typeattribute_mapr   bool__annotations__r   floatintr   r   r   r   r   r   r    r!   r"   r#   r%   r&   r'   r(   r)   r*   r   r   r   r,   r.   r/   r1   strr2   r3   r4   r5   r6   r7   r8   r9   r   r   r   listr:   __static_attributes__r;       ڄ/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/models/flaubert/configuration_flaubert.pyr	   r	      s   BH J ('###M Hd Ius{ JGSHcGSGUS[%(us{( OT "'4'FDCGSL$#&S&&NE&!NE!HeIsIsIsIsJJL#!d!%)d
)#'D'),53;,KIsM3GS L#*  L#* +,L#S	/D(, $$rJ   r	   N)	r@   huggingface_hub.dataclassesr   configuration_utilsr   utilsr   r	   __all__r;   rJ   rK   <module>rP      sK     . 3 # ;<t%% t%  =t%n 
rJ   