
    Z j                     p    S r SSKJr  SSKJr  SSKJr  SSKJr  \" SS9\ " S	 S
\5      5       5       r	S
/r
g)zMixtral model configuration    )strict   )PreTrainedConfig)RopeParameters)auto_docstringzmistralai/Mixtral-8x7B-v0.1)
checkpointc                   
  ^  \ rS rSr% SrSrS/rSrSSSSSSS	S
.rS/S/4SS/S/4S/S/4S.r	SS0r
Sr\\S'   Sr\\S'   Sr\\S'   Sr\\S'   Sr\\S'   Sr\\S'   Sr\S-  \S'   Sr\\S '   S!r\\S"'   S#r\\S$'   S%r\\S&'   S'r\\S('   Sr\S-  \S)'   S*r\S-  \S+'   S,r\\\   -  S-  \S-'   S.r \\S/'   Sr!\S-  \S0'   S1r"\\-  \S2'   S,r#\\S3'   Sr$\\S'   S.r%\\S4'   S5r&\\S6'   S1r'\\S7'   Sr(\)\*-  S-  \S8'   U 4S9 jr+S:r,U =r-$ );MixtralConfig   a`  
Example:

```python
>>> from transformers import MixtralModel, MixtralConfig

>>> # Initializing a Mixtral 7B style configuration
>>> configuration = MixtralConfig()

>>> # Initializing a model from the Mixtral 7B style configuration
>>> model = MixtralModel(configuration)

>>> # Accessing the model configuration
>>> configuration = model.config
```mixtralpast_key_valuesg    .Acolwiserowwisepacked_colwisemoe_tp_experts)zlayers.*.self_attn.q_projzlayers.*.self_attn.k_projzlayers.*.self_attn.v_projzlayers.*.self_attn.o_projz!layers.*.mlp.experts.gate_up_projzlayers.*.mlp.experts.down_projzlayers.*.mlp.experts	input_idsinputs_embedshidden_statesattention_mask)embed_tokenslayersnormnum_expertsnum_local_expertsi }  
vocab_sizei   hidden_sizei 8  intermediate_size    num_hidden_layersnum_attention_heads   num_key_value_headsNhead_dimsilu
hidden_acti   max_position_embeddingsg{Gz?initializer_rangegh㈵>rms_norm_epsT	use_cachepad_token_id   bos_token_id   eos_token_idFtie_word_embeddingssliding_windowg        attention_dropoutnum_experts_per_tokoutput_router_logitsgMbP?router_aux_loss_coefrouter_jitter_noiserope_parametersc                 b   > U R                   c  U R                  U l         [        TU ]  " S0 UD6  g )N )r"   r    super__post_init__)selfkwargs	__class__s     ڂ/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/models/mixtral/configuration_mixtral.pyr:   MixtralConfig.__post_init__V   s-    ##+'+'?'?D$''    )r"   ).__name__
__module____qualname____firstlineno____doc__
model_typekeys_to_ignore_at_inferencedefault_thetabase_model_tp_planbase_model_pp_planattribute_mapr   int__annotations__r   r   r   r    r"   r#   r%   strr&   r'   floatr(   r)   boolr*   r,   r.   listr/   r0   r1   r2   r   r3   r4   r5   r6   r   dictr:   __static_attributes____classcell__)r=   s   @r>   r
   r
      s     J#4"5M%.%.%.%.-=*3 0 &(9:#%568IJ!"_$56
 #$78MJK"s"s!!  HcDjJ#,S,#u#L%It#L#*# L#* +,L#S	/D(, %%!%NC$J%%(us{(  s!&$&"'%'!$$48O^d*T18( (r@   r
   N)rE   huggingface_hub.dataclassesr   configuration_utilsr   modeling_rope_utilsr   utilsr   r
   __all__r8   r@   r>   <module>rZ      sN    " . 3 1 # 89A($ A(  :A(H 
r@   