
    Z j                     l    S SK Jr  SSKJr  SSKJr  SSKJr  \" SS9\ " S S	\5      5       5       rS	/r	g
)    )strict   )PreTrainedConfig)RopeParameters)auto_docstringzMiniMaxAI/MiniMax-Text-01-hf)
checkpointc                     ^  \ rS rSr% SrSrS/rSrSSSSSSS	S
.rS/S/4SS/S/4S/S/4S.r	SS0r
Sr\\S'   Sr\\S'   Sr\\S'   Sr\\S'   Sr\\S'   Sr\\S'   Sr\S-  \S'   Sr\\S '   S!r\\S"'   S#r\\S$'   S%r\\S&'   S'r\\S('   Sr\S-  \S)'   S*r\S-  \S+'   S,r\\\   -  S-  \S-'   S.r \\S/'   Sr!\S-  \S0'   S1r"\\-  \S2'   S,r#\\S3'   Sr$\\S'   S.r%\\S4'   S5r&\\S6'   S1r'\\S7'   Sr(\)\*-  S-  \S8'   Sr+\\   S-  \S9'   S:r,\\S;'   S*r-\\-  \S<'   S*r.\\-  \S='   S*r/\\-  \S>'   S*r0\\-  \S?'   S*r1\\-  \S@'   S*r2\\-  \SA'   U 4SB jr3SCr4U =r5$ )DMiniMaxConfig   aA  
block_size (`int`, *optional*, defaults to 256):
    The length of each attention block, determining how queries, keys, and values
    are grouped and processed for intra- and inter-block attention.
full_attn_alpha_factor (`float`, *optional*, defaults to 1):
    Weight for residual value in residual connection after normal attention.
full_attn_beta_factor (`float`, *optional*, defaults to 1):
    Weight for hidden state value in residual connection after normal attention.
linear_attn_alpha_factor (`float`, *optional*, defaults to 1):
    Weight for residual value in residual connection after lightning attention.
linear_attn_beta_factor (`float`, *optional*, defaults to 1):
    Weight for hidden state value in residual connection after lightning attention.
mlp_alpha_factor (`float`, *optional*, defaults to 1):
    Weight for residual value in residual connection after MLP.
mlp_beta_factor (`float`, *optional*, defaults to 1):
    Weight for hidden state value in residual connection after MLP.

```python
>>> from transformers import MiniMaxModel, MiniMaxConfig

>>> # Initializing a MiniMax style configuration
>>> configuration = MiniMaxConfig()

>>> # Initializing a model from the MiniMax style configuration
>>> model = MiniMaxModel(configuration)

>>> # Accessing the model configuration
>>> configuration = model.config
```minimaxpast_key_valuesg    .Acolwiserowwisepacked_colwisemoe_tp_experts)zlayers.*.self_attn.q_projzlayers.*.self_attn.k_projzlayers.*.self_attn.v_projzlayers.*.self_attn.o_projz!layers.*.mlp.experts.gate_up_projzlayers.*.mlp.experts.down_projzlayers.*.mlp.experts	input_idsinputs_embedshidden_statesattention_mask)embed_tokenslayersnormnum_expertsnum_local_expertsi }  
vocab_sizei   hidden_sizei 8  intermediate_size    num_hidden_layersnum_attention_heads   num_key_value_headsNhead_dimsilu
hidden_acti   max_position_embeddingsg{Gz?initializer_rangegh㈵>rms_norm_epsT	use_cachepad_token_id   bos_token_id   eos_token_idFtie_word_embeddingssliding_windowg        attention_dropoutnum_experts_per_tokoutput_router_logitsgMbP?router_aux_loss_coefrouter_jitter_noiserope_parameterslayer_types   
block_sizefull_attn_alpha_factorfull_attn_beta_factorlinear_attn_alpha_factorlinear_attn_beta_factormlp_alpha_factormlp_beta_factorc                   > U R                   c  U R                  U l         U R                  cC  [        U R                  5       Vs/ s H  n[        US-   S-  5      (       a  SOSPM     snU l        [        TU ]  " S0 UD6  g s  snf )Nr+   r-   full_attentionlinear_attention )r"   r    r7   ranger   boolsuper__post_init__)selfkwargsi	__class__s      ڂ/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/models/minimax/configuration_minimax.pyrG   MiniMaxConfig.__post_init__q   s    ##+'+'?'?D$#W\]a]s]sWt WtRSD!a%1$5$5 ;MMWt D 	''	 s   $B)r7   r"   )6__name__
__module____qualname____firstlineno____doc__
model_typekeys_to_ignore_at_inferencedefault_thetabase_model_tp_planbase_model_pp_planattribute_mapr   int__annotations__r   r   r   r    r"   r#   r%   strr&   r'   floatr(   r)   rE   r*   r,   r.   listr/   r0   r1   r2   r   r3   r4   r5   r6   r   dictr7   r9   r:   r;   r<   r=   r>   r?   rG   __static_attributes____classcell__)rK   s   @rL   r
   r
      s%   < J#4"5M%.%.%.%.-=*3 0 &(9:#%568IJ!"_$56
 #$78MJK"s"s!!  HcDjJ#,S,#u#L%It#L#*# L#* +,L#S	/D(, %%!%NC$J%%(us{(  s!&$&"'%'!$$48O^d*T18$(KcT!(J*+C%K+)*3;*,-cEk-+,S5[,$%cEk%#$OS5[$	( 	(    r
   N)
huggingface_hub.dataclassesr   configuration_utilsr   modeling_rope_utilsr   utilsr   r
   __all__rC   ra   rL   <module>rg      sK   * / 3 1 # 9:\($ \(  ;\(~ 
ra   