
    Z j                     l    S SK Jr  SSKJr  SSKJr  SSKJr  \" SS9\ " S S	\5      5       5       rS	/r	g
)    )strict   )PreTrainedConfig)RopeParameters)auto_docstringzrednote-hilab/dots.llm1.base)
checkpointc                     ^  \ rS rSr% SrSrS/rSSSSSSSSS	SSSSSSS
.rS/S/4SS/S/4S/S/4S.rSS0r	Sr
\\S'   Sr\\S'   Sr\\S'   Sr\\S'   Sr\\S'   Sr\\S'   Sr\S-  \S'   Sr\S-  \S '   Sr\S-  \S'   S!r\S-  \S"'   S!r\S-  \S#'   Sr\S-  \S$'   S%r\S-  \S&'   S'r\S-  \S('   S)r\\S*'   S+r\\S,'   S-r\\S.'   S/r \\S0'   S1r!\\S2'   S'r"\\S3'   Sr#\$\%-  S-  \S4'   S'r&\\S5'   S6r'\\-  S-  \S7'   S8r(\\S9'   S:r)\S-  \S;'   Sr*\S-  \S<'   Sr+\,\   S-  \S='   Sr-\S-  \S>'   Sr.\S-  \S?'   Sr/\\,\   -  S-  \S@'   U 4SA jr0SBr1U =r2$ )CDots1Config   a  
n_group (`int`, *optional*, defaults to 1):
    Number of groups for routed experts.
first_k_dense_replace (`int`, *optional*, defaults to 0):
    Number of dense layers at the beginning of the model before the first MoE layer.

Examples:

```python
>>> from transformers import Dots1Model, Dots1Config
>>> # Initializing a Dots1 style configuration
>>> configuration = Dots1Config()
>>> # Accessing the model configuration
>>> configuration = model.config
```
dots1past_key_valuescolwiserowwisereplicated_with_grad_allreducepacked_colwisemoe_tp_experts)zlayers.*.self_attn.q_projzlayers.*.self_attn.k_projzlayers.*.self_attn.v_projzlayers.*.self_attn.o_projzlayers.*.self_attn.q_normzlayers.*.self_attn.k_normz!layers.*.mlp.experts.gate_up_projzlayers.*.mlp.experts.down_projzlayers.*.mlp.expertsz%layers.*.mlp.shared_experts.gate_projz#layers.*.mlp.shared_experts.up_projz%layers.*.mlp.shared_experts.down_projzlayers.*.mlp.gate_projzlayers.*.mlp.up_projzlayers.*.mlp.down_proj	input_idsinputs_embedshidden_statesattention_mask)embed_tokenslayersnormnum_local_expertsn_routed_expertsi R 
vocab_sizei   hidden_sizei*  intermediate_sizei  moe_intermediate_size>   num_hidden_layers    num_attention_headsNnum_key_value_headsn_shared_experts   n_group
topk_groupnum_experts_per_tokr   first_k_dense_replaceFnorm_topk_probsilu
hidden_acti   max_position_embeddingsg{Gz?initializer_rangegư>rms_norm_epsT	use_cachetie_word_embeddingsrope_parametersattention_biasg        attention_dropoutg      ?routed_scaling_factori   sliding_windowmax_window_layerslayer_typespad_token_idbos_token_ideos_token_idc                   > U R                   c  U R                  U l         U R                  cI  [        U R                  5       Vs/ s H#  nU R
                  b  X R                  :  a  SOSPM%     snU l        [        TU ]   " S0 UD6  g s  snf )Nsliding_attentionfull_attention )	r$   r#   r9   ranger!   r7   r8   super__post_init__)selfkwargsi	__class__s      ~/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/models/dots1/configuration_dots1.pyrC   Dots1Config.__post_init__l   s    ##+'+'?'?D$#
 t556	  7A &&2q<R<R7R $%& 7	 D 	'' s   *B)r9   r$   )3__name__
__module____qualname____firstlineno____doc__
model_typekeys_to_ignore_at_inferencebase_model_tp_planbase_model_pp_planattribute_mapr   int__annotations__r   r   r   r!   r#   r$   r%   r   r'   r(   r)   r*   r+   boolr-   strr.   r/   floatr0   r1   r2   r3   r   dictr4   r5   r6   r7   r8   r9   listr:   r;   r<   rC   __static_attributes____classcell__)rG   s   @rH   r
   r
      s4   " J#4"5 &/%.%.%.%E%E-=*3 01:/81:"+ )"+& &(9:#%568IJ!"_$56 	/M JK"s"!%3%s!!&(t(#'cDj'#'cDj'GS4ZJd
&*t*()3:)"'ND4K'J#'S'#u#L%It %%48O^d*T18 ND ,/us{T)/#&5&!%NC$J%$&sTz&$(KcT!(#L#*##L#*#+/L#S	/D(/( (    r
   N)
huggingface_hub.dataclassesr   configuration_utilsr   modeling_rope_utilsr   utilsr   r
   __all__r@   r]   rH   <module>rc      sJ   ( / 3 1 # 9:[(" [(  ;[(| /r]   