
    Z ju                     p    S r SSKJr  SSKJr  SSKJr  SSKJr  \" SS9\ " S	 S
\5      5       5       r	S
/r
g)zJetMoe model configuration    )strict   )PreTrainedConfig)RopeParameters)auto_docstringzjetmoe/jetmoe-8b)
checkpointc                     ^  \ rS rSr% SrSrS/rSS0rSr\	\
S'   S	r\	\
S
'   Sr\	\
S'   Sr\	\
S'   Sr\	\
S'   Sr\	\
S'   Sr\	\
S'   Sr\\
S'   Sr\	\
S'   Sr\	\
S'   Sr\\
S'   Sr\\
S'   Sr\\
S'   S r\	S!-  \
S"'   Sr\	\\	   -  S!-  \
S#'   S!r\	S!-  \
S$'   Sr\\
S%'   S!r\ \!-  S!-  \
S&'   S'r"\\
S('   Sr#\\
S)'   S*r$\\	-  \
S+'   U 4S, jr%S- r&S.r'U =r($ )/JetMoeConfig   a2  
kv_channels (`int`, *optional*, defaults to 128):
    Defines the number of channels for the key and value tensors.
num_local_experts (`int`, *optional*, defaults to 8):
    Defines the number of experts in the MoE and MoA.

```python
>>> from transformers import JetMoeModel, JetMoeConfig

>>> # Initializing a JetMoe 4B style configuration
>>> configuration = JetMoeConfig()

>>> # Initializing a model from the JetMoe 4B style configuration
>>> model = JetMoeModel(configuration)

>>> # Accessing the model configuration
>>> configuration = model.config
```
jetmoepast_key_valueshead_dimkv_channelsi }  
vocab_sizei   hidden_size   num_hidden_layers   num_key_value_heads   i   intermediate_sizei   max_position_embeddingssiluactivation_function   num_local_experts   num_experts_per_tokFoutput_router_logitsg{Gz?aux_loss_coefT	use_cache   Nbos_token_ideos_token_idpad_token_idtie_word_embeddingsrope_parametersgư>rms_norm_epsinitializer_rangeg        attention_dropoutc                 b   > U R                   U R                  -  U l        [        TU ]  " S0 UD6  g )N )r   r   num_attention_headssuper__post_init__)selfkwargs	__class__s     ڀ/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/models/jetmoe/configuration_jetmoe.pyr/   JetMoeConfig.__post_init__H   s,    #'#;#;d>V>V#V ''    c                 N    U R                   U R                  :  a  [        S5      eg)zOPart of `@strict`-powered validation. Validates the architecture of the config.zG`num_experts_per_tok` must be less than or equal to `num_local_experts`N)r   r   
ValueError)r0   s    r3   validate_architecture"JetMoeConfig.validate_architectureL   s'    ##d&<&<<fgg =r5   )r-   ))__name__
__module____qualname____firstlineno____doc__
model_typekeys_to_ignore_at_inferenceattribute_mapr   int__annotations__r   r   r   r   r   r   r   strr   r   r   boolr    floatr!   r#   r$   listr%   r&   r'   r   dictr(   r)   r*   r/   r8   __static_attributes____classcell__)r2   s   @r3   r
   r
      s3   ( J#4"5/MJKs!!K!s!#'S'%%s  !&$&M5It L#* +,L#S	/D(,#L#*# $$48O^d*T18L%#u#%(us{((h hr5   r
   N)r>   huggingface_hub.dataclassesr   configuration_utilsr   modeling_rope_utilsr   utilsr   r
   __all__r,   r5   r3   <module>rP      sN    ! . 3 1 # -.6h# 6h  /6hr 
r5   