
    Z j                     l    S r SSKrSSKJr  SSKJr  SSKJr  \" SS9\ " S	 S
\5      5       5       rS
/r	g)zJamba model configuration    N)strict   )PreTrainedConfig)auto_docstringzai21labs/Jamba-v0.1)
checkpointc                   R  ^  \ rS rSr% SrSrS/rSS0rSr\	\
S'   S	r\\
S
'   Sr\	\
S'   Sr\	\
S'   Sr\	\
S'   Sr\	\
S'   Sr\	\
S'   Sr\\
S'   Sr\\
S'   Sr\\
S'   Sr\\
S'   S	r\\
S'   Sr\\
S'   Sr\	S -  \
S!'   S"r\	S -  \
S#'   S$r\	\\	   -  S -  \
S%'   S&r\	\
S''   S(r\\	-  \
S)'   S$r \	\
S*'   S+r!\	\
S'   S$r"\	\
S,'   S"r#\	\
S-'   Sr$\	\
S.'   S/r%\	\
S0'   Sr&\\
S1'   S+r'\	\
S2'   S/r(\	\
S3'   S$r)\	\
S4'   S5r*\	\-  \
S6'   Sr+\\
S7'   S	r,\\
S8'   U 4S9 jr-\.S: 5       r/\.S; 5       r0\.S< 5       r1S= r2S>r3U =r4$ )?JambaConfig   a  
expert_layer_period (`int`, *optional*, defaults to 2):
    Once in this many layers, we will have an expert layer
expert_layer_offset (`int`, *optional*, defaults to 1):
    The first layer index that contains an expert mlp layer
attn_layer_period (`int`, *optional*, defaults to 8):
    Once in this many layers, we will have a vanilla attention layer
attn_layer_offset (`int`, *optional*, defaults to 4):
    The first layer index that contains a vanilla attention mlp layer
use_mamba_kernels (`bool`, *optional*, defaults to `True`):
    Flag indicating whether or not to use the fast mamba kernels. These are available only if `mamba-ssm` and
    `causal-conv1d` are installed, and the mamba modules are running on a CUDA device. Raises ValueError if
    `True` and kernels are not available
mamba_dt_rank (`Union[int,str]`, *optional*, defaults to `"auto"`):
    Rank of the mamba discretization projection matrix. `"auto"` means that it will default to `math.ceil(self.hidden_size / 16)`
jambapast_key_valuesnum_local_expertsnum_expertsi   
vocab_sizeFtie_word_embeddingsi   hidden_sizei 8  intermediate_size    num_hidden_layersnum_attention_heads   num_key_value_headssilu
hidden_actg{Gz?initializer_rangegư>rms_norm_epsT	use_cacheoutput_router_logitsgMbP?router_aux_loss_coefr   Npad_token_id   bos_token_id   eos_token_idi   max_position_embeddingsg        attention_dropoutnum_experts_per_tok   expert_layer_periodexpert_layer_offsetattn_layer_period   attn_layer_offsetuse_mamba_kernelsmamba_d_statemamba_d_convmamba_expandautomamba_dt_rankmamba_conv_biasmamba_proj_biasc                    > U R                   c  U R                  U l         U R                  S:X  a#  [        R                  " U R
                  S-  5      OU R                  U l        [        TU ]  " S0 UD6  g )Nr1   r'    )r   r   r2   mathceilr   super__post_init__)selfkwargs	__class__s     ~/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/models/jamba/configuration_jamba.pyr:   JambaConfig.__post_init__R   s`    ##+'+'?'?D$AEASASW]A]TYYt'7'7"'<=cgcucu''    c                     [        U R                  5       Vs/ s H#  nXR                  -  U R                  :X  a  SOSPM%     sn$ s  snf )N	attentionmamba)ranger   r*   r,   r;   is     r>   layers_block_typeJambaConfig.layers_block_typeY   sP     4112
2 5559O9OOKU\\2
 	
 
s   *Ac                 X    U R                   nU Vs/ s H  o"S:X  a  SOUPM     sn$ s  snf )NrB   full_attention)rG   )r;   layer_typesxs      r>   rK   JambaConfig.layer_types`   s2     ,,EPQ[$4 !;[QQQs   'c                     [        U R                  5       Vs/ s H-  nXR                  -  U R                  :X  a  U R                  OSPM/     sn$ s  snf )Nr    )rD   r   r(   r)   r   rE   s     r>   layers_num_expertsJambaConfig.layers_num_expertsf   sV     4112
2 !"$<$< <@X@X XD^__2
 	
 
s   4Ac                    U R                   U R                  :  a&  [        SU R                    SU R                   S35      eU R                  U R                  :  a&  [        SU R                   SU R                   S35      eg)zOPart of `@strict`-powered validation. Validates the architecture of the config.zattention layer offset (z/) must be smaller than attention layer period ()zexpert layer offset (z,) must be smaller than expert layer period (N)r,   r*   
ValueErrorr)   r(   )r;   s    r>   validate_architecture!JambaConfig.validate_architecturem   s    !!T%;%;;*4+A+A*BBqrv  sI  sI  rJ  JK  L  ##t'?'??'(@(@'AAmnr  oG  oG  nH  HI  J  @r@   )r2   r   )5__name__
__module____qualname____firstlineno____doc__
model_typekeys_to_ignore_at_inferenceattribute_mapr   int__annotations__r   boolr   r   r   r   r   r   strr   floatr   r   r   r   r   r!   r#   listr$   r%   r&   r   r(   r)   r*   r,   r-   r.   r/   r0   r2   r3   r4   r:   propertyrG   rK   rO   rT   __static_attributes____classcell__)r=   s   @r>   r	   r	      s   " J#4"5]M J %%K"s"s!!  J#u#L%It!&$&"'%' L#*  L#* +,L#S	/D(,#)S)%(us{(  K    ss"t"M3L#L#%M39% OT !OT!( 
 
 R R
 
 

 
r@   r	   )
rZ   r7   huggingface_hub.dataclassesr   configuration_utilsr   utilsr   r	   __all__r6   r@   r>   <module>rk      sM       . 3 # 01]" ]  2]@ /r@   