
    Z j                     l    S SK Jr  SSKJr  SSKJr  SSKJr  \" SS9\ " S S	\5      5       5       rS	/r	g
)    )strict   )PreTrainedConfig)RopeParameters)auto_docstringzZyphra/Zamba2-2.7B)
checkpointc                     ^  \ rS rSr% SrSrSSS.rS/rSr\	\
S	'   S
r\	\
S'   Sr\	\
S'   Sr\	\
S'   Sr\\   S-  \
S'   Sr\	\
S'   Sr\	\
S'   Sr\	\
S'   Sr\	\
S'   Sr\\
S'   Sr\\
S'   Sr\\
S'   Sr\\   \\S4   -  S-  \
S '   S!r\	\
S"'   S#r\\
S$'   S#r\\
S%'   S&r\	\
S''   S(r \\
S)'   S(r!\\
S*'   Sr"\	S-  \
S+'   S,r#\\
S-'   S.r$\	\
S/'   Sr%\	S-  \
S0'   S1r&\\	-  \
S2'   Sr'\	\
S3'   S(r(\\
S4'   S5r)\	\
S6'   S(r*\\
S7'   Sr+\,\--  S-  \
S8'   S9r.\\
S:'   S;r/\\
S<'   S#r0\\
S='   Sr1\	\
S>'   S?r2\	S-  \
S@'   Sr3\	S-  \
SA'   Sr4\	\\	   -  S-  \
SB'   S(r5\\
SC'   S#r6\\
SD'   U 4SE jr7SFr8U =r9$ )GZamba2Config   aP	  
mamba_ngroups (`int`, *optional*, defaults to 1):
    Number of groups for the evolution matrices of mamba 2.
n_mamba_heads (`int`, *optional*, defaults to 8):
    Number of heads for the evolution matrices of mamba 2.
use_conv_bias (`bool`, *optional*, defaults to `True`):
    Whether or not to use bias in the convolution layer of the mixer block.
chunk_size (`int`, *optional*, defaults to 256):
    Size of the chunks that will comprise the sequence.
use_mamba_kernels (`bool`, *optional*, defaults to `True`):
    Flag indicating whether or not to use the fast mamba kernels.
use_mem_eff_path (`bool`, *optional*, defaults to `False`):
    Whether or not to use the fused conv1d and scan in mamba2 layers.
add_bias_linear (`bool`, *optional*, defaults to `False`):
    Flag indicating whether or not to use bias in various layers.
num_mem_blocks (`int`, *optional*, defaults to 1):
    Number of unshared transformer blocks.
use_shared_attention_adapter (`bool`, *optional*, defaults to `False`):
    If True, unshared adapters (formally the same as LoRA but used in the base model) will be added to the q, k, v projectors in the shared attention layers.
adapter_rank (`int`, *optional*, defaults to 128):
    Rank of the adapter in the shared MLP and shared attention layers.
use_mem_rope (`bool`, *optional*, defaults to `False`):
    If True, includes RoPE in the shared attention layers.
num_logits_to_keep (`int` or `None`, *optional*, defaults to 1):
    Number of prompt logits to calculate during generation. If `None`, all logits will be calculated. If an
    integer value, only the last `num_logits_to_keep` logits will be calculated. Default is 1 because only the
    logits of the last prompt token are needed for generation. For long sequences, the logits for the entire
    sequence may use a lot of memory, so setting `num_logits_to_keep=1` will reduce the memory footprint
    significantly.
use_long_context (`bool`, *optional*, defaults to `False`):
    Activates the context-extended version of Zamba by modifying RoPE.

Example:
```python
>>> from transformers import Zamba2Model, Zamba2Config
>>> # Initializing a Zamba2-2.7B style configuration
>>> configuration = Zamba2Config()
>>> # Initializing a model from the Zamba2-2.7B style configuration
>>> model = Zamba2Model(configuration)
>>> # Accessing the model configuration
>>> configuration = model.config
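>>> # Derived sizes are filled in by `__post_init__`; with the defaults above the
>>> # mamba head dimension is int(mamba_expand * hidden_size) // n_mamba_heads
>>> configuration.mamba_headdim
640
>>> # Defaults can be overridden at construction time, e.g. for the
>>> # context-extended variant (see `use_long_context` above)
>>> long_context_configuration = Zamba2Config(use_long_context=True)
>>> long_context_configuration.max_position_embeddings
16384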
```"""

    model_type = "zamba2"
    attribute_map = {"layer_types": "layers_block_type", "head_dim": "attention_head_dim"}
    keys_to_ignore_at_inference = ["past_key_values"]

    vocab_size: int = 32000
    max_position_embeddings: int = 4096
    hidden_size: int = 2560
    num_hidden_layers: int = 54
    layers_block_type: list[str] | None = None
    mamba_d_state: int = 64
    mamba_d_conv: int = 4
    mamba_expand: int = 2
    mamba_ngroups: int = 1
    time_step_min: float = 0.001
    time_step_max: float = 0.1
    time_step_floor: float = 1e-4
    time_step_limit: tuple[float, ...] | None = None
    n_mamba_heads: int = 8
    use_mamba_kernels: bool = True
    use_conv_bias: bool = True
    chunk_size: int = 256
    use_mem_eff_path: bool = False
    add_bias_linear: bool = False
    intermediate_size: int | None = None
    hidden_act: str = "gelu"
    num_attention_heads: int = 32
    num_key_value_heads: int | None = None
    attention_dropout: float = 0.0
    num_mem_blocks: int = 1
    use_shared_attention_adapter: bool = False
    adapter_rank: int = 128
    use_mem_rope: bool = False
    rope_parameters: RopeParameters | dict | None = None
    initializer_range: float = 0.02
    rms_norm_eps: float = 1e-5
    use_cache: bool = True
    num_logits_to_keep: int | None = 1
    pad_token_id: int | None = 0
    bos_token_id: int | None = 1
    eos_token_id: int | list[int] | None = 2
    use_long_context: bool = False
    tie_word_embeddings: bool = True
  > U R                   =(       d    SU R                  -  U l         SU R                  -  U l        SU R                  -  U R                  -  U l        [        U R                  U R                  -  5      U R                  -  U l        U R                  (       a  SU l
        U R                  c  U R                  U l        U R                  U R                  -  U l        U R                  U l        U R                  c3  S/S/S-  S/-   S-  -   S/S-  -   S/-   S/S-  -   S/-   S/S-  -   U l        [        U R                  5       VVs/ s H  u  p#US:X  d  M  UPM     snnU l        ["        TU ]H  " S	0 UD6  g s  snnf )
Nr   r   i @  mamba   hybrid   r    )r+   r   attention_hidden_sizer/   r   intr   r$   mamba_headdimr?   r   r0   kv_channelsnum_query_groupsr   	enumeratehybrid_layer_idssuper__post_init__)selfkwargsindextype	__class__s       ڀ/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/models/zamba2/configuration_zamba2.pyrO   Zamba2Config.__post_init__q   s   !%!7!7!O1t?O?O;O%&)9)9%9""#d&6&6"6$:R:R"R !2!2T5E5E!EF$J\J\\  +0D(##+'+'?'?D$++t/G/GG $ 8 8 !!)	9q=H:-23)a-  * )a-	 
 * )a-  " ;DDDZDZ:[ p:[;5_cgo_o:[ p'' !qs   E?E?)
r   rG   rM   r+   rJ   r   rI   r   r0   rK   ):__name__
__module____qualname____firstlineno____doc__
model_typeattribute_mapkeys_to_ignore_at_inferencer   rH   __annotations__r   r   r   r   liststrr   r   r   r   r   floatr    r!   r"   tupler$   r%   boolr&   r(   r)   r*   r+   r-   r/   r0   r1   r2   r3   r5   r6   r7   r   dictr8   r9   r:   r;   r<   r=   r>   r?   r@   rO   __static_attributes____classcell__)rT   s   @rU   r
   r
      s   )V J$7EYZM#4"5J#'S'Ks*.tCy4'.M3L#L#M3 M5 M5!OU!>BOT%[5#44t;BM3"t"M4J"d"!OT!$(sTz(J!!&*t*%(us{(NC). $.L#L$48O^d*T18#u#L%It L#*  L#* +,L#S	/D(,"d" $$( (    r
   N)
huggingface_hub.dataclassesr   configuration_utilsr   modeling_rope_utilsr   utilsr   r
   __all__rF   rh   rU   <module>rn      sK   " / 3 1 # /0q(# q(  1q(h 
rh   