
    Z j                     l    S r SSKrSSKJr  SSKJr  SSKJr  \" SS9\ " S	 S
\5      5       5       rS
/r	g)zMAMBA2 configuration    N)strict   )PreTrainedConfig)auto_docstringzstate-spaces/mamba2-2.8b)
checkpointc                     ^  \ rS rSr% SrSrSr\\S'   Sr	\\S'   Sr
\\S	'   S
r\\S'   Sr\\S'   Sr\\S'   Sr\\S'   Sr\S-  \S'   Sr\S-  \S'   Sr\\\   -  S-  \S'   Sr\\S'   Sr\\S'   Sr\\S'   Sr\\S'   Sr\\S'   S r\\S!'   S"r\\S#'   Sr\\S$'   S%r\\-  \S&'   S'r\\S('   S"r \\S)'   S*r!\\S+'   S,\" S-5      4r"\\   \#\S.4   -  \S/'   Sr$\\S0'   Sr%\\S1'   Sr&\\S2'   S3r'\\S4'   Sr(\\S5'   U 4S6 jr)S7 r*\+S8 5       r,S9r-U =r.$ ):Mamba2Config   aX  
layer_norm_epsilon (`float`, *optional*, defaults to 1e-05):
    The epsilon to use in the layer normalization layers..
expand (`int`, *optional*, defaults to 2):
    Expanding factor used to determine the intermediate size.
n_groups (`int`, *optional*, defaults to 8):
    Number of groups for the evolution matrices of mamba 2.
use_bias (`bool`, *optional*, defaults to `False`):
    Whether or not to use bias in ["in_proj", "out_proj"] of the mixer block
use_conv_bias (`bool`, *optional*, defaults to `True`):
    Whether or not to use bias in the convolution layer of the mixer block.
residual_in_fp32 (`bool`, *optional*, defaults to `True`):
    Whether or not residuals should be in `float32`. If set to `False` residuals will keep the same `dtype` as the rest of the model
rescale_prenorm_residual (`bool`, *optional*, defaults to `False`):
    Whether or not to rescale `out_proj` weights when initializing.
chunk_size (`int`, *optional*, defaults to 256):
    Size of the chunks that will comprise the sequence.

Example:

```python
>>> from transformers import Mamba2Config, Mamba2Model

>>> # Initializing a Mamba2 configuration
>>> configuration = Mamba2Config()

>>> # Initializing a model (with random weights) from the configuration
>>> model = Mamba2Model(configuration)

>>> # Accessing the model configuration
>>> configuration = model.config
```mamba2   	num_heads@   head_dimi   
vocab_sizei   hidden_size
state_sizenum_hidden_layersgh㈵>layer_norm_epsilon   Npad_token_idr   bos_token_id   eos_token_idexpand   conv_kernel   n_groupsFuse_biasTuse_conv_biassilu
hidden_actg?initializer_rangeresidual_in_fp32autotime_step_rankgMbP?time_step_mintime_step_maxg-C6?time_step_floorg        inf.time_step_limitrescale_prenorm_residual	use_cacherms_norm   
chunk_sizetie_word_embeddingsc                    > U R                   S:X  a#  [        R                  " U R                  S-  5      OU R                   U l         [        TU ]  " S0 UD6  g )Nr%       )r&   mathceilr   super__post_init__)selfkwargs	__class__s     ڀ/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/models/mamba2/configuration_mamba2.pyr8   Mamba2Config.__post_init__[   sJ    040C0Cv0MDIId&&+,SWSfSf 	 	''    c                     U R                   U R                  -  U R                  U R                  -  :w  a@  [	        SU R                   U R                  -   SU R                  U R                  -   S35      eg)zOPart of `@strict`-powered validation. Validates the architecture of the config.z2Inconsistent configuration: hidden_size * expand (z#) must equal num_heads * head_dim (z).N)r   r   r   r   
ValueErrorr9   s    r<   validate_architecture"Mamba2Config.validate_architecturea   sr    t{{*0NO$$t{{23 4NNT]]2327  Pr>   c                 "    S/U R                   -  $ )Nmamba)r   rA   s    r<   layer_typesMamba2Config.layer_typesj   s    y41111r>   )r&   )/__name__
__module____qualname____firstlineno____doc__
model_typer   int__annotations__r   r   r   r   r   r   floatr   r   r   listr   r   r   r   boolr    r"   strr#   r$   r&   r'   r(   r)   r+   tupler,   r-   r.   r0   r1   r8   rB   propertyrF   __static_attributes____classcell__)r;   s   @r<   r	   r	      s   B JIsHcJKJs $$ L#*  L#* +,L#S	/D(,FCOKHcHdM4J"u"!d! &NC#I& M5 M5!OU!8;U5\7JOT%[5#44J%*d*ItHdJ %%( 2 2r>   r	   )
rL   r5   huggingface_hub.dataclassesr   configuration_utilsr   utilsr   r	   __all__r4   r>   r<   <module>r\      sN      . 3 # 56R2# R2  7R2j 
r>   