
    Z j                     l    S r SSKrSSKJr  SSKJr  SSKJr  \" SS9\ " S	 S
\5      5       5       rS
/r	g)zMAMBA configuration    N)strict   )PreTrainedConfig)auto_docstringzstate-spaces/mamba-2.8b)
checkpointc                     ^  \ rS rSr% SrSrSr\\S'   Sr	\\S'   Sr
\\S	'   S
r\\S'   Sr\\S'   Sr\S-  \S'   Sr\S-  \S'   Sr\\\   -  S-  \S'   Sr\\S'   Sr\\S'   Sr\\S'   Sr\\S'   Sr\\S'   Sr\\S'   Sr\\S'   S r\\-  \S!'   S"r\\S#'   S$r\\S%'   Sr\\S&'   S'r\\S('   S)r \\S*'   Sr!\\S+'   Sr"\\S,'   Sr#\\S-'   Sr$\\S.'   Sr%\\S/'   U 4S0 jr&\'S1 5       r(S2r)U =r*$ )3MambaConfig   a  
layer_norm_epsilon (`float`, *optional*, defaults to 1e-05):
    The epsilon to use in the layer normalization layers.
expand (`int`, *optional*, defaults to 2):
    Expanding factor used to determine the intermediate size.
use_bias (`bool`, *optional*, defaults to `False`):
    Whether or not to use bias in ["in_proj", "out_proj"] of the mixer block
use_conv_bias (`bool`, *optional*, defaults to `True`):
    Whether or not to use bias in the convolution layer of the mixer block.
residual_in_fp32 (`bool`, *optional*, defaults to `True`):
    Whether or not residuals should be in `float32`. If set to `False` residuals will keep the same `dtype` as the rest of the model
rescale_prenorm_residual (`bool`, *optional*, defaults to `False`):
    Whether or not to rescale `out_proj` weights when initializing.
use_mambapy (`bool`, *optional*, defaults to `False`):
    Determines the fallback strategy during training if the CUDA-based official implementation of Mamba is not available. If `True`,
    the mamba.py implementation is used. If `False`, the naive and slower implementation is used. Consider switching to the naive
    version if memory is limited.
use_associative_scan (`bool`, *optional*, defaults to `True`):
    Whether to use PyTorch's `torch._higher_order_ops.associative_scan` for the parallel scan instead of the naive
    sequential implementation. The associative scan is only active during `torch.compile` tracing and
    requires torch >= 2.9.0. Both paths are tested to produce numerically identical results (see
    `test_associative_scan_matches_sequential`). Set to `False` to fall back to the sequential loop.

Example:

```python
>>> from transformers import MambaConfig, MambaModel

>>> # Initializing a Mamba configuration
>>> configuration = MambaConfig()

>>> # Initializing a model (with random weights) from the configuration
>>> model = MambaModel(configuration)

>>> # Accessing the model configuration
>>> configuration = model.config
```mambaih  
vocab_sizei   hidden_size   
state_size    num_hidden_layersgh㈵>layer_norm_epsilonr   Npad_token_idbos_token_ideos_token_id   expand   conv_kernelFuse_biasTuse_conv_biassilu
hidden_actg?initializer_rangeresidual_in_fp32autotime_step_rankg      ?time_step_scalegMbP?time_step_mintime_step_maxrandomtime_step_init_schemeg-C6?time_step_floorrescale_prenorm_residual	use_cacheuse_mambapyuse_associative_scantie_word_embeddingsc                    > [        U R                  U R                  -  5      U l        U R                  S:X  a#  [
        R                  " U R                  S-  5      OU R                  U l        [        TU ]   " S0 UD6  g )Nr    r    )	intr   r   intermediate_sizer!   mathceilsuper__post_init__)selfkwargs	__class__s     ~/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/models/mamba/configuration_mamba.pyr4   MambaConfig.__post_init__^   sf    !$T[[43C3C%C!D040C0Cv0MDIId&&+,SWSfSf 	 	''    c                 "    S/U R                   -  $ )Nr   )r   )r5   s    r8   layer_typesMambaConfig.layer_typese   s    y41111r:   )r0   r!   )+__name__
__module____qualname____firstlineno____doc__
model_typer   r/   __annotations__r   r   r   r   floatr   r   r   listr   r   r   boolr   r   strr   r   r!   r"   r#   r$   r&   r'   r(   r)   r*   r+   r,   r4   propertyr<   __static_attributes____classcell__)r7   s   @r8   r	   r	      sM   $L JJKJs $$ L#*  L#* +,L#S	/D(,FCOKHdM4J"u"!d! &NC#I& OU  M5 M5!)3)!OU!%*d*ItK!%$% $$( 2 2r:   r	   )
rB   r1   huggingface_hub.dataclassesr   configuration_utilsr   utilsr   r	   __all__r.   r:   r8   <module>rP      sM      . 3 # 45M2" M2  6M2` /r:   