"""Bamba model configuration"""

from huggingface_hub.dataclasses import strict

from ...configuration_utils import PreTrainedConfig
from ...modeling_rope_utils import RopeParameters
from ...utils import auto_docstring


@strict(accept_kwargs=True)
@auto_docstring(
    custom_intro="""
    The BambaModel is a hybrid [mamba2](https://github.com/state-spaces/mamba) architecture with SwiGLU.
    The checkpoints are jointly trained by IBM, Princeton, and UIUC.
    """,
    checkpoint="ibm-fms/Bamba-9.8b-2.2T-hf",
)
class BambaConfig(PreTrainedConfig):
    r"""
    num_logits_to_keep (`int` or `None`, *optional*, defaults to 1):
        Number of prompt logits to calculate during generation. If `None`, all logits will be calculated. If an
        integer value, only the last `num_logits_to_keep` logits will be calculated. Default is 1 because only the
        logits of the last prompt token are needed for generation. For long sequences, the logits for the entire
        sequence may use a lot of memory, so setting `num_logits_to_keep=1` reduces the memory footprint
        significantly.
    attn_layer_indices (`list`, *optional*):
        Specifies the layer indices that will have full attention. Must contain indices smaller than
        `num_hidden_layers`.
    z_loss_coefficient (`float`, *optional*, defaults to 0.0):
        Coefficient for the auxiliary z-loss used to control logit growth during training.
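
    Example (a minimal usage sketch; `BambaConfig` and `BambaModel` are the standard `transformers`
    exports, and the attention-layer indices below are purely illustrative):

    ```python
    >>> from transformers import BambaConfig, BambaModel

    >>> # Initializing a Bamba configuration with the default values
    >>> configuration = BambaConfig()

    >>> # Placing full-attention layers at a few indices (each must be < num_hidden_layers)
    >>> configuration = BambaConfig(attn_layer_indices=[7, 15, 23, 31])
    >>> configuration.layers_block_type[7]
    'attention'

    >>> # Initializing a model (with random weights) from the configuration
    >>> model = BambaModel(configuration)
    >>> configuration = model.config
    ```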
    """

    model_type = "bamba"
    attribute_map = {"layer_types": "layers_block_type"}
    keys_to_ignore_at_inference = ["past_key_values"]

    vocab_size: int = 128000
    tie_word_embeddings: bool = False
    hidden_size: int = 4096
    intermediate_size: int = 14336
    num_hidden_layers: int = 32
    num_attention_heads: int = 32
    num_key_value_heads: int | None = None
    hidden_act: str = "silu"
    initializer_range: float = 0.02
    rms_norm_eps: float = 1e-5
    use_cache: bool = True
    num_logits_to_keep: int | None = 1
    pad_token_id: int | None = 0
    bos_token_id: int | None = 1
    eos_token_id: int | list[int] | None = 2
    max_position_embeddings: int = 262144
    attention_dropout: float = 0.0
    attn_layer_indices: list[int] | None = None
    mamba_n_heads: int | None = 128
    mamba_d_head: str | int | None = "auto"
    mamba_n_groups: int | None = 1
    mamba_d_state: int | None = 256
    mamba_d_conv: int | None = 4
    mamba_expand: int | None = 2
    mamba_chunk_size: int | None = 256
    mamba_conv_bias: bool | None = True
    mamba_proj_bias: bool | None = False
    time_step_min: float | None = 0.001
    time_step_max: float | None = 0.1
    time_step_limit: tuple[float, float] | dict[str, float] | None = (0.0, float("inf"))
    z_loss_coefficient: float | None = 0.0
    rope_parameters: RopeParameters | dict | None = None
    attention_bias: bool = False
    mlp_bias: bool = False

    def __post_init__(self, **kwargs):
        # Fall back to full multi-head attention when no KV-head count is given.
        if self.num_key_value_heads is None:
            self.num_key_value_heads = self.num_attention_heads
        # Resolve the "auto" head size from the Mamba inner dimension.
        if self.mamba_d_head == "auto":
            self.mamba_d_head = self.mamba_expand * self.hidden_size // self.mamba_n_heads
        self.attn_layer_indices = tuple(self.attn_layer_indices) if self.attn_layer_indices is not None else None
        # Bamba applies rotary embeddings to half of each attention head's dimensions.
        kwargs["partial_rotary_factor"] = 0.5
        super().__post_init__(**kwargs)

    @property
    def layers_block_type(self):
        # One entry per layer: "attention" for indices in attn_layer_indices, "mamba" otherwise.
        return [
            "attention" if (self.attn_layer_indices and i in self.attn_layer_indices) else "mamba"
            for i in range(self.num_hidden_layers)
        ]

    def validate_architecture(self):
        """Part of `@strict`-powered validation. Validates the architecture of the config."""
        mamba_intermediate = self.mamba_expand * self.hidden_size
        if mamba_intermediate % self.mamba_n_heads != 0:
            raise ValueError("mamba_n_heads must divide mamba_expand * hidden_size")
        if self.mamba_n_heads * self.mamba_d_head != mamba_intermediate:
            raise ValueError("The dimensions for the Mamba head state do not match the model intermediate_size")
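    # Worked example of the checks above, using this config's defaults (illustrative only):
    # mamba_expand * hidden_size = 2 * 4096 = 8192; with mamba_n_heads = 128 the "auto"
    # head size resolves to 8192 // 128 = 64, so mamba_n_heads * mamba_d_head == 8192 and
    # both validation conditions pass.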


__all__ = ["BambaConfig"]