
    Z j                     p    S r SSKJr  SSKJr  SSKJr  SSKJr  \" SS9\ " S	 S
\5      5       5       r	S
/r
g)z$GraniteMoeHybrid model configuration    )strict   )PreTrainedConfig)RopeParameters)auto_docstringz!ibm-granite/granite-speech-3.2-8b)
checkpointc                     ^  \ rS rSr% SrSrSS0rS/rSr\	\
S'   S	r\	\
S
'   Sr\	\
S'   Sr\	\
S'   Sr\	\
S'   Sr\	S-  \
S'   Sr\\
S'   Sr\	\
S'   Sr\\
S'   Sr\\
S'   Sr\\
S'   Sr\	S-  \
S'   Sr\	S-  \
S'   Sr\	\\	   -  S-  \
S '   S!r\\
S"'   Sr\\-  S-  \
S#'   S!r \\
S$'   S%r!\\	-  S-  \
S&'   S'r"\	\-  S-  \
S('   S'r#\	\-  S-  \
S)'   S'r$\	\-  S-  \
S*'   S'r%\	\-  S-  \
S+'   S,r&\	S-  \
S-'   Sr'\	S-  \
S.'   S!r(\S-  \
S/'   S0r)\S-  \
S1'   S2r*\	\
S3'   Sr+\S-  \
S4'   Sr,\\   S-  \
S'   S5r-\	S-  \
S6'   Sr.\	S-  \
S7'   S8r/\	S-  \
S9'   S:r0\	\-  S-  \
S;'   S<r1\	S-  \
S='   Sr2\	S-  \
S>'   S8r3\	S-  \
S?'   Sr4\S-  \
S@'   S!r5\S-  \
SA'   S0r6\S-  \
SB'   SCr7\S-  \
SD'   S%\" SE5      4r8\\\4   \9\\4   -  S-  \
SF'   U 4SG jr:SH r;SIr<U =r=$ )JGraniteMoeHybridConfig   aW  
embedding_multiplier (`float`, *optional*, defaults to 1.0):
    embedding multiplier.
logits_scaling (`float`, *optional*, defaults to 1.0):
    divisor for output logits.
residual_multiplier (`float`, *optional*, defaults to 1.0):
    residual multiplier.
attention_multiplier (`float`, *optional*, defaults to 1.0):
    attention multiplier.
shared_intermediate_size (`int`, *optional*, defaults to 1024):
    intermediate size for shared experts.
position_embedding_type (`str`, *optional*):
    Positional embedding type to be used; defaults to None. Allowed options: `[None, "rope"]`

Example:

```python
>>> from transformers import GraniteMoeHybridModel, GraniteMoeHybridConfig

>>> # Initializing a GraniteMoeHybrid config
>>> configuration = GraniteMoeHybridConfig()

>>> # Accessing the model configuration
>>> configuration = model.config
```granitemoehybridlayers_block_typelayer_typespast_key_valuesi }  
vocab_sizei   hidden_sizei +  intermediate_size    num_hidden_layersnum_attention_headsNnum_key_value_headssilu
hidden_acti   max_position_embeddingsg{Gz?initializer_rangegư>rms_norm_epsT	use_cachepad_token_id   bos_token_id   eos_token_idFtie_word_embeddingsrope_parametersattention_biasg        attention_dropoutg      ?embedding_multiplierlogits_scalingresidual_multiplierattention_multiplier   num_local_expertsnum_experts_per_tokoutput_router_logitsgMbP?router_aux_loss_coefi   shared_intermediate_sizeposition_embedding_type   mamba_n_headsmamba_n_groups   mamba_d_stateautomamba_d_head   mamba_d_convmamba_expandmamba_chunk_sizemamba_conv_biasmamba_proj_biastime_step_ming?time_step_maxinftime_step_limitc                 p  > U R                   c  U R                  U l         U R                  U R                  -  nU R                  S:X  a  X R
                  -  U l        U R                  b  [        U R                  5      OS U l        U R                  c  S/U R                  -  U l        [        TU ],  " S0 UD6  g )Nr6   mamba )r   r   r:   r   r7   r2   rA   tupler   r   super__post_init__)selfkwargsmamba_intermediate	__class__s      ڔ/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/models/granitemoehybrid/configuration_granitemoehybrid.pyrG   $GraniteMoeHybridConfig.__post_init__c   s    ##+'+'?'?D$!..1A1AA& 26H6H HD>B>R>R>^uT%9%9:dh# 'y4+A+AAD''    c                     U R                   U R                  -  nXR                  -  S:w  a  [        S5      eU R                  U R                  -  U:w  a  [        S5      eg)zOPart of `@strict`-powered validation. Validates the architecture of the config.r   z4mamba_n_heads must divide mamba_expand * hidden_sizezPThe dimensions for the Mamba head state do not match the model intermediate_sizeN)r:   r   r2   
ValueErrorr7   )rH   rJ   s     rL   validate_architecture,GraniteMoeHybridConfig.validate_architectureq   sd     "..1A1AA 2 22a7STTt1115GGopp HrN   )r   r7   r   rA   )>__name__
__module____qualname____firstlineno____doc__
model_typeattribute_mapkeys_to_ignore_at_inferencer   int__annotations__r   r   r   r   r   r   strr   r   floatr   r   boolr   r   r!   listr"   r#   r   dictr$   r%   r&   r'   r(   r)   r+   r,   r-   r.   r/   r0   r   r2   r3   r5   r7   r9   r:   r;   r<   r=   r>   r?   rA   rE   rG   rQ   __static_attributes____classcell__)rK   s   @rL   r
   r
      s   4 $J(-8M#4"5JK"s"s!!&*t*J#'S'#u#L%It#L#*# L#* +,L#S	/D(, %%48O^d*T18 ND ,/us{T)//2#+,2),NC%K$&,.1ut+1/2#+,2$%sTz%&'t'(-$+-).%$,.$(c(*.S4Z.$(KcT!( #M3:#!"NC$J" #M3:#%+L#)d"+ L#*  L#* #&cDj&#'OTD['#(OTD[("'M54<'"%M54<%HKUSX\GZOT%,'%u*==DZ(q qrN   r
   N)rW   huggingface_hub.dataclassesr   configuration_utilsr   modeling_rope_utilsr   utilsr   r
   __all__rD   rN   rL   <module>ri      sR    + . 3 1 # >?_q- _q  @_qD $
$rN   