
    Z j                     p    S r SSKJr  SSKJr  SSKJr  SSKJr  \" SS9\ " S	 S
\5      5       5       r	S
/r
g)z$GraniteMoeShared model configuration    )strict   )PreTrainedConfig)RopeParameters)auto_docstringz!ibm-granite/granite-speech-3.2-8b)
checkpointc                   6  ^  \ rS rSr% SrSrS/rSr\\	S'   Sr
\\	S'   S	r\\	S
'   Sr\\	S'   Sr\\	S'   Sr\S-  \	S'   Sr\\	S'   Sr\\	S'   Sr\\	S'   Sr\\	S'   Sr\\	S'   Sr\S-  \	S'   Sr\S-  \	S'   Sr\\\   -  S-  \	S'   Sr\\	S '   Sr\\-  S-  \	S!'   Sr\\	S"'   S#r \\-  S-  \	S$'   S%r!\\-  S-  \	S&'   S%r"\\-  S-  \	S''   S%r#\\-  S-  \	S('   S%r$\\-  S-  \	S)'   S*r%\S-  \	S+'   Sr&\S-  \	S,'   Sr'\S-  \	S-'   S.r(\S-  \	S/'   S0r)\\	S1'   U 4S2 jr*S3r+U =r,$ )4GraniteMoeSharedConfig   aR  
embedding_multiplier (`float`, *optional*, defaults to 1.0):
    embedding multiplier
logits_scaling (`float`, *optional*, defaults to 1.0):
    divisor for output logits
residual_multiplier (`float`, *optional*, defaults to 1.0):
    residual multiplier
attention_multiplier (`float`, *optional*, defaults to 1.0):
    attention multiplier
shared_intermediate_size (`int`, *optional*, defaults to 1024):
    intermediate size for shared experts.

```python
>>> from transformers import GraniteMoeSharedModel, GraniteMoeSharedConfig

>>> # Initializing a GraniteMoeShared granitemoe-3b style configuration
>>> configuration = GraniteMoeSharedConfig()

>>> # Initializing a model from the granitemoe-7b style configuration
>>> model = GraniteMoeSharedModel(configuration)

>>> # Accessing the model configuration
>>> configuration = model.config
```
granitemoesharedpast_key_valuesi }  
vocab_sizei   hidden_sizei +  intermediate_size    num_hidden_layersnum_attention_headsNnum_key_value_headssilu
hidden_acti   max_position_embeddingsg{Gz?initializer_rangegư>rms_norm_epsT	use_cachepad_token_id   bos_token_id   eos_token_idFtie_word_embeddingsrope_parametersattention_biasg        attention_dropoutg      ?embedding_multiplierlogits_scalingresidual_multiplierattention_multiplier   num_local_expertsnum_experts_per_tokoutput_router_logitsgMbP?router_aux_loss_coefr   shared_intermediate_sizec                 b   > U R                   c  U R                  U l         [        TU ]  " S0 UD6  g )N )r   r   super__post_init__)selfkwargs	__class__s     ڔ/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/models/granitemoeshared/configuration_granitemoeshared.pyr1   $GraniteMoeSharedConfig.__post_init__X   s-    ##+'+'?'?D$''    )r   )-__name__
__module____qualname____firstlineno____doc__
model_typekeys_to_ignore_at_inferencer   int__annotations__r   r   r   r   r   r   strr   r   floatr   r   boolr   r   r   listr    r!   r   dictr"   r#   r$   r%   r&   r'   r)   r*   r+   r,   r-   r1   __static_attributes____classcell__)r4   s   @r5   r
   r
      s   4 $J#4"5JK"s"s!!&*t*J#'S'#u#L%It#L#*# L#* +,L#S	/D(, %%48O^d*T18 ND ,/us{T)//2%#+,2),NECK$&,.1t+1/2%#+,2$%sTz%&'t'(-$+-).%$,.$%c%( (r7   r
   N)r<   huggingface_hub.dataclassesr   configuration_utilsr   modeling_rope_utilsr   utilsr   r
   __all__r/   r7   r5   <module>rM      sL   & + . 3 1 # >?>(- >(  @>(B $
$r7   