
    Z j	                     p    S r SSKJr  SSKJr  SSKJr  SSKJr  \" SS9\ " S	 S
\5      5       5       r	S
/r
g)zStableLM model configuration    )strict   )PreTrainedConfig)RopeParameters)auto_docstringzstabilityai/stablelm-3b-4e1t)
checkpointc                     ^  \ rS rSr% SrSrS/rSr\\	S'   Sr
\\	S'   S	r\\	S
'   Sr\\	S'   Sr\\	S'   Sr\\	S'   Sr\\	S'   Sr\\	S'   Sr\\	S'   Sr\\	S'   Sr\\	S'   Sr\\	S'   Sr\\-  S-  \	S'   Sr\\	S'   Sr\\	S'   Sr\\	S'   S r\\-  \	S!'   S r\\-  \	S"'   S#r \S-  \	S$'   S#r!\\"\   -  S-  \	S%'   Sr#\S-  \	S&'   U 4S' jr$S(r%U =r&$ ))StableLmConfig   a  
use_parallel_residual (`bool`, *optional*, defaults to `False`):
    Whether to use a "parallel" formulation in each Transformer layer, which can provide a slight training
    speedup at large scales.
hidden_dropout (`float`, *optional*, defaults to 0.0):
    The dropout ratio after applying the MLP to the hidden states.

Example:

```python
>>> from transformers import StableLmModel, StableLmConfig

>>> # Initializing a StableLM stablelm-3b style configuration
>>> configuration = StableLmConfig()
```stablelmpast_key_valuesi  
vocab_sizei   intermediate_sizei 
  hidden_size    num_hidden_layersnum_attention_headsnum_key_value_headssilu
hidden_acti   max_position_embeddingsg{Gz?initializer_rangegh㈵>layer_norm_epsT	use_cacheFtie_word_embeddingsNrope_parametersuse_qkv_biasqk_layernormuse_parallel_residualg        hidden_dropoutattention_dropoutr   bos_token_ideos_token_idpad_token_idc                 J   > UR                  SS5        [        TU ]  " S0 UD6  g )Npartial_rotary_factorg      ? )
setdefaultsuper__post_init__)selfkwargs	__class__s     ڄ/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/models/stablelm/configuration_stablelm.pyr*   StableLmConfig.__post_init__C   s$    148''    r'   )'__name__
__module____qualname____firstlineno____doc__
model_typekeys_to_ignore_at_inferencer   int__annotations__r   r   r   r   r   r   strr   r   floatr   r   boolr   r   r   dictr   r   r   r    r!   r"   r#   listr$   r*   __static_attributes____classcell__)r-   s   @r.   r
   r
      s$     J#4"5J!s!Ks!!!!J#'S'#u#"NE"It %%48O^d*T18L$L$"'4'"%NECK%%(us{( L#* +,L#S	/D(,#L#*#( (r0   r
   N)r5   huggingface_hub.dataclassesr   configuration_utilsr   modeling_rope_utilsr   utilsr   r
   __all__r'   r0   r.   <module>rF      sK    # . 3 1 # 9:,(% ,(  ;,(^ 
r0   