
    Z j!                     l    S SK Jr  SSKJr  SSKJr  SSKJr  \" SS9\ " S S	\5      5       5       rS	/r	g
)    )strict   )PreTrainedConfig)RopeParameters)auto_docstringzUsefulSensors/moonshine-tiny)
checkpointc                     ^  \ rS rSr% SrSrS/rSSSSS	.rS
r\	\
S'   Sr\	\
S'   Sr\	\
S'   Sr\	\
S'   Sr\	\
S'   Sr\	\
S'   Sr\	\
S'   Sr\	S-  \
S'   Sr\	S-  \
S'   Sr\	S-  \
S'   Sr\\
S'   Sr\\
S'   Sr\	\
S'   Sr\\
S'   Sr\	\
S'   S r\\
S!'   Sr\\-  S-  \
S"'   S r \\
S#'   S$r!\\
S%'   S&r"\\	-  \
S''   Sr#\	S-  \
S('   S)r$\	\%\	   -  S-  \
S*'   Sr&\	S-  \
S+'   S r'\\
S,'   U 4S- jr(S.r)U =r*$ )/MoonshineConfig   a	  
encoder_num_key_value_heads (`int`, *optional*):
    This is the number of key_value heads that should be used to implement Grouped Query Attention. If
    `encoder_num_key_value_heads=encoder_num_attention_heads`, the model will use Multi Head Attention (MHA), if
    `encoder_num_key_value_heads=1` the model will use Multi Query Attention (MQA) otherwise GQA is used. When
    converting a multi-head checkpoint to a GQA checkpoint, each group key and value head should be constructed
    by meanpooling all the original heads within that group. For more details, check out [this
    paper](https://huggingface.co/papers/2305.13245). If it is not specified, will default to
    `num_attention_heads`.
decoder_num_key_value_heads (`int`, *optional*):
    This is the number of key_value heads that should be used to implement Grouped Query Attention. If
    `decoder_num_key_value_heads=decoder_num_attention_heads`, the model will use Multi Head Attention (MHA), if
    `decoder_num_key_value_heads=1` the model will use Multi Query Attention (MQA) otherwise GQA is used. When
    converting a multi-head checkpoint to a GQA checkpoint, each group key and value head should be constructed
    by meanpooling all the original heads within that group. For more details, check out [this
    paper](https://huggingface.co/papers/2305.13245). If it is not specified, will default to
    `decoder_num_attention_heads`.
pad_head_dim_to_multiple_of (`int`, *optional*):
    Pad head dimension in encoder and decoder to the next multiple of this value. Necessary for using certain
    optimized attention implementations.
encoder_hidden_act (`str` or `function`, *optional*, defaults to `"gelu"`):
    The non-linear activation function (function or string) in the encoder.
decoder_hidden_act (`str` or `function`, *optional*, defaults to `"silu"`):
    The non-linear activation function (function or string) in the decoder.

Example:

```python
>>> from transformers import MoonshineModel, MoonshineConfig

>>> # Initializing a Moonshine style configuration
>>> configuration = MoonshineConfig().from_pretrained("UsefulSensors/moonshine-tiny")

>>> # Initializing a model from the configuration
>>> model = MoonshineModel(configuration)

>>> # Accessing the model configuration
>>> configuration = model.config
```	moonshinepast_key_valuesdecoder_num_key_value_headsdecoder_num_attention_headsdecoder_num_hidden_layersdecoder_hidden_act)num_key_value_headsnum_attention_headsnum_hidden_layers
hidden_acti   
vocab_sizei   hidden_sizei  intermediate_size   encoder_num_hidden_layers   encoder_num_attention_headsNencoder_num_key_value_headspad_head_dim_to_multiple_ofgeluencoder_hidden_actsilui   max_position_embeddingsg{Gz?initializer_range   decoder_start_token_idT	use_cacherope_parametersis_encoder_decoderFattention_biasg        attention_dropoutbos_token_id   eos_token_idpad_token_idtie_word_embeddingsc                    > U R                   c  U R                  U l         U R                  c  U R                  U l        UR	                  SS5        [
        TU ]  " S0 UD6  g )Npartial_rotary_factorg? )r   r   r   r   
setdefaultsuper__post_init__)selfkwargs	__class__s     چ/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/models/moonshine/configuration_moonshine.pyr5   MoonshineConfig.__post_init__i   sX    ++3/3/O/OD,++3/3/O/OD,137''    )r   r   )+__name__
__module____qualname____firstlineno____doc__
model_typekeys_to_ignore_at_inferenceattribute_mapr   int__annotations__r   r   r   r   r   r   r   r   r   r    strr   r"   r#   floatr%   r&   boolr'   r   dictr(   r)   r*   r+   r-   listr.   r/   r5   __static_attributes____classcell__)r8   s   @r9   r
   r
      sg   &P J#4"5<<8*	M JK!s!%&s&%&s&'(('((.2t2.2t2.2t2$$$$#&S&#u#"#C#It48O^d*T18## ND %(us{( L#* +,L#S	/D(,#L#*# $$( (r;   r
   N)
huggingface_hub.dataclassesr   configuration_utilsr   modeling_rope_utilsr   utilsr   r
   __all__r2   r;   r9   <module>rR      sK   * / 3 1 # 9:S(& S(  ;S(l 
r;   