
    Z j                     p    S r SSKJr  SSKJr  SSKJr  SSKJr  \" SS9\ " S	 S
\5      5       5       r	S
/r
g)zDiffLlama model configuration    )strict   )PreTrainedConfig)RopeParameters)auto_docstringzkajuma/DiffLlama-0.3B-handcut)
checkpointc                     ^  \ rS rSr% SrSrS/rSr\\	S'   Sr
\\	S'   S	r\\	S
'   Sr\\	S'   Sr\\	S'   Sr\S-  \	S'   Sr\\	S'   Sr\\	S'   Sr\\	S'   Sr\\	S'   Sr\\	S'   Sr\S-  \	S'   Sr\S-  \	S'   Sr\\\   -  S-  \	S'   Sr\\	S '   Sr\\-  S-  \	S!'   Sr\\	S"'   S#r \\-  S-  \	S$'   S%r!\S-  \	S&'   Sr"\S-  \	S''   U 4S( jr#S)r$U =r%$ )*DiffLlamaConfig   a  
lambda_std_dev (`float`, *optional*, defaults to 0.1):
    The standard deviation for initialization of parameter lambda in attention layer.

```python
>>> from transformers import DiffLlamaModel, DiffLlamaConfig

>>> # Initializing a DiffLlama diffllama-7b style configuration
>>> configuration = DiffLlamaConfig()

>>> # Initializing a model from the diffllama-7b style configuration
>>> model = DiffLlamaModel(configuration)

>>> # Accessing the model configuration
>>> configuration = model.config
```
	diffllamapast_key_valuesi }  
vocab_sizei   hidden_sizei    intermediate_size   num_hidden_layers    num_attention_headsNnum_key_value_headssilu
hidden_actmax_position_embeddingsg{Gz?initializer_rangegh㈵>rms_norm_epsT	use_cachepad_token_id   bos_token_id   eos_token_idFtie_word_embeddingsrope_parametersattention_biasg        attention_dropoutg?lambda_std_devhead_dimc                    > U R                   c  U R                  U l         U R                  b  U R                  OU R                  U R                  -  U l        [        TU ]  " S0 UD6  g )N )r   r   r&   r   super__post_init__)selfkwargs	__class__s     چ/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/models/diffllama/configuration_diffllama.pyr*   DiffLlamaConfig.__post_init__G   sU    ##+'+'?'?D$)-)BHXHX\`\t\tHt''    )r&   r   )&__name__
__module____qualname____firstlineno____doc__
model_typekeys_to_ignore_at_inferencer   int__annotations__r   r   r   r   r   r   strr   r   floatr   r   boolr   r   r    listr!   r"   r   dictr#   r$   r%   r&   r*   __static_attributes____classcell__)r-   s   @r.   r
   r
      s&   $ J#4"5JK!s!s!!&*t*J#'S'#u#L%It#L#*# L#* +,L#S	/D(, %%48O^d*T18 ND ,/us{T)/#&NEDL&HcDj( (r0   r
   N)r5   huggingface_hub.dataclassesr   configuration_utilsr   modeling_rope_utilsr   utilsr   r
   __all__r(   r0   r.   <module>rF      sK   " $ . 3 1 # :;1(& 1(  <1(h 
r0   