
    Z j>                     `    S SK Jr  SSKJr  SSKJr  \" SS9\ " S S\5      5       5       rS/rg	)
    )strict   )PreTrainedConfig)auto_docstringzfacebook/cwm)
checkpointc                     ^  \ rS rSr% SrSrS/rSSSSSSSS.rS/S	/4S
S/S
/4S
/S
/4S.rSr	\
\S'   Sr\
\S'   Sr\
\S'   Sr\
\S'   Sr\
\S'   Sr\
\S'   Sr\\S'   Sr\
\S'   Sr\\S'   Sr\\S '   S!r\\S"'   S#r\
S#-  \S$'   S%r\
\S&'   S#r\
\\
   -  S#-  \S''   S(r\
\S)'   S*r\\S+'   S#r\ S#-  \S,'   S-r!\\
-  \S.'   S*r"\\S/'   S0r#\
\S1'   S2r$S3r%\
\S4'   S#r&\\   S#-  \S5'   U 4S6 jr'S7 r(S8r)U =r*$ )9	CwmConfig   aB  
```python
>>> from transformers import CwmModel, CwmConfig

>>> # Initializing a Cwm cwm-7b style configuration
>>> configuration = CwmConfig()

>>> # Initializing a model from the cwm-7b style configuration
>>> model = CwmModel(configuration)

>>> # Accessing the model configuration
>>> configuration = model.config
```cwmpast_key_valuescolwiserowwise)zlayers.*.self_attn.q_projzlayers.*.self_attn.k_projzlayers.*.self_attn.v_projzlayers.*.self_attn.o_projzlayers.*.mlp.gate_projzlayers.*.mlp.up_projzlayers.*.mlp.down_proj	input_idsinputs_embedshidden_statesattention_mask)embed_tokenslayersnormi  
vocab_sizei   hidden_sizei T  intermediate_size@   num_hidden_layers0   num_attention_heads   num_key_value_headssilu
hidden_acti   max_position_embeddingsg{Gz?initializer_rangegh㈵>rms_norm_epsT	use_cacheNpad_token_idi  bos_token_ideos_token_id   pretraining_tpFtie_word_embeddingsrope_parametersg        attention_dropoutmlp_bias   head_dim    .A    sliding_windowlayer_typesc                 R  > U R                   c  SSSSSSS.U l         U R                  c7  Sn[        U R                  5       Vs/ s H  nX2-  S	:X  a  S
OSPM     snU l        U R                  (       a  [        U R                  5      OS U l        [        U R                  5      U l        U R                  b  U R                  O/ SQU l        U R                  c  U R                  U R                  -  U l        U R                  c  U R                  U l        [        TU ]4  " S0 UD6  g s  snf )Nr0   g      0@g      @g      ?r1   llama3)
rope_thetafactorhigh_freq_factorlow_freq_factor original_max_position_embeddings	rope_type   r   full_attentionsliding_attention)i i i	  )r+   r3   ranger   r2   intlistr'   r/   r   r   r   super__post_init__)selfkwargswindow_patterni	__class__s       z/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/models/cwm/configuration_cwm.pyrD   CwmConfig.__post_init__W   s   ')$'#&48%$D  #N t556 6A '(&8A&=!DWW6 D
 ;?:M:Mc$"5"56SW 0 01151B1B1ND--Tl==  ,,0H0HHDM##+'+'?'?D$'' s   D$c                     U R                   U R                  -  S:w  a&  [        SU R                    SU R                   S35      eg)zOPart of `@strict`-powered validation. Validates the architecture of the config.r   zThe hidden size (z6) is not a multiple of the number of attention heads (z).N)r   r   
ValueError)rE   s    rJ   validate_architectureCwmConfig.validate_architecturet   sS    d666!;#D$4$4#5 622327  <    )r'   r/   r3   r   r+   r2   )+__name__
__module____qualname____firstlineno____doc__
model_typekeys_to_ignore_at_inferencebase_model_tp_planbase_model_pp_planr   rA   __annotations__r   r   r   r   r   r    strr!   r"   floatr#   r$   boolr%   r&   r'   rB   r)   r*   r+   dictr,   r-   r/   default_thetar2   r3   rD   rN   __static_attributes____classcell__)rI   s   @rJ   r	   r	      s    J#4"5 &/%.%.%."+ )"+ &(9:#%568IJ!"_$56 JK"s"s!!  J#)S)#u#L%It#L#*#L#+/L#S	/D(/NC %%#'OTD['%(us{(HdHcMNC$(KcT!((: rP   r	   N)huggingface_hub.dataclassesr   configuration_utilsr   utilsr   r	   __all__r?   rP   rJ   <module>rf      sF   , / 3 # >*\  \  +\~ -rP   