
    Z j(                     p    S r SSKJr  SSKJr  SSKJr  SSKJr  \" SS9\ " S	 S
\5      5       5       r	S
/r
g)zQwen3 model configuration    )strict   )PreTrainedConfig)RopeParameters)auto_docstringzQwen/Qwen3-8B)
checkpointc            
          ^  \ rS rSr% SrSrS/rSSSSSSSSSS.	rS	/S
/4SS/S/4S/S/4S.rSr	\
\S'   Sr\
\S'   Sr\
\S'   Sr\
\S'   Sr\
\S'   Sr\
S-  \S'   Sr\
\S'   Sr\\S'   Sr\
\S'   Sr\\S '   S!r\\S"'   S#r\\S$'   S%r\\S&'   Sr\\-  S-  \S''   S%r\\S('   S%r\\S)'   Sr \
S-  \S*'   S+r!\
\S,'   Sr"\#\   S-  \S-'   S.r$\\
-  \S/'   Sr%\
S-  \S0'   Sr&\
S-  \S1'   Sr'\
\#\
   -  S-  \S2'   U 4S3 jr(S4r)U =r*$ )5Qwen3Config   aH  
```python
>>> from transformers import Qwen3Model, Qwen3Config

>>> # Initializing a Qwen3 style configuration
>>> configuration = Qwen3Config()

>>> # Initializing a model from the Qwen3-8B style configuration
>>> model = Qwen3Model(configuration)

>>> # Accessing the model configuration
>>> configuration = model.config
```
qwen3past_key_valuescolwisereplicated_with_grad_allreducerowwise)	zlayers.*.self_attn.q_projzlayers.*.self_attn.k_projzlayers.*.self_attn.v_projzlayers.*.self_attn.q_normzlayers.*.self_attn.k_normzlayers.*.self_attn.o_projzlayers.*.mlp.gate_projzlayers.*.mlp.up_projzlayers.*.mlp.down_proj	input_idsinputs_embedshidden_statesattention_mask)embed_tokenslayersnormiQ 
vocab_sizei   hidden_sizei V  intermediate_size    num_hidden_layersnum_attention_headsNnum_key_value_heads   head_dimsilu
hidden_acti   max_position_embeddingsg{Gz?initializer_rangegư>rms_norm_epsT	use_cacheFtie_word_embeddingsrope_parametersattention_biasuse_sliding_windowsliding_window   max_window_layerslayer_typesg        attention_dropoutpad_token_idbos_token_ideos_token_idc                 `  > U R                   (       a  U R                  OS U l        U R                  c  U R                  U l        U R                  cI  [        U R                  5       Vs/ s H#  nU R                  b  X R                  :  a  SOSPM%     snU l        [        TU ]$  " S0 UD6  g s  snf )Nsliding_attentionfull_attention )
r*   r+   r   r   r.   ranger   r-   super__post_init__)selfkwargsi	__class__s      ~/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/models/qwen3/configuration_qwen3.pyr9   Qwen3Config.__post_init__V   s    595L5Ld11RV##+'+'?'?D$#
 t556	  7A &&2q<R<R7R $%& 7	 D 	'' s   (*B+)r.   r   r+   )+__name__
__module____qualname____firstlineno____doc__
model_typekeys_to_ignore_at_inferencebase_model_tp_planbase_model_pp_planr   int__annotations__r   r   r   r   r   r    r"   strr#   r$   floatr%   r&   boolr'   r(   r   dictr)   r*   r+   r-   r.   listr/   r0   r1   r2   r9   __static_attributes____classcell__)r=   s   @r>   r
   r
      s    J#4"5 &/%.%.%E%E%."+ )"+
 &(9:#%568IJ!"_$56 JK"s"s!!&(t(HcJ#(S(#u#L%It %%48O^d*T18 ND $$!%NC$J%s$(KcT!(%(us{(#L#*##L#*#+/L#S	/D(/( (    r
   N)rD   huggingface_hub.dataclassesr   configuration_utilsr   modeling_rope_utilsr   utilsr   r
   __all__r6   rR   r>   <module>rX      sL      . 3 1 # ?+I(" I(  ,I(X /rR   