
    Z j                     l    S SK Jr  SSKJr  SSKJr  SSKJr  \" SS9\ " S S	\5      5       5       rS	/r	g
)    )strict   )PreTrainedConfig)RopeParameters)auto_docstringzallenai/Olmo-3-7B-Instruct)
checkpointc                     ^  \ rS rSr% SrSrS/rSSSSSSSS	.rS
/S/4SS/S/4S/S/4S.rSr	\
\S'   Sr\
\S'   Sr\
\S'   Sr\
\S'   Sr\
\S'   Sr\
S-  \S'   Sr\\S'   Sr\
\S'   Sr\\S'   S r\\S!'   S"r\
S-  \S#'   Sr\
S-  \S$'   S%r\
\\
   -  S-  \S&'   S'r\\S('   Sr\\-  S-  \S)'   S'r \\S*'   S+r!\\
-  \S,'   S-r"\\S.'   Sr#\
S-  \S/'   Sr$\\   S-  \S0'   U 4S1 jr%S2r&U =r'$ )3Olmo3Config   aU  
Example:

```python
>>> from transformers import Olmo3Model, Olmo3Config

>>> # Initializing a Olmo3 7B style configuration
>>> configuration = Olmo3Config()

>>> # Initializing a model from the Olmo3 7B style configuration
>>> model = Olmo3Model(configuration)

>>> # Accessing the model configuration
>>> configuration = model.config
```
olmo3past_key_valuescolwise_gather_outputrowwise_split_inputcolwiserowwise)zlayers.*.self_attn.q_projzlayers.*.self_attn.k_projzlayers.*.self_attn.v_projzlayers.*.self_attn.o_projzlayers.*.mlp.gate_projzlayers.*.mlp.up_projzlayers.*.mlp.down_proj	input_idsinputs_embedshidden_statesattention_mask)embed_tokenslayersnormi  
vocab_sizei   hidden_sizei +  intermediate_size    num_hidden_layersnum_attention_headsNnum_key_value_headssilu
hidden_acti   max_position_embeddingsg{Gz?initializer_rangeT	use_cache   pad_token_idbos_token_idig  eos_token_idFtie_word_embeddingsrope_parametersattention_biasg        attention_dropoutgh㈵>rms_norm_epssliding_windowlayer_typesc                 2  > U R                   c  U R                  U l         U R                  c8  [        U R                  5       Vs/ s H  o"S-   S-  S:w  a  SOSPM     snU l        U R                   c  U R                  U l         [
        TU ]  " S0 UD6  g s  snf )Nr%      r   sliding_attentionfull_attention )r   r   r/   ranger   super__post_init__)selfkwargsi	__class__s      ~/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/models/olmo3/configuration_olmo3.pyr7   Olmo3Config.__post_init__X   s    ##+'+'?'?D$#W\]a]s]sWt WtRSA{a'7#=MMWt D ##+'+'?'?D$'' s   B)r/   r   )(__name__
__module____qualname____firstlineno____doc__
model_typekeys_to_ignore_at_inferencebase_model_tp_planbase_model_pp_planr   int__annotations__r   r   r   r   r   r!   strr"   r#   floatr$   boolr&   r'   r(   listr)   r*   r   dictr+   r,   r-   r.   r/   r7   __static_attributes____classcell__)r;   s   @r<   r
   r
      s{   " J#4"5%<%<%<%:"+ )"+ &(9:#%568IJ!"_$56 JK"s"s!!&*t*J#'S'#u#It L#* #L#*#+0L#S	/D(0 %%48O^d*T18 ND %(us{(L%!%NC$J%$(KcT!(
( 
(    r
   N)
huggingface_hub.dataclassesr   configuration_utilsr   modeling_rope_utilsr   utilsr   r
   __all__r4   rP   r<   <module>rV      sJ   * / 3 1 # 78D(" D(  9D(N /rP   