
    Z j                     l    S SK Jr  SSKJr  SSKJr  SSKJr  \" SS9\ " S S	\5      5       5       rS	/r	g
)    )strict   )PreTrainedConfig)RopeParameters)auto_docstringzLGAI-EXAONE/EXAONE-4.0-32B)
checkpointc            
         ^  \ rS rSr% SrSrS/rSSSSSSSSSS.	rS	/S
/4SS/S/4S/S/4S.rSr	\
\S'   Sr\
\S'   Sr\
\S'   Sr\
\S'   Sr\
\S'   Sr\
\S'   Sr\\S'   Sr\
\S'   Sr\\S'   Sr\\S'   S r\\S!'   S"r\
S#-  \S$'   S%r\
\\
   -  S#-  \S&'   S#r\
S#-  \S''   S(r\\S)'   S#r\\ -  S#-  \S*'   S+r!\\
-  \S,'   Sr"\
S#-  \S-'   S.r#\\
-  S#-  \S/'   S#r$\\   S#-  \S0'   U 4S1 jr%S2r&U =r'$ )3Exaone4Config   a*  
sliding_window_pattern (`str`, *optional*):
    The pattern to use for sliding window attention. Can be one of:
        - `None`: No sliding window attention is used
        - `int`: Every `sliding_window` layers, use global attention, else use local attention.
        - `str`: A sequence of "L" (local attention) and "G" (global attention) characters that defines the
          attention pattern. The pattern starts from layer 0 and repeats every `sliding_window` layers. The
          final layer always uses global attention regardless of the pattern.
    For instance, sliding_window_pattern="LLLG" same as sliding_window=4, which means:
        - Layer 0, 1, 2: local attention,
        - Layer 3: global attention,
        ...(repeated)

Example:

```python
>>> from transformers import Exaone4Model, Exaone4Config

>>> # Initializing a EXAONE configuration
>>> configuration = Exaone4Config()

>>> # Initializing a model from configuration
>>> model = Exaone4Model(configuration)

>>> # Accessing the model configuration
>>> configuration = model.config
```exaone4past_key_valuescolwisereplicated_with_grad_allreducerowwise)	zlayers.*.self_attn.q_projzlayers.*.self_attn.k_projzlayers.*.self_attn.v_projzlayers.*.self_attn.q_normzlayers.*.self_attn.k_normzlayers.*.self_attn.o_projzlayers.*.mlp.gate_projzlayers.*.mlp.up_projzlayers.*.mlp.down_proj	input_idsinputs_embedshidden_statesattention_mask)embed_tokenslayersnormi  
vocab_sizei   hidden_sizei @  intermediate_size    num_hidden_layersnum_attention_headsnum_key_value_headssilu
hidden_acti   max_position_embeddingsg{Gz?initializer_rangegh㈵>rms_norm_epsT	use_cacher   Nbos_token_id   eos_token_idpad_token_idFtie_word_embeddingsrope_parametersg        attention_dropoutsliding_window   sliding_window_patternlayer_typesc                   > U R                   c  SU l        U R                  cR  [        U R                  5       Vs/ s H,  nUS-   U R                  -  S:w  a  X R                  :  a  SOSPM.     snU l        [
        TU ]  " S0 UD6  g s  snf )Nr      sliding_attentionfull_attention )r,   r.   r/   ranger   super__post_init__)selfkwargsi	__class__s      ڂ/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/models/exaone4/configuration_exaone4.pyr7   Exaone4Config.__post_init__d   s    &*+D'#
 t556	  7A Ut::;q@QI_I_E_ $%& 7	 D 	'' s   3B)r/   r.   )(__name__
__module____qualname____firstlineno____doc__
model_typekeys_to_ignore_at_inferencebase_model_tp_planbase_model_pp_planr   int__annotations__r   r   r   r   r   r    strr!   r"   floatr#   r$   boolr%   r'   listr(   r)   r*   r   dictr+   r,   r.   r/   r7   __static_attributes____classcell__)r;   s   @r<   r
   r
      s   8 J#4"5 &/%.%.%E%E%."+ )"+
 &(9:#%568IJ!"_$56 JK"s"s!!!!J#'S'#u#L%It L#* +,L#S	/D(,#L#*# %%48O^d*T18%(us{(!%NC$J%/0C#I,0$(KcT!(( (    r
   N)
huggingface_hub.dataclassesr   configuration_utilsr   modeling_rope_utilsr   utilsr   r
   __all__r4   rP   r<   <module>rV      sK   * / 3 1 # 78Q($ Q(  9Q(h 
rP   