
    Z jP"                        S r SSKJr  SSKJr  SSKJrJr  \" 5       (       a  SSKJ	r	J
r
JrJrJrJrJrJr  SrO1SSKJr  \S	   r	\S
   r
\S   r\S   r\S   r\S   rS\S\S\4S jrSr\" SS9\ " S S\5      5       5       rS/rg)zxLSTM configuration.    )strict   )PreTrainedConfig)auto_docstringis_xlstm_available)BackendModeTypeChunkwiseKernelType	DtypeTypeSequenceKernelTypeStepKernelTypeWeightModeTyperound_up_to_next_multiple_ofxLSTMLargeConfigT)Literal)traintrain_with_padding	inference)chunkwise--native_autogradzparallel--native_autograd)float32bfloat16float16native_sequence__nativenative)singlefusedxmultiple_ofreturnc                 .    [        X-   S-
  U-  U-  5      $ )z0Rounds up x to the next multiple of multiple_of.   )int)r   r   s     ~/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/models/xlstm/configuration_xlstm.pyr   r   1   s    Q_q([8KGHH    FzNX-AI/xLSTM-7b)
checkpointc                   b  ^  \ rS rSr% SrSrSr\\S'   Sr	\\S'   Sr
\S-  \S	'   S
r\\S'   Sr\S-  \S'   Sr\\S'   Sr\\S'   Sr\\S'   Sr\\S'   Sr\\S'   Sr\\S'   Sr\\S'   Sr\\S'   Sr\\S'   Sr\\S'   Sr\\S '   S!r\\S"'   S#r\\S$'   Sr \\S%'   S&r!\"\S''   Sr#\\S('   S)r$\"\S*'   S+r%\\S,'   S#r&\\S-'   S.r'\\S/'   S0r(\\S1'   S2r)\*\S3'   Sr+\\S4'   S5r,\S-  \S6'   S7r-\S-  \S8'   S9r.\\/\   -  S-  \S:'   S;r0\\S<'   U 4S= jr1\2S> 5       r3\2S? 5       r4\2S@ 5       r5\2SA 5       r6SB r7SCr8U =r9$ )DxLSTMConfig8   a~  
num_blocks (int, optional, *optional*, defaults to 32):
    Number of blocks of the xLSTM model, use num_hidden_layers if None.
num_heads (int, optional, *optional*, defaults to 8):
    Number of heads for the xLSTM Layer/Cell.
use_bias (bool, optional, *optional*, defaults to `False`):
    Whether to use biases in the xLSTM model.
norm_reduction_force_float32 (bool, optional, *optional*, defaults to `True`):
    Whether to force the float32 norm reduction op to be done in fp32 precision.
add_out_norm (bool, optional, *optional*, defaults to `True`):
    Whether to add an output norm after the blocks before the LMHead.
qk_dim_factor (float, optional, *optional*, defaults to 0.5):
    Scale factor for the query and key dimension.
v_dim_factor (float, optional, *optional*, defaults to 1.0):
    Scale factor for the value dimension.
chunkwise_kernel (ChunkwiseKernelType, optional, *optional*, defaults to `"chunkwise--native_autograd"`):
    Kernel type for chunkwise processing mode.
sequence_kernel (SequenceKernelType, optional, *optional*, defaults to `"native_sequence__native"`):
    Kernel type for sequence processing mode.
step_kernel (StepKernelType, optional, *optional*, defaults to `"native"`):
    Kernel type for step processing mode.
mode (BackendModeType, optional, *optional*, defaults to `"inference"`):
    Operation mode (inference is needed for generation).
chunk_size (int, optional, *optional*, defaults to 64):
    Internal chunk size.
return_last_states (bool, optional, *optional*, defaults to `True`):
    If to return the last states / cache internally. Needed as True for generation.
autocast_kernel_dtype (DtypeType, optional, *optional*, defaults to `"bfloat16"`):
    Kernel dtype for the states.
inference_state_dtype (DtypeType, optional, *optional*, defaults to `"float32"`):
    Kernel dtype for states in inference.
ffn_proj_factor (float, optional, *optional*, defaults to 2.667):
    Size factor of the post-up projection gated Feed Forward network.
ffn_round_up_to_multiple_of (int, optional, *optional*, defaults to 64):
    Size factor round value of the post-up projection gated Feed Forward network.
gate_soft_cap (float, optional, *optional*, defaults to 15.0):
    Gate soft cap scale.
output_logit_soft_cap (float, optional, *optional*, defaults to 30.0):
    Output logit soft cap scale.
weight_mode (`Literal`, *optional*, defaults to `"single"`):
    Whether parallel linear layers are separated or fused (single).
max_inference_chunksize (int, optional, *optional*, defaults to 16384):
    Limit the chunk size for inference to save memory.

Example:

```python
>>> from transformers import xLSTMConfig, xLSTMModel

>>> # Initializing a xLSTM configuration
>>> configuration = xLSTMConfig()

>>> # Initializing a model (with random weights) from the configuration
>>> model = xLSTMModel(configuration)

>>> # Accessing the model configuration
>>> configuration = model.config
```xlstmi  
vocab_sizei   hidden_sizeNembedding_dim    num_hidden_layers
num_blocks   	num_headsFuse_biasTnorm_reduction_force_float32tie_word_embeddingsadd_out_normgư>norm_epsg      ?qk_dim_factorg      ?v_dim_factorr   chunkwise_kernelr   sequence_kernelr   step_kernelr   mode@   
chunk_sizereturn_last_statesr   autocast_kernel_dtypeepsr   inference_state_dtypegtV@ffn_proj_factorffn_round_up_to_multiple_ofg      .@gate_soft_capg      >@output_logit_soft_capr   weight_mode	use_cacher    pad_token_idr   bos_token_id   eos_token_idi @  max_inference_chunksizec                 v  > U R                   b  U R                   OU R                  U l         U R                  b  U R                  OU R                   U l        U R                  b  U R                  OU R                  U l        U R                  b  U R                  OU R                  U l        [        TU ]  " S0 UD6  g )N )r*   r+   r-   r.   super__post_init__)selfkwargs	__class__s     r"   rP   xLSTMConfig.__post_init__   s    /3/?/?/K4++QUQcQc373E3E3QT//W[WgWg;?;Q;Q;]!7!7cgcrcr-1__-H$//dNdNd''r#   c                 D    [        U R                  U R                  -  SS9$ Nr<   )r   )r   r*   r6   rQ   s    r"   qk_dimxLSTMConfig.qk_dim   s&    +t111
 	
r#   c                 D    [        U R                  U R                  -  SS9$ rV   )r   r*   r7   rW   s    r"   v_dimxLSTMConfig.v_dim   s&    +t000
 	
r#   c                 4    U R                   U R                  -  $ N)rX   r0   rW   s    r"   qk_head_dimxLSTMConfig.qk_head_dim   s    {{dnn,,r#   c                 4    U R                   U R                  -  $ r^   )r[   r0   rW   s    r"   
v_head_dimxLSTMConfig.v_head_dim   s    zzT^^++r#   c                    [         (       GaA  [        S0 SU R                  _SU R                  _SU R                  _SU R
                  _SU R                  _SU R                  _SU R                  _SU R                  _S	U R                  _S
U R                  _SU R                  _SU R                  _SU R                  _SU R                  _SU R                   _SU R"                  _SU R$                  _SU R&                  _SU R(                  _SU R*                  _SU R,                  _SU R.                  _SU R0                  _SU R2                  _6$ U $ )Nr)   r+   r.   r0   r1   r4   r5   r2   r6   r7   r8   r9   r:   r;   r=   r>   r?   r@   rA   rB   rC   rD   rE   rF   rN   )external_xlstmr   r)   r*   r-   r0   r1   r4   r5   r2   r6   r7   r8   r9   r:   r;   r=   r>   r?   r@   rA   rB   rC   rD   rE   rF   rW   s    r"   to_xlstm_block_config!xLSTMConfig.to_xlstm_block_config   s   ># ??"..  11 ..	
  "..  .2-N-N #00 ".. "&!6!6 !% 4 4 !,,  YY!"  ??#$ $(#:#:%& '+&@&@'( HH)* '+&@&@+. !% 4 4/0 -1,L,L14 #0056 '+&@&@78 !,,9 > Kr#   )r+   r*   r.   r-   ):__name__
__module____qualname____firstlineno____doc__
model_typer)   r!   __annotations__r*   r+   r-   r.   r0   r1   boolr2   r3   r4   r5   floatr6   r7   r8   r	   r9   r   r:   r   r;   r   r=   r>   r?   r
   r@   rA   rB   rC   rD   rE   rF   r   rG   rH   rI   rK   listrL   rP   propertyrX   r[   r_   rb   rf   __static_attributes____classcell__)rS   s   @r"   r&   r&   8   s   9v JJK $M3:$s!Jd
!IsHd)- $- %%L$HeM5L%,H)H*CO'C"*K*'D/'J##'191C'090"OU"'))M5#'5'"*K*It L#*  L#* +,L#S	/D(,#(S(( 
 
 
 
 - - , ,! !r#   r&   N)rl   huggingface_hub.dataclassesr   configuration_utilsr   utilsr   r   xlstm.xlstm_large.modelr   r	   r
   r   r   r   r   r   re   typingr   r!   r&   __all__rN   r#   r"   <module>r{      s     . 3 7 	 	 	 NHIO!	% 89I !:;X&N./NI I# I# I N +,]" ]  -]@ /r#   