
    Z j                     p    S r SSKJr  SSKJr  SSKJr  SSKJr  \" SS9\ " S	 S
\5      5       5       r	S
/r
g)zPyTorch Phi-MoE model.    )strict   )PreTrainedConfig)RopeParameters)auto_docstringzmicrosoft/Phi-3.5-MoE-instruct)
checkpointc                     ^  \ rS rSr% SrSrS/rSrSr\	\
S'   Sr\	\
S	'   S
r\	\
S'   Sr\	\
S'   Sr\	\
S'   Sr\	\
S'   Sr\\
S'   Sr\	\
S'   Sr\\
S'   Sr\\
S'   Sr\\
S'   Sr\	S-  \
S'   Sr\	S-  \
S'   Sr\	\\	   -  S-  \
S '   S!r\\
S"'   Sr\\-  S-  \
S#'   Sr \	S-  \
S$'   S%r!\\	-  \
S&'   Sr"\	\
S''   S(r#\	\
S)'   S!r$\\
S*'   S+r%\\
S,'   S-r&\\
S.'   S%r'\\
S/'   S!r(\\
S0'   S!r)\\
S1'   U 4S2 jr*U 4S3 jr+S4r,U =r-$ )5PhimoeConfig   ar  
num_local_experts (`int`, *optional*, defaults to 16):
    Number of experts per Sparse MLP layer.
input_jitter_noise (`float`, *optional*, defaults to 0.0):
    Input jitter noise
lm_head_bias (`bool`, *optional*, defaults to `False`):
    LM head bias

Example:

```python
>>> from transformers import PhimoeModel, PhimoeConfig
>>> # Initializing a Phi-3 style configuration
>>> configuration = PhimoeConfig.from_pretrained("microsoft/Phi-3.5-MoE-instruct")
>>> # Initializing a model from the configuration
>>> model = PhimoeModel(configuration)
>>> # Accessing the model configuration
>>> configuration = model.config
```phimoepast_key_valuesg    .Ai@}  
vocab_sizei   hidden_sizei   intermediate_size    num_hidden_layersnum_attention_heads   num_key_value_headssilu
hidden_acti   max_position_embeddingsg{Gz?initializer_rangegh㈵>rms_norm_epsT	use_cacheNpad_token_id   bos_token_id   eos_token_idFtie_word_embeddingsrope_parameterssliding_windowg        attention_dropoutnum_experts_per_tok   num_local_expertsoutput_router_logitsgMbP?router_aux_loss_coefg{Gz?router_jitter_noiseinput_jitter_noiseattention_biaslm_head_biasc                 b   > U R                   c  U R                  U l         [        TU ]  " S0 UD6  g )N )r   r   super__post_init__)selfkwargs	__class__s     ڀ/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/models/phimoe/configuration_phimoe.pyr1   PhimoeConfig.__post_init__N   s-    ##+'+'?'?D$''    c                   > [         TU ]  5         U R                  S   S:w  a  SU R                  ;   a  U R                  S   U l        U R                  R	                  SS5      nU R                  R	                  SS5      n[        U[        [        45      (       d  [        SU 35      e[        U[        [        45      (       d  [        SU 35      egg)	z/
Validate the `rope_parameters` configuration.
	rope_typedefault original_max_position_embeddingsshort_mscaleNlong_mscalez=`rope_parameters`'s short_mscale field must be a number, got z<`rope_parameters`'s long_mscale field must be a number, got )	r0   validate_roper"   r;   get
isinstanceintfloat	TypeError)r2   rope_parameters_short_mscalerope_parameters_long_mscaler4   s      r5   r>   PhimoeConfig.validate_ropeS   s     	 ,	91T5I5II8<8L8LMo8p5+/+?+?+C+CNTX+Y(*.*>*>*B*B=RV*W':S%LIISTpSqr  9C<HHRSnRop  I :r7   )r   r;   ).__name__
__module____qualname____firstlineno____doc__
model_typekeys_to_ignore_at_inferencedefault_thetar   rA   __annotations__r   r   r   r   r   r   strr   r   rB   r   r   boolr   r   r    listr!   r"   r   dictr#   r$   r%   r'   r(   r)   r*   r+   r,   r-   r1   r>   __static_attributes____classcell__)r4   s   @r5   r
   r
      se   ( J#4"5MJK!s!s!!  J#,S,#u#L%It#L#*# L#* +,L#S	/D(, %%48O^d*T18!%NC$J%%(us{(  s!&$&"'%'!%% ## ND L$(
 r7   r
   N)rK   huggingface_hub.dataclassesr   configuration_utilsr   modeling_rope_utilsr   utilsr   r
   __all__r/   r7   r5   <module>r[      sN     . 3 1 # ;<L# L  =L^ 
r7   