
    Z j                     d    S r SSKJr  SSKJr  SSKJr  \" SS9\ " S S	\5      5       5       rS	/rg
)zmT5 model configuration    )strict   )PreTrainedConfig)auto_docstringzgoogle/mt5-small)
checkpointc                     ^  \ rS rSr% SrSrS/rSSSSS	.rS
r\	\
S'   Sr\	\
S'   Sr\	\
S'   Sr\	\
S'   Sr\	\
S'   Sr\	S-  \
S'   Sr\	\
S'   Sr\	\
S'   Sr\	\
S'   Sr\\	-  \
S'   Sr\\
S'   Sr\\
S'   Sr\\
S'   S r\\
S!'   S r\\
S"'   S r\\
S#'   Sr\	S-  \
S$'   S%r\	S-  \
S&'   S'r\	\ \	   -  S-  \
S('   S%r!\	S-  \
S)'   S*r"\\	-  \
S+'   S,r#\\
S-'   U 4S. jr$S/ r%S0r&U =r'$ )1	MT5Config   a  
    relative_attention_num_buckets (`int`, *optional*, defaults to 32):
        The number of buckets to use for each attention layer.
    relative_attention_max_distance (`int`, *optional*, defaults to 128):
        The maximum distance of the longer sequences for the bucket separation.
    feed_forward_proj (`str`, *optional*, defaults to `"gated-gelu"`):
        Type of feed forward layer to be used. Should be one of `"relu"` or `"gated-gelu"`.
    """

    model_type = "mt5"
    keys_to_ignore_at_inference = ["past_key_values"]
    attribute_map = {
        "hidden_size": "d_model",
        "num_attention_heads": "num_heads",
        "num_hidden_layers": "num_layers",
        "head_dim": "d_kv",
    }

    vocab_size: int = 250112
    d_model: int = 512
    d_kv: int = 64
    d_ff: int = 1024
    num_layers: int = 8
    num_decoder_layers: int | None = None
    num_heads: int = 6
    relative_attention_num_buckets: int = 32
    relative_attention_max_distance: int = 128
    dropout_rate: float = 0.1
    layer_norm_epsilon: float = 1e-6
    initializer_factor: float = 1.0
    feed_forward_proj: str = "gated-gelu"
    is_encoder_decoder: bool = True
    use_cache: bool = True
    tie_word_embeddings: bool = False
    bos_token_id: int | None = None
    pad_token_id: int | None = 0
    eos_token_id: int | list[int] | None = 1
    decoder_start_token_id: int | None = 0
    classifier_dropout: float = 0.0
    is_decoder: bool = False

    def __post_init__(self, **kwargs):
        # If the decoder depth is not set explicitly, mirror the encoder depth.
        self.num_decoder_layers = (
            self.num_decoder_layers if self.num_decoder_layers is not None else self.num_layers
        )

        # Derive the dense activation from `feed_forward_proj`, e.g. "gated-gelu" -> gated + "gelu".
        act_info = self.feed_forward_proj.split("-")
        self.dense_act_fn = act_info[-1]
        self.is_gated_act = act_info[0] == "gated"

        # For backwards compatibility, "gated-gelu" maps to the `gelu_new` activation.
        if self.feed_forward_proj == "gated-gelu":
            self.dense_act_fn = "gelu_new"

        super().__post_init__(**kwargs)

    def validate_architecture(self):
        """Part of `@strict`-powered validation. Validates the architecture of the config."""
        act_info = self.feed_forward_proj.split("-")
        if len(act_info) > 1 and act_info[0] != "gated" or len(act_info) > 2:
            raise ValueError(
                f"`feed_forward_proj`: {self.feed_forward_proj} is not a valid activation function of the dense "
                "layer. Please make sure `feed_forward_proj` is of the format `gated-{ACT_FN}` or `{ACT_FN}`, e.g. "
                "'gated-gelu' or 'relu'"
            )


__all__ = ["MT5Config"]
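
# ---------------------------------------------------------------------------
# Illustrative usage sketch (not part of the upstream module): copy it into a
# standalone script with `transformers` installed. It shows how `__post_init__`
# derives `dense_act_fn` and `is_gated_act` from `feed_forward_proj`, how the
# decoder depth defaults to the encoder depth, and how `attribute_map` aliases
# common attribute names onto the T5-style field names.
#
#     from transformers import MT5Config
#
#     config = MT5Config()
#     # The default "gated-gelu" is remapped to the "gelu_new" activation for
#     # backwards compatibility and marks the feed forward block as gated.
#     assert config.dense_act_fn == "gelu_new"
#     assert config.is_gated_act is True
#     # `num_decoder_layers` mirrors `num_layers` when left unset.
#     assert config.num_decoder_layers == config.num_layers == 8
#     # `hidden_size` is an alias of `d_model` through `attribute_map`.
#     assert config.hidden_size == config.d_model == 512
#
#     relu_config = MT5Config(feed_forward_proj="relu")
#     assert relu_config.dense_act_fn == "relu"
#     assert relu_config.is_gated_act is False
# ---------------------------------------------------------------------------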