
    Z j                     d    S r SSKJr  SSKJr  SSKJr  \" SS9\ " S S	\5      5       5       rS	/rg
)zUMT5 model configuration    )strict   )PreTrainedConfig)auto_docstringzgoogle/umt5-small)
checkpointc                     ^  \ rS rSr% SrSrS/rSSSSS	.rS
r\	\
S'   Sr\	\
S'   Sr\	\
S'   Sr\	\
S'   Sr\	\
S'   Sr\	S-  \
S'   Sr\	\
S'   Sr\	\
S'   Sr\	\
S'   Sr\\	-  \
S'   Sr\\
S'   Sr\\
S'   Sr\\
S'   S r\\
S!'   S r\\
S"'   S#r\	S-  \
S$'   S%r\	\\	   -  S-  \
S&'   S#r\	S-  \
S''   S(r \\	-  \
S)'   S*r!\\
S+'   U 4S, jr"S- r#S.r$U =r%$ )/
UMT5Config   a  
relative_attention_num_buckets (`int`, *optional*, defaults to 32):
    The number of buckets to use for each attention layer.
relative_attention_max_distance (`int`, *optional*, defaults to 128):
    The maximum distance of the longer sequences for the bucket separation.
feed_forward_proj (`str`, *optional*, defaults to `"gated-gelu"`):
    Type of feed forward layer to be used. Should be one of `"relu"` or `"gated-gelu"`.
umt5past_key_valuesd_model	num_heads
num_layersd_kv)hidden_sizenum_attention_headsnum_hidden_layershead_dimi  
vocab_sizei   @   i   d_ff   Nnum_decoder_layers       relative_attention_num_buckets   relative_attention_max_distanceg?dropout_rategư>layer_norm_epsilong      ?initializer_factor
gated-gelufeed_forward_projTis_encoder_decoder	use_cacher   pad_token_id   eos_token_iddecoder_start_token_idg        classifier_dropoutF
is_decoderc                 >  > U R                   b  U R                   OU R                  U l         U R                  R                  S5      nUS   U l        US   S:H  U l        U R                  S:X  a  SU l        UR                  SS 5        SU l        [        TU ]$  " S	0 UD6  g )
N-r   gatedr"   gelu_newtie_word_embeddingsT )
r   r   r#   splitdense_act_fnis_gated_actpopr1   super__post_init__)selfkwargsact_info	__class__s      |/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/models/umt5/configuration_umt5.pyr8   UMT5Config.__post_init__@   s    '+'>'>'JD##PTP_P_ 	 ))//4$RL$QK72!!\1 *D

($/#' ''    c                     U R                   R                  S5      n[        U5      S:  a	  US   S:w  d  [        U5      S:  a  [        SU R                    S35      eg)	zOPart of `@strict`-powered validation. Validates the architecture of the config.r-   r'   r   r/      z`feed_forward_proj`: z is not a valid activation function of the dense layer. Please make sure `feed_forward_proj` is of the format `gated-{ACT_FN}` or `{ACT_FN}`, e.g. 'gated-gelu' or 'relu'N)r#   r3   len
ValueError)r9   r;   s     r=   validate_architecture UMT5Config.validate_architectureP   sf    ))//4x=1!!73x=1;L'(>(>'? @) )  <Mr?   )r4   r5   r   r1   )&__name__
__module____qualname____firstlineno____doc__
model_typekeys_to_ignore_at_inferenceattribute_mapr   int__annotations__r   r   r   r   r   r   r   r   r   floatr    r!   r#   strr$   boolr%   r&   r(   listr)   r*   r+   r8   rD   __static_attributes____classcell__)r<   s   @r=   r	   r	      s-    J#4"5 *)	M JGSD#ND#J%)d
)Is*,"C,+.#S. #L%#+# $$ ##)s)##It L#* +,L#S	/D(,)*C$J*&))J(  r?   r	   N)	rJ   huggingface_hub.dataclassesr   configuration_utilsr   utilsr   r	   __all__r2   r?   r=   <module>rZ      sJ     . 3 # ./@! @  0@F .r?   