
    Z jz                     d    S r SSKJr  SSKJr  SSKJr  \" SS9\ " S S	\5      5       5       rS	/rg
)zT5 model configuration    )strict   )PreTrainedConfig)auto_docstringzgoogle-t5/t5-small)
checkpointc                     ^  \ rS rSr% SrSrS/rSSSSS	.rS
r\	\
S'   Sr\	\
S'   Sr\	\
S'   Sr\	\
S'   Sr\	\
S'   Sr\	S-  \
S'   Sr\	\
S'   Sr\	\
S'   Sr\	\
S'   Sr\\	-  \
S'   Sr\\
S'   Sr\\
S'   Sr\\
S'   S r\\
S!'   S r\\
S"'   S#r\	S-  \
S$'   S%r\	\\	   -  S-  \
S&'   S'r\\	-  \
S('   S)r \\
S*'   U 4S+ jr!S, r"S-r#U =r$$ ).T5Config   a  
relative_attention_num_buckets (`int`, *optional*, defaults to 32):
    The number of buckets to use for each attention layer.
relative_attention_max_distance (`int`, *optional*, defaults to 128):
    The maximum distance of the longer sequences for the bucket separation.
feed_forward_proj (`string`, *optional*, defaults to `"relu"`):
    Type of feed forward layer to be used. Should be one of `"relu"` or `"gated-gelu"`. T5v1.1 uses the
    `"gated-gelu"` feed forward projection. Original T5 uses `"relu"`.
t5past_key_valuesd_model	num_heads
num_layersd_kv)hidden_sizenum_attention_headsnum_hidden_layershead_dimi}  
vocab_sizei   @   i   d_ff   Nnum_decoder_layers       relative_attention_num_buckets   relative_attention_max_distanceg?dropout_rategư>layer_norm_epsilong      ?initializer_factorrelufeed_forward_projTis_encoder_decoder	use_cacher   pad_token_id   eos_token_idg        classifier_dropoutF
is_decoderc                 L  > U R                   b  U R                   OU R                  U l         U R                  R                  S5      nUS   U l        US   S:H  U l        U R                  S:X  a  SU l        UR                  SS 5      SLU l        S	U l        [        TU ](  " S
0 UD6  g )N-r   gatedz
gated-gelugelu_newtie_word_embeddingsFT )r   r   r#   splitdense_act_fnis_gated_actpopscale_decoder_outputsr0   super__post_init__)selfkwargsact_info	__class__s      x/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/models/t5/configuration_t5.pyr8   T5Config.__post_init__@   s    '+'>'>'JD##PTP_P_ 	 ))//4$RL$QK72 !!\1 *D &,ZZ0Et%LTY%Y"#' ''    c                     U R                   R                  S5      n[        U5      S:  a	  US   S:w  d  [        U5      S:  a  [        SU R                    S35      eg)	zOPart of `@strict`-powered validation. Validates the architecture of the config.r,   r'   r   r.      z`feed_forward_proj`: z is not a valid activation function of the dense layer. Please make sure `feed_forward_proj` is of the format `gated-{ACT_FN}` or `{ACT_FN}`, e.g. 'gated-gelu' or 'relu'N)r#   r2   len
ValueError)r9   r;   s     r=   validate_architectureT5Config.validate_architectureW   sf    ))//4x=1!!73x=1;L'(>(>'? @) )  <Mr?   )r3   r4   r   r6   r0   )%__name__
__module____qualname____firstlineno____doc__
model_typekeys_to_ignore_at_inferenceattribute_mapr   int__annotations__r   r   r   r   r   r   r   r   r   floatr    r!   r#   strr$   boolr%   r&   r(   listr)   r*   r8   rD   __static_attributes____classcell__)r<   s   @r=   r	   r	      s    J#4"5 *)	M JGSD#ND#J%)d
)Is*,"C,+.#S. #L%#+# $$ ###s###It L#* +,L#S	/D(,&))J(. r?   r	   N)	rJ   huggingface_hub.dataclassesr   configuration_utilsr   utilsr   r	   __all__r1   r?   r=   <module>rZ      sJ     . 3 # /0G G  1GT ,r?   