
    Z j                     d    S r SSKJr  SSKJr  SSKJr  \" SS9\ " S S	\5      5       5       rS	/rg
)z&Funnel Transformer model configuration    )strict   )PreTrainedConfig)auto_docstringzfunnel-transformer/small)
checkpointc                     ^  \ rS rSr% SrSrSSS.rSr\\	S'   S	r
\\   \\S
4   -  \	S'   Sr\\   S-  \	S'   Sr\\	S'   Sr\\	S'   Sr\\	S'   Sr\\	S'   Sr\\	S'   Sr\\	S'   Sr\\-  \	S'   Sr\\-  \	S'   Sr\\-  \	S'   Sr\\	S'   Sr\S-  \	S'   Sr\\	S '   S!r\\	S"'   S#r\\	S$'   S%r\\	S&'   S%r \\	S''   S%r!\\	S('   Sr"\S-  \	S)'   S%r#\\	S*'   U 4S+ jr$S, r%\&S- 5       r'\'RP                  S. 5       r'\&S/ 5       r)\)RP                  S0 5       r)S1r*U =r+$ )2FunnelConfig   a  
block_sizes (`list[int]`, *optional*, defaults to `[4, 4, 4]`):
    The sizes of the blocks used in the model.
block_repeats (`list[int]`, *optional*):
    If passed along, each layer of each block is repeated the number of times indicated.
num_decoder_layers (`int`, *optional*, defaults to 2):
    The number of layers in the decoder (when not using the base model).
pooling_type (`str`, *optional*, defaults to `"mean"`):
    Possible values are `"mean"` or `"max"`. The way pooling is performed at the beginning of each block.
attention_type (`str`, *optional*, defaults to `"relative_shift"`):
    Possible values are `"relative_shift"` or `"factorized"`. The former is faster on CPU/GPU while the latter
    is faster on TPU.
separate_cls (`bool`, *optional*, defaults to `True`):
    Whether or not to separate the cls token when applying pooling.
truncate_seq (`bool`, *optional*, defaults to `True`):
    When using `separate_cls`, whether or not to truncate the last token when pooling, to avoid getting a
    sequence length that is not a multiple of 2.
pool_q_only (`bool`, *optional*, defaults to `True`):
    Whether or not to apply the pooling only to the query or to query, key and values for the attention layers.
funneld_modeln_head)hidden_sizenum_attention_headsi:w  
vocab_size)   r   r   .block_sizesNblock_repeats   num_decoder_layersi      @   d_headi   d_innergelu_new
hidden_actg?hidden_dropoutattention_dropoutg        activation_dropoutinitializer_rangeinitializer_stdg&.>layer_norm_epsmeanpooling_typerelative_shiftattention_typeTseparate_clstruncate_seqpool_q_onlypad_token_idtie_word_embeddingsc                    > U R                   c  S/[        U R                  5      -  OU R                   U l         [        TU ]  " S0 UD6  g )N    )r   lenr   super__post_init__)selfkwargs	__class__s     ڀ/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/models/funnel/configuration_funnel.pyr0   FunnelConfig.__post_init__K   sA    <@<N<N<VaS3t'7'7#88\`\n\n''    c                    [        U R                  5      [        U R                  5      :w  a  [        S5      eU R                  S;  a  [        SU R                   S35      eU R
                  S;  a  [        SU R
                   S35      eg)zOPart of `@strict`-powered validation. Validates the architecture of the config.z>`block_sizes` and `block_repeats` should have the same length.)r"   maxzGot z< for `pooling_type` but only 'mean' and 'max' are supported.)r$   
factorizedzO for `attention_type` but only 'relative_shift' and 'factorized' are supported.N)r.   r   r   
ValueErrorr#   r%   r1   s    r4   validate_architecture"FunnelConfig.validate_architectureO   s    t C(:(:$;;]^^ %
 
 tD$5$5#66rstt '
 
 t**++z{ 	
r6   c                 ,    [        U R                  5      $ N)sumr   r;   s    r4   num_hidden_layersFunnelConfig.num_hidden_layers`       4##$$r6   c                     [        S5      e)NzYThis model does not support the setting of `num_hidden_layers`. Please set `block_sizes`.NotImplementedErrorr1   values     r4   rA   rB   d   s    !g
 	
r6   c                 ,    [        U R                  5      $ r?   )r.   r   r;   s    r4   
num_blocksFunnelConfig.num_blocksj   rC   r6   c                     [        S5      e)NzRThis model does not support the setting of `num_blocks`. Please set `block_sizes`.rE   rG   s     r4   rJ   rK   n   s    !"vwwr6   )r   ),__name__
__module____qualname____firstlineno____doc__
model_typeattribute_mapr   int__annotations__r   listtupler   r   r   r   r   r   r   strr   floatr   r   r   r    r!   r#   r%   r&   boolr'   r(   r)   r*   r0   r<   propertyrA   setterrJ   __static_attributes____classcell__)r3   s   @r4   r	   r	      s   * J 'M
 J/8KcU38_,8&*M49t#*GSFCFCGS J "%NECK%%(us{(&))"u"$(OUT\( NE L#*NC*L$L$K#L#*# $$(" % % 
 

 % % x xr6   r	   N)	rQ   huggingface_hub.dataclassesr   configuration_utilsr   utilsr   r	   __all__r-   r6   r4   <module>rc      sN    - . 3 # 56Xx# Xx  7Xxv 
r6   