
    Z jE                         S SK Jr  S SKJr  SSKJr  SSKJr  SSKJ	r	  \	" SS9\ " S	 S
\5      5       5       r
\	" SS9\ " S S\5      5       5       rSS
/rg)    )Any)strict   )PreTrainedConfig)RopeParameters)auto_docstringzgoogle/t5_gemma_module-7b)
checkpointc                   *  ^  \ rS rSr% SrSrS/rSSSSSSSS.rS/S	/4S
S/S
/4S
/S
/4S.rSr	\
\S'   Sr\
\S'   Sr\
\S'   Sr\
\S'   Sr\
\S'   Sr\
\S'   Sr\
\S'   Sr\\S'   Sr\
\S'   Sr\\S '   S!r\\S"'   S#r\\S$'   S%r\
S&-  \S''   S(r\
\\
   -  S&-  \S)'   S*r\
S&-  \S+'   S#r\\S,'   S&r\ \!-  S&-  \S-'   S.r"\\S/'   S0r#\
\-  S&-  \S1'   Sr$\
\S2'   S3r%\
S&-  \S4'   S&r&\\   S&-  \S5'   S6r'\S&-  \S7'   S8r(\S&-  \S9'   S.r)\\S:'   U 4S; jr*S< r+S=r,U =r-$ )>T5GemmaModuleConfig   a  
query_pre_attn_scalar (`float`, *optional*, defaults to 256):
    scaling factor used on the attention scores
final_logit_softcapping (`float`, *optional*, defaults to 30.0):
    scaling factor when applying tanh softcapping on the logits.
attn_logit_softcapping (`float`, *optional*, defaults to 50.0):
    scaling factor when applying tanh softcapping on the attention scores.

```python
>>> from transformers import T5GemmaModuleModel, T5GemmaModuleConfig
>>> # Initializing a T5GemmaModule t5_gemma_module-7b style configuration
>>> configuration = T5GemmaModuleConfig()
>>> # Initializing a model from the t5_gemma_module-7b style configuration
>>> model = T5GemmaModuleModel(configuration)
>>> # Accessing the model configuration
>>> configuration = model.config
```t5_gemma_modulepast_key_valuescolwiserowwise)zlayers.*.self_attn.q_projzlayers.*.self_attn.k_projzlayers.*.self_attn.v_projzlayers.*.self_attn.o_projzlayers.*.mlp.gate_projzlayers.*.mlp.up_projzlayers.*.mlp.down_proj	input_idsinputs_embedshidden_statesattention_mask)embed_tokenslayersnorm  
vocab_sizei 	  hidden_sizei $  intermediate_size   num_hidden_layers   num_attention_heads   num_key_value_heads   head_dimgelu_pytorch_tanhhidden_activationi    max_position_embeddingsg{Gz?initializer_rangegư>rms_norm_epsT	use_cacher   Npad_token_id   eos_token_id   bos_token_idtie_word_embeddingsrope_parametersFattention_bias        attention_dropoutquery_pre_attn_scalari   sliding_windowlayer_typesg      >@final_logit_softcappingg      I@attn_logit_softcapping
is_decoderc                    > U R                   cC  [        U R                  5       Vs/ s H  n[        US-   S-  5      (       a  SOSPM     snU l         [        TU ]  " S0 UD6  g s  snf )Nr+   r-   sliding_attentionfull_attention )r6   ranger   boolsuper__post_init__)selfkwargsi	__class__s      ڂ/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/models/t5gemma/configuration_t5gemma.pyrA   !T5GemmaModuleConfig.__post_init___   si    #X]^b^t^tXu XuSTtQUaK'8'8#>NNXu D 	''	 s   $A#c                     U R                   U R                  -  S:w  a&  [        SU R                    SU R                   S35      eg)zOPart of `@strict`-powered validation. Validates the architecture of the config.r   zThe hidden size (z6) is not a multiple of the number of attention heads (z).N)r   r   
ValueError)rB   s    rF   validate_architecture)T5GemmaModuleConfig.validate_architectureg   sS    d666!;#D$4$4#5 622327  <    )r6   ).__name__
__module____qualname____firstlineno____doc__
model_typekeys_to_ignore_at_inferencebase_model_tp_planbase_model_pp_planr   int__annotations__r   r   r   r   r!   r#   r%   strr&   r'   floatr(   r)   r?   r*   r,   listr.   r/   r0   r   dictr1   r3   r4   r5   r6   r7   r8   r9   rA   rJ   __static_attributes____classcell__rE   s   @rF   r   r      s   $ #J#4"5%.%.%.%."+ )"+ &(9:#%568IJ!"_$56 JK!s!s    Hc0s0#'S'#u#L%It L#* +,L#S	/D(, L#*  $$48O^d*T18 ND ,/sU{T)/!$3$!%NC$J%$(KcT!(,0UT\0+/EDL/J( rL   r   c                      ^  \ rS rSr% SrSrS/r\\S.rSr	\\
\\4   -  S-  \S'   Sr\\
\\4   -  S-  \S'   S	r\\S
'   Sr\\-  \S'   Sr\\-  \S'   Sr\\-  \S'   S	r\\S'   Sr\\S'   U 4S jrSrU =r$ )T5GemmaConfigp   a  
encoder (`Union[T5GemmaModuleConfig, dict]`, optional, *optional*):
    Configuration for the encoder.
decoder (`Union[T5GemmaModuleConfig, dict]`, optional, *optional*):
    Configuration for the decoder.

Example:

```python
>>> from transformers import T5GemmaConfig, T5GemmaModel
>>> t5gemma_config = T5GemmaConfig.from_pretrained("google/t5gemma-2b-2b-prefixlm-it")
>>> model = T5GemmaModel(t5gemma_config)
```t5gemmar   )encoderdecoderNrc   rd   Tis_encoder_decoderr2   dropout_rateclassifier_dropout_rater3   r/   r   r   c                   > [        U R                  [        5      (       a  [        S0 U R                  D6U l        OU R                  c  [        5       U l        [        U R                  [        5      (       a  [        S0 U R                  D6U l        OU R                  c  [        5       U l        SU R                  l        U R                  U R                  l        U R                  U R                  l        SU R                  l        SU R                  l        U R                  U R                  l        U R                  U R                  l        U R                  R                  U R                  l
        UR                  SU R                  R                  5      U l        S H"  nX!;  d  M
  [        U R                  U5      X'   M$     [        TU ]<  " S0 UD6  g )NFTr'   )r.   r*   r,   r=   )
isinstancerc   r[   r   rd   r9   rf   r3   r)   r   cross_attention_hidden_sizepopr'   getattrr@   rA   )rB   rC   special_token_keyrE   s      rF   rA   T5GemmaConfig.__post_init__   sK   dllD)).>>DL\\!.0DLdllD)).>>DL\\!.0DL"'$($5$5!)-)?)?&"&!%$($5$5!)-)?)?&37<<3K3K0!',?A_A_!`!Q .,3DLLBS,T) "R 	''rL   )rd   rc   r'   )rM   rN   rO   rP   rQ   rR   rS   r   sub_configsrc   r[   r   rW   rd   re   r?   rf   rV   rY   rg   r3   r/   r   rA   r\   r]   r^   s   @rF   r`   r`   p   s     J#4"51>QRK;?G 4S>1D8?;?G 4S>1D8?## #L#+#+.S5[.%(us{( $$J( (rL   r`   N)typingr   huggingface_hub.dataclassesr   configuration_utilsr   modeling_rope_utilsr   utilsr   r   r`   __all__r=   rL   rF   <module>rv      s|   *  . 3 1 # 67M* M  8M` 677($ 7(  87(t 1
2rL   