
    Z j                         S SK r S SKJr  SSKJr  SSKJr  SSKJr  SSK	J
r
JrJrJrJr  \" S	S
9\ " S S\5      5       5       r " S S\5      r " S S\5      r " S S\
5      r " S S\5      r " S S\5      r/ SQrg)    N)strict   )Cache)auto_docstring   )Gemma2Config)Gemma2AttentionGemma2DecoderLayerGemma2ForCausalLM	Gemma2MLPGemma2RMSNormzgoogle/vaultgemma-1b)
checkpointc                   &    \ rS rSrSr\" 5       rSrg)VaultGemmaConfig   a  
query_pre_attn_scalar (`float`, *optional*, defaults to 256):
    scaling factor used on the attention scores
final_logit_softcapping (`float`, *optional*, defaults to 30.0):
    scaling factor when applying tanh softcapping on the logits.
attn_logit_softcapping (`float`, *optional*, defaults to 50.0):
    scaling factor when applying tanh softcapping on the attention scores.

```python
>>> from transformers import VaultGemmaModel, VaultGemmaConfig
>>> # Initializing a VaultGemma vaultgemma-7b style configuration
>>> configuration = VaultGemmaConfig()
>>> # Initializing a model from the vaultgemma-7b style configuration
>>> model = VaultGemmaModel(configuration)
>>> # Accessing the model configuration
>>> configuration = model.config
``` N)__name__
__module____qualname____firstlineno____doc__AttributeErroruse_bidirectional_attention__static_attributes__r       ڂ/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/models/vaultgemma/modular_vaultgemma.pyr   r      s    $ #1"2r   r   c                       \ rS rSrSrg)VaultGemmaRMSNorm1   r   Nr   r   r   r   r   r   r   r   r   r   1       r   r   c                       \ rS rSrSrg)VaultGemmaMLP5   r   Nr    r   r   r   r#   r#   5   r!   r   r#   c                   8   ^  \ rS rSrSrS\S\4U 4S jjrSrU =r	$ )VaultGemmaAttention9   z=Multi-headed attention from 'Attention Is All You Need' paperconfig	layer_idxc                 0   > [         TU ]  5         SU l        g )NT)super__init__	is_causal)selfr(   r)   	__class__s      r   r,   VaultGemmaAttention.__init__<   s    r   )r-   )
r   r   r   r   r   r   intr,   r   __classcell__r/   s   @r   r&   r&   9   s    G/ C  r   r&   c                   *  ^  \ rS rSrU 4S jr   SS\R                  S\\R                  \R                  4   S\R                  S-  S\R                  S-  S\	S-  S	\\R                  \\R                  \R                  4   S-  4   4S
 jjrSrU =r$ )VaultGemmaDecoderLayerA   c                 .   > [         TU ]  " S0 UD6  U ?U ?g )Nr   )r+   r,   post_attention_layernormpost_feedforward_layernorm)r.   super_kwargsr/   s     r   r,   VaultGemmaDecoderLayer.__init__B   s    (<()+r   Nhidden_statesposition_embeddingsattention_maskposition_idspast_key_valuesreturnc           	          UnU R                  U5      nU R                  " SUUUUUS.UD6u  pXq-   nUnU R                  U5      nU R                  U5      nXq-   nU$ )N)r<   r=   r>   r?   r@   r   )input_layernorm	self_attnpre_feedforward_layernormmlp)	r.   r<   r=   r>   r?   r@   kwargsresidual_s	            r   forwardVaultGemmaDecoderLayer.forwardG   s     !,,];>> 
' 3)%+
 
 !0 66}E/ 0r   r   )NNN)r   r   r   r   r,   torchTensortuple
LongTensorr   FloatTensorrJ   r   r2   r3   s   @r   r5   r5   A   s    , /304(,|| #5<<#=> t+	
 &&-  
u  %(9(95;L;L(L"MPT"TT	U r   r5   c                       \ rS rSrSrg)VaultGemmaForCausalLMe   r   Nr    r   r   r   rR   rR   e   r!   r   rR   )r   rR   VaultGemmaModelVaultGemmaPreTrainedModel)rL   huggingface_hub.dataclassesr   cache_utilsr   utilsr   gemma2.configuration_gemma2r   gemma2.modeling_gemma2r	   r
   r   r   r   r   r   r#   r&   r5   rR   __all__r   r   r   <module>r\      s      .   # 6 u u 123| 3  33,	 		I 	/ !/ !H	- 	r   