
    Z j%                     2   S SK r S SKJr  S SK Jr  SSKJr  SSKJrJ	r	  SSK
Jr  SSKJr  SS	KJr  SS
KJr  SSKJr  SSKJr  SSKJrJrJr  SSKJrJrJrJrJrJ r J!r!J"r"  SS0r#Sr$\RJ                  " \&5      r'\" SS9\ " S S\5      5       5       r( " S S\RR                  5      r* " S S\RV                  5      r, " S S\5      r- " S S\"5      r. " S S \5      r/ " S! S"\!5      r0 " S# S$\ 5      r1 " S% S&\5      r2 " S' S(\5      r3 " S) S*\5      r4/ S+Qr5g),    N)strict)nn   )initialization)CacheDynamicCache)PreTrainedConfig)create_causal_mask)BaseModelOutputWithPast)RopeParameters)PreTrainedModel)Unpack)TransformersKwargsauto_docstringlogging   )LlamaAttentionLlamaForCausalLMLlamaForSequenceClassificationLlamaForTokenClassificationLlamaMLP
LlamaModelLlamaPreTrainedModelLlamaRotaryEmbedding
vocab_fileztokenizer.modelu   ▁zgoogle/gemma-7b)
checkpointc                      \ rS rSr% SrSrS/rSSSSSSSS.rS/S	/4S
S/S
/4S
/S
/4S.rSr	\
\S'   Sr\
\S'   Sr\
\S'   Sr\
\S'   Sr\
\S'   Sr\
\S'   Sr\
\S'   Sr\\S'   Sr\
\S'   Sr\\S'   S r\\S!'   S"r\\S#'   S$r\
S%-  \S&'   S'r\
\\
   -  S%-  \S('   S)r\
S%-  \S*'   S"r\\S+'   S%r\ \!-  S%-  \S,'   S-r"\\S.'   S/r#\\
-  \S0'   S%r$\S%-  \S1'   S2r%g%)3GemmaConfig1   a  
use_bidirectional_attention (`bool`, *optional*):
    If True, the model will attend to all text tokens instead of using a causal mask.

```python
>>> from transformers import GemmaModel, GemmaConfig
>>> # Initializing a Gemma gemma-7b style configuration
>>> configuration = GemmaConfig()
>>> # Initializing a model from the gemma-7b style configuration
>>> model = GemmaModel(configuration)
>>> # Accessing the model configuration
>>> configuration = model.config
```gemmapast_key_valuescolwiserowwise)zlayers.*.self_attn.q_projzlayers.*.self_attn.k_projzlayers.*.self_attn.v_projzlayers.*.self_attn.o_projzlayers.*.mlp.gate_projzlayers.*.mlp.up_projzlayers.*.mlp.down_proj	input_idsinputs_embedshidden_statesattention_mask)embed_tokenslayersnormi  
vocab_sizei   hidden_sizei `  intermediate_size   num_hidden_layers   num_attention_headsnum_key_value_heads   head_dimgelu_pytorch_tanh
hidden_acti    max_position_embeddingsg{Gz?initializer_rangeư>rms_norm_epsT	use_cacher   Npad_token_id   eos_token_idr   bos_token_idtie_word_embeddingsrope_parametersFattention_biasg        attention_dropoutuse_bidirectional_attention )&__name__
__module____qualname____firstlineno____doc__
model_typekeys_to_ignore_at_inferencebase_model_tp_planbase_model_pp_planr+   int__annotations__r,   r-   r/   r1   r2   r4   r6   strr7   r8   floatr:   r;   boolr<   r>   listr?   r@   rA   r   dictrB   rC   rD   __static_attributes__rE       x/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/models/gemma/modular_gemma.pyr   r   1   sd    J#4"5%.%.%.%."+ )"+ &(9:#%568IJ!"_$56 JK"s"s!!!!Hc)J)#'S'#u#L%It L#* +,L#S	/D(, L#*  $$48O^d*T18 ND %(us{(/33rW   r   c            	       l   ^  \ rS rSrSrSS\S\S\S\4U 4S jjjrS\R                  4U 4S	 jjr
S
rU =r$ )GemmaTextScaledWordEmbeddingi   zT
This module overrides nn.Embeddings' forward by multiplying with embeddings scale.
num_embeddingsembedding_dimpadding_idxembed_scalec                 |   > [         TU ]  XU5        X@l        U R                  S[        R
                  " U5      SS9  g )Nr_   F)
persistent)super__init__scalar_embed_scaleregister_buffertorchtensor)selfr\   r]   r^   r_   	__class__s        rX   rc   %GemmaTextScaledWordEmbedding.__init__n   s7    D"-]ELL,ERWXrW   r$   c                    > [         TU ]  U5      U R                  R                  U R                  R
                  5      -  $ N)rb   forwardr_   toweightdtype)rh   r$   ri   s     rX   rm   $GemmaTextScaledWordEmbedding.forwards   s2    wy)D,<,<,?,?@Q@Q,RRRrW   )rd   )      ?)rF   rG   rH   rI   rJ   rO   rR   rc   rf   Tensorrm   rV   __classcell__ri   s   @rX   rZ   rZ   i   sM    Ys Y3 YS Y_d Y Y
S S SrW   rZ   c                   J   ^  \ rS rSrS	S\S\4U 4S jjjrS rS rS r	Sr
U =r$ )
GemmaRMSNormw   dimepsc                    > [         TU ]  5         X l        [        R                  " [
        R                  " U5      5      U l        g rl   )rb   rc   rz   r   	Parameterrf   zerosro   )rh   ry   rz   ri   s      rX   rc   GemmaRMSNorm.__init__x   s,    ll5;;s#34rW   c                     U[         R                  " UR                  S5      R                  SSS9U R                  -   5      -  $ )Nr   T)keepdim)rf   rsqrtpowmeanrz   )rh   xs     rX   _normGemmaRMSNorm._norm}   s4    5;;quuQx}}R}>IJJJrW   c                     U R                  UR                  5       5      nUSU R                  R                  5       -   -  nUR                  U5      $ )Nrr   )r   rR   ro   type_as)rh   r   outputs      rX   rm   GemmaRMSNorm.forward   sC    AGGI& 3!2!2!445~~a  rW   c                 ^    [        U R                  R                  5       SU R                   3$ )Nz, eps=)tuplero   shaperz   )rh   s    rX   
extra_reprGemmaRMSNorm.extra_repr   s'    ))*+6$((<<rW   )rz   ro   )r9   )rF   rG   rH   rI   rO   rR   rc   r   rm   r   rV   rt   ru   s   @rX   rw   rw   w   s0    5C 5e 5 5
K!= =rW   rw   c                   (   ^  \ rS rSrU 4S jrSrU =r$ )GemmaMLP   c                 >  > [         TU ]  U5        [        R                  " U R                  U R
                  SS9U l        [        R                  " U R                  U R
                  SS9U l        [        R                  " U R
                  U R                  SS9U l        g )NF)bias)	rb   rc   r   Linearr,   r-   	gate_projup_proj	down_projrh   configri   s     rX   rc   GemmaMLP.__init__   ss     4#3#3T5K5KRWXyy!1!143I3IPUV4#9#94;K;KRWXrW   )r   r   r   )rF   rG   rH   rI   rc   rV   rt   ru   s   @rX   r   r      s    Y YrW   r   c                       \ rS rSrSrg)GemmaRotaryEmbedding   rE   NrF   rG   rH   rI   rV   rE   rW   rX   r   r          rW   r   c                   8   ^  \ rS rSrSrS\S\4U 4S jjrSrU =r	$ )GemmaAttention   z=Multi-headed attention from 'Attention Is All You Need' paperr   	layer_idxc                 P   > [         TU ]  5         [        USS5      (       + U l        g )NrD   F)rb   rc   getattr	is_causal)rh   r   r   ri   s      rX   rc   GemmaAttention.__init__   s"    $V-JERRrW   )r   )
rF   rG   rH   rI   rJ   r   rO   rc   rV   rt   ru   s   @rX   r   r      s"    GS{ Ss S SrW   r   c                   B    \ rS rSr\R
                  " 5       S 5       rSrg)GemmaPreTrainedModel   c                 (   [         R                  " X5        SUR                  R                  ;   a!  [        R
                  " UR                  5        g [        U[        5      (       a,  [        R                  " UR                  UR                  5        g g )NRMSNorm)r   _init_weightsri   rF   initzeros_ro   
isinstancerZ   	constant_r_   rd   )rh   modules     rX   r   "GemmaPreTrainedModel._init_weights   sa    %%d3((111KK& <==NN6--v/H/HI >rW   rE   N)rF   rG   rH   rI   rf   no_gradr   rV   rE   rW   rX   r   r      s    
]]_J JrW   r   c                      ^  \ rS rSrS\4U 4S jjr      SS\R                  S-  S\R                  S-  S\R                  S-  S\	S-  S	\R                  S-  S
\S-  S\\   S\4S jjrSrU =r$ )
GemmaModel   r   c                    > [         TU ]  U5        [        UR                  UR                  U R
                  U R                  R                  S-  S9U l        g )Ng      ?)r_   )rb   rc   rZ   r+   r,   r^   r   r(   r   s     rX   rc   GemmaModel.__init__   sK     8v1143C3CQUQ\Q\QhQhjmQm
rW   Nr$   r'   position_idsr!   r%   r;   kwargsreturnc           
      T   US L US L-  (       a  [        S5      eUc  U R                  U5      nU(       a  Uc  [        U R                  S9nUcU  Ub  UR	                  5       OSn[
        R                  " UR                  S   UR                  S9U-   nUR                  S5      n[        U R                  UUUUS9n	Un
U R                  XS9nU R                  S U R                  R                    H  nU" U
4U	UUUUS.UD6n
M     U R                  U
5      n
[        U
U(       a  US	9$ S S	9$ )
Nz:You must specify exactly one of input_ids or inputs_embeds)r   r   r=   )device)r   r%   r'   r!   r   )r   )r'   r   r!   r;   position_embeddings)last_hidden_stater!   )
ValueErrorr(   r   r   get_seq_lengthrf   aranger   r   	unsqueezer
   
rotary_embr)   r/   r*   r   )rh   r$   r'   r   r!   r%   r;   r   past_seen_tokenscausal_maskr&   r   decoder_layers                rX   rm   GemmaModel.forward   sU    -t";<YZZ  --i8M0*$++>OCRC^==?de <<(;(;A(>}G[G[\_ooL'11!4L(;;')+%
 &"oomoW![[)H4;;+H+HIM)*) /#$7 M J 		-0&+/8O
 	
>B
 	
rW   )r(   )NNNNNN)rF   rG   rH   rI   r   rc   rf   
LongTensorrs   r   FloatTensorrS   r   r   r   rm   rV   rt   ru   s   @rX   r   r      s    
{ 
 .2.204(,26!%2
##d*2
 t+2
 &&-	2

 2
 ((4/2
 $;2
 +,2
 
!2
 2
rW   r   c                   (   ^  \ rS rSrU 4S jrSrU =r$ )GemmaForCausalLM   c                  8   > [        5       R                  " S0 U D6$ )a  
Example:

```python
>>> from transformers import AutoTokenizer, GemmaForCausalLM

>>> model = GemmaForCausalLM.from_pretrained("google/gemma-7b")
>>> tokenizer = AutoTokenizer.from_pretrained("google/gemma-7b")

>>> prompt = "What is your favorite condiment?"
>>> inputs = tokenizer(prompt, return_tensors="pt")

>>> # Generate
>>> generate_ids = model.generate(inputs.input_ids, max_length=30)
>>> tokenizer.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
"What is your favorite condiment?"
```rE   )rb   rm   )super_kwargsri   s    rX   rm   GemmaForCausalLM.forward   s    $ w...rW   rE   )rF   rG   rH   rI   rm   rV   rt   ru   s   @rX   r   r      s    / /rW   r   c                       \ rS rSrSrg)GemmaForSequenceClassification   rE   Nr   rE   rW   rX   r   r      r   rW   r   c                       \ rS rSrSrg)GemmaForTokenClassificationi  rE   Nr   rE   rW   rX   r   r     r   rW   r   )r   r   r   r   r   r   )6rf   huggingface_hub.dataclassesr   r    r   r   cache_utilsr   r   configuration_utilsr	   masking_utilsr
   modeling_outputsr   modeling_rope_utilsr   modeling_utilsr   processing_utilsr   utilsr   r   r   llama.modeling_llamar   r   r   r   r   r   r   r   VOCAB_FILES_NAMESSPIECE_UNDERLINE
get_loggerrF   loggerr   	EmbeddingrZ   Modulerw   r   r   r   r   r   r   r   r   __all__rE   rW   rX   <module>r      s!  "  .  & . 3 / 7 1 - & @ @	 	 	 "#45  			H	% ,-34" 34  .34lS2<< S=299 =(Yx Y	/ 	S^ SJ/ J:
 :
z/' /,	%C 		"= 	rW   