
    Z j
                    x   S SK r S SKJr  S SKJr  S SKrS SKJr  SSKJ	r
  SSKJr  SSKJrJrJrJr  SSKJrJrJr  SS	KJrJr  SS
KJrJrJr  SSKJr  SSKJ r   SSK!J"r"J#r#J$r$J%r%J&r&J'r'J(r(  SSK)J*r*J+r+  SSK,J-r-J.r.  SSK/J0r0  SSK1J2r2J3r3J4r4J5r5  SSK6J7r7J8r8  SSK9J:r:J;r;  SSK<J=r=  SSK>J?r?J@r@JArAJBrB   " S S\R                  5      rD " S S\R                  5      rE " S S\R                  5      rFS rG\" S5      SQS  j5       rHS!\R                  S"\JS#\R                  4S$ jrK   SRS%\R                  S&\R                  S'\R                  S(\R                  S)\R                  S-  S*\L\J-  S+\LS-  S,\LS-  S#\M\R                  \R                  4   4S- jjrN\" \H5       " S. S/\R                  5      5       rO\" \H5       " S0 S1\R                  5      5       rP " S2 S3\ 5      rQ " S4 S5\ 5      rR " S6 S7\R                  5      rS " S8 S9\R                  5      rT " S: S;\R                  5      rU " S< S=\R                  5      rW\3 " S> S?\.5      5       rXSSS@\JS#\4SA jjrY " SB SC\X5      rZ " SD SE\X5      r[ " SF SG\X5      r\\3 " SH SI\X5      5       r] " SJ SK\X\5      r^\3 " SL SM\X5      5       r_\3 " SN SO\X5      5       r`/ SPQrag)T    N)Callable)Optional   )initialization)ACT2FN)CacheDynamicCacheEncoderDecoderCacheStaticCache)GenerationConfigGenerationMixinGenerationMode)use_kernel_func_from_hubuse_kernelized_func)create_bidirectional_maskcreate_causal_mask!create_sliding_window_causal_mask)FlashAttentionKwargs)GradientCheckpointingLayer)BaseModelOutput)BaseModelOutputWithPastAndCrossAttentionsBaseModelOutputWithPoolingSeq2SeqLMOutputSeq2SeqModelOutputSequenceClassifierOutputTokenClassifierOutput)ROPE_INIT_FUNCTIONSdynamic_rope_update)ALL_ATTENTION_FUNCTIONSPreTrainedModel)Unpack)TransformersKwargsauto_docstringcan_return_tupletorch_compilable_check)maybe_autocastmerge_with_config_defaults)OutputRecordercapture_outputs   )	AutoModel   )T5Gemma2ConfigT5Gemma2DecoderConfigT5Gemma2EncoderConfigT5Gemma2TextConfigc                   J   ^  \ rS rSrS	S\S\4U 4S jjjrS rS rS r	Sr
U =r$ )
T5Gemma2RMSNorm7   dimepsc                    > [         TU ]  5         X l        [        R                  " [
        R                  " U5      5      U l        g N)super__init__r5   nn	Parametertorchzerosweight)selfr4   r5   	__class__s      /root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/models/t5gemma2/modeling_t5gemma2.pyr9   T5Gemma2RMSNorm.__init__8   s,    ll5;;s#34    c                     U[         R                  " UR                  S5      R                  SSS9U R                  -   5      -  $ )Nr*   T)keepdim)r<   rsqrtpowmeanr5   )r?   xs     rA   _normT5Gemma2RMSNorm._norm=   s4    5;;quuQx}}R}>IJJJrC   c                     U R                  UR                  5       5      nUSU R                  R                  5       -   -  nUR                  U5      $ )N      ?)rK   floatr>   type_as)r?   rJ   outputs      rA   forwardT5Gemma2RMSNorm.forward@   sC    AGGI& 3!2!2!445~~a  rC   c                 ^    [        U R                  R                  5       SU R                   3$ )Nz, eps=)tupler>   shaper5   r?   s    rA   
extra_reprT5Gemma2RMSNorm.extra_reprG   s'    ))*+6$((<<rC   )r5   r>   )gư>)__name__
__module____qualname____firstlineno__intrO   r9   rK   rR   rX   __static_attributes____classcell__r@   s   @rA   r2   r2   7   s0    5C 5e 5 5
K!= =rC   r2   c                   6   ^  \ rS rSrS\4U 4S jjrS rSrU =r$ )T5Gemma2MLPK   configc                   > [         TU ]  5         Xl        UR                  U l        UR                  U l        [
        R                  " U R                  U R                  SS9U l        [
        R                  " U R                  U R                  SS9U l        [
        R                  " U R                  U R                  SS9U l	        [        UR                     U l        [
        R                  " UR                  5      U l        g )NFbias)r8   r9   re   hidden_sizeintermediate_sizer:   Linear	gate_projup_proj	down_projr   hidden_activationact_fnDropoutdropout_ratedropoutr?   re   r@   s     rA   r9   T5Gemma2MLP.__init__L   s    !--!'!9!94#3#3T5K5KRWXyy!1!143I3IPUV4#9#94;K;KRWXV556zz&"5"56rC   c                     U R                  U R                  U5      5      U R                  U5      -  nU R                  U5      nU R	                  U5      nU$ r7   )rp   rl   rm   rs   rn   )r?   rJ   hidden_statesrn   s       rA   rR   T5Gemma2MLP.forwardW   sH    DNN1$56aH]3NN=1	rC   )rp   re   rn   rs   rl   ri   rj   rm   )	rZ   r[   r\   r]   r0   r9   rR   r_   r`   ra   s   @rA   rc   rc   K   s    	71 	7 rC   rc   c                      ^  \ rS rSr% \R
                  \S'   SS\4U 4S jjjr\	    SS\S-  S\
S   S\S-  S	\S-  S
\S\4   4
S jj5       r\R                   " 5       \SS j5       5       rSrU =r$ )T5Gemma2RotaryEmbedding^   inv_freqNre   c                 f  > [         TU ]  5         UR                  U l        UR                  U l        Xl        [        [        UR                  5      5      U l        0 U l	        U R                   H  nU R
                  R                  U   nUc  M!  US   U R                  U'   U R                  nU R                  U   S:w  a  [        U R                  U      nU" U R
                  US9u  pgU R                  U S3USS9  U R                  U S3UR                  5       SS9  [        X S3U5        M     g )	N	rope_typedefault
layer_type	_inv_freqF
persistent_original_inv_freq_attention_scaling)r8   r9   max_position_embeddingsmax_seq_len_cachedoriginal_max_seq_lenre   listsetlayer_typesr~   rope_parameterscompute_default_rope_parametersr   register_bufferclonesetattr)	r?   re   devicer   rope_paramsrope_init_fncurr_inv_freqcurr_attention_scalingr@   s	           rA   r9    T5Gemma2RotaryEmbedding.__init__a   s(   "("@"@$*$B$B!F$6$6 78**J++55jAK")4[)ADNN:&%)%I%IL~~j)Y624>>*3MN4@Yc4d1M  J<y!9=UZ [  J</A!BMDWDWDYfk lDL(:;=ST +rC   r   ztorch.deviceseq_lenr   returnztorch.Tensorc           	         U R                   U   S   n[        U SS5      =(       d    U R                  U R                  -  nSnSU[        R
                  " SUS[        R                  S9R                  U[        R                  S9U-  -  -  nXv4$ )	a  
Computes the inverse frequencies according to the original RoPE implementation
Args:
    config ([`~transformers.PreTrainedConfig`]):
        The model configuration.
    device (`torch.device`):
        The device to use for initialization of the inverse frequencies.
    seq_len (`int`, *optional*):
        The current sequence length. Unused for this type of RoPE.
    layer_type (`str`, *optional*):
        The current layer type if the model has different RoPE parameters per type.
        Should not be used unless `config.layer_types is not None`

Returns:
    Tuple of (`torch.Tensor`, `float`), containing the inverse frequencies for the RoPE embeddings and the
    post-processing scaling factor applied to the computed cos/sin (unused in this type of RoPE).

rope_thetahead_dimNrN   r   r*   dtyper   r   )	r   getattrri   num_attention_headsr<   arangeint64torO   )re   r   r   r   baser4   attention_factorr|   s           rA   r   7T5Gemma2RotaryEmbedding.compute_default_rope_parametersv   s    2 %%j1,?fj$/c63E3EIcIc3c U\\!S!5;;?BB&X]XcXcBdgjjk
 ))rC   c                 H   [        X S35      n[        X S35      nUS S S 2S 4   R                  5       R                  UR                  S   SS5      R	                  UR
                  5      nUS S 2S S S 24   R                  5       n[        UR
                  R                  [        5      (       a0  UR
                  R                  S:w  a  UR
                  R                  OSn[        USS	9   UR                  5       UR                  5       -  R                  SS
5      n	[        R                  " X4SS9n
U
R                  5       U-  nU
R                  5       U-  nS S S 5        WR	                  UR                  S9WR	                  UR                  S94$ ! , (       d  f       N@= f)Nr   r   r   rE   r,   mpscpuF)device_typeenabledr*   r4   r   )r   rO   expandrV   r   r   
isinstancetypestrr&   	transposer<   catcossinr   )r?   rJ   position_idsr   r|   attention_scalinginv_freq_expandedposition_ids_expandedr   freqsembr   r   s                rA   rR   T5Gemma2RotaryEmbedding.forward   sd    4<y!9:#DL8J*KL$T1d]399;BB<CUCUVWCXZ\^_`ccdedldlm ,QaZ 8 > > @'1!((--'E'E!((--[`J`ahhmmfkUC&,,.1F1L1L1NNYYZ[]^_E))UN3C'')//C'')//C	 D vvAGGv$cff177f&;;; DCs   +A.F
F!)re   r   r   r   r~   r7   )NNNN)rZ   r[   r\   r]   r<   Tensor__annotations__r0   r9   staticmethodr   r^   r   rU   rO   r   no_gradr   rR   r_   r`   ra   s   @rA   rz   rz   ^   s    llU1 U U* ,0+/"!%	!*"T)!*(!* t!* $J	!*
 
~u$	%!* !*F ]]_<  <rC   rz   c                     U SSU R                   S   S-  24   nU SU R                   S   S-  S24   n[        R                  " U* U4SS9$ )z*Rotates half the hidden dims of the input..NrE   r*   r   )rV   r<   r   )rJ   x1x2s      rA   rotate_halfr      sZ    	
3"!''"+"""	#B	
3q ""	#B99rc2YB''rC   rotary_pos_embc                     UR                  U5      nUR                  U5      nX-  [        U 5      U-  -   nX-  [        U5      U-  -   nXV4$ )aI  Applies Rotary Position Embedding to the query and key tensors.

Args:
    q (`torch.Tensor`): The query tensor.
    k (`torch.Tensor`): The key tensor.
    cos (`torch.Tensor`): The cosine part of the rotary embedding.
    sin (`torch.Tensor`): The sine part of the rotary embedding.
    unsqueeze_dim (`int`, *optional*, defaults to 1):
        The 'unsqueeze_dim' argument specifies the dimension along which to unsqueeze cos[position_ids] and
        sin[position_ids] so that they can be properly broadcasted to the dimensions of q and k. For example, note
        that cos[position_ids] and sin[position_ids] have the shape [batch_size, seq_len, head_dim]. Then, if q and
        k have the shape [batch_size, heads, seq_len, head_dim], then setting unsqueeze_dim=1 makes
        cos[position_ids] and sin[position_ids] broadcastable to the shapes of q and k. Similarly, if q and k have
        the shape [batch_size, seq_len, heads, head_dim], then set unsqueeze_dim=2.
Returns:
    `tuple(torch.Tensor)` comprising of the query and key tensors rotated using the Rotary Position Embedding.
)	unsqueezer   )qkr   r   unsqueeze_dimq_embedk_embeds          rA   apply_rotary_pos_embr      sS    & --
&C
--
&Cw;q>C/0Gw;q>C/0GrC   rw   n_repr   c                     U R                   u  p#pEUS:X  a  U $ U SS2SS2SSS2SS24   R                  X#XU5      n U R                  X#U-  XE5      $ )z
This is the equivalent of torch.repeat_interleave(x, dim=1, repeats=n_rep). The hidden states go from (batch,
num_key_value_heads, seqlen, head_dim) to (batch, num_attention_heads, seqlen, head_dim)
r,   N)rV   r   reshape)rw   r   batchnum_key_value_headsslenr   s         rA   	repeat_kvr      s_    
 2?1D1D.Ez!!Qa"23::5W\dlmM  e(CTTTrC   modulequerykeyvalueattention_maskrs   scalingsoftcapc                 j   Uc  U R                   S-  n[        X R                  5      n	[        X0R                  5      n
[        R                  " XR                  SS5      5      U-  nUb  X-  n[        R                  " U5      nX-  nUb  X-   n[        R                  R                  US[        R                  S9R                  UR                  5      n[        R                  R                  XU R                  S9n[        R                  " X5      nUR                  SS5      R                  5       nX4$ )N      r*   r   rE   )r4   r   )ptrainingr,   )r   r   num_key_value_groupsr<   matmulr   tanhr:   
functionalsoftmaxfloat32r   r   rs   r   
contiguous)r   r   r   r   r   rs   r   r   kwargs
key_statesvalue_statesattn_weightsattn_outputs                rA   eager_attention_forwardr      s    //4'3 ; ;<JU$?$?@L<<';';Aq'ABWLL#-zz,/#-!#4 ==((2U]](SVVW\WbWbcL==((6??([L,,|:K''1-88:K$$rC   c                     ^  \ rS rSrSrS\S\4U 4S jjr   SS\R                  S\R                  S	\R                  S-  S
\
S-  S\\   S\\R                  \R                  S-  \\R                     S-  4   4S jjrSrU =r$ )T5Gemma2SelfAttention   z=Multi-headed attention from 'Attention Is All You Need' paperre   	layer_idxc                   > [         TU ]  5         [        US5      (       a  UR                  U   OS U l        Xl        X l        [        USUR                  UR                  -  5      U l
        UR                  UR                  -  U l        UR                  S-  U l        U R
                  R                  U l        SU l        ["        R$                  " UR                  UR                  U R                  -  UR&                  S9U l        ["        R$                  " UR                  UR                  U R                  -  UR&                  S9U l        ["        R$                  " UR                  UR                  U R                  -  UR&                  S9U l        ["        R$                  " UR                  U R                  -  UR                  UR&                  S9U l        U R
                  R0                  U l        U R                  S:X  a  UR2                  OS U l        U R                  S:H  U l        [7        UR                  UR8                  S9U l        [7        UR                  UR8                  S9U l        g Nr   r   r   Frg   sliding_attention)r4   r5   r8   r9   hasattrr   r   re   r   r   ri   r   r   r   r   query_pre_attn_scalarr   attention_dropout	is_causalr:   rk   attention_biasq_projk_projv_projo_projattn_logit_softcappingsliding_window
is_slidingr2   rms_norm_epsq_normk_normr?   re   r   r@   s      rA   r9   T5Gemma2SelfAttention.__init__      ;B6=;Y;Y&,,Y7_c"
F4F4F&JdJd4de$*$>$>&B\B\$\!33T9!%!>!>ii : :T]] JQWQfQf
 ii : :T]] JQWQfQf
 ii : :T]] JQWQfQf
 ii&&68J8JQWQfQf
 '+kk&H&H#7;J]7]f33cg//-@@%&//v?R?RS%&//v?R?RSrC   Nrw   position_embeddingsr   past_key_valuesr   r   c                 b   UR                   S S n/ UQSPU R                  P7nU R                  U5      R                  U5      R	                  SS5      nU R                  U5      R                  U5      R	                  SS5      n	U R                  U5      R                  U5      R	                  SS5      n
U R                  U5      nU R                  U	5      n	Uu  p[        XX5      u  pUb  UR                  XU R                  5      u  p[        R                  " U R                  R                  [         5      nU" U UU	U
U4U R"                  (       a  U R$                  OSU R&                  U R(                  S.UD6u  pUR*                  " / UQSP76 R-                  5       nU R/                  U5      nX4$ )NrE   r,   r*           )rs   r   r   )rV   r   r   viewr   r   r   r  r  r   updater   r   get_interfacere   _attn_implementationr   r   r   r   r   r   r   r   )r?   rw   r  r   r  r   input_shapehidden_shapequery_statesr   r   r   r   attention_interfacer   r   s                   rA   rR   T5Gemma2SelfAttention.forward  s    $))#2.88b8$--8{{=166|DNNqRST[[/44\BLLQPQR
{{=166|DNNqRST{{<0[[,
&#7RU#[ &'6'='=jX\XfXf'g$J(?(M(MKK,,.E)
 %8
%
 /3mmD**LL..
%
 
%
! "));;;;FFHkk+.((rC   r   r   re   r   r   r   r  r   r   r   r   r   r  r   r   r   r   NNN)rZ   r[   r\   r]   __doc__r0   r^   r9   r<   r   r   r!   r"   rU   rR   r_   r`   ra   s   @rA   r   r      s    GT1 Tc TB -1.2(,*)||*) #\\*) t+	*)
 *) +,*) 
u||U\\D0%2E2LL	M*) *)rC   r   c                   >  ^  \ rS rSrSrS\S\4U 4S jjr SS\R                  S\
\R                  \R                  4   S	\R                  S-  S
\R                  S\S-  S\\   S\
\R                  \R                  S-  \
\R                     S-  4   4S jjrSrU =r$ )T5Gemma2MergedAttentioniK  z6Merged self-attention and cross-attention for decoder.re   r   c                   > [         TU ]  5         [        US5      (       a  UR                  U   OS U l        Xl        X l        [        USUR                  UR                  -  5      U l
        UR                  UR                  -  U l        UR                  S-  U l        U R
                  R                  U l        SU l        ["        R$                  " UR                  UR                  U R                  -  UR&                  S9U l        ["        R$                  " UR                  UR                  U R                  -  UR&                  S9U l        ["        R$                  " UR                  UR                  U R                  -  UR&                  S9U l        ["        R$                  " UR                  U R                  -  UR                  UR&                  S9U l        U R
                  R0                  U l        U R                  S:X  a  UR2                  OS U l        U R                  S:H  U l        [7        UR                  UR8                  S9U l        [7        UR                  UR8                  S9U l        g r   r   r  s      rA   r9    T5Gemma2MergedAttention.__init__O  r  rC   Nrw   r  merged_attention_maskencoder_hidden_statesr  r   r   c                    UR                   S S n/ UQSPU R                  P7nUR                   S S n	/ U	QSPU R                  P7n
U R                  U5      R                  U5      R	                  SS5      nU R                  U5      R                  U5      R	                  SS5      nU R                  U5      R                  U5      R	                  SS5      nU R                  U5      nU R                  U5      nUu  p[        XX5      u  pUb[  UR                  nUR                  XU R                  5      u  pUR                  R                  U R                  5      nUR                  nUb  W(       d  U R                  U5      R                  U
5      R	                  SS5      nU R                  U5      R                  U
5      R	                  SS5      nU R                  U5      nUb9  WR                  UUU R                  5      u  nnSUR                  U R                  '   OFWR                   U R                     R"                  nUR                   U R                     R$                  nUnU	S   n[&        R(                  " UU/SS9n[&        R(                  " UU/SS9n[*        R,                  " U R.                  R0                  [2        5      nU" U UUUU4U R4                  (       a  U R6                  OSU R8                  S.UD6u  nnUR:                  " / UQSP76 R=                  5       nU R?                  U5      nUb  USS U* 24   nUSU* S 24   nOS	u  nnUUU4$ )
NrE   r,   r*   Tr   r
  )rs   r   .)NN) rV   r   r   r  r   r   r   r  r  r   self_attention_cacher  r   
is_updatedgetcross_attention_cachelayerskeysvaluesr<   r   r   r  re   r  r   r   r   r   r   r   r   )r?   rw   r  r  r  r  r   r  r  cross_input_shapecross_hidden_shaper  r   r   r   r   r  r  r!  cross_key_statescross_value_statescross_key_sizer  r   r   self_attn_weightscross_attn_weightss                              rA   rR   T5Gemma2MergedAttention.forwardm  sh    $))#2.88b8$--8177<D0D"DdmmD {{=166|DNNqRST[[/44\BLLQPQR
{{=166|DNNqRST{{<0[[,
&#7RU#[ &#2#G#G ';'B'B:]a]k]k'l$J )3377GJ$3$I$I!"*#{{+@AFFGYZddefhij!%-B!C!H!HI[!\!f!fghjk!l#{{+;<*7L7S7S$&8$..84 "4 >B**4>>:4;;DNNKPP!6!=!=dnn!M!T!T $*1-YY
,<=1E
yy,0B!CK(?(M(MKK,,.E)
 %8!	%
 /3mmD**LL	%
 	%
!\ "));;;;FFHkk+. # ,S2BN?2B-B C!-cN?3C.C!D4>11-/AAArC   r  r7   )rZ   r[   r\   r]   r  r0   r^   r9   r<   r   rU   r
   r!   r   rR   r_   r`   ra   s   @rA   r  r  K  s    @T1 Tc TN 7;TB ||TB #5<<#=>	TB
  %||d2TB  %||TB -t3TB -.TB 
u||U\\D0%2E2LL	MTB TBrC   r  c                      ^  \ rS rSrSrS\4U 4S jjr   SS\R                  S\	\R                  \R                  4   S-  S\R                  S-  S	\R                  S-  S
\	\R                  4   4
S jjrSrU =r$ )T5Gemma2EncoderLayeri  zEncoder sub-layer.r   c                 $  > [         TU ]  5         UR                  U l        Xl        X l        UR
                  U   U l        [        UUS9U l        [        UR                  UR                  S9U l        [        UR                  UR                  S9U l        [        U5      U l        [        UR                  UR                  S9U l        [        UR                  UR                  S9U l        ["        R$                  " UR&                  5      U l        g N)re   r   r5   )r8   r9   ri   re   r   r   attention_typer   	self_attnr2   r  pre_self_attn_layernormpost_self_attn_layernormrc   mlppre_feedforward_layernormpost_feedforward_layernormr:   rq   rr   rs   r  s      rA   r9   T5Gemma2EncoderLayer.__init__  s    !--"$00;.
 (7v7I7IvObOb'c$(78J8JPVPcPc(d%v&)89K9KQWQdQd)e&*9&:L:LRXReRe*f'zz&"5"56rC   Nrw   r  r   r   r   c           	      8   UnU R                  U5      nU R                  " SUUUUS S.UD6u  pU R                  U5      nX`R                  U5      -   nUnU R	                  U5      nU R                  U5      nU R                  U5      nX`R                  U5      -   nU$ )N)rw   r  r   r   r   r4  r3  r5  rs   r7  r6  r8  )r?   rw   r  r   r   r   residual_s           rA   rR   T5Gemma2EncoderLayer.forward  s     !44]C>> 
' 3)% 
 
 55mD <<#>> 66}E/77F <<#>>rC   r2  re   rs   ri   r   r6  r8  r5  r7  r4  r3  r  )rZ   r[   r\   r]   r  r^   r9   r<   r   rU   
LongTensorFloatTensorrR   r_   r`   ra   s   @rA   r.  r.    s    7# 7. IM.204|| #5<<#=>E t+	
 &&- 
u  !	" rC   r.  c                      ^  \ rS rSrSrS\4U 4S jjr     SS\R                  S\	\R                  \R                  4   S\R                  S-  S	\R                  S-  S
\S-  S\S-  S\R                  S-  S\R                  4S jjrSrU =r$ )T5Gemma2DecoderLayeri  zFDecoder sub-layer: merged attention instead of vanilla self-attention.r   c                 $  > [         TU ]  5         UR                  U l        Xl        X l        UR
                  U   U l        [        UUS9U l        [        UR                  UR                  S9U l        [        UR                  UR                  S9U l        [        U5      U l        [        UR                  UR                  S9U l        [        UR                  UR                  S9U l        ["        R$                  " UR&                  5      U l        g r0  )r8   r9   ri   re   r   r   r2  r  r3  r2   r  r4  r5  rc   r6  r7  r8  r:   rq   rr   rs   r  s      rA   r9   T5Gemma2DecoderLayer.__init__  s    !--"$00; 1
 (7v7I7IvObOb'c$(78J8JPVPcPc(d%v&)89K9KQWQdQd)e&*9&:L:LRXReRe*f'zz&"5"56rC   Nrw   r  r  r   r  	use_cacher  r   c                 @   Un	U R                  U5      nU R                  " SUUUUUUUS.UD6u  n  n
U R                  U5      nXR                  U5      -   nUn	U R	                  U5      nU R                  U5      nU R                  U5      nXR                  U5      -   nU$ )N)rw   r  r  r   r  rG  r  r;  r<  )r?   rw   r  r  r   r  rG  r  r   r=  r>  s              rA   rR   T5Gemma2DecoderLayer.forward  s     !44]C"nn 	
' 3"7%+"7	
 	
q! 55mD <<#>> 66}E/77F <<#>>rC   r@  )NNNFN)rZ   r[   r\   r]   r  r^   r9   r<   r   rU   rA  r
   boolrB  rR   r_   r`   ra   s   @rA   rD  rD    s    P7# 72 6:046:!&59 ||  #5<<#=>   %||d2	 
 &&-  -t3  $;   %||d2  
		   rC   rD  c                   z   ^  \ rS rSrSrSS\S\S\4U 4S jjjrS\R                  S\R                  4S	 jr
S
rU =r$ )T5Gemma2LMHeadi3  z.Head for language modeling (generation) tasks.ri   
vocab_sizerh   c                 V   > [         TU ]  5         [        R                  " XUS9U l        g )Nrg   )r8   r9   r:   rk   out_proj)r?   ri   rM  rh   r@   s       rA   r9   T5Gemma2LMHead.__init__6  s     		+ErC   rw   r   c                 (    U R                  U5      nU$ r7   rO  )r?   rw   logitss      rA   rR   T5Gemma2LMHead.forward:  s    }-rC   rR  )F)rZ   r[   r\   r]   r  r^   rJ  r9   r<   r   rR   r_   r`   ra   s   @rA   rL  rL  3  sJ    8FC FS F F FU\\ ell  rC   rL  c                   z   ^  \ rS rSrSrSS\S\S\4U 4S jjjrS\R                  S\R                  4S	 jr
S
rU =r$ )T5Gemma2ClassificationHeadi?  z-Head for sentence-level classification tasks.ri   
num_labelsclassifier_dropout_ratec                    > [         TU ]  5         [        R                  " US9U l        [        R
                  " X5      U l        g )N)r   )r8   r9   r:   rq   rs   rk   rO  )r?   ri   rW  rX  r@   s       rA   r9   #T5Gemma2ClassificationHead.__init__B  s/    zz$;<		+:rC   rw   r   c                 J    U R                  U5      nU R                  U5      nU$ r7   rs   rO  )r?   rw   s     rA   rR   "T5Gemma2ClassificationHead.forwardG  s$    ]3m4rC   r\  )r
  rZ   r[   r\   r]   r  r^   rO   r9   r<   r   rR   r_   r`   ra   s   @rA   rV  rV  ?  sF    7;C ;S ;SX ; ;
U\\ ell  rC   rV  c                   R   ^  \ rS rSrS\4U 4S jjrS\R                  4S jrSr	U =r
$ )T5Gemma2MultiModalProjectoriM  re   c                   > [         TU ]  5         [        R                  " [        R
                  " UR                  R                  UR                  R                  5      5      U l	        [        UR                  R                  UR                  R                  S9U l        [        UR                  R                  UR                  R                  -  5      U l        [        UR"                  S-  5      U l        U R                   U R$                  -  U l        [        R(                  " U R&                  U R&                  S9U l        g )Nr1        ?)kernel_sizestride)r8   r9   r:   r;   r<   r=   vision_configri   text_configmm_input_projection_weightr2   layer_norm_epsmm_soft_emb_normr^   
image_size
patch_sizepatches_per_imagemm_tokens_per_imagetokens_per_siderc  	AvgPool2davg_poolrt   s     rA   r9   $T5Gemma2MultiModalProjector.__init__N  s    *,,,KK,,88&:L:L:X:XY+
' !0  ,,&2F2F2U2U!
 "%V%9%9%D%DH\H\HgHg%g!h"6#=#=s#BC11T5I5II1A1A$JZJZ[rC   vision_outputsc                    UR                   u  p#nUR                  SS5      nUR                  X$U R                  U R                  5      nUR	                  5       nU R                  U5      nUR                  S5      nUR                  SS5      nU R                  U5      n[        R                  " XpR                  5      nUR                  U5      $ )Nr,   r*   )rV   r   r   rl  r   rp  flattenri  r<   r   rg  rP   )	r?   rr  
batch_sizer>  ri   reshaped_vision_outputspooled_vision_outputsnormed_vision_outputsprojected_vision_outputss	            rA   rR   #T5Gemma2MultiModalProjector.forward^  s    %3%9%9"
{"0":":1a"@"9"A"AT%;%;T=S=S#
 #:"D"D"F $.E F 5 = =a @ 5 ? ?1 E $ 5 56K L#(<<0EGfGf#g '//??rC   )rp  rc  rg  ri  rl  rn  )rZ   r[   r\   r]   r/   r9   r<   r   rR   r_   r`   ra   s   @rA   r`  r`  M  s*    \4 \ @ell @ @rC   r`  c                   t   ^  \ rS rSrSr  SS\S\S\S\S\4
U 4S jjjrS	\R                  4U 4S
 jjr
SrU =r$ )T5Gemma2TextScaledWordEmbeddingiq  zCT5Gemma2 Embedding: override to add eoi token embedding separately.num_embeddingsembedding_dimpadding_idxembed_scaleeoi_token_indexc                    > [         TU ]  XU5        X@l        U R                  S[        R
                  " U5      SS9  XPl        [        R                  " [        R                  " U R                  5      5      U l        g )Nr  Fr   )r8   r9   scalar_embed_scaler   r<   tensorr  r:   r;   r=   r~  eoi_embedding)r?   r}  r~  r  r  r  r@   s         rA   r9   (T5Gemma2TextScaledWordEmbedding.__init__t  s_     	D"-]ELL,ERWX.\\%++d6H6H*IJrC   	input_idsc                    > [         TU ]  U5      U R                  R                  U R                  R
                  5      -  nU R                  R                  UR
                  5      X!U R                  :H  '   U$ r7   )r8   rR   r  r   r>   r   r  r  )r?   r  input_embeddingsr@   s      rA   rR   'T5Gemma2TextScaledWordEmbedding.forward  sd     7?958H8H8K8KDKKL]L]8^^>B>P>P>S>STdTjTj>kd&:&::;rC   )r  r  r  )rN     r^  ra   s   @rA   r|  r|  q  sd    M !&KK K 	K
 K K K     rC   r|  c            	          ^  \ rS rSr% \\S'   SrSr/ SQrS/r	Sr
SrSrSrSr\\/\" \SS	S
9\" \SS	S
9\" \SSS
9/S.rSr\R.                  " 5       U 4S j5       rS\R2                  4S jrSrU =r$ )T5Gemma2PreTrainedModeli  re   modelT)r.  rD  SiglipVisionEmbeddingsSiglipEncoderLayer#SiglipMultiheadAttentionPoolingHeadr  Fr,   r3  )index
layer_namer*   
cross_attn)rw   
attentions)imagetextc                   > [         TU ]  U5        [        U[        5      (       a!  [        R
                  " UR                  5        g [        U[        5      (       aL  [        R
                  " UR                  5        [        R                  " UR                  UR                  5        g [        U[        5      (       a  UR                  R                  R                  S   S-  n[        R                   " UR                  R                  SU R"                  R$                  U-  S9  ['        UR                  S5      (       aC  UR                  R(                  b+  [        R
                  " UR                  R(                  5        g g g SUR*                  R,                  ;   a!  [        R
                  " UR                  5        g [        U[.        5      (       a  UR0                   H  nUR2                  nUR4                  U   S:w  a  [6        UR4                  U      nU" UR"                  US9u  pV[        R8                  " [;        X S	35      U5        [        R8                  " [;        X S
35      U5        M     g g )Nr   r   r
  )rI   stdrh   RMSNormr   r   r   r   )r8   _init_weightsr   r`  initzeros_rg  r|  r  	constant_r  r  rV  rO  r>   rV   normal_re   initializer_ranger   rh   r@   rZ   rz   r   r   r~   r   copy_r   )r?   r   scaler   r   r   r>  r@   s          rA   r  %T5Gemma2PreTrainedModel._init_weights  s   f%f9::KK99: ?@@KK,,-NN6--v/H/HI :;;OO**003t;ELL//ct{{?\?\_d?dev//FOO4H4H4TFOO001 5U/ &**333KK& 788$00
%EE##J/9<#6v7G7G
7S#TL#/*#U 

76\+CDmT

76\9K+LM}] 1 9rC   labelsc                 >   U R                   R                  nUR                  nUR                  nUc  [	        S5      eUR                  UR                  5      nUSSS24   R                  5       USSS24'   X5S'   Uc  [	        S5      eUR                  US:H  U5        U$ )	z
Shifts input_ids to the right, prepends the decoder_start_token_id, and handles
pad_token_id replacement for labels that were -100.
This is a common preparation step for decoder inputs in sequence-to-sequence models.
Nz:self.model.config.decoder.bos_token_id has to be defined. .rE   r,   ).r   z9self.model.config.decoder.pad_token_id has to be defined.i)	re   decoderbos_token_idpad_token_id
ValueError	new_zerosrV   r   masked_fill_)r?   r  decoder_configdecoder_start_token_idr  shifted_input_idss         rA   %prepare_decoder_input_ids_from_labels=T5Gemma2PreTrainedModel.prepare_decoder_input_ids_from_labels  s     ,,!/!<!<%22!)YZZ #,,V\\:%+C"H%5%;%;%=#qr'"$:&!XYY 	&&'8D'@,O  rC   r;  )rZ   r[   r\   r]   r-   r   base_model_prefixsupports_gradient_checkpointing_no_split_modules_skip_keys_device_placement_supports_flash_attn_supports_sdpa_supports_flex_attn_can_compile_fullgraph_supports_attention_backendr.  rD  r(   r   r  _can_record_outputsinput_modalitiesr<   r   r  r   r  r_   r`   ra   s   @rA   r  r    s    &*# $5"5 !N!"&.0DE0kR2!T2!U
 )
]]_^ ^0!ELL ! !rC   r  r   c           
      X   ^ ^ S[         S[         S[         S[         S[        4
UU 4S jjnU$ )zD
This creates uni/bidirectional attention mask with sliding window.
	batch_idxhead_idxq_idxkv_idxr   c                 t   > T	(       a  T
SpTOT
S-   S-  T
S-  S-   pTX#-
  nUS:  Xd:  -  nUS:  U* U:  -  nXx-  $ )Nr   r,   r*   r;  )r  r  r  r  left_window_sizeright_window_sizedist	left_mask
right_maskr   r   s            rA   
inner_mask0sliding_window_mask_function.<locals>.inner_mask  sc    2@!/4BQ4F13L~bcNcfgNg/~QY4#:;	QhD5+<#<=
%%rC   )r^   rJ  )r   r   r  s   `` rA   sliding_window_mask_functionr    s8    
	&c 	&S 	& 	&c 	&d 	& 	& rC   c                   *  ^  \ rS rSr% \\S'   \\S.r SS\S\	4U 4S jjjr
\\\     SS\R                  S-  S\R                   S-  S	\R                  S-  S
\R"                  S-  S\R                   S-  S\\   S\4S jj5       5       5       rSrU =r$ )T5Gemma2TextEncoderi  re   )r  rw   r  c           	      Z  > [         TU ]  U5        UR                  U l        UR                  U l        [        UR                  UR                  U R                  UR                  S-  US9U l        [        UR                  UR                  S9U l
        SU l        [        R                  " [        UR                  5       Vs/ s H  n[!        X5      PM     sn5      U l        [        R$                  " UR&                  5      U l        [+        U5      U l        U R/                  5         g s  snf Nrb  )r  r  r1  F)r8   r9   r  r  rM  r|  ri   embed_tokensr2   r  normgradient_checkpointingr:   
ModuleListrangenum_hidden_layersr.  r"  rq   rr   rs   rz   
rotary_emb	post_initr?   re   r  r   r@   s       rA   r9   T5Gemma2TextEncoder.__init__  s    
 	 !.. ++;**C/+
 $F$6$6F<O<OP	&+#mmFKFLdLdFefFe!&4Fef
 zz&"5"561&9 	 g   D(Nr  r   r   inputs_embedstoken_type_idsr   r   c                    US L US L-  (       a  [        S5      eUR                  SS 5        Uc  U R                  U5      nUc<  [        R                  " SUR
                  S   UR                  S9R                  S5      n[        U=n[        5      (       dG  U R                  UUS.n[        S0 UD6[        S0 UDS[        U R                  R                  SS	90D6S
.nUn	0 n
[        U R                  R                  5       H  nU R!                  XU5      X'   M     U R#                  U	5      n	[%        U R&                  S U R                  R(                   5       HC  u  pU" U	XR                  R                  U      XpR                  R                  U      U40 UD6n	ME     U R+                  U	5      n	U R#                  U	5      n	[-        U	S9$ )N:You must specify exactly one of input_ids or inputs_embedsr  r   r,   r   )re   r  r   and_mask_functionF)r   full_attentionr   )last_hidden_stater;  )r  popr  r<   r   rV   r   r   r   dictre   r   r  r   r   r   r  rs   	enumerater"  r  r  r   )r?   r  r   r   r  r  r   self_attn_mask_mappingmask_kwargsrw   r  r   ilayer_modules                 rA   rR   T5Gemma2TextEncoder.forward  s    -t";<YZZ 	

$d+  --i8M <<=+>+>q+A-J^J^_iijklLNB0DII++!."0K #<"Jk"J%> &!&&B4;;C]C]in&o&&" & !dkk556J.2oom[e.f+ 7 ]3(5Tt{{7T7T)UVOA(#KK$;$;A$>?&{{'>'>q'AB	
 M  W 		-0]3+
 	
rC   rs   r  r  r"  r  r  r  rM  r  )NNNNN)rZ   r[   r\   r]   r0   r   r   r.  r  r^   r9   r'   r)   r#   r<   rA  r   rB  r!   r"   r   rR   r_   r`   ra   s   @rA   r  r    s    +-  '"  8   .2.20426.2<
##d*<
 t+<
 &&-	<

 ((4/<
 t+<
 +,<
 
<
    <
rC   r  c                     ^  \ rS rSr% \\S'    SS\S\4U 4S jjjrS rS r	\
\S\R                  S\\   S	\\-  4S
 j5       5       rS\R&                  S-  S\R(                  S-  S\R(                  4S jr\      SS\R&                  S-  S\R                  S-  S\R&                  S-  S\R(                  S-  S\R(                  S-  S\R                  S-  S\\   S	\4S jj5       rSrU =r$ )T5Gemma2EncoderiW  re   r  c                    > [         TU ]  U5        [        R                  UR                  US9U l        [        R                  " UR                  S9U l	        [        U5      U l        U R                  5         g )N)r  re   )r8   r9   r  _from_configrf  
text_modelr+   from_configre  vision_towerr`  multi_modal_projectorr  )r?   re   r  r@   s      rA   r9   T5Gemma2Encoder.__init__Z  sb    
 	 -::6;M;M_n:o%119M9MN%@%H" 	rC   c                 6    U R                   R                  5       $ r7   )r  get_input_embeddingsrW   s    rA   r  $T5Gemma2Encoder.get_input_embeddingsh  s    3355rC   c                 8    U R                   R                  U5      $ r7   )r  set_input_embeddingsr?   new_embeddingss     rA   r  $T5Gemma2Encoder.set_input_embeddingsk  s    33NCCrC   pixel_valuesr   r   c                 v    U R                   " SUSS.UD6nUR                  nU R                  U5      nXSl        U$ )NT)r  return_dictr;  )r  r  r  pooler_output)r?   r  r   rr  r  image_featuress         rA   get_image_features"T5Gemma2Encoder.get_image_featuresn  sI     **aRVaZ`a*<<334EF'5$rC   r  Nr  r  c           	      >   U R                   R                  nUcd  Uc  [        S5      eX R                  5       " [        R
                  " U[        R                  UR                  S95      :H  nUR                  S5      nOX:H  nUR                  5       nUR                  S5      R                  U5      R                  UR                  5      nUR                  S   UR                  S   -  n[        X%   R                  5       UR                  5       :H  SU SU 35        U$ )z
Obtains multimodal placeholder mask from `input_ids` or `inputs_embeds`, and checks that the placeholder token count is
equal to the length of multimodal features. If the lengths are different, an error is raised.
z9Either `input_ids` or `inputs_embeds` has to be provided.)r   r   rE   r   r,   z6Image features and image tokens do not match: tokens: z, features )re   image_token_idr  r  r<   r  longr   allsumr   	expand_asr   rV   r%   numel)r?   r  r  r  r  special_image_maskn_image_tokensn_image_featuress           rA   get_image_placeholder_mask*T5Gemma2Encoder.get_image_placeholder_mask|  s    33$ !\]]!.2K2K2M^5::mFZFZ[3 " "4!7!7!;!*!<+//1/99"=GGVYYZgZnZno)//2^5I5I!5LL-3359M9M9OOD^DTT_`p_qr	
 "!rC   r   r   r  c                 h   US L US L-  (       a  [        S5      eUc  U R                  R                  U5      nUba  U R                  USS9R                  nUR                  UR                  UR                  5      nU R                  XUS9n	UR                  X5      nU R                  " SUUUS.UD6n
U
$ )Nr  T)r   )r  r  )r  r   r   r;  )
r  r  r  r  r  r   r   r   r  masked_scatter)r?   r  r   r   r  r  r  r   r  
image_maskoutputss              rA   rR   T5Gemma2Encoder.forward  s     -t";<YZZ  OO88CM#!44\t4TbbN+..}/C/C]EXEXYN88~ 9 J *88TM// 
')%
 	
 rC   )r  r  r  r  )NNNNNN)rZ   r[   r\   r]   r/   r   r^   r9   r  r  r$   r#   r<   r   r!   r"   rU   r   r  rA  rB  r  r   rR   r_   r`   ra   s   @rA   r  r  W  sp   !!
  '%  6D 
!LL
4:;M4N
	+	+
  
"##d*" ((4/" ))	"<  .2.2042615.2!##d*! t+! &&-	!
 ((4/! ''$.! t+! +,! 
! !rC   r  c                   v  ^  \ rS rSr% \\S'   \" \SS9\" \SS9\S.r	SS\S\
4U 4S jjjr\\\        SS
\R                   S	-  S\R"                  S	-  S\R                   S	-  S\S	-  S\R&                  S	-  S\S	-  S\R"                  S	-  S\R"                  S	-  S\\   S\4S jj5       5       5       rSrU =r$ )T5Gemma2Decoderi  re   r,   )r  r*   )r  cross_attentionsrw   r  c           	      Z  > [         TU ]  U5        UR                  U l        UR                  U l        [        UR                  UR                  UR                  UR                  S-  US9U l        [        UR                  UR                  S9U l
        SU l        [        R                  " [        UR                  5       Vs/ s H  n[!        X5      PM     sn5      U l        [        R$                  " UR&                  5      U l        [+        U5      U l        U R/                  5         g s  snf r  )r8   r9   r  r  rM  r|  ri   r  r2   r  r  r  r:   r  r  r  rD  r"  rq   rr   rs   rz   r  r  r  s       rA   r9   T5Gemma2Decoder.__init__  s     !.. ++;**C/+
 $F$6$6F<O<OP	&+#mmFKFLdLdFefFe!&4Fef
 zz&"5"561&9	 gr  Nr  r   r   r  r  rG  r  encoder_attention_maskr   r   c	           
         US L US L-  (       a  [        S5      eUc  [        S5      eUc  U R                  U5      nU R                  (       d/  U(       a(  Uc%  [        [	        U R
                  S9[	        5       5      nUcU  Ub  UR                  5       OSn
[        R                  " UR                  S   UR                  S9U
-   nUR                  S5      n[        U=n[        5      (       d<  S nU R
                  UUUb  UR                  OS UUS.n[        S0 UD6[!        S0 UD6S	.n[        U=n[        5      (       d  S
[#        U R
                  UUUWS90n[        R$                  " US
   US
   /SS9[        R$                  " US   US
   /SS9S	.nUn0 n['        U R
                  R(                  5       H  nU R+                  UUU5      UU'   M     U R-                  U5      n[/        U R0                  S U R
                  R2                   5       HH  u  nnU" UUU R
                  R(                  U      XR
                  R(                  U      UUUU40 U	D6nMJ     U R5                  U5      nU R-                  U5      n[7        UUS9$ )Nr  z0`encoder_hidden_states` must be given in decoderr  r   r,   r  c                  H    [         R                  " S[         R                  S9$ )NTr   )r<   r  rJ  )argss    rA   <lambda>)T5Gemma2Decoder.forward.<locals>.<lambda>  s    ELLUZZ4XrC   )re   r  r   r  r   r  r  r  )re   r  r   r  r  rE   r   r   )r  r  r;  )r  r  r   r
   r	   re   get_seq_lengthr<   r   rV   r   r   r   r  r  r   r   r   r   r   r   r  rs   r  r"  r  r  r   )r?   r  r   r   r  r  rG  r  r  r   past_seen_tokensr  dummy_and_mask_functionr  cross_attn_mask_mappingmerged_attn_mask_mappingrw   r  r   r  r  s                        rA   rR   T5Gemma2Decoder.forward  s    -t";<YZZ (OPP  --i8M}}/F1,dkk2RT`TbcOCRC^==?de <<(;(;A(>}G[G[\_ooL'11!4LNB0DII 'Y#++!."0KZKf?#G#Glp ,%<K #5"C{"C%F%U%U&"
 5KK1TRR ";;;"/#9*?&=#'# $ii'(89;RSc;dekm "''(;<>UVf>ghnp"	$
  & !dkk556J.2oom\[e.f
+ 7 ]3(5Tt{{7T7T)UVOA|(#DKK$;$;A$>?()@)@)CD%	 	M  W 		-0]38++
 	
rC   r  r  )NNNNNNNN)rZ   r[   r\   r]   r.   r   r(   r  rD  r  r^   r9   r'   r)   r#   r<   rA  r   r
   rB  rJ  r!   r"   r   rR   r_   r`   ra   s   @rA   r  r    s:   !!$%<AF*+B!L-4 s  ,   .2.2046:26!%596:]
##d*]
 t+]
 &&-	]

 -t3]
 ((4/]
 $;]
  %||d2]
 !&t 3]
 +,]
 
3]
    ]
rC   r  c                     ^  \ rS rSrSSS.rS\4U 4S jjrS rS rS	 r	S
 r
\\            SS\R                  S-  S\R                  S-  S\R                  S-  S\R                  S-  S\R                  S-  S\R                   S-  S\R                  S-  S\S-  S\S-  S\R&                  S-  S\R&                  S-  S\S-  S\\   S\4S jj5       5       rSrU =r$ )T5Gemma2Modeli@  z&encoder.text_model.embed_tokens.weightz-encoder.text_model.embed_tokens.eoi_embedding)zdecoder.embed_tokens.weightz"decoder.embed_tokens.eoi_embeddingre   c                    > [         TU ]  U5        [        UR                  UR                  5      U l        [        UR                  UR                  5      U l        U R                  5         g r7   )r8   r9   r  encoderr  r  r  r  rt   s     rA   r9   T5Gemma2Model.__init__G  sL      'v~~v7M7MN&v~~v7M7MNrC   c                     U R                   $ r7   )r*  rW   s    rA   get_encoderT5Gemma2Model.get_encoderP      ||rC   c                     U R                   $ r7   r  rW   s    rA   get_decoderT5Gemma2Model.get_decoderS  r/  rC   c                 6    U R                   R                  5       $ r7   )r*  r  rW   s    rA   r  "T5Gemma2Model.get_input_embeddingsV  s    ||0022rC   c                 8    U R                   R                  U5      $ r7   )r*  r  r  s     rA   r  "T5Gemma2Model.set_input_embeddingsY  s    ||00@@rC   Nr  r  r   r   decoder_input_idsdecoder_attention_maskdecoder_position_idsencoder_outputsr  r  decoder_inputs_embedsrG  r   r   c                 J   Uc  U R                   " SUUUU
USS.UD6nUR                  nU R                  " SUUUUU	UUUSS.	UD6n[        UR                  UR                  UR
                  UR                  UR                  UR                  UR
                  UR                  S9$ )a8  
decoder_position_ids (`torch.LongTensor` of shape `(batch_size, decoder_sequence_length)`, *optional*):
    Indices of positions of each decoder input sequence tokens in the position embeddings. Selected in the range `[0,
    config.decoder.n_positions - 1]`. [What are position IDs?](../glossary#position-ids)
T)r  r   r   r  r  r   )	r  r   r   r  r  r  r  rG  r   )r  r  decoder_hidden_statesdecoder_attentionsr  encoder_last_hidden_stater  encoder_attentionsr;  )r*  r  r  r   r  rw   r  r  )r?   r  r  r   r   r8  r9  r:  r;  r  r  r<  rG  r   r  decoder_outputss                   rA   rR   T5Gemma2Model.forward\  s    6 ""ll #-)+)  O !0 A A ,, 
'1-/+"7#1
 
 "-??+;;"1"?"?.99,==&5&G&G"1"?"?.99	
 		
rC   )r  r*  )NNNNNNNNNNNN)rZ   r[   r\   r]   _tied_weights_keysr-   r9   r-  r2  r  r  r$   r#   r<   rA  rB  
BoolTensorr   r
   r   rJ  r!   r"   r   rR   r_   r`   ra   s   @rA   r(  r(  @  sv    (P.]
~ 3A  .215370459:>8<266:-159!%!=
 ##d*=
 ''$.	=

 ))D0=
 &&-=
 !++d2=
 !& 0 04 7=
 $..5=
 )4/=
 -t3=
 ||d*=
  %||d2=
  $;!=
" +,#=
$ 
%=
  =
rC   r(  c            $         ^  \ rS rSrSS0rSS0rSS/S/40rS\4U 4S	 jjrS
 r	S r
S rS rS rS r\\S\R$                  S\\   S\\-  4S j5       5       r\S 5       r\\              S+S\R4                  S-  S\R6                  S-  S\R6                  S-  S\R4                  S-  S\R4                  S-  S\R8                  S-  S\R4                  S-  S\S-  S\S-  S\R6                  S-  S\R6                  S-  S \R4                  S-  S!\S-  S"\ \R$                  -  S\\   S\\R6                     \!-  4 S# jj5       5       r"S$\#S%\$S&\%S'\ S(\ S\4U 4S) jjr&S*r'U =r($ ), T5Gemma2ForConditionalGenerationi  zlm_head.out_proj.weightz,model.encoder.text_model.embed_tokens.weightzlm_head.out_projcolwise_gather_outputrw   rS  re   c                   > [         TU ]  U5        [        U5      U l        UR                  R
                  U l        [        UR                  R                  U R
                  5      U l        SU l	        U R                  5         g )NForMaskedLM)r8   r9   r(  r  r  rM  rL  ri   lm_head	loss_typer  rt   s     rA   r9   )T5Gemma2ForConditionalGeneration.__init__  sZ     "6*
 ..33%fnn&@&@$//R&rC   c                 $    XR                   l        g r7   rK  rO  r  s     rA   set_output_embeddings6T5Gemma2ForConditionalGeneration.set_output_embeddings  s     .rC   c                 .    U R                   R                  $ r7   rO  rW   s    rA   get_output_embeddings6T5Gemma2ForConditionalGeneration.get_output_embeddings  s    ||$$$rC   c                 6    U R                   R                  5       $ r7   r  r  rW   s    rA   r  5T5Gemma2ForConditionalGeneration.get_input_embeddings      zz..00rC   c                 :    U R                   R                  U5        g r7   r  r  r?   r   s     rA   r  5T5Gemma2ForConditionalGeneration.set_input_embeddings      

''.rC   c                 6    U R                   R                  5       $ r7   )r  r-  rW   s    rA   r-  ,T5Gemma2ForConditionalGeneration.get_encoder      zz%%''rC   c                 6    U R                   R                  5       $ r7   )r  r2  rW   s    rA   r2  ,T5Gemma2ForConditionalGeneration.get_decoder  r`  rC   r  r   r   c                 D    U R                  5       R                  " U40 UD6$ r7   )r-  r  )r?   r  r   s      rA   r  3T5Gemma2ForConditionalGeneration.get_image_features  s#    
 !44\LVLLrC   c                 6    U R                  5       R                  $ r7   )r-  r  rW   s    rA   r  -T5Gemma2ForConditionalGeneration.vision_tower  s    !...rC   Nr  r   r   r8  r9  r:  r;  r  r  r<  r  rG  logits_to_keepc                    Ub  Uc  Uc  U R                  U5      nU R                  " SUUUUUUUUU	U
UUS.UD6nUR                  n[        U[        5      (       a  [        U* S5      OUnU R                  USS2USS24   5      nU R                  R                  nUR                  b4  UUR                  -  n[        R                  " U5      nUUR                  -  nSnUb  U R                  " UXR                  40 UD6n[        UUUR                  UR                   UR"                  UR$                  UR&                  UR(                  UR*                  S9	$ )a  
decoder_position_ids (`torch.LongTensor` of shape `(batch_size, decoder_sequence_length)`, *optional*):
    Indices of positions of each decoder input sequence tokens in the position embeddings. Selected in the range `[0,
    config.decoder.n_positions - 1]`. [What are position IDs?](../glossary#position-ids)
labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
    Labels for computing the masked language modeling loss. Indices should either be in `[0, ...,
    config.vocab_size]` or -100 (see `input_ids` docstring). Tokens with indices set to `-100` are ignored
    (masked), the loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`.
N)r  r  r   r   r8  r9  r:  r;  r  r  r<  rG  )	lossrS  r  r>  r?  r  r@  r  rA  r;  )r  r  r  r   r^   slicerK  re   r  final_logit_softcappingr<   r   loss_functionrM  r   r  r>  r?  r  r@  r  rA  )r?   r  r  r   r   r8  r9  r:  r;  r  r  r<  r  rG  rg  r   rB  rw   slice_indicesrS  r  ri  s                         rA   rR   (T5Gemma2ForConditionalGeneration.forward  so   B "3";@U@] $ J J6 R.2jj /
%)%/#9!5++'"7/
 /
  (998B>SV8W8W~ot4]kmA}a,?@A,,11=nDDDFZZ'FnDDDF%%ffooPPD+;;"1"G"G.AA,==&5&O&O"1"G"G.AA

 
	
rC   generation_configmodel_kwargsgeneration_moderu  max_cache_lengthc           
        > [         TU ]  UUUUU5        UR                  SL a  gUR                  nUc  SnOSUR                  ;   n[        R
                  " U R                  R                  SS95      nSUl        S/UR                  -  Ul
        UUS.n	UR                  S5      n
U
b  [        U
[        5      (       d  [        S	5      e[        U
R                   5      S
:  a!  U
R                   R                  S
5      (       a  g[#        U
R$                  5      nU[&        :X  a  US   S
   R(                  S   U	S'   U" S0 U	D6U
l        O:[        [+        S0 U R                  R                  SS9US.D6[+        5       5      US'   [-        U S5      (       aC  U R.                  b5  [        U R.                  [        5      (       d  [        S5      eUS   U l        ggg)zMOverride cache preparation to support T5Gemma2-specific EncoderDecoder Cache.FN	offloadedTr1  r  )re   
offloadingr  zaThe `past_key_values` in `model_kwargs` must be of type `EncoderDecoderCache` for T5Gemma2 model.r   r;  r,   max_cache_len_cachezLThe internal cache must be of type `EncoderDecoderCache` for T5Gemma2 model.r;  )r8   _prepare_cache_for_generationrG  cache_implementationcopydeepcopyre   get_text_configr   r  r   r   r   r
   r  lenr  r   r!  r   rV   r	   r   rw  )r?   ro  rp  rq  ru  rr  ry  offload_cachecross_attn_configcross_attn_cache_kwargsr  cross_attn_clsr@   s               rA   rx  >T5Gemma2ForConditionalGeneration._prepare_cache_for_generation  s    	-	
 &&%/0EE'!M'+<+Q+QQM !MM$++*E*Ed*E*ST ,0()9(:=N=`=`(`% ('#

 '**+<=&o/BCC w 
 ?--.27Q7Q7U7UVW7X7X!/"G"GHN,;GHY;Z[\;];c;cde;f'84B4]E\4]O1 /B "&++"="=d"="K&3 /L*+ 4""t{{'>dkk+>?? !opp&'89DK	 (?"rC   )rw  rK  rL  r  rM  )NNNNNNNNNNNNNr   ))rZ   r[   r\   r]   rD  _tp_plan_pp_planr-   r9   rP  rS  r  r  r-  r2  r$   r#   r<   r   r!   r"   rU   r   r  propertyr  rA  rB  rE  r   r
   rJ  r^   r   rR   r   r  r   rx  r_   r`   ra   s   @rA   rG  rG    s   !#Q #$;<H"o%6
$CDH~ /%1/(( M!LLM4:;M4NM	+	+M  M
 / /  .215370459:>8<266:26:>*.!%-.%M
 ##d*M
 ''$.	M

 ))D0M
 &&-M
 !++d2M
 !& 0 04 7M
 $..5M
 )4/M
 -t3M
 ((4/M
  %0047M
    4'!M
" $;#M
$ ell*%M
& +,'M
( 
u  	!O	3)M
  M
^I:+I: I: (	I:
 I: I: 
I: I:rC   rG  c                     ^  \ rS rSrS\4U 4S jjrS rS r\\	           SS\
R                  S-  S\
R                  S-  S	\
R                  S-  S
\
R                  S-  S\
R                  S-  S\
R                  S-  S\
R                  S-  S\S-  S\
R                  S-  S\
R                  S-  S\
R                  S-  S\\   S\4S jj5       5       rSrU =r$ )!T5Gemma2ForSequenceClassificationii  re   c                 "  > [         TU ]  U5        UR                  U l        UR                  R                  U l        [        U5      U l        [        USS5      n[        U R                  U R                  U5      U l	        U R                  5         g NrX  g?r8   r9   rW  r  ri   r(  r  r   rV  scorer  r?   re   classifier_dropoutr@   s      rA   r9   *T5Gemma2ForSequenceClassification.__init__k  sp      ++!>>55"6*
$V-FL/0@0@$//Sef
rC   c                 6    U R                   R                  5       $ r7   rV  rW   s    rA   r  6T5Gemma2ForSequenceClassification.get_input_embeddingsv  rX  rC   c                 :    U R                   R                  U5        g r7   rZ  r[  s     rA   r  6T5Gemma2ForSequenceClassification.set_input_embeddingsy  r]  rC   Nr  r  r   r   r8  r9  r:  r;  r  r<  r  r   r   c                 f   U	c  U
b#  [        SU R                  R                   S35      eUc  [        S5      eUc  U R	                  U5      nU R
                  " U4UUUUUUUU	U
SS.
UD6nUR                  nUR                  nUR                  nU R                  U5      nUR                  S   nXPR                  R                  :g  R                  UR                  [        R                   5      n[        R"                  " UR                  S   UR                  [        R                   S	9nUU-  R%                  S5      n[        R&                  " UUR                  S   S
-
  S9nU[        R"                  " UUR                  S9U4   nSnUb  U R)                  UUUU R                  S9n[+        UUUUS9$ )  
decoder_position_ids (`torch.LongTensor` of shape `(batch_size, decoder_sequence_length)`, *optional*):
    Indices of positions of each decoder input sequence tokens in the position embeddings. Selected in the range `[0,
    config.decoder.n_positions - 1]`. [What are position IDs?](../glossary#position-ids)
labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
    Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
    config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
    `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
N8Passing input embeddings is currently not supported for .You have to specify input_idsF
r  r   r   r8  r9  r:  r;  r  r<  rG  r   rE   r   r,   )maxr  )rS  r  pooled_logitsre   ri  rS  rw   r  )NotImplementedErrorr@   rZ   r  r  r  r  r>  r?  r  rV   re   r  r   r   r<   int32r   argmaxclamprl  r   )r?   r  r  r   r   r8  r9  r:  r;  r  r<  r  r   r  r  rw   r  rS  ru  non_pad_masktoken_indiceslast_non_pad_tokenr  ri  s                           rA   rR   )T5Gemma2ForSequenceClassification.forward|  s   4 $(=(I%J4>>KbKbJccde  <==$ $ J J9 U&*jj'
%)%/#9!5+'"7'
 '
 $5555//
-.__Q'
)[[-E-EEII&--Y^YdYde%6%<%<R%@^c^i^ij+l:BB2F"[[);ARAXAXY[A\_`A`au||Jv}}MOaab%%VFR_hlhshs%tD' '!	
 	
rC   ri   r  rW  r  NNNNNNNNNNN)rZ   r[   r\   r]   r-   r9   r  r  r$   r#   r<   rA  rB  r   r   r!   r"   r   rR   r_   r`   ra   s   @rA   r  r  i  s\   	~ 	1/  .215.204596:8<2626:>*.J
##d*J
 ''$.J
 t+	J

 &&-J
 !++d2J
 !&t 3J
 $..5J
 )4/J
 ((4/J
  %0047J
   4'J
 +,J
 
"J
  J
rC   r  c                     ^  \ rS rSrS\4U 4S jjrS rS r\\	           SS\
R                  S-  S\
R                  S-  S	\
R                  S-  S
\
R                  S-  S\
R                  S-  S\
R                  S-  S\
R                  S-  S\S-  S\
R                  S-  S\
R                  S-  S\
R                  S-  S\\   S\4S jj5       5       rSrU =r$ )T5Gemma2ForTokenClassificationi  re   c                 "  > [         TU ]  U5        UR                  U l        UR                  R                  U l        [        U5      U l        [        USS5      n[        U R                  U R                  U5      U l	        U R                  5         g r  r  r  s      rA   r9   'T5Gemma2ForTokenClassification.__init__  sp      ++!>>55"6*
$V-FL/0@0@$//Sef
rC   c                 6    U R                   R                  5       $ r7   rV  rW   s    rA   r  3T5Gemma2ForTokenClassification.get_input_embeddings  rX  rC   c                 :    U R                   R                  U5        g r7   rZ  r[  s     rA   r  3T5Gemma2ForTokenClassification.set_input_embeddings  r]  rC   Nr  r  r   r   r8  r9  r:  r;  r  r<  r  r   r   c                    U	c  U
b#  [        SU R                  R                   S35      eUc  [        S5      eUc  U R	                  U5      nU R
                  " U4UUUUUUUU	U
SS.
UD6nUR                  nUR                  nUR                  nU R                  U5      nSnUb  U R                  UXR                  5      n[        UUUUS9$ )r  Nr  r  r  Fr  r  )r  r@   rZ   r  r  r  r  r>  r?  r  rl  re   r   )r?   r  r  r   r   r8  r9  r:  r;  r  r<  r  r   r  r  rw   r  rS  ri  s                      rA   rR   &T5Gemma2ForTokenClassification.forward  s   4 $(=(I%J4>>KbKbJccde  <==$ $ J J9 U&*jj'
%)%/#9!5+'"7'
 '
 $5555//
-.%%ffkkBD$'!	
 	
rC   r  r  )rZ   r[   r\   r]   r-   r9   r  r  r$   r#   r<   rA  rB  r   r   r!   r"   r   rR   r_   r`   ra   s   @rA   r  r    s\   
~ 
1/  .215.204596:8<2626:>*.@
##d*@
 ''$.@
 t+	@

 &&-@
 !++d2@
 !&t 3@
 $..5@
 )4/@
 ((4/@
  %0047@
   4'@
 +,@
 
@
  @
rC   r  )rG  r(  r  r  r  r  )r,   )r
  NN)T)brz  collections.abcr   typingr   r<   torch.nnr:    r   r  activationsr   cache_utilsr   r	   r
   r   
generationr   r   r   integrationsr   r   masking_utilsr   r   r   modeling_flash_attention_utilsr   modeling_layersr   modeling_outputsr   r   r   r   r   r   r   modeling_rope_utilsr   r   modeling_utilsr   r    processing_utilsr!   utilsr"   r#   r$   r%   utils.genericr&   r'   utils.output_capturingr(   r)   autor+   configuration_t5gemma2r-   r.   r/   r0   Moduler2   rc   rz   r   r   r   r^   r   rO   rU   r   r   r  r.  rD  rL  rV  r`  	Embeddingr|  r  r  r  r  r  r(  rG  r  r  __all__r;  rC   rA   <module>r     s  *  $    & ! P P K K I m m B 9   L F & a a G E  t t=bii =(")) &L<bii L<^( *+ ,2	UU\\ 	U# 	U%,, 	U$   %II%<<% 
% <<	%
 LL4'% S[% T\% T\% 5<<%&%D )*K)BII K) +K)\ )*uBbii uB +uBp15 1h85 8v	RYY 	 !@")) !@H bll  . S!o S! S!l  &b
1 b
Je- eP~
- ~
B Z
+ Z
 Z
zH:'> H:V ^
(? ^
 ^
B U
%< U
 U
prC   