
    Z j                     n   S SK r S SKJr  S SKJrJr  S SKrS SKJr  S SK	J
r
  SSKJr  SSKJrJrJr  SSKJr  SS	KJrJrJr  SS
KJr  SSKJr  SSKJrJrJrJ r J!r!J"r"J#r#  SSK$J%r%  SSK&J'r'J(r(  SSK)J*r*  SSK+J,r,J-r-J.r.J/r/J0r0  SSK1J2r2  SSK3J4r4J5r5  SSK6J7r7  SSK8J9r9J:r:  SSK;J<r<J=r=J>r>J?r?J@r@JArAJBrBJCrCJDrDJErEJFrF  SSKGJHrH  SSKIJJrJJKrKJLrL  \/R                  " \N5      rO\-" SS9\
 " S S\:\5      5       5       rP\-" SS9\
 " S S\95      5       5       rQ\-" SS9\
 " S S \:\5      5       5       rR\-" SS9\
 " S! S"\5      5       5       rS " S# S$\@5      rT " S% S&\=5      rU " S' S(\A5      rV " S) S*\<5      rW " S+ S,\<5      rXSMS-\YS.\4S/ jjrZ " S0 S1\K5      r[ " S2 S3\K5      r\ " S4 S5\L5      r] " S6 S7\J5      r^ " S8 S9\>5      r_ " S: S;\B5      r`\- " S< S=\?5      5       ra " S> S?\a5      rb " S@ SA\a5      rc " SB SC\a5      rd\- " SD SE\a5      5       re " SF SG\a\5      rf\- " SH SI\a5      5       rg\- " SJ SK\a5      5       rh/ SLQrig)N    N)Callable)AnyOptional)strict   )initialization)DynamicCacheEncoderDecoderCacheStaticCache)PreTrainedConfig)GenerationConfigGenerationMixinGenerationMode)create_bidirectional_mask)FlashAttentionKwargs)BaseModelOutput)BaseModelOutputWithPastAndCrossAttentionsBaseModelOutputWithPoolingSeq2SeqLMOutputSeq2SeqModelOutputSequenceClassifierOutputTokenClassifierOutput)ROPE_INIT_FUNCTIONS)ALL_ATTENTION_FUNCTIONSPreTrainedModel)Unpack)TransformersKwargsauto_docstringcan_return_tupleloggingtorch_compilable_check)merge_with_config_defaults)OutputRecordercapture_outputs   )	AutoModel)Gemma3ConfigGemma3TextConfig)Gemma3Attention	Gemma3MLPGemma3MultiModalProjectorGemma3PreTrainedModelGemma3RMSNormGemma3RotaryEmbeddingGemma3TextScaledWordEmbeddingapply_rotary_pos_embcreate_causal_mask!create_sliding_window_causal_maskeager_attention_forward)SiglipVisionConfig)T5GemmaClassificationHeadT5GemmaEncoderLayerT5GemmaLMHeadzgoogle/t5gemma-2-270m-270m)
checkpointc                   0    \ rS rSrSrSr\" 5       rS rSr	g)T5Gemma2TextConfigL   X  
query_pre_attn_scalar (`float`, *optional*, defaults to 256):
    Scaling factor used on the attention scores
final_logit_softcapping (`float`, *optional*):
    Scaling factor when applying tanh softcapping on the logits.
attn_logit_softcapping (`float`, *optional*):
    Scaling factor when applying tanh softcapping on the attention scores.
t5gemma2_textc                     UR                  SS5      nU R                  cC  [        U R                  5       Vs/ s H  n[	        US-   U-  5      (       a  SOSPM     snU l        [
        R                  " S0 UD6  g s  snf Nsliding_window_pattern      sliding_attentionfull_attention poplayer_typesrangenum_hidden_layersboolr   __post_init__selfkwargs_sliding_window_patternis       ~/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/models/t5gemma2/modular_t5gemma2.pyrL    T5Gemma2TextConfig.__post_init__[       "(**-Eq"I# t556 6A (,QU6M,M'N'N#Tdd6 D
 	&&00    $A:rH   N
__name__
__module____qualname____firstlineno____doc__
model_typeAttributeErroruse_bidirectional_attentionrL   __static_attributes__rE       rR   r:   r:   L   s     !J"0"2	1ra   r:   c                   "    \ rS rSrSr\\S.rSrg)T5Gemma2EncoderConfigg   t5gemma2_encoder)text_configvision_configrE   N)	rX   rY   rZ   r[   r]   r:   r4   sub_configsr`   rE   ra   rR   rc   rc   g   s     $J *+Kra   rc   c                   0    \ rS rSrSrSr\" 5       rS rSr	g)T5Gemma2DecoderConfigr   r<   t5gemma2_decoderc                     UR                  SS5      nU R                  cC  [        U R                  5       Vs/ s H  n[	        US-   U-  5      (       a  SOSPM     snU l        [
        R                  " S0 UD6  g s  snf r?   rF   rM   s       rR   rL   #T5Gemma2DecoderConfig.__post_init__   rT   rU   rV   NrW   rE   ra   rR   rj   rj   r   s     $J"0"2	1ra   rj   c                   "  ^  \ rS rSr% SrSrS/r\\S.r	SSS.r
S	r\\\\4   -  S	-  \S
'   S	r\\\\4   -  S	-  \S'   Sr\\S'   Sr\\-  \S'   Sr\\-  \S'   Sr\\-  \S'   Sr\\S'   Sr\\S'   S	r\S	-  \S'   Sr\\S'   U 4S jrS rSrU =r$ )T5Gemma2Config   a,  
encoder (`Union[T5Gemma2EncoderConfig, dict]`, optional, *optional*):
    Configuration for the encoder.
decoder (`Union[T5Gemma2DecoderConfig, dict]`, optional, *optional*):
    Configuration for the decoder.
eoi_token_index (`int`, *optional*):
    The end-of-image token index to wrap the image prompt. Will be same as
    `self.encoder.eoi_token_index`

```python
>>> from transformers import T5Gemma2Config, T5Gemma2Model
>>> t5gemma2_config = T5Gemma2Config.from_pretrained("google/t5gemma-270m-270m")
>>> model = T5Gemma2Model(t5gemma2_config)
```
t5gemma2past_key_values)encoderdecoderimage_token_indexeoi_token_index)image_token_ideoi_token_idNrt   ru   Tis_encoder_decoder        dropout_rateattention_dropoutclassifier_dropout_rateg{Gz?initializer_rangei tie_word_embeddingsc                   > [        U R                  [        5      (       a  [        S0 U R                  D6U l        O1U R                  c$  [        5       U l        [        R                  S5        [        U R                  [        5      (       a  [        S0 U R                  D6U l        O1U R                  c$  [        5       U l        [        R                  S5        U R                  U R                  R                  l        U R                  U R                  R                  l
        U R                  U R                  R                  l
        U R                  U R                  l        U R                  U R                  l        U R                  U R                  l
        U R                  R                  U l        S H"  nX!;  d  M
  [        U R                  U5      X'   M$     [        TU ]@  " S0 UD6  g )NzDencoder is None, using default T5Gemma2EncoderConfig encoder config.zDdecoder is None, using default T5Gemma2DecoderConfig decoder config.)bos_token_idpad_token_ideos_token_id
vocab_sizerE   )
isinstancert   dictrc   loggerinforu   rj   r|   rf   r}   rg   rv   rw   getattrsuperrL   )rN   rO   special_token_key	__class__s      rR   rL   T5Gemma2Config.__post_init__   sU   dllD))0@4<<@DL\\!02DLKK^_dllD))0@4<<@DL\\!02DLKK^_040A0A  -595K5K  27;7M7M""4)-)?)?&$($5$5!)-)?)?&#||;;!_ .,3DLLBS,T) "` 	''ra   c                 ,   U R                   R                  R                  U R                  R                  :w  aD  [	        SU R                   R                  R                   SU R                  R                   S35      eU R
                  (       d  [	        S5      eU R                   R                  R                  U R                  R                  :w  aD  [	        SU R                   R                  R                   SU R                  R                   S35      eg)zOPart of `@strict`-powered validation. Validates the architecture of the config.zBImbalanced encoder-decoder is not supported in T5Gemma2: encoder (z) vs decoder (z).z4T5Gemma2Model only support encoder-decoder modeling.zRImbalanced encoder-decoder vocabulary size is not supported in T5Gemma2: encoder (N)rt   rf   hidden_sizeru   
ValueErrorrz   r   rN   s    rR   validate_architecture$T5Gemma2Config.validate_architecture   s    <<##//4<<3K3KK LL44@@APTP\P\PhPhOiikm 
 &&STT<<##..$,,2I2II LL44??@t||OfOfNggik  Jra   )ru   rt   rw   ) rX   rY   rZ   r[   r\   r]   keys_to_ignore_at_inferencerc   rj   rh   attribute_maprt   r   strr   __annotations__ru   rz   rK   r|   floatintr}   r~   r   rv   rw   r   rL   r   r`   __classcell__r   s   @rR   rp   rp      s      J#4"5 )(K .)M
 >BG"T#s(^3d:A=AG"T#s(^3d:A## #L%#+#%(us{(+.US[.#u#$s$"&OS4Z& $$(8 ra   rp   c                       \ rS rSrSrg)T5Gemma2RMSNorm   rE   NrX   rY   rZ   r[   r`   rE   ra   rR   r   r          ra   r   c                   6   ^  \ rS rSrS\4U 4S jjrS rSrU =r$ )T5Gemma2MLP   configc                 n   > [         TU ]  U5        [        R                  " UR                  5      U l        g N)r   __init__nnDropoutr|   dropoutrN   r   r   s     rR   r   T5Gemma2MLP.__init__   s&     zz&"5"56ra   c                     U R                  U R                  U5      5      U R                  U5      -  nU R                  U5      nU R	                  U5      nU$ r   )act_fn	gate_projup_projr   	down_proj)rN   xhidden_statesr   s       rR   forwardT5Gemma2MLP.forward   sH    DNN1$56aH]3NN=1	ra   )r   )	rX   rY   rZ   r[   r:   r   r   r`   r   r   s   @rR   r   r      s    71 7 ra   r   c                      ^  \ rS rSrSS\4U 4S jjjr\    SS\S-  S\S   S\S-  S\	S-  S	\
S
\4   4
U 4S jjj5       rSrU =r$ )T5Gemma2RotaryEmbedding   Nr   c                 $   > [         TU ]  X5        g r   r   r   )rN   r   devicer   s      rR   r    T5Gemma2RotaryEmbedding.__init__   s    (ra   r   ztorch.deviceseq_len
layer_typereturnztorch.Tensorc                 $   > [         TU ]  XX#5      $ r   )r   compute_default_rope_parameters)r   r   r   r   r   s       rR   r   7T5Gemma2RotaryEmbedding.compute_default_rope_parameters   s     w6vw[[ra   rE   r   )NNNN)rX   rY   rZ   r[   r:   r   staticmethodr   r   r   tupler   r   r`   r   r   s   @rR   r   r      s    )1 ) ) ,0+/"!%	\"T)\(\ t\ $J	\
 
~u$	%\ \ra   r   c                   4   ^  \ rS rSrS\S\4U 4S jjrSrU =r$ )T5Gemma2SelfAttentioni  r   	layer_idxc                 2   > [         TU ]  X5        SU l        g NFr   r   	is_causalrN   r   r   r   s      rR   r   T5Gemma2SelfAttention.__init__      +ra   r   )	rX   rY   rZ   r[   r:   r   r   r`   r   r   s   @rR   r   r     s    1 c  ra   r   c                   >  ^  \ rS rSrSrS\S\4U 4S jjr SS\R                  S\
\R                  \R                  4   S	\R                  S-  S
\R                  S\S-  S\\   S\
\R                  \R                  S-  \
\R                     S-  4   4S jjrSrU =r$ )T5Gemma2MergedAttentioni
  z6Merged self-attention and cross-attention for decoder.r   r   c                 2   > [         TU ]  X5        SU l        g r   r   r   s      rR   r    T5Gemma2MergedAttention.__init__  r   ra   Nr   position_embeddingsmerged_attention_maskencoder_hidden_statesrs   rO   r   c                    UR                   S S n/ UQSPU R                  P7nUR                   S S n	/ U	QSPU R                  P7n
U R                  U5      R                  U5      R	                  SS5      nU R                  U5      R                  U5      R	                  SS5      nU R                  U5      R                  U5      R	                  SS5      nU R                  U5      nU R                  U5      nUu  p[        XX5      u  pUb[  UR                  nUR                  XU R                  5      u  pUR                  R                  U R                  5      nUR                  nUb  W(       d  U R                  U5      R                  U
5      R	                  SS5      nU R                  U5      R                  U
5      R	                  SS5      nU R                  U5      nUb9  WR                  UUU R                  5      u  nnSUR                  U R                  '   OFWR                   U R                     R"                  nUR                   U R                     R$                  nUnU	S   n[&        R(                  " UU/SS9n[&        R(                  " UU/SS9n[*        R,                  " U R.                  R0                  [2        5      nU" U UUUU4U R4                  (       a  U R6                  OSU R8                  S.UD6u  nnUR:                  " / UQSP76 R=                  5       nU R?                  U5      nUb  USS U* 24   nUSU* S 24   nOS	u  nnUUU4$ )
NrB   r%   Tdimr{   )r   scaling.)NN) shapehead_dimq_projview	transposek_projv_projq_normk_normr0   self_attention_cacheupdater   
is_updatedgetcross_attention_cachelayerskeysvaluestorchcatr   get_interfacer   _attn_implementationr3   trainingr}   r   reshape
contiguouso_proj)rN   r   r   r   r   rs   rO   input_shapehidden_shapecross_input_shapecross_hidden_shapequery_states
key_statesvalue_statescossinr   r   r   cross_key_statescross_value_statescross_key_sizeattention_interfaceattn_outputattn_weightsself_attn_weightscross_attn_weightss                              rR   r   T5Gemma2MergedAttention.forward  sh    $))#2.88b8$--8177<D0D"DdmmD {{=166|DNNqRST[[/44\BLLQPQR
{{=166|DNNqRST{{<0[[,
&#7RU#[ &#2#G#G ';'B'B:]a]k]k'l$J )3377GJ$3$I$I!"*#{{+@AFFGYZddefhij!%-B!C!H!HI[!\!f!fghjk!l#{{+;<*7L7S7S$&8$..84 "4 >B**4>>:4;;DNNKPP!6!=!=dnn!M!T!T $*1-YY
,<=1E
yy,0B!CK(?(M(MKK,,.E)
 %8!	%
 /3mmD**LL	%
 	%
!\ "));;;;FFHkk+. # ,S2BN?2B-B C!-cN?3C.C!D4>11-/AAAra   r   r   )rX   rY   rZ   r[   r\   r:   r   r   r   Tensorr   r
   r   r   r   r`   r   r   s   @rR   r   r   
  s    @1 c  7;TB ||TB #5<<#=>	TB
  %||d2TB  %||TB -t3TB -.TB 
u||U\\D0%2E2LL	MTB TBra   r   sliding_windowr   c           
      X   ^ ^ S[         S[         S[         S[         S[        4
UU 4S jjnU$ )zD
This creates uni/bidirectional attention mask with sliding window.
	batch_idxhead_idxq_idxkv_idxr   c                 t   > T	(       a  T
SpTOT
S-   S-  T
S-  S-   pTX#-
  nUS:  Xd:  -  nUS:  U* U:  -  nXx-  $ )Nr   rB   r%   rE   )r  r  r  r  left_window_sizeright_window_sizedist	left_mask
right_maskr   r	  s            rR   
inner_mask0sliding_window_mask_function.<locals>.inner_maskm  sc    2@!/4BQ4F13L~bcNcfgNg/~QY4#:;	QhD5+<#<=
%%ra   )r   rK   )r	  r   r  s   `` rR   sliding_window_mask_functionr  h  s8    
	&c 	&S 	& 	&c 	&d 	& 	& ra   c                       \ rS rSrSrg)T5Gemma2EncoderLayeri{  rE   Nr   rE   ra   rR   r  r  {  r   ra   r  c                      ^  \ rS rSrSrS\4U 4S jjr     SS\R                  S\	\R                  \R                  4   S\R                  S-  S	\R                  S-  S
\S-  S\S-  S\R                  S-  S\R                  4S jjrSrU =r$ )T5Gemma2DecoderLayeri  zFDecoder sub-layer: merged attention instead of vanilla self-attention.r   c                 B   > [         TU ]  X5        [        UUS9U l        g )N)r   r   )r   r   r   	self_attnr   s      rR   r   T5Gemma2DecoderLayer.__init__  s$    + 1
ra   Nr   r   r   position_idsrs   	use_cacher   r   c                 @   Un	U R                  U5      nU R                  " SUUUUUUUS.UD6u  n  n
U R                  U5      nXR                  U5      -   nUn	U R	                  U5      nU R                  U5      nU R                  U5      nXR                  U5      -   nU$ )N)r   r   r   r  rs   r   r   rE   )pre_self_attn_layernormr  post_self_attn_layernormr   pre_feedforward_layernormmlppost_feedforward_layernorm)rN   r   r   r   r  rs   r   r   rO   residual_s              rR   r   T5Gemma2DecoderLayer.forward  s     !44]C"nn 	
' 3"7%+"7	
 	
q! 55mD <<#>> 66}E/77F <<#>>ra   )r  )NNNFN)rX   rY   rZ   r[   r\   r   r   r   r  r   
LongTensorr
   rK   FloatTensorr   r`   r   r   s   @rR   r  r    s    P
# 
 6:046:!&59 ||  #5<<#=>   %||d2	 
 &&-  -t3  $;   %||d2  
		   ra   r  c                       \ rS rSrSrg)T5Gemma2LMHeadi  rE   Nr   rE   ra   rR   r-  r-    r   ra   r-  c                       \ rS rSrSrg)T5Gemma2ClassificationHeadi  rE   Nr   rE   ra   rR   r/  r/    r   ra   r/  c                   0   ^  \ rS rSrS\4U 4S jjrSrU =r$ )T5Gemma2MultiModalProjectori  r   c                 $   > [         TU ]  U5        g r   r   r   s     rR   r   $T5Gemma2MultiModalProjector.__init__  s     ra   rE   )rX   rY   rZ   r[   rc   r   r`   r   r   s   @rR   r1  r1    s    !4 ! !ra   r1  c                   t   ^  \ rS rSrSr  SS\S\S\S\S\4
U 4S jjjrS	\R                  4U 4S
 jjr
SrU =r$ )T5Gemma2TextScaledWordEmbeddingi  zCT5Gemma2 Embedding: override to add eoi token embedding separately.num_embeddingsembedding_dimpadding_idxembed_scalerw   c                    > [         TU ]  XX45        XPl        [        R                  " [
        R                  " U R                  5      5      U l        g r   )	r   r   rw   r   	Parameterr   zerosr7  eoi_embedding)rN   r6  r7  r8  r9  rw   r   s         rR   r   (T5Gemma2TextScaledWordEmbedding.__init__  s:     	Q.\\%++d6H6H*IJra   	input_idsc                    > [         TU ]  U5      U R                  R                  U R                  R
                  5      -  nU R                  R                  UR
                  5      X!U R                  :H  '   U$ r   )r   r   r9  toweightdtyper=  rw   )rN   r?  input_embeddingsr   s      rR   r   'T5Gemma2TextScaledWordEmbedding.forward  sd     7?958H8H8K8KDKKL]L]8^^>B>P>P>S>STdTjTj>kd&:&::;ra   )r=  rw   )g      ?  )rX   rY   rZ   r[   r\   r   r   r   r   r  r   r`   r   r   s   @rR   r5  r5    sd    M !&
K
K 
K 	
K
 
K 
K 
K     ra   r5  c            	           \ rS rSr% \\S'   SrSrSrSr	/ SQr
\\/\" \SSS	9\" \SSS	9\" \S
SS	9/S.rS rS\R&                  4S jrSrg)T5Gemma2PreTrainedModeli  r   modelTF)r  r  SiglipVisionEmbeddingsSiglipEncoderLayer#SiglipMultiheadAttentionPoolingHeadrB   r  )index
layer_namer%   
cross_attn)r   
attentionsc                    [         R                  " X5        [        U[        5      (       a!  [        R
                  " UR                  5        g [        U[        5      (       aL  [        R
                  " UR                  5        [        R                  " UR                  UR                  5        g [        U[        5      (       a  UR                  R                  R                  S   S-  n[        R                   " UR                  R                  SU R"                  R$                  U-  S9  ['        UR                  S5      (       aC  UR                  R(                  b+  [        R
                  " UR                  R(                  5        g g g SUR*                  R,                  ;   a!  [        R
                  " UR                  5        g [        U[.        5      (       a  UR0                   H  nUR2                  nUR4                  U   S:w  a  [6        UR4                  U      nU" UR"                  US9u  pV[        R8                  " [;        X S	35      U5        [        R8                  " [;        X S
35      U5        M     g g )Nr   g      r{   )meanstdbiasRMSNormdefault)r   	_inv_freq_original_inv_freq)r   _init_weightsr   r1  initzeros_mm_input_projection_weightr5  r=  	constant_r9  scalar_embed_scaler/  out_projrB  r   normal_r   r   hasattrrT  r   rX   r   rH   r   	rope_typer   copy_r   )rN   modulescaler   rope_init_fncurr_inv_freqr(  s          rR   rY  %T5Gemma2PreTrainedModel._init_weights  s   %%d3f9::KK99: ?@@KK,,-NN6--v/H/HI :;;OO**003t;ELL//ct{{?\?\_d?dev//FOO4H4H4TFOO001 5U/ &**333KK& 788$00
%EE##J/9<#6v7G7G
7S#TL#/*#U 

76\+CDmT

76\9K+LM}] 1 9ra   labelsc                 >   U R                   R                  nUR                  nUR                  nUc  [	        S5      eUR                  UR                  5      nUSSS24   R                  5       USSS24'   X5S'   Uc  [	        S5      eUR                  US:H  U5        U$ )	z
Shifts input_ids to the right, prepends the decoder_start_token_id, and handles
pad_token_id replacement for labels that were -100.
This is a common preparation step for decoder inputs in sequence-to-sequence models.
Nz:self.model.config.decoder.bos_token_id has to be defined. .r   rB   ).r   z9self.model.config.decoder.pad_token_id has to be defined.i)	r   ru   r   r   r   	new_zerosr   clonemasked_fill_)rN   ri  decoder_configdecoder_start_token_idr   shifted_input_idss         rR   %prepare_decoder_input_ids_from_labels=T5Gemma2PreTrainedModel.prepare_decoder_input_ids_from_labels  s     ,,!/!<!<%22!)YZZ #,,V\\:%+C"H%5%;%;%=#qr'"$:&!XYY 	&&'8D'@,O  ra   rE   N)rX   rY   rZ   r[   rp   r   base_model_prefixsupports_gradient_checkpointing_supports_flash_attn_supports_flex_attn_no_split_modulesr  r  r#   r   r   _can_record_outputsrY  r   r  rq  r`   rE   ra   rR   rH  rH    s    &*# ! /0DE0kR2!T2!U
^0!ELL !ra   rH  c                   *  ^  \ rS rSr% \\S'   \\S.r SS\S\	4U 4S jjjr
\\\     SS\R                  S-  S\R                   S-  S	\R                  S-  S
\R"                  S-  S\R                   S-  S\\   S\4S jj5       5       5       rSrU =r$ )T5Gemma2TextEncoderi   r   )rP  r   rw   c           	      Z  > [         TU ]  U5        UR                  U l        UR                  U l        [        UR                  UR                  U R                  UR                  S-  US9U l        [        UR                  UR                  S9U l
        SU l        [        R                  " [        UR                  5       Vs/ s H  n[!        X5      PM     sn5      U l        [        R$                  " UR&                  5      U l        [+        U5      U l        U R/                  5         g s  snf Ng      ?)r9  rw   )epsF)r   r   r   r8  r   r5  r   embed_tokensr   rms_norm_epsnormgradient_checkpointingr   
ModuleListrI   rJ   r  r   r   r|   r   r   
rotary_emb	post_initrN   r   rw   r   r   s       rR   r   T5Gemma2TextEncoder.__init__'  s    
 	 !.. ++;**C/+
 $F$6$6F<O<OP	&+#mmFKFLdLdFefFe!&4Fef
 zz&"5"561&9 	 g   D(Nr?  attention_maskr  inputs_embedstoken_type_idsrO   r   c                    US L US L-  (       a  [        S5      eUR                  SS 5        Uc  U R                  U5      nUc<  [        R                  " SUR
                  S   UR                  S9R                  S5      n[        U=n[        5      (       dG  U R                  UUS.n[        S0 UD6[        S0 UDS[        U R                  R                  SS	90D6S
.nUn	0 n
[        U R                  R                  5       H  nU R!                  XU5      X'   M     U R#                  U	5      n	[%        U R&                  S U R                  R(                   5       HC  u  pU" U	XR                  R                  U      XpR                  R                  U      U40 UD6n	ME     U R+                  U	5      n	U R#                  U	5      n	[-        U	S9$ )N:You must specify exactly one of input_ids or inputs_embedsrs   r   rB   r   )r   r  r  and_mask_functionFr   rD   rC   )last_hidden_staterE   )r   rG   r~  r   aranger   r   	unsqueezer   r   r   r   r  r	  setrH   r  r   	enumerater   rJ   r  r   )rN   r?  r  r  r  r  rO   self_attn_mask_mappingmask_kwargsr   r   r   rQ   layer_modules                 rR   r   T5Gemma2TextEncoder.forwardC  s    -t";<YZZ 	

$d+  --i8M <<=+>+>q+A-J^J^_iijklLNB0DII++!."0K #<"Jk"J%> &!&&B4;;C]C]in&o&&" & !dkk556J.2oom[e.f+ 7 ]3(5Tt{{7T7T)UVOA(#KK$;$;A$>?&{{'>'>q'AB	
 M  W 		-0]3+
 	
ra   r   r~  r  r   r  r8  r  r   rF  )NNNNN)rX   rY   rZ   r[   r:   r   r   r  rx  r   r   r"   r$   r   r   r*  r  r+  r   r   r   r   r`   r   r   s   @rR   rz  rz     s    +-  '"  8   .2.20426.2<
##d*<
 t+<
 &&-	<

 ((4/<
 t+<
 +,<
 
<
    <
ra   rz  c                     ^  \ rS rSr% \\S'    SS\S\4U 4S jjjrS rS r	\
\S\R                  S\\   S	\\-  4S
 j5       5       rS\R&                  S-  S\R(                  S-  S\R(                  4S jr\      SS\R&                  S-  S\R                  S-  S\R&                  S-  S\R(                  S-  S\R(                  S-  S\R                  S-  S\\   S	\4S jj5       rSrU =r$ )T5Gemma2Encoderi  r   rw   c                    > [         TU ]  U5        [        R                  UR                  US9U l        [        R                  " UR                  S9U l	        [        U5      U l        U R                  5         g )N)rw   r   )r   r   rz  _from_configrf   
text_modelr&   from_configrg   vision_towerr1  multi_modal_projectorr  )rN   r   rw   r   s      rR   r   T5Gemma2Encoder.__init__  sb    
 	 -::6;M;M_n:o%119M9MN%@%H" 	ra   c                 6    U R                   R                  5       $ r   )r  get_input_embeddingsr   s    rR   r  $T5Gemma2Encoder.get_input_embeddings  s    3355ra   c                 8    U R                   R                  U5      $ r   )r  set_input_embeddingsrN   new_embeddingss     rR   r  $T5Gemma2Encoder.set_input_embeddings  s    33NCCra   pixel_valuesrO   r   c                 v    U R                   " SUSS.UD6nUR                  nU R                  U5      nXSl        U$ )NT)r  return_dictrE   )r  r  r  pooler_output)rN   r  rO   vision_outputsr  image_featuress         rR   get_image_features"T5Gemma2Encoder.get_image_features  sI     **aRVaZ`a*<<334EF'5$ra   r?  Nr  r  c           	      >   U R                   R                  nUcd  Uc  [        S5      eX R                  5       " [        R
                  " U[        R                  UR                  S95      :H  nUR                  S5      nOX:H  nUR                  5       nUR                  S5      R                  U5      R                  UR                  5      nUR                  S   UR                  S   -  n[        X%   R                  5       UR                  5       :H  SU SU 35        U$ )z
Obtains multimodal placeholder mask from `input_ids` or `inputs_embeds`, and checks that the placeholder token count is
equal to the length of multimodal features. If the lengths are different, an error is raised.
z9Either `input_ids` or `inputs_embeds` has to be provided.)rC  r   r   r   rB   z6Image features and image tokens do not match: tokens: z, features )r   rx   r   r  r   tensorlongr   allsumr  	expand_asrA  r   r!   numel)rN   r?  r  r  rx   special_image_maskn_image_tokensn_image_featuress           rR   get_image_placeholder_mask*T5Gemma2Encoder.get_image_placeholder_mask  s    33$ !\]]!.2K2K2M^5::mFZFZ[3 " "4!7!7!;!*!<+//1/99"=GGVYYZgZnZno)//2^5I5I!5LL-3359M9M9OOD^DTT_`p_qr	
 "!ra   r  r  r  c                 h   US L US L-  (       a  [        S5      eUc  U R                  R                  U5      nUba  U R                  USS9R                  nUR                  UR                  UR                  5      nU R                  XUS9n	UR                  X5      nU R                  " SUUUS.UD6n
U
$ )Nr  T)r  )r  r  )r  r  r  rE   )
r   r  r~  r  r  rA  r   rC  r  masked_scatter)rN   r?  r  r  r  r  r  rO   r  
image_maskoutputss              rR   r   T5Gemma2Encoder.forward  s     -t";<YZZ  OO88CM#!44\t4TbbN+..}/C/C]EXEXYN88~ 9 J *88TM// 
')%
 	
 ra   )r  r  r  r  )NNNNNN)rX   rY   rZ   r[   rc   r   r   r   r  r  r   r   r   r  r   r   r   r   r  r*  r+  r  r   r   r`   r   r   s   @rR   r  r    sp   !!
  '%  6D 
!LL
4:;M4N
	+	+
  
"##d*" ((4/" ))	"<  .2.2042615.2!##d*! t+! &&-	!
 ((4/! ''$.! t+! +,! 
! !ra   r  c                   v  ^  \ rS rSr% \\S'   \" \SS9\" \SS9\S.r	SS\S\
4U 4S jjjr\\\        SS
\R                   S	-  S\R"                  S	-  S\R                   S	-  S\S	-  S\R&                  S	-  S\S	-  S\R"                  S	-  S\R"                  S	-  S\\   S\4S jj5       5       5       rSrU =r$ )T5Gemma2Decoderi  r   rB   )rM  r%   )rP  cross_attentionsr   rw   c           	      Z  > [         TU ]  U5        UR                  U l        UR                  U l        [        UR                  UR                  UR                  UR                  S-  US9U l        [        UR                  UR                  S9U l
        SU l        [        R                  " [        UR                  5       Vs/ s H  n[!        X5      PM     sn5      U l        [        R$                  " UR&                  5      U l        [+        U5      U l        U R/                  5         g s  snf r|  )r   r   r   r8  r   r5  r   r~  r   r  r  r  r   r  rI   rJ   r  r   r   r|   r   r   r  r  r  s       rR   r   T5Gemma2Decoder.__init__  s     !.. ++;**C/+
 $F$6$6F<O<OP	&+#mmFKFLdLdFefFe!&4Fef
 zz&"5"561&9	 gr  Nr?  r  r  rs   r  r   r   encoder_attention_maskrO   r   c	           
         US L US L-  (       a  [        S5      eUc  [        S5      eUc  U R                  U5      nU R                  (       d/  U(       a(  Uc%  [        [	        U R
                  S9[	        5       5      nUcU  Ub  UR                  5       OSn
[        R                  " UR                  S   UR                  S9U
-   nUR                  S5      n[        U=n[        5      (       d<  S nU R
                  UUUb  UR                  OS UUS.n[        S0 UD6[!        S0 UD6S	.n[        U=n[        5      (       d  S
[#        U R
                  UUUWS90n[        R$                  " US
   US
   /SS9[        R$                  " US   US
   /SS9S	.nUn0 n['        U R
                  R(                  5       H  nU R+                  UUU5      UU'   M     U R-                  U5      n[/        U R0                  S U R
                  R2                   5       HH  u  nnU" UUU R
                  R(                  U      XR
                  R(                  U      UUUU40 U	D6nMJ     U R5                  U5      nU R-                  U5      n[7        UUS9$ )Nr  z0`encoder_hidden_states` must be given in decoderr  r   rB   r  c                  H    [         R                  " S[         R                  S9$ )NT)rC  )r   r  rK   )argss    rR   <lambda>)T5Gemma2Decoder.forward.<locals>.<lambda>-  s    ELLUZZ4Xra   )r   r  r  rs   r  r  r  rD   )r   r  r  r   r  r   r   rC   )r  rs   rE   )r   r~  r   r
   r	   r   get_seq_lengthr   r  r   r   r  r   r   r   r1   r2   r   r   r  rH   r  r   r  r   rJ   r  r   )rN   r?  r  r  rs   r  r   r   r  rO   past_seen_tokensr  dummy_and_mask_functionr  cross_attn_mask_mappingmerged_attn_mask_mappingr   r   r   rQ   r  s                        rR   r   T5Gemma2Decoder.forward  s    -t";<YZZ (OPP  --i8M}}/F1,dkk2RT`TbcOCRC^==?de <<(;(;A(>}G[G[\_ooL'11!4LNB0DII 'Y#++!."0KZKf?#G#Glp ,%<K #5"C{"C%F%U%U&"
 5KK1TRR ";;;"/#9*?&=#'# $ii'(89;RSc;dekm "''(;<>UVf>ghnp"	$
  & !dkk556J.2oom\[e.f
+ 7 ]3(5Tt{{7T7T)UVOA|(#DKK$;$;A$>?()@)@)CD%	 	M  W 		-0]38++
 	
ra   r  r  )NNNNNNNN)rX   rY   rZ   r[   rj   r   r#   r   r  rx  r   r   r"   r$   r   r   r*  r  r
   r+  rK   r   r   r   r   r`   r   r   s   @rR   r  r    s:   !!$%<AF*+B!L-4 s  ,   .2.2046:26!%596:]
##d*]
 t+]
 &&-	]

 -t3]
 ((4/]
 $;]
  %||d2]
 !&t 3]
 +,]
 
3]
    ]
ra   r  c                     ^  \ rS rSrSSS.rS\4U 4S jjrS rS rS	 r	S
 r
\\            SS\R                  S-  S\R                  S-  S\R                  S-  S\R                  S-  S\R                  S-  S\R                   S-  S\R                  S-  S\S-  S\S-  S\R&                  S-  S\R&                  S-  S\S-  S\\   S\4S jj5       5       rSrU =r$ )T5Gemma2Modelin  z&encoder.text_model.embed_tokens.weightz-encoder.text_model.embed_tokens.eoi_embedding)zdecoder.embed_tokens.weightz"decoder.embed_tokens.eoi_embeddingr   c                    > [         TU ]  U5        [        UR                  UR                  5      U l        [        UR                  UR                  5      U l        U R                  5         g r   )r   r   r  rt   rw   r  ru   r  r   s     rR   r   T5Gemma2Model.__init__u  sL      'v~~v7M7MN&v~~v7M7MNra   c                     U R                   $ r   )rt   r   s    rR   get_encoderT5Gemma2Model.get_encoder~      ||ra   c                     U R                   $ r   ru   r   s    rR   get_decoderT5Gemma2Model.get_decoder  r  ra   c                 6    U R                   R                  5       $ r   )rt   r  r   s    rR   r  "T5Gemma2Model.get_input_embeddings  s    ||0022ra   c                 8    U R                   R                  U5      $ r   )rt   r  r  s     rR   r  "T5Gemma2Model.set_input_embeddings  s    ||00@@ra   Nr?  r  r  r  decoder_input_idsdecoder_attention_maskdecoder_position_idsencoder_outputsrs   r  decoder_inputs_embedsr   rO   r   c                 J   Uc  U R                   " SUUUU
USS.UD6nUR                  nU R                  " SUUUUU	UUUSS.	UD6n[        UR                  UR                  UR
                  UR                  UR                  UR                  UR
                  UR                  S9$ )a8  
decoder_position_ids (`torch.LongTensor` of shape `(batch_size, decoder_sequence_length)`, *optional*):
    Indices of positions of each decoder input sequence tokens in the position embeddings. Selected in the range `[0,
    config.decoder.n_positions - 1]`. [What are position IDs?](../glossary#position-ids)
T)r?  r  r  r  r  r  )	r?  r  r  r  rs   r   r  r   r  )r  rs   decoder_hidden_statesdecoder_attentionsr  encoder_last_hidden_stater   encoder_attentionsrE   )rt   r  ru   r   rs   r   rP  r  )rN   r?  r  r  r  r  r  r  r  rs   r  r  r   rO   r   decoder_outputss                   rR   r   T5Gemma2Model.forward  s    6 ""ll #-)+)  O !0 A A ,, 
'1-/+"7#1
 
 "-??+;;"1"?"?.99,==&5&G&G"1"?"?.99	
 		
ra   )ru   rt   )NNNNNNNNNNNN)rX   rY   rZ   r[   _tied_weights_keysrp   r   r  r  r  r  r   r   r   r*  r+  
BoolTensorr   r
   r  rK   r   r   r   r   r`   r   r   s   @rR   r  r  n  sv    (P.]
~ 3A  .215370459:>8<266:-159!%!=
 ##d*=
 ''$.	=

 ))D0=
 &&-=
 !++d2=
 !& 0 04 7=
 $..5=
 )4/=
 -t3=
 ||d*=
  %||d2=
  $;!=
" +,#=
$ 
%=
  =
ra   r  c            $         ^  \ rS rSrSS0rSS0rSS/S/40rS\4U 4S	 jjrS
 r	S r
S rS rS rS r\\S\R$                  S\\   S\\-  4S j5       5       r\S 5       r\\              S+S\R4                  S-  S\R6                  S-  S\R6                  S-  S\R4                  S-  S\R4                  S-  S\R8                  S-  S\R4                  S-  S\S-  S\S-  S\R6                  S-  S\R6                  S-  S \R4                  S-  S!\S-  S"\ \R$                  -  S\\   S\\R6                     \!-  4 S# jj5       5       r"S$\#S%\$S&\%S'\ S(\ S\4U 4S) jjr&S*r'U =r($ ), T5Gemma2ForConditionalGenerationi  zlm_head.out_proj.weightz,model.encoder.text_model.embed_tokens.weightzlm_head.out_projcolwise_gather_outputr   logitsr   c                   > [         TU ]  U5        [        U5      U l        UR                  R
                  U l        [        UR                  R                  U R
                  5      U l        SU l	        U R                  5         g )NForMaskedLM)r   r   r  rI  ru   r   r-  r   lm_head	loss_typer  r   s     rR   r   )T5Gemma2ForConditionalGeneration.__init__  sZ     "6*
 ..33%fnn&@&@$//R&ra   c                 $    XR                   l        g r   r  r_  r  s     rR   set_output_embeddings6T5Gemma2ForConditionalGeneration.set_output_embeddings  s     .ra   c                 .    U R                   R                  $ r   r  r   s    rR   get_output_embeddings6T5Gemma2ForConditionalGeneration.get_output_embeddings  s    ||$$$ra   c                 6    U R                   R                  5       $ r   rI  r  r   s    rR   r  5T5Gemma2ForConditionalGeneration.get_input_embeddings      zz..00ra   c                 :    U R                   R                  U5        g r   rI  r  rN   values     rR   r  5T5Gemma2ForConditionalGeneration.set_input_embeddings      

''.ra   c                 6    U R                   R                  5       $ r   )rI  r  r   s    rR   r  ,T5Gemma2ForConditionalGeneration.get_encoder      zz%%''ra   c                 6    U R                   R                  5       $ r   )rI  r  r   s    rR   r  ,T5Gemma2ForConditionalGeneration.get_decoder  r  ra   r  rO   r   c                 D    U R                  5       R                  " U40 UD6$ r   )r  r  )rN   r  rO   s      rR   r  3T5Gemma2ForConditionalGeneration.get_image_features  s#    
 !44\LVLLra   c                 6    U R                  5       R                  $ r   )r  r  r   s    rR   r  -T5Gemma2ForConditionalGeneration.vision_tower  s    !...ra   Nr?  r  r  r  r  r  r  rs   r  r  ri  r   logits_to_keepc                    Ub  Uc  Uc  U R                  U5      nU R                  " SUUUUUUUUU	U
UUS.UD6nUR                  n[        U[        5      (       a  [        U* S5      OUnU R                  USS2USS24   5      nU R                  R                  nUR                  b4  UUR                  -  n[        R                  " U5      nUUR                  -  nSnUb  U R                  " UXR                  40 UD6n[        UUUR                  UR                   UR"                  UR$                  UR&                  UR(                  UR*                  S9	$ )a  
decoder_position_ids (`torch.LongTensor` of shape `(batch_size, decoder_sequence_length)`, *optional*):
    Indices of positions of each decoder input sequence tokens in the position embeddings. Selected in the range `[0,
    config.decoder.n_positions - 1]`. [What are position IDs?](../glossary#position-ids)
labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
    Labels for computing the masked language modeling loss. Indices should either be in `[0, ...,
    config.vocab_size]` or -100 (see `input_ids` docstring). Tokens with indices set to `-100` are ignored
    (masked), the loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`.
N)r?  r  r  r  r  r  r  r  rs   r  r  r   )	lossr  rs   r  r  r  r  r   r  rE   )rq  rI  r  r   r   slicer  r   ru   final_logit_softcappingr   tanhloss_functionr   r   rs   r  r  r  r  r   r  )rN   r?  r  r  r  r  r  r  r  rs   r  r  ri  r   r  rO   r  r   slice_indicesr  rn  r  s                         rR   r   (T5Gemma2ForConditionalGeneration.forward  so   B "3";@U@] $ J J6 R.2jj /
%)%/#9!5++'"7/
 /
  (998B>SV8W8W~ot4]kmA}a,?@A,,11=nDDDFZZ'FnDDDF%%ffooPPD+;;"1"G"G.AA,==&5&O&O"1"G"G.AA

 
	
ra   generation_configmodel_kwargsgeneration_mode
batch_sizemax_cache_lengthc           
        > [         TU ]  UUUUU5        UR                  SL a  gUR                  nUc  SnOSUR                  ;   n[        R
                  " U R                  R                  SS95      nSUl        S/UR                  -  Ul
        UUS.n	UR                  S5      n
U
b  [        U
[        5      (       d  [        S	5      e[        U
R                   5      S
:  a!  U
R                   R                  S
5      (       a  g[#        U
R$                  5      nU[&        :X  a  US   S
   R(                  S   U	S'   U" S0 U	D6U
l        O:[        [+        S0 U R                  R                  SS9US.D6[+        5       5      US'   [-        U S5      (       aC  U R.                  b5  [        U R.                  [        5      (       d  [        S5      eUS   U l        ggg)zMOverride cache preparation to support T5Gemma2-specific EncoderDecoder Cache.FN	offloadedTr  rD   )r   
offloadingrs   zaThe `past_key_values` in `model_kwargs` must be of type `EncoderDecoderCache` for T5Gemma2 model.r   r  rB   max_cache_len_cachezLThe internal cache must be of type `EncoderDecoderCache` for T5Gemma2 model.rE   )r   _prepare_cache_for_generationr   cache_implementationcopydeepcopyr   get_text_configr	  rJ   rH   r   r   r
   r   lenr   typer   r   r   r	   ra  r*  )rN   r!  r"  r#  r$  r%  r,  offload_cachecross_attn_configcross_attn_cache_kwargsrs   cross_attn_clsr   s               rR   r+  >T5Gemma2ForConditionalGeneration._prepare_cache_for_generationK  s    	-	
 &&%/0EE'!M'+<+Q+QQM !MM$++*E*Ed*E*ST ,0()9(:=N=`=`(`% ('#

 '**+<=&o/BCC w 
 ?--.27Q7Q7U7UVW7X7X!/"G"GHN,;GHY;Z[\;];c;cde;f'84B4]E\4]O1 /B "&++"="=d"="K&3 /L*+ 4""t{{'>dkk+>?? !opp&'89DK	 (?"ra   )r*  r  r  rI  r   )NNNNNNNNNNNNNr   ))rX   rY   rZ   r[   r  _tp_plan_pp_planrp   r   r   r  r  r  r  r  r   r   r   r  r   r   r   r   r  propertyr  r*  r+  r  r   r
   rK   r   r   r   r   r   r   r+  r`   r   r   s   @rR   r  r    s   !#Q #$;<H"o%6
$CDH~ /%1/(( M!LLM4:;M4NM	+	+M  M
 / /  .215370459:>8<266:26:>*.!%-.%M
 ##d*M
 ''$.	M

 ))D0M
 &&-M
 !++d2M
 !& 0 04 7M
 $..5M
 )4/M
 -t3M
 ((4/M
  %0047M
    4'!M
" $;#M
$ ell*%M
& +,'M
( 
u  	!O	3)M
  M
^I:+I: I: (	I:
 I: I: 
I: I:ra   r  c                     ^  \ rS rSrS\4U 4S jjrS rS r\\	           SS\
R                  S-  S\
R                  S-  S	\
R                  S-  S
\
R                  S-  S\
R                  S-  S\
R                  S-  S\
R                  S-  S\S-  S\
R                  S-  S\
R                  S-  S\
R                  S-  S\\   S\4S jj5       5       rSrU =r$ )!T5Gemma2ForSequenceClassificationi  r   c                 "  > [         TU ]  U5        UR                  U l        UR                  R                  U l        [        U5      U l        [        USS5      n[        U R                  U R                  U5      U l	        U R                  5         g Nr~   g?r   r   
num_labelsru   r   r  rI  r   r/  scorer  rN   r   classifier_dropoutr   s      rR   r   *T5Gemma2ForSequenceClassification.__init__  sp      ++!>>55"6*
$V-FL/0@0@$//Sef
ra   c                 6    U R                   R                  5       $ r   r  r   s    rR   r  6T5Gemma2ForSequenceClassification.get_input_embeddings  r  ra   c                 :    U R                   R                  U5        g r   r
  r  s     rR   r  6T5Gemma2ForSequenceClassification.set_input_embeddings  r  ra   Nr?  r  r  r  r  r  r  r  r  r  ri  rO   r   c                 f   U	c  U
b#  [        SU R                  R                   S35      eUc  [        S5      eUc  U R	                  U5      nU R
                  " U4UUUUUUUU	U
SS.
UD6nUR                  nUR                  nUR                  nU R                  U5      nUR                  S   nXPR                  R                  :g  R                  UR                  [        R                   5      n[        R"                  " UR                  S   UR                  [        R                   S	9nUU-  R%                  S5      n[        R&                  " UUR                  S   S
-
  S9nU[        R"                  " UUR                  S9U4   nSnUb  U R)                  UUUU R                  S9n[+        UUUUS9$ )  
decoder_position_ids (`torch.LongTensor` of shape `(batch_size, decoder_sequence_length)`, *optional*):
    Indices of positions of each decoder input sequence tokens in the position embeddings. Selected in the range `[0,
    config.decoder.n_positions - 1]`. [What are position IDs?](../glossary#position-ids)
labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
    Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
    config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
    `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
N8Passing input embeddings is currently not supported for .You have to specify input_idsF
r  r  r  r  r  r  r  r  r  r   r   r   )r   rC  rB   )maxr  )r  ri  pooled_logitsr   r  r  r   rP  )NotImplementedErrorr   rX   r   rq  rI  r  r  r  r@  r   r   r   rA  r   r   int32r  argmaxclampr  r   )rN   r?  r  r  r  r  r  r  r  r  r  ri  rO   r  r  r   rP  r  r$  non_pad_masktoken_indiceslast_non_pad_tokenrO  r  s                           rR   r   )T5Gemma2ForSequenceClassification.forward  s   4 $(=(I%J4>>KbKbJccde  <==$ $ J J9 U&*jj'
%)%/#9!5+'"7'
 '
 $5555//
-.__Q'
)[[-E-EEII&--Y^YdYde%6%<%<R%@^c^i^ij+l:BB2F"[[);ARAXAXY[A\_`A`au||Jv}}MOaab%%VFR_hlhshs%tD' '!	
 	
ra   r   rI  r?  r@  NNNNNNNNNNN)rX   rY   rZ   r[   rp   r   r  r  r   r   r   r*  r+  r  r   r   r   r   r   r`   r   r   s   @rR   r;  r;    s\   	~ 	1/  .215.204596:8<2626:>*.J
##d*J
 ''$.J
 t+	J

 &&-J
 !++d2J
 !&t 3J
 $..5J
 )4/J
 ((4/J
  %0047J
   4'J
 +,J
 
"J
  J
ra   r;  c                     ^  \ rS rSrS\4U 4S jjrS rS r\\	           SS\
R                  S-  S\
R                  S-  S	\
R                  S-  S
\
R                  S-  S\
R                  S-  S\
R                  S-  S\
R                  S-  S\S-  S\
R                  S-  S\
R                  S-  S\
R                  S-  S\\   S\4S jj5       5       rSrU =r$ )T5Gemma2ForTokenClassificationi  r   c                 "  > [         TU ]  U5        UR                  U l        UR                  R                  U l        [        U5      U l        [        USS5      n[        U R                  U R                  U5      U l	        U R                  5         g r=  r>  rA  s      rR   r   'T5Gemma2ForTokenClassification.__init__  sp      ++!>>55"6*
$V-FL/0@0@$//Sef
ra   c                 6    U R                   R                  5       $ r   r  r   s    rR   r  3T5Gemma2ForTokenClassification.get_input_embeddings  r  ra   c                 :    U R                   R                  U5        g r   r
  r  s     rR   r  3T5Gemma2ForTokenClassification.set_input_embeddings
  r  ra   Nr?  r  r  r  r  r  r  r  r  r  ri  rO   r   c                    U	c  U
b#  [        SU R                  R                   S35      eUc  [        S5      eUc  U R	                  U5      nU R
                  " U4UUUUUUUU	U
SS.
UD6nUR                  nUR                  nUR                  nU R                  U5      nSnUb  U R                  UXR                  5      n[        UUUUS9$ )rI  NrJ  rK  rL  FrM  rP  )rQ  r   rX   r   rq  rI  r  r  r  r@  r  r   r   )rN   r?  r  r  r  r  r  r  r  r  r  ri  rO   r  r  r   rP  r  r  s                      rR   r   &T5Gemma2ForTokenClassification.forward  s   4 $(=(I%J4>>KbKbJccde  <==$ $ J J9 U&*jj'
%)%/#9!5+'"7'
 '
 $5555//
-.%%ffkkBD$'!	
 	
ra   rY  rZ  )rX   rY   rZ   r[   rp   r   r  r  r   r   r   r*  r+  r  r   r   r   r   r   r`   r   r   s   @rR   r\  r\    s\   
~ 
1/  .215.204596:8<2626:>*.@
##d*@
 ''$.@
 t+	@

 &&-@
 !++d2@
 !&t 3@
 $..5@
 )4/@
 ((4/@
  %0047@
   4'@
 +,@
 
@
  @
ra   r\  )
rp   r:   rc   rj   r  r  r  rH  r;  r\  )T)jr-  collections.abcr   typingr   r   r   torch.nnr   huggingface_hub.dataclassesr    r   rZ  cache_utilsr	   r
   r   configuration_utilsr   
generationr   r   r   masking_utilsr   modeling_flash_attention_utilsr   modeling_outputsr   r   r   r   r   r   r   modeling_rope_utilsr   modeling_utilsr   r   processing_utilsr   utilsr   r   r   r    r!   utils.genericr"   utils.output_capturingr#   r$   autor&   gemma3.configuration_gemma3r'   r(   gemma3.modeling_gemma3r)   r*   r+   r,   r-   r.   r/   r0   r1   r2   r3   siglipr4   t5gemma.modeling_t5gemmar5   r6   r7   
get_loggerrX   r   r:   rc   rj   rp   r   r   r   r   r   r   r  r  r  r-  r/  r1  r5  rH  rz  r  r  r  r  r;  r\  __all__rE   ra   rR   <module>r}     s    $     . & I I 3 K K 6 B   7 F &  8 E  H    (  
		H	% 781)+; 1  912 78L   9 781,.> 1  912 78T% T  9Tn	m 		) 	\3 \O [Bo [B|  &	. 	,. ,^	] 		!: 	!"; !
 &C  * L!3 L! L!^b
1 b
Je- eP~
- ~
B Z
+ Z
 Z
zH:'> H:V ^
(? ^
 ^
B U
%< U
 U
pra   