
    Z j,4                     P   S SK Jr  S SKrS SKJs  Jr  S SKJr  SSKJrJ	r	  SSK
Jr  SSKJr  SSKJr  SS	KJr  SS
KJr  SSKJrJr  SSKJrJr  SSKJr  SSKJr  SSKJrJ r J!r!J"r"J#r#J$r$J%r%  SSK&J'r'  \" 5       (       a	  S SK(J)r)J*r*  OSu  r)r*\)\*4r+\," \+5      r-\R\                  " \/5      r0 " S S\#5      r1 " S S\5      r2 " S S\Rf                  5      r4 " S S\5      r5 " S S\Rf                  5      r6 " S S \5      r7 " S! S"\"5      r8 " S# S$\!5      r9 " S% S&\ 5      r:/ S'Qr;g)(    )CallableN)nn   )CacheDynamicCache)create_causal_mask)GradientCheckpointingLayer)BaseModelOutputWithPast)ALL_ATTENTION_FUNCTIONS)Unpack)TransformersKwargslogging)is_causal_conv1d_availableis_torchdynamo_compiling   )apply_mask_to_padding_states)Gemma2RotaryEmbedding)LlamaAttentionLlamaForCausalLM
LlamaModelLlamaPreTrainedModelLlamaRMSNormapply_rotary_pos_embeager_attention_forward   )
Lfm2Config)causal_conv1d_fncausal_conv1d_updateNNc                       \ rS rSrSrg)Lfm2RMSNorm7    N__name__
__module____qualname____firstlineno____static_attributes__r#       v/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/models/lfm2/modular_lfm2.pyr!   r!   7       r*   r!   c                       \ rS rSrSrg)Lfm2RotaryEmbedding;   r#   Nr$   r#   r*   r+   r.   r.   ;   r,   r*   r.   c                   6   ^  \ rS rSrS\4U 4S jjrS rSrU =r$ )Lfm2MLP?   configc                   > [         TU ]  5         UR                  nUR                  (       aa  [	        SU-  S-  5      nUR
                  bC  [	        UR
                  U-  5      nUR                  X!R                  -   S-
  UR                  -  -  n[        R                  " UR                  USS9U l
        [        R                  " UR                  USS9U l        [        R                  " X!R                  SS9U l        g )Nr   r   r   Fbias)super__init__intermediate_sizeblock_auto_adjust_ff_dimintblock_ffn_dim_multiplierblock_multiple_ofr   Linearhidden_sizew1w3w2)selfr3   r9   	__class__s      r+   r8   Lfm2MLP.__init__@   s    "44** #A(9$9A$= >..:$'(G(GJ[([$\!$*$<$<&)A)AAAE&JbJbb%! ))F..0AN))F..0AN))-/A/ANr*   c                     U R                  [        R                  " U R                  U5      5      U R	                  U5      -  5      $ N)rB   Fsilur@   rA   )rC   xs     r+   forwardLfm2MLP.forwardO   s/    wwqvvdggaj)DGGAJ677r*   )r@   rB   rA   )	r%   r&   r'   r(   r   r8   rK   r)   __classcell__rD   s   @r+   r1   r1   ?   s    Oz O8 8r*   r1   c                      ^  \ rS rSrS\S\4U 4S jjr SS\R                  S\	\R                  \R                  4   S\R                  S-  S	\
S-  S
\	\R                  \R                  S-  4   4
S jjrSrU =r$ )Lfm2AttentionS   r3   	layer_idxc                   > [         TU ]  X5        [        R                  " UR                  UR
                  U R                  -  SS9U l        [        R                  " UR                  UR                  U R                  -  SS9U l	        [        R                  " UR                  UR                  U R                  -  SS9U l
        [        R                  " UR
                  U R                  -  UR                  SS9U l        [        U R                  UR                  S9U l        [        U R                  UR                  S9U l        U ?U ?g )NFr5   eps)r7   r8   r   r>   r?   num_attention_headshead_dimq_projnum_key_value_headsk_projv_projout_projr!   norm_epsq_layernormk_layernormo_projattention_dropoutrC   r3   rR   rD   s      r+   r8   Lfm2Attention.__init__T   s    +ii 2 2F4N4NQUQ^Q^4^ejkii 2 2F4N4NQUQ^Q^4^ejkii 2 2F4N4NQUQ^Q^4^ejk		&"<"<t}}"LfN`N`glm&t}}&//J&t}}&//JK"r*   Nhidden_statesposition_embeddingsattention_maskpast_key_valuesreturnc                     UR                   S S n/ UQSPU R                  P7nU R                  U R                  U5      R                  " U6 5      R                  SS5      nU R                  U R                  U5      R                  " U6 5      R                  SS5      n	U R                  U5      R                  " U6 R                  SS5      n
Uu  p[        XX5      u  pUb  UR                  XU R                  5      u  p[        R                  " U R                  R                  [         5      nU" U UU	U
U4SU R"                  S.UD6u  pUR$                  " / UQSP76 R'                  5       nU R)                  U5      nUU4$ )Nr   r   g        )dropoutscaling)shaperW   r^   rX   view	transposer_   rZ   r[   r   updaterR   r   get_interfacer3   _attn_implementationr   rl   reshape
contiguousr\   )rC   rd   re   rf   rg   kwargsinput_shapehidden_shapequery_states
key_statesvalue_statescossinattention_interfaceattn_outputattn_weightsoutputs                    r+   rK   Lfm2Attention.forward_   s    $))#2.88b8$--8''M(B(G(G(VWaabcefg%%dkk-&@&E&E|&TU__`acde
{{=166EOOPQSTU&#7RU#[ &'6'='=jX\XfXf'g$J(?(M(MKK,,.E)
 %8	%
 LL	%
 	%
! "));;;;FFH{+|##r*   )r_   rZ   r\   r^   rX   r[   rG   )r%   r&   r'   r(   r   r;   r8   torchTensortupler   rK   r)   rM   rN   s   @r+   rP   rP   S   s    	#z 	#c 	#  )-%$||%$ #5<<#=>%$ t+	%$
 %$ 
u||U\\D00	1%$ %$r*   rP   c                   *  ^  \ rS rSrS\S\4U 4S jjr  SS\R                  S\	S-  S\R                  S-  4S	 jjr
  SS\R                  S\	S-  S\R                  S-  4S
 jjr  SS\R                  S\	S-  S\R                  S-  4S jjrSrU =r$ )Lfm2ShortConv   r3   rR   c           	      "  > [         TU ]  5         Xl        X l        UR                  U l        UR                  U l        [        R                  " UR                  UR                  U R
                  UR                  U R                  U R
                  S-
  S9U l        [        R                  " UR                  SUR                  -  U R                  S9U l        [        R                  " UR                  UR                  U R                  S9U l        g )Nr   )in_channelsout_channelskernel_sizegroupsr6   paddingr   r5   )r7   r8   r3   rR   conv_L_cacheL_cache	conv_biasr6   r   Conv1dr?   convr>   in_projr\   rb   s      r+   r8   Lfm2ShortConv.__init__   s    
 	"**$$	II**++%%LL1$
	 yy!3!3Q9K9K5KRVR[R[\		&"4"4f6H6HtyyYr*   NrJ   rg   rf   c                    [        X5      nU R                  U5      R                  SS5      nUR                  SSS9u  pVnXQ-  nU R                  R
                  R                  U R                  R
                  R                  S5      U R                  R
                  R                  S5      5      nUb  UR                  U R                  5      (       ae  [        UR                  S5      UR                  U R                     R                  UU R                  R                  S 5      n	U	R                  S5      n	OxUbV  [         R"                  R%                  XpR&                  UR(                  S   -
  S45      n
UR+                  XR                  5      n
[-        XxU R                  R                  S S9n	Xi-  nU R/                  UR                  SS5      R1                  5       5      nU$ )Nrj   r   dimr   r   )
activation)r   r   ro   chunkr   weightrn   sizehas_previous_staterR   r   squeezelayersconv_statesr6   	unsqueezer   
functionalpadr   rm   update_conv_stater   r\   rt   )rC   rJ   rg   rf   BCxBCBxconv_weightsconv_out
conv_stateys               r+   cuda_kernels_forward"Lfm2ShortConv.cuda_kernels_forward   s    );ll1o''B/))A2)&aUyy'',,TYY-=-=-B-B1-EtyyGWGWG\G\]^G_`&?+M+Mdnn+]+]+

2&&t~~6BB		H  ))"-H*]]..rLL288B<4OQR3ST
,>>z>>Z
'$))..UYZHLMM!++b"-88:;r*   c                    UR                   S   n[        X5      nU R                  U5      R                  SS5      nUR	                  SSS9u  pgnXa-  nUb  UR                  U R                  5      (       a  UR                  XR                  5      n	[        R                  " U	R                  UR                  5      U R                  R                  S S 2SS S 24   -  SS9n
U R                  (       a  XR                  R                  -  n
U
R                  S5      n
OqUbV  [         R"                  R%                  XR&                  UR                   S   -
  S45      n	UR                  XR                  5      n	U R                  U5      SS U24   n
Xz-  nUR                  SS5      R)                  5       nU R+                  U5      nU$ )Nr   rj   r   r   r   r   .)rm   r   r   ro   r   r   rR   r   r   sumtodevicer   r   r6   r   r   r   r   r   rt   r\   )rC   rJ   rg   rf   seqlenr   r   r   r   r   r   r   s               r+   slow_forwardLfm2ShortConv.slow_forward   ss    (;ll1o''B/))A2)&aU&?+M+Mdnn+]+](::2~~NJyyryy!9DII<L<LQPQSTW<U!U[]^HyyIINN*))"-H*]]..rLL288B<4OQR3ST
,>>z>>Z
yy}S'6'\2HLKKB**,MM!r*   rd   c                     [         (       a;  SUR                  R                  ;   a!  [        5       (       d  U R	                  XU5      $ U R                  XU5      $ )Ncuda)is_fast_path_availabler   typer   r   r   )rC   rd   rg   rf   s       r+   rK   Lfm2ShortConv.forward   sL     "!f0D0D0I0I&IRjRlRl,,]^\\  PPr*   )r   r6   r3   r   r   rR   r\   r   )r%   r&   r'   r(   r   r;   r8   r   r   r   r   r   rK   r)   rM   rN   s   @r+   r   r      s    ZZ Z2 )-.2	<<  t+	H )-.2	<<  t+	H )-.2	Q||Q Q t+	Q Qr*   r   c                      ^  \ rS rSrS\S\4U 4S jjr    SS\R                  S\	\R                  \R                  4   S-  S\R                  S-  S	\R                  S-  S
\S-  S\R                  4S jjrSrU =r$ )Lfm2DecoderLayer   r3   rR   c                 `  > [         TU ]  5         UR                  U   S:H  U l        U R                  (       a  [	        X5      U l        O[        X5      U l        [        U5      U l	        [        UR                  UR                  S9U l        [        UR                  UR                  S9U l        g )Nfull_attentionrT   )r7   r8   layer_typesis_attention_layerrP   	self_attnr   r   r1   feed_forwardr!   r?   r]   operator_normffn_normrb   s      r+   r8   Lfm2DecoderLayer.__init__   s    "("4"4Y"?CS"S""*6=DN%f8DI#FO(););Q#F$6$6FOOLr*   Nrd   re   rf   position_idsrg   rh   c           	         UnU R                   (       a*  U R                  " SU R                  U5      UUUUS.UD6u  pO U R                  U R                  U5      UUS9nX-   nXR	                  U R                  U5      5      -   nU$ )N)rd   re   rf   r   rg   )rd   rg   rf   r#   )r   r   r   r   r   r   )	rC   rd   re   rf   r   rg   ru   residual_s	            r+   rK   Lfm2DecoderLayer.forward   s     !""#~~  "00?$7-) /   M1 !II"00? /- & M
 &0%(9(9$--:V(WWr*   )r   r   r   r   r   r   )NNNN)r%   r&   r'   r(   r   r;   r8   r   r   r   
LongTensorr   rK   r)   rM   rN   s   @r+   r   r      s    
Mz 
Mc 
M IM.204(,|| #5<<#=>E t+	
 &&-  
 r*   r   c                       \ rS rSrSrSrg)Lfm2PreTrainedModeli  Fr#   N)r%   r&   r'   r(   _can_compile_fullgraphr)   r#   r*   r+   r   r     s    "r*   r   c                      ^  \ rS rSrS\4U 4S jjr      SS\R                  S-  S\R                  S-  S\R                  S-  S\	S-  S	\R                  S-  S
\S-  S\\   S\4S jjrSrU =r$ )	Lfm2Modeli  r3   c                 n   > [         TU ]  U5        [        UR                  UR                  S9U l        U ?g )NrT   )r7   r8   r!   r?   r]   embedding_normnorm)rC   r3   rD   s     r+   r8   Lfm2Model.__init__  s.     )&*<*<&//RIr*   N	input_idsrf   r   rg   inputs_embeds	use_cacheru   rh   c           	         US L US L-  (       a  [        S5      eUc  U R                  U5      nU(       a  Uc  [        U R                  S9nUcU  Ub  UR	                  5       OSn[
        R                  " UR                  S   UR                  S9U-   nUR                  S5      n[        U R                  UUUUS9n	UR                  S   S:w  a  UOS n
UnU R                  XS9n[        U R                  S U R                  R                   5       H4  u  pU R                  R                  U   S:X  a  U	OU
nU" U4UUUUS	.UD6nM6     U R!                  U5      n[#        UUS
9$ )Nz:You must specify exactly one of input_ids or inputs_embeds)r3   r   r   )r   )r3   r   rf   rg   r   )r   r   )rf   re   r   rg   )last_hidden_staterg   )
ValueErrorembed_tokensr   r3   get_seq_lengthr   arangerm   r   r   r   
rotary_emb	enumerater   num_hidden_layersr   r   r
   )rC   r   rf   r   rg   r   r   ru   past_seen_tokenscausal_masklinear_attentionrd   re   idecoder_layer
layer_masks                   r+   rK   Lfm2Model.forward!  s    -t";<YZZ  --i8M0*$++>OCRC^==?de <<(;(;A(>}G[G[\_ooL'11!4L(;;')+%
 .;-@-@-Cq-H>d%"oomoW !*$++6U8U8U*V WA(,(?(?(BFV(V\lJ))$7) / M !X ++M:&++
 	
r*   )r   )NNNNNN)r%   r&   r'   r(   r   r8   r   r   r   r   FloatTensorboolr   r   r
   rK   r)   rM   rN   s   @r+   r   r     s    z  .2.204(,26!%6
##d*6
 t+6
 &&-	6

 6
 ((4/6
 $;6
 +,6
 
!6
 6
r*   r   c                       \ rS rSrSrg)Lfm2ForCausalLMiZ  r#   Nr$   r#   r*   r+   r   r   Z  r,   r*   r   )r   r   r   )<collections.abcr   r   torch.nn.functionalr   r   rH   cache_utilsr   r   masking_utilsr   modeling_layersr	   modeling_outputsr
   modeling_utilsr   processing_utilsr   utilsr   r   utils.import_utilsr   r   bamba.modeling_bambar   gemma2.modeling_gemma2r   llama.modeling_llamar   r   r   r   r   r   r   configuration_lfm2r   causal_conv1dr   r   kernel_modulesallr   
get_loggerr%   loggerr!   r.   Moduler1   rP   r   r   r   r   r   __all__r#   r*   r+   <module>r     s   %     . / 9 7 5 & 0 V ? :   + DD-7** #$89^,  
		H	%	, 		/ 	8bii 8(1$N 1$haQBII aQH)1 )X#. #<

 <
~	& 	 Br*   