
    Z j                        S SK Jr  S SKrS SKJr  SSKJrJr  SSKJrJ	r	  SSK
Jr  SSKJr  SS	KJr  SS
KJr  SSKJrJrJr  SSKJr  SSKJr  SSKJr  SSKJrJrJrJ r J!r!J"r"J#r#J$r$J%r%J&r&  SSK'J(r(  SSK)J*r*  \RV                  " \,5      r- " S S\#5      r. " S S\5      r/ " S S\5      r0 " S S\5      r1 " S S\$5      r2 " S S\(5      r3 " S  S!\5      r4 " S" S#\!5      r5 " S$ S%\"5      r6 " S& S'\ 5      r7/ S(Qr8g))    )CallableN)nn   )CacheDynamicCache)create_causal_mask!create_sliding_window_causal_mask)FlashAttentionKwargs)BaseModelOutputWithPast)ALL_ATTENTION_FUNCTIONS)Unpack)TransformersKwargsauto_docstringlogging)merge_with_config_defaults)capture_outputs   )Gemma2RotaryEmbedding)
LlamaAttentionLlamaDecoderLayerLlamaForCausalLMLlamaForQuestionAnsweringLlamaForSequenceClassificationLlamaForTokenClassificationLlamaMLPLlamaPreTrainedModelapply_rotary_pos_embeager_attention_forward)MistralModel   )Qwen2Configc                   (   ^  \ rS rSrU 4S jrSrU =r$ )Qwen2MLP%   c                 >  > [         TU ]  U5        [        R                  " U R                  U R
                  SS9U l        [        R                  " U R                  U R
                  SS9U l        [        R                  " U R
                  U R                  SS9U l        g )NFbias)	super__init__r   Linearhidden_sizeintermediate_size	gate_projup_proj	down_projselfconfig	__class__s     x/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/models/qwen2/modular_qwen2.pyr)   Qwen2MLP.__init__&   ss     4#3#3T5K5KRWXyy!1!143I3IPUV4#9#94;K;KRWX    )r/   r-   r.   )__name__
__module____qualname____firstlineno__r)   __static_attributes____classcell__r3   s   @r4   r#   r#   %   s    Y Yr6   r#   c                       \ rS rSrSrg)Qwen2RotaryEmbedding-    Nr7   r8   r9   r:   r;   rA   r6   r4   r?   r?   -       r6   r?   c                      ^  \ rS rSrS\S\4U 4S jjr SS\R                  S\	\R                  \R                  4   S\R                  S-  S	\
S-  S
\\   S\	\R                  \R                  S-  4   4S jjrSrU =r$ )Qwen2Attention1   r2   	layer_idxc                   > [        US5      (       a  UR                  U   OS U l        [        TU ]  X5        [
        R                  " UR                  UR                  U R                  -  SS9U l
        [
        R                  " UR                  UR                  U R                  -  SS9U l        [
        R                  " UR                  UR                  U R                  -  SS9U l        [
        R                  " UR                  U R                  -  UR                  SS9U l        U R                  S:X  a  UR                  U l        g S U l        g )Nlayer_typesTr&   Fsliding_attention)hasattrrI   
layer_typer(   r)   r   r*   r+   num_attention_headshead_dimq_projnum_key_value_headsk_projv_projo_projsliding_window)r1   r2   rG   r3   s      r4   r)   Qwen2Attention.__init__2   s   ;B6=;Y;Y&,,Y7_c+ii 2 2F4N4NQUQ^Q^4^eijii 2 2F4N4NQUQ^Q^4^eijii 2 2F4N4NQUQ^Q^4^eijii : :T]] JFL^L^ejk7;J]7]f33cgr6   Nhidden_statesposition_embeddingsattention_maskpast_key_valueskwargsreturnc                    UR                   S S n/ UQSPU R                  P7nU R                  U5      R                  U5      R	                  SS5      nU R                  U5      R                  U5      R	                  SS5      n	U R                  U5      R                  U5      R	                  SS5      n
Uu  p[        XX5      u  pUb  UR                  XU R                  5      u  p[        R                  " U R                  R                  [        5      nU" U UU	U
U4U R                  (       d  SOU R                   U R"                  U R$                  S.UD6u  pUR&                  " / UQSP76 R)                  5       nU R+                  U5      nX4$ )Nr    r   g        )dropoutscalingrT   )shaperN   rO   view	transposerQ   rR   r   updaterG   r   get_interfacer2   _attn_implementationr   trainingattention_dropoutr_   rT   reshape
contiguousrS   )r1   rV   rW   rX   rY   rZ   input_shapehidden_shapequery_states
key_statesvalue_statescossinattention_interfaceattn_outputattn_weightss                   r4   forwardQwen2Attention.forward;   s    $))#2.88b8$--8{{=166|DNNqRST[[/44\BLLQPQR
{{=166|DNNqRST&#7RU#[ &'6'='=jX\XfXf'g$J(?(M(MKK,,.E)
 %8
%
  $}}C$2H2HLL..
%
 
%
! "));;;;FFHkk+.((r6   )rQ   rL   rS   rO   rT   rR   )N)r7   r8   r9   r:   r!   intr)   torchTensortupler   r   r
   rt   r;   r<   r=   s   @r4   rE   rE   1   s    h{ hs h )-')||') #5<<#=>') t+	')
 ') -.') 
u||U\\D00	1') ')r6   rE   c                       \ rS rSrSrg)Qwen2DecoderLayere   rA   NrB   rA   r6   r4   r{   r{   e   rC   r6   r{   c                       \ rS rSrSrg)Qwen2PreTrainedModeli   rA   NrB   rA   r6   r4   r~   r~   i   rC   r6   r~   c                     ^  \ rS rSrS\4U 4S jjr\\\      SS\	R                  S-  S\	R                  S-  S\	R                  S-  S\S-  S	\	R                  S-  S
\S-  S\\   S\4S jj5       5       5       rSrU =r$ )
Qwen2Modelm   r2   c                 `   > [         TU ]  U5        SU R                  R                  ;   U l        g )NrJ   )r(   r)   r2   rI   has_sliding_layersr0   s     r4   r)   Qwen2Model.__init__n   s'     "59P9P"Pr6   N	input_idsrX   position_idsrY   inputs_embeds	use_cacherZ   r[   c           
         US L US L-  (       a  [        S5      eUc  U R                  U5      nU(       a  Uc  [        U R                  S9nUcU  Ub  UR	                  5       OSn[
        R                  " UR                  S   UR                  S9U-   nUR                  S5      n[        U=n	[        5      (       d>  U R                  UUUUS.n
S[        S0 U
D60n	U R                  (       a  [        S0 U
D6U	S'   UnU R                  X5      n[!        U R"                  S U R                  R$                   5       H-  u  pU" U4XR                  R&                  U      UUUUS	.UD6nM/     U R)                  U5      n[+        UU(       a  US
9$ S S
9$ )Nz:You must specify exactly one of input_ids or inputs_embeds)r2   r   r    )device)r2   r   rX   rY   r   full_attentionrJ   )rX   rW   r   rY   r   )last_hidden_staterY   rA   )
ValueErrorembed_tokensr   r2   get_seq_lengthrw   aranger`   r   	unsqueeze
isinstancedictr   r   r	   
rotary_emb	enumeratelayersnum_hidden_layersrI   normr   )r1   r   rX   r   rY   r   r   rZ   past_seen_tokenscausal_mask_mappingmask_kwargsrV   rW   idecoder_layers                  r4   rt   Qwen2Model.forwardr   s    -t";<YZZ  --i8M0*$++>OCRC^==?de <<(;(;A(>}G[G[\_ooL'11!4L ?-FF ++!."0#2 ,K !"4"C{"C# &&;\;k_j;k#$78%"oomJ )$++6U8U8U*V WA)2;;3J3J13MN$7) /# M !X 		-0&+/8O
 	
>B
 	
r6   )r   )NNNNNN)r7   r8   r9   r:   r!   r)   r   r   r   rw   
LongTensorrx   r   FloatTensorboolr   r   r   rt   r;   r<   r=   s   @r4   r   r   m   s    Q{ Q   .2.204(,26!%<
##d*<
 t+<
 &&-	<

 <
 ((4/<
 $;<
 +,<
 
!<
    <
r6   r   c                       \ rS rSrSrg)Qwen2ForCausalLM   rA   NrB   rA   r6   r4   r   r      rC   r6   r   c                       \ rS rSrSrg)Qwen2ForSequenceClassification   rA   NrB   rA   r6   r4   r   r      rC   r6   r   c                       \ rS rSrSrg)Qwen2ForTokenClassification   rA   NrB   rA   r6   r4   r   r      rC   r6   r   c                       \ rS rSrSrg)Qwen2ForQuestionAnswering   rA   NrB   rA   r6   r4   r   r      rC   r6   r   )r~   r   r   Qwen2RMSNormr   r   r   )9collections.abcr   rw   r   cache_utilsr   r   masking_utilsr   r	   modeling_flash_attention_utilsr
   modeling_outputsr   modeling_utilsr   processing_utilsr   utilsr   r   r   utils.genericr   utils.output_capturingr   gemma2.modeling_gemma2r   llama.modeling_llamar   r   r   r   r   r   r   r   r   r   mistral.modeling_mistralr   configuration_qwen2r!   
get_loggerr7   loggerr#   r?   rE   r{   r~   r   r   r   r   r   __all__rA   r6   r4   <module>r      s    $   . R B 6 & @ @ 7 5 :   4 , 
		H	%Yx Y	0 	1)^ 1)h	) 		/ 	D
 D
N	' 		%C 		"= 		 9 	r6   