
    Z j                        S SK Jr  S SKrSSKJr  SSKJr  SSKJrJ	r	J
r
  SSKJr  SSKJr  SS	KJrJr  S
SKJrJrJrJrJrJrJr  \R4                  " \5      rS\R:                  S\S\S\R:                  4S jr  " S S\5      r! " S S\5      r"\ " S S\5      5       r#\ " S S\5      5       r$\ " S S\5      5       r% " S S\
\#5      r& " S S\	\#5      r' " S S \\#5      r(/ S!Qr)g)"    )CallableN   )Cache)FlashAttentionKwargs)GenericForQuestionAnswering GenericForSequenceClassificationGenericForTokenClassification)ALL_ATTENTION_FUNCTIONS)Unpack)auto_docstringlogging   )MistralAttentionMistralDecoderLayerMistralForCausalLMMistralModelMistralPreTrainedModelapply_rotary_pos_embeager_attention_forwardpositions_idsbetamax_position_embeddingsreturnc           	          SU[         R                  " S[         R                  " X-  5      -   5      -  -   nUS S 2S S S 2S 4   $ )N   )torchlogfloor)r   r   r   scalings       ڂ/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/models/ministral3/modular_ministral3.pyget_llama_4_attn_scaler!      s?    $1u{{=3Z'[#[\\\G1dAt#$$    c                       \ rS rSr SS\R
                  S\\R
                  \R
                  4   S\R
                  S-  S\R
                  S\S-  S\\	   S	\\R
                  \R
                  S-  4   4S
 jjr
Srg)Ministral3Attention"   Nhidden_statesposition_embeddingsattention_maskposition_idspast_key_valueskwargsr   c           
         UR                   S S n/ UQSPU R                  P7nU R                  U5      R                  U5      R	                  SS5      n	U R                  U5      R                  U5      R	                  SS5      n
U R                  U5      R                  U5      R	                  SS5      nUu  p[        XX5      u  pU	[        UU R                  R                  R                  S5      U R                  R                  R                  S5      5      R                  U	R                  5      -  n	Ub  UR                  XU R                  5      u  p[         R"                  " U R                  R$                  [&        5      nU" U U	U
UU4U R(                  (       d  SOU R*                  U R,                  [/        U R                  SS 5      S.UD6u  nnUR0                  " / UQSP76 R3                  5       nU R5                  U5      nUU4$ )	Nr   r   llama_4_scaling_beta original_max_position_embeddingsg        sliding_window)dropoutr   r0   )shapehead_dimq_projview	transposek_projv_projr   r!   configrope_parametersgettodtypeupdate	layer_idxr
   get_interface_attn_implementationr   trainingattention_dropoutr   getattrreshape
contiguouso_proj)selfr&   r'   r(   r)   r*   r+   input_shapehidden_shapequery_states
key_statesvalue_statescossinattention_interfaceattn_outputattn_weightss                    r    forwardMinistral3Attention.forward#   s    $))#2.88b8$--8{{=166|DNNqRST[[/44\BLLQPQR
{{=166|DNNqRST&#7RU#[ #&<KK''++,BCKK''++,NO'
 "\
 	! &'6'='=jX\XfXf'g$J(?(M(MKK,,.E)
 %8
%
  $}}C$2H2HLL"4;;0@$G
%
 
%
!\ "));;;;FFHkk+.L((r"    )N)__name__
__module____qualname____firstlineno__r   Tensortupler   r   r   rS   __static_attributes__rU   r"   r    r$   r$   "   s     )--)||-) #5<<#=>-) t+	-)
 ll-) -) -.-) 
u||U\\D00	1-) -)r"   r$   c                       \ rS rSrSrg)Ministral3DecoderLayerS   rU   NrV   rW   rX   rY   r\   rU   r"   r    r^   r^   S       r"   r^   c                       \ rS rSrSrg)Ministral3PreTrainedModelW   rU   Nr`   rU   r"   r    rc   rc   W       r"   rc   c                       \ rS rSrSrg)Ministral3Model\   rU   Nr`   rU   r"   r    rg   rg   \   re   r"   rg   c                       \ rS rSrSrg)Ministral3ForCausalLMa   rU   Nr`   rU   r"   r    rj   rj   a   re   r"   rj   c                       \ rS rSrSrg) Ministral3ForTokenClassificationf   rU   Nr`   rU   r"   r    rm   rm   f   ra   r"   rm   c                       \ rS rSrSrg)#Ministral3ForSequenceClassificationj   rU   Nr`   rU   r"   r    rp   rp   j   ra   r"   rp   c                       \ rS rSrSrg)Ministral3ForQuestionAnsweringn   rU   Nr`   rU   r"   r    rs   rs   n   ra   r"   rs   )rj   rs   rg   rc   rp   rm   )*collections.abcr   r   cache_utilsr   modeling_flash_attention_utilsr   modeling_layersr   r   r	   modeling_utilsr
   processing_utilsr   utilsr   r   mistral.modeling_mistralr   r   r   r   r   r   r   
get_loggerrV   loggerrZ   floatintr!   r$   r^   rc   rg   rj   rm   rp   rs   __all__rU   r"   r    <module>r      s   $    B 
 6 & ,   
		H	%%%,, %e %^a %fkfrfr %
.)* .)b	0 	 	 6 	 	 	l 	 	 	. 	 		'DF_ 		*JLe 		%@B[ 	r"   