
    Z jy                        S SK r SSKJrJr  SSKJr  \R                  " \5      r\" 5       r	S\ R                  S\ R                  R                  S\ R                  4S jr      SS\ R                  R                  S\ R                  S	\ R                  S
\ R                  S\ R                  S-  S\S\S-  S\S-  S\S-  S\S-  S\ R                  S-  S\\ R                  S4   4S jjrg)    N   )_flash_attention_forward!flash_attn_supports_top_left_mask)loggingquerymodulereturnc                 l   U R                   [        R                  :X  a  [        R                  " S5      (       a  [        R                  " S5      $ [        UR                  S5      (       a  UR                  R                   $ [        S UR                  5        5       5      R                  R                   $ g)ziIf the query is in float32, return a target dtype compatible with flash attention. Return None otherwise.cuda_is_quantizedc              3   |   #    U  H2  n[        U[        R                  R                  5      (       d  M.  Uv   M4     g 7f)N)
isinstancetorchnnLinear).0layers     z/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/integrations/flash_attention.py	<genexpr>#get_target_dtype.<locals>.<genexpr>   s*     b+;%z%QVQYQYQ`Q`?a+;s   -<	<N)
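# Illustrative note (not part of the upstream file): the float32 fallback above
# matters because flash attention kernels only accept fp16/bf16 inputs. A query
# can end up in float32 when e.g. layer norms are kept in fp32 for stability
# (common with PEFT setups), and the caller uses the dtype returned here to
# cast the inputs back, for example:
#
#     with torch.autocast("cuda", dtype=torch.bfloat16):
#         q = torch.ones(1, 8, 128, 64, device="cuda", dtype=torch.float32)
#         # get_target_dtype(q, some_module) -> torch.bfloat16
#
# Passing fp32 tensors straight through would fail inside the flash attention
# kernel with a much less actionable error than a dtype cast here.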
def flash_attention_forward(
    module: torch.nn.Module,
    query: torch.Tensor,
    key: torch.Tensor,
    value: torch.Tensor,
    attention_mask: torch.Tensor | None,
    dropout: float = 0.0,
    scaling: float | None = None,
    sliding_window: int | None = None,
    softcap: float | None = None,
    is_causal: bool | None = None,
    s_aux: torch.Tensor | None = None,
    **kwargs,
) -> tuple[torch.Tensor, None]:
    if kwargs.get("output_attentions", False):
        logger.warning_once(
            "Flash Attention does not support `output_attentions=True`. Please set your attention to `eager` if"
            " you want any of these features."
        )

    # This is before the transpose
    seq_len = query.shape[2]

    if any(dim == 0 for dim in query.shape):
        raise ValueError(
            f"Tensor query has shape {query.shape} with a zero dimension.\n"
            "FlashAttention does not support inputs with dim=0.\n"
            "Please check your input shapes or use SDPA instead."
        )

    # FA2 uses non-transposed inputs, i.e. (batch, seq_len, num_heads, head_dim)
    query = query.transpose(1, 2)
    key = key.transpose(1, 2)
    value = value.transpose(1, 2)

    # If the query was silently upcast to float32, pick a flash-attention-compatible
    # dtype so the inputs can be cast back before the kernel call
    target_dtype = get_target_dtype(query, module)

    # Use the explicit `is_causal` override if given, otherwise the module's own setting
    is_causal = is_causal if is_causal is not None else module.is_causal

    attn_output = _flash_attention_forward(
        query,
        key,
        value,
        attention_mask,
        query_length=seq_len,
        is_causal=is_causal,
        dropout=dropout,
        softmax_scale=scaling,
        sliding_window=sliding_window,
        softcap=softcap,
        use_top_left_mask=_use_top_left_mask,
        target_dtype=target_dtype,
        attn_implementation=module.config._attn_implementation,
        layer_idx=module.layer_idx if hasattr(module, "layer_idx") else None,
        # Auxiliary sink logits must match the query dtype when present
        s_aux=s_aux.to(query.dtype) if s_aux is not None else None,
        **kwargs,
    )

    return attn_output, None