
    Z ju	                     *   S SK r SSKJr  S\ R                  S\S\ R                  4S jr  SS\ R                  R                  S	\ R                  S
\ R                  S\ R                  S\ R                  S-  S\S\S-  S\	\ R                  S4   4S jjr
g)    N   )PagedAttentionCachehidden_statesn_repreturnc                     U R                   u  p#pEUS:X  a  U $ U SS2SS2SSS2SS24   R                  X#XU5      n U R                  X#U-  XE5      $ )z
This is the equivalent of torch.repeat_interleave(x, dim=1, repeats=n_rep). The hidden states go from (batch,
num_key_value_heads, seqlen, head_dim) to (batch, num_attention_heads, seqlen, head_dim)
   N)shapeexpandreshape)r   r   batchnum_key_value_headsslenhead_dims         u/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/integrations/sdpa_paged.py	repeat_kvr      s_    
 2?1D1D.Ez!!Qa"23::5W\dlmM  e(CTTT    modulequerykeyvalueattention_maskdropoutscalingc           
      v   UR                  SS 5      nUbg  UR                  UUU R                  US   US   S9u  p#UR                  SS5      R	                  S5      nUR                  SS5      R	                  S5      n[        U S5      (       a*  [        X R                  5      n[        X0R                  5      nUn	UR                  5       nUR                  5       nUR                  5       n[        R                  R                  R                  UUUU	UUSS	9n
U
R                  SS
5      R                  5       n
U
S 4$ )Ncache
read_indexwrite_index)
key_statesvalue_states	layer_idxr   r   r   r	   num_key_value_groupsF)	attn_mask	dropout_pscale	is_causalr   )popupdater!   	transpose	unsqueezehasattrr   r"   
contiguoustorchnn
functionalscaled_dot_product_attention)r   r   r   r   r   r   r   kwargsr   causal_maskattn_outputs              r   sdpa_attention_paged_forwardr4      sD    )/

7D(AE\\&&l+}- " 

 mmAq!++A.1%//2 v-..889%!<!<= !K E
..
CE((%%BB C 	K ''1-88:Kr   )g        N)r-   $generation.continuous_batching.cacher   Tensorintr   r.   Modulefloattupler4    r   r   <module>r<      s     F	UU\\ 	U# 	U%,, 	U$  0HHOO0<<0 
0 <<	0
 LL4'0 0 T\0 5<<0r   