
    Z j                         S SK r S SK Jr  SSKJr  S\ R                  S\S\ R                  4S jrS	\R                  S
\ R                  S\ R                  S\ R                  S\ R                  S-  S\4S jr	g)    N)nn   )PagedAttentionCachehidden_statesn_repreturnc                     U R                   u  p#pEUS:X  a  U $ U SS2SS2SSS2SS24   R                  X#XU5      n U R                  X#U-  XE5      $ )z
This is the equivalent of torch.repeat_interleave(x, dim=1, repeats=n_rep). The hidden states go from (batch,
num_key_value_heads, seqlen, head_dim) to (batch, num_attention_heads, seqlen, head_dim)
   N)shapeexpandreshape)r   r   batchnum_key_value_headsslenhead_dims         v/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/integrations/eager_paged.py	repeat_kvr      s_    
 2?1D1D.Ez!!Qa"23::5W\dlmM  e(CTTT    modulequerykeyvalueattention_maskscalingc                    UR                  SS 5      nUbg  UR                  UUU R                  US   US   S9u  p#UR                  SS5      R	                  S5      nUR                  SS5      R	                  S5      n[        U S5      (       a*  [        X R                  5      n[        X0R                  5      n[        U[        5      (       a  [        U SS5      nUS:X  d  Uc  S	OS
n	XI   n
OUn
[        R                  " XR                  SS5      5      U-  nU
b  X-   n[        U S5      (       a  U R                  R                  SSSS5      R                  UR                   S   SUR                   S   S5      n[        R"                  " X/SS9nXR%                  SSS9R&                  -
  n[(        R*                  R-                  US[        R.                  S9R1                  UR2                  5      nUSS S24   nOF[(        R*                  R-                  US[        R.                  S9R1                  UR2                  5      n[        R                  " X5      nUR                  SS5      R5                  5       nX4$ )Ncache
read_indexwrite_index)
key_statesvalue_states	layer_idxr   r   r   r
   num_key_value_groupssliding_windowfull_attentionsliding_attentionr      sinks)dimT)r*   keepdim)r*   dtype.)popupdater!   	transpose	unsqueezehasattrr   r"   
isinstancedictgetattrtorchmatmulr'   r   r   r   catmaxvaluesr   
functionalsoftmaxfloat32tor,   
contiguous)r   r   r   r   r   r   kwargsr   r#   
layer_typecausal_maskattn_weightsr'   attn_outputs                 r   eager_paged_attention_forwardrD      sE    )/

7D(AE\\&&l+}- " 

 mmAq!++A.1%//2 v-..889%!<!<= .$'' )91=)71)<@V%\o
$0$<<}}Q':;gEL#1 vw$$QAq188QU[[Y[_^`ayy,!6B?#&6&62t&6&L&S&SS}},,\r,WZZ[`[f[fg#C"H-}},,\r,WZZ[`[f[fg,,|3K''1-88:K$$r   )
r5   r   $generation.continuous_batching.cacher   Tensorintr   ModulefloatrD    r   r   <module>rK      s      F	UU\\ 	U# 	U%,, 	U8%II8%<<8% 
8% <<	8%
 LL4'8% 8%r   