
    N j                    f   % S SK Jr  S SKrS SKrS SKrS SKrS SKrS SKJr  S SKJ	r	J
r
  S SKJr  S SKrS SKJrJr  \
(       a  S SKJrJr  \" S5      rSS	 jq    S                   SS
 jjrSS jrSS jrSS jrSS jrSS jr/ qS\S'   SS jr   S             SS jjrg)    )annotationsN)AbstractContextManager)AnyTYPE_CHECKING)TypeVar)profileProfilerActivity)CallableSequence_Rc                     g )N r       q/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/torch/_functorch/benchmark_utils.pysynchronizer      s    r   c	                b   Uc  S/nUS/:w  a=  [         R                  R                  5       (       a  [         R                  R                  qUc  0 nUc  0 nU   [         R                  " S5        [        S5       H  n	U " U40 UD6  [        5         M     [         R                  " S5        [        R                  " 5       n
[        U5       H  n	U " U40 UD6  [        5         M     [        R                  " 5       nSSS5        WW
-
  n[        SSU0UD6 nU   [        5         [         R                  " S5        [        U5       H  n	U " U40 UD6  [        5         M     SSS5        SSS5        WR                  U5        U$ ! , (       d  f       N= f! , (       d  f       N:= f! , (       d  f       NC= f)a  
Output the chrome trace of running f(input_, **kwargs_for_f) with [optimize_ctx]
[num_runs] times to [trace_filename].

[activities] are the activities that the profiler will record, e.g. ProfilerActivity.CUDA.
Return total runtime without the profiler

Outputs to trace_filename
Ncudacpui9     
activitiesr   )
torchr   is_availabler   manual_seedrangetimeperf_counterr   export_chrome_trace)finput_trace_filenameoptimize_ctxr   num_runsdeviceskwargs_for_fkwargs_for_profiler_t0t1timingprofs                 r   dump_chrome_tracer+      sd   * ( 5'ejj5577jj,," 	$qAf%%M  	$ xAf%%M !   
 "WF		>J	>*=	>$Md#8_&)L) %  
? 	^,M- 
 \ 
?	>s2   B!E>F AFF >
F
F	F  
F.c                    [        U 5       n[        R                  " U5      nS S S 5        WS   nU$ ! , (       d  f       N= f)NtraceEvents)openjsonload)filenamer   dataeventss       r   get_chrome_trace_eventsr4   U   s4    	h1yy| 
- FM 
s	   2
A c                h    SU ;   =(       a'    U S   [         ;   =(       a    SU ;   =(       a    U S   S:H  $ )NpidphX)gpu_pidsevents    r   is_gpu_compute_eventr<   \   s@     	 	%LH$	EM	 $K3	r   c                    / nU  H&  n[        U5      (       d  M  UR                  U5        M(     [        U[        R                  " S5      S9$ )Nts)key)r<   appendsortedoperator
itemgetter)r3   sorted_gpu_eventsr;   s      r   get_sorted_gpu_eventsrE   f   sI    .0#E**  '  #)<)<T)BCCr   c                    [        U 5      S:X  a  gU S   nUS   US   -   nUS   nU SS   H9  n[        US   U5      nUS   US   -   nU[        XT-
  S5      -   n[        X%5      nM;     U$ )Nr   r>   dur   )lenmax)rD   r;   current_end_timetotal_duration
start_timeend_times         r   get_durationrO   o   s    
"a ET{U5\15\N"12&t&67
;u-'#h.CQ*GG/:	 '
 r   c                |    SS jn[        U 5      n/ nU H#  nU" U5      (       d  M  UR                  U5        M%     U$ )Nc                    SU ;   =(       a5    SU S   ;   =(       d&    SU S   ;   =(       d    SU S   ;   =(       d    SU S   ;   $ )Nnamegemmconvcutlasswgradr   r:   s    r   is_mm_conv_event7get_sorted_gpu_mm_conv_events.<locals>.is_mm_conv_event~   sT     
eFm# (v&(E&M)( %-'		
r   r;   zdict[str, Any]returnbool)rE   r@   )r3   rW   
gpu_eventssorted_eventsr;   s        r   get_sorted_gpu_mm_conv_eventsr^   }   sF    
 'v.J*,M&&U#  r   z	list[Any]r9   c                   [        U 5      n/ qU H<  nSU;  a  M  US   S:X  d  M  SUS   S   ;   d  M$  [        R                  US   5        M>     US-  n[        U5      n[	        U5      U-  n[        U5      n[	        U5      U-  nXW4$ )a  
Process the chrome traces outputs by the pytorch profiler to compute GPU Utilization
and percent of times spent on matmul and convolution

Args:
    filename(str): Name of chrome traces file produced by pytorch profiler

    total_length(float): total length of the process without profiler in second

Return:
    tuple: (GPU Utilization, percent of time spent on matmul and convolution)
rR   process_labelsGPUargslabelsr6   g    .A)r4   r9   r@   rE   rO   r^   )r1   total_lengthr3   r;   rD   utilizationsorted_gpu_mm_conv_eventsmm_conv_utilizations           r   compute_utilizationrh      s     %X.F H=,,%-:Q1QOOE%L)	   #%L-f501L@K =f E&'@ALP++r   c           
     h   [         R                  R                  U5      nU(       d$  [         R                  " U5        [	        SU-   5        Uc  [
        R                  " 5       n[         R                  R                  X$S-   5      n[        U UUU[        R                  /US/S9n[        Xx5      u  pX4$ )au  
Benchmark the GPU Utilization and percent of time spent on matmul and convolution operations of
running f(input_, **kwargs_for_f) with [optimize_ctx] [num_runs] times.
It will produce a chrome trace file in trace_folder/trace_file_name.json

Example:

```
def f(a):
    return a.sum()


a = torch.rand(2**20, device="cuda")
utilization, mm_conv_utilization = benchmark_utilization(
    f, a, "tmp", trace_file_name="tmp_chrome_trace"
)
```

Args:
    f: function to benchmark

    input_: input to :attr:`f`

    trace_folder: name of the folder to store the chrome trace

    optimize_ctx: the context in which f will run

    trace_file_name: name of the dumped chrome trace file, default to "tmp_chrome_trace"

    num_runs: number of times to run f, excluding the warm-up runs, default to 1.

Return:
    tuple: (GPU Utilization, percent of time spent on matmul and convolution)

zcreate folder z.jsonr   )r"   r#   )ospathexistsmakedirsprint
contextlibnullcontextjoinr+   r	   CUDArh   )r   r   trace_folderr!   trace_file_namer"   isExistchrome_trace_file_namerd   re   rg   s              r   benchmark_utilizationrw      s    V ggnn\*G
L!-.!--/WW\\,'8QR$				L (;($K ++r   )rZ   None)rH   NNN)r   Callable[[tuple[Any, ...]], _R]r   tuple[Any, ...]r    strr!   zAbstractContextManager[Any]r   zSequence[ProfilerActivity]r"   intr#   zlist[str] | Noner$   dict[str, Any] | Noner%   r}   rZ   float)r1   r{   rZ   list[dict[str, Any]]rY   )r3   r   rZ   r   )rD   r   rZ   r|   )r1   r{   rd   r~   rZ   tuple[float, float])Ntmp_chrome_tracerH   )r   ry   r   rz   rs   r{   r!   z"AbstractContextManager[Any] | Nonert   r{   r"   r|   rZ   r   ) 
__future__r   ro   r/   rB   rj   r   r   typingr   r   typing_extensionsr   r   torch.profilerr   r	   collections.abcr
   r   r   r   r+   r4   r<   rE   rO   r^   r9   __annotations__rh   rw   r   r   r   <module>r      s8   "    	  - % %  4 2 T]	  $*.157&77 7 .	7
 +7 7 7 (7 /7 7tD$ ) ,L 8<-A,&A,A, A, 5	A,
 A, A, A,r   