
    Z jC                        S SK r S SKJr  S SKJrJr  S SKJr  S SKrSSK	J
r
Jr  SSKJr  SSKJr  \
" 5       (       a  S SKrS	r\R$                  " S
5      r\R(                  (       aJ  \R*                  " 5       r\R/                  \R0                  " S5      5        \R3                  \5        S\l        S\\R6                  \\\4   4S jr " S S\5      r\ " S S5      5       r\ " S S5      5       r  " S S5      r!g)    N)deepcopy)	dataclassfield)IntEnum   )is_psutil_availableis_torch_xpu_available)logging)tracedContinuousBatchingLoggerz4%(asctime)s - %(name)s - %(levelname)s - %(message)sFreturnc                     [         R                  R                  5       (       a  [         R                  " S5      n [         R                  R	                  5         [         R                  R                  5         [         R                  R                  U 5      u  p[         R                  R                  U 5      nX!-
  nGO[        5       (       a  [         R                  " S5      n [         R                  R	                  5         [         R                  R                  5         [         R                  R                  U 5      R                  n[         R                  R                  U 5      n[         R                  R                  U 5      nGOB[         R                  R                  R                  5       (       a  [         R                  R                  R                  5       (       aY  [         R                  " S5      n [         R                  R!                  5       nU[#        [         R                  S5      " 5       -
  nSnO[         R                  " S5      n [%        5       (       aO  [&        R(                  " 5       R*                  n[&        R,                  " 5       R/                  5       R0                  nUnO[2        R5                  S5        SnSnSnXX44$ )Ncudaxpumpsrecommended_max_memoryr   cpuzCannot get memory breakdown on CPU without psutil: returning 0 for all memory values. Please install psutil to get an actual memory breakdown.)torchr   is_availabledeviceempty_cachesynchronizemem_get_infomemory_reservedr	   r   get_device_propertiestotal_memorymemory_allocatedbackendsr   is_builtdriver_allocated_memorygetattrr   psutilvirtual_memorytotalProcessmemory_inforssloggererror)r   free_memoryr   reserved_memoryallocated_memorys        څ/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/generation/continuous_batching/requests.pyget_device_and_memory_breakdownr/   ,   s   zz  f%

 

  %*JJ$;$;F$C!**44V<'5		!	!e$				yy66v>KK))33F; 9955f=				(	(	*	*u~~/A/A/J/J/L/Le$yy88:''%))=U*V*XXe$  !00288L%~~/;;=AA.OLL< LO BB    c                   ,    \ rS rSrSrSrSrSrSrSr	Sr
g	)
RequestStatusU   z5Status of a generation request through its lifecycle.r         r       N)__name__
__module____qualname____firstlineno____doc__PENDING
PREFILLINGDECODINGFINISHEDFAILED__static_attributes__r7   r0   r.   r2   r2   U   s    ?GJHHFr0   r2   c                      \ rS rSr% Sr\\S'   \" \S9r	\\
   \S'   \" \S9r\\
   \S'   \" \S9r\\   \S'   Sr\S-  \S	'   \R                   r\\S
'   \" \R&                  S9r\\S'   Sr\\\4   \S'   Sr\\   S-  \S'   S\4S jrSrg)GenerationOutput_   a  Tracks the output of a generation request.

Attributes:
    request_id (str): The ID of the generation request.
    prompt_ids (list[int]): The IDs of the prompt tokens.
    generated_tokens (list[int]): The generated tokens.
    logprobs (list[float]): The log probabilities of the generated tokens.
    error (Optional[str]): Any error message associated with the request. When None, the request was successful.
    status (RequestStatus): The status of the request.
    created_time (float): The time the request was created.
    lifespan (tuple[float, float]): The time the request was no longer pending and the time the request finished.

request_iddefault_factory
prompt_idsgenerated_tokenslogprobsNr*   statuscreated_timer   r   lifespan
timestampsr   c                 <    U R                   [        R                  :H  $ N)rL   r2   r@   selfs    r.   is_finishedGenerationOutput.is_finishedx   s    {{m4444r0   r7   )r8   r9   r:   r;   r<   str__annotations__r   listrI   intrJ   rK   floatr*   r2   r=   rL   timeperf_counterrM   rO   tuplerP   boolrU   rB   r7   r0   r.   rD   rD   _   s     O!$7JS	7"'"=d3i=!$7Hd5k7E3:)11FM10A0ABL%B$,HeE5L!,%)JUd")5T 5r0   rD   c                      \ rS rSr% Sr\\S'   \\   \S'   Sr	\
\S'   Sr\
\S'   Sr\S	-  \S
'   S	r\\\   -  S	-  \S'   Sr\\S'   \" \S9r\\S'   \" \S9r\\   \S'   \" \S9r\\   \S'   \" \S9r\\   \S'   Sr\\S'   Sr\\S'   \R2                  r\\S'   \" \S9r\\   \S'   \" \R<                  S9r\\S'   S	r \S	-  \S'   Sr!\"\\4   \S'   \" \S9r#\\   \S'   Sr$\\S'   Sr%\\S'   \" \S9r&\\   \S'   Sr'\
\S '   S! r(\)S"\4S# j5       r*\*RV                  S$\4S% j5       r*\)S"\\   S	-  4S& j5       r,S' r-S"\4S( jr.S"\4S) jr/\0S*\S+\S	-  S"\
4S, j5       r1S- r2S. r3S/\S"S 4S0 jr4S"\4S1 jr5S4S2 jr6S3r7g	)5RequestState|   aw  Tracks the state of a generation request through its lifecycle.

Attributes:
    request_id (str): The ID of the generation request.
    initial_tokens (list[int]): The initial prompt tokens.
    num_children (int): The number of children requests
    full_prompt_ids (list[int] | None): The tokens IDs of the full prompt.
    prompt_ids (list[int] | None): The tokens IDs currently being processed.
    remaining_prompt_ids (list[int]): The initial tokens IDs remaining to be processed.
    static_outputs (list[int]): The generated tokens.
    allocated_blocks (int): The number of blocks allocated to the request.
    position_offset (int): The current position in the sequence for position_ids.
    status (RequestStatus): The status of the request: can be one of PENDING, PREFILLING, PREFILLING_SPLIT,
                            SPLIT_PENDING_REMAINDER, DECODING, FINISHED, FAILED
    max_new_tokens (int | None): The maximum number of new tokens to generate.
    eos_token_id (None | int | list[int]): The ID(s) of the end-of-sequence tokens. Only used in post-init.
    _eos_token_ids (set[int]): The IDs of the end-of-sequence tokens, formatted as a set.
    streaming (bool): Whether to stream tokens as they're generated
    created_time (float): The time the request was created.
    error (Optional[str]): Any error message associated with the request. When None, has had no error yet.
rF   initial_tokensF	streamingrecord_timestamps   Nmax_new_tokenseos_token_idr   num_childrenrG   logit_processor_kwargstokens_to_processrJ   rK   position_offsetallocated_blocks_status_eos_token_idsrM   r*   rN   rO   _timestamps_true_initial_tokens_new_tokens_limitremaining_prefill_tokensis_cpu_offloadedc                    U R                   c  SOU R                   U l        U R                  S S  U l        U R                  c  g [        U R                  [        5      (       a7  U R                  S:  a&  U R                  R                  U R                  5        g g U R                   H&  nUS:  d  M  U R                  R                  U5        M(     g )Nrr   r   )	rg   rs   rc   rt   rh   
isinstancerZ   ro   add)rT   token_ids     r.   __post_init__RequestState.__post_init__   s    /3/B/B/JPTPcPc(,(;(;A(>%$))3//  A%##''(9(9: & !--q=''++H5 .r0   r   c                     U R                   $ rR   )rn   rS   s    r.   rL   RequestState.status   s    ||r0   valuec                     U R                   [        R                  :X  a  [        R                  " 5       S4U l        OMU[        R                  :X  a9  U R
                  S   [        R                  " 5       4U l        U R                  5         Xl         g )Nr   r   )rn   r2   r=   r\   r]   rO   r@   log_end_of_request)rT   r~   s     r.   rL   r}      sf    <<=000!..0"5DMm,,,!]]1-t/@/@/BCDM##%r0   c                 @    U R                   (       a  U R                  $ S $ rR   )re   rp   rS   s    r.   rP   RequestState.timestamps   s    #'#9#9tCtCr0   c                 "   [        U R                  5      nU R                  5       nU R                  S   U R                  -
  nU R                  S   U R                  -
  n[
        R                  SU R                   SU< SU< SU< SU< 3
5        g )Nr   r4   Request z finished: prefill_len = z decode_len = z start_time = z end_time = )lenrc   generated_lenrO   rM   r)   inforF   )rT   prefill_len
decode_len
start_timeend_times        r.   r   RequestState.log_end_of_request   s    $--.'')
]]1%(9(99
==#d&7&77t''A;2B/J?RaT^Sbbodlcpq	
r0   c                     U R                   $ )zCGet the current length of the sequence (prompt + generated tokens).)rl   rS   s    r.   current_lenRequestState.current_len   s    ###r0   c                 ,    [        U R                  5      $ )z*Get the number of tokens generated so far.)r   rJ   rS   s    r.   r   RequestState.generated_len   s    4(())r0   ry   logprobc                 X   U R                   [        R                  :w  a  gU R                  (       a.  U R                  R                  [        R                  " 5       5        XR                  ;   nU R                  5       nU(       d  X@R                  :  aG  U R                  R                  U5        U/U l        US-  nUb  U R                  R                  U5        O%[        R                  SU R                    SU 35        U(       d  X@R                  :  a  [        R"                  U l         gg)zUpdate the request with a newly generated token (and optional log probability of the token) and check for
completion. Returns True if the request is now complete, False otherwise.Fr4   r   z generated a useless token: T)rL   r2   r?   re   rp   appendr\   r]   ro   r   rs   rJ   rk   rK   r)   warningrF   r@   )rT   ry   r   is_eosr   s        r.   update_and_check_completion(RequestState.update_and_check_completion   s    
 ;;-000 !!##D$5$5$78 000((* k$:$::!!((2&.ZD"1K"$$W-NNXdoo%66RS[R\]^[$:$::'00DKr0   c                    SU R                    3SU R                   3SU R                  5        3S[        U R                  5       3S[        U R
                  5       3SU R                   3S[        U R                  5       3SU R                   3S	U R                   3S
U R                   3/
nSSR                  U5      -   S-   $ )Nzrequest_id=zstatus=zout_tokens=zquery_length=zremaining_tokens=z
kv_length=zfull_prompt_length=zallocated_blocks=zgenerated_tokens=zlogit_processor_kwargs=zRequestState(
	z,
	z
))rF   rn   r   r   rk   rt   rl   rc   rm   rJ   rj   join)rT   msgs     r.   __repr__RequestState.__repr__  s    $//*+dll^$$,,./0C 6 6789D$A$A BCD--./!#d&9&9":!;< 5 567 5 567%d&A&A%BC
 #W\\#%66>>r0   c                    U R                   (       aI  U R                  U R                   S U R                  -   U l        U R                  SU R                    U l        [        U R                  U R                  U R                  U R
                  U R                  U R                  U R                  U R                  U R                  S9	$ )z7Convert the request state to a GenerationOutput object.N)	rF   rI   rJ   rK   r*   rL   rM   rO   rP   )rq   rc   rJ   rD   rF   rK   r*   rL   rM   rO   rP   rS   s    r.   to_generation_output!RequestState.to_generation_output  s    $$$($7$78Q8Q8S$TW[WlWl$lD!"&"5"56Q8Q8Q"RD**!22]]**;;**]]

 
	
r0   new_request_idc                     [        U 5      nXl        [        R                  " 5       Ul        UR                  S4Ul        / Ul        U R                  SS Ul        U$ )ziFork the request into a new request with the same state except for request_id, created_time and lifespan.r   N)r   rF   r\   r]   rM   rO   rp   rt   )rT   r   new_requests      r.   forkRequestState.fork.  sV    tn!/#'#4#4#6  + 8 8"="$/3/L/LQ/O,r0   c                     U R                   U R                  U R                  U R                  U R                  [        U R                  5      S.$ )zXGet all the fields necessary to create a request that would have the same configuration.)rd   re   rg   rh   ri   rj   )rd   re   rg   rh   ri   r   rj   rS   s    r.   get_request_configRequestState.get_request_config:  sG     !%!7!7"11 -- --&.t/J/J&K
 	
r0   c           
         U R                  5       nU R                  b%  U R                  [        U R                  5      -
  US'   [	        SU R
                  U R                  U R                  -   U R                  SS U R                  [        U R                  5      -   S.UD6nU R                  (       a  U R                  Ul        U$ [        U R                  5      Ul        U$ )aD  Creates an equivalent new request by removing the generated tokens and adding them to the initial prompt. The
created request has THE SAME request_id. Notably, we can retrieve the original request from the created one with
the _true_initial_tokens attribute. The logprobs of the generated tokens are kept in the new request.Nrg   )rF   rc   rK   rq   r7   )	r   rg   r   rJ   ra   rF   rc   rK   rq   )rT   request_config	new_states      r.   !create_equivalent_initial_request.RequestState.create_equivalent_initial_requestE  s    
 002*/3/B/BSI^I^E_/_N+,  
..1F1FF]]1%!%!:!:SATAT=U!U	

 
	 $$-1-F-FI*  .11D1D-EI*r0   )rs   rn   rJ   rc   rO   rt   rL   rk   )r   ra   )8r8   r9   r:   r;   r<   rW   rX   rY   rZ   rd   r_   re   rg   rh   ri   r   dictrj   rk   rJ   rK   r[   rl   rm   r2   r=   rn   setro   r\   r]   rM   r*   rO   r^   rp   rq   rs   rt   ru   rz   propertyrL   setterrP   r   r   r   r   r   r   r   r   r   r   rB   r7   r0   r.   ra   ra   |   sF   . OI It#t# "$NC$J#+/L#S	/D(/L##(#>D> $)#>tCy>"'"=d3i=!$7Hd5k7OSc*22G]2$S9NCH9  0A0ABL%BE3:$,HeE5L!,$T:Ke: !#! (s'*/*Ed3iE"d"6$    ]]M   DDK$. D D
$S $*s *
 C %$, SW  >?
"
3 
> 
	
D 	
r0   ra   c            
       :    \ rS rSrSrSrS\S\S\S\SS	4
S
 jr	Sr
g	)FutureRequestStatei_  zPTracks the current state of a request and the relevant information to update it.statehas_new_tokencomplete_blocksquery_lengthr   r   r   r   r   Nc                 4    Xl         X l        X0l        X@l        g rR   r   )rT   r   r   r   r   s        r.   __init__FutureRequestState.__init__e  s    
*.(r0   )r   r   r   r   )r8   r9   r:   r;   r<   	__slots__ra   r_   rZ   r   rB   r7   r0   r.   r   r   _  s8    Z NI)l )4 )RU )eh )mq )r0   r   )"r\   copyr   dataclassesr   r   enumr   r   utilsr   r	   utils.loggingr
   utils.metricsr   r#   TMP_TOKEN_ID	getLoggerr)   	propagateStreamHandlerhandlersetFormatter	Formatter
addHandlerr^   r   rZ   r/   r2   rD   ra   r   r7   r0   r.   <module>r      s      (   @ $ #   
		5	6	##%G**+abc
gF&Cu||S#s/J)K &CRG  5 5 58 _ _ _D
) 
)r0   