
    Z jp>                     d   S r SSKrSSKrSSKJr  SSKJr  SSKJr  SSK	J
r
  \
" 5       (       a4  SSKJrJr  SS	KJrJrJr  SS
KJr  SSKJrJrJr  SS
KJr  SSKJr  SSKJr  SSKJrJrJrJ r J!r!J"r"  \(       a  SSK#J$r$J%r%J&r&J'r'   " S S\SS9r(1 Skr)\RT                  " \+5      r, " S S\5      r-g)z
Handler for the /v1/chat/completions endpoint.

Supports streaming (SSE via DirectStreamer) and non-streaming (JSON) responses.
    N)AsyncGenerator)TYPE_CHECKING   )logging)is_serve_available)JSONResponseStreamingResponse)ChatCompletionChatCompletionMessageChatCompletionMessageToolCall)Choice)ChatCompletionChunkChoiceDeltaChoiceDeltaToolCall)CompletionCreateParamsStreaming)CompletionUsage   )BaseGenerateManagerBaseHandlerModality_StreamErrorget_tool_call_configparse_tool_calls)GenerationConfigPreTrainedModelPreTrainedTokenizerFastProcessorMixinc                   *    \ rS rSr% \\S'   \\S'   Srg)+TransformersCompletionCreateParamsStreaming5   generation_configseed N)__name__
__module____qualname____firstlineno__str__annotations__int__static_attributes__r#       y/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/cli/serving/chat_completion.pyr   r   5   s    
Ir,   r   F)total>   nuseraudiostorelogprobsmetadata	functions
modalities
predictiontool_choiceservice_tiertop_logprobsfunction_callstream_optionsresponse_formatpresence_penaltyreasoning_effortweb_search_optionsparallel_tool_callsmax_completion_tokensc                   x  ^  \ rS rSrSr\r\rS\	S\
S\\-  4S jr SS\
SS	S
SS\
S\	SSS\S\	S-  S\4S jjr SS\
SS	S
SS\
S\	SSS\S\	S-  S\4S jjrS S\	SSS\4U 4S jjjr  S!S\
S\
S\
S\
S\S-  S\\	   S-  S\	4S jjr       S"S\
S\
S-  S\
S-  S\
S-  S\
S-  S\S-  S\S-  S\
4S jjrSrU =r$ )#ChatCompletionHandlerX   ztHandler for the `/v1/chat/completions` endpoint.

Supports both streaming (SSE) and non-streaming (JSON) responses.
body
request_idreturnc                   #    U R                  U5        U R                  U5      u  p4nU R                  R                  XES9nU R                  R                  XF5      n[        R                  SU SU 35        U R                  R                  X7S9nU R                  US   U5      n	[        S U	 5       5      n
0 nU
(       a  SUS'   UR                  " U	4S	UR                  S
5      U(       a  SOSS	S	U[        R                  :H  =(       a    U
S.UD6nU(       d  UR                  UR                   5      nU R#                  XR$                  US9nU(       a  UR'                  XM5        UR                  S
5      (       a  [)        XT5      OSnUR                  S5      nU(       a  U R+                  UUUUUUUUS9$ U R-                  UUUUUUUUS9I Sh  vN $  N7f)a`  Validate the request, load the model, and dispatch to streaming or non-streaming.

Args:
    body (`dict`): The raw JSON request body (OpenAI chat completion format).
    request_id (`str`): Unique request identifier (from header or auto-generated).

Returns:
    `StreamingResponse | JSONResponse`: SSE stream or JSON depending on ``body["stream"]``.
)	processorz[Request received] Model: z, CB: use_cbmessagesc              3      #    U  HX  n[        UR                  S 5      [        5      (       a  UR                  S 5      O/   H  nUR                  S5      S:H  v   M     MZ     g7f)contenttypevideoN)
isinstancegetlist).0msgcs      r-   	<genexpr>7ChatCompletionHandler.handle_request.<locals>.<genexpr>t   sU      
',6swwy7I4,P,Pcggi(VXX EE&MW$X %'s   A A"    
num_framesTtoolsNpt)add_generation_promptr\   return_tensorsreturn_dicttokenizeload_audio_from_videostream)gen_managertool_config)_validate_request_resolve_modelmodel_managerget_model_modalitygeneration_stateuse_continuous_batchingloggerwarningget_manager"get_processor_inputs_from_messagesanyapply_chat_templaterS   r   
MULTIMODALtodevice_build_generation_configr!   init_cbr   
_streaming_non_streaming)selfrF   rG   model_idmodelrJ   modalityrL   rd   processor_inputs	has_videochat_template_kwargsinputs
gen_configre   	streamings                   r-   handle_request$ChatCompletionHandler.handle_requesta   s     	t$%)%8%8%>"%%888T&&>>uO3H:VF8LM++777PBB4
CSU]^ 
'
 
	  "13 ...	
"&((7##)4t"*h.A.A"A"Oi	
 #	
 YYu||,F2249P9PY_2`
2@D@Q@Q*9<W[HHX&	??'' # 	 	 ,,'' - 	 	 	 	s   GGGGNr{   r   rJ   z(ProcessorMixin | PreTrainedTokenizerFastrz   r   r   r   rd   re   c	           
        ^ ^^^^^^^^ UR                  UTUTTTS9u  mmUS   n	[        U	[        5      (       a  [        U	5      OU	R                  S   mS[
        [        S4   4UUUUUUU UU4	S jjn
[        U
" 5       SS9$ )	z(Stream tokens as SSE via DirectStreamer.)rG   re   	input_idsrH   Nc                 t  >	#     TR                  TSTS97v   Sn U (       d>  TR                  5       I S h  vN nU/n  UR                  TR                  5       5        M!  SnT(       ad  [        TTR                  TS	   5      nU(       aC  Sn[        U5       H2  u  pgTR                  TT[        US
T SU 3US   US   S.S9/S97v   M4     TR                  S L=(       a    TR                  TR                  :  nU(       a  Sn	OU(       a  Sn	OSn	[!        TTR                  TTR                  -   S9n
TR                  TU	TU
S97v   g  GN! [        R
                   a     Of = f/ nU Ht  nUc  Sn   On[        U[        5      (       a5  UR                  SUR                   S35        SR                  U5      7v     g UR                  TR                  TTUS95        Mv     U(       a  SR                  U5      7v   U (       d  GM  GN! ["        [        R$                  4 a    TR'                  5         e f = f7f)N	assistant)roler{   FTzdata: {"error": "z"}

 )r{   rO   schemafunction_tool_call_name	argumentsr   r   )indexrP   idr   )r{   
tool_callsr   lengthstopprompt_tokenscompletion_tokenstotal_tokens)finish_reasonr{   usage)_build_chunk_sserS   append
get_nowaitasyncio
QueueEmptyrR   r   rV   joinr   generated_token_ids	enumerater   max_new_tokensr   r   GeneratorExitCancelledErrorcancel)donetextbatch	sse_partshas_tool_callsparseditchit_maxr   r   r   	input_lenrz   rJ   queuerG   ry   streamerre   s              r-   sse_gen1ChatCompletionHandler._streaming.<locals>.sse_gen   s]    G++J[PX+YY!&,D!FE"!LL)9)9);< #, "'-i9U9UWbckWlmF)-%.v%6EA"&"7"7 *&.$7./-7.8\QC+H:<V*SUVaSb1c	%&," #8 #  &7 %334?vHDYDY]g]v]vDv!$0M$,M$*M'"+&.&;&;!*X-B-B!B
 ++"/"	 ,  q -
 #--  ,.I %<#'D!%dL99%,,/A$((7-ST"$'')"44"!(()>)>zQYcg)>)hi !& ! ggi00- $~ "7#9#9:  !	s^   H80H	 E	H	 "E  C(H	 H8	H	 E# H	 "E##AH	 =H8>AH	 	,H55H8ztext/event-stream
media_type)generate_streamingrR   rT   lenshaper   r(   r	   )ry   rG   r{   rJ   rz   r   r   rd   re   r   r   r   r   r   s   `` `` ` `  @@@r-   rw    ChatCompletionHandler._streaming   s     &88!# 9 
x ;'	&0D&A&AC	NyWYGZ	H	~c4i8 H	 H	T !7JKKr,   c	                   #    UR                  X#XVUS9I Sh  vN u  pnUR                  SL=(       a    [        U5      UR                  :  n[        U5      n[        U
UX-   S9nSnUbM  [	        X;US   5      nU(       a7  [        U5       VVs/ s H  u  nn[        U SU 3SUS   US   S	.S
9PM!     nnnUb  SnOU(       a  SnOSn[        U R                  UU	UUUUS9SS9$  Ns  snnf 7f)z)Run generation and return a JSONResponse.)rG   Nr   r   r   r   r   r   r   )r   rP   r   r   r   r   )r   r   r   zapplication/jsonr   )	generate_non_streamingr   r   r   r   r   r   r   _build_completion)ry   rG   r{   rJ   rz   r   r   rd   re   rO   r   generated_idsr   r   r   r   r   r   r   r   s                       r-   rx   $ChatCompletionHandler._non_streaming  sI     3>2T2TfZ 3U 3
 -
)M ++47kC<NR\RkRk<k.#/"6
 
"%iH@UVF "+6!2 "32 2(\QC8'*,V*2k?!S
 "3   !(M$M"M""+% #  *

 
	
A-
 s"   C,C$A5C,&C&32C,&C,model_generation_configrL   c                   > [         TU ]  XUS9nUR                  S5      b  [        US   5      Ul        UR                  S5      b  S[        US   5      -   Ul        UR                  S5      b8  US   R                  5        VVs0 s H  u  pV[        U5      4U_M     snnUl        UR                  S5      b
  US   Ul	        U$ s  snnf )zApply Chat Completions params (``max_tokens``, ``frequency_penalty``, ``logit_bias``,
``stop``) on top of the base generation config.rK   
max_tokensfrequency_penaltyg      ?
logit_biasr   )
superru   rS   r*   r   floatrepetition_penaltyitemssequence_biasstop_strings)ry   rF   r   rL   r!   kv	__class__s          r-   ru   .ChatCompletionHandler._build_generation_configK  s     "G<Tci<j88L!-/243E/F,88'(436tDW?X9Y3Y088L!-HL\HZH`H`Hb.cHbAy!|Hb.c+88F'-1&\*  	 /ds   CrO   r   r   r   c                     [        USUS9n[        U[        [        R                  " 5       5      SU[	        SUUS9/US9nUR                  SS9$ )	a  Build a non-streaming ChatCompletion response dict.

Args:
    request_id (`str`): Unique request identifier.
    content (`str`): The generated text.
    model_id (`str`): Model ID to include in the response.
    finish_reason (`str`): Why generation stopped (``"stop"``, ``"length"``, ``"tool_calls"``).
    usage (`CompletionUsage`, *optional*): Token usage statistics.
    tool_calls (`list[dict]`, *optional*): Parsed tool calls, if any.

Returns:
    `dict`: Serialized ``ChatCompletion`` ready for JSON response.
r   rO   r   r   zchat.completionr   )r   messager   )r   createdobjectr{   choicesr   T)exclude_none)r   r
   r*   timer   
model_dump)	ry   rG   rO   rz   r   r   r   r   results	            r-   r   'ChatCompletionHandler._build_completion]  se    , (kV`a		$$#"/ 
   d 33r,   r   c                     [        U[        [        R                  " 5       5      U[        [	        X$US9SUS9/USSS9nU R                  U5      $ )a*  Build a streaming ``ChatCompletionChunk`` and format it as an SSE ``data:`` line.

Args:
    request_id (`str`): Unique request identifier.
    content (`str`, *optional*): Text content delta.
    model (`str`, *optional*): Model ID.
    role (`str`, *optional*): Role (only sent in the first chunk).
    finish_reason (`str`, *optional*): Set on the final chunk.
    tool_calls (`list`, *optional*): Tool call deltas.
    usage (`CompletionUsage`, *optional*): Token usage (sent with the final chunk).

Returns:
    `str`: A formatted SSE event string.
r   r   )deltar   r   r   zchat.completion.chunk)r   r   r{   r   r   system_fingerprintr   )r   r*   r   ChoiceChunkr   chunk_to_sse)	ry   rG   rO   r{   r   r   r   r   chunks	            r-   r   &ChatCompletionHandler._build_chunk_sse  s]    0 $		$%gZX"/ !*
   ''r,   r#   )N)F)NN)r   NNNNNN)r$   r%   r&   r'   __doc__r   _valid_params_classUNUSED_CHAT_COMPLETION_FIELDS_unused_fieldsdictr(   r	   r   r   r   rw   rx   boolru   r   rT   r   r   r+   __classcell__)r   s   @r-   rD   rD   X   s'   
 F2NF F3 FCTWcCc Ff $(bLbL !bL >	bL
 bL bL 'bL )bL D[bL 
bL^ $(6
6
 !6
 >	6

 6
 6
 '6
 )6
 D[6
 
6
t!T !L^ !hl ! !0 )-(,%4%4 %4 	%4
 %4 %%4 J%%4 
%4R " $("&(,'('( t'( Tz	'(
 Dj'( Tz'( 4K'( %'( 
'( '(r,   rD   ).r   r   r   collections.abcr   typingr   utilsr   utils.import_utilsr   fastapi.responsesr   r	   openai.types.chatr
   r   r   !openai.types.chat.chat_completionr   'openai.types.chat.chat_completion_chunkr   r   r   r   *openai.types.chat.completion_create_paramsr   openai.types.completion_usager   r   r   r   r   r   r   transformersr   r   r   r   r   r   
get_loggerr$   rl   rD   r#   r,   r-   <module>r      s      *    4 Aff8mmMZ=  gg2QY^ ! 0 
		H	%S(K S(r,   