
    Z jc                        S r SSKrSSKrSSKJr  SSKJr  SSKJr  SSK	J
r
  \
" 5       (       aD  SSKJr  SS	KJrJr  SS
KJrJrJrJrJrJrJrJrJrJrJrJrJrJrJrJ r J!r!  SSK"J#r#  SSK$J%r%J&r&J'r'  SSKJ(r(J)r)J*r*J+r+J,r,J-r-  \(       a  SSK.J/r/J0r0J1r1J2r2  \Rf                  " \45      r5 " S S\#SS9r61 Skr7 " S S\)5      r8S\9S\9S\'4S jr:g)z~
Handler for the /v1/responses endpoint (OpenAI Responses API).

Supports streaming (SSE) and non-streaming (JSON) responses.
    N)AsyncGenerator)TYPE_CHECKING   )logging)is_serve_available)HTTPException)JSONResponseStreamingResponse)ResponseResponseCompletedEventResponseContentPartAddedEventResponseContentPartDoneEventResponseCreatedEventResponseErrorResponseErrorEventResponseFailedEvent&ResponseFunctionCallArgumentsDoneEventResponseFunctionToolCallResponseInProgressEventResponseOutputItemAddedEventResponseOutputItemDoneEventResponseOutputMessageResponseOutputTextResponseTextDeltaEventResponseTextDoneEvent)ResponseCreateParamsStreaming)InputTokensDetailsOutputTokensDetailsResponseUsage   )BaseGenerateManagerBaseHandlerModality_StreamErrorget_tool_call_configparse_tool_calls)GenerationConfigPreTrainedModelPreTrainedTokenizerFastProcessorMixinc                   *    \ rS rSr% \\S'   \\S'   Srg))TransformersResponseCreateParamsStreamingH   generation_configseed N)__name__
__module____qualname____firstlineno__str__annotations__int__static_attributes__r0       r/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/cli/serving/response.pyr,   r,   H   s    
Ir9   r,   F)total>   textuserstorepromptinclude	reasoning
background
truncationtool_choiceservice_tiertop_logprobsmax_tool_callsprevious_response_idc                   ^  ^  \ rS rSrSr\r\rS\	S\
S\\-  4S jr\S\\	   S-  S\\	   S-  4S	 j5       r\S\	S\\	   4S
 j5       r\S\\	   S\\	   4S j5       r SS\
SSSSS\
S\	S\	SSS\S\	S-  S\4S jjr SS\
SSSSS\
S\	S\	SSS\S\	S-  S\4S jjrSS\	SSS\4U 4S jjjrSrU =r$ )ResponseHandler^   z+Handler for the ``/v1/responses`` endpoint.body
request_idreturnc                   #    U R                  U5        U R                  U5      u  p4nU R                  R                  XES9nU R                  R                  XF5      n[        R                  SU SU 35        U R                  R                  X7S9nU R                  U5      n	U R                  X5      n
[        S U
 5       5      n0 nU(       a  SUS'   U R                  UR                  S5      5      nUR                  " U
4S	UU(       a  S
OSS	S	U[        R                   :H  =(       a    US.UD6nU(       d  UR#                  UR$                  5      nU R'                  XR(                  US9nU(       a  UR+                  XO5        UR                  S5      (       a  [-        XT5      OS
nUR                  SS	5      nU(       a  U R/                  UUUUUUUUUS9	$ U R1                  UUUUUUUUUS9	I S
h  vN $  N7f)aJ  Validate, load model, dispatch to streaming or non-streaming.

Args:
    body (`dict`): The raw JSON request body (OpenAI Responses API format).
    request_id (`str`): Unique request identifier (from header or auto-generated).

Returns:
    `StreamingResponse | JSONResponse`: SSE stream or JSON depending on ``body["stream"]``.
)	processorz[Request received] Model: z, CB: use_cbc              3      #    U  HX  n[        UR                  S 5      [        5      (       a  UR                  S 5      O/   H  nUR                  S5      S:H  v   M     MZ     g7f)contenttypevideoN)
isinstancegetlist).0msgcs      r:   	<genexpr>1ResponseHandler.handle_request.<locals>.<genexpr>|   sU      
',6swwy7I4,P,Pcggi(VXX EE&MW$X %'s   A A"    
num_framestoolsTNpt)add_generation_promptra   return_tensorsreturn_dicttokenizeload_audio_from_videostream)gen_managertool_config)_validate_request_resolve_modelmodel_managerget_model_modalitygeneration_stateuse_continuous_batchingloggerwarningget_manager_normalize_input"get_processor_inputs_from_messagesany_normalize_toolsrX   apply_chat_templater#   
MULTIMODALtodevice_build_generation_configr.   init_cbr%   
_streaming_non_streaming)selfrL   rM   model_idmodelrP   modalityrR   ri   messagesprocessor_inputs	has_videochat_template_kwargsra   inputs
gen_configrj   	streamings                     r:   handle_requestResponseHandler.handle_requestd   s      	t$%)%8%8%>"%%888T&&>>uO3H:VF8LM++777P
 ((.BB8V 
'
 
	  "13 .%%dhhw&78..	
"&#)4t"*h.A.A"A"Oi	
 #	
 YYu||,F2249P9PY_2`
2@D@Q@Q*9<W[HHXt,	??'' # 
 
 ,,'' - 
 
 
 
s   G.G70G51G7ra   Nc                     U (       d  U $ U  VVVs/ s H;  nSU;  a0  SUR                  5        VVs0 s H  u  p#US:w  d  M  X#_M     snnS.OUPM=     snnn$ s  snnf s  snnnf )aX  Normalize Responses API tool definitions for ``apply_chat_template``.

The Responses API uses a flat format: ``{"type": "function", "name": ..., "parameters": ...}``
while ``apply_chat_template`` expects a nested format:
``{"type": "function", "function": {"name": ..., "parameters": ...}}``.
Already-nested tools are passed through unchanged.
functionrU   )rU   r   )items)ra   tkvs       r:   rw    ResponseHandler._normalize_tools   sm     L 
 ^hop]pZqwwy-XytqAQWKdady-XYvww
 	
-X
s    AA AAAc                 r   U S   nU R                  S5      n[        U[        5      (       a  SUS./nOL[        U[        5      (       a-  U(       a  SUS   ;  a  SUS./nO [        R                  U5      nO
[        SSS	9eU(       a1  U(       a  US   S   S
:X  a	  X#S   S'   U$ UR                  SS
US.5        U$ )u  Normalize the Responses API ``input`` field into chat messages.

The Responses API accepts multiple input formats. This method converts them
into a structure close to what ``apply_chat_template`` expects (messages with
``role``, ``content``, ``tool_calls``, ``tool_call_id``). Further processing
is done by ``get_processor_inputs_from_messages``.

NOTE: if this conversion logic grows too complex, consider having separate
``get_processor_inputs_from_messages`` implementations for chat completions
and the Responses API instead of funneling both through the same path.

Formats handled:
    - **String** → single user message.
    - **Flat content list** (``input_text``, ``input_image``, no ``role``) → user message.
    - **Multi-turn list** — messages and tool call items (``function_call``,
      ``function_call_output``) from a previous response, converted via
      :meth:`_normalize_response_items`.

If ``instructions`` is present, it is prepended as a system message.
inputinstructionsr=   rolerT   r   r     z 'input' must be a string or liststatus_codedetailsystemrT   )rX   rW   r5   rY   rJ   _normalize_response_itemsr   insert)rL   inpr   r   s       r:   rt    ResponseHandler._normalize_input   s    , 7mxx/c3!'C89HT""vSV+%+<=*DDSIC8Z[[ HQK/8;)5I&  H#NOr9   r   c                    / nU  H  nUR                  S5      nSU;   a)  UR                  US   UR                  SS5      S.5        MC  US:X  aa  US   US   US	   S
.S.nU(       a2  US   S   S:X  a&  US   R                  S/ 5      R                  U5        M  UR                  SU/S.5        M  US:X  a  UR                  SUS   US   S.5        M  [        SSU< 3S9e   U$ )uP  Convert a list of Responses API items into chat messages.

Input items may be a mix of:
    - Messages (``EasyInputMessageParam`` with ``role``, or ``type: "message"``).
    - ``function_call`` — merged as ``tool_calls`` onto the preceding assistant message.
    - ``function_call_output`` — converted to ``role: "tool"`` messages.
rU   r   rT    r   function_callcall_idname	arguments)r   r   )idr   	assistant
tool_calls)r   r   function_call_outputtooloutput)r   tool_call_idrT   r   zUnsupported input item type: r   )rX   append
setdefaultr   )r   r   item	item_typetcs        r:   r   )ResponseHandler._normalize_response_items   s    D(I~f$((9VXBY Z[o-y/)-fDDU V V 4 CRL++L"=DDRHOO[$MN44 &(,Y#'> $>[\e[h<ijj5 8 r9   r   r(   rP   z(ProcessorMixin | PreTrainedTokenizerFastr   r   r   r'   ri   rj   c
                   ^ ^^^	^^^^^^^ UR                  UTUUTT	S9u  mmUS   n
[        U
[        5      (       a  [        U
5      OU
R                  S   mSmSm[
        R
                  " 5       nST 3nST 3mUUUS/ UR                  SS	5      S
S.mS[        [        S4   4UUUUUUUU UUU	4S jjn[        U" 5       SS9$ )zDGenerate a streaming Responses API reply (SSE) using DirectStreamer.)rM   rj   	input_idsr   r   resp_msg_responseparallel_tool_callsFauto)r   
created_atr   objectra   r   rD   rN   Nc                   >#     TR                  [        ST[        S/0 TDS/ S.D6S95      7v   TS-  mTR                  [        ST[        S/0 TDS/ S.D6S95      7v   TS-  mTR                  [	        STT[        TS	SS
/ S9S95      7v   TS-  mTR                  [        STTTS[        SS/ S9S95      7v   TS-  mSn / nSnU(       d>  TR                  5       I S h  vN nU/n  UR                  TR                  5       5        M!  T(       a  [/        TTR0                  TS   5      nU(       a  [3        U5       H  u  pxT S U 3n	[5        U	U	S!US"   US#   S$S%9n
UR                  U
5        TS-  mTR                  [	        STTU
S95      7v   TS-  mTR                  [7        S&TU	TUS#   US"   S'95      7v   TS-  mTR                  [9        S(TTU
S95      7v   TS-  mM     [        SU / S9nTR                  [;        S)TTSSU / S*95      7v   TS-  mTR                  [=        S+TTSSUS95      7v   TS-  m[        TS	S$S
U// S,9nTR                  [9        S(TSUS95      7v   TS-  mU/[?        U5      -   n[A        TTRB                  5      nTR                  [E        S-T[        S/0 TDS$XS..D6S95      7v   TS-  mg  GN! [        R                   a     Of = f/ nU GH  nUc  Sn  GO
[        U[        5      (       a  [        R!                  SUR"                   35        UR                  TR                  [%        STUR"                  S95      5        TS-  mUR                  TR                  ['        ST[        S/0 TDS/ [)        SUR"                  S9S.D6S95      5        SR+                  U5      7v     g X-  n UR                  TR                  [-        STTSSU/ S95      5        TS-  mGM     U(       a  SR+                  U5      7v   U(       d  GMV  GN! [F        [        RH                  4 a    TRK                  5         e f = f7f)0Nzresponse.createdqueued)statusr   )rU   sequence_numberr   r    zresponse.in_progressin_progresszresponse.output_item.addedmessager   )r   rU   r   r   rT   )rU   r   output_indexr   zresponse.content_part.addedr   output_textr   rU   r<   annotations)rU   item_idr   r   content_indexpartFTz"Exception in response generation: error)rU   r   r   zresponse.failedfailedserver_error)coder   )r   r   r   zresponse.output_text.delta)rU   r   r   r   r   deltalogprobsschema_tool_call_r   r   r   	completedr   r   rU   r   r   r   z%response.function_call_arguments.done)rU   r   r   r   r   r   zresponse.output_item.donezresponse.output_text.done)rU   r   r   r   r   r<   r   zresponse.content_part.doner   rU   r   r   rT   r   zresponse.completed)r   r   usager0   )&chunk_to_sser   r   r   r   r   r   r   rX   r   
get_nowaitasyncio
QueueEmptyrW   r$   rq   r   r[   r   r   r   joinr   r&   generated_token_ids	enumerater   r   r   r   r   rY   compute_usagetotal_tokensr   GeneratorExitCancelledErrorcancel)	full_textr   doner<   batch	sse_partsparsedir   tc_idtc_itemoutput_text_partmsg_item
all_outputr   	input_lenmsg_idr   rP   queuerM   response_baser   seqstreamerrj   s                  r:   event_stream0ResponseHandler._streaming.<locals>.event_streamI  sc    [''(/(+!)!VM!V(SU!V  q''+3(+!)![M![-XZ![  q ''09(+%12%!*#0!,$&	  q ''1: &(+%1&'/]Y[\	 	 q 	
!&,D!FE"!LL)9)9);< #p -i9U9UWbckWlmF%.v%6EA'1l+aS$AE&>#((-%4%'Z*,[/'2'G '--g6(A-L"&"3"3 <)E471=)0	!"#   1HC"&"3"3 F)P47,11=.0o)+F!"	# 	  1HC"&"3"3 ;)D471=)0	!"#   1HCQ &7V $6=yfh#i '')8 &(+%&&'&!#
 
 q''09 &(+%&&'-	 	 q0"&$-. " ''/8(+%&%	  q 'Z$z*::
%i1F1FG''*1(+!)!nM!n+V`!n  q - #--  ,.I %<#'D!%dL99"LL+MdhhZ)XY%,, $ 1 1$6GUXbfbjbj$k!"
  1HC%,, $ 1 1$7->8;19 2*.;2*3;352?^]a]e]e2f	2*	%&!" #%'')"44"!)	!(( -- 6)E,2471223*.-/!"
 qW !&Z ! ggi00s $B "7#9#9:  !	sb   QCP& !K"P& *"K F;P& QP& K"P& !K""C!P& QAP& &,QQztext/event-stream)
media_type)
generate_streamingrW   rY   lenshapetimerX   r   r5   r
   )r   rM   r   rP   r   rL   r   r   ri   rj   r   r   resp_idr   r   r   r   r   r   r   r   s   `` `     `    @@@@@@@r:   r~   ResponseHandler._streaming  s     &88!# 9 
x ;'	&0D&A&AC	NyWYGZ	YY[
*&
|$ $ #'88,A5#I!	
^	N39$= ^	 ^	 ^	@ !<OPPr9   c
                   #    UR                  X#XgUS9I Sh  vN u  pn[        SU 3SSS[        SU
/ S9// S	9/nU	bU  [        X<U	S
   5      nU(       a?  [	        U5       H0  u  nnU SU 3nUR                  [        UUSUS   US   SS95        M2     [        U[        U5      5      n[        SU 3[        R                  " 5       SUUSU/ UR                  SS5      SS9
n[        UR                  SS95      $  N7f)z;Generate a non-streaming Responses API reply (single JSON).)rM   Nr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   Fr   )
r   r   r   r   r   r   r   ra   r   rD   T)exclude_none)generate_non_streamingr   r   r&   r   r   r   r   r   r   r   rX   r	   
model_dump)r   rM   r   rP   r   rL   r   r   ri   rj   r   r   generated_idsoutput_itemsr   r   r   r   r   r   s                       r:   r   ResponseHandler._non_streaming-  sJ     5@4V4VfZ 5W 5
 /
+	m
 "*&" +Y\^_`	
 "%iH@UVF&v.EAr)l+aS9E ''0$$)!0!#F&(o#.	 / i]);<zl#yy{ $)> F
 H//T/BCC[/
s   C:C8C!C:model_generation_configrR   c                 n   > [         TU ]  XUS9nUR                  S5      b  [        US   5      Ul        U$ )zXApply Responses API params (``max_output_tokens``) on top of the base generation config.rQ   max_output_tokens)superr|   rX   r7   max_new_tokens)r   rL   r  rR   r.   	__class__s        r:   r|   (ResponseHandler._build_generation_configk  sC    !G<Tci<j88'(4/248K3L/M,  r9   r0   )N)F)r1   r2   r3   r4   __doc__r,   _valid_params_classUNUSED_RESPONSE_FIELDS_unused_fieldsdictr5   r
   r	   r   staticmethodrY   rw   rt   r   r!   r~   r   boolr|   r8   __classcell__)r
  s   @r:   rJ   rJ   ^   s   5C+NO O3 OCTWcCc Of 
T
T 1 
d4j46G 
 
 *t *T
 * *X &d &T
 & &h $(JQJQ !JQ >	JQ
 JQ JQ JQ 'JQ )JQ D[JQ 
JQp $(:D:D !:D >	:D
 :D :D :D ':D ):D D[:D 
:D|!T !L^ !hl ! !r9   rJ   input_tokensoutput_tokensrN   c           
      <    [        U UX-   [        SS9[        SS9S9$ )zBuild a ``ResponseUsage`` object for a Responses API reply.

Args:
    input_tokens (`int`): Number of prompt tokens.
    output_tokens (`int`): Number of generated tokens.

Returns:
    `ResponseUsage`: Usage statistics with zero-filled detail fields.
r   )cached_tokens)reasoning_tokens)r  r  r   input_tokens_detailsoutput_tokens_details)r   r   r   )r  r  s     r:   r   r   u  s-     !#!1/a@11E r9   );r  r   r   collections.abcr   typingr   utilsr   utils.import_utilsr   fastapir   fastapi.responsesr	   r
   openai.types.responsesr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   -openai.types.responses.response_create_paramsr   %openai.types.responses.response_usager   r   r   r!   r"   r#   r$   r%   r&   transformersr'   r(   r)   r*   
get_loggerr1   rq   r,   r  rJ   r7   r   r0   r9   r:   <module>r&     s      *    4 %A    & \ll  gg 
		H	%0MUZ 
 "T!k T!n C M r9   