
    Z j'                     ,   S r SSKrSSKrSSKJr  SSKJr  SSKJr  SSK	J
r
  \
" 5       (       a  SSKJr  SS	KJrJr  SS
KJrJrJr  SSKJr  SSKJrJrJr  \(       a  SSKJrJrJrJr   " S S\SS9r1 Skr\R@                  " \!5      r" " S S\5      r#g)a
  
Handler for the /v1/completions endpoint (OpenAI legacy Completions API).

Accepts a freeform text prompt (no chat template) and returns generated text
in choices[].text. Supports streaming and non-streaming modes, and suffix for
fill-in-the-middle text insertion.
    N)AsyncGenerator)TYPE_CHECKING   )logging)is_serve_available)HTTPException)JSONResponseStreamingResponse)
CompletionCompletionChoiceCompletionUsage)CompletionCreateParamsBase   )BaseGenerateManagerBaseHandler_StreamError)GenerationConfigPreTrainedModelPreTrainedTokenizerFastProcessorMixinc                   4    \ rS rSr% \\S'   \\S'   \\S'   Srg)&TransformersTextCompletionCreateParams-   generation_configseedstream N)	__name__
__module____qualname____firstlineno__str__annotations__intbool__static_attributes__r       t/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/cli/serving/completion.pyr   r   -   s    
ILr'   r   F)total>   nechouserbest_oflogprobsstream_optionspresence_penaltyc                     ^  \ rS rSrSr\r\rS\	S\
SS4S jr S S\
S	S
SSS\
S\	SSS\S\
S-  SS4S jjr S S\
S	S
SSS\
S\	SSS\S\
S-  SS4S jjr   S!S\
S\
S\
S\
S-  SSS\
4S jjrS"S\	SSS\4U 4S jjjrSrU =r$ )#CompletionHandlerB   zHandler for the `/v1/completions` endpoint.

Takes a raw text ``prompt`` (no chat template) and generates text returned in
``choices[].text``. Supports streaming (SSE) and non-streaming (JSON) responses,
and ``suffix`` for fill-in-the-middle insertion.
body
request_idreturnz StreamingResponse | JSONResponsec           
        #    U R                  U5        UR                  SS5      n[        U[        5      (       d
  [	        SSS9eU R                  U5      u  pEnU R                  R                  XVS9nU R                  R                  XW5      n[        R                  SU SU 35        U R                  R                  XHS	9n	[        US
U5      n
U
" X8(       a  SOSS9nU(       d  UR                  UR                  5      nU R!                  XR"                  US	9nU(       a  U	R%                  X\5        UR                  S5      nUR                  S5      nU(       a  U R'                  X%XdXX5      $ U R)                  X%XdXX5      I Sh  vN $  N7f)ac  Validate the request, load the model, and dispatch to streaming or non-streaming.

Args:
    body (`dict`): The raw JSON request body (OpenAI legacy completions format).
    request_id (`str`): Unique request identifier (from header or auto-generated).

Returns:
    `StreamingResponse | JSONResponse`: SSE stream or JSON depending on ``body["stream"]``.
prompt i  zprompt must be a string.)status_codedetail)	processorz[Request received] Model: z, CB: use_cb	tokenizerNpt)return_tensorssuffixr   )_validate_requestget
isinstancer"   r   _resolve_modelmodel_managerget_model_modalitygeneration_stateuse_continuous_batchingloggerwarningget_managergetattrtodevice_build_generation_configr   init_cb
_streaming_non_streaming)selfr4   r5   r8   model_idmodelr<   modalityr>   gen_managerr?   inputs
gen_configrB   	streamings                  r(   handle_request CompletionHandler.handle_requestM   sr     	t$(B'&#&&C8RSS%)%8%8%>"%%888T&&>>uO3H:VF8LM++777PI{I>	6&$dKYYu||,F2249P9PY_2`
2(#HHX&	??:i6_jss,,9K   s   FFF
FNrW   r   r<   z(ProcessorMixin | PreTrainedTokenizerFastrV   rZ   r[   r   rY   rB   r
   c	           	         ^ ^^^^^^^ UR                  X#UTTS9u  mmUS   n	[        U	[        5      (       a  [        U	5      OU	R                  S   mS[
        [        S4   4UUUUUU UU4S jjn
[        U
" 5       SS9$ )	zStream tokens as SSE.r5   	input_idsr6   Nc            	       >#     Sn U (       d>  T
R                  5       I S h  vN nU/n  UR                  T
R                  5       5        M!  TR                  S L=(       a    TR                  TR                  :  nU(       a  SOSnTb  TR                  TT	TS97v   [        TTR                  TTR                  -   S	9nTR                  TT	XVS
97v   g  N! [        R                   a     Of = f/ nU Ht  nUc  Sn   On[        U[        5      (       a5  UR                  SUR                   S35        SR                  U5      7v     g UR                  TR                  TT	US95        Mv     U(       a  SR                  U5      7v   U (       d  GM  GNF! [        [        R                  4 a    TR                  5         e f = f7f)NFTzdata: {"error": "z"}

r9   )textlengthstopprompt_tokenscompletion_tokenstotal_tokens)finish_reasonusage)rD   append
get_nowaitasyncio
QueueEmptyrE   r   msgjoin_build_chunk_ssemax_new_tokensrj   r   GeneratorExitCancelledErrorcancel)donerd   batch	sse_partshit_maxrk   rl   r[   	input_lenrV   queuer5   rU   streamerrB   s          r(   sse_gen-CompletionHandler._streaming.<locals>.sse_gen   s    '!&,D!FE"!LL)9)9);< #( %334?vHDYDY]g]v]vDv,3%//
H6/RR'"+&.&;&;!*X-B-B!B
 ++JP]+kkC -
 #--  ,.I %<#'D!%dL99%,,/A$((7-ST"$'')"44"!(()>)>z8Z^)>)_` !& ! ggi00- $F "7#9#9: !s^   GF CF "C BF GF C,)F +C,,AF GAF ,F>>Gztext/event-stream
media_type)generate_streamingrE   listlenshaper   r"   r
   )rU   r5   rW   r<   rV   rZ   r[   rY   rB   ra   r   r|   r}   r~   s   ``  ` ` `  @@@r(   rS   CompletionHandler._streamingx   s|     &886S]jt8ux;'	&0D&A&AC	NyWYGZ	(	~c4i8 (	 (	T !7JKKr'   r	   c	                 x  #    UR                  X#XVUS9I Sh  vN u  pnUb  X-   n	[        U5      nUR                  SL=(       a    XR                  :  nU(       a  SOSn[        U
UX-   S9n[	        U[        [        R                  " 5       5      U[        U	SSUS9/SUS	9n[        UR                  S
S9SS9$  N7f)z)Run generation and return a JSONResponse.r`   Nre   rf   rg   r   rd   indexr.   rk   text_completion)idcreatedrW   choicesobjectrl   T)exclude_nonezapplication/jsonr   )
generate_non_streamingr   rt   r   r   r$   timer   r	   
model_dump)rU   r5   rW   r<   rV   rZ   r[   rY   rB   rd   r|   generated_idsri   r{   rk   rl   results                    r(   rT    CompletionHandler._non_streaming   s      0;/Q/QfZ 0R 0
 *
& =D.++47j<MQjQj<j$+#/"6
 		$ !"/	 %
  F--4-@M_``C*
s   B:B8B!B:rd   rk   rl   zCompletionUsage | Nonec                     [         R                  " US[        [        R                  " 5       5      U[        R                  " USSUS9/US9nU R                  U5      $ )a  Build a streaming ``Completion`` chunk and format it as an SSE ``data:`` line.

Uses ``model_construct`` to bypass pydantic validation so that ``finish_reason``
can be ``None`` for mid-stream chunks (the OpenAI SDK's ``CompletionChoice`` only
accepts literal values).
r   r   Nr   )r   r   r   rW   r   rl   )r   model_constructr$   r   r   chunk_to_sse)rU   r5   rV   rd   rk   rl   chunks          r(   rs   "CompletionHandler._build_chunk_sse   sb     **$		$ 00!"/	 
   ''r'   model_generation_configr>   c                    > [         TU ]  XUS9nUR                  S5      b  [        US   5      Ul        UR                  S5      b  S[        US   5      -   Ul        UR                  S5      b
  US   Ul        U$ )zgApply legacy completion params (``max_tokens``, ``frequency_penalty``, ``stop``) on top of base config.r=   
max_tokensfrequency_penaltyg      ?rf   )superrQ   rD   r$   rt   floatrepetition_penaltystop_strings)rU   r4   r   r>   r   	__class__s        r(   rQ   *CompletionHandler._build_generation_config  s    !G<Tci<j88L!-/243E/F,88'(436tDW?X9Y3Y088F'-1&\*  r'   r   )N)r9   NN)F)r   r   r    r!   __doc__r   _valid_params_classUNUSED_LEGACY_COMPLETION_FIELDS_unused_fieldsdictr"   r]   r   rS   rT   rs   r%   rQ   r&   __classcell__)r   s   @r(   r2   r2   B   s    A4N' '3 'Ce 'h ":L:L !:L >	:L
 :L :L ':L ):L d
:L 
:LN "-a-a !-a >	-a
 -a -a '-a )-a d
-a 
-aj $(*.(( ( 	(
 Tz( (( 
(B!T !L^ !hl ! !r'   r2   )$r   ro   r   collections.abcr   typingr   utilsr   utils.import_utilsr   fastapir   fastapi.responsesr	   r
   openai.typesr   r   r   %openai.types.completion_create_paramsr   r   r   r   transformersr   r   r   r   r   r   
get_loggerr   rK   r2   r   r'   r(   <module>r      s      *    4 %AJJP B A gg-Gu #  
		H	%Q! Q!r'   