
    Z jS                         S r SSKJr  SSKJr  SSKJrJrJrJ	r	  SSK
JrJr  SSKJrJr  \R                   " \5      rSr " S S	\S
S9r " S S\S
S9r\ " S S\5      5       rS/rg)z
Processor class for Janus.
   )BatchFeature)
ImageInput)ProcessingKwargsProcessorMixin
TextKwargsUnpack)PreTokenizedInput	TextInput)auto_docstringloggingzYou are a helpful language and vision assistant. You are able to understand the visual content that the user provides, and assist the user with a variety of tasks using natural language.

c                   $    \ rS rSr% Sr\\S'   Srg)JanusTextKwargs"   a_  
generation_mode (`str`, *optional*, defaults to `"text"`):
    The generation mode indicating which modality to generate. Can be one of `"text"` or `"image"`. When set
    to `"text"`, the processor prepares inputs for text generation. When set to `"image"`, it prepares inputs
    for image generation by appending image start tokens to the prompt.
generation_mode N)__name__
__module____qualname____firstlineno____doc__str__annotations____static_attributes__r       {/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/models/janus/processing_janus.pyr   r   "   s     r   r   F)totalc                   6    \ rS rSr% \\S'   SSSS.SS0S	.rS
rg)JanusProcessorKwargs-   text_kwargsFlefttext)paddingpadding_sider   return_tensorspt)r    common_kwargsr   N)r   r   r   r   r   r   	_defaultsr   r   r   r   r   r   -   s"      #(&U[\*D1Ir   r   c            
          ^  \ rS rSrSU 4S jjr\  SS\\-  \\   -  \\   -  S\	S-  S\
\   S\4S jj5       rS\	4S	 jr SS
 jrSrU =r$ )JanusProcessor5   Nc                    > SU l         UR                  U l        UR                  U l        UR                  U l        X@l        [        TU ]!  XUS9  g)zy
use_default_system_prompt (`bool`, *optional*, defaults to `False`):
    Use default system prompt for Text Generation.
i@  )chat_templateN)	num_image_tokensimage_token	boi_tokenimage_start_token	eoi_tokenimage_end_tokenuse_default_system_promptsuper__init__)selfimage_processor	tokenizerr-   r4   kwargs	__class__s         r   r6   JanusProcessor.__init__7   sO    
 !$$00!*!4!4(22)B&=Qr   r"   imagesr:   returnc                     U R                   " [        4SU R                  R                  0UD6nUc  Uc  [	        S5      eUbV  [        U[        5      (       a  U/nO=[        U[        [        45      (       a  [        S U 5       5      (       d  [	        S5      eUS   R                  S5      n/ nU R                  U R                  U R                  -  -   U R                  -   nU Hd  nUR                  U R                  U5      nU R                   (       a  US:X  a	  ["        U-   nUS:X  a  XR                  -  nUR%                  U5        Mf     U R                  " U40 US   D6n	Ub#  US:w  a  U R&                  " SS	U0US
   D6S   U	S'   [)        U	S9$ )a  
Returns:
    [`BatchFeature`]: A [`BatchFeature`] with the following fields:

    - **input_ids** -- List of token ids to be fed to a model. Returned when `text` is not `None`.
    - **attention_mask** -- List of indices specifying which tokens should be attended to by the model (when
      `return_attention_mask=True` or if *"attention_mask"* is in `self.model_input_names` and if `text` is not
      `None`).
    - **pixel_values** -- Pixel values to be fed to a model. Returned when `images` is not `None`.
tokenizer_init_kwargsz'You must specify either text or images.c              3   B   #    U  H  n[        U[        5      v   M     g 7f)N)
isinstancer   ).0ts     r   	<genexpr>*JanusProcessor.__call__.<locals>.<genexpr>`   s     =_Z^UVjC>P>PZ^s   zAInvalid input text. Please provide a string, or a list of stringsr    r   r"   imager=   images_kwargspixel_values)datar   )_merge_kwargsr   r9   init_kwargs
ValueErrorrB   r   listtupleallpopr1   r/   r.   r3   replacer4   DEFAULT_SYSTEM_PROMPTappendr8   r   )
r7   r"   r=   r:   output_kwargsr   prompt_stringsone_img_tokenspromptrJ   s
             r   __call__JanusProcessor.__call__D   s   $ ** 
8<8R8R
V\
 <FNFGG$$$v e}55#=_Z^=_:_:_ !dee'6::;LM //43C3CdF[F[3[\_c_s_ssF^^D$4$4nEF--/V2K.7')000!!&)  ~~nMm0LM /W"<#'#7#7#hv#hWfIg#h$D  &&r   c                 <    U R                   R                  " U40 UD6$ )z
Forwards all arguments to the image processor's `postprocess` method.
Refer to the original method's docstring for more details.
)r8   postprocess)r7   r=   r:   s      r   r\   JanusProcessor.postprocessz   s     
 ##//A&AAr   c                     Ub  US:X  a  U R                   " U4SU0UD6$ US:X  a.  [        UR                  5       5      nU R                  USS9nUS   $ [	        U R
                  R                   SU S35      e)	a  
Post-process the output of a multimodal model to return the requested modality output.
If the model cannot generated the requested modality, an error will be raised.

Args:
    generated_outputs (`torch.Tensor` or `np.ndarray`):
        The output of the model `generate` function. The output is expected to be a tensor of shape `(batch_size, sequence_length)`
        or `(sequence_length,)`.
    skip_special_tokens (`bool`, *optional*, defaults to `True`):
        Whether or not to remove special tokens in the output. Argument passed to the tokenizer's `batch_decode` method.
    generation_mode (`str`, *optional*):
        Generation mode indicated which modality to output and can be one of `["text", "image", "audio"]`.
    **kwargs:
        Additional arguments to be passed to the tokenizer's `batch_decode method`.

Returns:
    `list[Union[str, PIL.Image.Image]]`: The decoded text or generated image.
r"   skip_special_tokensrG   zPIL.Image.Image)r%   rI   z# got an unexpected generation_mode=z.. Supported options are only `text` and `image)post_process_image_text_to_textrN   floatr\   rM   r;   r   )r7   generated_outputsr_   r   r:   r=   s         r   post_process_multimodal_output-JanusProcessor.post_process_multimodal_output   s    * "o&?77!7JNT  ' $%6%<%<%> ?%%&7HY%ZF.)) >>**++NN_  `N  O r   )r3   r1   r/   r.   r4   )NF)NN)TN)r   r   r   r   r6   r   r
   r	   rN   r   r   r   r   rY   r\   rc   r   __classcell__)r;   s   @r   r*   r*   5   s    R  [_$(3'++d9o=EV@WW3' T!3' -.	3'
 
3' 3'jB* B LP" "r   r*   N)r   feature_extraction_utilsr   image_utilsr   processing_utilsr   r   r   r   tokenization_utils_baser	   r
   utilsr   r   
get_loggerr   loggerrS   r   r   r*   __all__r   r   r   <module>rn      s    5 % T T C , 
		H	%N j +5  m^ m m` 
r   