
    Z j                         S r SSKJr  SSKJr  SSKJr  SSKJrJ	r	J
r
Jr  SSKJrJr  SSKJr  \R"                  " \5      r " S	 S
\
SS9r " S S\SS9r\ " S S\	5      5       rS/rg)z
Processor class for UDOP.
    )logging   )BatchFeature)
ImageInput)ProcessingKwargsProcessorMixin
TextKwargsUnpack)PreTokenizedInput	TextInput)auto_docstringc                   r    \ rS rSr% \\   \\\      -  S-  \S'   \\\      \\\\         -  S-  \S'   Srg)UdopTextKwargs   Nword_labelsboxes )__name__
__module____qualname____firstlineno__listint__annotations____static_attributes__r       y/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/models/udop/processing_udop.pyr   r      sC    cT$s)_,t33S	?T$tCy/22T99r   r   F)totalc                   <    \ rS rSr% \\S'   SSSSSSSSSSS.	0rSrg)	UdopProcessorKwargs#   text_kwargsTFr   )	add_special_tokenspadding
truncationstridereturn_overflowing_tokensreturn_special_tokens_maskreturn_offsets_mappingreturn_lengthverboser   N)r   r   r   r   r   r   	_defaultsr   r   r   r   r    r    #   s2    "&).*/&+"

Ir   r    c            
          ^  \ rS rSrSrU 4S jr\  SS\S-  S\\	-  \
\   -  \
\	   -  S\\   S\4S	 jj5       rS
 r\S 5       rSrU =r$ )UdopProcessor4   aK  
Constructs a UDOP processor which combines a LayoutLMv3 image processor and a UDOP tokenizer into a single processor.

[`UdopProcessor`] offers all the functionalities you need to prepare data for the model.

It first uses [`LayoutLMv3ImageProcessor`] to resize, rescale and normalize document images, and optionally applies OCR
to get words and normalized bounding boxes. These are then provided to [`UdopTokenizer`],
which turns the words and bounding boxes into token-level `input_ids`, `attention_mask`, `token_type_ids`, `bbox`.
Optionally, one can provide integer `word_labels`, which are turned into token-level `labels` for token
classification tasks (such as FUNSD, CORD).

Additionally, it also supports passing `text_target` and `text_pair_target` to the tokenizer, which can be used to
prepare labels for language modeling tasks.
c                 $   > [         TU ]  X5        g )N)super__init__)selfimage_processor	tokenizer	__class__s      r   r2   UdopProcessor.__init__E   s    4r   Nimagestextkwargsreturnc                 |   U R                   " [        4SU R                  R                  0UD6nUS   R	                  SS 5      nUS   R	                  SS 5      nUS   R	                  SS 5      nUS   R                  SS5      nUS   R                  SS5      n	US   R                  S	S 5      n
U R                  R                  (       a  Ub  [        S
5      eU R                  R                  (       a  Ub  [        S5      eU(       a  U	(       d  [        S5      eU
b  U R                  " S0 US   D6$ U R                  " SSU0US   D6nUR	                  SS 5      nUR	                  SS 5      nUS   R	                  S	S 5        US   R	                  SS 5        XtS   S'   Ub  UOUUS   S'   XdS   S'   Ub=  U R                  R                  (       a"  Uc  [        U[        5      (       a  U/nXS   S'   U R                  " SSUb  UOU0US   D6nUSL a  U R                  US   US   5      US'   UR                  U5        U$ )Ntokenizer_init_kwargsr"   r   r   	text_pairr'   Fr)   text_targetzdYou cannot provide bounding boxes if you initialized the image processor with apply_ocr set to True.zaYou cannot provide word labels if you initialized the image processor with apply_ocr set to True.zKYou cannot return overflowing tokens without returning the offsets mapping.r8   images_kwargswordstext_pair_targetr9   Tpixel_valuesoverflow_to_sample_mappingr   )_merge_kwargsr    r5   init_kwargspopgetr4   	apply_ocr
ValueError
isinstancestrget_overflowing_imagesupdate)r3   r8   r9   r:   output_kwargsr   r   r>   r'   r)   r?   featuresfeatures_wordsfeatures_boxesencoded_inputss                  r   __call__UdopProcessor.__call__H   s    **
"&.."<"<
 
 m,00$?#M266}dK!-044[$G	$1-$@$D$DE`bg$h!!.}!=!A!ABZ\a!b#M266}dK))u/@v  )){/Fs  %-Cjkk">> .  ++\6\]?=[\H%\\'48N%\\'48N-(,,]DA-(,,-?F8A-(5=B=NETbM-(1:E-(7 D$8$8$B$ByGXdC(( 6D<Jm,[9!^^ !-T>.N )D0+/+F+F^,n=Y.Z,( OON+Or   c                     / nU H  nUR                  X   5        M     [        U5      [        U5      :w  a#  [        S[        U5       S[        U5       35      eU$ )Nz`Expected length of images to be the same as the length of `overflow_to_sample_mapping`, but got z and )appendlenrJ   )r3   r8   rD   images_with_overflow
sample_idxs        r   rM   $UdopProcessor.get_overflowing_images   sr    !4J ''(:; 5 #$,F(GG,-.eC8R4S3TV 
 $#r   c                 |    U R                   R                  nU R                  R                  n[        X-   S/-   5      $ )Nbbox)r5   model_input_namesr4   r   )r3   tokenizer_input_namesimage_processor_input_namess      r   r^   UdopProcessor.model_input_names   s:     $ @ @&*&:&:&L&L#)G6(RSSr   r   )NN)r   r   r   r   __doc__r2   r   r   r   r   r   r
   r    r   rT   rM   propertyr^   r   __classcell__)r6   s   @r   r.   r.   4   s    5  %)Z^FT!F ++d9o=EV@WWF ,-	F
 
F FR$ T Tr   r.   N)rb   transformersr   image_processing_utilsr   image_utilsr   processing_utilsr   r   r	   r
   tokenization_utils_baser   r   utilsr   
get_loggerr   loggerr   r    r.   __all__r   r   r   <module>rn      s|    ! 2 % T T C # 
		H	%:Zu :
*% " pTN pT pTf 
r   