
    Z j                         S r SSKrSSKJr  SSKJrJrJr  SSKJ	r	J
r
  SSKJrJr   " S S	\S
S9r\R                  " \5      r\ " S S\5      5       rS/rg)z
Processor class for Donut.
    N   )
ImageInput)ProcessingKwargsProcessorMixinUnpack)PreTokenizedInput	TextInput)auto_docstringloggingc                       \ rS rSr0 rSrg)DonutProcessorKwargs    N)__name__
__module____qualname____firstlineno__	_defaults__static_attributes__r       {/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/models/donut/processing_donut.pyr   r      s    Ir   r   F)totalc            	          ^  \ rS rSrSU 4S jjr\  SS\S-  S\\\   -  \	-  \
-  S-  S\\   4S jj5       r\S 5       rSS	 jrS
rU =r$ )DonutProcessor!   Nc                 $   > [         TU ]  X5        g )N)super__init__)selfimage_processor	tokenizerkwargs	__class__s       r   r   DonutProcessor.__init__#   s    4r   imagestextr"   c                 J   Uc  Uc  [        S5      eU R                  " [        4SU R                  R                  0UD6nUb  U R
                  " U40 US   D6nUb.  Ub  US   R                  SS5        U R                  " U40 US   D6nUc  W$ Uc  W$ WS   WS'   US   US'   U$ )	NzBYou need to specify either an `images` or `text` input to process.tokenizer_init_kwargsimages_kwargstext_kwargsadd_special_tokensF	input_idslabels)
ValueError_merge_kwargsr   r!   init_kwargsr    
setdefault)r   r%   r&   r"   output_kwargsinputs	encodingss          r   __call__DonutProcessor.__call__&   s     >dlabb** 
"&.."<"<
 
 ))&SM/4RSF!m,778LeTtL}]/KLI<M^(5F8"+K"8F;Mr   c                 N    U R                   R                  n[        USS/-   5      $ )Nr,   r-   )r    model_input_nameslist)r   image_processor_input_namess     r   r8    DonutProcessor.model_input_namesF   s*    &*&:&:&L&L#/;2IIJJr   c                 n   Uc  U R                   R                  5       n0 nU(       Gao  [        R                  " SU[        R                  5      nUc  GODXR                  5       S nSU;  a  GO*USUR                  S5      S-    nU[        S5      [        S5      *  n[        R                  " U5      n[        R                  " SU S3U[        R                  5      n	U	c  UR                  US5      nGOU	R                  5       n	[        R                  " U5      n
[        R                  " U	5      n[        R                  " U
 SU 3U[        R                  [        R                  -  5      nUb  UR                  S5      R                  5       nSU;   a7  SU;   a1  U R                  USUS	9nU(       a  [        U5      S:X  a  US
   nXU'   Ou/ XG'   UR                  S5       HB  nUR                  5       nX;   a  US
   S:X  a  USS S:X  a  USS nXG   R                  U5        MD     [        XG   5      S:X  a	  XG   S
   XG'   XR!                  U	5      [        U	5      -   S R                  5       nUSS S:X  a  U/U R                  USS SUS	9-   $ U(       a  GMo  U(       a  U(       a  U/$ U$ U(       a  / $ SU0$ )zC
Convert a (generated) token sequence into an ordered JSON format.
Nz<s_>   z</s_ z(.*?)T)is_inner_valueadded_vocabr   z<sep/><z/>   text_sequence)r!   get_added_vocabresearch
IGNORECASEstartindexlenescapereplacegroupDOTALLstrip
token2jsonsplitappendfind)r   tokensr@   rA   outputpotential_startstart_tokenkeykey_escaped	end_tokenstart_token_escapedend_token_escapedcontentvalueleafs                  r   rR   DonutProcessor.token2jsonL   s    ..88:K iiFO& !6!6!8!:;K+%%&B(9(9#(>(BCKc%jCH95C))C.K		T+a"8&"--PI R8%OO-	&(ii&<#$&IIi$8!))*+51B0CDfbmm^`^g^gNg &%mmA.446G(W-? $Ze f "5zQ(-a*/3K&($+MM)$<D#'::<D#2tAw#~$rs)W[J['+Abz"K..t4	 %=
 v{+q0*0+a.FKI 6Y G IJPPR"1:*"8doofQRjQUcno&oooU fX -F8969'2Fov-FFr   r   )NN)FN)r   r   r   r   r   r
   r   strr9   r	   r   r   r   r5   propertyr8   rR   r   __classcell__)r#   s   @r   r   r   !   s    5  %)GKT! DIo	),==D -.	 > K K
8G 8Gr   r   )__doc__rG   image_utilsr   processing_utilsr   r   r   tokenization_utils_baser   r	   utilsr
   r   r   
get_loggerr   loggerr   __all__r   r   r   <module>rn      sk    
 % H H C ,+5  
		H	% bG^ bG bGJ 
r   