
    Z jw               
       Z   S r SSKrSSKrSSKJr  SSKJr  SSKJ	r	J
r
JrJr  SSKJrJrJrJrJr  SSKJrJr  SS	KJrJrJrJrJr  \" 5       (       a  SSKrSS
KJr  \R@                  " \!5      r"S r#  SSSS\$S-  S\$S-  S\$\	-  S-  4S jjr% " S S\SS9r&\ " S S\5      5       r'S/r(g)z%Image processor class for LayoutLMv3.    N   )TorchvisionBackend)BatchFeature)ChannelDimensiongroup_images_by_shapereorder_imagesto_pil_image)IMAGENET_STANDARD_MEANIMAGENET_STANDARD_STD
ImageInputPILImageResamplingSizeDict)ImagesKwargsUnpack)
TensorTypeauto_docstringis_pytesseract_availableloggingrequires_backends)
functionalc                     [        SU S   U-  -  5      [        SU S   U-  -  5      [        SU S   U-  -  5      [        SU S   U-  -  5      /$ )Ni  r         r   )int)boxwidthheights      ڋ/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/models/layoutlmv3/image_processing_layoutlmv3.pynormalize_boxr   0   s`    DCFUN#$DCFVO$%DCFUN#$DCFVO$%	     imageznp.ndarray | torch.Tensorlangtesseract_configinput_data_formatc                    [        [        S/5        [        U S5      (       a  U R                  5       R	                  5       n O5[        U [        R                  5      (       d  [        R                  " U 5      n Ub  UOSn[        XS9nUR                  u  pV[        R                  " XASUS9nUS   US   US	   US
   US   4u  ppn[        U5       VVs/ s H  u  pUR                  5       (       a  M  UPM      nnn[        U5       VVs/ s H  u  pX;  d  M  UPM     nnn[        U	5       VVs/ s H  u  nnX;  d  M  UPM     n	nn[        U
5       VVs/ s H  u  nnX;  d  M  UPM     n
nn[        U5       VVs/ s H  u  nnX;  d  M  UPM     nnn[        U5       VVs/ s H  u  nnX;  d  M  UPM     nnn/ n[        XX5       H%  u  nnnnUUUU-   UU-   /nUR!                  U5        M'     / nU H  nUR!                  [#        UXV5      5        M      [%        U5      [%        U5      :X  d   S5       eUU4$ s  snnf s  snnf s  snnf s  snnf s  snnf s  snnf )zdApplies Tesseract OCR on a document image, and returns recognized words + normalized bounding boxes.pytesseractcpu r$   dict)r"   output_typeconfigtextlefttopr   r   z-Not as many words as there are bounding boxes)r   apply_tesseracthasattrr'   numpy
isinstancenpndarrayarrayr	   sizer&   image_to_data	enumeratestripzipappendr   len)r!   r"   r#   r$   	pil_imageimage_widthimage_heightdatawordsr.   r/   r   r   idxwordirrelevant_indicescoordactual_boxesxywh
actual_boxnormalized_boxesr   s                            r   r0   r0   9   s;    o7 ue		!!#rzz**+;+G'R UHI )K$$YvVfgD&*6lDL$u+tT[}^bck^l&l#EV 09/?T/?)#tzz|#/?T#,U#3U#3ics7TT#3EU$-dOUOjc5s7TEODU#,S>
S>ZS%S5R5>C
S%.u%5W%5zsE9VU%5EW&/&7Y&7
U3;Xe&7FY L$U3
1aAE1q5)
J' 4
 c; MN  u:-.._0__.""") UUU
SWYsH   H=2H=	II/I	?I	I&I=II$I4Ic                   D    \ rS rSr% Sr\\S'   \S-  \S'   \S-  \S'   Srg)LayoutLMv3ImageProcessorKwargsh   a  
apply_ocr (`bool`, *optional*, defaults to `True`):
    Whether to apply the Tesseract OCR engine to get words + normalized bounding boxes. Can be overridden by
    the `apply_ocr` parameter in the `preprocess` method.
ocr_lang (`str`, *optional*):
    The language, specified by its ISO code, to be used by the Tesseract OCR engine. By default, English is
    used. Can be overridden by the `ocr_lang` parameter in the `preprocess` method.
tesseract_config (`str`, *optional*):
    Any additional custom configuration flags that are forwarded to the `config` parameter when calling
    Tesseract. For example: '--psm 6'. Can be overridden by the `tesseract_config` parameter in the
    `preprocess` method.
	apply_ocrNocr_langr#    )	__name__
__module____qualname____firstlineno____doc__bool__annotations__str__static_attributes__rS   r    r   rO   rO   h   s"     ODjDj r    rO   F)totalc            #       H  ^  \ rS rSr\r\R                  r\	r
\rSSS.rSrSrSrSrSrSrS\\   4U 4S jjr\S	\S\\   S
\4U 4S jj5       r   SS	\S   S\S\SSS\S\S\S\S\S\\\   -  S-  S\\\   -  S-  S\S-  S\\-  S-  S\S\S-  S\S-  S
\4"S jjr Sr!U =r"$ ) LayoutLMv3ImageProcessor{      )r   r   TNr(   kwargsc                 &   > [         TU ]  " S0 UD6  g )NrS   )super__init__)selfrb   	__class__s     r   re   !LayoutLMv3ImageProcessor.__init__   s    "6"r    imagesreturnc                 &   > [         TU ]  " U40 UD6$ )N)rd   
preprocess)rf   ri   rb   rg   s      r   rl   #LayoutLMv3ImageProcessor.preprocess   s    w!&3F33r    ztorch.Tensor	do_resizer7   resamplez7PILImageResampling | tvF.InterpolationMode | int | Nonedo_center_crop	crop_size
do_rescalerescale_factordo_normalize
image_mean	image_stddisable_groupingreturn_tensorsrQ   rR   r#   c           	         U(       a  [        U S5        / n/ nU Hv  nUR                  (       a  [        R                  S5        [	        UR                  5       UU[        R                  S9u  nnUR                  U5        UR                  U5        Mx     [        XS9u  nn0 nUR                  5        H"  u  nnU(       a  U R                  UX4S9nUUU'   M$     [        UU5      n[        UUS9u  nn0 nUR                  5        H8  u  nnU(       a  U R                  UU5      nU R                  UXxXU5      nUUU'   M:     [        UU5      n[        SU0US9nU(       a
  WUS'   WUS	'   U$ )
Nr&   z]apply_ocr can only be performed on cpu. Tensors will be transferred to cpu before processing.r)   )rw   )r!   r7   ro   pixel_values)rA   tensor_typerB   boxes)r   is_cudaloggerwarning_oncer0   r'   r   FIRSTr<   r   itemsresizer   center_croprescale_and_normalizer   ) rf   ri   rn   r7   ro   rp   rq   rr   rs   rt   ru   rv   rw   rx   rQ   rR   r#   rb   words_batchboxes_batchr!   rB   r|   grouped_imagesgrouped_images_indexresized_images_groupedshapestacked_imagesresized_imagesprocessed_images_groupedprocessed_imagesrA   s                                    r   _preprocess$LayoutLMv3ImageProcessor._preprocess   s   * dM2KK==''w  /IIK+;O_OeOe u ""5)""5)   0EV/o,,!#%3%9%9%;!E>!%>!`,:"5) &< ((>@TU 0E^fv/w,,#% %3%9%9%;!E>!%!1!1.)!L!77
LV_N /=$U+ &< **BDXY.2B!CQ_`'DM'DMr    rS   )TNN)#rT   rU   rV   rW   rO   valid_kwargsr   BILINEARro   r
   ru   r   rv   r7   rn   rr   rt   rQ   rR   r#   r   re   r   r   r   rl   listrY   r   floatr[   r   r   r\   __classcell__)rg   s   @r   r_   r_   {   s   1L!**H'J%IC(DIJLIH#(F!G # 4 4v>\7] 4bn 4 4$ #'+#B^$B B 	B
 LB B B B B B DK'$.B 4;&-B +B j(4/B B  *!B" *#B& 
'B Br    r_   )NN))rX   r2   r4   torchimage_processing_backendsr   image_processing_utilsr   image_transformsr   r   r   r	   image_utilsr
   r   r   r   r   processing_utilsr   r   utilsr   r   r   r   r   r&   torchvision.transforms.v2r   tvF
get_loggerrT   r~   r   r[   r0   rO   r_   __all__rS   r    r   <module>r      s    ,   ; 2 e e  5   7 
		H	% $(7;	,#&,#
*,# Dj,# --4	,#^!\ !& V1 V Vr &
&r    