
    Z jM                     Z   S r SSKrSSKrSSKJr  SSKJr  SSKJr  SSK	J
r
JrJr  SSKJr  SS	KJr  SS
KJr  SSKJrJrJrJrJr  SSKJr  SSKJrJrJrJrJ r   SSK!J"r"J#r#J$r$  \ RJ                  " \&5      r' " S S\RP                  5      r) " S S\RP                  5      r* " S S\RP                  5      r+ " S S\RP                  5      r, " S S\5      r- " S S\RP                  5      r.\ " S S\5      5       r/\ " S S\/5      5       r0 " S  S!\RP                  5      r1 " S" S#\RP                  5      r2 " S$ S%\RP                  5      r3 " S& S'\RP                  5      r4 " S( S)\RP                  5      r5 " S* S+\5      r6\" S,S-9 " S. S/\/5      5       r7\" S0S-9 " S1 S2\/\5      5       r8/ S3Qr9g)4zPix2Struct modeling file    N)nn   )initialization)ACT2FN)CacheDynamicCacheEncoderDecoderCache)GenerationMixin)create_causal_mask)GradientCheckpointingLayer)BaseModelOutputBaseModelOutputWithPooling!CausalLMOutputWithCrossAttentionsSeq2SeqLMOutputSeq2SeqModelOutput)PreTrainedModel)DUMMY_INPUTS
DUMMY_MASKauto_docstringis_torchdynamo_compilinglogging   )Pix2StructConfigPix2StructTextConfigPix2StructVisionConfigc                   2   ^  \ rS rSrSU 4S jjrS rSrU =r$ )Pix2StructLayerNorm3   c                    > [         TU ]  5         [        R                  " [        R
                  " U5      5      U l        X l        g)zS
Construct a layernorm module in the T5 style. No bias and no subtraction of mean.
N)super__init__r   	Parametertorchonesweightvariance_epsilon)selfhidden_sizeeps	__class__s      ڃ/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/models/pix2struct/modeling_pix2struct.pyr!   Pix2StructLayerNorm.__init__4   s/     	ll5::k#:; #    c                    UR                  [        R                  5      R                  S5      R	                  SSS9nU[        R
                  " X R                  -   5      -  nU R                  R                  [        R                  [        R                  4;   a%  UR                  U R                  R                  5      nU R                  U-  $ )N   T)keepdim)tor#   float32powmeanrsqrtr&   r%   dtypefloat16bfloat16)r'   hidden_statesvariances      r+   forwardPix2StructLayerNorm.forward<   s     !##EMM266q9>>r4>P%H?T?T4T(UU ;; ??),,T[[->->?M{{]**r-   )r&   r%   )gư>__name__
__module____qualname____firstlineno__r!   r<   __static_attributes____classcell__r*   s   @r+   r   r   3   s    $+ +r-   r   c                   r   ^  \ rS rSrSrS\SS4U 4S jjrS\R                  S\R                  4S jr	S	r
U =r$ )
Pix2StructVisionEmbeddingsL   a  
Construct the embeddings from patch. In `Pix2Struct` the input is different from classic Vision-transformer models.
Here the input is a sequence of `seq_len` flattened patches that also combines padding patches (tokens). Each patch
is represented by a vector of `hidden_size` values.
configreturnNc                   > [         TU ]  5         [        R                  " UR                  UR
                  5      U l        [        R                  " UR                  UR
                  5      U l	        [        R                  " UR                  UR
                  5      U l
        [        R                  " UR                  5      U l        g N)r    r!   r   Linearpatch_embed_hidden_sizer(   patch_projection	Embeddingseq_lenrow_embeddercolumn_embedderDropoutdropout_ratedropoutr'   rI   r*   s     r+   r!   #Pix2StructVisionEmbeddings.__init__S   s}     "		&*H*H&J\J\ ]LL9K9KL!||FNNF<N<NOzz&"5"56r-   flattened_patchesc                     US S 2S S 2S4   R                  5       nUS S 2S S 2S4   R                  5       nUS S 2S S 2SS 24   nU R                  U5      nU R                  U5      nU R                  U5      nXE-   U-   nU R	                  U5      nU$ )Nr   r   r/   )longrO   rR   rS   rV   )r'   rY   row_indicescol_indices
embeddingsrow_embeddingscol_embeddingss          r+   r<   "Pix2StructVisionEmbeddings.forward\   s     (1a0557'1a0557-aABh7**+<=
**;7--k:  0>A
\\*-
r-   )rS   rV   rO   rR   )r?   r@   rA   rB   __doc__r   r!   r#   Tensorr<   rC   rD   rE   s   @r+   rG   rG   L   s<    7/ 7D 7 %,,  r-   rG   c                   8   ^  \ rS rSrU 4S jr   SS jrSrU =r$ )Pix2StructVisionAttentionp   c                 l  > [         TU ]  5         UR                  U l        UR                  U l        UR
                  U l        UR                  U l        U R                  U R                  -  U l	        [        R                  " U R                  U R                  SS9U l        [        R                  " U R                  U R                  SS9U l        [        R                  " U R                  U R                  SS9U l        [        R                  " U R                  U R                  SS9U l        SU l        g NFbias)r    r!   r(   d_kvkey_value_proj_dimnum_attention_headsn_headsattention_dropoutrV   	inner_dimr   rM   querykeyvalueoutputgradient_checkpointingrW   s     r+   r!   "Pix2StructVisionAttention.__init__q   s    !--"(++11//(?(??YYt//eL
99T--t~~EJYYt//eL
ii0@0@uM&+#r-   c                 @  ^ ^ UR                   SS u  mnUU 4S jnU" T R                  U5      5      nU" T R                  U5      5      nU" T R                  U5      5      n	[        R
                  " XxR                  SS5      5      n
UGc  [        R                  " ST R                  XU4U
R                  U
R                  S9nT R                  (       a  T R                  (       a  SUl        UR                  5       S:X  a)  X2SS2SSSS24   R                  UR                  5      -   nOyUb  X2R                  UR                  5      -   nOX[!        5       (       dI  [        R"                  " TU4UR                  UR                  S9nX2R                  UR                  5      -   nSU-
  nUR%                  US:H  [        R&                  " U
R                  5      R(                  5      nX-  n
[        R*                  " U
[        R,                  " [        R&                  " U
R                  5      R(                  5      5      n
[.        R0                  R3                  U
S[        R4                  S	9R7                  U
5      n[.        R0                  R9                  UT R8                  T R                  S
9n[        R
                  " X5      nUR                  SS5      R;                  5       R=                  TST R>                  5      nT RA                  U5      nU4U4-   nU(       a  X4-   nU$ )z
Self-attention block
Nr/   c                    > U R                  5       R                  TSTR                  TR                  5      R	                  SS5      $ )
projectionr0   r   r/   )
contiguousviewrn   rl   	transpose)states
batch_sizer'   s    r+   to_projection_shape>Pix2StructVisionAttention.forward.<locals>.to_projection_shape   s<    $$&++JDLL$JaJabllmnpqrrr-   r   r   devicer7   Tr0   )dimr7   ptraining)!shaperq   rr   rs   r#   matmulr|   zerosrn   r   r7   ru   r   requires_gradr   r2   r   r$   masked_fillfinfominmaxtensorr   
functionalsoftmaxr3   type_asrV   rz   r{   rp   rt   )r'   r:   attention_maskposition_biasoutput_attentions
seq_lengthr   query_states
key_statesvalue_statesscoresposition_bias_maskedattn_weightsattn_outputoutputsr~   s   `              @r+   r<   !Pix2StructVisionAttention.forward   s    "/!4!4Ra!8
J	s +4::m+DE )-)@A
*4::m+DE l,@,@A,FG !KKDLL*9&--W]WcWcM **t}}.2+!!#q( -q$a?O0P0S0STaThTh0i i+ -0A0A-BVBV0W W-//!&,]5I5IQ^QdQd" !.0A0A-BVBV0W W-M,88!9KU[[Y_YeYeMfMjMjk&65<<FLL0I0M0M#NO }},,V5==,QYYZ`a }},,\T\\TXTaTa,bll<> "++Aq1<<>CCJPRTXTbTbckk+..M#33/Gr-   )
rV   ru   r(   rp   rr   rl   rn   rt   rq   rs   )NNFr>   rE   s   @r+   re   re   p   s    ,$ G Gr-   re   c                   6   ^  \ rS rSrS\4U 4S jjrS rSrU =r$ )Pix2StructVisionMlp   rI   c                   > [         TU ]  5         [        R                  " UR                  UR
                  SS9U l        [        R                  " UR                  UR
                  SS9U l        [        R                  " UR
                  UR                  SS9U l        [        R                  " UR                  5      U l        [        UR                     U l        g rh   r    r!   r   rM   r(   d_ffwi_0wi_1worT   rU   rV   r   dense_act_fnactrW   s     r+   r!   Pix2StructVisionMlp.__init__       IIf00&++EJ	IIf00&++EJ	))FKK););%Hzz&"5"56&--.r-   c                 8   U R                  U R                  U5      5      nU R                  U5      nX#-  nU R                  U5      n[	        U R
                  R                  [        R                  5      (       a  UR                  U R
                  R                  R                  :w  aa  U R
                  R                  R                  [        R                  :w  a/  UR                  U R
                  R                  R                  5      nU R                  U5      nU$ rL   r   r   r   rV   
isinstancer   r%   r#   rc   r7   int8r2   r'   r:   hidden_geluhidden_linears       r+   r<   Pix2StructVisionMlp.forward       hhtyy78		-0#3]3 tww~~u||44##tww~~';';;$$

2),,TWW^^-A-ABM.r-   r   rV   r   r   r   )	r?   r@   rA   rB   r   r!   r<   rC   rD   rE   s   @r+   r   r      s    /5 / r-   r   c                      ^  \ rS rSrS\SS4U 4S jjr  SS\R                  S\R                  S-  S\S\	\R                  \R                  4   \	\R                     -  4S	 jjr
S
rU =r$ )Pix2StructVisionLayer   rI   rJ   Nc                   > [         TU ]  5         UR                  U l        SU l        [	        U5      U l        [        U5      U l        [        UR                  UR                  S9U l        [        UR                  UR                  S9U l        g )Nr   r)   )r    r!   chunk_size_feed_forwardseq_len_dimre   	attentionr   mlpr   r(   layer_norm_epspre_mlp_layer_normpre_attention_layer_normrW   s     r+   r!   Pix2StructVisionLayer.__init__   ss    '-'E'E$26:&v."5f6H6HfNcNc"d(;F<N<NTZTiTi(j%r-   r:   r   r   c                     UnU R                  U5      nU R                  UUUS9nUS   nUSS  nXd-   nU R                  U5      nU R                  U5      U-   nU4U-   nU$ )N)r   r   r   r   )r   r   r   r   )	r'   r:   r   r   residualself_attention_outputsattention_outputr   layer_outputs	            r+   r<   Pix2StructVisionLayer.forward   s     ! 55mD!%)/ "0 "

 2!4(, )3 ..}=xx-=/G+r-   )r   r   r   r   r   r   NF)r?   r@   rA   rB   r   r!   r#   rc   booltupler<   rC   rD   rE   s   @r+   r   r      s    k/ kD k /3"'	|| t+  	
 
u||U\\)	*U5<<-@	@ r-   r   c                      ^  \ rS rSrS\SS4U 4S jjr    SS\R                  S\R                  S-  S\S	\S
\S\	\
-  4S jjrSrU =r$ )Pix2StructVisionEncoderi  rI   rJ   Nc                    > [         TU ]  5         Xl        [        R                  " [        UR                  5       Vs/ s H  n[        U5      PM     sn5      U l        SU l	        g s  snf r   )
r    r!   rI   r   
ModuleListrangenum_hidden_layersr   layerru   )r'   rI   _r*   s      r+   r!    Pix2StructVisionEncoder.__init__  sT    ]]5QWQiQiKj#kKja$9&$AKj#kl
&+# $ls   A&r:   r   r   output_hidden_statesreturn_dictc                 $   U(       a  SOS nU(       a  SOS n[        U R                  5       H0  u  pU(       a  Xa4-   nU	" XU5      n
U
S   nU(       d  M(  XzS   4-   nM2     U(       a  Xa4-   nU(       d  [        S XU4 5       5      $ [        UUUS9$ )N r   r   c              3   .   #    U  H  oc  M  Uv   M     g 7frL   r   .0vs     r+   	<genexpr>2Pix2StructVisionEncoder.forward.<locals>.<genexpr>2  s     m$[q$[s   	last_hidden_stater:   
attentions)	enumerater   r   r   )r'   r:   r   r   r   r   all_hidden_statesall_self_attentionsilayer_modulelayer_outputss              r+   r<   Pix2StructVisionEncoder.forward  s     #7BD$5b4(4OA#$58H$H!(HYZM)!,M  &91=M<O&O#  5   14D Dm]GZ$[mmm++*
 	
r-   )rI   ru   r   )NFFT)r?   r@   rA   rB   r   r!   r#   rc   r   r   r   r<   rC   rD   rE   s   @r+   r   r     s{    ,5 ,$ , /3"'%* 
||
 t+
  	

 #
 
 
	 
 
r-   r   c                   l    \ rS rSr% \\S'   SrSr\S 5       r	\
R                  " 5       S 5       rS rSrg	)
Pix2StructPreTrainedModeli:  rI   )imagetextFc                 z    [         R                  " [        5      n[         R                  " [        5      nUUUS.nU$ )N)decoder_input_ids	input_idsdecoder_attention_mask)r#   r   r   r   )r'   r   
input_maskdummy_inputss       r+   r   &Pix2StructPreTrainedModel.dummy_inputsA  s6    LL.	\\*-
!*"&0

 r-   c                    U R                   R                  n[        U[        5      (       a%  [        R
                  " UR                  US-  5        g[        U[        5      (       GaN  [        U R                   [        5      (       a   U R                   R                  R                  OU R                   R                  n[        U R                   [        5      (       a   U R                   R                  R                  OU R                   R                  n[        R                  " UR                  R                  SX#S-  -  S9  [        UR                  S5      (       aA  UR                  R                  b*  [        R                   " UR                  R                  5        [        R                  " UR"                  R                  SX#S-  -  S9  [        UR"                  S5      (       aA  UR"                  R                  b*  [        R                   " UR"                  R                  5        [        R                  " UR$                  R                  SX$S-  -  S9  [        UR$                  S5      (       aC  UR$                  R                  b+  [        R                   " UR$                  R                  5        ggg[        U[&        5      (       Ga  [        U R                   [        5      (       a   U R                   R                  R                  OU R                   R                  n[        U R                   [        5      (       a   U R                   R                  R(                  OU R                   R                  n[        U R                   [        5      (       a   U R                   R                  R*                  OU R                   R*                  n[        R                  " UR,                  R                  SX#U-  S-  -  S9  [        R                  " UR.                  R                  SX#S-  -  S9  [        R                  " UR0                  R                  SX#S-  -  S9  [        R                  " UR2                  R                  SX&U-  S-  -  S9  UR4                  (       a0  [        R                  " UR6                  R                  SX#S-  -  S9  gg[        U[8        R:                  5      (       a  [        U R                   [        5      (       a   U R                   R                  R                  OU R                   R                  n[        R                  " UR                  SX#S-  -  S9  UR<                  bK  [?        UR                  SS5      (       d.  [        R                   " UR                  UR<                     5        ggg[        U[@        5      (       a  [        U R                   [        5      (       a   U R                   R                  R                  OU R                   R                  n[        R                  " URB                  R                  SX#S-  -  S9  g[        U[8        RD                  [8        RF                  45      (       ac  [        RH                  " UR                  SU R                   RJ                  S9  UR                  b!  [        R                   " UR                  5        gg[        U[        5      (       a/  UR                  b!  [        RL                  " UR                  5        gg[        U[8        R:                  5      (       a  [        R                  " UR                  SU R                   RJ                  S9  UR<                  bK  [?        UR                  SS5      (       d.  [        R                   " UR                  UR<                     5        gggg)	zInitialize the weights      ?g        g      )r5   stdrj   N_is_hf_initializedF)'rI   initializer_factorr   r   init	constant_r%    Pix2StructTextDenseGatedActDenser   text_configr(   r   normal_r   hasattrrj   zeros_r   r   Pix2StructTextAttentionrk   	num_headsrq   rr   rs   rt   has_relative_attention_biasrelative_attention_biasr   rP   padding_idxgetattrPix2StructTextModellm_headrM   Conv2dtrunc_normal_initializer_rangeones_)r'   modulefactorr(   r   rl   rn   s          r+   _init_weights'Pix2StructPreTrainedModel._init_weightsL  sl    //f122NN6==&3,7 @AA dkk+;<< ''33[[,, 
 4>dkkK[3\3\4;;**//bfbmbmbrbrDLL++#6VZEZ;[\v{{F++0@0@0LFKK,,-LL++#6VZEZ;[\v{{F++0@0@0LFKK,,-LL))T>9RSvyy&))fiinn.HFIINN+ /I) 788 dkk+;<< ''33[[,,  1;4;;HX0Y0Y'',,_c_j_j_v_v 
 dkk+;<< ''11[[**  LL,,3FUgGglpFp<qrLL**&QUDU:VWLL,,3FSWFW<XYLL--CVRdHdimGm=no11V;;BBRXmq\qRrs 2-- dkk+;<< ''33[[,,  LLSfQU@U6VW!!-gfmmMach6i6iFMM&*<*<=> 7j- 344 dkk+;<< ''33[[,,  LL..SfY]H]>^_BII 677v}}3DKK<Y<YZ{{&FKK( ' 344}}(

6==) )--LLSdkk6S6ST!!-gfmmMach6i6iFMM&*<*<=> 7j- .r-   c                 :   U R                   R                  nU R                   R                  nUc  [        S5      eUR	                  UR
                  5      nUSS S24   R                  5       USSS 24'   X$S'   Uc  [        S5      eUR                  US:H  U5        U$ )Nzself.model.config.decoder_start_token_id has to be defined. In Pix2Struct it is usually set to the pad_token_id. See Pix2Struct docs for more information..r0   r   ).r   z1self.model.config.pad_token_id has to be defined.)rI   decoder_start_token_idpad_token_id
ValueError	new_zerosr   clonemasked_fill_)r'   r   r  r  shifted_input_idss        r+   _shift_right&Pix2StructPreTrainedModel._shift_right  s    !%!C!C{{//!)< 
 &//	@%.sCRCx%8%>%>%@#qr'"$:&!PQQ&&'8D'@,O  r-   r   N)r?   r@   rA   rB   r   __annotations__input_modalities_can_compile_fullgraphpropertyr   r#   no_gradr  r  rC   r   r-   r+   r   r   :  sH    ("  ]]_I? I?X!r-   r   c                      ^  \ rS rSr% \\S'   SrSrSrS/r	S\4U 4S jjr
S r\     SS\R                  S	-  S
\R                  S	-  S\S	-  S\S	-  S\S	-  S\\-  4S jj5       rSrU =r$ )Pix2StructVisionModeli  rI   rY   )r   Tr   c                    > [         TU ]  U5        Xl        [        U5      U l        [        U5      U l        [        UR                  UR                  S9U l
        U R                  5         g Nr   )r    r!   rI   rG   r^   r   encoderr   r(   r   	layernorm	post_initrW   s     r+   r!   Pix2StructVisionModel.__init__  sS     4V<.v6,V-?-?VEZEZ[ 	r-   c                 .    U R                   R                  $ rL   )r^   rO   r'   s    r+   get_input_embeddings*Pix2StructVisionModel.get_input_embeddings  s    ///r-   Nr   r   r   r   rJ   c                    Ub  UOU R                   R                  nUb  UOU R                   R                  nUb  UOU R                   R                  nUc  [	        S5      eUc   UR                  SS9S:g  R                  5       nU R                  U5      nU R                  UUUUUS9nUS   n	U R                  U	5      n	U(       d
  U	4n
XSS -   $ [        U	UR                  UR                  S9$ )	a\  
flattened_patches (`torch.FloatTensor` of shape `(batch_size, sequence_length, num_channels x patch_height x patch_width)`):
    Flattened and padded pixel values. These values can be obtained using [`AutoImageProcessor`]. See
    [`Pix2StructVisionImageProcessor.__call__`] for details. Check the [original
    paper](https://huggingface.co/papers/2210.03347) (figure 5) for more details.

Example:

```python
>>> import httpx
>>> from io import BytesIO
>>> from PIL import Image
>>> from transformers import AutoProcessor, Pix2StructVisionModel

>>> image_processor = AutoProcessor.from_pretrained("google/pix2struct-textcaps-base")
>>> model = Pix2StructVisionModel.from_pretrained("google/pix2struct-textcaps-base")

>>> url = "https://www.ilankelman.org/stopsigns/australia.jpg"
>>> with httpx.stream("GET", url) as response:
...     image = Image.open(BytesIO(response.read()))

>>> inputs = image_processor(images=image, return_tensors="pt")
>>> with torch.no_grad():
...     outputs = model(**inputs)

>>> last_hidden_states = outputs.last_hidden_state
>>> list(last_hidden_states.shape)
[1, 2048, 768]
```
Nz%You have to specify flattened_patchesr0   r   r   )r   r   r   r   r   r   )rI   r   r   r   r  sumfloatr^   r%  r&  r   r:   r   )r'   rY   r   r   r   r   kwargsembedding_outputencoder_outputssequence_outputhead_outputss              r+   r<   Pix2StructVisionModel.forward  s   P 2C1N-TXT_T_TqTq$8$D $++JjJj 	 &1%<k$++BYBY$DEE!/333;q@GGIN??+<=,,)/!5# ' 
 *!,..9+-L!""555-)77&11
 	
r-   )rI   r^   r%  r&  )NNNNN)r?   r@   rA   rB   r   r  main_input_namer  supports_gradient_checkpointing_no_split_modulesr!   r+  r   r#   rc   r   r   r   r<   rC   rD   rE   s   @r+   r"  r"    s    "")O!&*#01
5 
0  26.2)-,0#'H
 <<$.H
 t+H
  $;	H

 #TkH
 D[H
 
+	+H
 H
r-   r"  c                   6   ^  \ rS rSrS\4U 4S jjrS rSrU =r$ )r   i  rI   c                   > [         TU ]  5         [        R                  " UR                  UR
                  SS9U l        [        R                  " UR                  UR
                  SS9U l        [        R                  " UR
                  UR                  SS9U l        [        R                  " UR                  5      U l        [        UR                     U l        g rh   r   rW   s     r+   r!   )Pix2StructTextDenseGatedActDense.__init__  r   r-   c                 8   U R                  U R                  U5      5      nU R                  U5      nX#-  nU R                  U5      n[	        U R
                  R                  [        R                  5      (       a  UR                  U R
                  R                  R                  :w  aa  U R
                  R                  R                  [        R                  :w  a/  UR                  U R
                  R                  R                  5      nU R                  U5      nU$ rL   r   r   s       r+   r<   (Pix2StructTextDenseGatedActDense.forward  r   r-   r   	r?   r@   rA   rB   r   r!   r<   rC   rD   rE   s   @r+   r   r     s    /3 / r-   r   c                   6   ^  \ rS rSrS\4U 4S jjrS rSrU =r$ )Pix2StructTextLayerFFi0  rI   c                    > [         TU ]  5         [        U5      U l        [	        UR
                  UR                  S9U l        [        R                  " UR                  5      U l        g r$  )r    r!   r   DenseReluDenser   r(   layer_norm_epsilon
layer_normr   rT   rU   rV   rW   s     r+   r!   Pix2StructTextLayerFF.__init__1  sK    >vF-f.@.@fF_F_`zz&"5"56r-   c                 p    U R                  U5      nU R                  U5      nXR                  U5      -   nU$ rL   )rE  rC  rV   )r'   r:   forwarded_statess      r+   r<   Pix2StructTextLayerFF.forward9  s;    ??=9../?@%5E(FFr-   )rC  rV   rE  r?  rE   s   @r+   rA  rA  0  s    73 7 r-   rA  c                   p   ^  \ rS rSrS
S\S\S-  4U 4S jjjr\SS j5       rSS jr	     SS jr
S	rU =r$ )r  i@  NrI   	layer_idxc                   > [         TU ]  5         X l        UR                  U l        UR                  U l        UR
                  U l        UR                  U l        UR                  U l	        UR                  U l        U R                  U R                  -  U l        X0l        Uc-  [        R                  SU R                   R"                   S35        [$        R&                  " U R
                  U R
                  SS9U l        [$        R&                  " U R
                  U R
                  SS9U l        [$        R&                  " U R
                  U R
                  SS9U l        [$        R&                  " U R
                  U R
                  SS9U l        U R                  (       a0  [$        R0                  " U R                  U R                  5      U l        SU l        g )NzInstantiating a decoder z without passing `layer_idx` is not recommended and will to errors during the forward call, if caching is used. Please make sure to provide a `layer_idx` when creating this class.Fri   )r    r!   r  relative_attention_num_bucketsrelative_attention_max_distancer(   rk   rl   r  rn   rU   rV   rp   rK  loggerwarning_oncer*   r?   r   rM   rq   rr   rs   rt   rP   r  ru   r'   rI   r  rK  r*   s       r+   r!    Pix2StructTextAttention.__init__A  sc   +F(.4.S.S+/5/U/U,!--"(++''**(?(??"*4>>+B+B*C D, , YYt//1A1AN
99T--t/?/?eLYYt//1A1AN
ii 0 0$2B2BO+++-<<8[8[]a]i]i+jD(&+#r-   c                 b   SnU(       aC  US-  nX@S:  R                  [        R                  5      U-  -  n[        R                  " U 5      n O,[        R                  " U [        R
                  " U 5      5      * n US-  nX:  nU[        R                  " U R                  5       U-  5      [        R                  " X5-  5      -  X%-
  -  R                  [        R                  5      -   n[        R                  " U[        R                  " XrS-
  5      5      nU[        R                  " X`U5      -  nU$ )aR  
Adapted from Mesh Tensorflow:
https://github.com/tensorflow/mesh/blob/0cb87fe07da627bf0b7e60475d59f95ed6b5be3d/mesh_tensorflow/transformer/transformer_layers.py#L593

Translate relative position to a bucket number for relative attention. The relative position is defined as
memory_position - query_position, i.e. the distance in tokens from the attending position to the attended-to
position. If bidirectional=False, then positive relative positions are invalid. We use smaller buckets for
small absolute relative_position and larger buckets for larger absolute relative_positions. All relative
positions >=max_distance map to the same bucket. All relative positions <=-max_distance map to the same bucket.
This should allow for more graceful generalization to longer sequences than the model has been trained on

Args:
    relative_position: an int32 Tensor
    bidirectional: a boolean - whether the attention is bidirectional
    num_buckets: an integer
    max_distance: an integer

Returns:
    a Tensor with the same shape as relative_position, containing int32 values in the range [0, num_buckets)
r   r/   r   )r2   r#   r[   absr   
zeros_likelogr0  math	full_likewhere)relative_positionbidirectionalnum_bucketsmax_distancerelative_buckets	max_exactis_smallrelative_position_if_larges           r+   _relative_position_bucket1Pix2StructTextAttention._relative_position_bucket]  s   . AKQ!6 : :5:: F TT %		*; <!&+<e>N>NO`>a!b b  1$	$0 &/II'--/);<hh|/01&( "UZZ.	&"
 &+YY&8RbcTc(d&
" 	EKKE_``r-   c                    Uc   U R                   R                  R                  n[        R                  " U[        R
                  US9SS2S4   U-   n[        R                  " U[        R
                  US9SSS24   nXe-
  nU R                  USU R                  U R                  S9nU R                  U5      n	U	R                  / SQ5      R                  S5      n	U	$ )z%Compute binned relative position biasN)r7   r   F)r[  r\  r]  )r/   r   r   r   )r  r%   r   r#   aranger[   rb  rM  rN  permute	unsqueeze)
r'   query_length
key_lengthr   past_seen_tokenscontext_positionmemory_positionrZ  relative_position_bucketvaluess
             r+   compute_bias$Pix2StructTextAttention.compute_bias  s    >1188??F <<EJJvVWXZ^W^_brr,,zFSTXZ[T[\+>#'#A#A;;==	 $B $
  --.FG	*44Q7r-   c                 N   UR                   SS u  pUb  UR                  U R                  5      OSn
[        U
[        R
                  5      (       a  U
R                  5       OU
n
USLnU R                  U5      nUR                  USU R                  U R                  5      R                  SS5      nUb[  [        U[        5      (       aF  UR                  R                  U R                  5      nU(       a  UR                  nOUR                   nOUnU(       a  UOUnU(       aU  U(       aN  W(       aG  UR"                  U R                     R$                  nUR"                  U R                     R&                  nOU R)                  U5      nU R+                  U5      nUR                  USU R                  U R                  5      R                  SS5      nUR                  USU R                  U R                  5      R                  SS5      nUb@  UR-                  UUU R                  5      u  nnU(       a  SUR                  U R                  '   [        R.                  " UUR                  SS5      5      nUc  UR                   S   nU R0                  (       db  [        R2                  " SU R                  U	U4UR4                  UR6                  S	9nU R8                  (       a  U R:                  (       a  SUl        OU R?                  U	UUR4                  U
S
9nUb#  USS2SS2SS2SUR                   S   24   nUU-   nUnUU-  n[@        RB                  RE                  URG                  5       SS9RI                  U5      n[@        RB                  RK                  UU RJ                  U R:                  S9n[        R.                  " UU5      nUR                  SS5      RM                  5       nUR                  USU RN                  5      nU RQ                  U5      nUU4nU(       a  UU4-   nU$ )zp
Self-attention (if key_value_states is None) or attention over source sentence (provided by key_value_states).
Nr/   r   r0   r   Tr   r   )r   rj  r.  r   ))r   get_seq_lengthrK  r   r#   rc   r  rq   r{   rn   rl   r|   r	   
is_updatedgetcross_attention_cacheself_attention_cachelayerskeysrn  rr   rs   updater   r  r   r   r7   ru   r   r   ro  r   r   r   r0  r   rV   rz   rp   rt   )r'   r:   maskkey_value_statesr   past_key_valuesr   r1  r~   r   rj  is_cross_attentionr   rt  curr_past_key_valuescurrent_statesr   r   r   ri  causal_maskr   r   r   r   s                            r+   r<   Pix2StructTextAttention.forward  s    "/!4!4Ra!8
M\Mh?99$..Ino7ABRTYT`T`7a7a+113gw .T9zz-0#((RtG^G^_iijkmno &:oGZ+[+[(3377GJ!'6'L'L$'6'K'K$#2 -?)]/j-44T^^DIIJ/66t~~FMML.1J::n5L#RtG^G^_iijkmnoJ',,ZT\\4KbKbcmmnoqrsL*+?+F+FzS_aeaoao+p(
L%AEO..t~~> lJ,@,@A,FG #))"-J33 %j*=fmm[a[g[g! ..4==26M/ $ 1 1
6==Sc !2 ! "1a,Bj.>.>r.B,B#BC - ;,&& }},,V\\^,DLLVT}},,\T\\TXTaTa,bll<>!++Aq1<<>!&&z2t~~Fkk+../Gr-   )rV   ru   r  r(   rp   rr   rl   rK  rn   rt   rq   r  rN  rM  rs   FN)T       )Nr   )NNNNF)r?   r@   rA   rB   r   intr!   staticmethodrb  ro  r<   rC   rD   rE   s   @r+   r  r  @  sZ    ,3 ,cfimcm , ,8 -  - `* [ [r-   r  c                   N   ^  \ rS rSrSS\S-  4U 4S jjjr     SS jrSrU =r$ )	 Pix2StructTextLayerSelfAttentioni   NrK  c                    > [         TU ]  5         [        XUS9U l        [	        UR
                  UR                  S9U l        [        R                  " UR                  5      U l        g )Nr  rK  r   r    r!   r  r   r   r(   rD  rE  r   rT   rU   rV   rQ  s       r+   r!   )Pix2StructTextLayerSelfAttention.__init__  sR    0W`
 .f.@.@fF_F_`zz&"5"56r-   c           	          U R                  U5      nU R                  UUUUUUS9n	XR                  U	S   5      -   nU4U	SS  -   n
U
$ )N)r{  r   r}  	use_cacher   r   r   rE  r   rV   )r'   r:   r   r   r}  r  r   r1  normed_hidden_statesr   r   s              r+   r<   (Pix2StructTextLayerSelfAttention.forward	  sl      $}=>> '+/ * 
 &5Ea5H(II "%5ab%99r-   r   rV   rE  r  )NNNFF	r?   r@   rA   rB   r  r!   r<   rC   rD   rE   s   @r+   r  r     s4    7SSWZ 7 7  r-   r  c                   L   ^  \ rS rSrSS\S-  4U 4S jjjr    SS jrSrU =r$ )	!Pix2StructTextLayerCrossAttentioni"  NrK  c                    > [         TU ]  5         [        USUS9U l        [	        UR
                  UR                  S9U l        [        R                  " UR                  5      U l        g )NFr  r   r  )r'   rI   rK  r*   s      r+   r!   *Pix2StructTextLayerCrossAttention.__init__#  sP    0UZfop-f.@.@fF_F_`zz&"5"56r-   c           	          U R                  U5      nU R                  UUUUUUS9n	XR                  U	S   5      -   n
U
4U	SS  -   nU$ )N)r{  r|  r   r}  r   r   r   r  )r'   r:   r|  r   r   r}  r   r1  r  r   r   r   s               r+   r<   )Pix2StructTextLayerCrossAttention.forward)  sk      $}=>> -'+/ * 
 %||4DQ4G'HH/$4QR$88r-   r  rL   )NNNFr  rE   s   @r+   r  r  "  s0    7#* 7 7  r-   r  c                   V   ^  \ rS rSrSS\S-  4U 4S jjjr         SS jrSrU =r$ )	Pix2StructTextBlockiA  NrK  c                    > [         TU ]  5         [        UUUS9U l        [	        UUS9U l        [        U5      U l        g )Nr  )rK  )r    r!   r  self_attentionr  encoder_decoder_attentionrA  r   rQ  s       r+   r!   Pix2StructTextBlock.__init__B  sH    >(C
 *K*
&
 )0r-   c           	         U R                  UUUUUU	S9nUS   nUSS  nUR                  [        R                  :X  al  [        R                  " U5      R                  5       (       aC  [        R                  " UR                  5      R                  S-
  n[        R                  " X* US9nUS LnU(       a  U R                  UUUUUU	S9nUS   nUR                  [        R                  :X  al  [        R                  " U5      R                  5       (       aC  [        R                  " UR                  5      R                  S-
  n[        R                  " X* US9nUUSS  -   nU R                  U5      nUR                  [        R                  :X  al  [        R                  " U5      R                  5       (       aC  [        R                  " UR                  5      R                  S-
  n[        R                  " X* US9nU4nUU-   $ )N)r   r   r}  r  r   r   r   i  )r   r   )r|  r   r   r}  r   )r  r7   r#   r8   isinfanyr   r   clampr  r   )r'   r:   r   r   encoder_hidden_statesencoder_attention_maskencoder_decoder_position_biasr}  r  r   r   r1  r   attention_outputsclamp_valuedo_cross_attentioncross_attention_outputsr   s                     r+   r<   Pix2StructTextBlock.forwardR  s    "&!4!4)'+/ "5 "
 /q12126 %--/EKK4N4R4R4T4T++m&9&9:>>EK!KK<[YM2$>&*&D&D!65; /"3 'E '# 4A6M ""emm3M8R8V8V8X8X#kk-*=*=>BBTI %M|Q\ ] !24KAB4O O / %--/EKK4N4R4R4T4T++m&9&9:>>EK!KK<[YM "***r-   )r  r   r  r  )	NNNNNNFFTr  rE   s   @r+   r  r  A  s@    1SSWZ 1 1& "#&*<+ <+r-   r  z3
    The standalone text decoder of Pix2Struct
    )custom_introc                     ^  \ rS rSr% \\S'   SrS/rSS0rSr	U 4S jr
S	 r\           SS\R                  S
-  S\R                  S
-  S\R                  S
-  S\R                  S
-  S\R                  S
-  S\S
-  S\S
-  S\S
-  S\S
-  S\R                  S
-  S\S
-  S\\R                  S4   \-  4S jj5       rSrU =r$ )r  i  rI   )r   r  zlm_head.weightzembed_tokens.weightTc                 R  > [         TU ]  U5        [        R                  " UR                  UR
                  5      U l        [        R                  " [        UR                  5       Vs/ s H  n[        U[        US:H  5      US9PM     sn5      U l        [        UR
                  UR                  S9U l        [        R                   " UR"                  5      U l        [        R&                  " UR
                  UR                  SS9U l        U R+                  5         SU l        g s  snf )Nr   r  r   Fri   )r    r!   r   rP   
vocab_sizer(   embed_tokensr   r   
num_layersr  r   r   r   rD  final_layer_normrT   rU   rV   rM   r  r'  ru   )r'   rI   r   r*   s      r+   r!   Pix2StructTextModel.__init__  s     LL):):F<N<NO]] v0011A $FQRSV`ab1

 !4F4F4FFLeLe fzz&"5"56yy!3!3V5F5FUS 	&+#s   (!D$c                     Xl         g rL   )r  r'   new_embeddingss     r+   set_input_embeddings(Pix2StructTextModel.set_input_embeddings  s    *r-   Nr   r   r  r  inputs_embedsr}  r  r   r   labelsr   rJ   .c                    Ub  UOU R                   R                  nUb  UOU R                   R                  nU	b  U	OU R                   R                  n	Ub  UOU R                   R                  nU R
                  (       a/  U R                  (       a  U(       a  [        R                  S5        SnUb  Ub  [        S5      eUb&  UR                  5       nUR                  SUS   5      nO"Ub  UR                  5       SS nO[        S5      eUc%  U R                  c   S5       eU R                  U5      nUu  pU(       a`  Uc]  U R                   R                  (       a/  [        [        U R                   S9[        U R                   S95      nO[        U R                   S9nUc8  Ub  UR!                  5       U-   OUn["        R$                  " UUUR&                  S	9nU R                   R(                  (       a  [+        U R                   UUUS
9nOVUSS2SSSS24   nUR-                  UR.                  S9nSU-
  ["        R0                  " UR.                  5      R2                  -  nUbL  UR                  5       u  nnnUU4nUc  ["        R$                  " UUR&                  S	9nU R5                  U5      nOSnU	(       a  SOSnU(       a  SOSnU(       a  SOSnSnSnU R7                  U5      n[9        U R:                  5       H\  u  nnU	(       a  UU4-   nU" UUUUUUUUUS9	nUS   nUS   nUb  UU(       a  SOS   nU(       d  ME  UUS   4-   nUc  MS  UUS   4-   nM^     U R=                  U5      nU R7                  U5      nU R?                  U5      n U	(       a  UU4-   nSn!U
b  U
R-                  U R&                  5      n
[@        RB                  " SSS9n"U"" U RE                  5       R                  SU R                  S5      5      U
RE                  5       R                  S5      5      n!U(       d  [G        S U!U UUUU4 5       5      $ [I        U!U UUUUS9$ )a  
input_ids (`torch.LongTensor` of shape `(batch_size, sequence_length)`):
    Indices of input sequence tokens in the vocabulary. Pix2StructText is a model with relative position
    embeddings so you should be able to pad the inputs on both the right and the left.

    Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and
    [`PreTrainedTokenizer.__call__`] for detail.

    [What are input IDs?](../glossary#input-ids)

    To know more on how to prepare `input_ids` for pretraining take a look a [Pix2StructText
    Training](./t5#training).

Example:

```python
>>> from transformers import AutoProcessor, Pix2StructTextModel

>>> processor = AutoProcessor.from_pretrained("google/pix2struct-textcaps-base")
>>> model = Pix2StructTextModel.from_pretrained("google/pix2struct-textcaps-base")

>>> inputs = processor(text="Hello, my dog is cute", return_tensors="pt")
>>> outputs = model(**inputs)
>>> loss = outputs.loss
```
NzZ`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...FzTYou cannot specify both decoder_input_ids and decoder_inputs_embeds at the same timer0   zEYou have to specify either decoder_input_ids or decoder_inputs_embedsz<You have to initialize the model with valid token embeddings)rI   )r   )rI   r  r   r}  )r7   r   r   )r}  r  r   r   r   r   r/      r  r5   )ignore_index	reductionc              3   0   #    U  H  nUc  M  Uv   M     g 7frL   r   r   s     r+   r   .Pix2StructTextModel.forward.<locals>.<genexpr>X  s"      A  s   	)losslogitsr}  r:   r   cross_attentions)%rI   r  r   r   r   ru   r   rO  warningr  sizer{   r  is_encoder_decoderr	   r   rs  r#   r$   r   
is_decoderr   r2   r7   r   r   invert_attention_maskrV   r   r   r  r  r   CrossEntropyLossrz   r   r   )#r'   r   r   r  r  r  r}  r  r   r   r  r   r1  input_shaper~   r   mask_seq_lengthr  encoder_batch_sizeencoder_sequence_lengthr   encoder_hidden_shapeencoder_extended_attention_maskr   all_attentionsall_cross_attentionsr   r  r:   r   r   r   r  r  loss_fcts#                                      r+   r<   Pix2StructTextModel.forward  sp   T "+!6IDKK<Q<Q	1B1N-TXT_T_TqTq$8$D $++JjJj 	 &1%<k$++BYBY&&4==YNNl I ]%>stt"#..*K!r;r?;I&',,.s3Kdee $$0p2pp0 --i8M!,
0{{--"5 4l$++6V# #/dkk"B! BQA\..0:=bl  #ZZ
OML`L`aN;;!!,{{+- /	K )D$)9:K%..}/B/B.CK,M<O<O0P0T0TTK !,=R=W=W=Y: 7$68O#P %-).4HQ^QeQe)f&.2.H.HI_.`+.2+"6BD0d&7rd(,%]3(4OA|#$58H$H!(%/- /#"3
M *!,M
 *!,M$00=CTaZ[0\-  !/=3C2E!E(4+?=QRCSBU+U(9  5< --m<]3m,   1]4D DYYv}}-F**OHF--/44RRI6K\K\K^KcKcdfKghD  #%"(   1++%1
 	
r-   )rV   r  r  ru   r   r  )NNNNNNNNNNN)r?   r@   rA   rB   r   r  r  r9  _tied_weights_keysr8  r!   r  r   r#   
LongTensorFloatTensorr   r   r   r   r<   rC   rD   rE   s   @r+   r  r    s]    !  ./*,AB&*#,&+  .237:>;?15(,!%)-,0*.#'w
##d*w
 ))D0w
  %0047	w

 !& 1 1D 8w
 ''$.w
 w
 $;w
  $;w
 #Tkw
   4'w
 D[w
 
u  #%	&)J	Jw
 w
r-   r  zr
    A conditional generation model with a language modeling head. Can be used for sequence generation tasks.
    c                     ^  \ rS rSr% \\S'   SrS\4U 4S jjrS rS r	S\
R                  4S jrS	 r\            SS\R                   S
-  S\R                   S
-  S\R"                  S
-  S\R$                  S
-  S\\\R                         S
-  S\S
-  S\R"                  S
-  S\R*                  S
-  S\S
-  S\S
-  S\S
-  S\S
-  S\\R                      \-  4S jj5       rSrU =r$ )"Pix2StructForConditionalGenerationin  rI   rY   c                    > [         TU ]  U5        [        UR                  5      U l        [        UR                  5      U l        UR                  U l        U R                  5         g rL   )
r    r!   r"  vision_configr%  r  r   decoderis_vqar'  rW   s     r+   r!   +Pix2StructForConditionalGeneration.__init__w  sK     ,V-A-AB*6+=+=>mm 	r-   c                 6    U R                   R                  5       $ rL   )r  r+  r*  s    r+   r+  7Pix2StructForConditionalGeneration.get_input_embeddings  s    ||0022r-   c                 :    U R                   R                  U5        g rL   )r  r  r  s     r+   r  7Pix2StructForConditionalGeneration.set_input_embeddings  s    )).9r-   rJ   c                 6    U R                   R                  5       $ rL   )r  get_output_embeddingsr*  s    r+   r  8Pix2StructForConditionalGeneration.get_output_embeddings  s    ||1133r-   c                 :    U R                   R                  U5        g rL   )r  set_output_embeddingsr  s     r+   r  8Pix2StructForConditionalGeneration.set_output_embeddings  s    **>:r-   Nr   r   r   r3  r}  r  decoder_inputs_embedsr  r   r   r   c                 2   U	b  U	OU R                   R                  R                  n	Ub  UOU R                   R                  nUc  U R	                  UUU
UUS9nORU(       aK  [        U[        5      (       d6  [        US   [        U5      S:  a  US   OS[        U5      S:  a  US   OSS9nUS   nUbX  UcU  UcR  U R                  U5      nUb  UO2UR                  U R                   R                  5      R                  5       nSUSS2S4'   U R                  UUUUUUU	U
UUUS9nU(       d  X-   $ [        UR                  UR                  UR                   UR"                  UR$                  UR&                  UR(                  UR"                  UR$                  S9	$ )	a  
flattened_patches (`torch.FloatTensor` of shape `(batch_size, seq_length, hidden_size)`):
    Flattened pixel patches. the `hidden_size` is obtained by the following formula: `hidden_size` =
    `num_channels` * `patch_size` * `patch_size`

    The process of flattening the pixel patches is done by `Pix2StructProcessor`.
decoder_input_ids (`torch.LongTensor` of shape `(batch_size, target_sequence_length)`, *optional*):
    Indices of decoder input sequence tokens in the vocabulary.

    Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and
    [`PreTrainedTokenizer.__call__`] for details.

    [What are decoder input IDs?](../glossary#decoder-input-ids)

    Pix2StructText uses the `pad_token_id` as the starting token for `decoder_input_ids` generation. If
    `past_key_values` is used, optionally only the last `decoder_input_ids` have to be input (see
    `past_key_values`).

    To know more on how to prepare `decoder_input_ids` for pretraining take a look at [Pix2StructText
    Training](./t5#training).
decoder_attention_mask (`torch.BoolTensor` of shape `(batch_size, target_sequence_length)`, *optional*):
    Default behavior: generate a tensor that ignores pad tokens in `decoder_input_ids`. Causal mask will also
    be used by default.
labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
    Labels for computing the masked language modeling loss for the decoder.

Example:

Inference:

```python
>>> from PIL import Image
>>> import httpx
>>> from io import BytesIO
>>> from transformers import AutoProcessor, Pix2StructForConditionalGeneration

>>> processor = AutoProcessor.from_pretrained("google/pix2struct-textcaps-base")
>>> model = Pix2StructForConditionalGeneration.from_pretrained("google/pix2struct-textcaps-base")

>>> url = "https://www.ilankelman.org/stopsigns/australia.jpg"
>>> with httpx.stream("GET", url) as response:
...     image = Image.open(BytesIO(response.read()))

>>> inputs = processor(images=image, return_tensors="pt")

>>> # autoregressive generation
>>> generated_ids = model.generate(**inputs, max_new_tokens=50)
>>> generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
>>> print(generated_text)
A stop sign is on a street corner.

>>> # conditional generation
>>> text = "A picture of"
>>> inputs = processor(text=text, images=image, return_tensors="pt", add_special_tokens=False)

>>> generated_ids = model.generate(**inputs, max_new_tokens=50)
>>> generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
>>> print(generated_text)
A picture of a stop sign with a red stop sign
```

Training:

```python
>>> from PIL import Image
>>> import httpx
>>> from io import BytesIO
>>> from transformers import AutoProcessor, Pix2StructForConditionalGeneration

>>> processor = AutoProcessor.from_pretrained("google/pix2struct-base")
>>> model = Pix2StructForConditionalGeneration.from_pretrained("google/pix2struct-base")

>>> url = "https://www.ilankelman.org/stopsigns/australia.jpg"
>>> with httpx.stream("GET", url) as response:
...     image = Image.open(BytesIO(response.read()))
>>> text = "A stop sign is on the street corner."

>>> inputs = processor(images=image, return_tensors="pt")
>>> labels = processor(text=text, return_tensors="pt").input_ids

>>> # forward pass
>>> outputs = model(**inputs, labels=labels)
>>> loss = outputs.loss
>>> print(f"{loss.item():.5f}")
5.94282
```N)rY   r   r   r   r   r   r   r/   r   )r   r   r  r}  r  r  r  r   r   r  r   )	r  r  r}  decoder_hidden_statesdecoder_attentionsr  encoder_last_hidden_stater  encoder_attentions)rI   r   r  r   r%  r   r   lenr  ner  r0  r  r   r  r  r}  r:   r   r  r   )r'   rY   r   r   r   r3  r}  r  r  r  r   r   r   r1  r:   decoder_outputss                   r+   r<   *Pix2StructForConditionalGeneration.forward  s   N "+!6IDKK<S<S<]<]	%0%<k$++BYBY ""ll"3-"3%9' + O O_!M!M-"1!"4474H14Loa0RV14_1E1I?1-tO (*"3";@U@] $ 1 1& 9 *5 '&))$++*B*BCIIK # ,-"1a4( ,,'1/+"/#1/!5# ' 
 "44 %%"))+;;"1"?"?.99,==&5&G&G"1"?"?.99

 
	
r-   )r  r%  r  )NNNNNNNNNNNN)r?   r@   rA   rB   r   r  r7  r!   r+  r  r   Moduler  r  r   r#   r  r  
BoolTensorr   r   rc   r   r   r<   rC   rD   rE   s   @r+   r  r  n  s    )O	/ 	3:4ryy 4;  7;3759:>BF(,*.59!%)-,0#'b
 ,,t3b
 ))D0b
 !++d2	b

 !& 0 04 7b
 uU%6%6784?b
 b
   4'b
  %||d2b
 $;b
  $;b
 #Tkb
 D[b
 
u  	!$6	6b
 b
r-   r  )r   r  r"  r  ):rb   rW  r#   r    r   r   activationsr   cache_utilsr   r   r	   
generationr
   masking_utilsr   modeling_layersr   modeling_outputsr   r   r   r   r   modeling_utilsr   utilsr   r   r   r   r   configuration_pix2structr   r   r   
get_loggerr?   rO  r  r   rG   re   r   r   r   r   r"  r   rA  r  r  r  r  r  r  __all__r   r-   r+   <module>r     s       & ! C C ) / 9  .  e d 
		H	%+")) +2! !HW		 Wv")) :&6 &R&
bii &
R q! q! q!h _
5 _
 _
Fryy :BII  |bii |@ryy D		 >M+4 M+` 
U
3 U

U
p 
~
)BO ~

~
Br-   