
    Z j                          S r SSKJr  SSKJr  SSKJrJr  \R                  " \	5      r
\" SS9\ " S S	\5      5       5       r\" SS9\ " S
 S\5      5       5       r\" SS9\ " S S\5      5       5       r/ SQrg)zPix2Struct model configuration    )strict   )PreTrainedConfig)auto_docstringloggingzgoogle/pix2struct-base)
checkpointc                   t   \ rS rSr% SrSrS/rSSSSSSSS.rS	r\	\
S
'   Sr\	\
S'   Sr\	\
S'   Sr\	\
S'   Sr\	\
S'   Sr\	\
S'   Sr\	\
S'   Sr\	\
S'   Sr\\	-  \
S'   Sr\\
S'   Sr\\
S'   Sr\\
S'   Sr\	\
S'   Sr\\
S '   Sr\	S!-  \
S"'   S#r\	\\	   -  S!-  \
S$'   S!r\	S!-  \
S%'   Sr\\
S&'   S'r \\
S('   Sr!\\
S)'   S*r"g!)+Pix2StructTextConfig   an  
relative_attention_num_buckets (`int`, *optional*, defaults to 32):
    The number of buckets to use for each attention layer.
relative_attention_max_distance (`int`, *optional*, defaults to 128):
    The maximum distance of the longer sequences for the bucket separation.
dense_act_fn (`Union[Callable, str]`, *optional*, defaults to `"gelu_new"`):
    The non-linear activation function (function or string).

Example:

```python
>>> from transformers import Pix2StructTextConfig, Pix2StructTextModel

>>> # Initializing a Pix2StructTextConfig with google/pix2struct-base style configuration
>>> configuration = Pix2StructTextConfig()

>>> # Initializing a Pix2StructTextModel (with random weights) from the google/pix2struct-base style configuration
>>> model = Pix2StructTextModel(configuration)

>>> # Accessing the model configuration
>>> configuration = model.config
```pix2struct_text_modelpast_key_valueshidden_size	num_heads
num_layers)r   num_attention_headsnum_hidden_layersdecoder_attention_headsencoder_attention_headsencoder_layersdecoder_layersiD  
vocab_size   @   d_kv   d_ff       relative_attention_num_buckets   relative_attention_max_distanceg?dropout_rateư>layer_norm_epsilon      ?initializer_factorgelu_newdense_act_fnr   decoder_start_token_idF	use_cacheNpad_token_id   eos_token_idbos_token_idtie_word_embeddingsT
is_decoderadd_cross_attention )#__name__
__module____qualname____firstlineno____doc__
model_typekeys_to_ignore_at_inferenceattribute_mapr   int__annotations__r   r   r   r   r   r   r!   r"   floatr$   r&   r(   strr)   r*   boolr+   r-   listr.   r/   r0   r1   __static_attributes__r2       ڈ/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/models/pix2struct/configuration_pix2struct.pyr
   r
      s   . )J#4"5$*)#.#.&&M JKD#ND#JIs*,"C,+.#S. #L%#+# $$ ##"L#""#C#It L#* +,L#S	/D(,#L#*# %%J %%rB   r
   c                       \ rS rSr% SrSrSr\\S'   Sr	\\S'   Sr
\\S'   S	r\\S
'   Sr\\S'   Sr\\S'   Sr\\S'   Sr\\S'   Sr\\-  \S'   Sr\\-  \S'   Sr\\S'   Sr\\S'   Sr\\S'   Sr\\S'   Sr\\S'   Srg )!Pix2StructVisionConfigU   a*  
patch_embed_hidden_size (`int`, *optional*, defaults to 768):
    Dimensionality of the input patch_embedding layer in the Transformer encoder.
d_ff (`int`, *optional*, defaults to 2048):
    Dimensionality of the "intermediate" (i.e., feed-forward) layer in the Transformer encoder.
d_kv (`int`, *optional*, defaults to 64):
    Dimensionality of the key, query, value projections per attention head.
The non-linear activation function (function or string) in the encoder and pooler. If string, `"gelu"`,
    `"relu"`, `"selu"` and `"gelu_new"` `"gelu"` are supported.
dense_act_fn (`Union[Callable, str]`, *optional*, defaults to `"gelu_new"`):
    The non-linear activation function (function or string).
seq_len (`int`, *optional*, defaults to 4096):
    Maximum sequence length (here number of patches) supported by the model.
relative_attention_num_buckets (`int`, *optional*, defaults to 32):
    The number of buckets to use for each attention layer.
relative_attention_max_distance (`int`, *optional*, defaults to 128):
    The maximum distance (in tokens) to use for each attention layer.

Example:

```python
>>> from transformers import Pix2StructVisionConfig, Pix2StructVisionModel

>>> # Initializing a Pix2StructVisionConfig with google/pix2struct-base style configuration
>>> configuration = Pix2StructVisionConfig()

>>> # Initializing a Pix2StructVisionModel (with random weights) from the google/pix2struct-base style configuration
>>> model = Pix2StructVisionModel(configuration)

>>> # Accessing the model configuration
>>> configuration = model.config
```pix2struct_vision_modelr   r   patch_embed_hidden_sizer   r   r   r   r   r   r   r'   r(   r#   layer_norm_epsg        r"   attention_dropoutg|=initializer_ranger%   r&   i   seq_lenr   r   r    r!   r2   N)r3   r4   r5   r6   r7   r8   r   r;   r<   rH   r   r   r   r   r(   r>   rI   r=   r"   rJ   rK   r&   rL   r   r!   rA   r2   rB   rC   rE   rE   U   s    B +JK#&S&D#D#Ns!!"L#" NE  #L%#+#%(us{($u$ ##GS*,"C,+.#S.rB   rE   c                      ^  \ rS rSr% SrSr\\S.rSr	\
\-  S-  \S'   Sr\
\-  S-  \S'   Sr\\S	'   S
r\\S'   Sr\\S'   Sr\\S'   Sr\\S'   U 4S jrSrU =r$ )Pix2StructConfig   a  
is_vqa (`bool`, *optional*, defaults to `False`):
    Whether the model has been fine-tuned for VQA or not.

Example:

```python
>>> from transformers import Pix2StructConfig, Pix2StructForConditionalGeneration

>>> # Initializing a Pix2StructConfig with google/pix2struct-base style configuration
>>> configuration = Pix2StructConfig()

>>> # Initializing a Pix2StructForConditionalGeneration (with random weights) from the google/pix2struct-base style configuration
>>> model = Pix2StructForConditionalGeneration(configuration)

>>> # Accessing the model configuration
>>> configuration = model.config

>>> # We can also initialize a Pix2StructConfig from a Pix2StructTextConfig and a Pix2StructVisionConfig

>>> # Initializing a Pix2Struct text and Pix2Struct vision configuration
>>> config_text = Pix2StructTextConfig()
>>> config_vision = Pix2StructVisionConfig()

>>> config = Pix2StructConfig(text_config=config_text, vision_config=config_vision)
```
pix2struct)text_configvision_configNrQ   rR   r%   r&   g{Gz?rK   Fis_vqar/   Tis_encoder_decoderc                 l  > U R                   c9  [        U R                  U R                  S9U l         [        R                  S5        Ok[        U R                   [        5      (       aL  U R                  U R                   S'   U R                  U R                   S'   [        S0 U R                   D6U l         U R                  c%  [        5       U l        [        R                  S5        O9[        U R                  [        5      (       a  [        S0 U R                  D6U l        U R                   R                  U l
        U R                   R                  U l        U R                   R                  U l        U R                  U R                   l        U R                  U R                  l        [        TU ]<  " S0 UD6  g )N)rT   r/   zU`text_config` is `None`. initializing the `Pix2StructTextConfig` with default values.rT   r/   zY`vision_config` is `None`. initializing the `Pix2StructVisionConfig` with default values.r2   )rQ   r
   rT   r/   loggerinfo
isinstancedictrR   rE   r)   r+   r-   rK   super__post_init__)selfkwargs	__class__s     rC   r[   Pix2StructConfig.__post_init__   sU   #3#'#:#:$($<$< D KKop(($//595L5LD126:6N6ND233Gd6F6FGD%!7!9DKKst**D11!7!M$:L:L!MD&*&6&6&M&M# ,,99 ,,99-1-C-C*/3/E/E,''rB   )r)   r-   r+   rQ   rR   )r3   r4   r5   r6   r7   r8   r
   rE   sub_configsrQ   rY   r   r<   rR   r&   r=   rK   rS   r?   r/   rT   r[   rA   __classcell__)r^   s   @rC   rN   rN      s    6 J"6I_`K26K((4/648M4**T18 ###u#FD %%##( (rB   rN   )rN   r
   rE   N)r7   huggingface_hub.dataclassesr   configuration_utilsr   utilsr   r   
get_loggerr3   rV   r
   rE   rN   __all__r2   rB   rC   <module>rg      s    % . 3 , 
		H	% 347&+ 7&  57&t 342/- 2/  52/j 34@(' @(  5@(F QrB   