
    Z j                     d    S r SSKJr  SSKJr  SSKJr  \" SS9\ " S S	\5      5       5       rS	/rg
)zVilT model configuration    )strict   )PreTrainedConfig)auto_docstringzdandelin/vilt-b32-mlm)
checkpointc                     ^  \ rS rSr% SrSrSr\\S'   Sr	\\S'   Sr
\\S'   S	r\\S
'   Sr\\S'   Sr\\S'   Sr\\S'   Sr\\S'   Sr\\S'   Sr\\-  \S'   Sr\\-  \S'   Sr\\S'   Sr\\S'   Sr\\\   -  \\\4   -  \S'   Sr\\\   -  \\\4   -  \S'   Sr\\S '   S!r\\S"'   S#r\\S$'   S!r\\S%'   S#r \\S&'   S'r!\S'-  \S('   U 4S) jr"S*r#U =r$$ )+
ViltConfig   a[  
modality_type_vocab_size (`int`, *optional*, defaults to 2):
    The vocabulary size of the modalities passed when calling [`ViltModel`]. This is used after concatenating the
    embeddings of the text and image modalities.
max_image_length (`int`, *optional*, defaults to -1):
    The maximum number of patches to take as input for the Transformer encoder. If set to a positive integer,
    the encoder will sample `max_image_length` patches at maximum. If set to -1, will not be taken into
    account.
num_images (`int`, *optional*, defaults to -1):
    The number of images to use for natural language visual reasoning. If set to a positive integer, will be
    used by [`ViltForImagesAndTextClassification`] for defining the classifier head.

Example:

```python
>>> from transformers import ViLTModel, ViLTConfig

>>> # Initializing a ViLT dandelin/vilt-b32-mlm style configuration
>>> configuration = ViLTConfig()

>>> # Initializing a model from the dandelin/vilt-b32-mlm style configuration
>>> model = ViLTModel(configuration)

>>> # Accessing the model configuration
>>> configuration = model.config
```vilti:w  
vocab_size   type_vocab_sizemodality_type_vocab_size(   max_position_embeddingsi   hidden_size   num_hidden_layersnum_attention_headsi   intermediate_sizegelu
hidden_actg        hidden_dropout_probattention_probs_dropout_probg{Gz?initializer_rangeg-q=layer_norm_epsi  
image_size    
patch_sizer   num_channelsTqkv_biasmax_image_lengthtie_word_embeddings
num_imagesNpad_token_idc                 X   > UR                  SS 5        SU l        [        TU ]  " S0 UD6  g )Nr$   T )popr$   super__post_init__)selfkwargs	__class__s     |/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/models/vilt/configuration_vilt.pyr+   ViltConfig.__post_init__L   s*    

($/#' ''    )r$   )%__name__
__module____qualname____firstlineno____doc__
model_typer   int__annotations__r   r   r   r   r   r   r   r   strr   floatr   r   r   r   listtupler   r    r!   boolr#   r$   r%   r&   r+   __static_attributes____classcell__)r.   s   @r/   r	   r	      s+   6 JJOS$%c%#%S%Ks!!!s!J'**03 %#+3#u#!NE!47Jd3i%S/1746Jd3i%S/16L#Hdc $$J#L#*#( (r1   r	   N)	r6   huggingface_hub.dataclassesr   configuration_utilsr   utilsr   r	   __all__r(   r1   r/   <module>rE      sG     . 3 # 237(! 7(  47(t .r1   