
    Z j                     d    S r SSKJr  SSKJr  SSKJr  \" SS9\ " S S	\5      5       5       rS	/rg
)zVideoMAE model configuration    )strict   )PreTrainedConfig)auto_docstringzMCG-NJU/videomae-base)
checkpointc                      \ rS rSr% SrSrSr\\\   -  \	\\4   -  \
S'   Sr\\\   -  \	\\4   -  \
S'   Sr\\
S	'   Sr\\
S
'   Sr\\
S'   Sr\\
S'   Sr\\
S'   Sr\\
S'   Sr\\
S'   Sr\\
S'   Sr\\-  \
S'   Sr\\-  \
S'   Sr\\
S'   Sr\\
S'   Sr\\
S'   Sr\\
S'   S r\\
S!'   S"r\\
S#'   S$r\\
S%'   S&r \\
S''   Sr!\\
S('   S)r"g*)+VideoMAEConfig   a  
num_frames (`int`, *optional*, defaults to 16):
    The number of frames in each video.
tubelet_size (`int`, *optional*, defaults to 2):
    The number of tubelets.
use_mean_pooling (`bool`, *optional*, defaults to `True`):
    Whether to mean pool the final hidden states instead of using the final hidden state of the [CLS] token.
decoder_num_attention_heads (`int`, *optional*, defaults to 6):
    Number of attention heads for each attention layer in the decoder.
decoder_hidden_size (`int`, *optional*, defaults to 384):
    Dimensionality of the decoder.
decoder_num_hidden_layers (`int`, *optional*, defaults to 4):
    Number of hidden layers in the decoder.
decoder_intermediate_size (`int`, *optional*, defaults to 1536):
    Dimensionality of the "intermediate" (i.e., feed-forward) layer in the decoder.
norm_pix_loss (`bool`, *optional*, defaults to `True`):
    Whether to normalize the target patch pixels.

Example:

```python
>>> from transformers import VideoMAEConfig, VideoMAEModel

>>> # Initializing a VideoMAE videomae-base style configuration
>>> configuration = VideoMAEConfig()

>>> # Randomly initializing a model from the configuration
>>> model = VideoMAEModel(configuration)

>>> # Accessing the model configuration
>>> configuration = model.config
```videomae   
image_size   
patch_sizer   num_channels
num_frames   tubelet_sizei   hidden_size   num_hidden_layersnum_attention_headsi   intermediate_sizegelu
hidden_actg        hidden_dropout_probattention_probs_dropout_probg{Gz?initializer_rangeg-q=layer_norm_epsTqkv_biasuse_mean_pooling   decoder_num_attention_headsi  decoder_hidden_size   decoder_num_hidden_layersi   decoder_intermediate_sizenorm_pix_loss N)#__name__
__module____qualname____firstlineno____doc__
model_typer   intlisttuple__annotations__r   r   r   r   r   r   r   r   r   strr   floatr   r   r   r   boolr    r"   r#   r%   r&   r'   __static_attributes__r(       ڄ/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/models/videomae/configuration_videomae.pyr	   r	      s   B J47Jd3i%S/1746Jd3i%S/16L#JL#Ks!!!s!J'**03 %#+3#u#!NE!Hd!d!'((""%&s&%)s)M4r7   r	   N)	r-   huggingface_hub.dataclassesr   configuration_utilsr   utilsr   r	   __all__r(   r7   r8   <module>r=      sH    # . 3 # 238% 8  48v 
r7   