
    Z j                         S SK Jr  SSKJr  SSKJr  SSKJrJr  \" SS9\ " S	 S
\5      5       5       r	\" SS9\ " S S\5      5       5       r
S
S/rg)    )strict   )PreTrainedConfig)auto_docstring   )CONFIG_MAPPING
AutoConfigzOpenGVLab/InternVL3-1B-hf)
checkpointc                     ^  \ rS rSr% SrSrSrSr\\	S'   Sr
\\	S'   S	r\\	S
'   Sr\\	S'   Sr\\	S'   Sr\\	S'   Sr\\	S'   Sr\\-  \	S'   Sr\\-  \	S'   Sr\\-  \	S'   Sr\\	S'   Sr\\	S'   Sr\\	S'   Sr\\\   -  \\S4   -  \	S'   Sr\\\   -  \\S4   -  \	S '   S!r\\	S"'   Sr\\	S#'   S$r\\	S%'   S&r \\	S''   S$r!\\	S('   U 4S) jr"S*r#U =r$$ )+InternVLVisionConfig   a  
projection_dropout (`float`, *optional*, defaults to 0.0):
    Dropout probability for the projection layer.
norm_type (`str`, *optional*, defaults to `"layer_norm"`):
    The type of normalization to use in the encoder. Can be `"layer_norm"` or `"rms_norm"`.
use_mask_token (`bool`, *optional*, defaults to `False`):
    Whether to use a mask token for masked image modeling
use_mean_pooling (`bool`, *optional*, defaults to `True`):
    Whether to mean pool the final hidden states of the patches instead of using the final hidden state of the
    CLS token, before applying the classification head.

Example:

```python
>>> from transformers import InternVLVisionConfig, InternVLVisionModel

>>> # Initializing a InternVLVisionModel OpenGVLab/InternVL3-1B-hf style configuration
>>> configuration = InternVLVisionConfig()

>>> # Initializing a model (with random weights) from the OpenGVLab/InternVL3-1B-hf configuration
>>> model = InternVLVisionModel(configuration)

>>> # Accessing the model configuration
>>> configuration = model.config
```internvl_visionvision_configi   hidden_size   num_hidden_layers   num_attention_headsFattention_biasuse_qk_normi   intermediate_sizegelu
hidden_actg        hidden_dropout_probattention_dropoutprojection_dropoutg{Gz?initializer_range
layer_norm	norm_typegư>layer_norm_eps)  r!   .
image_size)   r#   
patch_sizer   num_channelsuse_mask_tokenT use_absolute_position_embeddingsg?layer_scale_init_valueuse_mean_poolingc                 ^  > [        U R                  [        [        45      (       a  U R                  OU R                  U R                  4U l        [        U R                  [        [        45      (       a  U R                  OU R                  U R                  4U l        [
        TU ]  " S0 UD6  g )N )
isinstancer"   listtupler$   super__post_init__selfkwargs	__class__s     ڄ/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/models/internvl/configuration_internvl.pyr0   "InternVLVisionConfig.__post_init__L   s    )$//D%=IIDOOPTP_P_aeapapOq 	  *$//D%=IIDOOPTP_P_aeapapOq 	 	''    )r"   r$   )%__name__
__module____qualname____firstlineno____doc__
model_typebase_config_keyr   int__annotations__r   r   r   boolr   r   r   strr   floatr   r   r   r   r    r"   r-   r.   r$   r%   r&   r'   r(   r)   r0   __static_attributes____classcell__r4   s   @r5   r   r      s&   4 #J%OKs!! ND K!s!J'**%(us{(&))#u#!Is!!NE!4>Jd3i%S/1>4<Jd3i%S/1<L# ND -1$d1$'E'!d!( (r7   r   c                      ^  \ rS rSr% SrSr\\S.rSr	\
\-  S-  \S'   Sr\
\-  S-  \S'   Sr\\S	'   S
r\\S'   Sr\\S'   Sr\\S'   Sr\\\   -  \S'   Sr\\S'   Sr\\S'   U 4S jrSrU =r$ )InternVLConfigV   a  
downsample_ratio (`float`, *optional*, defaults to 0.5):
    Factor by which to downsample the image.

Example:

```python
>>> from transformers import InternVLForConditionalGeneration, InternVLConfig

>>> # Initializing a InternVL style configuration
>>> configuration = InternVLConfig()

>>> # Initializing a model (with random weights) from the OpenGVLab/InternVL3-1B-hf configuration
>>> model = InternVLForConditionalGeneration(configuration)

>>> # Accessing the model configuration
>>> configuration = model.config
```internvl)text_configr   Nr   rK   isP image_token_id   image_seq_lengthg      ?downsample_ratior   projector_hidden_actvision_feature_layerdefaultvision_feature_select_strategyTtie_word_embeddingsc                   > [        U R                  [        5      (       a  [        S0 U R                  D6U l        OU R                  c  [        5       U l        [        U R                  [        5      (       aU  U R                  R                  SS5      U R                  S'   [        U R                  S      " S0 U R                  D6U l        O U R                  c  [        S   " 5       U l        [        TU ]   " S0 UD6  g )Nr=   qwen2r+   )	r,   r   dictr   rK   getr   r/   r0   r1   s     r5   r0   InternVLConfig.__post_init__y   s    d(($//!5!K8J8J!KD'!5!7Dd&&---1-=-=-A-A,PW-XD\*-d.>.>|.LMaPTP`P`aD%-g68D''r7   )r8   r9   r:   r;   r<   r=   r	   r   sub_configsr   rX   r   r@   rK   rL   r?   rN   rO   rC   rP   rB   rR   r-   rT   rU   rA   r0   rD   rE   rF   s   @r5   rH   rH   V   s    & J",?STK48M4**T1826K((4/6 NC c!e! &#&,.#S	/.*3"C3 $$( (r7   rH   N)huggingface_hub.dataclassesr   configuration_utilsr   utilsr   autor   r	   r   rH   __all__r+   r7   r5   <module>ra      sv     / 3 # - 67:(+ :(  8:(z 67-(% -(  8-(` "#3
4r7   