
    Z jI                     t    S r SSKJr  SSKJr  SSKJr  SSKJrJ	r	  \" SS	9\ " S
 S\5      5       5       r
S/rg)zVipLlava model configuration    )strict   )PreTrainedConfig)auto_docstring   )CONFIG_MAPPING
AutoConfigzllava-hf/vip-llava-7b-hf)
checkpointc                      ^  \ rS rSr% SrSrSS0r\\S.rSr	\
\-  S-  \S'   Sr\
\-  S-  \S	'   S
r\\S'   Sr\\S'   Sr\\S'   Sr\\\   -  \\S4   -  \S'   Sr\\S'   Sr\\S'   U 4S jrSrU =r$ )VipLlavaConfig   a  
projector_layernorm_eps (`float`, *optional*, defaults to 1e-05):
    The layer norm epsilon of the projector layernorm
vision_feature_layers (`Union[int, list[int]]`, *optional*, defaults to `[-2, -5, -8, -11, 6]`):
    The vision feature layer, or list of layers to select the vision features from.

Example:

```python
>>> from transformers import VipLlavaForConditionalGeneration, VipLlavaConfig, CLIPVisionConfig, LlamaConfig

>>> # Initializing a CLIP-vision config
>>> vision_config = CLIPVisionConfig()

>>> # Initializing a Llama config
>>> text_config = LlamaConfig()

>>> # Initializing a VipLlava vipllava-7b style configuration
>>> configuration = VipLlavaConfig(vision_config, text_config)

>>> # Initializing a model from the vipllava-7b style configuration
>>> model = VipLlavaForConditionalGeneration(configuration)

>>> # Accessing the model configuration
>>> configuration = model.config
```vipllavaimage_token_idimage_token_index)text_configvision_configNr   r    }  geluprojector_hidden_actgh㈵>projector_layernorm_eps)ii   .vision_feature_layersi@  image_seq_lengthFtie_word_embeddingsc                   > [        U R                  [        5      (       aU  U R                  R                  SS5      U R                  S'   [        U R                  S      " S0 U R                  D6U l        O&U R                  c  [        S   " SSSSSSS	S
S9U l        [        U R
                  [        5      (       aU  U R
                  R                  SS5      U R
                  S'   [        U R
                  S      " S0 U R
                  D6U l        O U R
                  c  [        S   " 5       U l        [        TU ]  " S0 UD6  g )N
model_typeclip_vision_modeli   i      iP        r   i   )intermediate_sizehidden_size
patch_size
image_sizenum_hidden_layersnum_attention_heads
vocab_sizeprojection_dimllama )
isinstancer   dictgetr   r   super__post_init__)selfkwargs	__class__s     ڄ/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/models/vipllava/configuration_vipllava.pyr1   VipLlavaConfig.__post_init__C   s$   d(($///3/A/A/E/ElTg/hD|,!/0B0B<0P!Q!gTXTfTf!gD'!/0C!D"& "$$& "	"D d&&---1-=-=-A-A,PW-XD\*-d.>.>|.LMaPTP`P`aD%-g68D''    )__name__
__module____qualname____firstlineno____doc__r   attribute_mapr	   sub_configsr   r.   r   __annotations__r   r   intr   strr   floatr   listtupler   r   boolr1   __static_attributes____classcell__)r4   s   @r5   r   r      s    6 J-M #-zJK48M4**T1826K((4/6"s" &#&%)U)?S3c?U38_<Sc %%( (r7   r   N)r<   huggingface_hub.dataclassesr   configuration_utilsr   utilsr   autor   r	   r   __all__r,   r7   r5   <module>rM      sN    # . 3 # - 56A(% A(  7A(H 
r7   