
    Z j                      
   S SK Jr  SSKJr  SSKJr  SSKJrJr  SSK	J
r
Jr  \R                  " \5      r\" SS	9\ " S
 S\5      5       5       r\" SS	9\ " S S\5      5       5       r\" SS	9\ " S S\5      5       5       r/ SQrg)    )strict   )PreTrainedConfig)!MODEL_FOR_CAUSAL_LM_MAPPING_NAMES)auto_docstringlogging   )CONFIG_MAPPING
AutoConfigz"Salesforce/instructblip-flan-t5-xl)
checkpointc                       \ rS rSr% SrSrSrSr\\	S'   Sr
\\	S'   S	r\\	S
'   Sr\\	S'   Sr\\\   -  \\\4   -  \	S'   Sr\\\   -  \\\4   -  \	S'   Sr\\	S'   Sr\\	S'   Sr\\-  \	S'   Sr\\	S'   Sr\\	S'   Srg)InstructBlipVideoVisionConfig!   a   
Example:

```python
>>> from transformers import InstructBlipVideoVisionConfig, InstructBlipVideoVisionModel

>>> # Initializing a InstructBlipVideoVisionConfig with Salesforce/instructblip-flan-t5-xl style configuration
>>> configuration = InstructBlipVideoVisionConfig()

>>> # Initializing a InstructBlipVideoVisionModel (with random weights) from the Salesforce/instructblip-flan-t5-xl style configuration
>>> model = InstructBlipVideoVisionModel(configuration)

>>> # Accessing the model configuration
>>> configuration = model.config
```instructblipvideo_vision_modelvision_config  hidden_sizei   intermediate_size'   num_hidden_layers   num_attention_heads   
image_size   
patch_sizegelu
hidden_actgư>layer_norm_epsg        attention_dropoutg|=initializer_rangeTqkv_bias N)__name__
__module____qualname____firstlineno____doc__
model_typebase_config_keyr   int__annotations__r   r   r   r   listtupler   r   strr   floatr    r!   r"   bool__static_attributes__r#       ږ/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/models/instructblipvideo/configuration_instructblipvideo.pyr   r   !   s      2J%OK!s!s!!47Jd3i%S/1746Jd3i%S/16J NE %(us{($u$Hdr3   r   c                       \ rS rSr% SrSrSrSr\\	S'   Sr
\\	S'   S	r\\	S
'   S	r\\	S'   Sr\\	S'   Sr\\	S'   Sr\\-  \	S'   Sr\\-  \	S'   Sr\\	S'   Sr\\	S'   Sr\\	S'   Sr\S-  \	S'   Sr\\	S'   Sr\\	S'   S rg)!InstructBlipVideoQFormerConfigD   a  
cross_attention_frequency (`int`, *optional*, defaults to 2):
    The frequency of adding cross-attention to the Transformer layers.
encoder_hidden_size (`int`, *optional*, defaults to 1408):
    The hidden size of the hidden states for cross-attention.

Examples:

```python
>>> from transformers import InstructBlipVideoQFormerConfig, InstructBlipVideoQFormerModel

>>> # Initializing a InstructBlipVideo Salesforce/instructblip-flan-t5-xl style configuration
>>> configuration = InstructBlipVideoQFormerConfig()

>>> # Initializing a model (with random weights) from the Salesforce/instructblip-flan-t5-xl style configuration
>>> model = InstructBlipVideoQFormerModel(configuration)
>>> # Accessing the model configuration
>>> configuration = model.config
```instructblipvideo_qformerqformer_configi:w  
vocab_sizei   r      r   r   i   r   r   r   g?hidden_dropout_probattention_probs_dropout_probi   max_position_embeddings{Gz?r!   g-q=r   r   Npad_token_idr	   cross_attention_frequencyr   encoder_hidden_sizer#   )r$   r%   r&   r'   r(   r)   r*   r:   r+   r,   r   r   r   r   r   r/   r<   r0   r=   r>   r!   r   r@   rA   rB   r2   r#   r3   r4   r6   r6   D   s    ( -J&OJKs!!!s!J'**03 %#+3#&S&#u#!NE! L#* %&s&##r3   r6   c                      ^  \ rS rSr% SrSrSS0r\\\	S.r
Sr\\-  S-  \S'   Sr\\-  S-  \S	'   Sr\\-  S-  \S
'   Sr\\S'   Sr\\S'   Sr\\S'   Sr\S-  \S'   U 4S jrSrU =r$ )InstructBlipVideoConfign   a  
qformer_config (`dict`, *optional*):
    Dictionary of configuration options used to initialize [`InstructBlipVideoQFormerConfig`].
num_query_tokens (`int`, *optional*, defaults to 32):
    The number of query tokens passed through the Transformer.

Example:

```python
>>> from transformers import (
...     InstructBlipVideoVisionConfig,
...     InstructBlipVideoQFormerConfig,
...     OPTConfig,
...     InstructBlipVideoConfig,
...     InstructBlipVideoForConditionalGeneration,
... )

>>> # Initializing a InstructBlipVideoConfig with Salesforce/instructblip-flan-t5-xl style configuration
>>> configuration = InstructBlipVideoConfig()

>>> # Initializing a InstructBlipVideoForConditionalGeneration (with random weights) from the Salesforce/instructblip-flan-t5-xl style configuration
>>> model = InstructBlipVideoForConditionalGeneration(configuration)

>>> # Accessing the model configuration
>>> configuration = model.config

>>> # We can also initialize a InstructBlipVideoConfig from a InstructBlipVideoVisionConfig, InstructBlipVideoQFormerConfig and any PreTrainedConfig

>>> # Initializing Instructblipvideo vision, Instructblipvideo Q-Former and language model configurations
>>> vision_config = InstructBlipVideoVisionConfig()
>>> qformer_config = InstructBlipVideoQFormerConfig()
>>> text_config = OPTConfig()

>>> config = InstructBlipVideoConfig(vision_config=vision_config, qformer_config=qformer_config, text_config=text_config)
```instructblipvideovideo_token_idvideo_token_index)text_configr9   r   Nr   r9   rI       num_query_tokensg      ?initializer_factorr?   r!   c                 ~  > U R                   c)  [        S   " 5       U l         [        R                  S5        OY[	        U R                   [
        5      (       a:  U R                   R                  SS5      n[        U   " S0 U R                   D6U l         U R                  c%  [        5       U l        [        R                  S5        O9[	        U R                  [
        5      (       a  [        S0 U R                  D6U l        U R                  c%  [        5       U l	        [        R                  S5        O9[	        U R                  [
        5      (       a  [        S0 U R                  D6U l	        U R                  R                  U R                  l        U R                   R                  [        ;   U l        [         TU ]D  " S0 UD6  g )NoptzTtext_config is None. Initializing the text config with default values (`OPTConfig`).r)   z\qformer_config is None. Initializing the InstructBlipVideoQFormerConfig with default values.z``vision_config` is `None`. initializing the `InstructBlipVideoVisionConfig` with default values.r#   )rI   r
   loggerinfo
isinstancedictgetr9   r6   r   r   r   rB   r)   r   use_decoder_only_language_modelsuper__post_init__)selfkwargstext_model_type	__class__s      r4   rV   %InstructBlipVideoConfig.__post_init__   sT   #-e46DKKno(($//"..22<GO-o>RAQAQRD&"@"BDKKvw++T22"@"W4CVCV"WD%!>!@DKKr **D11!>!TASAS!TD262D2D2P2P//3/?/?/J/JNo/o,''r3   )r9   rI   rT   r   )r$   r%   r&   r'   r(   r)   attribute_mapr   r6   r   sub_configsr   rR   r   r,   r9   rI   rK   r+   rL   r0   r!   rH   rV   r2   __classcell__)rZ   s   @r4   rD   rD   n   s    "H %J%':;M!86K 59M4**T1859ND++d2926K((4/6c ###u#$(sTz(( (r3   rD   )rD   r6   r   N)huggingface_hub.dataclassesr   configuration_utilsr   models.auto.modeling_autor   utilsr   r   autor
   r   
get_loggerr$   rO   r   r6   rD   __all__r#   r3   r4   <module>rf      s   , / 3 J , - 
		H	% ?@$4   AB ?@%$%5 %$  A%$P ?@N(. N(  AN(b ir3   