
    Z jy*                        S r SSKJr  SSKJr  SSKJr  SSKJr  SSK	J
r
Jr  \" S	S
9\ " S S\5      5       5       r\" S	S
9\ " S S\5      5       5       r\" S	S
9\ " S S\5      5       5       r\" S	S
9\ " S S\5      5       5       r\" S	S
9\ " S S\5      5       5       r\" S	S
9\ " S S\5      5       5       r\" S	S
9\ " S S\5      5       5       r/ SQrg)zSAM3 model configuration    )strict)CLIPTextConfig   )PreTrainedConfig)auto_docstring   )CONFIG_MAPPING
AutoConfigzfacebook/sam3)
checkpointc                     ^  \ rS rSr% SrSrSrSr\\	S'   Sr
\\	S'   S	r\\	S
'   Sr\\	S'   Sr\\	S'   Sr\\\   -  \\\4   -  \	S'   Sr\\\   -  \\\4   -  \	S'   Sr\\	S'   Sr\\	S'   Sr\\-  \	S'   Sr\\	S'   Sr\\	S'   Sr\\   S-  \	S'   Sr\S-  \	S'   S r\\\   -  \\\4   -  \	S!'   Sr\\-  \	S"'   S#r\\	S$'   U 4S% jrS&rU =r $ )'Sam3ViTConfig   a  
rope_theta (`float`, *optional*, defaults to 10000.0):
    Base frequency for RoPE.
window_size (`int`, *optional*, defaults to 24):
    Window size for windowed attention.
global_attn_indexes (`list[int]`, *optional*, defaults to `[7, 15, 23, 31]`):
    Indexes of layers with global attention.
pretrain_image_size (`int`, *optional*, defaults to 336):
    Pretrained model image size for position embedding initialization.
hidden_dropout (`float`, *optional*, defaults to 0.0):
    Dropout probability for hidden states.
backbone_configsam3_vit_model   hidden_sizei  intermediate_size    num_hidden_layers   num_attention_headsr   num_channelsi  
image_size   
patch_sizegelu
hidden_actư>layer_norm_eps        attention_dropoutg     @
rope_theta   window_sizeNglobal_attn_indexeslayer_scale_init_valueiP  pretrain_image_sizehidden_dropout{Gz?initializer_rangec                 T   > [         TU ]  " S0 UD6  U R                  c
  / SQU l        g g )N)             )super__post_init__r%   selfkwargs	__class__s     |/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/models/sam3/configuration_sam3.pyr2   Sam3ViTConfig.__post_init__>   s,    ''##+'6D$ ,    )r%   )!__name__
__module____qualname____firstlineno____doc__base_config_key
model_typer   int__annotations__r   r   r   r   r   listtupler   r   strr   floatr!   r"   r$   r%   r&   r'   r(   r*   r2   __static_attributes____classcell__r6   s   @r7   r   r      s$    (O!JK!s!s!!L#48Jd3i%S/1846Jd3i%S/16J NE %(us{(JK,0cT)0+/EDL/=@tCy5c?:@"%NECK%#u#7 7r9   r   c                      ^  \ rS rSr% SrSrSrS\0rSr	\
\-  S-  \S'   Sr\\S'   Sr\S-  \S	'   Sr\\   S-  \S
'   Sr\\S'   Sr\\S'   Sr\\S'   U 4S jr\S 5       r\R4                  S 5       rSrU =r$ )Sam3VisionConfigD   a  
fpn_hidden_size (`int`, *optional*, defaults to 256):
    The hidden dimension of the FPN.
backbone_feature_sizes (`List[List[int]]`, *optional*, defaults to `[[288, 288], [144, 144], [72, 72]]`):
    The spatial sizes (height, width) of the feature maps from the backbone at different scales.
scale_factors (`list[float]`, *optional*, defaults to `[4.0, 2.0, 1.0, 0.5]`):
    Scale factors for FPN multi-scale features. List of scaling factors for each FPN level.
vision_configsam3_vision_modelr   N   fpn_hidden_sizebackbone_feature_sizesscale_factorsr   r   r   r   r)   r*   c                   > U R                   c  / SQOU R                   U l         U R                  c  SS/SS/SS//U l        [        U R                  [        5      (       aU  U R                  R                  SS5      U R                  S'   [        U R                  S      " S0 U R                  D6U l        O U R                  c  [        S   " 5       U l        [        TU ]   " S0 UD6  g )N)g      @g       @g      ?g      ?i      H   r@   r   r0   )	rR   rQ   
isinstancer   dictgetr	   r1   r2   r3   s     r7   r2   Sam3VisionConfig.__post_init__^   s    595G5G5O1UYUgUg&&.,/:SzB8*LD'd**D11151E1E1I1I,Xh1iD  .#1$2F2F|2T#U#mX\XlXl#mD !!)#12B#C#ED ''r9   c                 .    U R                   R                  $ )z"Image size for the vision encoder.r   r   r4   s    r7   r   Sam3VisionConfig.image_sizek   s     ##...r9   c                 $    XR                   l        g)z-Set the image size and propagate to backbone.Nr[   r4   values     r7   r   r]   p   s     +0'r9   )r   rQ   rR   )r:   r;   r<   r=   r>   r?   r@   r
   sub_configsr   rW   r   rB   rP   rA   rQ   rC   rR   rF   r   rE   r   r*   r2   propertyr   setterrG   rH   rI   s   @r7   rK   rK   D   s     &O$J:K 7;OT,,t3:OS*.D4K.(,M4;%,J NE #u#( / / 0 0r9   rK   c                       \ rS rSr% SrSrSr\\S'   Sr	\\S'   Sr
\\S	'   S
r\\S'   Sr\\-  \S'   Sr\\S'   Sr\\-  \S'   Sr\\S'   Sr\\S'   Sr\\S'   Srg)Sam3GeometryEncoderConfigv   zW
roi_size (`int`, *optional*, defaults to 7):
    ROI size for box pooling operations.
sam3_geometry_encoderrO   r   r   
num_layers   r      r   皙?dropoutrelur   r    r(   r   r   r,   roi_sizer)   r*   r0   N)r:   r;   r<   r=   r>   r@   r   rA   rB   rh   r   r   rl   rF   r   rE   r(   r   rn   r*   rG   r0   r9   r7   re   re   v   s}    
 )JKJ  !s!GUS[J"%NECK% NE Hc#u#r9   re   c                       \ rS rSr% SrSrSr\\S'   Sr	\\S'   Sr
\\S	'   S
r\\S'   Sr\\-  \S'   Sr\\S'   Sr\\-  \S'   Sr\\S'   Sr\\S'   Srg)Sam3DETREncoderConfig   zc
hidden_dropout (`float`, *optional*, defaults to 0.0):
    Dropout probability for hidden states.
sam3_detr_encoderrO   r      rh   ri   r   rj   r   rk   rl   rm   r   r    r(   r   r   r)   r*   r0   N)r:   r;   r<   r=   r>   r@   r   rA   rB   rh   r   r   rl   rF   r   rE   r(   r   r*   rG   r0   r9   r7   rp   rp      ss    
 %JKJ  !s!GUS[J"%NECK% NE #u#r9   rp   c                       \ rS rSr% SrSrSr\\S'   Sr	\\S'   Sr
\\S	'   S
r\\S'   Sr\\S'   Sr\\-  \S'   Sr\\S'   Sr\\-  \S'   Sr\\S'   Sr\\S'   Srg)Sam3DETRDecoderConfig   zQ
num_queries (`int`, *optional*, defaults to 200):
    Number of object queries.
sam3_detr_decoderrO   r   rs   rh      num_queriesri   r   rj   r   rk   rl   rm   r   r    r(   r   r   r)   r*   r0   N)r:   r;   r<   r=   r>   r@   r   rA   rB   rh   ry   r   r   rl   rF   r   rE   r(   r   r*   rG   r0   r9   r7   ru   ru      s}    
 %JKJK  !s!GUS[J"%NECK% NE #u#r9   ru   c                   x    \ rS rSr% SrSrSr\\S'   Sr	\\S'   Sr
\\S	'   S
r\\-  \S'   Sr\\S'   Sr\\S'   Srg)Sam3MaskDecoderConfig   zw
num_upsampling_stages (`int`, *optional*, defaults to 3):
    Number of upsampling stages in the pixel decoder (FPN).
sam3_mask_decoderrO   r   r   num_upsampling_stagesr   r   r    rl   ri   r   r)   r*   r0   N)r:   r;   r<   r=   r>   r@   r   rA   rB   r~   r   rF   rl   r   r*   rG   r0   r9   r7   r{   r{      sQ    
 %JK!"3" NE GUS[  #u#r9   r{   c                   &  ^  \ rS rSr% SrSrSr\\\	\
\\S.rSr\\-  S-  \S'   Sr\\-  S-  \S'   Sr\\-  S-  \S	'   Sr\\-  S-  \S
'   Sr\\-  S-  \S'   Sr\\-  S-  \S'   Sr\\S'   U 4S jr\S 5       r\R8                  S 5       rSrU =r$ )
Sam3Config   a  
geometry_encoder_config (`dict` or `Sam3GeometryEncoderConfig`, *optional*):
    Configuration for the geometry encoder.
detr_encoder_config (`dict` or `Sam3DETREncoderConfig`, *optional*):
    Configuration for the DETR encoder.
detr_decoder_config (`dict` or `Sam3DETRDecoderConfig`, *optional*):
    Configuration for the DETR decoder.
mask_decoder_config (`dict` or `Sam3MaskDecoderConfig`, *optional*):
    Configuration for the mask decoder.

Example:
```python
>>> from transformers import Sam3Config, Sam3Model

>>> # Initializing a SAM3 configuration
>>> configuration = Sam3Config()

>>> # Initializing a model from the configuration
>>> model = Sam3Model(configuration)

>>> # Accessing the model configuration
>>> configuration = model.config
```
sam3T)rM   text_configgeometry_encoder_configdetr_encoder_configdetr_decoder_configmask_decoder_configNrM   r   r   r   r   r   r)   r*   c                 6  > U R                   c  [        5       U l         [        U R                   [        5      (       a  [        S
0 U R                   D6U l         U R                  c  [        S
0 SSSSSSSSS	.D6U l        [        U R                  [        5      (       a  [        S
0 U R                  D6U l        U R                  c  [        5       U l        [        U R                  [        5      (       a  [        S
0 U R                  D6U l        U R                  c  [        5       U l        [        U R                  [        5      (       a  [        S
0 U R                  D6U l        U R                  c  [        5       U l
        [        U R                  [        5      (       a  [        S
0 U R                  D6U l
        U R                  c  [        5       U l        [        U R                  [        5      (       a  [        S
0 U R                  D6U l        [        TU ]<  " S
0 UD6  g )Ni   r   i   i   r#   r   r   r   )
vocab_sizer   r   projection_dimr   r   max_position_embeddingsr   r0   )rM   rK   rV   rW   r   r   r   re   r   rp   r   ru   r   r{   r1   r2   r3   s     r7   r2   Sam3Config.__post_init__   s   %!1!3Dd(($//!1!GD4F4F!GD#-  "'#')-&))++-/1"(	 D d&&---A0@0@AD''/+D+FD(d22D99+D+dtGcGc+dD(##+'<'>D$d..55'<'Xt?W?W'XD$##+'<'>D$d..55'<'Xt?W?W'XD$##+'<'>D$d..55'<'Xt?W?W'XD$''r9   c                 .    U R                   R                  $ )zImage size for the SAM3 model.rM   r   r\   s    r7   r   Sam3Config.image_size$  s     !!,,,r9   c                 $    XR                   l        g)z2Set the image size and propagate to vision config.Nr   r_   s     r7   r   r   )  s     ).%r9   )r   r   r   r   r   rM   )r:   r;   r<   r=   r>   r@   is_compositionrK   r   re   rp   ru   r{   ra   rM   rW   r   rB   r   r   r   r   r   r*   rF   r2   rb   r   rc   rG   rH   rI   s   @r7   r   r      s    2 JN)%#<444K 59M4**T1826K((4/6>BT$44t;B:> 0047>:> 0047>:> 0047>#u#*(X - - . .r9   r   )r   r   rK   re   rp   ru   r{   N)r>   huggingface_hub.dataclassesr   transformersr   configuration_utilsr   utilsr   autor	   r
   r   rK   re   rp   ru   r{   r   __all__r0   r9   r7   <module>r      sE    . ' 3 # - ?+&7$ &7  ,&7R ?+-0' -0  ,-0` ?+$ 0 $  ,$( ?+$, $  ,$& ?+$, $  ,$( ?+$, $  ,$  ?+a.! a.  ,a.Hr9   