
    Z j~1                     >   S r SSKJr  SSKJr  SSKJr  SSKJrJ	r	  \" SS	9\ " S
 S\5      5       5       r
\" SS	9\ " S S\5      5       5       r\" SS	9\ " S S\5      5       5       r\" SS	9\ " S S\5      5       5       r\" SS	9\ " S S\5      5       5       r/ SQrg)zSAM2 model configuration    )strict   )PreTrainedConfig)auto_docstring   )CONFIG_MAPPING
AutoConfigzfacebook/sam2.1-hiera-tiny)
checkpointc                     ^  \ rS rSr% SrSrSrSr\\	S'   Sr
\\	S'   S	r\\	S
'   Sr\\\   -  S-  \	S'   Sr\\\   -  S-  \	S'   Sr\\\   -  S-  \	S'   Sr\\\   -  S-  \	S'   Sr\\\   -  S-  \	S'   Sr\\   S-  \	S'   S	r\\	S'   Sr\\   S-  \	S'   Sr\\   S-  \	S'   Sr\\   S-  \	S'   Sr\\   S-  \	S'   Sr\\   S-  \	S'   Sr\\	S'   Sr\\	S'   Sr\\	S'   Sr\\	S'   U 4S  jrS!r U =r!$ )"Sam2HieraDetConfig   a  
patch_kernel_size (`list[int]`, *optional*, defaults to `[7, 7]`):
    The kernel size of the patch.
patch_stride (`list[int]`, *optional*, defaults to `[4, 4]`):
    The stride of the patch.
patch_padding (`list[int]`, *optional*, defaults to `[3, 3]`):
    The padding of the patch.
query_stride (`list[int]`, *optional*, defaults to `[2, 2]`):
    The downsample stride between stages.
window_positional_embedding_background_size (`list[int]`, *optional*, defaults to `[7, 7]`):
    The window size per stage when not using global attention.
num_query_pool_stages (`int`, *optional*, defaults to 3):
    The number of query pool stages.
blocks_per_stage (`list[int]`, *optional*, defaults to `[1, 2, 7, 2]`):
    The number of blocks per stage.
embed_dim_per_stage (`list[int]`, *optional*, defaults to `[96, 192, 384, 768]`):
    The embedding dimension per stage.
num_attention_heads_per_stage (`list[int]`, *optional*, defaults to `[1, 2, 4, 8]`):
    The number of attention heads per stage.
window_size_per_stage (`list[int]`, *optional*, defaults to `[8, 4, 14, 7]`):
    The window size per stage.
global_attention_blocks (`list[int]`, *optional*, defaults to `[5, 7, 9]`):
    The blocks where global attention is used.
backbone_configsam2_hiera_det_model`   hidden_size   num_attention_headsr   num_channelsN
image_sizepatch_kernel_sizepatch_stridepatch_paddingquery_stride+window_positional_embedding_background_sizenum_query_pool_stagesblocks_per_stageembed_dim_per_stagenum_attention_heads_per_stagewindow_size_per_stageglobal_attention_blocksg      @	mlp_ratiogelu
hidden_actư>layer_norm_eps{Gz?initializer_rangec                   > U R                   b  U R                   OSS/U l         U R                  b  U R                  OSS/U l        U R                  b  U R                  OSS/U l        U R                  b  U R                  OSS/U l        U R                  b  U R                  OSS/U l        U R
                  b  U R
                  OSS/U l        U R                  b  U R                  O/ SQU l        U R                  b  U R                  O/ SQU l        U R                  b  U R                  O/ SQU l        U R                  b  U R                  O/ S	QU l	        U R                  b  U R                  O/ S
QU l
        [        TU ]0  " S0 UD6  g )N         r   r   )r   r   r*   r   )r           )r   r   r+      )r/   r+      r*   )   r*   	    )r   r   r   r   r   r   r   r   r   r   r    super__post_init__selfkwargs	__class__s     |/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/models/sam2/configuration_sam2.pyr5    Sam2HieraDetConfig.__post_init__J   s   -1__-H$//tUYl;?;Q;Q;]!7!7deghci151B1B1ND--UVXYTZ373E3E3QT//XY[\W]151B1B1ND--UVXYTZ ??K <<Q 	8
 :>9N9N9Z 5 5`l(,(@(@(LD$$Re 	  372T2T2`D..fr 	* +/*D*D*PD&&Vc 	" -1,H,H,TD((Zc 	$ 	''    )r   r   r    r   r   r   r   r   r   r   r   )"__name__
__module____qualname____firstlineno____doc__base_config_key
model_typer   int__annotations__r   r   r   listr   r   r   r   r   r   r   r   r   r   r    r!   floatr#   strr%   r'   r5   __static_attributes____classcell__r9   s   @r:   r   r      s]   2 (O'JK  L#)-Jd3i$&-04sT#Y-4+/L#S	/D(/,0M3c?T)0+/L#S	/D(/DH/cT1AH!"3")-d3i$&-,0cT)06:!49t#3:.249t+204T#Y-4IuJ NE #u#( (r<   r   c                     ^  \ rS rSr% SrSrSrS\0rSr	\
\-  S-  \S'   Sr\\   S-  \S'   Sr\S-  \S'   S	r\\S
'   Sr\\S'   Sr\\S'   Sr\\S'   Sr\\   S-  \S'   Sr\\S'   Sr\\S'   Sr\\S'   Sr\\S'   U 4S jrSrU =r$ )Sam2VisionConfige   a  
backbone_channel_list (`List[int]`, *optional*, defaults to `[768, 384, 192, 96]`):
    The list of channel dimensions for the backbone.
backbone_feature_sizes (`List[List[int]]`, *optional*, defaults to `[[256, 256], [128, 128], [64, 64]]`):
    The spatial sizes of the feature maps from the backbone.
fpn_hidden_size (`int`, *optional*, defaults to 256):
    The hidden dimension of the FPN.
fpn_kernel_size (`int`, *optional*, defaults to 1):
    The kernel size for the convolutions in the neck.
fpn_stride (`int`, *optional*, defaults to 1):
    The stride for the convolutions in the neck.
fpn_padding (`int`, *optional*, defaults to 0):
    The padding for the convolutions in the neck.
fpn_top_down_levels (`List[int]`, *optional*, defaults to `[2, 3]`):
    The levels for the top-down FPN connections.
num_feature_levels (`int`, *optional*, defaults to 3):
    The number of feature levels from the FPN to use.
vision_configsam2_vision_modelr   Nbackbone_channel_listbackbone_feature_sizes   fpn_hidden_sizer   fpn_kernel_size
fpn_strider   fpn_paddingfpn_top_down_levelsr   num_feature_levelsr"   r#   r$   r%   r&   r'   c                    > U R                   c  / SQOU R                   U l         U R                  c  SS/SS/SS//OU R                  U l        U R                  c  SS/OU R                  U l        [        U R                  [
        5      (       aU  U R                  R                  SS5      U R                  S'   [        U R                  S      " S	0 U R                  D6U l        OU R                  c  [        5       U l        [        TU ](  " S	0 UD6  g )
N)r.   r-   r,   r   rS      @   r   r   rC   r   r3   )rQ   rR   rX   
isinstancer   dictgetr   r   r4   r5   r6   s     r:   r5   Sam2VisionConfig.__post_init__   s    #'#=#=#E4KeKe 	" 372M2M2Uc3Z#sb"X.[_[v[v 	# .2-E-E-MAq6SWSkSk d**D11151E1E1I1I,Xn1oD  .#1$2F2F|2T#U#mX\XlXl#mD !!)#5#7D ''r<   )rQ   r   rR   rX   )r=   r>   r?   r@   rA   rB   rC   r	   sub_configsr   r^   r   rE   rQ   rF   rD   rR   rT   rU   rV   rW   rX   rY   r#   rH   r%   rG   r'   r5   rI   rJ   rK   s   @r:   rM   rM   e   s    & &O$J:K 7;OT,,t3:.249t+2*.D4K.OSOSJK,0cT)0J NE #u#( (r<   rM   c                       \ rS rSr% SrSrSr\\S'   Sr	\\
\   -  \\\4   -  \S'   Sr\\
\   -  \\\4   -  \S	'   Sr\\S
'   Sr\\S'   Sr\\S'   Sr\\S'   Sr\\S'   Srg)Sam2PromptEncoderConfig   a=  
mask_input_channels (`int`, *optional*, defaults to 16):
    The number of channels to be fed to the `MaskDecoder` module.
num_point_embeddings (`int`, *optional*, defaults to 4):
    The number of point embeddings to be used.
scale (`float`, *optional*, defaults to 1):
    The scale factor for the prompt encoder.
prompt_encoder_configrS   r   r)   r      
patch_sizemask_input_channelsr+   num_point_embeddingsr"   r#   r$   r%   r   scaler3   N)r=   r>   r?   r@   rA   rB   r   rD   rE   r   rF   tuplerg   rh   ri   r#   rH   r%   rG   rj   rI   r3   r<   r:   rc   rc      s     .OK48Jd3i%S/1846Jd3i%S/16!! !#!J NE E3Nr<   rc   c                       \ rS rSr% SrSrSr\\S'   Sr	\
\S'   Sr\\S	'   S
r\\S'   Sr\\S'   S
r\\S'   Sr\\S'   Sr\\S'   Sr\\S'   Sr\\S'   Sr\\S'   Sr\\S'   Srg)Sam2MaskDecoderConfig   am  
mlp_dim (`int`, *optional*, defaults to 2048):
    The dimension of the MLP in the two-way transformer.
attention_downsample_rate (`int`, *optional*, defaults to 2):
    The downsample rate for the attention layers.
num_multimask_outputs (`int`, *optional*, defaults to 3):
    The number of multimask outputs.
iou_head_depth (`int`, *optional*, defaults to 3):
    The depth of the IoU head.
iou_head_hidden_dim (`int`, *optional*, defaults to 256):
    The hidden dimension of the IoU head.
dynamic_multimask_via_stability (`bool`, *optional*, defaults to `True`):
    Whether to use dynamic multimask via stability.
dynamic_multimask_stability_delta (`float`, *optional*, defaults to 0.05):
    The stability delta for the dynamic multimask.
dynamic_multimask_stability_thresh (`float`, *optional*, defaults to 0.98):
    The stability threshold for the dynamic multimask.
mask_decoder_configrS   r   r"   r#   i   mlp_dimr   num_hidden_layersr/   r   attention_downsample_rater   num_multimask_outputsiou_head_depthiou_head_hidden_dimTdynamic_multimask_via_stabilityg?!dynamic_multimask_stability_deltag\(\?"dynamic_multimask_stability_threshr3   N)r=   r>   r?   r@   rA   rB   r   rD   rE   r#   rH   rp   rq   r   rr   rs   rt   ru   rv   boolrw   rG   rx   rI   r3   r<   r:   rm   rm      s    & ,OKJGSs  %&s&!"3"NC"",0#T0/3%u304&4r<   rm   c                      ^  \ rS rSr% SrSr\\\S.r	Sr
\\-  S-  \S'   Sr\\-  S-  \S'   Sr\\-  S-  \S'   S	r\\S
'   U 4S jrSrU =r$ )
Sam2Config   a  
prompt_encoder_config (Union[`dict`, `Sam2PromptEncoderConfig`], *optional*):
    Dictionary of configuration options used to initialize [`Sam2PromptEncoderConfig`].
mask_decoder_config (Union[`dict`, `Sam2MaskDecoderConfig`], *optional*):
    Dictionary of configuration options used to initialize [`Sam2MaskDecoderConfig`].

Example:

```python
>>> from transformers import (
...     Sam2VisionConfig,
...     Sam2PromptEncoderConfig,
...     Sam2MaskDecoderConfig,
...     Sam2Model,
... )

>>> # Initializing a Sam2Config with `"facebook/sam2.1_hiera_tiny"` style configuration
>>> configuration = Sam2Config()

>>> # Initializing a Sam2Model (with random weights) from the `"facebook/sam2.1_hiera_tiny"` style configuration
>>> model = Sam2Model(configuration)

>>> # Accessing the model configuration
>>> configuration = model.config

>>> # We can also initialize a Sam2Config from a Sam2VisionConfig, Sam2PromptEncoderConfig, and Sam2MaskDecoderConfig

>>> # Initializing SAM2 vision encoder, memory attention, and memory encoder configurations
>>> vision_config = Sam2VisionConfig()
>>> prompt_encoder_config = Sam2PromptEncoderConfig()
>>> mask_decoder_config = Sam2MaskDecoderConfig()

>>> config = Sam2Config(vision_config, prompt_encoder_config, mask_decoder_config)
```sam2)rO   re   ro   NrO   re   ro   r&   r'   c                   > [        U R                  [        5      (       aU  U R                  R                  SS5      U R                  S'   [        U R                  S      " S0 U R                  D6U l        O U R                  c  [        S   " 5       U l        [        U R
                  [        5      (       a  [        S0 U R
                  D6U l        OU R
                  c  [        5       U l        [        U R                  [        5      (       a  [        S0 U R                  D6U l        OU R                  c  [        5       U l        [        TU ](  " S0 UD6  g )NrC   rP   r3   )r]   rO   r^   r_   r   re   rc   ro   rm   r4   r5   r6   s     r:   r5   Sam2Config.__post_init__  s   d(($///3/A/A/E/ElTg/hD|,!/0B0B<0P!Q!gTXTfTf!gD'!/0C!D!FDd00$77)@)^4C]C])^D&''/)@)BD&d..55'<'Xt?W?W'XD$%%-'<'>D$''r<   )ro   re   rO   )r=   r>   r?   r@   rA   rC   r	   rc   rm   ra   rO   r^   r   rE   re   ro   r'   rG   r5   rI   rJ   rK   s   @r:   r{   r{      sx    !F J#!84K 59M4**T18<@4"22T9@:> 0047>#u#( (r<   r{   )r{   r   rM   rc   rm   N)rA   huggingface_hub.dataclassesr   configuration_utilsr   utilsr   autor   r	   r   rM   rc   rm   r{   __all__r3   r<   r:   <module>r      s     . 3 # - 78I() I(  9I(X 786(' 6(  96(r 78.   9, 78!5, !5  9!5H 78A(! A(  9A(Hr<   