
    Z jF                     X    S SK JrJrJrJr  S SKJr  SSKJr  SS0r	 " S S\5      r
S/rg	)
    )	Tokenizerdecodersnormalizerspre_tokenizers)BPE   )TokenizersBackendtokenizer_fileztokenizer.jsonc                      ^  \ rS rSrSr\rSrSS/r\	r
       SS\\\\4   -  S-  S\\\   -  S-  S	\S
\S\S\S\4U 4S jjjrSrU =r$ )Siglip2Tokenizer   zF
Gemma tokenizer + SigLIP2 training default: lowercase normalization.
left	input_idsattention_maskNvocabmerges	unk_token	bos_token	eos_token	pad_token
mask_tokenc                   > Uc9  [        U5      S[        U5      S[        U5      S[        U5      S[        U5      S0nXl        U=(       d    / U l        [        [	        U R                  U R                  S[        U5      S SS95      U l        [        R                  " SS	S
S9U R
                  l        [        R                  " [        R                  " SS5      [        R                  " 5       [        R                  " 5       /5      U R
                  l        [        R                  " SS5      U R
                  l        ["        T
U ]H  " SUUUUUS.UD6  ['        U S5      (       aO  [)        U R*                  [,        5      (       a0  U R*                  R/                  SU R0                  R2                  5        [5        U SS 5      n	U	bI  U	R                   b;  [        R                  " [        R6                  " 5       U	R                   /5      U	l        g g g )Nr         r      T)r   r   fuse_unkr   dropoutbyte_fallback merged_with_previousF)patternbehaviorinvertu   ▁)r   r   r   r   r   init_kwargstokenizer_class
_tokenizer )str_vocab_mergesr   r   r&   r   Splitpre_tokenizerr   SequenceReplaceByteFallbackFusedecoderr   
normalizersuper__init__hasattr
isinstancer$   dict
setdefault	__class____name__getattr	Lowercase)selfr   r   r   r   r   r   r   kwargsbackendr9   s             ځ/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/models/siglip2/tokenization_siglip2.pyr4   Siglip2Tokenizer.__init__)   s    =IIIIJE |#kk||i."	
 )7(<(<"8)
% #+"3"3eS)8+@+@+BHMMOT#
 &1%8%8e%D" 	
!	
 	
 4''Jt7G7G,N,N''(94>>;R;RS$d37#5#5#A!,!5!5{7L7L7NPWPbPb6c!dG $B    )r*   r&   r)   )NNz<unk>z<bos>z<eos>z<pad>z<mask>)r:   
__module____qualname____firstlineno____doc__VOCAB_FILES_NAMESvocab_files_namespadding_sidemodel_input_namesr   modelr(   r7   intlistr4   __static_attributes____classcell__)r9   s   @r@   r   r      s     *L$&67E .2)-    "7eT#s(^#d*7e d3i$&7e 	7e
 7e 7e 7e 7e 7erB   r   N)
tokenizersr   r   r   r   tokenizers.modelsr   tokenization_utils_tokenizersr	   rG   r   __all__r'   rB   r@   <module>rT      s>   , H G ! > &'78 Ae( AeH 
rB   