
    Z j                         S r SSKJrJrJrJrJrJr  SSKJ	r	  SSK
Jr  SSKJr  \R                  " \5      rSSS	S
.rSS0rSr " S S\5      rS/rg)zTokenization classes for Qwen2.    )
AddedTokenRegex	Tokenizerdecodersnormalizerspre_tokenizers)BPE   )TokenizersBackend)loggingz
vocab.jsonz
merges.txtztokenizer.json)
vocab_filemerges_filetokenizer_filezqwen/qwen-tokenizeri   zn(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\r\n\p{L}\p{N}]?\p{L}+|\p{N}| ?[^\s\p{L}\p{N}]+[\r\n]*|\s*[\r\n]+|\s+(?!\S)|\s+c                      ^  \ rS rSr\rSS/r\r       SS\	\
\	\4   -  S-  S\	\\	   -  S-  S\	S\	S	\	4
U 4S
 jjjrSrU =r$ )Qwen2Tokenizer$   	input_idsattention_maskNvocabmerges	unk_token	eos_token	pad_tokenc                   > Ub  UOSU l         Ub  UOSS0U l        U=(       d    / U l        [        [	        U R                  U R                  S S SSSSS95      U l        [        R                  " 5       U R
                  l        [        R                  " 5       U R
                  l        [        R                  " [        R                  " [        [         5      SSS9[        R                  " U R                   SS9/5      U R
                  l        [$        T
U ]L  " SUUUUUS	.UD6  U R)                  U R*                   V	s/ s H  n	[-        U	S
S9PM     sn	5        g s  sn	f )NF<|endoftext|>r    )r   r   dropoutr   continuing_subword_prefixend_of_word_suffixfuse_unkbyte_fallbackisolated)behaviorinvert)add_prefix_space	use_regex)r   	bos_tokenr   r   r%   T)special )r%   _vocab_mergesr   r	   
_tokenizerr   	ByteLeveldecoderr   NFC
normalizerr   SequenceSplitr   PRETOKENIZE_REGEXpre_tokenizersuper__init__
add_tokensall_special_tokensr   )selfr   r   r   r'   r   r   r%   kwargstoken	__class__s             }/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/models/qwen2/tokenization_qwen2.pyr6   Qwen2Tokenizer.__init__)   sS    5E4P 0V[      	 |#kk||*,#%#	
 #+"4"4"6%0__%6"(6(?(?$$+,' 
 ((%)%:%:#
)
% 	 	
-	
 	
 	dF]F]^F]UE48F]^_^s   :E)r+   r,   r*   r%   )NNr   Nr   r   N)__name__
__module____qualname____firstlineno__VOCAB_FILES_NAMESvocab_files_namesmodel_input_namesr	   modelstrdictintlistr6   __static_attributes____classcell__)r<   s   @r=   r   r   $   s    )$&67E .2)-(((9`T#s(^#d*9` d3i$&9` 	9` 9` 9` 9`    r   N)__doc__
tokenizersr   r   r   r   r   r   tokenizers.modelsr	   tokenization_utils_tokenizersr   utilsr   
get_loggerr?   loggerrC   MAX_MODEL_INPUT_SIZESr3   r   __all__r)   rM   r=   <module>rW      sn    & Z Z ! >  
		H	% &  /6  J >`& >`B 
rM   