
    Z jӶ                     |    % S r SSKrSSKrSSKrSSKrSSKJr  SSKJr  SSK	J
r
  SSKJr  SSKJrJr  SS	KJr  SS
KJr  SSKJrJrJrJrJr  SSKJr  SSKJr  SSKJr  SSK J!r!J"r"J#r#J$r$J%r%  \" 5       (       a  SSK&J'r'  OSr'\" 5       (       a  SSK(J)r)  OSr)\RT                  " \+5      r,0 r-\.\/\0\   4   \1S'   0 r2\.\/\0\   4   \1S'   \\/\/S-  4   " / S\" 5       (       a  SOS4PS\" 5       (       a  SOS4PS\" 5       (       a  SOS4PS\" 5       (       a  SOS4PS\" 5       (       a  SOS4PS \" 5       (       a  SOS4PS!\" 5       (       a  S"OS4PS#\" 5       (       a  S$OS4PS%PS&\" 5       (       a  SOS4PS'\" 5       (       a  S(OS4PS)PS*PS+\" 5       (       a  S,OS4PS-\" 5       (       a  S.OS4PS/PS0\" 5       (       a  S1OS4PS2PS3\" 5       (       a  SOS4PS4\" 5       (       a  S5OS4PS6PS7\" 5       (       a  SOS4PS8PS9\" 5       (       a  S:OS4PS;PS<\" 5       (       a  SOS4PS=PS>\" 5       (       a  SOS4PS?\" 5       (       a  SOS4PS@PSA\" 5       (       a  SBOS4PSC\" 5       (       a  S5OS4PSD\" 5       (       a  SOS4PSE\" 5       (       a  SOS4PSF\" 5       (       a  SOS4PSG\" 5       (       a  SOS4PSH\" 5       (       a  SIOS4PSJPSKPSLPSMPSN\" 5       (       a  S5OS4PSO\" 5       (       a  SPOS4PSQ\" 5       (       a  SROS4PSSPST\" 5       (       a  SOS4PSU\" 5       (       a  SVOS4PSW\" 5       (       a  SOS4PSX\" 5       (       a  S5OS4PSY\" 5       (       a  SOS4PSZPS[\" 5       (       a  S\OS4PS]\" 5       (       a  S^OS4PS_PS`\" 5       (       a  SOS4PSa\" 5       (       a  S5OS4PSb\" 5       (       a  ScOS4PSd\" 5       (       a  SeOS4PSfPSg\" 5       (       a  ShOS4PSi\" 5       (       a  SjOS4PSk\" 5       (       a  SjOS4PSl\" 5       (       a  SjOS4PSm\" 5       (       a  SjOS4PSn\" 5       (       a  SjOS4PSo\" 5       (       a  SjOS4PSp\" 5       (       a  SOS4PSq\" 5       (       a  SrOS4PSs\" 5       (       a  SrOS4PSt\" 5       (       a  SrOS4PSu\" 5       (       a  SrOS4PSv\" 5       (       a  SrOS4PSw\" 5       (       a  SrOS4PSx\" 5       (       a  SrOS4PSy\" 5       (       a  SrOS4PSz\" 5       (       a  SrOS4PS{\" 5       (       a  S|OS4PS}\" 5       (       a  S5OS4PS~\" 5       (       a  S5OS4PS\" 5       (       a  S5OS4PS\" 5       (       a  S\OS4PSPS\" 5       (       a  S5OS4PSPSPSPSPS\" 5       (       a  SOS4PS\" 5       (       a  SOS4PS\" 5       (       a  SOS4PSPSPS\" 5       (       a  SOS4PS\" 5       (       a  SOS4PS\" 5       (       a  S5OS4PS\" 5       (       a  S5OS4PS\" 5       (       a  SOS4PS\" 5       (       a  S5OS4PS\" 5       (       a  SOS4PS\" 5       (       a  SOS4PS\" 5       (       a  SOS4PS\" 5       (       a  SOS4PS\" 5       (       a  SOS4PS\" 5       (       a  SOS4PS\" 5       (       a  SOS4PS\" 5       (       a  SOS4PS\" 5       (       a  SOS4PS\" 5       (       a  SOS4PS\" 5       (       a  S"OS4PS\" 5       (       a  S"OS4PSPS\" 5       (       a  SOS4PS\" 5       (       a  SOS4PS\" 5       (       a  S\OS4PS\" 5       (       a  S\OS4PS\" 5       (       a  SOS4PS\" 5       (       a  SOS4PS\" 5       (       a  SOS4PS\" 5       (       a  SOS4PSPS\" 5       (       a  SOS4PS\" 5       (       a  SOS4PSPS\" 5       (       a  SrOS4PS\
" 5       (       a  SO\" 5       (       a  SrOS4PS\
" 5       (       a  SO\" 5       (       a  SrOS4PS\
" 5       (       a  SO\" 5       (       a  SrOS4PS\
" 5       (       a  SO\" 5       (       a  SrOS4PS\
" 5       (       a  SO\" 5       (       a  SrOS4PS\" 5       (       a  SOS4PS\" 5       (       a  SOS4PS\" 5       (       a  SOS4PS\" 5       (       a  SOS4PS\" 5       (       a  S\OS4PSPS\" 5       (       a  SOS4PS\" 5       (       a  SOS4PS\" 5       (       a  SOS4PS\" 5       (       a  SOS4PSPS\" 5       (       a  SOS4PS\" 5       (       a  SOS4PS\" 5       (       a  SOS4PS\" 5       (       a  SOS4PS\" 5       (       a  SOS4PS\" 5       (       a  SOS4PS\" 5       (       a  S\OS4PS\" 5       (       a  S\OS4PS\" 5       (       a  SrOS4PS\" 5       (       a  SrOS4PS\" 5       (       a  S\OS4PS\" 5       (       a  SOS4PS\" 5       (       a  SOS4PS\" 5       (       a  SOS4PS\" 5       (       a  S5OS4PS\" 5       (       a  SOS4PS\" 5       (       a  SOS4PS\" 5       (       a  SOS4PS\" 5       (       a  S.OS4PS\" 5       (       a  S.OS4PSPS\" 5       (       a  S5OS4PSPS\" 5       (       a  SOS4PS\
" 5       (       a  SO\" 5       (       a  SrOS4PS\" 5       (       a  SOS4PS\" 5       (       a  SOS4PSPS\" 5       (       a  SOS4PS\" 5       (       a  SOS4PS\" 5       (       a  SOS4PS\" 5       (       a  SOS4PS\" 5       (       a  SOS4PS\" 5       (       a  SOS4PS\" 5       (       a  SOS4PS\" 5       (       a  SOS4PS\" 5       (       a  SOS4PS\" 5       (       a  SOS4PS\" 5       (       a  SOS4PS\" 5       (       a  SOS4PS\" 5       (       a  SOS4PS\" 5       (       a  SOS4PGS \" 5       (       a  SOS4PGS\" 5       (       a  SOS4PGSPGS\" 5       (       a  SOS4PGS\" 5       (       a  SjOS4PGS\" 5       (       a  GSOS4PGS\" 5       (       a  GSOS4PGS	\" 5       (       a  SOS4PGS
PGSPGSPGS\" 5       (       a  GSOS4PGS\" 5       (       a  S\OS4PGS\" 5       (       a  SOS4PGS\" 5       (       a  SOS4PGS\" 5       (       a  GSOS4PGS\" 5       (       a  GSOS4PGS\" 5       (       a  SjOS4PGS\" 5       (       a  GSOS4PGS\" 5       (       a  GSOS4PGS\" 5       (       a  GSOS4PGS\" 5       (       a  GSOS4PGSPGS\" 5       (       a  SOS4PGS \" 5       (       a  S\OS4PGS!\" 5       (       a  S5OS4PGS"\" 5       (       a  SOS4PGS#\" 5       (       a  SOS4PGS$\" 5       (       a  SjOS4PGS%PGS&\" 5       (       a  SOS4PGS'\" 5       (       a  SOS4PGS(\" 5       (       a  GS)OS4PGS*\" 5       (       a  SOS4PGS+PGS,PGS-\" 5       (       a  SOS4PGS.\" 5       (       a  SOS4PGS/PGS0\
" 5       (       a  SO\" 5       (       a  SrOS4PGS1\
" 5       (       a  SO\" 5       (       a  SrOS4PGS2PGS3PGS4PGS5PGS6\" 5       (       a  GS7OS4PGS8\" 5       (       a  SOS4PGS9\" 5       (       a  GS:OS4PGS;PGS<\" 5       (       a  SOS4PGS=\" 5       (       a  SOS4PGS>\" 5       (       a  GS?OS4PGS@\" 5       (       a  S\OS4PGSA\" 5       (       a  SOS4PGSB\" 5       (       a  SOS4P5      r31 GSCkr4\5\/   \1GSD'   \4 H  r6\6\3;  d  M  \" 5       (       a  SrOS\3\6'   M      \" \!\35      r7\!Rp                  " 5        V Vs0 s H  u  pX_M	     snn r9GSE r:GSF r;GSG\/GSH\0\   S-  4GSI jr<       GSVGSJ\/\Rz                  \/   -  GSK\/\Rz                  \/   -  S-  GSL\>GSM\.\/\/4   S-  GSN\>\/-  S-  GSO\/S-  GSP\>GSQ\/GSH\.\/\4   4GSR jjr? " GSS GST5      r@GSUGST/rAgs  snn f (W  zAuto Tokenizer class.    N)OrderedDict)Any)is_mistral_common_available   )PreTrainedConfig)get_class_from_dynamic_moduleresolve_trust_remote_code)load_gguf_checkpoint)TOKENIZER_CONFIG_FILE)extract_commit_hashis_g2p_en_availableis_sentencepiece_availableis_tokenizers_availablelogging)cached_file   )EncoderDecoderConfig   )_LazyAutoMapping)CONFIG_MAPPING_NAMES
AutoConfigconfig_class_to_model_typemodel_type_to_module_name!replace_list_option_in_docstrings)TokenizersBackend)SentencePieceBackendREGISTERED_TOKENIZER_CLASSESREGISTERED_FAST_ALIASESaimv2CLIPTokenizeralbertAlbertTokenizeralignBertTokenizeraudioflamingo3Qwen2Tokenizer
aya_visionCohereTokenizerbarkbartRobertaTokenizerbarthezBarthezTokenizer)bartphoBartphoTokenizerbertzbert-generationBertGenerationTokenizer)zbert-japaneseBertJapaneseTokenizer)bertweetBertweetTokenizerbig_birdBigBirdTokenizerbigbird_pegasusPegasusTokenizer)biogptBioGptTokenizer
blenderbotBlenderbotTokenizer)zblenderbot-smallBlenderbotSmallTokenizerblipzblip-2GPT2Tokenizer)bridgetowerr+   bros)byt5ByT5Tokenizer	camembertCamembertTokenizer)canineCanineTokenizerchinese_clip)clapr+   clipclipseg)clvpClvpTokenizer
code_llamaCodeLlamaTokenizercodegencoherecohere2colqwen2convbertcpmCpmTokenizer)cpmantCpmAntTokenizer)ctrlCTRLTokenizer)zdata2vec-audioWav2Vec2CTCTokenizer)zdata2vec-textr+   dbrxdebertaDebertaTokenizerz
deberta-v2DebertaV2Tokenizer)diaDiaTokenizer
distilbertdprDPRQuestionEncoderTokenizerelectraemu3ernie)esmEsmTokenizerfalcon_mambaGPTNeoXTokenizerfastspeech2_conformerFastSpeech2ConformerTokenizer)flaubertFlaubertTokenizerflava	flex_olmo	florence2BartTokenizerfnetFNetTokenizer)fsmtFSMTTokenizerfunnelFunnelTokenizergemmaGemmaTokenizergemma2gemma3gemma3_textgemma3ngemma3n_textgitglmr   glm4glm4_moeglm4_moe_liteglm4v	glm4v_moe	glm_imageglmasrgot_ocr2zgpt-sw3GPTSw3Tokenizergpt2gpt_bigcodegpt_neogpt_neox)gpt_neox_japaneseGPTNeoXJapaneseTokenizergptj)graniter?   )
granitemoer?   )granitemoehybridr?   )granitemoesharedr?   zgrounding-dinogroupvitherbertHerbertTokenizer)hubertr[   )ibertr+   ideficsLlamaTokenizeridefics2instructblipinstructblipvideointernvljais2jina_embeddings_v3XLMRobertaTokenizerzkosmos-2lasr_ctcLasrTokenizerlasr_encoderlayoutlm
layoutlmv2LayoutLMv2Tokenizer
layoutlmv3LayoutLMv3Tokenizer	layoutxlmLayoutXLMTokenizerledLEDTokenizerlighton_ocrQwen2TokenizerFastlilt
longformer)lukeLukeTokenizerlxmertLxmertTokenizerm2m_100M2M100Tokenizermambamamba2marianMarianTokenizermarkuplmMarkupLMTokenizermbartMBartTokenizermbart50MBart50Tokenizer)megar+   zmegatron-bert
metaclip_2)zmgp-strMgpstrTokenizerminicpmv4_6	ministralMistralCommonBackend
ministral3mistralmistral3mixtralmlukeMLukeTokenizerzmm-grounding-dino
mobilebertMobileBertTokenizermpnetMPNetTokenizermpt)mrar+   mt5T5Tokenizermusicgenmusicgen_melodymvpMvpTokenizer)myt5MyT5TokenizernezhanllbNllbTokenizerznllb-moe
nomic_bertnougatNougatTokenizernystromformerolmoolmo2olmo3olmo_hybridolmoezomdet-turbo	oneformerz
openai-gptOpenAIGPTTokenizeroptovis2owlv2owlvitpegasus	pegasus_x)	perceiverPerceiverTokenizerphi)phobertPhobertTokenizer
pix2structpixtralplbartPLBartTokenizerpp_formulanet)
prophetnetProphetNetTokenizerqdqbertqianfan_ocrqwen2qwen2_5_omni
qwen2_5_vlqwen2_audio	qwen2_moeqwen2_vlqwen3qwen3_5Qwen3_5Tokenizerqwen3_5_moe	qwen3_moe
qwen3_nextqwen3_omni_moeqwen3_vlqwen3_vl_moe)ragRagTokenizerrealmrecurrent_gemmareformerReformerTokenizerrembertRemBertTokenizer	retribert)robertar+   )zroberta-prelayernormr+   )roc_bertRoCBertTokenizerroformerRoFormerTokenizerrwkvsam3
sam3_videoseamless_m4tSeamlessM4TTokenizerseamless_m4t_v2shieldgemma2siglipSiglipTokenizersiglip2Siglip2Tokenizerspeech_to_textSpeech2TextTokenizerspeecht5SpeechT5Tokenizer)splinterSplinterTokenizersqueezebertstablelm
starcoder2switch_transformerst5t5gemma)tapasTapasTokenizertrocrtvpudopUdopTokenizerumt5)	unispeechr[   )zunispeech-satr[   viltvisual_bert)vitsVitsTokenizervoxtralvoxtral_realtime)wav2vec2r[   )zwav2vec2-bertr[   )zwav2vec2-conformerr[   )wav2vec2_phonemeWav2Vec2PhonemeCTCTokenizerwhisperWhisperTokenizerxclipxglmXGLMTokenizer)xlmXLMTokenizerzxlm-robertazxlm-roberta-xlxlnetXLNetTokenizerxlstmxmodyoso>%   fuyuphi3jambajanusllavamolmonvfp4r  arcticchatlmmolmo2phi3_vphimoeopencuaopenvlastep3p5minicpmvnemotronstep3_vlvipllava	chameleon	internlm2
cohere_asr
h2ovl_chat
llava_next
minimax_m2
modernbertdeepseek_v2deepseek_v3deepseek_v4deepseek_vldeepseek_ocrdeepseek_ocr2internvl_chatdeepseek_vl_v2hyperclovax_vlmdeepseek_vl_hybrid)MODELS_WITH_INCORRECT_HUB_TOKENIZER_CLASSc                 z    [        U SSS9 n[        R                  " U5      sSSS5        $ ! , (       d  f       g= f)z*Loads a vocabulary file into a dictionary.rutf-8encodingN)openjsonload)
vocab_filereaders     {/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/models/auto/tokenization_auto.py
load_vocabr    s(    	j#	0Fyy  
1	0	0s   ,
:c                    / n[        U SSS9 nU H\  nUR                  5       nU(       d  M  UR                  S5      (       a  M4  UR                  [	        UR                  5       5      5        M^     SSS5        U$ ! , (       d  f       U$ = f)z Loads a merges file into a list.rx  ry  rz  #N)r|  strip
startswithappendtuplesplit)merges_filemergesr  lines       r  load_mergesr    sp    F	k3	1VD::<DtDOOC00eDJJL12  
2
 M 
2	1
 Ms   A;A;,A;;
B

class_namereturnc                    U S;   a  [         $ U [        ;   a	  [        U    $ U [        ;   a	  [        U    $ U S:X  a  [         $ [        R	                  5        H  u  pX :X  d  M  [        U5      nUS;   a  U S:X  a  [        R                  " SS5      nO[        R                  " SU 3S5      n [        X05      n[        US	S 5      =n(       ac  U[        R                  ;   aO  [        R                  U   n[        XdR                  S
-   U5        [        R                  R                  US-   U5        Us  $    [        R                   R#                  5        H  n[        USS 5      U :X  d  M  Us  $    [        R                  " S5      n[%        X5      (       a  [        X5      $ U R'                  S
5      (       a  [)        U S S 5      $ g ! [         a     GM|  f = f)N>   BloomTokenizerBloomTokenizerFastr   )r   r   r   r   r   r   rA  r   z.tokenization_mistral_commontransformers.ztransformers.models
__module__Fast_fast__name__)r   r   r   TOKENIZER_MAPPING_NAMESitemsr   	importlibimport_modulegetattrsysmodulessetattrr  
setdefaultAttributeErrorTOKENIZER_MAPPING_extra_contentvalueshasattrendswithtokenizer_class_from_name)	r  module_nametokenizer_classmoduleresultsubmodbase_mod	tokenizermain_modules	            r  r  r    s   ==  ,,&z2211+J77((   )@(E(E(G$(3K@Krr"88"001OQ_`"001[M1BDYZ	 4%flDAAFAvQTQ\Q\G\"{{62HHoo&>GKK**6G+;XF# )H* '55<<>	9j$/:= ? )).9K{''{// 6""(CR99% " s   )BG
GGpretrained_model_name_or_path	cache_dirforce_downloadproxiestokenrevisionlocal_files_only	subfolderc                 "   UR                  S5      n	[        U [        UUUUUUUSSSU	S9n
U
c  [        R	                  S5        0 $ [        X5      n	[        U
SS9 n[        R                  " U5      nSSS5        U	WS'   U$ ! , (       d  f       N= f)a
  
Loads the tokenizer configuration from a pretrained model tokenizer configuration.

Args:
    pretrained_model_name_or_path (`str` or `os.PathLike`):
        This can be either:

        - a string, the *model id* of a pretrained model configuration hosted inside a model repo on
          huggingface.co.
        - a path to a *directory* containing a configuration file saved using the
          [`~PreTrainedTokenizer.save_pretrained`] method, e.g., `./my_model_directory/`.

    cache_dir (`str` or `os.PathLike`, *optional*):
        Path to a directory in which a downloaded pretrained model configuration should be cached if the standard
        cache should not be used.
    force_download (`bool`, *optional*, defaults to `False`):
        Whether or not to force to (re-)download the configuration files and override the cached versions if they
        exist.
    proxies (`dict[str, str]`, *optional*):
        A dictionary of proxy servers to use by protocol or endpoint, e.g., `{'http': 'foo.bar:3128',
        'http://hostname': 'foo.bar:4012'}.` The proxies are used on each request.
    token (`str` or *bool*, *optional*):
        The token to use as HTTP bearer authorization for remote files. If `True`, will use the token generated
        when running `hf auth login` (stored in `~/.huggingface`).
    revision (`str`, *optional*, defaults to `"main"`):
        The specific model version to use. It can be a branch name, a tag name, or a commit id, since we use a
        git-based system for storing models and other artifacts on huggingface.co, so `revision` can be any
        identifier allowed by git.
    local_files_only (`bool`, *optional*, defaults to `False`):
        If `True`, will only try to load the tokenizer configuration from local files.
    subfolder (`str`, *optional*, defaults to `""`):
        In case the tokenizer config is located inside a subfolder of the model repo on huggingface.co, you can
        specify the folder name here.

<Tip>

Passing `token=True` is required when you want to use a private model.

</Tip>

Returns:
    `dict`: The configuration of the tokenizer.

Examples:

```python
# Download configuration from huggingface.co and cache.
tokenizer_config = get_tokenizer_config("google-bert/bert-base-uncased")
# This model does not have a tokenizer config so the result will be an empty dict.
tokenizer_config = get_tokenizer_config("FacebookAI/xlm-roberta-base")

# Save a pretrained tokenizer locally and you can reload its config
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("google-bert/bert-base-cased")
tokenizer.save_pretrained("tokenizer-test")
tokenizer_config = get_tokenizer_config("tokenizer-test")
```_commit_hashF)r  r  r  r  r  r  r   _raise_exceptions_for_gated_repo%_raise_exceptions_for_missing_entries'_raise_exceptions_for_connection_errorsr  Nz\Could not locate the tokenizer configuration file, will try to use the model config instead.ry  rz  )	getr   r   loggerinfor   r|  r}  r~  )r  r  r  r  r  r  r  r  kwargscommit_hashresolved_config_filer  r  s                r  get_tokenizer_configr    s    J **^,K&%%))..305  #rs	%&:HK	"W	56" 
6(F>M 
6	5s   B  
Bc                   h    \ rS rSrSrS r\\" \5      S\	\
-  4S j5       5       r\ S	S j5       rSrg)
AutoTokenizeri4  a  
This is a generic tokenizer class that will be instantiated as one of the tokenizer classes of the library when
created with the [`AutoTokenizer.from_pretrained`] class method.

This class cannot be instantiated directly using `__init__()` (throws an error).
c                     [        S5      e)Nz}AutoTokenizer is designed to be instantiated using the `AutoTokenizer.from_pretrained(pretrained_model_name_or_path)` method.)OSError)selfs    r  __init__AutoTokenizer.__init__<  s    _
 	
    r  c           	      B   UR                  SS5      nSUS'   UR                  SS5      nUR                  SS5      nUR                  SS5      nUR                  S5      nUbx  [        R                  US5      n	U	c,  [        S	U S
SR	                  S [         5       5       S35      e[        U	5      n
U
c  [        SU	 S35      eU
R                  " U/UQ70 UD6$ U(       a0  [        X40 UD6n[        USS9S   n[        R                  " S+0 UD6nOUc   [        R                  " U4SU0UD6nUR                  n[        U40 UD6nUR                  SS5      nSnSU;   a9  [        US   [         ["        45      (       a  US   nOUS   R                  SS5      nUc  Ub  Ub  US:w  a  [        R                  U5      b  [        R                  U5      R%                  S5      UR%                  S5      :w  a  [        R                  U5      R%                  S5      nUS;  aB  U[&        ;   a  UOUn[        U5      n
U
b&  U
R(                  S;  a  U
R                  " U/UQ70 UD6$ [*        b  [*        R                  " U/UQ70 UD6$ [        SU S35      eSU;   a  US   US'   U(       a  UR-                  S5      (       a  USS nUSLn[/        U5      [0        ;   =(       d/    USL=(       a$    [        U5      SL=(       d    [        US-   5      SLnU=(       ab    [/        U5      [0        ;  =(       aI    USL=(       a>    [        U5      =(       d    [        US-   5      R2                  R5                  S5      (       + nU(       a  U[&        ;   a  SnSnU(       a<  US   b  US   nOUS   nSU;   a  UR7                  S5      S   nOSn[9        XqUUU5      nU(       av  U(       ao  U(       dh  U(       a  [        UR%                  S5      5        [;        WU40 UD6n
UR                  S S5      nU
R=                  5         U
R                  " U/UQ7SU0UD6$ Ubl  Un[        U5      n
U
c$  UR-                  S5      (       d  [        US-   5      n
U
b  U
R(                  S!:X  a  [*        n
U
c  [*        n
U
R                  " U/UQ70 UD6$ [?        USS5      (       aN  UR@                  nS"U;  a  UR-                  S5      (       a  USS n[        U5      n
U
R                  " U/UQ70 UD6$ [        U[B        5      (       a{  [/        URD                  5      [/        URF                  5      LaD  [H        RK                  S#URF                  RL                   S$URD                  RL                   S%35        URF                  n[O        [/        U5      R(                  5      =(       d    [?        US&S5      nUb<  [0        R                  [/        U5      [*        5      n
U
b  U
R                  " U/UQ70 UD6$ UR                  SS5      nUb  US':w  a  UR-                  S5      (       a  USS n[        U5      n
U
c$  UR-                  S5      (       d  [        US-   5      n
U
b  U
R(                  S!:X  a  [*        n
U
c  [*        n
U
R                  " U/UQ70 UD6$ [        S(URL                   S)SR	                  S* [0         5       5       S35      e! [        [        4 a    [        R                  " U40 UD6n GNf = f),a  
Instantiate one of the tokenizer classes of the library from a pretrained model vocabulary.

The tokenizer class to instantiate is selected based on the `model_type` property of the config object (either
passed as an argument or loaded from `pretrained_model_name_or_path` if possible), or when it's missing, by
falling back to using pattern matching on `pretrained_model_name_or_path`:

List options

Params:
    pretrained_model_name_or_path (`str` or `os.PathLike`):
        Can be either:

            - A string, the *model id* of a predefined tokenizer hosted inside a model repo on huggingface.co.
            - A path to a *directory* containing vocabulary files required by the tokenizer, for instance saved
              using the [`~PreTrainedTokenizer.save_pretrained`] method, e.g., `./my_model_directory/`.
            - a path to a single saved vocabulary file if and only if the tokenizer only requires a
              single vocabulary file (like Bert or XLNet), e.g.: `./my_model_directory/vocab.txt`. (Not
              applicable to all derived classes)
    inputs (additional positional arguments, *optional*):
        Will be passed along to the Tokenizer `__init__()` method.
    config ([`PreTrainedConfig`], *optional*)
        The configuration object used to determine the tokenizer class to instantiate.
    cache_dir (`str` or `os.PathLike`, *optional*):
        Path to a directory in which a downloaded pretrained model configuration should be cached if the
        standard cache should not be used.
    force_download (`bool`, *optional*, defaults to `False`):
        Whether or not to force the (re-)download the model weights and configuration files and override the
        cached versions if they exist.
    proxies (`dict[str, str]`, *optional*):
        A dictionary of proxy servers to use by protocol or endpoint, e.g., `{'http': 'foo.bar:3128',
        'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request.
    revision (`str`, *optional*, defaults to `"main"`):
        The specific model version to use. It can be a branch name, a tag name, or a commit id, since we use a
        git-based system for storing models and other artifacts on huggingface.co, so `revision` can be any
        identifier allowed by git.
    subfolder (`str`, *optional*):
        In case the relevant files are located inside a subfolder of the model repo on huggingface.co (e.g. for
        facebook/rag-token-base), specify it here.
    tokenizer_type (`str`, *optional*):
        Tokenizer type to be loaded.
    backend (`str`, *optional*, defaults to `"tokenizers"`):
        Backend to use for tokenization. Valid options are:
        - `"tokenizers"`: Use the HuggingFace tokenizers library backend (default)
        - `"sentencepiece"`: Use the SentencePiece backend
    trust_remote_code (`bool`, *optional*, defaults to `False`):
        Whether or not to allow for custom models defined on the Hub in their own modeling files. This option
        should only be set to `True` for repositories you trust and in which you have read the code, as it will
        execute code present on the Hub on your local machine.
    kwargs (additional keyword arguments, *optional*):
        Will be passed to the Tokenizer `__init__()` method. Can be used to set special tokens like
        `bos_token`, `eos_token`, `unk_token`, `sep_token`, `pad_token`, `cls_token`, `mask_token`,
        `additional_special_tokens`. See parameters in the `__init__()` for more details.

Examples:

```python
>>> from transformers import AutoTokenizer

>>> # Download vocabulary from huggingface.co and cache.
>>> tokenizer = AutoTokenizer.from_pretrained("google-bert/bert-base-uncased")

>>> # Download vocabulary from huggingface.co (user-uploaded) and cache.
>>> tokenizer = AutoTokenizer.from_pretrained("dbmdz/bert-base-german-cased")

>>> # If vocabulary files are in a directory (e.g. tokenizer was saved using *save_pretrained('./test/saved_model/')*)
>>> # tokenizer = AutoTokenizer.from_pretrained("./test/bert_saved_model/")

>>> # Download vocabulary from huggingface.co and define model-specific arguments
>>> tokenizer = AutoTokenizer.from_pretrained("FacebookAI/roberta-base", add_prefix_space=True)

>>> # Explicitly use the tokenizers backend
>>> tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/llama-tokenizer", backend="tokenizers")

>>> # Explicitly use the sentencepiece backend
>>> tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/llama-tokenizer", backend="sentencepiece")
```configNT
_from_autouse_fasttokenizer_typetrust_remote_code	gguf_filezPassed `tokenizer_type` z3 does not exist. `tokenizer_type` should be one of z, c              3   $   #    U  H  ov   M     g 7fN .0cs     r  	<genexpr>0AutoTokenizer.from_pretrained.<locals>.<genexpr>  s      D,Cq,Cs   r  zTokenizer class z is not currently imported.F)return_tensorsr  auto_mapr   r  )r   PythonBackendPreTrainedTokenizerFastzTokenizer class 'zf' specified in the tokenizer config was not found. The tokenizer may need to be converted or re-saved.r  r  ztransformers.r   r   z--code_revisionr  r  z The encoder model config class: z3 is different from the decoder model config class: z. It is not recommended to use the `AutoTokenizer.from_pretrained()` method in this case. Please use the encoder and decoder specific tokenizer classes.
model_typer   z!Unrecognized configuration class z8 to build an AutoTokenizer.
Model type should be one of c              3   8   #    U  H  oR                   v   M     g 7fr  )r  r  s     r  r  r  W  s     4[IZAZZIZs   r  )(popr  r  
ValueErrorjoinr  from_pretrainedr   r
   r   	for_modelr  r   r  r  
isinstancer  listremovesuffixrv  r  r   r  typer  r  r  r  r	   r   register_for_auto_classr  r  r   decoderencoderr  warning	__class__r   )clsr  inputsr  r  _r  r  r  tokenizer_class_namer  	gguf_pathconfig_dictconfig_model_typetokenizer_configtokenizer_config_classtokenizer_auto_mapregistered_class_namer  has_remote_codehas_local_codeexplicit_local_code	class_refupstream_repotokenizer_class_candidate_classr  s                              r  r  AutoTokenizer.from_pretrainedB  s   d Hd+#| JJz4($4d;"JJ':DAJJ{+	 %#:#>#>~t#T #+ .~.>>qyy D,C DDEQH 
 88LMO& #34H3IId!eff"223PdSYd]cdd#$AWPVWI.yOPXYK))8K8F^c#331EVZ` #-- 00MXQWX!1!5!56G!N "))*:6FF%5j%A"%5j%A%E%EoW[%\"
 &&2!-!R''++,=>J(,,->?LLVT&33F;= %<$?$?@Q$R$_$_`f$g!$,mm ),UU */ 
 #<J"G".?3K3K T 4
 +::;Xl[alekll ,(889VjY_jcijj#$:#; <F G 
 --%5n%EF>"!&<&E&Ef&M&M%;CR%@",D8f):: 
"$. )*@AM Z,-Cf-LMUYY	 	  V$55 'd2 9-.DE R01G&1PQ*ZZ89 	 04]]#O!%!!$0.q1	.q1	y  ) 5a 8 $ 9!.Racp! 09L%)*@*M*Mf*UV;IGdohnoO

?D1A335"22-06J[_e  $/(>%78QRO&/H/Q/QRX/Y/Y";<UX^<^"_*/G/G?/Z"3&"3"223PdSYd]cddV.55++F(66??6;R;R7?O"223PdSYd]cdd f233FNN#4+??6v~~7O7O6P Q%%+^^%=%=$> ?22 ^^F/V0E0EFm'RXZfhlJm
!/33DLBSTO*&667ThW]haghh "2!5!56G!N!-%)<<AWA`A`agAhAh)?)D&78NOO&/E/N/Nv/V/V";<RU[<["\*/G/G?/Z"3&"3"223PdSYd]cdd/0@0@/A B++/994[IZ4[+[*\\]_
 	
} ( c)99:Wb[abcs   [3 3'\\Nc                     Uc  Ub  UnOUb  UnO[        S5      eX#U4 H  nUc  M  U[        UR                  '   M     Ub  Ub  U[        UR                  '   [        R                  XUS9  g)a|  
Register a new tokenizer in this mapping.

Args:
    config_class ([`PreTrainedConfig`]):
        The configuration corresponding to the model to register.
    tokenizer_class: The tokenizer class to register (V5 - preferred parameter).
    slow_tokenizer_class: (Deprecated) The slow tokenizer to register.
    fast_tokenizer_class: (Deprecated) The fast tokenizer to register.
Nz$You need to pass a `tokenizer_class`)exist_ok)r  r   r  r   r  register)config_classr  slow_tokenizer_classfast_tokenizer_classr  	candidates         r  r  AutoTokenizer.registerZ  s     "#/"6%1"6 !GHH.oVI$CL,Y-?-?@ W  +0D0PEY#$8$A$AB""<8"Tr  r  )NNNF)r  r  __qualname____firstlineno____doc__r  classmethodr   r  r   r   r  staticmethodr  __static_attributes__r  r  r  r  r  4  sZ    
 &'>?T
	1	1T
 @ T
l kpU Ur  r  r  )NFNNNFr  )Br  r  r}  osr  collectionsr   typingr   transformers.utils.import_utilsr   configuration_utilsr   dynamic_module_utilsr   r	   modeling_gguf_pytorch_utilsr
   tokenization_utils_baser   utilsr   r   r   r   r   	utils.hubr   encoder_decoderr   auto_factoryr   configuration_autor   r   r   r   r   tokenization_utils_tokenizersr    tokenization_utils_sentencepiecer   
get_loggerr  r  r   dictstrr  __annotations__r   r  rv  setr  r  r  CONFIG_TO_TYPEr  r  r  PathLikeboolr  r  __all__)kvs   00r  <module>r.     s      	 
 #  G 3 \ ? <  % 2 *  BH			H	% 68 d3S	>2 702 c49n- 2%c3:o6V	%<%>%>/DIV	(?(A(A$tLV 
%<%>%>/DIV 
/F/H/H+dS	V
 
,C,E,E(4PV 
$;$=$=4HV 
'>'@'@#dKV 
*A*C*C&NV 	(V 
$;$=$=4HV 
9S9U9U5[_`V 	3V 	*V 
+B+D+D'$OV 
2I2K2K.QUVV  	&!V" 
0G0I0I,tT#V$ 	9%V& 
$;$=$=4H'V( 
&=&?&??TJ)V* 	,+V, 
$;$=$=4H-V. 	"/V0 
.E.G.G*TR1V2 	&3V4 
,C,E,E4P5V6 	%7V8 
$;$=$=4H9V: 
'>'@'@OdK;V< 	"=V> 
/F/H/H+dS?V@ 
'>'@'@OdKAVB 
(?(A(A$tLCVD 
)@)B)B%MEVF 
)@)B)B%MGVH 
(?(A(A_tLIVJ 
"9";";FKVL 	&MVN 	"OVP 	3QVR 	.SVT 
$;$=$=4HUVV 
*A*C*C&NWVX 
/F/H/H+dSYVZ 	 [V\ 
*A*C*CN]V^ 
1H1J1J-PTU_V` 
'>'@'@OdKaVb 
$;$=$=4HcVd 
%<%>%>/DIeVf 	 gVh 
/F/H/H+dSiVj 
!EXEZEZ"A`dekVl 	*mVn 
%<%>%>/DIoVp 
)@)B)BoMqVr 
)@)B)BoMsVt 
$;$=$=4HuVv 	"wVx 
(?(A(A$tLyVz 
&=&?&?"TJ{V| 
'>'@'@#dK}V~ 
'>'@'@#dKV@ 
,C,E,E(4PAVB 
(?(A(A$tLCVD 
-D-F-F)DQEVF 
#:#<#<$GGVH 
'>'@'@#dKIVJ 
(?(A(A$tLKVL 
,C,E,E(4PMVN 
1H1J1J-PTUOVP 
)@)B)B%MQVR 
-D-F-F)DQSVT 
-D-F-F)DQUVV 
*A*C*C&NWVX 
,C,E,E(4PYVZ 
)C)E)E%4P[V\ 
$;$=$=4H]V^ 
+B+D+D$O_V` 
'>'@'@OdKaVb 
+B+D+D'$OcVd 	:eVf 
$;$=$=4HgVh 	%iVj 	(kVl 	.mVn 	.oVp 
.E.G.G?TRqVr 
(?(A(A_tLsVt 
*A*C*C&NuVv 	+wVx 	&yVz 
(?(A(A$tL{V| 
)@)B)B%M}V~ 
,C,E,E4PV@ 
1H1J1JoPTUAVB 
)@)B)B%MCVD 
%<%>%>/DIEVF 
8O8Q8Q4W[\GVH 
.E.G.G*TRIVJ 
(?(A(A_tLKVL 
,C,E,E4PMVN 
(?(A(A_tLOVP 
0G0I0I,tTQVR 
0G0I0I,tTSVT 
.E.G.G*TRUVV 
"9";";FWVX 
0G0I0I,tTYVZ 
'>'@'@#dK[V\ 
-D-F-F)DQ]V^ 	"_V` 
(?(A(A$tLaVb 
)C)E)E%4PcVd 
(?(A(A$tLeVf 
)@)B)B%MgVh 
(B(D(D$$OiVj 
,C,E,E(4PkVl 
&=&?&?"TJmVn 
*A*C*C&NoVp 	%qVr 
-D-F-F/DQsVt 
0G0I0I,tTuVv 	'wVx 
/F/H/H+dSyV| *,, #)@)B)B%		
{VH *,, #)@)B)B%		
GVT *,, #)@)B)B%		
SV` *,, #)@)B)B%		
_Vl *,, #)@)B)B%		
kVv 
&@&B&B"MwVx 
1H1J1JoPTUyVz 
0G0I0I,tT{V| 
&=&?&?"TJ}V~ 
&=&?&?"TJV@ 	$AVB 
!8!:!:ECVD 
&=&?&?]TJEVF 
-D-F-FMDQGVH 
"9";";FIVJ 	"KVL 
%<%>%>/DIMVN 
$;$=$=4HOVP 
(?(A(A_tLQVR 
*A*C*CNSVT 
(?(A(A$tLUVV 
/F/H/H+dSWVX 
'>'@'@#dKYVZ 
(?(A(A$tL[V\ 
)@)B)B%M]V^ 
/F/H/H+dS_V` 
(?(A(A$tLaVb 
+B+D+D$OcVd 
)@)B)BoMeVf 
/F/H/H+dSgVh 
#:#<#<$GiVj 
&=&?&?"TJkVl 
%<%>%>/DImVn 
&=&?&??TJoVp 
*A*C*C&NqVr 
,C,E,E(4PsVt 	,uVv 
#:#<#<$GwVx 	(yVz 
(?(A(A}tL{V~ *,, #)@)B)B%		
}VH 
(?(A(A$tLIVJ 
/F/H/H+dSKVL 	.MVN 
'>'@'@OdKOVP 
,C,E,E(4PQVR 
&=&?&?"TJSVT 
-D-F-F)DQUVV 
+B+D+D'$OWVX 
,C,E,E(4PYVZ 
*A*C*C&N[V\ 
)@)B)B%M]V^ 
&=&?&?"TJ_V` 
*A*C*C&NaVb 
.E.G.G*TRcVd 
*A*C*C&NeVf 
+B+D+D'$OgVh 
/F/H/H+dSiVj 
)@)B)B%MkVl 
-D-F-F)DQmVn 	 oVp 
%<%>%>/DIqVr 
0G0I0I,tTsVt 
,C,E,E(4PuVv 
*A*C*C&NwVx 
)@)B)BoMyVz 	({V| 	5}V~ 	)V@ 
,C,E,E(4PAVB 
'>'@'@#dKCVD 
$;$=$=4HEVF 
*A*C*CNGVH 
3J3L3L/RVWIVJ 
6M6O6O2UYZKVL 
-D-F-F)DQMVN 
(B(D(D$$OOVP 
*A*C*C&NQVR 
5O5Q5Q1W[\SVT 
,F,H,H(dSUVV 	*WVX 
+B+D+D$OYVZ 
+B+D+D'$O[V\ 
*A*C*CN]V^ 
1H1J1JPTU_V` 
 7 9 9}tDaVb 
(?(A(A$tLcVd 	$eVf 
+B+D+D'$OgVh 
#:#<#<$GiVj 
$;$=$=4HkVl 
"9";";FmVn 	.oVp 	2qVr 
$;$=$=4HsVt 
+B+D+D$OuVv 	"wVz *,, #)@)B)B%		
yVF *,, #)@)B)B%		
EVP 	-QVR 	2SVT 	7UVV 	<WVX 
*A*C*C&NYVZ 
%<%>%>/DI[V\ 
$;$=$=4H]V^ 	 _V` 
1H1J1J-PTUaVb 
4K4M4M0SWXcVd 
&=&?&?"TJeVf 
(?(A(A$tLgVh 
*A*C*C&NiVj 
&=&?&?"TJkVX x&7 )3s8 &P <J00E\E^E^.Adh
+ < %%9;RS #7#=#=#?@#?41!$#?@!3# 3$s)d2B 3p 04 %)#"]#&S)9#9]R[[%%,] ] #s(^d"	]
 #:] Dj] ] ] 
#s(^]@DU DUN
 
0c As   A@8