
    Z jx                        S r SSKJr  SSKrSSKJrJrJrJrJ	r	  SSK
JrJr  SSKJr  SSKJrJrJrJrJr  SS	KJr  SS
KJr  \R2                  " \5      r0 SSSS._SSSSSSSSSSSS.
_SSSSSSSSSSSS.
_SSSSSSSSSSSS.
_SSSSSSSSSSSSSS ._S!SSSSSSSSSSS"SS#S$._S%SSSSSSSSSSS&S'._S(SSSSSSSSSSS.
_S)SSSSSSSSSSSSSS*._S+SSSSSSSSSSS.
_S,S-S.S/S0S1._S2SSSSSSSSSSS.
_S3S4SS5SS6S7._S8S9S:S;S<S=S>SS6S?S@SSA._SBSSSSSSSSCSSD.	_SES4SFSGSHS5S6SI._SJSSSSSSSKSL._SSSS6SSMSNSOSSP.	SSSSSSSSSQSS.
SSSSSSSSSSS#SSR.SSSSSSSSSSS#SSR.S9S:S;S<S=S>SS6S?S@SSA.SSSSSSSSSSS.
0 SSS_STS_SHS_SUS_SVSW_SXS_SYS_SZS_S[S_S\S_S]S_S^S"_S_S_S`S_SS_SaSb_Sc.ErSdSeSfSgShS-S.S/S0SiSj.
SkSS-S.S/S0Sl.Sm.rSnSo0SpSo0Sq.rSr r " Ss St5      r  " Su Sv\5      r! " Sw Sx\5      r" " Sy Sz\5      r# " S{ S|\5      r$ " S} S~\5      r% " S S\5      r&0 S\!_S\"_S\"_S(\"_S)\"_S2\#_S3\$_S+\$_SB\$_SE\$_SJ\$_S8\%_S\$_S\$_S\&_S\&_S\%_\!\!\"S.Er'S\(S\)\\*4   4S jr+g)z
Integration with GGML / The file is copied and adapted from https://github.com/99991/pygguf
with extra methods beings exposed
    )arrayN)	Tokenizerdecodersnormalizerspre_tokenizers
processors)BPEUnigram   )
AddedToken)GemmaConverterGPT2ConverterLlamaConverterQwen2ConverterT5Converter)logging)tqdmgeneral
model_type_model_name_or_path)architecturenamellamamax_position_embeddingsnum_hidden_layersintermediate_sizehidden_sizehead_dim
rope_thetanum_attention_headsnum_key_value_headsrms_norm_eps
vocab_size)
context_lengthblock_countfeed_forward_lengthembedding_lengthrope.dimension_countrope.freq_baseattention.head_countattention.head_count_kv attention.layer_norm_rms_epsilonr#   mistralqwen2	qwen2_moenum_expertsnum_experts_per_tok)r$   r%   r&   r'   r(   r)   r*   r+   r,   r#   expert_countexpert_used_countgpt_ossnum_local_expertssliding_window)r$   r%   r&   r'   r(   r)   r*   r+   r,   r#   r2   r3   r6   lfm2conv_L_cache)r$   r%   r&   r'   r(   r)   r*   r+   r,   r#   zshortconv.l_cacheqwen3	qwen3_moe)r$   r%   r&   r'   r(   r)   attention.key_lengthr*   r+   r,   r#   r2   r3   falcon	tokenizerbos_token_ideos_token_idunk_token_idpad_token_id)ggml.bos_token_idggml.eos_token_idggml.unknown_token_idggml.padding_token_idphi3bloomn_layern_headlayer_norm_epsilon)r%   r'   r*   r#   attention.layer_norm_epsilont5n_positions
num_layersd_ffd_modeld_kv	num_headsrelative_attention_num_bucketsdecoder_start_token_id)r$   r%   r&   r'   r;   r*   r+   rK   z attention.relative_buckets_countrT   r#   stablelmlayer_norm_eps)	r$   r%   r&   r'   r(   r*   r+   rK   r#   gpt2n_ctxn_embdr&   )r%   r$   r'   r&   r*   rK   
starcoder2norm_epsilon)r%   r$   r'   r&   r*   r+   rK   conv_kernel
state_sizetime_step_rank)	r#   r$   r'   r,   r%   zssm.conv_kernelzssm.state_sizezssm.time_step_rankzssm.inner_sizenorm_eps)r$   r%   r&   r'   r(   r)   r;   r*   r+   r,   zattention.sliding_windowr#   r$   r%   r'   r(   
rotary_dimr)   r*   r+   r;   zattention.value_lengthr,   r2   r3   expert_feed_forward_lengthexpert_gating_funcscoring_func)mambanemotrongemma2gemma3umt5deci
minimax_m2tokenizer_typetokensscores
token_typemergesadd_prefix_space)

ggml.modelzggml.tokenszggml.scoreszggml.token_typezggml.mergesrB   rC   rD   rE   zggml.add_space_prefixchat_template)rr   rq   rB   rC   rD   rE   )r=   tokenizer_confignorm_topk_probTuse_routing_bias)r:   rj   c                    [        U[        5      (       d  U/n[        U5      S:X  a  US   nS nOUS   S:w  a  [        S5      eUu  pUS;   a  [	        U S   5      n U $ US;   a  [        U S   5      n U $ US:X  a  [        U S   5      n U $ US:X  a3  [        S	[        U 5      5      R                  5       R                  5       n U $ US:X  a  [        X5      n U $ )
N   r   	   zPReceived multiple types, therefore expected the first type to indicate an array.)r   rw   r            
      )            B)
isinstancelistlen
ValueErrorintfloatboolr   tobytesdecode_gguf_parse_value)_value	data_typearray_data_types      o/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/integrations/ggml.pyr   r   m  s    i&&K	
9~aL	Q<1opp%."	..VAY M 
g	vay! M 
afQi
 M	 
asDL)113::< M 
a"6;M    c                       \ rS rSrS rSrg)GGUFTokenizerSkeletoni  c                   ^ UR                  5        H  u  p#[        XU5        M     [        U S5      (       Gd4  [        U S5      (       a  [        U S5      (       d  [        S5      eU R                  nU R
                  n[        U5       VVs0 s H
  u  pgXuU   _M     snnm[        R                  S5        / n[        TR                  5       5       Hl  u  p/ n[        S[        U	5      5       H,  nU	S U XS  pX;   d  M  X;   d  M  UR                  XU
45        M.     [        UU4S jSS	9nUR                  U5        Mn     [        US
 SS	9nU Vs/ s H  oS   US   4PM     nnXl        OU R                   V	s/ s H  n	[!        U	R#                  S5      5      PM     sn	U l        [        U S5      (       d4  [        [        U R                  5      5       Vs/ s H  nS PM     snU l        [        U S5      (       d  / U l        [        U S5      (       d  S U l        [        U S5      (       a   U R&                  c  U R(                  U l        g g g s  snnf s  snf s  sn	f s  snf )Nro   rl   rm   z\tokens and scores need to be passed for a LLaMa tokenizer without merges to be instantiated.z:Merges were not in checkpoint, building merges on the fly.rw   c                 $   > TU S      TU S      4$ )Nr   rw    )xvocabs    r   <lambda>0GGUFTokenizerSkeleton.__init__.<locals>.<lambda>  s    U1Q4[%!+4Nr   T)keyreversec                     U S   $ )Nr   r   )vals    r   r   r     s    CFr   r    added_tokensr@   unknown_token_id)itemssetattrhasattrr   rl   rm   	enumerateloggerwarningr   ranger   appendsortedextendro   tuplesplitr   r@   r   )selfdict_kvrl   rm   itro   mergepiece_scorelocalindexpiece_lpiece_rr   _r   s                    @r   __init__GGUFTokenizerSkeleton.__init__  s   KKMDADQ " tX&&4**'$2I2I r  [[F[[F.7.?@.?daQq	\.?@ENNWXF&*5;;=&9""1c%j1E',Ve}eFmW(W->g%DE 2 u*NX\]e$ ': F(:DIF289&31vs1v&&F9 K@DLu5S!12LDK4**-23t{{3C-DE-Dt-DEt^,, "Dt^,, $D 4+,,1B1B1J $ 5 5D 2K,7 A : MEs   II;$II)r   ro   rm   r@   N)__name__
__module____qualname____firstlineno__r   __static_attributes__r   r   r   r   r     s    '6r   r   c                   8    \ rS rSrS rS rS rS rS rS r	Sr
g	)
GGUFLlamaConverteri  c                     [        U5      U l        U R                  U l        0 U l        [	        U R                  SS5      S:g  U l        g )Nrk   r   )r   protooriginal_tokenizeradditional_kwargsgetattris_llama_3_tokenizerr   tokenizer_dicts     r   r   GGUFLlamaConverter.__init__  s>    *>:
"&**!#$+DJJ8H'$RV]$]!r   c                 T    [        [        UR                  UR                  5      5      $ Nr   ziprl   rm   r   r   s     r   r   GGUFLlamaConverter.vocab      Cell344r   c                     UR                   $ r   ro   r   s     r   ro   GGUFLlamaConverter.merges      ||r   c                    U R                  U R                  5      nU R                  U R                  5      n[        U5       VVVs0 s H
  u  nu  pVXT_M     nnnnUR                  b  UR
                  UR                     OS n[        USS 5      b  UR
                  UR                     OS n	[        USS 5      b  UR
                  UR                     OS n
[        [        UUUSSS95      n/ n[        U R                  S5      (       dX  Ub  UR                  [        USSS95        U	b  UR                  [        U	SSS95        U
b  UR                  [        U
SSS95        O~[        R                  " [        R                  " U R                  R                   5      S:H  5      S	   nU H4  nUR                  [        U R                  R
                  U   SSS95        M6     [#        U5      S	:w  a  UR%                  U5        [#        U R                  R&                  5      S	:w  a?  UR)                  U R                  R&                   Vs/ s H  n[        USSS9PM     sn5        XR*                  S
'   XR*                  S'   XR*                  S'   U R,                  (       a>  S U R*                  S'   SU R*                  S'   SU R*                  S'   SU R.                  l        U$ s  snnnf s  snf )Nr>   r?   T)	unk_tokenfuse_unkbyte_fallbackrn   F
normalizedspecialry   r   r   	eos_token	bos_tokenrp   clean_up_tokenization_spaceslegacy)r   r   ro   r   r@   rl   r   r>   r   r	   r   r   r   npwherer   rn   r   add_special_tokensr   
add_tokensr   r   r   r   )r   r   vocab_scoresro   r   word_score	bpe_vocabr   r   r   r=   special_tokensspecial_tokens_idxidxadded_tokens                   r   r=   GGUFLlamaConverter.tokenizer  s   zz$**-TZZ(6?6MN6M!2NTTW6M	N8=8J8J8VELL!3!34\`	8?~W[8\8hELL!3!34nr	8?~W[8\8hELL!3!34nr	#"
	 tzz<00$%%juVZ&[\$%%juVZ&[\$%%juVZ&[\ "$"((4::3H3H*IQ*N!OPQ!R)%%j1B1B31GTYcg&hi * ~!#((8tzz&&'1,  ]a]g]g]t]tu]tkKE5I]tu /8{+.7{+.7{+$$9=D""#56EID""#AB/4D""8,-2D##*k ON vs   KK"c                 D   [         R                  " 5       [         R                  " 5       [         R                  " SS5      /nU R                  (       a  U[         R
                  " SSSS9/-  nU(       a  U[         R                  " SSS9/-  n[         R                  " U5      $ )N   ▁r   FTrp   trim_offsets	use_regexrw   contentleft)r   ByteFallbackFuseReplacer   	ByteLevelStripSequencer   replacementrp   sequences       r   decoderGGUFLlamaConverter.decoder  s    !!#MMOUC(
 $$++UQVbfghhH!<==H  **r   c                    U R                  U R                  5      nU R                  U R                  5      nUb  X!l        SnSn[        U R                  S5      (       a  U R                  R
                  nU R                  X45      nUb  XQl        U R                  X45      Ul        U R                  5       nU(       a  Xal        U R                  (       a6  [        R                  " SSSS9Ul        [        R                  " / 5      Ul        U$ )Nr   Trp   Fr   )r=   r   
normalizerr   r   rp   pre_tokenizerr  post_processorr   r   r   r   r   )r   r=   r  r   rp   r  r  s          r   	convertedGGUFLlamaConverter.converted  s    NN4::.	 __TZZ0
!#- 4**,>??#66GG**;I$&3# LLG	,,.'5$ $$&4&>&>!&Ud'I#
 $/#7#7#;I r   )r   r   r   r   N)r   r   r   r   r   r   ro   r=   r  r  r   r   r   r   r   r     s"    ^58t+!r   r   c                   6   ^  \ rS rSrS rS\4U 4S jjrSrU =r$ )GGUFQwen2Converteri)  c                 2    [        U5      U l        0 U l        g r   r   r   r   r   s     r   r   GGUFQwen2Converter.__init__*      "7"G!#r   returnc           
      "  > [        U R                  R                  5       VVs0 s H  u  pX!_M	     nnnU R                  R                  n[        TU ]  X45      nUR                  [        SSSS9[        SSSS9[        SSSS9/5        U$ s  snnf )N<|endoftext|>FTr   z<|im_start|>z
<|im_end|>)r   r   rl   ro   superr  r   r   r   r   r   r   ro   r=   	__class__s         r   r  GGUFQwen2Converter.converted.  s    (1$2I2I2P2P(QR(QWQ(QR((//G%e4	$$?udK>eTJ<E4H	
  Ss   Br   r   	r   r   r   r   r   r   r  r   __classcell__r  s   @r   r
  r
  )  s    $9  r   r
  c                   @    \ rS rSrS rS rS rS rS rS\	4S jr
S	rg
)GGUFPhi3Converteri=  c                 T    [        U5      U l        U R                  U l        0 U l        g r   r   r   r   r   r   s     r   r   GGUFPhi3Converter.__init__>  s"    *>:
"&**!#r   c                 T    [        [        UR                  UR                  5      5      $ r   r   r   s     r   r   GGUFPhi3Converter.vocabC  r   r   c                     UR                   $ r   r   r   s     r   ro   GGUFPhi3Converter.mergesF  r   r   c                    U R                  U R                  5      nU R                  U R                  5      n[        U5       VVVs0 s H
  u  nu  pVXT_M     nnnn[	        [        Xs5      5      nUR                  [        SSSSSS9[        SSSS9[        SSSSS9[        S	SSSS9[        S
SSSS9[        SSSSS9[        SSSSS9[        SSSSS9[        SSSSS9[        SSSSS9[        SSSSS9[        SSSSS9/5        UR                  b  UR                  UR                     OS U R                  S'   UR                  b  UR                  UR                     OS U R                  S'   UR                  b  UR                  UR                     OS U R                  S'   UR                  b  UR                  UR                     OS U R                  S'   U$ s  snnnf )N</s>TF)rstriplstripr   r   r  r   z<|assistant|>)r%  r   r   z<|placeholder1|>z<|placeholder2|>z<|placeholder3|>z<|placeholder4|>z
<|system|>z<|end|>z<|placeholder5|>z<|placeholder6|>z<|user|>r   r   r   	pad_token)r   r   ro   r   r   r	   r   r   r@   rl   r   r?   r>   rA   )	r   r   r   ro   r   r   r   r   r=   s	            r   r=   GGUFPhi3Converter.tokenizerI  s   zz$**-TZZ(6?6MN6M!2NTTW6M	Nc)45	$$6$uX\]?udK?4ESWX-duVZ[-duVZ[-duVZ[-duVZ[<PTU9TeTR-duVZ[-duVZ[:dudS	
$ 160B0B0NELL++,TX 	{+ 160B0B0NELL++,TX 	{+ 160B0B0NELL++,TX 	{+ 160B0B0NELL++,TX 	{+ E Os   Gc                     [         R                  " 5       [         R                  " 5       [         R                  " US5      /nU(       a  U[         R                  " SSS9/-  n[         R
                  " U5      $ )Nr   rw   r   )r   r   r   r   r   r   r   s       r   r  GGUFPhi3Converter.decoderp  s\    !!#MMO[#.
 !<==H  **r   r  c                     U R                  U R                  5      nSnSn[        U R                  S5      (       a  U R                  R                  nU R                  X#5      Ul        U$ )Nr   Trp   )r=   r   r   r   rp   r  )r   r=   r   rp   s       r   r  GGUFPhi3Converter.converted{  s\    NN4::.	4**,>??#66GG LLG	r   r   r   r   N)r   r   r   r   r   r   ro   r=   r  r   r  r   r   r   r   r  r  =  s(    $
5%N	+
9 
r   r  c                   6   ^  \ rS rSrS rS\4U 4S jjrSrU =r$ )GGUFGPTConverteri  c                 2    [        U5      U l        0 U l        g r   r  r   s     r   r   GGUFGPTConverter.__init__  r  r   r  c                    > [        U R                  R                  5       VVs0 s H  u  pX!_M	     nnnU R                  R                  n[        TU ]  X45      nU$ s  snnf r   )r   r   rl   ro   r  r  r  s         r   r  GGUFGPTConverter.converted  sZ    (1$2I2I2P2P(QR(QWQ(QR((//G%e4	 Ss   Ar  r  r  s   @r   r/  r/    s    $9  r   r/  c                   :    \ rS rSrS rS rS rS rS\4S jr	Sr
g	)
GGUFT5Converteri  c                     S/US'   [        U5      U l        [        U R                  R                  5       VVs0 s H  u  p#X2_M	     snnU l        U R                  U l        0 U l        g s  snnf Nz
dummy textro   )r   r   r   rl   token2idr   r   )r   r   r   r   s       r   r   GGUFT5Converter.__init__  s_    $0>x *>:
*3DJJ4E4E*FG*F$!*FG"&**!# Hs   A)c                 T    [        [        UR                  UR                  5      5      $ r   r   r   s     r   r   GGUFT5Converter.vocab  r   r   c                    [        U R                  SS5      (       ae  / n[        U R                  SS5      (       a  U[        R                  " SS9/-  nU[        R                  " SSS9/-  n[        R
                  " U5      $ g )Nr   Trp   r   )prependr   )patternr   )r   r   r   Prependr   r   )r   r   r   s      r   r  GGUFT5Converter.normalizer  sx    4**Hd;;Ht..0BDII[00?@@,,S%HIIH''11r   c                 V    [         R                  " SS// SQSU R                  S   4/S9$ )N$Ar$  )rB  r$  z$Br$  )singlepairr   )r   TemplateProcessingr8  )r   s    r   r  GGUFT5Converter.post_processor  s5    ,,&>-v./
 	
r   r  c                    U R                  U R                  5      n[        [        UU R                  R                  SS95      nU R                  U R                  5      nUb  X2l        SnSn[        U R                  S5      (       a  U R                  R                  nU R                  XE5      nUb  Xbl	        U R                  XE5      Ul
        U R                  5       nU(       a  Xrl        U$ )NFunk_idr   r   Trp   )r   r   r   r
   r@   r  r   r   rp   r  r  r  )r   r   r=   r  r   rp   r  r  s           r   r  GGUFT5Converter.converted  s    zz$**-zz..#
	 __TZZ0
!#- 4**,>??#66GG**;I$&3# LLG	,,.'5$r   )r   r   r   r8  N)r   r   r   r   r   r   r  r  r   r  r   r   r   r   r5  r5    s"    $5
9 r   r5  c                   :    \ rS rSrS rS rS rS rS\4S jr	Sr
g	)
GGUFGemmaConverteri  c                 `    S/US'   [        U5      U l        U R                  U l        0 U l        g r7  r  r   s     r   r   GGUFGemmaConverter.__init__  s.    $0>x *>:
"&**!#r   c                 V   [        [        UR                  UR                  5      5      n/ nU Hw  u  pEUS:X  a  UR	                  SU45        M   SU;   a?  [        UR                  5       5      S:X  a"  S[        U5      -  nUR	                  Xe45        Me  UR	                  XE45        My     U$ )Nz<0x09>	r   r   r   )r   r   rl   rm   r   r   strip)r   r   original_vocabupdated_vocabtokenscoreunderscoress          r   r   GGUFGemmaConverter.vocab  s    c%,,=>*LE $$dE]3#ekkm"4"9#c%j0$$k%9:$$e^4 + r   c                 0    [         R                  " SS5      $ )Nr   r   )r   r   r   s     r   r  GGUFGemmaConverter.normalizer  s    ""3..r   c                     [         R                  " SS5      [         R                  " 5       [         R                  " 5       /nU(       a  U[         R                  " SSS9/-  n[         R
                  " U5      $ )Nr   r   rw   r   )r   r   r   r   r   r   r   s       r   r  GGUFGemmaConverter.decoder  s\    UC(!!#MMO
 !<==H  **r   r  c                    U R                  U R                  5      n[        [        UU R                  R                  U R
                  S95      nU R                  U R                  5      nUb  X2l        SnSn[        U R                  S5      (       a  U R                  R                  nU R                  XE5      Ul
        U R                  XE5      nUb  Xbl        U$ )NrH  r   Trp   )r   r   r   r
   r@   handle_byte_fallbackr  r   r   rp   r  r  )r   r   r=   r  r   rp   r  s          r   r  GGUFGemmaConverter.converted  s    zz$**-zz.."77
	 __TZZ0
!#- 4**,>??#66GG LLG	**;I$&3#r   r-  N)r   r   r   r   r   r   r  r  r   r  r   r   r   r   rL  rL    s"    $/	+9 r   rL  rd   re   rf   gemma3_textrh   )ri   decilmrj   r   r  c                 ^    U n[         U   " U5      nUR                  5       nXCR                  4$ )a  
Utilities to convert a slow tokenizer instance in a fast tokenizer instance.

Args:
    architecture (`str`): The model architecture derived from gguf file.
    transformer_tokenizer ([`~tokenization_utils_base.PreTrainedTokenizer`]):
        Instance of a slow tokenizer to convert in the backend tokenizer for
        [`~tokenization_utils_base.PreTrainedTokenizerFast`].

Return:
    A instance of [`~tokenizers.Tokenizer`] to be used as the backend tokenizer of a
    [`~tokenization_utils_base.PreTrainedTokenizerFast`]
)GGUF_TO_FAST_CONVERTERSr  r   )r   r   tokenizer_class_name	converterfast_tokenizers        r   convert_gguf_tokenizerrf  ,  s7     ('(<=nMI((*N6666r   ),__doc__r   numpyr   
tokenizersr   r   r   r   r   tokenizers.modelsr	   r
    r   convert_slow_tokenizerr   r   r   r   r   utilsr   utils.loggingr   
get_loggerr   r   GGUF_CONFIG_MAPPINGGGUF_TOKENIZER_MAPPINGGGUF_CONFIG_DEFAULTS_MAPPINGr   r   r   r
  r  r/  r5  rL  rb  strr   dictrf  r   r   r   <module>ru     s  
   S S *  o o    
		H	%`$%`
 3*2) *& 5#8,:"`$ 3*2) *& 5#8,:"%`> 3*2) $& 5#8,:"?`V 3*2) $& 5#8,:"%2W`r 3*2) $& 5#8,:"+2*s`P 3*2) $& 5#8,:"+Q`j 3*2) $& 5#8,:"k`B 3*2) $& * 5#8,:"%2C`` 3*2) $& 5#8,:"a`x ++!/!/	y`D 3*2) $& 5#8,:"E`\  ) ("(<]`j 	'#%% & +#8(<,L":"k`D 3*2) $ 5#8(8"
E`Z  !$4 ((<[`j *3)2 5#8(6k`~ #3),@*(&.-
 4*2) $& 5#8,6" 4*2) $& !+ 5#8,:$4"" 4*2) $& !+ 5#8,:$4"" (#%% & +#8(<,L":" 4*2) $& 5#8,:"3* 	2 	M	
 	 	, 	 5 	"#8 	
 	!$ 	+N 	+ 	2 	%d 	l  	n!]` H	 ''++!/!/!3 )"++!/!/ : 	$	 	D	   0(6 (6Vv vr (H HV	} 	<k <~> >B # 	
 #       " 	/       %!" O#$  $) 07 7yRVAW 7r   