
    Z j                     ~   S SK r S SKJr  S SKrS SKJr  SSKJrJ	r	J
r
Jr  SSKJr  SSKJr  SSKJr  \" 5       (       a  S SKr\" \5      rS	S
SS.SSS.S.\	S\
S   0S\
S   0S.r\" \S   R/                  5       5      r " S S\5      r " S S5      r " S S\5      r " S S\5      r " S S\5      r " S S \5      r " S! S"\5      r " S# S$\5      r  " S% S&\5      r! " S' S(\5      r" " S) S*\5      r# " S+ S,\5      r$ " S- S.\5      r%\\\\\\\\ \!\"\#\#\$\%S/.r&S0 r'   S7S1\S2\(S-  S3\)S-  S4\(4S5 jjr*S8S6 jr+g)9    N)
NamedTuple)tqdm   )GGUF_CONFIG_DEFAULTS_MAPPINGGGUF_CONFIG_MAPPINGGGUF_TOKENIZER_MAPPING_gguf_parse_value)is_torch_available)is_gguf_available)
get_loggerversiontensor_countkv_count)r   r   r   	file_typequantization_version)r   r   )GGUFgeneral	tokenizertokenizer_config)ignoreconfigr   r   r   c                   H    \ rS rSr% \R
                  \S'   \\S'   \\S'   Sr	g)
GGUFTensor8   weightsnamemetadata N)
__name__
__module____qualname____firstlineno__npndarray__annotations__strdict__static_attributes__r       y/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/modeling_gguf_pytorch_utils.pyr   r   8   s    ZZ
INr)   r   c                   Z    \ rS rSrSS jrS\S\4S jrS\\\4   S\S	\S\4S
 jrS r	Sr
g)TensorProcessor>   Nc                 $    U=(       d    0 U l         g Nr   )selfr   s     r*   __init__TensorProcessor.__init__?   s    lr)   hf_namereturnc                     U$ )z@
Preprocesses the tensor name to ease loading the GGUF tensors.
r   r1   r4   s     r*   preprocess_nameTensorProcessor.preprocess_nameB   s	     r)   gguf_to_hf_name_mapsuffix	qual_namec                     g)z
Called when get_gguf_hf_weights_map fails to map a HF parameter
(tensor) and corresponding GGUF one.

This is particularly useful to resolve one-to-many
HF-GGUF mappings sometimes appear in some MoE models.
Nr   )r1   r:   r;   r<   r4   s        r*   perform_fallback_tensor_mapping/TensorProcessor.perform_fallback_tensor_mappingH   s     	r)   c                     [        X0 5      $ r/   r   r1   r   r   kwargss       r*   processTensorProcessor.processT   s    ',,r)   r0   r/   )r   r    r!   r"   r2   r&   r8   r'   r>   rD   r(   r   r)   r*   r,   r,   >   sL    #s s 
#'S>
;>
KN
Y\
-r)   r,   c            	          ^  \ rS rSrSU 4S jjrS r SS\R                  S\S\S-  S\R                  4S	 jjr	S
r
U =r$ )LlamaTensorProcessorX   Nc                     > [         TU ]  US9  g Nr0   superr2   r1   r   	__class__s     r*   r2   LlamaTensorProcessor.__init__Y       'r)   c                 &   SU;   d  SU;   az  U R                   R                  S5      nU R                   R                  S5      nS XE4;   a  [        X0 5      $ SU;   a  U R                  XU5      nOSU;   a  U R                  XU5      n[        X0 5      $ )Nz.attn_k.z.attn_q.num_attention_headsnum_key_value_heads)r   getr   _reverse_permute_weights)r1   r   r   rC   	num_headsnum_kv_headss         r*   rD   LlamaTensorProcessor.process\   s    t!3(=>I;;??+@AL	00!'44T!77IVt#77LY',,r)   r   n_headrW   r5   c                     Ub  X#:w  a  UnUR                   S   U-  S-  nUR                  " X$S/UR                   SS  Q76 nUR                  SS5      R                  UR                   5      $ )Nr      r   )shapereshapeswapaxes)r1   r   rY   rW   dimws         r*   rU   -LlamaTensorProcessor._reverse_permute_weightsi   sl    
 #(>!FmmA&(A-OOF?W]]12->?zz!Q''66r)   r   r/   )r   r    r!   r"   r2   rD   r#   r$   intrU   r(   __classcell__rN   s   @r*   rG   rG   X   sJ    (- LP
7zz
7+.
7>ADj
7	
7 
7r)   rG   c                     ^  \ rS rSr\R
                  " S5      r\R
                  " S5      r\R
                  " S5      rSU 4S jjr	S\
S\
4S jrS	\\
\
4   S
\
S\
S\
4S jrS\
4S jrS\R                   S\\
\4   S\
S\
4S jrSrU =r$ )Qwen2MoeTensorProcessorv   zmlp.experts.\d+.z7model\.layers\.(?P<bid>\d+)\.mlp\.experts\.gate_up_proj3(?P<name>.*\.ffn_(?P<w>gate|down|up)_exps)\.weight$c                     > [         TU ]  US9  g rJ   rK   rM   s     r*   r2    Qwen2MoeTensorProcessor.__init__{   rP   r)   r4   r5   c                 F    [         R                  " U R                  SU5      $ Nzmlp.experts.resubHF_EXPERT_RENAME_PATTERNr7   s     r*   r8   'Qwen2MoeTensorProcessor.preprocess_name~       vvd33^WMMr)   r:   r;   r<   c                     [         R                  " U R                  U5      =n(       a  X4-   nXaSUS    SU 3'   XaSUS    SU 3'   g g )Nblk.bid.ffn_gate_exps.ffn_up_exps)rn   	fullmatchHF_MOE_W13_PATTERNr1   r:   r;   r<   r4   mfull_hf_names          r*   r>   7Qwen2MoeTensorProcessor.perform_fallback_tensor_mapping   s_     T44g>>1>$.LKW$qxjvh GHIU$qxjVH EF ?r)   r   c                 B   [         R                  " U R                  U5      =n(       aQ  UR                  S5      nUR                  S5      nU(       a(  U R	                  XXTS      US   5        [        US 0 5      $ SU;   a  [        R                  " USS9n[        X0 5      $ )Ntensor_key_mappingparsed_parametersr   r`   ffn_gate_inp_shexpr   axis)rn   rx   GGUF_MOE_WEIGHTS_PATTERNrT   _set_moe_expert_tensorr   r#   expand_dimsr1   r   r   rC   r{   r   r   s          r*   rD   Qwen2MoeTensorProcessor.process   s    T::DAA1A!',@!A &

+> ?!++GHZ]c[dHeghilgmn!'4444' nnW15G',,r)   r   r   r`   c                    [         R                  " [        R                  " U5      5      nUS:X  a  XRS   U'   g [	        UR
                  5      nSnXg   nUS-  Xg'   X2S   ;  a$  [         R                  " XeR                  S9US   U'   US   U   n	US:X  a  U	R                  USU5      n	OU	R                  XxU5      n	U	R                  U5        g Ndowntensorsr   r[   )dtypegater   
torch
from_numpyr#   copylistr\   zerosr   narrowcopy_
r1   r   r   r4   r`   torch_weightsr\   	shard_dim
shard_sizeouts
             r*   r   .Qwen2MoeTensorProcessor._set_moe_expert_tensor   s    (()9:;4Ai(1 'EI)J)A~E	::8=EQdQd8e!),W5 1) <W ECF{jjAz:jj
CIIm$r)   r   r/   )r   r    r!   r"   rn   compilerp   ry   r   r2   r&   r8   r'   r>   rD   r#   r$   r   r(   rc   rd   s   @r*   rf   rf   v   s    !zz*=>$^_!zz*`a(Ns Ns NV#'S>V;>VKNVY\V-S -%bjj %TRUW[R[_ %gj %or % %r)   rf   c            
          ^  \ rS rSrSr\R                  " S5      r\R                  " S5      rSU 4S jjr	S\
4S jrS\R                  S	\S
\
S\
S\4
S jrS\R                  S	\S
\
S\4S jrSrU =r$ )GptOssTensorProcessor   a3  
Tensor processor for GPT-OSS models (MoE with 128 experts).
Handles:
- Splitting stacked expert tensors (down_proj, gate_proj, up_proj) into individual experts.
- Interleaving gate and up projections if stored in a combined tensor (gate_up_projs).
- Bias tensors (1D) are passed through without transpose.
z<blk\.(?P<bid>\d+)\.ffn_(?P<proj>down|gate|up)_projs\.weight$z-blk\.(?P<bid>\d+)\.ffn_gate_up_projs\.weight$c                     > [         TU ]  US9  g rJ   rK   rM   s     r*   r2   GptOssTensorProcessor.__init__   rP   r)   r   c                 L   U R                   R                  U5      =n(       aW  UR                  S5      nUR                  S5      nU(       a.  U(       a'  U R                  XUS   US   U5        [	        US 0 5      $ U R
                  R                  U5      =n(       aS  UR                  S5      nUR                  S5      nU(       a*  U(       a#  U R                  XUS   U5        [	        US 0 5      $ SU;   a%  [        UR                  5      S:X  a  [	        X0 5      $ [	        X0 5      $ )Nr   r   ru   proj.biasr   )	r   matchrT   _split_moe_expert_tensorr   GGUF_MOE_COMBINED_PATTERN_interleave_gate_up_tensorlenr\   r   s          r*   rD   GptOssTensorProcessor.process   s   --33D9919!',@!A &

+> ?!&7--g!E(TUV\T]_qr!'444 ..44T::1:!',@!A &

+> ?!&7//AeHVhi!'444 d?s7==1Q6gR00 ',,r)   r   r   ru   r   r   c                 D   U R                   R                  SS5      n[        [        XaR                  S   5      5       H_  nX   nSU SU SU S3n	UR                  5        H  u  pX;   d  M  U	R                  X5      n	M     [        R                  " USS	9US
   U	'   Ma     g)z:Split a stacked MoE tensor into individual expert tensors.num_local_experts   r   model.layers..block_sparse_moe.experts..z_proj.weightTr   r   N)	r   rT   rangeminr\   itemsreplacer   tensor)r1   r   r   ru   r   r   num_expertsiexpert_weightr4   key
mapped_keys               r*   r   .GptOssTensorProcessor._split_moe_expert_tensor   s     kkoo&93? s;a(89:A#JM%cU*DQCql[G#5#;#;#=>%ooc>G $> 5:LLUY4Zi(1 ;r)   c                 H   U R                   R                  SS5      nUR                  S   nUS-  nUSS2SU2SS24   nUSS2US2SS24   n	[        [	        XQR                  S   5      5       H  n
X   R
                  nX   R
                  nSU SU
 S	3nSU SU
 S
3nUR                  5        H6  u  nnX;   a  UR                  UU5      nX;   d  M$  UR                  UU5      nM8     [        R                  " USS9US   U'   [        R                  " USS9US   U'   M     g)z
Process a combined gate+up tensor.
Expected shape: [num_experts, intermediate_size, hidden_size].
Interleaving: gate occupies first half of intermediate dimension,
up occupies second half. Transpose to [hidden, half_inter] per expert.
r   r   r   r[   Nr   r   r   z.gate_proj.weightz.up_proj.weightTr   r   )
r   rT   r\   r   r   Tr   r   r   r   )r1   r   r   ru   r   r   
inter_size
half_inter	gate_partup_partr   gate_weight	up_weight	gate_nameup_namer   r   s                    r*   r   0GptOssTensorProcessor._interleave_gate_up_tensor   s6    kkoo&93?]]1%
1_
A{
{A-.	!Z[!+,s;a(89:A#,..K
I'u,FqcIZ[I%cU*DQCWG $6#;#;#=Z# ) 1 1#z BI>%ooc:>G	 $> 7<ll;UY6Zi(349LLQU4Vi(1 ;r)   r   r/   )r   r    r!   r"   __doc__rn   r   r   r   r2   r&   rD   r#   r$   r'   r   r   r(   rc   rd   s   @r*   r   r      s      "zz*ij "

+[ \(-S -0[[  [ 	[
 [ ![."W"W  "W 	"W
 !"W "Wr)   r   c                      ^  \ rS rSrS
U 4S jjrS rS\R                  S\S\4S jr	S\R                  S\S\4S jr
S	rU =r$ )BloomTensorProcessori  c                     > [         TU ]  US9  g rJ   rK   rM   s     r*   r2   BloomTensorProcessor.__init__  rP   r)   c                     SU;   aI  U R                   S   nU R                   S   nSU;   a  U R                  XU5      nOU R                  XU5      n[        X0 5      $ )Nattn_qkvrY   hidden_sizeweight)r   _reverse_reshape_weights_reverse_reshape_biasr   )r1   r   r   rC   rV   n_embeds         r*   rD   BloomTensorProcessor.process  s_    H-Ikk-0G477GT44WQ',,r)   r   rY   r   c                    [         R                  " USSS9u  pEnUR                  X#U-  U5      nUR                  X#U-  U5      nUR                  X#U-  U5      n[         R                  " XEU/SS9nUR                  US-  X2-  -  U5      $ )N   r   r   r   )r#   array_splitr]   stack)r1   r   rY   r   qkvqkv_weightss           r*   r   -BloomTensorProcessor._reverse_reshape_weights!  s     ..!!4aIIf/9IIf/9IIf/9hhayq1""6A:1B#CWMMr)   c                     [         R                  " US5      u  pEnUR                  X#U-  5      nUR                  X#U-  5      nUR                  X#U-  5      n[         R                  " XEU/SS9R	                  5       nU$ )Nr   r   r   )r#   r   r]   r   flatten)r1   r   rY   r   q_biask_biasv_biasqkv_biass           r*   r   *BloomTensorProcessor._reverse_reshape_bias-  su     "$!;6(9:6(9:6(9:88VV41=EEGr)   r   r/   )r   r    r!   r"   r2   rD   r#   r$   rb   r   r   r(   rc   rd   s   @r*   r   r     sS    (-
N

 
NC 
NRU 
N
RZZ 
 
s 
 
r)   r   c                   2   ^  \ rS rSrSU 4S jjrS rSrU =r$ )T5TensorProcessori:  c                     > [         TU ]  US9  g rJ   rK   rM   s     r*   r2   T5TensorProcessor.__init__;  rP   r)   c                     S nUR                  S5       H%  nUR                  5       (       d  M  [        U5      n  O   [        XSU05      $ )Nr   ru   )splitisdigitrb   r   )r1   r   r   rC   ru   chunks         r*   rD   T5TensorProcessor.process>  sC    ZZ_E}}%j % '%66r)   r   r/   r   r    r!   r"   r2   rD   r(   rc   rd   s   @r*   r   r   :  s    (7 7r)   r   c                   2   ^  \ rS rSrSU 4S jjrS rSrU =r$ )GPT2TensorProcessoriG  c                     > [         TU ]  US9  g rJ   rK   rM   s     r*   r2   GPT2TensorProcessor.__init__H  rP   r)   c                     SU;   d  SU;   d  SU;   d  SU;   a  UR                   nUS:X  aF  SnUR                  S0 5      n[        R                  " [        R
                  " U5      5      US   U'   S n[        X0 5      $ )	Nzattn_qkv.weightzffn_down.weightzffn_up.weightzattn_output.weightoutput.weightzlm_head.weightr   r   )r   rT   r   r   r#   r   r   )r1   r   r   rC   r   s        r*   rD   GPT2TensorProcessor.processK  s     % D($&#t+iiG ?" $D &

+> C161A1A"'''BR1Si(.D',,r)   r   r/   r   rd   s   @r*   r   r   G  s    (- -r)   r   c                   2   ^  \ rS rSrSU 4S jjrS rSrU =r$ )MambaTensorProcessoria  c                     > [         TU ]  US9  g rJ   rK   rM   s     r*   r2   MambaTensorProcessor.__init__b  rP   r)   c                     SU;   a  [         R                  " USS9nSU;   a  [         R                  " U* 5      n[        X0 5      $ )Nzssm_conv1d.weightr   r   ssm_a)r#   r   logr   rB   s       r*   rD   MambaTensorProcessor.processe  sB    $& nnW15Gd? ffgX&G',,r)   r   r/   r   rd   s   @r*   r   r   a  s    (	- 	-r)   r   c                   2   ^  \ rS rSrSU 4S jjrS rSrU =r$ )NemotronTensorProcessoriq  c                     > [         TU ]  US9  g rJ   rK   rM   s     r*   r2    NemotronTensorProcessor.__init__r  rP   r)   c                 0    SU;   a  US-
  n[        X0 5      $ Nznorm.weightr   rA   rB   s       r*   rD   NemotronTensorProcessor.processv       D kG',,r)   r   r/   r   rd   s   @r*   r   r   q  s    (- -r)   r   c                   2   ^  \ rS rSrSU 4S jjrS rSrU =r$ )Gemma2TensorProcessori|  c                     > [         TU ]  US9  g rJ   rK   rM   s     r*   r2   Gemma2TensorProcessor.__init__}  rP   r)   c                 0    SU;   a  US-
  n[        X0 5      $ r  rA   rB   s       r*   rD   Gemma2TensorProcessor.process  r  r)   r   r/   r   rd   s   @r*   r  r  |  s    (
- -r)   r  c                   2   ^  \ rS rSrSU 4S jjrS rSrU =r$ )Lfm2TensorProcessori  c                     > [         TU ]  US9  g rJ   rK   rM   s     r*   r2   Lfm2TensorProcessor.__init__  rP   r)   c                 P    SU;   a  [         R                  " USS9n[        X0 5      $ )Nzshortconv.conv.weightr   r   )r#   r   r   rB   s       r*   rD   Lfm2TensorProcessor.process  s'    "d*nnW15G',,r)   r   r/   r   rd   s   @r*   r  r    s    (- -r)   r  c                   8  ^  \ rS rSr\R
                  " S5      r\R
                  " S5      r\R
                  " S5      r\R
                  " S5      r	SU 4S jjr
S\S\4S	 jrS
\\\4   S\S\S\4S jrS\4S jrS\R"                  S\\\4   S\S\4S jrSrU =r$ )MiniMaxM2TensorProcessori  zmlp\.experts\.\d+\.z<(?:model\.)?layers\.(?P<bid>\d+)\.mlp\.experts\.gate_up_projrh   z>(?:model\.)?layers\.(?P<bid>\d+)\.mlp\.e_score_correction_biasc                     > [         TU ]  US9  g rJ   rK   rM   s     r*   r2   !MiniMaxM2TensorProcessor.__init__  rP   r)   r4   r5   c                 F    [         R                  " U R                  SU5      $ rl   rm   r7   s     r*   r8   (MiniMaxM2TensorProcessor.preprocess_name  rr   r)   r:   r;   r<   c                     [         R                  " U R                  U5      =n(       a  X4-   nXaSUS    SU 3'   XaSUS    SU 3'   g [         R                  " U R                  U5      =n(       a  X4-   USUS    S3'   g g )Nrt   ru   rv   rw   z.exp_probs_b.bias)rn   rx   ry   HF_BIAS_PATTERNrz   s          r*   r>   8MiniMaxM2TensorProcessor.perform_fallback_tensor_mapping  s     T44g>>1>$.LKW$qxjvh GHIU$qxjVH EF,,t33W==Q=FOFY$qxj0A BC >r)   r   c                    [         R                  " U R                  U5      =n(       aQ  UR                  S5      nUR                  S5      nU(       a  U R	                  XXTS      US   5        [        US 0 5      $ [        X0 5      $ )Nr   r   r   r`   )rn   rx   r   rT   r   r   r   s          r*   rD    MiniMaxM2TensorProcessor.process  s    T::DAA1A!',@!A &

+> ?!++GHZ]c[dHeghilgmngtR00',,r)   r   r   r`   c                    [         R                  " [        R                  " U5      5      nUS:X  a  XRS   U'   g [	        UR
                  5      nSnXg   nUS-  Xg'   X2S   ;  a$  [         R                  " XeR                  S9US   U'   US   U   n	US:X  a  U	R                  USU5      n	OU	R                  XxU5      n	U	R                  U5        g r   r   r   s
             r*   r   /MiniMaxM2TensorProcessor._set_moe_expert_tensor  s    (()9:;4Ai(1 'EI)J)A~E	::8=EQdQd8e!),W5 1) <W ECF{jjAz:jj
CIIm$r)   r   r/   )r   r    r!   r"   rn   r   rp   ry   r   r  r2   r&   r8   r'   r>   rD   r#   r$   r   r(   rc   rd   s   @r*   r  r    s    !zz*@A$cd!zz*`ajj!bcO(Ns Ns N
Z#'S>
Z;>
ZKN
ZY\
Z-S -%bjj %TRUW[R[_ %gj %or % %r)   r  )llamaqwen2moegpt_ossqwen3moebloomt5	t5encodergpt2mambanemotrongemma2gemma3lfm2
minimax-m2c                     XR                   ;  a  / $ U R                   U   nUR                   Vs/ s H&  n[        UR                  U   UR                  5      PM(     sn$ s  snf r/   )fieldsdatar	   partstypes)readerfieldvalue_data_indexs       r*   
read_fieldr7    sT    MM!	MM% EX]XbXbcXbekk+6DXbcccs   -A	processor
model_type
num_layersr<   c           
      4   [        5       (       a  [        5       (       a	  SSKJnJn  O [
        R                  S5        [        S5      eUc  U R                  R                  OUnUc  U R                  R                  OUnUS:X  a  SnO5US:X  a  S	nO,US
:X  a  SnO#US:X  a  SnOUS:X  a  SnOUS:X  a  SnOUS:X  a  SnSnUR                  5        H  u  pX:X  d  M  Un  O   Uc  [        SU S35      eU" Xs5      n
0 nU R                  5       nU H  nUR                  U5      nUSpUR                  S5      (       d  UR                  S5      (       a  UR!                  SS5      u  pSU-   nU
R#                  U5      nUc  UR%                  XXM5        M  XM-   UUU-   '   M     U R'                  5       =n(       a[  U HU  u  nn[)        UXX4 U S3S9nUR                  5        VVs0 s H  u  nnUU;  d  M  UU_M     nnnUR+                  U5        MW     U$ s  snnf )a=  
GGUF uses this naming convention for their tensors from HF checkpoint:
`blk.N.BB.weight` and `blk.N.BB.bias`
where N signifies the block number of a layer, and BB signifies the
attention/mlp layer components.
See "Standardized tensor names" in
https://github.com/ggerganov/ggml/blob/master/docs/gguf.md for details.
r   )MODEL_ARCH_NAMESget_tensor_name_mapLoading a GGUF checkpoint in PyTorch, requires both PyTorch and GGUF>=0.10.0 to be installed. Please see https://pytorch.org/ and https://github.com/ggerganov/llama.cpp/tree/master/gguf-py for installation instructions.KPlease install torch and gguf>=0.10.0 to load a GGUF checkpoint in PyTorch.Ncoherez	command-r	qwen2_moer!  	qwen3_moer#  gemma3_textr+  umt5r%  
minimax_m2r-  r"  gpt-osszUnknown gguf model_type: z in gguf-py. This might because you're using an outdated version of gguf-py package, you can install `gguf` package from source refer to https://github.com/ggerganov/llama.cpp/tree/master/gguf-py#development z.weightr   r   r   )r<   )r   r
   ggufr<  r=  loggererrorImportErrorr   r9  num_hidden_layersr   NotImplementedError
state_dictr8   endswithrsplitget_namer>   named_childrenget_gguf_hf_weights_mapupdate)hf_modelr8  r9  r:  r<   r<  r=  archr   r5  name_mapr:   rN  r4   r   r;   	gguf_namerR  childsub_mapr   r   s                         r*   rS  rS    s\    133>>A	
 ghh/9/A++zJ6@6H22jJX 
	{	"
	{	"
	}	$
	v	
	|	#!
	y	 
D&,,.
D / |!'
| 4U U
 	
 #44H $$&J++G4fI&&'*:*:7*C*C">>#q1LD6\F%%d+	556IS\f2;2EI./ " "0022~2)KD%-yjkRVQWWXDYG )0X11DW;Wtq!tGX&&w/ *  Ys   "H3Hc                   ^/^0 [        5       (       a  [        5       (       a	  SSKJnJn  O [
        R                  S5        [        S5      eU" U 5      nUR                  n[        UR                  5       5      n[         V	s0 s H  o0 _M     n
n	[        US5      S   n[        US5      nSnSU;   a	  S	U;   a  S	nOXS
U;   d  SU;   aJ  SU
S   S'   U(       a)  SUS   R                  5       ;   a  SnSU;   a	  S/U
S   S'   OSU;   a	  S/U
S   S'   S
nOUnSU;   a  SnO SU;   d  SU;   a  SnOSU;   a  SnOSU;   a  SnSU;   aX  1 Skm/Sm0[        U/4S jUR                   5       5      n[        U04S jUR                   5       5      nXS   S '   U(       + U
S   S!'   U[         ;  a  U[         ;  a  [#        S"U S#35      eS$S%/n[%        S& UR                   5       5      =(       d    UU;   U
S   S''   [&        R(                  " U[&        R(                  " U5      =(       d    0 5      nUR+                  5        H  u  nnU
S   R-                  UU5        M     UR                  R+                  5        GHH  u  nnUR/                  X5      nUR1                  S(5      nUS   nS(R3                  US)S 5      nUR4                   Vs/ s H&  n[7        UR8                  U   UR:                  5      PM(     nn[=        U5      S):X  a  US   n[?        U[@        5      (       a  UU;   a  UR/                  X5      n[        R+                  5        HM  u  nnUU;   d  M  UUU   ;   d  M  UU   U   nUS*:X  a  M)  Ub  UU
U   U'   UU;   d  M<  URC                  U5        MO     UU;   d  GM-  [
        RE                  S+U S,U 35        GMK     U
S   S-   S.:X  a  S/U
S   S-'   U
S   R)                  S-5      S:X  aG  S0S1S2S3.nU
S   R)                  S45      n[?        U[F        5      (       a  UR)                  US15      U
S   S4'   U
S   S-   S5:X  aP  U
S   S6   n[I        U5      U
S   S6'   S7U
S   S8'   [K        U5       V V!s/ s H  u  n n!U!S:  d  M  U PM     sn!n U
S   S9'   US:X  a  SNS: jn"U"" US;5      n#U#b  S<U#0n$UR                   H  nURM                  S=5      (       d  M  U[=        S=5      S n%U%S>:X  a  M1  UR                  U   R8                  S   n[?        U[N        5      (       a  URQ                  S?5      nU%S@;   a  [S        U5      nOU%SA;   a  SBn%[G        U5      nO UU$U%'   M     U$U
S   SC'   SDU
S   ;  a5  U
SE   n&SFU&;   a  [=        U&SF   5      U
S   SD'   O[
        RU                  SG5        U(       Ga  0 U
SH'   U
R)                  S0 5      n'[V        R)                  U[X        5      n(U(" U'SI9n)[[        UU)5      n*[]        UR                  SJSK9 H  n+U+R^                  n,U" U+R4                  U+R`                  5      n-U)Rc                  U-U,U*U
SL9n.U.Rd                  n-U.R^                  n,U,U*;  a  M^  U*U,   n,[f        Rh                  " [j        Rl                  " U-5      5      n+Ub  U+Ro                  U5      n+U+U
SH   U,'   M     [=        U5      S:  a  [
        RE                  SMU 35        U
$ s  sn	f s  snf s  sn!n f )Oa
  
Load a GGUF file and return a dictionary of parsed parameters containing tensors, the parsed
tokenizer and config attributes.

Args:
    gguf_checkpoint_path (`str`):
        The path the to GGUF file to load
    return_tensors (`bool`, defaults to `False`):
        Whether to read the tensors from the file and return them. Not doing so is faster
        and only loads the metadata in memory.
    model_to_load (`nn.Module`, *optional*):
        The model to load the weights into. This is used to map GGUF tensor names to
        Transformers parameter names.
    torch_dtype (`torch.dtype`, *optional*):
        The desired `torch.dtype` for the loaded tensors. If provided, tensors will be
        converted to this dtype immediately after dequantization to save memory.
r   )
GGUFReader
dequantizer>  r?  zgeneral.architecturezgeneral.nameNr   mistralr%  r&  Tr   is_gated_actrD  UMT5EncoderModelarchitecturesT5EncoderModelr!  rA  r"  rF  r#  rB  r-  rE  stablelm>   attn_k.biasattn_q.biasattn_v.biasffn_normc              3   T   >#    U  H  nT  H  o"UR                   ;   v   M     M     g 7fr/   r   ).0r   	bias_nameattn_bias_names      r*   	<genexpr>'load_gguf_checkpoint.<locals>.<genexpr>  s$     mnF^lQZFKK/^l/ns   %(c              3   B   >#    U  H  nTUR                   ;   v   M     g 7fr/   ri  )rj  r   ffn_norm_names     r*   rm  rn    s     #^~VMV[[$@~s   use_qkv_biasuse_parallel_residualzGGUF model with architecture z is not supported yet.falconr$  c              3   >   #    U  H  oR                   S :g  v   M     g7f)r   Nri  )rj  r   s     r*   rm  rn    s     HvKK?*s   tie_word_embeddingsr   r   z1Some keys were not parsed and added into account z | r9  r+  rC  nonesoftmaxsigmoid)r   r   r[   scoring_funcr,  rS   Fblock_auto_adjust_ff_dimfull_attn_idxsc                     SU 3nX0R                   ;   aD  U R                   U   R                  S   n[        U[        5      (       a  UR	                  S5      nU$ U$ )Nzgpt-oss.r   utf-8)r/  r1  
isinstancebytesdecode)r3  r;   defaultr   vals        r*   read_gpt_key*load_gguf_checkpoint.<locals>.read_gpt_key  sV    VH%Cmm#mmC(..q1c5))**W-C
Nr)   zrope.scaling.type	rope_typezgpt-oss.rope.scaling.typer~  )factorattention_factor	beta_fast	beta_slow)original_context_length original_max_position_embeddingsr  rope_scaling
vocab_sizer   tokenszCan't find a way to retrieve missing config vocab_size from tokenizer parameters. This will use default value from model config class and cause unexpected behavior.r   r0   z,Converting and de-quantizing GGUF tensors...)desc)r   r   r   r   z0Some keys of the GGUF file were not considered: r/   )8r   r
   rH  r\  r]  rI  rJ  rK  r/  r   keysGGUF_TO_TRANSFORMERS_MAPPINGr7  loweranyr   GGUF_SUPPORTED_ARCHITECTURES
ValueErrorallr   rT   r   
setdefaultr   r   joinr0  r	   r1  r2  r   r  r&   removeinforb   max	enumerate
startswithr  r  floatwarningTENSOR_PROCESSORSr,   rS  r   r   tensor_typerD   r   r   r   r#   r   to)1gguf_checkpoint_pathreturn_tensorsmodel_to_loadtorch_dtyper\  r]  r3  r/  reader_keysr   r   architecture
model_nameupdated_architecturer   rr  
exceptionsconfig_defaultsr   r5  gguf_keyr4  r   prefix
config_keyr6  	parameterparameter_renamesrenamed_config_key_gating_func_map_scoringgguf_num_key_value_headsr   rW   r  r  r  r;   tokenizer_parametersr   ProcessorClassr8  r   r   r   r   resultrl  rp  s1                                                  @@r*   load_gguf_checkpointr  :  s}   $ 133//A	
 ghh,-F]]Fv{{}%K(DE(D1B(DEf&<=a@LFN3J ,9
#:( 
	!<6:(#N3&JqM$7$7$99#) l*@R?S!(+O<l*@P?Q!(+O<#' +\!*	l	"i<&?(	|	#*		%+
 \!F"mfnnmm ##^v~~#^ ^6>(#N3CX?X(#$;<77<PXt<t8F\]^^ G$JHHHfL\fLf h 56
 366:>>|LRPRO &++-
U(#..sE: . "==..0%##LGs#qXXeABi(
]b]g]gh]gk"5;;{#;U[[I]ghu:?!HEeS!!le&;MM,EE,H,N,N,P(I(**z=Nv=V/V%6v%>z%J"%+%1GL%i01CD{*&&x0 -Q {"KKKH:UXY^X_`a7 1< "<0H<4A(#L1 "&&|4D%)	B$X.22>Bh$$:J:N:NxYb:ch'7"<0F:#4X#>?T#U =@AY=Z(#$9:BG(#$>?
 &//G%H9
%H/!\L[\L\A%H9
(#$45 y(	 !)<=	 '3L }}~~&=>>S!89;<V#c*003eU++!LL1EUU!%LE^^?FJE',V$% %( ;Gh'7 ,X660=++8;<PQY<Z8[h'5NNe
 '))$"&&x4*..|_M"&1	4]IN6>>0^_F;;D f.@.@AG&&#5"3	 ' F nnG;;D--%d+D%%bggg&67F&;/17i(.- `0 ;!F{mTU[ FL iV9
s   ?[:-[%[6[)NNrG  )FNN),rn   typingr   numpyr#   	tqdm.autor   integrationsr   r   r   r	   utilsr
   utils.import_utilsr   utils.loggingr   r   r   rI  r  r   r  r  r   r,   rG   rf   r   r   r   r   r   r   r  r  r  r  r7  r&   rb   rS  r  r   r)   r*   <module>r     s    
     & 1 % 	H	 !*"

 "-F\] "5kBC$&<=O&PQ    $$@$J$O$O$QR  - -47? 7<5%o 5%pbWO bWJ$? $N
7 
7-/ -4-? - -o -	-O 	--/ -2% 2%l "'$'!
"!'##* $d "!UU d
U d
	U
 Uplr)   