# Make the bundled gguf-py package importable before importing gguf below.
sys.path.insert(1, str(Path(__file__).parent / 'gguf-py'))
import gguf
1616
17+
class GGMLFormat(IntEnum):
    """On-disk container format of a legacy GGML model file.

    Values are ordered by format generation; GGML is the oldest
    (per the warning in main(), it carries no vocab scores).
    """
    GGML = 0
    GGMF = 1
    GGJT = 2
2122
23+
2224class GGMLFType (IntEnum ):
2325 ALL_F32 = 0
2426 MOSTLY_F16 = 1
@@ -38,6 +40,7 @@ class GGMLFType(IntEnum):
3840 MOSTLY_Q5_K_M = 17
3941 MOSTLY_Q6_K = 18
4042
43+
4144class Hyperparameters :
4245 def __init__ (self ):
4346 self .n_vocab = self .n_embd = self .n_mult = self .n_head = 0
@@ -69,6 +72,7 @@ def load(self, data, offset):
    def __str__(self):
        """Return a one-line human-readable summary of the model hyperparameters."""
        return f'<Hyperparameters: n_vocab={self.n_vocab}, n_embd={self.n_embd}, n_mult={self.n_mult}, n_head={self.n_head}, n_layer={self.n_layer}, n_rot={self.n_rot}, n_ff={self.n_ff}, ftype={self.ftype.name}>'
7174
75+
7276class Vocab :
7377 def __init__ (self , load_scores = True ):
7478 self .items = []
@@ -90,6 +94,7 @@ def load(self, data, offset, n_vocab):
9094 self .items .append ((item_text , item_score ))
9195 return offset - orig_offset
9296
97+
9398class Tensor :
9499 def __init__ (self , use_padding = True ):
95100 self .name = None
@@ -123,6 +128,7 @@ def load(self, data, offset):
123128 # print(n_dims, name_len, dtype, self.dims, self.name, pad)
124129 return offset - orig_offset
125130
131+
126132class GGMLModel :
127133 def __init__ (self ):
128134 self .hyperparameters = None
@@ -159,8 +165,8 @@ def validate_conversion(self, ftype):
159165 if ftype not in (GGMLFType .ALL_F32 , GGMLFType .MOSTLY_F16 ):
160166 err = 'Quantizations changed in GGJTv2. Can only convert unquantized GGML files older than GGJTv2.'
161167 elif (self .file_format == GGMLFormat .GGJT and self .format_version == 2 ):
162- if ftype in ( GGMLFType .MOSTLY_Q4_0 , GGMLFType .MOSTLY_Q4_1 ,
163- GGMLFType .MOSTLY_Q4_1_SOME_F16 , GGMLFType .MOSTLY_Q8_0 ):
168+ if ftype in (GGMLFType .MOSTLY_Q4_0 , GGMLFType .MOSTLY_Q4_1 ,
169+ GGMLFType .MOSTLY_Q4_1_SOME_F16 , GGMLFType .MOSTLY_Q8_0 ):
164170 err = 'Q4 and Q8 quantizations changed in GGJTv3.'
165171 if len (err ) > 0 :
166172 raise ValueError (f'{ err } Sorry, your { self .file_format .name } v{ self .format_version } file of type { ftype .name } is not eligible for conversion.' )
@@ -187,6 +193,7 @@ def load(self, data, offset):
187193 hp .set_n_ff (self )
188194 return offset
189195
196+
190197class GGMLToGGUF :
191198 def __init__ (self , ggml_model , data , cfg , params_override = None , vocab_override = None , special_vocab = None ):
192199 hp = ggml_model .hyperparameters
@@ -217,7 +224,7 @@ def save(self):
217224 gguf_writer = gguf .GGUFWriter (
218225 self .cfg .output ,
219226 gguf .MODEL_ARCH_NAMES [gguf .MODEL_ARCH .LLAMA ],
220- use_temp_file = False )
227+ use_temp_file = False )
221228 self .add_params (gguf_writer )
222229 self .add_vocab (gguf_writer )
223230 if self .special_vocab is not None :
@@ -341,7 +348,8 @@ def add_tensors(self, gguf_writer):
341348 mapped_name ,
342349 data [tensor .start_offset :tensor .start_offset + tensor .len_bytes ],
343350 raw_shape = tempdims ,
344- raw_dtype = tensor .dtype )
351+ raw_dtype = tensor .dtype )
352+
345353
346354def handle_metadata (cfg , hp ):
347355 import convert
@@ -365,38 +373,40 @@ def handle_metadata(cfg, hp):
365373 raise ValueError ('Unable to load metadata' )
366374 vocab = convert .load_vocab (
367375 cfg .vocab_dir if cfg .vocab_dir is not None else cfg .model_metadata_dir ,
368- cfg .vocabtype )
376+ cfg .vocabtype )
369377 # FIXME: Respect cfg.vocab_dir?
370378 svocab = gguf .SpecialVocab (cfg .model_metadata_dir ,
371- load_merges = cfg .vocabtype == 'bpe' ,
372- n_vocab = vocab .vocab_size )
379+ load_merges = cfg .vocabtype == 'bpe' ,
380+ n_vocab = vocab .vocab_size )
373381 convert .check_vocab_size (params , vocab )
374382 return (params , vocab , svocab )
375383
384+
def handle_args():
    """Build the CLI parser for the GGML-to-GGUF converter and parse sys.argv.

    Returns:
        argparse.Namespace with the parsed options. --input and --output
        are mandatory; every other option falls back to the defaults below.
    """
    ap = argparse.ArgumentParser(description='Convert GGML models to GGUF')
    # Required input/output paths.
    ap.add_argument('--input', '-i', type=Path, required=True,
                    help='Input GGMLv3 filename')
    ap.add_argument('--output', '-o', type=Path, required=True,
                    help='Output GGUF filename')
    # Optional metadata overrides written into the GGUF header.
    ap.add_argument('--name', help='Set model name')
    ap.add_argument('--desc', help='Set model description')
    # Architecture knobs that cannot be recovered from the GGML file itself.
    ap.add_argument('--gqa', type=int, default=1,
                    help='grouped-query attention factor (use 8 for LLaMA2 70B)')
    ap.add_argument('--eps', default='5.0e-06',
                    help='RMS norm eps: Use 1e-6 for LLaMA1 and OpenLLaMA, use 1e-5 for LLaMA2')
    ap.add_argument('--context-length', '-c', type=int, default=2048,
                    help='Default max context length: LLaMA1 is typically 2048, LLaMA2 is typically 4096')
    # Optional external metadata/vocab sources.
    ap.add_argument('--model-metadata-dir', '-m', type=Path,
                    help='Load HuggingFace/.pth vocab and metadata from the specified directory')
    ap.add_argument('--vocab-dir', type=Path,
                    help='directory containing tokenizer.model, if separate from model file - only meaningful with --model-metadata-dir')
    ap.add_argument('--vocabtype', choices=['spm', 'bpe'], default='spm',
                    help='vocab format - only meaningful with --model-metadata-dir and/or --vocab-dir (default: spm)')
    return ap.parse_args()
399408
409+
400410def main ():
401411 cfg = handle_args ()
402412 print (f'* Using config: { cfg } ' )
@@ -406,7 +416,7 @@ def main():
406416 data = np .memmap (cfg .input , mode = 'r' )
407417 model = GGMLModel ()
408418 print ('* Scanning GGML input file' )
409- offset = model .load (data , 0 )
419+ offset = model .load (data , 0 ) # noqa
410420 print (f'* GGML model hyperparameters: { model .hyperparameters } ' )
411421 vocab_override = None
412422 params_override = None
@@ -421,12 +431,15 @@ def main():
421431 print ('\n === WARNING === Special tokens may not be converted correctly. Use --model-metadata-dir if possible === WARNING ===\n ' )
422432 if model .file_format == GGMLFormat .GGML :
423433 print ('! This is a very old GGML file that does not contain vocab scores. Strongly recommend using model metadata!' )
424- converter = GGMLToGGUF (model , data , cfg ,
434+ converter = GGMLToGGUF (
435+ model , data , cfg ,
425436 params_override = params_override ,
426437 vocab_override = vocab_override ,
427- special_vocab = special_vocab )
438+ special_vocab = special_vocab
439+ )
428440 converter .save ()
429441 print (f'* Successful completion. Output saved to: { cfg .output } ' )
430442
443+
# Script entry point: run the conversion only when executed directly,
# not when imported as a module.
if __name__ == '__main__':
    main()