@@ -3,11 +3,9 @@

 import argparse
 import concurrent.futures
-import copy
 import enum
 import faulthandler
 import functools
-import io
 import itertools
 import json
 import math
@@ -23,14 +21,14 @@
 from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor
 from dataclasses import dataclass
 from pathlib import Path
-from typing import IO, TYPE_CHECKING, Any, Callable, Generator, Iterable, Literal, Sequence, TypeVar
+from typing import IO, TYPE_CHECKING, Any, Callable, Iterable, Literal, TypeVar

 import numpy as np
 from sentencepiece import SentencePieceProcessor

 import os
 if 'NO_LOCAL_GGUF' not in os.environ:
-    sys.path.insert(1, str(Path(__file__).parent / 'gguf-py' / 'gguf'))
+    sys.path.insert(1, str(Path(__file__).parent / 'gguf-py'))
 import gguf

 if TYPE_CHECKING:
@@ -851,7 +849,7 @@ def add_meta_vocab(self, vocab: Vocab) -> None:
         elif isinstance(vocab, BpeVocab):
             self.gguf.add_tokenizer_model("gpt2")
         else:
-            raise ValueError(f'Unknown vocab type: Not BpeVocab or SentencePieceVocab')
+            raise ValueError('Unknown vocab type: Not BpeVocab or SentencePieceVocab')
         self.gguf.add_token_list(tokens)
         self.gguf.add_token_scores(scores)
         self.gguf.add_token_types(toktypes)
@@ -905,7 +903,7 @@ def maybe_do_quantize(item: tuple[DataType, NDArray]) -> NDArray:
         return dt.quantize(arr)

     @staticmethod
-    def write_all(fname_out: Path, ftype: GGMLFileType, params: Params, model: LazyModel, vocab: Vocab, svocab: gguf.SpecialVocab, concurrency: int = DEFAULT_CONCURRENCY, endianess=gguf.GGUFEndian.LITTLE) -> None:
+    def write_all(fname_out: Path, ftype: GGMLFileType, params: Params, model: LazyModel, vocab: Vocab, svocab: gguf.SpecialVocab, concurrency: int = DEFAULT_CONCURRENCY, endianess: gguf.GGUFEndian = gguf.GGUFEndian.LITTLE) -> None:
         check_vocab_size(params, vocab)

         of = OutputFile(fname_out, endianess=endianess)
@@ -1114,11 +1112,15 @@ def do_dump_model(model_plus: ModelPlus) -> None:


 def main(args_in: list[str] | None = None) -> None:
+    output_choices = ["f32", "f16"]
+    if np.uint32(1) == np.uint32(1).newbyteorder("<"):
+        # We currently only support Q8_0 output on little endian systems.
+        output_choices.append("q8_0")
     parser = argparse.ArgumentParser(description="Convert a LLaMa model to a GGML compatible file")
     parser.add_argument("--dump", action="store_true", help="don't convert, just show what's in the model")
     parser.add_argument("--dump-single", action="store_true", help="don't convert, just show what's in a single model file")
     parser.add_argument("--vocab-only", action="store_true", help="extract only the vocab")
-    parser.add_argument("--outtype", choices=["f32", "f16", "q8_0"], help="output format - note: q8_0 may be very slow (default: f16 or f32 based on input)")
+    parser.add_argument("--outtype", choices=output_choices, help="output format - note: q8_0 may be very slow (default: f16 or f32 based on input)")
     parser.add_argument("--vocab-dir", type=Path, help="directory containing tokenizer.model, if separate from model file")
     parser.add_argument("--outfile", type=Path, help="path to write to; default: based on input")
     parser.add_argument("model", type=Path, help="directory containing model file, or model file itself (*.pth, *.pt, *.bin)")
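
Note on the endianness gate in the last hunk: NumPy's newbyteorder reinterprets the scalar's bytes under the requested byte order, so np.uint32(1) == np.uint32(1).newbyteorder("<") holds only on a little-endian host, and q8_0 output is therefore offered only there. A minimal standalone sketch of that check (variable names are illustrative, not part of the patch):

import numpy as np

# Reinterpreting the bytes of the native uint32 value 1 as little-endian leaves it
# unchanged on a little-endian host; on a big-endian host it becomes 16777216,
# so the comparison below is True only on little-endian machines.
host_is_little_endian = bool(np.uint32(1) == np.uint32(1).newbyteorder("<"))

output_choices = ["f32", "f16"]
if host_is_little_endian:
    # Q8_0 output is currently only supported on little-endian systems.
    output_choices.append("q8_0")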