@@ -96,4 +96,32 @@ export const GGUF_QUANT_DESCRIPTIONS: Record<GGMLQuantizationType, { txt: string
9696 txt : "4-bit quantization (q). Super-blocks with 256 weights. Weight w is obtained using super_block_scale & importance matrix." ,
9797 src_url : "https://github.com/ggerganov/llama.cpp/pull/5590" ,
9898 } ,
99+ [ GGMLQuantizationType . I8 ] : {
100+ txt : "8-bit fixed-width integer number." ,
101+ src_url : "https://github.com/ggerganov/llama.cpp/pull/6045" ,
102+ } ,
103+ [ GGMLQuantizationType . I16 ] : {
104+ txt : "16-bit fixed-width integer number." ,
105+ src_url : "https://github.com/ggerganov/llama.cpp/pull/6045" ,
106+ } ,
107+ [ GGMLQuantizationType . I32 ] : {
108+ txt : "32-bit fixed-width integer number." ,
109+ src_url : "https://github.com/ggerganov/llama.cpp/pull/6045" ,
110+ } ,
111+ [ GGMLQuantizationType . I64 ] : {
112+ txt : "64-bit fixed-width integer number." ,
113+ src_url : "https://github.com/ggerganov/llama.cpp/pull/6062" ,
114+ } ,
115+ [ GGMLQuantizationType . F64 ] : {
116+ txt : "64-bit standard IEEE 754 double-precision floating-point number." ,
117+ src_url : "https://en.wikipedia.org/wiki/Double-precision_floating-point_format" ,
118+ } ,
119+ [ GGMLQuantizationType . IQ1_M ] : {
120+ txt : "1-bit quantization (q). Super-blocks with 256 weights. Weight w is obtained using super_block_scale & importance matrix, resulting in 1.75 bits-per-weight." ,
121+ src_url : "https://github.com/ggerganov/llama.cpp/pull/6302" ,
122+ } ,
123+ [ GGMLQuantizationType . BF16 ] : {
124+ txt : "16-bit shortened version of the 32-bit IEEE 754 single-precision floating-point number." ,
125+ src_url : "https://en.wikipedia.org/wiki/Bfloat16_floating-point_format" ,
126+ } ,
99127} ;
0 commit comments