LLM_ARCH_DEEPSEEK2
1 parent 8c02442 · commit ddab5e4
src/llama-context.cpp
@@ -2278,6 +2278,11 @@ llama_context * llama_init_from_model(
         params.flash_attn = false;
     }
 
+    if (params.flash_attn && model->arch == LLM_ARCH_DEEPSEEK2) {
+        LLAMA_LOG_WARN("%s: flash_attn is not compatible with Deepseek2 - forcing off\n", __func__);
+        params.flash_attn = false;
+    }
+
     if (ggml_is_quantized(params.type_v) && !params.flash_attn) {
         LLAMA_LOG_ERROR("%s: V cache quantization requires flash_attn\n", __func__);
         return nullptr;
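
For context, the effect of this guard is that a caller requesting flash attention on a DeepSeek2 model now gets a warning and a context with flash_attn silently disabled, rather than an incompatible configuration. A minimal caller-side sketch, assuming the llama.cpp public API of this period (llama_model_load_from_file, llama_init_from_model, which the hunk header above confirms) and a placeholder model path:

// Sketch only: shows how the new guard behaves from the caller's side.
// The model path is a placeholder; the loader/free function names are
// assumptions based on the llama.cpp headers of this era.
#include "llama.h"
#include <cstdio>

int main(void) {
    llama_backend_init();

    llama_model_params mparams = llama_model_default_params();
    llama_model * model = llama_model_load_from_file("deepseek2.gguf", mparams);
    if (model == NULL) {
        fprintf(stderr, "failed to load model\n");
        return 1;
    }

    llama_context_params cparams = llama_context_default_params();
    cparams.flash_attn = true; // requested by the caller

    // With this commit, a DeepSeek2 model logs
    //   "flash_attn is not compatible with Deepseek2 - forcing off"
    // and initialization proceeds with flash_attn disabled.
    llama_context * ctx = llama_init_from_model(model, cparams);
    if (ctx == NULL) {
        fprintf(stderr, "failed to create context\n");
        llama_model_free(model);
        return 1;
    }

    llama_free(ctx);
    llama_model_free(model);
    llama_backend_free();
    return 0;
}

Note the ordering of the checks: the DeepSeek2 guard runs before the V-cache quantization check, so requesting a quantized type_v together with flash_attn on a DeepSeek2 model now trips "V cache quantization requires flash_attn" and llama_init_from_model returns nullptr.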