From eee8d481d910afeddc20709dd703fff5d012ca7c Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Mon, 9 Jun 2025 10:53:26 +0300 Subject: [PATCH 1/3] kv-cache : fix shift ggml-ci --- src/llama-kv-cache-unified.cpp | 4 +--- src/llama-kv-cells.h | 5 +++-- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/src/llama-kv-cache-unified.cpp b/src/llama-kv-cache-unified.cpp index 3a40463fd29ca..d8f4e7ffc4e6c 100644 --- a/src/llama-kv-cache-unified.cpp +++ b/src/llama-kv-cache-unified.cpp @@ -944,11 +944,9 @@ llm_graph_result_ptr llama_kv_cache_unified::build_graph_shift( const auto & n_embd_head_k = hparams.n_embd_head_k; //const auto & n_embd_head_v = hparams.n_embd_head_v; - //GGML_ASSERT(kv_self->size == n_ctx); - auto inp = std::make_unique(this); - inp->k_shift = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, cparams.n_ctx); + inp->k_shift = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, cells.size()); ggml_set_input(inp->k_shift); for (const auto & layer : layers) { diff --git a/src/llama-kv-cells.h b/src/llama-kv-cells.h index 9e2c4d927699d..94c842ce019c6 100644 --- a/src/llama-kv-cells.h +++ b/src/llama-kv-cells.h @@ -317,8 +317,6 @@ class llama_kv_cells_unified { pos[i] += d; shift[i] += d; - seq_pos_add(i); - has_shift = true; if (pos[i] < 0) { @@ -326,12 +324,15 @@ class llama_kv_cells_unified { seq[i].reset(); pos[i] = -1; + shift[i] = 0; used.erase(i); return true; } + seq_pos_add(i); + return false; } From d564e04ce87bae217f75906f960b9da5852cedeb Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Mon, 9 Jun 2025 19:24:25 +0300 Subject: [PATCH 2/3] cont : reset shift[i] ggml-ci --- src/llama-kv-cells.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/llama-kv-cells.h b/src/llama-kv-cells.h index 94c842ce019c6..d71853592f23e 100644 --- a/src/llama-kv-cells.h +++ b/src/llama-kv-cells.h @@ -144,9 +144,10 @@ class llama_kv_cells_unified { assert(pos[i] != -1); seq_pos_rm(i); + seq[i].reset(); pos[i] = -1; - seq[i].reset(); + shift[i] = 0; used.erase(i); } @@ -164,6 +165,7 @@ class llama_kv_cells_unified { if (seq[i].none()) { pos[i] = -1; + shift[i] = 0; used.erase(i); @@ -192,6 +194,7 @@ class llama_kv_cells_unified { seq[i].reset(); pos[i] = -1; + shift[i] = 0; used.erase(i); @@ -320,8 +323,6 @@ class llama_kv_cells_unified { has_shift = true; if (pos[i] < 0) { - seq_pos_rm(i); - seq[i].reset(); pos[i] = -1; shift[i] = 0; From c257a8871cc444df660e2dfa49d2b20c9fe77124 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Mon, 9 Jun 2025 20:45:56 +0300 Subject: [PATCH 3/3] cont : fix defrag erasing cells that didn't move ggml-ci --- src/llama-kv-cache-unified.cpp | 2 +- src/llama-kv-cells.h | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/src/llama-kv-cache-unified.cpp b/src/llama-kv-cache-unified.cpp index d8f4e7ffc4e6c..3566d5fd4d72b 100644 --- a/src/llama-kv-cache-unified.cpp +++ b/src/llama-kv-cache-unified.cpp @@ -462,7 +462,7 @@ bool llama_kv_cache_unified::update(llama_context * lctx, bool do_shift, const d for (uint32_t i = 0; i < n_kv; ++i) { assert(dinfo.ids[i] <= n_kv); - if (dinfo.ids[i] == n_kv) { + if (dinfo.ids[i] == n_kv || dinfo.ids[i] == i) { continue; } diff --git a/src/llama-kv-cells.h b/src/llama-kv-cells.h index d71853592f23e..acf30aebec69b 100644 --- a/src/llama-kv-cells.h +++ b/src/llama-kv-cells.h @@ -80,6 +80,9 @@ class llama_kv_cells_unified { assert(isrc < pos.size()); assert(idst < pos.size()); + assert(pos[idst] == -1); + assert(pos[isrc] != -1); + pos [idst] = pos [isrc]; shift[idst] = shift[isrc]; seq [idst] = seq [isrc];