From eee8d481d910afeddc20709dd703fff5d012ca7c Mon Sep 17 00:00:00 2001
From: Georgi Gerganov <ggerganov@gmail.com>
Date: Mon, 9 Jun 2025 10:53:26 +0300
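Subject: [PATCH 1/3] kv-cache : fix shift

Size the k_shift input tensor in build_graph_shift() by cells.size()
instead of cparams.n_ctx, and drop the stale commented-out assert.

In llama_kv_cells_unified, move the seq_pos_add(i) call after the
pos[i] < 0 check so that it runs only for cells that are kept, and
clear shift[i] when a cell is released because its position became
negative.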

ggml-ci
---
 src/llama-kv-cache-unified.cpp | 4 +---
 src/llama-kv-cells.h           | 5 +++--
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/src/llama-kv-cache-unified.cpp b/src/llama-kv-cache-unified.cpp
index 3a40463fd29ca..d8f4e7ffc4e6c 100644
--- a/src/llama-kv-cache-unified.cpp
+++ b/src/llama-kv-cache-unified.cpp
@@ -944,11 +944,9 @@ llm_graph_result_ptr llama_kv_cache_unified::build_graph_shift(
     const auto & n_embd_head_k = hparams.n_embd_head_k;
   //const auto & n_embd_head_v = hparams.n_embd_head_v;
 
-    //GGML_ASSERT(kv_self->size == n_ctx);
-
     auto inp = std::make_unique<llm_graph_input_k_shift>(this);
 
-    inp->k_shift = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, cparams.n_ctx);
+    inp->k_shift = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, cells.size());
     ggml_set_input(inp->k_shift);
 
     for (const auto & layer : layers) {
diff --git a/src/llama-kv-cells.h b/src/llama-kv-cells.h
index 9e2c4d927699d..94c842ce019c6 100644
--- a/src/llama-kv-cells.h
+++ b/src/llama-kv-cells.h
@@ -317,8 +317,6 @@ class llama_kv_cells_unified {
         pos[i]   += d;
         shift[i] += d;
 
-        seq_pos_add(i);
-
         has_shift = true;
 
         if (pos[i] < 0) {
@@ -326,12 +324,15 @@ class llama_kv_cells_unified {
 
             seq[i].reset();
             pos[i] = -1;
+            shift[i] = 0;
 
             used.erase(i);
 
             return true;
         }
 
+        seq_pos_add(i);
+
         return false;
     }
 

From d564e04ce87bae217f75906f960b9da5852cedeb Mon Sep 17 00:00:00 2001
From: Georgi Gerganov <ggerganov@gmail.com>
Date: Mon, 9 Jun 2025 19:24:25 +0300
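Subject: [PATCH 2/3] cont : reset shift[i]

Set shift[i] = 0 in three more places in llama_kv_cells_unified where
a cell is marked empty with pos[i] = -1, matching the reset added in
the previous patch.

Also remove the seq_pos_rm(i) call from the pos[i] < 0 branch that the
previous patch modified.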

ggml-ci
---
 src/llama-kv-cells.h | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/src/llama-kv-cells.h b/src/llama-kv-cells.h
index 94c842ce019c6..d71853592f23e 100644
--- a/src/llama-kv-cells.h
+++ b/src/llama-kv-cells.h
@@ -144,9 +144,10 @@ class llama_kv_cells_unified {
         assert(pos[i] != -1);
 
         seq_pos_rm(i);
+        seq[i].reset();
 
         pos[i] = -1;
-        seq[i].reset();
+        shift[i] = 0;
 
         used.erase(i);
     }
@@ -164,6 +165,7 @@ class llama_kv_cells_unified {
 
         if (seq[i].none()) {
             pos[i] = -1;
+            shift[i] = 0;
 
             used.erase(i);
 
@@ -192,6 +194,7 @@ class llama_kv_cells_unified {
             seq[i].reset();
 
             pos[i] = -1;
+            shift[i] = 0;
 
             used.erase(i);
 
@@ -320,8 +323,6 @@ class llama_kv_cells_unified {
         has_shift = true;
 
         if (pos[i] < 0) {
-            seq_pos_rm(i);
-
             seq[i].reset();
             pos[i] = -1;
             shift[i] = 0;

From c257a8871cc444df660e2dfa49d2b20c9fe77124 Mon Sep 17 00:00:00 2001
From: Georgi Gerganov <ggerganov@gmail.com>
Date: Mon, 9 Jun 2025 20:45:56 +0300
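Subject: [PATCH 3/3] cont : fix defrag erasing cells that didn't move

In llama_kv_cache_unified::update(), also skip cells for which
dinfo.ids[i] == i, in addition to the existing dinfo.ids[i] == n_kv
case.

In llama_kv_cells_unified, assert that pos[idst] == -1 and
pos[isrc] != -1 before copying pos, shift and seq between the two
cells.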

ggml-ci
---
 src/llama-kv-cache-unified.cpp | 2 +-
 src/llama-kv-cells.h           | 3 +++
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/llama-kv-cache-unified.cpp b/src/llama-kv-cache-unified.cpp
index d8f4e7ffc4e6c..3566d5fd4d72b 100644
--- a/src/llama-kv-cache-unified.cpp
+++ b/src/llama-kv-cache-unified.cpp
@@ -462,7 +462,7 @@ bool llama_kv_cache_unified::update(llama_context * lctx, bool do_shift, const d
             for (uint32_t i = 0; i < n_kv; ++i) {
                 assert(dinfo.ids[i] <= n_kv);
 
-                if (dinfo.ids[i] == n_kv) {
+                if (dinfo.ids[i] == n_kv || dinfo.ids[i] == i) {
                     continue;
                 }
 
diff --git a/src/llama-kv-cells.h b/src/llama-kv-cells.h
index d71853592f23e..acf30aebec69b 100644
--- a/src/llama-kv-cells.h
+++ b/src/llama-kv-cells.h
@@ -80,6 +80,9 @@ class llama_kv_cells_unified {
         assert(isrc < pos.size());
         assert(idst < pos.size());
 
+        assert(pos[idst] == -1);
+        assert(pos[isrc] != -1);
+
         pos  [idst] = pos  [isrc];
         shift[idst] = shift[isrc];
         seq  [idst] = seq  [isrc];