kv-cache : add comments

ggerganov · ggerganov · commit f23e4ccaae4c · 2025-05-30T11:46:00.000+03:00
ggml-ci
diff --git a/src/llama-kv-cache.cpp b/src/llama-kv-cache.cpp
@@ -493,9 +493,13 @@ int32_t llama_kv_cache_unified::find_slot(const llama_ubatch & ubatch) const {
         LLAMA_LOG_WARN("\n%s\n", ss.c_str());
     }
 
-    LLAMA_LOG_WARN("kv_cells: n_swa = %4d, min[0] = %5d, max[0] = %5d\n", n_swa, cells.seq_pos_min(0), cells.seq_pos_max(0));
-    LLAMA_LOG_WARN("kv_cells: n_swa = %4d, min[1] = %5d, max[1] = %5d\n", n_swa, cells.seq_pos_min(1), cells.seq_pos_max(1));
-    LLAMA_LOG_WARN("kv_cells: n_swa = %4d, min[2] = %5d, max[2] = %5d\n", n_swa, cells.seq_pos_min(2), cells.seq_pos_max(2));
+    for (int s = 0; s < LLAMA_MAX_PARALLEL_SEQUENCES; ++s) {
+        if (cells.seq_pos_min(s) < 0) {
+            continue;
+        }
+
+        LLAMA_LOG_WARN("kv_cells: n_swa = %4d, min[%d] = %5d, max[%d] = %5d\n", n_swa, s, cells.seq_pos_min(s), s, cells.seq_pos_max(s));
+    }
 #endif
 
     uint32_t n_tested = 0;
@@ -538,6 +542,9 @@ int32_t llama_kv_cache_unified::find_slot(const llama_ubatch & ubatch) const {
                     const llama_seq_id seq_id_cell = cells.seq_get(head_cur + i);
 
                     // SWA mask
+                    // note: we insert only in the cell with minimum pos in order to preserve the invariant that
+                    //       all positions between [pos_min, pos_max] for each sequence will be present in the cache
+                    //       ref: https://github.com/ggml-org/llama.cpp/pull/13746#issuecomment-2916057092
                     if (pos_cell == seq_pos_min[seq_id_cell] &&
                         is_masked_swa(pos_cell, cells.seq_pos_max(seq_id_cell) + 1)) {
                         seq_pos_min[seq_id_cell]++;
diff --git a/src/llama-kv-cells.h b/src/llama-kv-cells.h
@@ -138,6 +138,7 @@ class llama_kv_cells_unified {
         }
     }
 
+    // clear a non-empty cell
     void rm(uint32_t i) {
         assert(i < pos.size());
         assert(pos[i] != -1);
@@ -202,13 +203,15 @@ class llama_kv_cells_unified {
         return false;
     }
 
+    // number of different sequences in the cell
     int seq_count(uint32_t i) const {
         assert(i < pos.size());
         assert(pos[i] != -1);
 
         return seq[i].count();
     }
 
+    // check if the cell contains seq_id
     bool seq_has(uint32_t i, llama_seq_id seq_id) const {
         assert(i < pos.size());
         assert(seq_id >= 0);
@@ -226,6 +229,8 @@ class llama_kv_cells_unified {
         seq_pos[seq_id].insert(pos[i]);
     }
 
+    // return the sequence id of this cell
+    // note: call only for cells with exactly one sequence
     llama_seq_id seq_get(uint32_t i) const {
         assert(seq[i].count() == 1);
 

Original file line number	Diff line number	Diff line change
`@@ -138,6 +138,7 @@ class llama_kv_cells_unified {`
`138`	`138`	`}`
`139`	`139`	`}`
`140`	`140`
	`141`	`+ // clear a non-empty cell`
`141`	`142`	`void rm(uint32_t i) {`
`142`	`143`	`assert(i < pos.size());`
`143`	`144`	`assert(pos[i] != -1);`
`@@ -202,13 +203,15 @@ class llama_kv_cells_unified {`
`202`	`203`	`return false;`
`203`	`204`	`}`
`204`	`205`
	`206`	`+ // number of different sequences in the cell`
`205`	`207`	`int seq_count(uint32_t i) const {`
`206`	`208`	`assert(i < pos.size());`
`207`	`209`	`assert(pos[i] != -1);`
`208`	`210`
`209`	`211`	`return seq[i].count();`
`210`	`212`	`}`
`211`	`213`
	`214`	`+ // check if the cell contains seq_id`
`212`	`215`	`bool seq_has(uint32_t i, llama_seq_id seq_id) const {`
`213`	`216`	`assert(i < pos.size());`
`214`	`217`	`assert(seq_id >= 0);`
`@@ -226,6 +229,8 @@ class llama_kv_cells_unified {`
`226`	`229`	`seq_pos[seq_id].insert(pos[i]);`
`227`	`230`	`}`
`228`	`231`
	`232`	`+ // return the sequence id of this cell`
	`233`	`+ // note: call only for cells with exactly one sequence`
`229`	`234`	`llama_seq_id seq_get(uint32_t i) const {`
`230`	`235`	`assert(seq[i].count() == 1);`
`231`	`236`