@@ -127,6 +127,9 @@ llama_kv_cache_unified::llama_kv_cache_unified(
127127 ggml_type_name (type_k), (float )memory_size_k / (1024 .0f * 1024 .0f ),
128128 ggml_type_name (type_v), (float )memory_size_v / (1024 .0f * 1024 .0f ));
129129 }
130+
131+ const char * LLAMA_KV_CACHE_DEBUG = getenv (" LLAMA_KV_CACHE_DEBUG" );
132+ debug = LLAMA_KV_CACHE_DEBUG ? atoi (LLAMA_KV_CACHE_DEBUG) : 0 ;
130133}
131134
132135void llama_kv_cache_unified::clear (bool data) {
@@ -517,14 +520,12 @@ int32_t llama_kv_cache_unified::find_slot(const llama_ubatch & ubatch) const {
517520 return -1 ;
518521 }
519522
520- // #define FIND_SLOT_DEBUG 1
521- # if FIND_SLOT_DEBUG
522- LLAMA_LOG_WARN ( " begin : n = %5d, used = %5d, head = %5d, n_swa = %5d\n " , cells.used_max_p1 (), cells.get_used (), head, n_swa);
523+ if (debug > 0 ) {
524+ LLAMA_LOG_CONT ( " \n " );
525+ LLAMA_LOG_DEBUG ( " %s : n = %5d, used = %5d, head = %5d, size = %5d, n_swa = %5d\n " , __func__, cells.used_max_p1 (), cells.get_used (), head, get_size () , n_swa);
523526
524- // for debugging
525- {
526- std::string ss;
527- if (n_swa > 0 ) {
527+ if ((debug == 2 && n_swa > 0 ) || debug > 2 ) {
528+ std::string ss;
528529 for (uint32_t i = 0 ; i < cells.size (); ++i) {
529530 if (cells.is_empty (i)) {
530531 ss += ' .' ;
@@ -535,18 +536,41 @@ int32_t llama_kv_cache_unified::find_slot(const llama_ubatch & ubatch) const {
535536 ss += ' \n ' ;
536537 }
537538 }
539+ LLAMA_LOG_DEBUG (" \n %s\n " , ss.c_str ());
538540 }
539- LLAMA_LOG_WARN (" \n %s\n " , ss.c_str ());
540- }
541541
542- for (int s = 0 ; s < LLAMA_MAX_PARALLEL_SEQUENCES; ++s) {
543- if (cells.seq_pos_min (s) < 0 ) {
544- continue ;
542+ if ((debug == 2 && n_swa > 0 ) || debug > 2 ) {
543+ std::string ss;
544+ for (uint32_t i = 0 ; i < cells.size (); ++i) {
545+ std::string cur;
546+ if (cells.is_empty (i)) {
547+ cur = ' .' ;
548+ } else {
549+ cur = std::to_string (cells.pos_get (i));
550+ }
551+ const int n = cur.size ();
552+ for (int j = 0 ; j < 5 - n; ++j) {
553+ cur += ' ' ;
554+ }
555+ ss += cur;
556+ if (i%256 == 255 ) {
557+ ss += " *" ;
558+ }
559+ if (i%64 == 63 ) {
560+ ss += ' \n ' ;
561+ }
562+ }
563+ LLAMA_LOG_DEBUG (" \n %s\n " , ss.c_str ());
545564 }
546565
547- LLAMA_LOG_WARN (" kv_cells: n_swa = %4d, min[%d] = %5d, max[%d] = %5d\n " , n_swa, s, cells.seq_pos_min (s), s, cells.seq_pos_max (s));
566+ for (int s = 0 ; s < LLAMA_MAX_PARALLEL_SEQUENCES; ++s) {
567+ if (cells.seq_pos_min (s) < 0 ) {
568+ continue ;
569+ }
570+
571+ LLAMA_LOG_DEBUG (" %s: min[%d] = %5d, max[%d] = %5d\n " , __func__, s, cells.seq_pos_min (s), s, cells.seq_pos_max (s));
572+ }
548573 }
549- #endif
550574
551575 uint32_t n_tested = 0 ;
552576
0 commit comments