1717"
1818" start the llama.cpp server with a FIM-compatible model. for example:
1919"
20- " $ llama-server -m {model.gguf} --port 8012 -ngl 99 -fa -dt 0.1 --ubatch-size 512 --batch-size 1024 --cache-reuse 512
20+ " $ llama-server -m {model.gguf} --port 8012 -ngl 99 -fa -dt 0.1 --ubatch-size 512 --batch-size 1024 --cache-reuse 64
2121"
2222" --batch-size [512, model max context]
2323"
3333"
3434" :call llama#init()
3535"
36+ " more info: https://github.com/ggerganov/llama.cpp/pull/9787/files
37+ "
3638
37- " color of the suggested text
39+ " colors (adjust to your liking)
3840highlight llama_hl_hint guifg= #ff772f
3941highlight llama_hl_info guifg= #77 ff2f
4042
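Note: the header comments above amount to a two-step setup: start llama-server with a FIM-capable model, then initialize the plugin from Vim. The command change also lowers --cache-reuse, which sets the minimum chunk size the server will try to reuse from the KV cache via shifting, so smaller matching prefixes become reusable. A minimal sketch of the Vim side (assumes llama.vim is already on the runtimepath; using an autocmd for this is an illustrative choice, not taken from the diff):

    " in ~/.vimrc: initialize the plugin once Vim has started
    autocmd VimEnter * call llama#init()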
@@ -154,6 +156,8 @@ function! llama#init()
     endif
 endfunction
 
+" compute how similar two chunks of text are
+" 0 - no similarity, 1 - high similarity
 " TODO: figure out something better
 function! s:chunk_sim(c0, c1)
     let l:lines0 = len(a:c0)
@@ -173,17 +177,23 @@ function! s:chunk_sim(c0, c1)
     return 2.0 * l:common / (l:lines0 + l:lines1)
 endfunction
 
-" pick a chunk from the provided text and queue it for processing
+" pick a random chunk of size g:llama_config.ring_chunk_size from the provided text and queue it for processing
+"
+" no_mod   - do not pick chunks from buffers with pending changes
+" do_evict - evict chunks that are very similar to the new one
+"
 function! s:pick_chunk(text, no_mod, do_evict)
     " do not pick chunks from buffers with pending changes or buffers that are not files
     if a:no_mod && (getbufvar(bufnr('%'), '&modified') || !buflisted(bufnr('%')) || !filereadable(expand('%')))
         return
     endif
 
+    " if the extra context option is disabled - do nothing
     if g:llama_config.ring_n_chunks <= 0
         return
     endif
 
+    " don't pick very small chunks
     if len(a:text) < 3
         return
     endif
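The comments added to s:chunk_sim describe a score between 0 and 1, and the return expression 2.0 * l:common / (l:lines0 + l:lines1) is a Dice-style coefficient over shared lines. A self-contained sketch of that idea; the naive quadratic matching loop is an assumption about the elided function body, not copied from it:

    " toy line-overlap similarity: 0 = nothing shared, 1 = identical chunks
    function! s:ChunkSimSketch(c0, c1)
        let l:common = 0
        for l:line0 in a:c0
            for l:line1 in a:c1
                if l:line0 ==# l:line1
                    let l:common += 1
                    break
                endif
            endfor
        endfor
        " Dice coefficient: matched lines counted against both chunk sizes
        return 2.0 * l:common / (len(a:c0) + len(a:c1))
    endfunction

For example, s:ChunkSimSketch(['a', 'b'], ['b', 'c']) returns 0.5: one shared line against a combined size of four.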
@@ -220,9 +230,9 @@ function! s:pick_chunk(text, no_mod, do_evict)
         return
     endif
 
-    " evict chunks that are very similar to the new one
+    " evict queued chunks that are very similar to the new one
     for i in range(len(s:ring_queued) - 1, 0, -1)
-        if s:chunk_sim(s:ring_queued[i].data, l:chunk) > 0.5
+        if s:chunk_sim(s:ring_queued[i].data, l:chunk) > 0.9
             if a:do_evict
                 call remove(s:ring_queued, i)
                 let s:ring_n_evict += 1
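With s:chunk_sim as defined above, the jump from 0.5 to 0.9 makes eviction much more conservative. A worked example: two 4-line chunks sharing 3 lines score 2 * 3 / (4 + 4) = 0.75, which cleared the old 0.5 threshold but not the new 0.9, so such moderately similar chunks now both stay in the ring; only near-duplicates get evicted.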
@@ -234,7 +244,7 @@ function! s:pick_chunk(text, no_mod, do_evict)
 
     " also from s:ring_chunks
     for i in range(len(s:ring_chunks) - 1, 0, -1)
-        if s:chunk_sim(s:ring_chunks[i].data, l:chunk) > 0.5
+        if s:chunk_sim(s:ring_chunks[i].data, l:chunk) > 0.9
             if a:do_evict
                 call remove(s:ring_chunks, i)
                 let s:ring_n_evict += 1
@@ -244,6 +254,7 @@ function! s:pick_chunk(text, no_mod, do_evict)
         endif
     endfor
 
+    " TODO: become parameter ?
     if len(s:ring_queued) == 16
         call remove(s:ring_queued, 0)
     endif
@@ -253,7 +264,8 @@ function! s:pick_chunk(text, no_mod, do_evict)
253264 " let &statusline = 'extra context: ' . len(s:ring_chunks) . ' / ' . len(s:ring_queued)
254265endfunction
255266
256- " called every g:llama_config.ring_update_ms, processed chunks are moved to s:ring_chunks
267+ " picks a queued chunk, sends it for processing and adds it to s:ring_chunks
268+ " called every g:llama_config.ring_update_ms
257269function ! s: ring_update ()
258270 call timer_start (g: llama_config .ring_update_ms, {- > s: ring_update ()})
259271
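Note: s:ring_update re-arms itself at the top of every call with a fresh one-shot timer, so the background processing keeps ticking without tracking a timer id. The same pattern in isolation (function name and interval are illustrative, not from this diff):

    " periodic tick without timer bookkeeping: each invocation schedules the next
    function! s:Tick()
        call timer_start(1000, {-> s:Tick()})
        " ... periodic work goes here ...
    endfunction

timer_start() also accepts {'repeat': -1} to repeat on its own; the re-arm form used here additionally allows changing the interval on every tick.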
@@ -306,15 +318,21 @@ function! s:ring_update()
         \ g:llama_config.endpoint, shellescape(l:request)
         \ )
 
+    " no callbacks because we don't need to process the response
     call jobstart(l:curl_command, {})
 endfunction
 
+" necessary for 'inoremap <expr>'
 function! llama#fim_inline(is_auto, on_hold) abort
     call llama#fim(a:is_auto, a:on_hold)
     return ''
 endfunction
 
+" the main FIM call
+" takes local context around the cursor and sends it together with the extra context
+" to the llama.cpp server for completion
 function! llama#fim(is_auto, on_hold) abort
+    " we already have a suggestion for the current cursor position
     if a:on_hold && (s:hint_shown || (s:pos_x == col('.') - 1 && s:pos_y == line('.')))
         return
     endif
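The 'inoremap <expr>' comment explains the wrapper: an expression mapping inserts whatever the expression evaluates to, so llama#fim_inline fires the FIM request as a side effect and returns an empty string to insert nothing. A possible mapping (the key choice is an assumption, not part of this diff):

    " manually trigger FIM at the cursor; the returned '' inserts nothing
    inoremap <expr> <C-F> llama#fim_inline(v:false, v:false)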
@@ -415,6 +433,7 @@ function! llama#fim(is_auto, on_hold) abort
415433 " TODO: per-file location
416434 let l: delta_y = abs (s: pos_y - s: pos_y_pick )
417435
436+ " gather some extra context nearby and process it in the background
418437 " only gather chunks if the cursor has moved a lot
419438 " TODO: something more clever? reranking?
420439 if a: is_auto && l: delta_y > 32
@@ -474,7 +493,7 @@ function! s:on_move()
     call llama#fim_cancel()
 endfunction
 
-" callback that processes the result from the server
+" callback that processes the FIM result from the server and displays the suggestion
 function! s:fim_on_stdout(job_id, data, event) dict
     let l:raw = join(a:data, "\n")
     if len(l:raw) == 0
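Both async styles appear in this diff: s:ring_update calls jobstart(l:curl_command, {}) with no callbacks because the response is only needed to warm the server's cache, while the main FIM request installs a handler like s:fim_on_stdout above. A minimal skeleton of the callback variant (Neovim's jobstart API; the curl target, port and function name are illustrative):

    " collect the job's stdout and act on it; 'stdout_buffered' delivers the
    " whole output in one on_stdout call instead of line-by-line chunks
    function! s:OnStdout(job_id, data, event)
        let l:raw = join(a:data, "\n")
        if len(l:raw) == 0
            return
        endif
        echom 'server response: ' . l:raw
    endfunction

    call jobstart(['curl', '-s', 'http://127.0.0.1:8012/health'], {
        \ 'on_stdout': function('s:OnStdout'),
        \ 'stdout_buffered': v:true
        \ })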