1+ " LLM-based code completion using llama.cpp
2+ "
3+ " requires:
4+ " - neovim
5+ " - llama.cpp server instance
6+ "
17" sample config:
28"
3- " - Ctrl+F - trigger FIM completion manually
9+ " - Tab - accept the current suggestion
10+ " - Shift+Tab - accept just the first line
11+ " - Ctrl+F - trigger FIM completion manually
12+ "
13+ " make symlink or copy this file to ~/.config/nvim/autoload/llama.vim
14+ "
15+ " start the llama.cpp server with a FIM-compatible model. for example:
16+ "
17+ " llama-server -m {model.gguf} --port 8012 -ngl 99 -fa -ub 1024 -b 2048
18+ "
19+ " adjust the batch size to control how much of the provided context will be used during the inference
20+ " lower values will use smaller part of the context, which will result in faster processing
421"
5- " run this once to initialise the plugin :
22+ " run this once to initialise llama.vim :
623"
7- " :call llama#init()
24+ " :call llama#init()
825"
926
1027" color of the suggested text
1128highlight llama_hl_hint guifg= #ff772f
1229highlight llama_hl_info guifg= #77 ff2f
1330
31+ " endpoint: llama.cpp server endpoint
32+ " n_prefix: number of lines to include in the prefix
33+ " n_suffix: number of lines to include in the suffix
34+ " n_predict: max number of tokens to predict
35+ " t_max_prompt_ms: max alloted time for the text generation
36+ " show_info: show extra info about the inference
37+ " auto_fim: trigger FIM completion automatically on cursor movement
1438let s: default_config = {
1539 \ ' endpoint' : ' http://127.0.0.1:8012/infill' ,
1640 \ ' n_prefix' : 128 ,
1741 \ ' n_suffix' : 128 ,
1842 \ ' n_predict' : 64 ,
1943 \ ' t_max_prompt_ms' : 300 ,
2044 \ ' t_max_predict_ms' : 200 ,
45+ \ ' show_info' : v: true ,
2146 \ ' auto_fim' : v: true ,
22- \ ' stop' : [" \n " ]
2347 \ }
2448
2549let g: llama_config = get (g: , ' llama_config' , s: default_config )
2650
2751function ! llama#init ()
28- let s: pos_x = 0
52+ let s: pos_x = 0 " cursor position upon start of completion
2953 let s: pos_y = 0
3054 let s: pos_x0 = 0 " pos_x corrected for end-of-line edge case
3155
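Since `g:llama_config` is read with `get(g:, 'llama_config', s:default_config)`, a user-supplied dict replaces the defaults wholesale rather than being merged, so an override should provide every key. A minimal sketch of such an override in `init.vim` (the keys are the ones from `s:default_config` above; the values here are arbitrary, and `auto_fim = v:false` leaves only the manual Ctrl+F trigger):

```vim
" in init.vim, before llama#init() is called
let g:llama_config = {
    \ 'endpoint':         'http://127.0.0.1:8012/infill',
    \ 'n_prefix':         256,
    \ 'n_suffix':         256,
    \ 'n_predict':        128,
    \ 't_max_prompt_ms':  300,
    \ 't_max_predict_ms': 500,
    \ 'show_info':        v:true,
    \ 'auto_fim':         v:false,
    \ }
```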
@@ -46,8 +70,8 @@ function! llama#init()
 
     augroup llama
         autocmd!
-        autocmd InsertEnter * inoremap <buffer> <silent> <C-F> <C-O>:call llama#fim(v:false)<CR>
-        autocmd InsertLeave * call llama#fim_cancel()
+        autocmd InsertEnter    * inoremap <buffer> <silent> <C-F> <C-O>:call llama#fim(v:false)<CR>
+        autocmd InsertLeavePre * call llama#fim_cancel()
 
         autocmd CursorMoved * call llama#fim_cancel()
     augroup END
@@ -90,7 +114,6 @@ function! llama#fim(is_auto) abort
         \ 'prompt':         "",
         \ 'input_prefix':   l:prefix,
         \ 'input_suffix':   l:suffix,
-        "\ 'stop':          g:llama_config.stop,
         \ 'n_predict':      g:llama_config.n_predict,
         \ 'penalty_last_n': 0,
         \ 'top_k':          100,
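The dict above is the JSON body sent to the `/infill` endpoint; the dispatch itself is elided from this diff. A minimal sketch of how such a request could be sent asynchronously, assuming `l:request` holds the `json_encode()`'d body and that the plugin shells out to curl via `jobstart()` (the names `s:current_job`, `s:fim_on_stdout` and the `is_auto` key appear elsewhere in this diff; the exact curl flags are an assumption):

```vim
" POST the FIM request to the llama.cpp server without blocking the editor
let l:curl_command = printf(
    \ "curl --silent --no-buffer --request POST --url %s --header \"Content-Type: application/json\" --data %s",
    \ g:llama_config.endpoint, shellescape(l:request)
    \ )

" the opts dict becomes 'self' inside the callback, so is_auto rides along
let s:current_job = jobstart(l:curl_command, {
    \ 'on_stdout': function('s:fim_on_stdout'),
    \ 'stdout_buffered': v:true,
    \ 'is_auto': a:is_auto
    \ })
```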
@@ -126,16 +149,23 @@ function! llama#fim(is_auto) abort
     endif
 endfunction
 
-function! llama#fim_accept()
+" if first_line == v:true, accept only the first line of the response
+function! llama#fim_accept(first_line)
     " insert the suggestion at the cursor location
     if s:can_accept && len(s:content) > 0
         call setline(s:pos_y, s:line_cur[:(s:pos_x0 - 1)] . s:content[0])
         if len(s:content) > 1
-            call append(s:pos_y, s:content[1:-1])
+            if !a:first_line
+                call append(s:pos_y, s:content[1:-1])
+            endif
         endif
 
         " move the cursor to the end of the accepted text
-        call cursor(s:pos_y + len(s:content) - 1, s:pos_x + s:pos_dx)
+        if !a:first_line
+            call cursor(s:pos_y + len(s:content) - 1, s:pos_x + s:pos_dx)
+        else
+            call cursor(s:pos_y, s:pos_x + len(s:content[0]) - 1)
+        endif
     endif
 
     call llama#fim_cancel()
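As a concrete example: if the server returned the two-line suggestion `foo()` / `bar()`, pressing Tab (`first_line == v:false`) inserts both lines and leaves the cursor at the end of `bar()`, while Shift+Tab (`first_line == v:true`) inserts only `foo()`, places the cursor at the end of that line, and discards the rest of the suggestion.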
@@ -146,6 +176,11 @@ function! llama#fim_cancel()
         call jobstop(s:current_job)
     endif
 
+    if s:timer_fim != -1
+        call timer_stop(s:timer_fim)
+        let s:timer_fim = -1
+    endif
+
     " clear the virtual text
     let l:bufnr = bufnr('%')
 
@@ -155,7 +190,9 @@ function! llama#fim_cancel()
     call nvim_buf_clear_namespace(l:bufnr, l:id_vt_fim,  0, -1)
     call nvim_buf_clear_namespace(l:bufnr, l:id_vt_info, 0, -1)
 
+    " remove the mappings
     silent! iunmap <buffer> <Tab>
+    silent! iunmap <buffer> <S-Tab>
     silent! iunmap <buffer> <Esc>
 
     augroup llama_insert
@@ -173,6 +210,8 @@ function! s:fim_auto_enable()
     augroup END
 endfunction
 
+" auto-start a fim job a short time after the cursor has moved
+" if there is already a job queued, cancel it first
 function! s:fim_auto()
     if s:current_job != v:null
         call jobstop(s:current_job)
@@ -189,7 +228,7 @@ function! s:fim_auto()
     let s:timer_fim = timer_start(500, {-> llama#fim(v:true)})
 endfunction
 
-
+" callback that processes the result from the server
 function! s:fim_on_stdout(job_id, data, event) dict
     let l:raw = join(a:data, "\n")
     if len(l:raw) == 0
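The two hunks above give `s:fim_auto` a simple debounce: each cursor movement cancels the in-flight request and re-arms a 500 ms timer. A minimal sketch of the complete function under that reading (its middle is elided from this diff, so the timer-cancel step is an assumption that mirrors the one added to `llama#fim_cancel` above):

```vim
function! s:fim_auto()
    " cancel a request that is already in flight
    if s:current_job != v:null
        call jobstop(s:current_job)
    endif

    " cancel a previously queued trigger (assumed; mirrors llama#fim_cancel)
    if s:timer_fim != -1
        call timer_stop(s:timer_fim)
        let s:timer_fim = -1
    endif

    " re-arm: request a completion after 500 ms of cursor inactivity
    let s:timer_fim = timer_start(500, {-> llama#fim(v:true)})
endfunction
```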
@@ -199,6 +238,13 @@ function! s:fim_on_stdout(job_id, data, event) dict
     let s:can_accept = v:true
     let l:has_info   = v:false
 
+    if s:can_accept && v:shell_error
+        if !self.is_auto
+            call add(s:content, "<| curl error: is the server on? |>")
+        endif
+        let s:can_accept = v:false
+    endif
+
     let l:n_prompt    = 0
     let l:t_prompt_ms = 1.0
     let l:s_prompt    = 0
@@ -207,13 +253,6 @@ function! s:fim_on_stdout(job_id, data, event) dict
     let l:t_predict_ms = 1.0
     let l:s_predict    = 0
 
-    if s:can_accept && v:shell_error
-        if !self.is_auto
-            call add(s:content, "<| curl error: is the server on? |>")
-        endif
-        let s:can_accept = v:false
-    endif
-
     " get the generated suggestion
     if s:can_accept
         let l:response = json_decode(l:raw)
@@ -227,7 +266,7 @@ function! s:fim_on_stdout(job_id, data, event) dict
             call remove(s:content, -1)
         endwhile
 
-        " if response.timings
+        " if response.timings is available
         if len(get(l:response, 'timings', {})) > 0
             let l:has_info = v:true
             let l:timings  = get(l:response, 'timings', {})
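The `timings` object is taken verbatim from the llama.cpp server response, and the counters initialized earlier (`l:n_prompt`, `l:t_prompt_ms`, ...) are presumably filled from it along these lines (a sketch; the field names `prompt_n`, `prompt_ms`, `predicted_n` and `predicted_ms` are the server's usual timing keys and are assumed here, since the diff elides this part):

```vim
" tokens, elapsed time and derived speed for the prompt and predict phases
let l:n_prompt     = get(l:timings, 'prompt_n', 0)
let l:t_prompt_ms  = get(l:timings, 'prompt_ms', 1)
let l:s_prompt     = 1000.0 * l:n_prompt / l:t_prompt_ms

let l:n_predict    = get(l:timings, 'predicted_n', 0)
let l:t_predict_ms = get(l:timings, 'predicted_ms', 1)
let l:s_predict    = 1000.0 * l:n_predict / l:t_predict_ms
```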
@@ -264,8 +303,8 @@ function! s:fim_on_stdout(job_id, data, event) dict
     let l:id_vt_fim  = nvim_create_namespace('vt_fim')
     let l:id_vt_info = nvim_create_namespace('vt_info')
 
-    " construct the info message:
-    if l:has_info
+    " construct the info message and display it to the right of the current line
+    if g:llama_config.show_info && l:has_info
         " prefix the info string with whitespace in order to offset it to the right of the fim overlay
         let l:prefix = repeat(' ', len(s:content[0]) - len(s:line_cur_suffix) + 3)
 
@@ -282,6 +321,7 @@ function! s:fim_on_stdout(job_id, data, event) dict
             \ })
     endif
 
+    " display the suggestion
     call nvim_buf_set_extmark(l:bufnr, l:id_vt_fim, s:pos_y - 1, s:pos_x - 1, {
         \ 'virt_text': [[s:content[0], 'llama_hl_hint']],
         \ 'virt_text_win_col': virtcol('.') - 1
@@ -293,8 +333,8 @@ function! s:fim_on_stdout(job_id, data, event) dict
         \ })
 
     " setup accept/cancel events
-    inoremap <buffer> <Tab> <C-O>:call llama#fim_accept()<CR>
-    inoremap <buffer> <Esc> <C-O>:call llama#fim_cancel()<CR><Esc>
+    inoremap <buffer> <Tab>   <C-O>:call llama#fim_accept(v:false)<CR>
+    inoremap <buffer> <S-Tab> <C-O>:call llama#fim_accept(v:true)<CR>
 
     augroup llama_insert
         autocmd!