Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions autoload/ollama.vim
Original file line number Diff line number Diff line change
Expand Up @@ -221,11 +221,24 @@ function! ollama#GetSuggestion(timer)
\ "Connecting to Ollama on " .. g:ollama_host
\ .. " using model " .. g:ollama_model)
call ollama#logger#Debug("model_options=" .. l:model_options)

" Decide whether sampling options may be sent for the completion model:
" models listed in g:ollama_model_sampling_denylist get sampling disabled
" (the Python side then omits temperature/top_p for them).
let l:denylist = get(g:, 'ollama_model_sampling_denylist', [])
let l:sampling_enabled = index(l:denylist, g:ollama_model) >= 0 ? 0 : 1
call ollama#logger#Debug("sampling_enabled=" .. l:sampling_enabled)

" Convert plugin debug level to python logger levels
let l:log_level = ollama#logger#PythonLogLevel(g:ollama_debug)
let l:base_url = g:ollama_host
if g:ollama_model_provider =~ '^openai'
let l:base_url = g:ollama_openai_baseurl
elseif g:ollama_model_provider == 'claude'
" Claude uses default Anthropic API, don't set base_url
let l:base_url = ''
endif
" Adjust the command to use the prompt as stdin input
let l:command = [ g:ollama_python_interpreter,
Expand All @@ -234,6 +247,7 @@ function! ollama#GetSuggestion(timer)
\ "-m", g:ollama_model,
\ "-u", l:base_url,
\ "-o", l:model_options,
\ "-se", l:sampling_enabled,
\ "-l", l:log_level
\ ]
" Add optional credentialname for looking up the API key
Expand All @@ -247,6 +261,11 @@ function! ollama#GetSuggestion(timer)
" add credentialname option for Mistral
let l:command += [ '-k', g:ollama_mistral_credentialname ]
endif
elseif g:ollama_model_provider == 'claude'
if exists('g:ollama_claude_credentialname') && g:ollama_claude_credentialname != ''
" add credentialname option for Claude
let l:command += [ '-k', g:ollama_claude_credentialname ]
endif
endif
call ollama#logger#Debug("command=" .. join(l:command, " "))
let l:job_options = {
Expand Down
109 changes: 104 additions & 5 deletions autoload/ollama/review.vim
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,9 @@ function! s:FindBufferWindow(bufnr)
endfunction

function! s:StartChat(lines) abort
" Counter for reducing redraw frequency
let s:token_count = 0

" Function handling a line of text that has been typed.
func! TextEntered(text)
call ollama#logger#Debug("TextEntered: " .. a:text)
Expand All @@ -65,12 +68,13 @@ function! s:StartChat(lines) abort
endif
" Send the text to a shell with Enter appended.
call ch_sendraw(s:job, a:text .. "\n")
" Reset token count for new request
let s:token_count = 0
endfunc

" Function handling output from the shell: Add it above the prompt.
func! GotOutput(channel, msg)
" OLD VERSION: Append each token as a new line (non-streaming)
func! GotOutputOld(channel, msg)
call ollama#logger#Debug("GotOutput: " .. a:msg)

" append lines
let l:lines = split(a:msg, "\n")
for l:line in l:lines
Expand All @@ -96,6 +100,91 @@ function! s:StartChat(lines) abort
endfor
endfunc

" NEW VERSION: Stream tokens on the same line with real-time cursor tracking
func! GotOutputNew(channel, msg)
" Channel out_cb for the chat job: streams a:msg into the chat buffer s:buf
" by appending it to the buffer's LAST line (inline streaming effect),
" keeps the cursor at the end of the streamed text when the chat window is
" active, and finalizes the turn when the "<EOT>" marker is seen.
" NOTE(review): assumes each a:msg is a single line (the Python side prints
" one token per line and the channel delivers per-line) — TODO confirm;
" embedded newlines in a:msg would be merged into one buffer line.
" call ollama#logger#Debug("GotOutput: [" .. a:msg .. "]")

" Check for <EOT> marker; any text before the marker is still real content
let l:idx = stridx(a:msg, "<EOT>")
let l:is_eot = l:idx != -1
let l:content = l:is_eot ? strpart(a:msg, 0, l:idx) : a:msg

" Append content to the last line for streaming effect.
" Track which line was touched and its new text so the cursor can be
" positioned after the last character below.
let l:updated_line_num = 0
let l:updated_line_content = ""
let l:line_count = 0

if !empty(l:content)
" Get buffer line count efficiently (single getbufinfo call, no line fetch)
let l:buf_info = getbufinfo(s:buf)[0]
let l:line_count = l:buf_info.linecount
" call ollama#logger#Debug("line_count=" .. l:line_count)

if l:line_count == 0
" Buffer is empty, append as new line
" call ollama#logger#Debug("Buffer empty, appending first line")
call appendbufline(s:buf, 0, l:content)
let l:updated_line_num = 1
let l:updated_line_content = l:content
else
" Get only the last line (much faster than getting all lines)
let l:last_line = getbufline(s:buf, l:line_count, l:line_count)[0]
let l:updated_line_content = l:last_line .. l:content
" call ollama#logger#Debug("Appending to line " .. l:line_count .. ": '" .. l:last_line .. "' + '" .. l:content .. "'")
call setbufline(s:buf, l:line_count, l:updated_line_content)
let l:updated_line_num = l:line_count
endif
endif

" When streaming is done, add a new line for the next input
if l:is_eot
" call ollama#logger#Debug("EOT received, adding newline")
call appendbufline(s:buf, "$", "")
" Reuse line_count if we already got it, otherwise fetch the fresh count
" (which already includes the empty line just appended)
if l:line_count > 0
let l:updated_line_num = l:line_count + 1
else
let l:buf_info = getbufinfo(s:buf)[0]
let l:updated_line_num = l:buf_info.linecount
endif
let l:updated_line_content = ""
endif

" Update cursor position if this is the active chat window
if bufname() == s:ollama_bufname " Check if current active window is Ollama Chat
let l:winid = bufwinid(s:buf)
if l:winid != -1 && l:updated_line_num > 0
" Set cursor position directly (much faster than feedkeys)
" Column is one past the last byte of the updated line
let l:col = len(l:updated_line_content) + 1
call win_execute(l:winid, 'call cursor(' . l:updated_line_num . ', ' . l:col . ')')

" Increment token counter and only redraw every N tokens (or always for EOT)
" s:token_count is reset per request in TextEntered/StartChat
let s:token_count += 1
if l:is_eot || s:token_count % 5 == 0
redraw
endif

if l:is_eot
" Streaming done, enter insert mode so the user can type the next prompt
" (Esc first when already in insert mode, then re-enter with 'a')
if mode() == 'i'
call feedkeys("\<Esc>")
endif
call feedkeys("a")
endif
endif
endif
endfunc

" Output dispatcher: routes channel output to the legacy line-based handler
" when g:ollama_use_old_output is set to a truthy value, otherwise to the
" streaming handler (the default).
func! GotOutput(channel, msg)
let l:Handler = get(g:, 'ollama_use_old_output', 0)
\ ? function('GotOutputOld')
\ : function('GotOutputNew')
call l:Handler(a:channel, a:msg)
endfunc

" Function handling output from the shell: Add it above the prompt.
func! GotErrors(channel, msg)
call ollama#logger#Debug("GotErrors: " .. a:msg)
Expand Down Expand Up @@ -136,9 +225,18 @@ function! s:StartChat(lines) abort
endfunc

let l:model_options = json_encode(g:ollama_chat_options)
call ollama#logger#Debug("Connecting to Ollama on " .. g:ollama_host .. " using model " .. g:ollama_model)
call ollama#logger#Debug("Chat Connecting to Ollama on " .. g:ollama_host .. " using model " .. g:ollama_model)
call ollama#logger#Debug("model_options=" .. l:model_options)

" Decide whether sampling options may be sent for the chat model:
" models listed in g:ollama_model_sampling_denylist get sampling disabled
" (the Python side then omits temperature/top_p for them).
let l:denylist = get(g:, 'ollama_model_sampling_denylist', [])
let l:sampling_enabled = index(l:denylist, g:ollama_chat_model) >= 0 ? 0 : 1
call ollama#logger#Debug("sampling_enabled=" .. l:sampling_enabled)

" Convert plugin debug level to python logger levels
let l:log_level = ollama#logger#PythonLogLevel(g:ollama_debug)
let l:base_url = g:ollama_host
Expand All @@ -154,6 +252,7 @@ function! s:StartChat(lines) abort
\ '-m', g:ollama_chat_model,
\ '-u', l:base_url,
\ '-o', l:model_options,
\ "-se", l:sampling_enabled,
\ '-t', g:ollama_chat_timeout,
\ '-l', l:log_level ]
" Check if a system prompt was configured
Expand Down Expand Up @@ -205,7 +304,7 @@ function! s:StartChat(lines) abort
silent execute 'new' l:bufname
endif
" Set the filetype to ollama-chat
" setlocal filetype=ollama-chat
" setlocal filetype=ollama-chat
setlocal filetype=markdown
setlocal buftype=prompt
" enable BufDelete event when closing buffer using :q!
Expand Down
8 changes: 8 additions & 0 deletions plugin/ollama.vim
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,10 @@ if !exists('g:ollama_openai_credentialname')
" UNIX Pass credential name to lookup API key for OpenAI service
let g:ollama_openai_credentialname = ''
endif
" UNIX pass credential name used to look up the API key for the Anthropic
" Claude service (empty string = no credential lookup)
let g:ollama_claude_credentialname = get(g:, 'ollama_claude_credentialname', '')
" Tab completion specific settings
if !exists('g:ollama_debounce_time')
let g:ollama_debounce_time = 500
Expand Down Expand Up @@ -98,6 +102,10 @@ if !exists('g:ollama_model_options')
\ 'max_tokens': 500
\ }
endif
" Models for which sampling options must not be sent (empty by default)
let g:ollama_model_sampling_denylist = get(g:, 'ollama_model_sampling_denylist', [])
" Chat specific settings
if !exists('g:ollama_chat_provider')
" Provider for chat models: 'ollama' or 'openai'
Expand Down
3 changes: 3 additions & 0 deletions python/OllamaCredentials.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ def GetApiKey(self, provider: str, credentialname: str | None) -> str:
- 'openai' → use OPENAI_API_KEY env var or pass entry
- 'openai_legacy' → same as 'openai', kept for compatibility
- 'mistral' → use MISTRAL_API_KEY env var or pass entry
- 'anthropic' → use ANTHROPIC_API_KEY env var or pass entry

Priority:
1. Environment variable override
Expand All @@ -36,6 +37,8 @@ def GetApiKey(self, provider: str, credentialname: str | None) -> str:
env_var = "OPENAI_API_KEY"
elif provider == "mistral":
env_var = "MISTRAL_API_KEY"
elif provider == "anthropic":
env_var = "ANTHROPIC_API_KEY"
else:
raise ValueError(f"Unknown provider: {provider}")

Expand Down
43 changes: 28 additions & 15 deletions python/chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,9 @@ async def stream_chat_message_ollama(messages, endpoint, model, options, timeout
if "message" in message and "content" in message["message"]:
content = message["message"]["content"]
assistant_message += content
print(content, end="", flush=True)
# Print each token followed by newline so Vim's out_cb receives it immediately
# VimScript will need to handle concatenating tokens on the same line
print(content, flush=True)

# If <EOT> is detected, stop processing
if "<EOT>" in content:
Expand All @@ -90,7 +92,7 @@ async def stream_chat_message_ollama(messages, endpoint, model, options, timeout
messages.append({"role": "assistant", "content": assistant_message.strip()})


async def stream_chat_message_openai(messages, endpoint, model, options, credentialname):
async def stream_chat_message_openai(messages, endpoint, model, options, sampling_enabled, credentialname):
"""Stream chat responses from OpenAI API."""
if AsyncOpenAI is None:
raise ImportError("OpenAI package not found. Please install via 'pip install openai'.")
Expand All @@ -114,20 +116,30 @@ async def stream_chat_message_openai(messages, endpoint, model, options, credent
top_p = options.get('top_p', 1.0)

try:
stream = await client.chat.completions.create(
model=model,
messages=messages,
temperature=temperature,
max_tokens=max_tokens,
top_p=top_p,
stream=True,
)
# Build request parameters
request_params = {
'model': model,
'messages': messages,
'stream': True,
}

# Check if model supports sampling parameters
if sampling_enabled:
request_params['temperature'] = temperature
request_params['top_p'] = top_p
request_params['max_tokens'] = max_tokens
else:
request_params['max_completion_tokens'] = max_tokens

stream = await client.chat.completions.create(**request_params)

async for chunk in stream:
if chunk.choices and chunk.choices[0].delta and chunk.choices[0].delta.content:
token = chunk.choices[0].delta.content
assistant_message += token
print(token, end="", flush=True)
# Print each token followed by newline so Vim's out_cb receives it immediately
# VimScript will need to handle concatenating tokens on the same line
print(token, flush=True)

print("<EOT>", flush=True)

Expand All @@ -139,7 +151,7 @@ async def stream_chat_message_openai(messages, endpoint, model, options, credent
messages.append({"role": "assistant", "content": assistant_message.strip()})


async def main(provider, endpoint, model, options, systemprompt, timeout, credentialname):
async def main(provider, endpoint, model, options, sampling_enabled, systemprompt, timeout, credentialname):
conversation_history = []
log.debug("endpoint: " + str(endpoint))

Expand Down Expand Up @@ -169,7 +181,7 @@ async def main(provider, endpoint, model, options, systemprompt, timeout, creden
)
else:
task = asyncio.create_task(
stream_chat_message_openai(conversation_history, endpoint, model, options, credentialname)
stream_chat_message_openai(conversation_history, endpoint, model, options, sampling_enabled, credentialname)
)
await task
else:
Expand All @@ -189,7 +201,7 @@ async def main(provider, endpoint, model, options, systemprompt, timeout, creden
)
else:
task = asyncio.create_task(
stream_chat_message_openai(conversation_history, endpoint, model, options, credentialname)
stream_chat_message_openai(conversation_history, endpoint, model, options, sampling_enabled, credentialname)
)
await task

Expand All @@ -213,6 +225,7 @@ async def main(provider, endpoint, model, options, systemprompt, timeout, creden
help="Base endpoint URL.")
parser.add_argument("-o", "--options", type=str, default=DEFAULT_OPTIONS,
help="Ollama REST API options.")
parser.add_argument("-se", "--sampling-enabled", type=int, default=1, help="Enable or disable sampling.")
parser.add_argument("-s", "--system-prompt", type=str, default="", help="Specify system prompt.")
parser.add_argument("-t", "--timeout", type=int, default=DEFAULT_TIMEOUT, help="Timeout in seconds.")
parser.add_argument("-l", "--log-level", type=int, default=OllamaLogger.ERROR, help="Log level.")
Expand Down Expand Up @@ -243,7 +256,7 @@ async def main(provider, endpoint, model, options, systemprompt, timeout, creden
try:
while True:
try:
asyncio.run(main(args.provider, endpoint, model, options, args.system_prompt, args.timeout, args.keyname))
asyncio.run(main(args.provider, endpoint, model, options, args.sampling_enabled, args.system_prompt, args.timeout, args.keyname))
except KeyboardInterrupt:
print("Canceled.")
break
Expand Down
Loading