Files
go-llm/v2/ollama/native.go
T
steve 583f8724b2 feat(v2/ollama): implement native Complete() with tools, vision, thinking
Non-streaming /api/chat support including:
- Vision via images: []base64
- Tool calls on assistant + tool-role response messages
- think field accepting string reasoning levels (or "true"/"false")
- Authorization header when apiKey is non-empty (cloud mode)

Tool-call arguments are passed as JSON objects to the wire and surfaced
as JSON-string Arguments on provider.ToolCall. Tool calls are assigned
synthetic IDs (tc_<index>) when Ollama omits one, so the round-trip
back as an assistant tool_calls + tool-role message remains correlated.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-05-01 18:24:02 +00:00

337 lines
10 KiB
Go

// Package ollama implements the go-llm v2 provider interface for Ollama,
// targeting Ollama's native /api/chat endpoint. Supports both local Ollama
// instances (no API key) and Ollama Cloud (https://ollama.com, requires an
// API key).
package ollama
import (
"bytes"
"context"
"encoding/base64"
"encoding/json"
"fmt"
"io"
"net/http"
"strings"
"gitea.stevedudenhoeffer.com/steve/go-llm/v2/provider"
)
// Base URLs for the two supported deployment modes.
const (
	// DefaultLocalBaseURL is the default base URL for a locally-running
	// Ollama instance.
	DefaultLocalBaseURL = "http://localhost:11434"

	// DefaultCloudBaseURL is the default base URL for Ollama Cloud.
	DefaultCloudBaseURL = "https://ollama.com"
)
// Provider implements provider.Provider over Ollama's native /api/chat
// endpoint. An empty apiKey means local-mode (no Authorization header sent);
// a non-empty apiKey is sent as a Bearer token (cloud-mode).
type Provider struct {
// apiKey authenticates against Ollama Cloud; empty for local instances.
apiKey string
// baseURL is the server root, e.g. DefaultLocalBaseURL or DefaultCloudBaseURL.
baseURL string
// client performs all /api/chat HTTP requests.
// NOTE(review): no Timeout is set, so request lifetime is bounded only by
// the caller's context — confirm this is intentional (streaming responses
// need long-lived reads).
client *http.Client
}
// newNative constructs a native Ollama provider. Callers should use the
// package-level New() constructor or the v2 llm.Ollama() / llm.OllamaCloud()
// helpers.
func newNative(apiKey, baseURL string) *Provider {
	p := &Provider{
		apiKey:  apiKey,
		baseURL: baseURL,
	}
	p.client = &http.Client{}
	return p
}
// nativeChatRequest is the JSON body POSTed to /api/chat.
type nativeChatRequest struct {
// Model names the Ollama model to run.
Model string `json:"model"`
// Messages is the full conversation history sent on each call.
Messages []nativeChatMessage `json:"messages"`
// Tools holds the function definitions the model may call.
Tools []nativeToolDef `json:"tools,omitempty"`
// Stream toggles NDJSON streaming; false requests a single response body.
Stream bool `json:"stream"`
// Think is polymorphic — Ollama accepts true/false or "low"/"medium"/"high".
Think json.RawMessage `json:"think,omitempty"`
// Options carries sampling knobs such as temperature, top_p, num_predict,
// and stop; omitted entirely when no knob is set.
Options map[string]any `json:"options,omitempty"`
}
// nativeChatMessage is one entry in the messages array on the wire. It also
// carries assistant tool calls and tool-role responses.
type nativeChatMessage struct {
// Role is one of the chat roles understood by Ollama (e.g. user/assistant/tool).
Role string `json:"role"`
// Content is the plain-text body of the message.
Content string `json:"content,omitempty"`
// Images holds base64-encoded image payloads for vision models.
Images []string `json:"images,omitempty"`
// ToolCalls carries the tool invocations emitted by an assistant message.
ToolCalls []nativeToolCall `json:"tool_calls,omitempty"`
// ToolCallID correlates a tool-role response with the call it answers.
ToolCallID string `json:"tool_call_id,omitempty"`
// Thinking carries the model's reasoning text when `think` is enabled.
Thinking string `json:"thinking,omitempty"`
}
// nativeToolCall mirrors Ollama's tool-call wire shape: a function with name
// and JSON-encoded arguments. Ollama's spec doesn't require an id, but some
// builds and some streaming chunks include one — we accept it on both wire and
// internal sides.
type nativeToolCall struct {
// ID is the optional call identifier; toolCallID synthesizes one when absent.
ID string `json:"id,omitempty"`
Function nativeFunctionCall `json:"function"`
}
// nativeFunctionCall is the function payload inside a tool call.
type nativeFunctionCall struct {
// Index is the positional index of this call within the message, used to
// correlate streaming deltas; a pointer so that index 0 still serializes.
Index *int `json:"index,omitempty"`
// Name is the tool (function) being invoked.
Name string `json:"name,omitempty"`
// Arguments is the raw JSON arguments value, deferred for later decoding.
Arguments json.RawMessage `json:"arguments,omitempty"`
}
// nativeChatResponse is the JSON body returned from a non-streaming /api/chat
// call (and is also the per-line shape during streaming).
type nativeChatResponse struct {
// Model echoes the model that produced this response.
Model string `json:"model,omitempty"`
// Message is the assistant (or partial, when streaming) reply.
Message nativeChatMessage `json:"message"`
// Done reports whether generation has finished.
Done bool `json:"done"`
// DoneReason describes why generation stopped (e.g. "stop", "length").
DoneReason string `json:"done_reason,omitempty"`
// PromptEvalCount is the number of prompt (input) tokens evaluated.
PromptEvalCount int `json:"prompt_eval_count,omitempty"`
// EvalCount is the number of generated (output) tokens.
EvalCount int `json:"eval_count,omitempty"`
// TotalDuration is the total wall-clock time reported by Ollama, in
// nanoseconds — presumably; confirm against the Ollama API docs.
TotalDuration int64 `json:"total_duration,omitempty"`
}
// nativeToolDef is the wire shape of a tool definition sent to Ollama.
type nativeToolDef struct {
// Type is always "function" (the only tool type Ollama accepts here).
Type string `json:"type"`
Function nativeFunctionDef `json:"function"`
}
// nativeFunctionDef describes one callable function: its name, a
// human-readable description, and a JSON-Schema parameters object.
type nativeFunctionDef struct {
Name string `json:"name"`
Description string `json:"description,omitempty"`
// Parameters is the JSON-Schema object describing the function's inputs.
Parameters map[string]any `json:"parameters,omitempty"`
}
// encodeThink translates a go-llm Reasoning value into Ollama's polymorphic
// `think` field. The empty string yields nil so the field is omitted from the
// request body; the literals "true"/"false" become JSON booleans; anything
// else (e.g. "low", "medium", "high") is encoded as a JSON string.
func encodeThink(reasoning string) json.RawMessage {
	if reasoning == "" {
		return nil
	}
	if reasoning == "true" || reasoning == "false" {
		// The literal spellings map straight to JSON booleans.
		return json.RawMessage(reasoning)
	}
	encoded, _ := json.Marshal(reasoning) // marshaling a plain string cannot fail
	return encoded
}
// Complete performs a single non-streaming chat completion via /api/chat and
// maps the decoded wire response into a provider.Response.
func (p *Provider) Complete(ctx context.Context, req provider.Request) (provider.Response, error) {
	payload, err := p.buildChatRequest(req, false)
	if err != nil {
		return provider.Response{}, err
	}
	res, err := p.doChatRequest(ctx, payload)
	if err != nil {
		return provider.Response{}, err
	}
	defer res.Body.Close()
	if res.StatusCode < 200 || res.StatusCode >= 300 {
		// Surface the server's error body verbatim for diagnosis.
		msg, _ := io.ReadAll(res.Body)
		return provider.Response{}, fmt.Errorf("ollama: HTTP %d: %s", res.StatusCode, string(msg))
	}
	var parsed nativeChatResponse
	if err := json.NewDecoder(res.Body).Decode(&parsed); err != nil {
		return provider.Response{}, fmt.Errorf("ollama: decode response: %w", err)
	}
	out := provider.Response{
		Text:     parsed.Message.Content,
		Thinking: parsed.Message.Thinking,
	}
	for i, call := range parsed.Message.ToolCalls {
		out.ToolCalls = append(out.ToolCalls, provider.ToolCall{
			ID:        toolCallID(call, i),
			Name:      call.Function.Name,
			Arguments: rawMessageToArgString(call.Function.Arguments),
		})
	}
	// Only attach usage when Ollama actually reported token counts, so
	// callers can distinguish "absent" from an all-zero Usage.
	inTok, outTok := parsed.PromptEvalCount, parsed.EvalCount
	if inTok > 0 || outTok > 0 {
		out.Usage = &provider.Usage{
			InputTokens:  inTok,
			OutputTokens: outTok,
			TotalTokens:  inTok + outTok,
		}
	}
	return out, nil
}
// Stream is intended to perform a streaming chat completion via /api/chat
// with `stream: true`, parsing NDJSON line-by-line. It is not yet
// implemented and currently always returns an error; the req and events
// parameters are unused until streaming support lands.
func (p *Provider) Stream(ctx context.Context, req provider.Request, events chan<- provider.StreamEvent) error {
return fmt.Errorf("ollama native provider: Stream not implemented")
}
// buildChatRequest serializes a provider.Request into the native /api/chat
// JSON body. The stream argument sets the wire-level stream flag (true for
// /api/chat streaming).
func (p *Provider) buildChatRequest(req provider.Request, stream bool) ([]byte, error) {
	body := nativeChatRequest{
		Model:  req.Model,
		Stream: stream,
		Think:  encodeThink(req.Reasoning),
	}
	for _, msg := range req.Messages {
		converted, err := convertMessage(msg)
		if err != nil {
			return nil, err
		}
		body.Messages = append(body.Messages, converted)
	}
	for _, tool := range req.Tools {
		body.Tools = append(body.Tools, nativeToolDef{
			Type: "function",
			Function: nativeFunctionDef{
				Name:        tool.Name,
				Description: tool.Description,
				Parameters:  tool.Schema,
			},
		})
	}
	// Sampling knobs live under Ollama's "options" map; attach the map only
	// when at least one knob is set so the field is omitted otherwise.
	opts := map[string]any{}
	if req.Temperature != nil {
		opts["temperature"] = *req.Temperature
	}
	if req.TopP != nil {
		opts["top_p"] = *req.TopP
	}
	if req.MaxTokens != nil {
		opts["num_predict"] = *req.MaxTokens
	}
	if len(req.Stop) > 0 {
		opts["stop"] = req.Stop
	}
	if len(opts) > 0 {
		body.Options = opts
	}
	return json.Marshal(body)
}
// doChatRequest POSTs the wire body to /api/chat and returns the raw HTTP
// response. The caller is responsible for closing the response body.
func (p *Provider) doChatRequest(ctx context.Context, body []byte) (*http.Response, error) {
	endpoint := strings.TrimRight(p.baseURL, "/") + "/api/chat"
	httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, endpoint, bytes.NewReader(body))
	if err != nil {
		return nil, fmt.Errorf("ollama: build request: %w", err)
	}
	httpReq.Header.Set("Content-Type", "application/json")
	// Local mode sends no Authorization header; cloud mode authenticates
	// with a Bearer token.
	if key := p.apiKey; key != "" {
		httpReq.Header.Set("Authorization", "Bearer "+key)
	}
	resp, err := p.client.Do(httpReq)
	if err != nil {
		return nil, fmt.Errorf("ollama: HTTP request: %w", err)
	}
	return resp, nil
}
// convertMessage maps a provider.Message into a native wire message:
// role/content/tool_call_id pass through, attached images are base64-encoded
// (fetched over HTTP when only a URL is given), and assistant tool calls are
// re-encoded onto the wire with a stable positional index.
//
// Returns an error when an image fetch fails.
func convertMessage(msg provider.Message) (nativeChatMessage, error) {
	out := nativeChatMessage{
		Role:       msg.Role,
		Content:    msg.Content,
		ToolCallID: msg.ToolCallID,
	}
	for _, img := range msg.Images {
		b64, err := imageToBase64(img)
		if err != nil {
			return nativeChatMessage{}, err
		}
		if b64 != "" {
			out.Images = append(out.Images, b64)
		}
	}
	for i, tc := range msg.ToolCalls {
		raw := json.RawMessage(strings.TrimSpace(tc.Arguments))
		if len(raw) == 0 {
			raw = json.RawMessage(`{}`)
		} else if !json.Valid(raw) {
			// rawMessageToArgString may have unwrapped a pre-stringified
			// argument payload that is not itself valid JSON. Re-encode the
			// original text as a JSON string; otherwise the invalid
			// RawMessage would make json.Marshal fail on the whole request.
			b, _ := json.Marshal(tc.Arguments) // marshaling a string cannot fail
			raw = b
		}
		// Preserve a stable index so streaming peers can correlate deltas.
		idx := i
		out.ToolCalls = append(out.ToolCalls, nativeToolCall{
			ID: tc.ID,
			Function: nativeFunctionCall{
				Index:     &idx,
				Name:      tc.Name,
				Arguments: raw,
			},
		})
	}
	return out, nil
}
// imageToBase64 returns the base64-encoded payload of an image, fetching
// URL-only images over HTTP if no inline base64 is supplied. Images with
// neither inline data nor a URL yield an empty string and no error.
func imageToBase64(img provider.Image) (string, error) {
	if img.Base64 != "" {
		return img.Base64, nil
	}
	if img.URL == "" {
		return "", nil
	}
	// Bound the fetch: http.Get uses http.DefaultClient, which has no
	// timeout, so a stalled remote host could hang the completion forever.
	const fetchTimeout = 30 * time.Second
	client := &http.Client{Timeout: fetchTimeout}
	resp, err := client.Get(img.URL)
	if err != nil {
		return "", fmt.Errorf("ollama: fetch image %q: %w", img.URL, err)
	}
	defer resp.Body.Close()
	if resp.StatusCode < 200 || resp.StatusCode >= 300 {
		return "", fmt.Errorf("ollama: fetch image %q: HTTP %d", img.URL, resp.StatusCode)
	}
	data, err := io.ReadAll(resp.Body)
	if err != nil {
		return "", fmt.Errorf("ollama: read image %q: %w", img.URL, err)
	}
	return base64.StdEncoding.EncodeToString(data), nil
}
// rawMessageToArgString converts a JSON-encoded arguments value into the
// string form the provider package uses for ToolCall.Arguments. Object/array
// values pass through verbatim; bare string values (some Ollama builds emit
// pre-stringified arguments) are unwrapped.
func rawMessageToArgString(raw json.RawMessage) string {
if len(raw) == 0 {
return "{}"
}
trimmed := strings.TrimSpace(string(raw))
if len(trimmed) == 0 {
return "{}"
}
if trimmed[0] == '"' {
var s string
if err := json.Unmarshal([]byte(trimmed), &s); err == nil {
return s
}
}
return trimmed
}
// toolCallID returns a stable identifier for a tool call. Ollama's native
// API typically does not include an id, so when one is missing we synthesize
// "tc_<n>" from the wire-supplied function index (preferred) or the
// positional index.
func toolCallID(tc nativeToolCall, index int) string {
	if tc.ID != "" {
		return tc.ID
	}
	n := index
	if tc.Function.Index != nil {
		n = *tc.Function.Index
	}
	return fmt.Sprintf("tc_%d", n)
}