// Package ollama implements the go-llm v2 provider interface for Ollama,
// targeting Ollama's native /api/chat endpoint. Supports both local Ollama
// instances (no API key) and Ollama Cloud (https://ollama.com, requires an
// API key).
package ollama

import (
	"bufio"
	"bytes"
	"context"
	"encoding/base64"
	"encoding/json"
	"fmt"
	"io"
	"net/http"
	"strings"

	"gitea.stevedudenhoeffer.com/steve/go-llm/v2/provider"
)

// DefaultLocalBaseURL is the default base URL for a locally-running Ollama
// instance.
const DefaultLocalBaseURL = "http://localhost:11434"

// DefaultCloudBaseURL is the default base URL for Ollama Cloud.
const DefaultCloudBaseURL = "https://ollama.com"

// Provider implements provider.Provider over Ollama's native /api/chat
// endpoint. An empty apiKey means local-mode (no Authorization header sent);
// a non-empty apiKey is sent as a Bearer token (cloud-mode).
type Provider struct {
	apiKey  string       // empty for local mode; sent as a Bearer token when set
	baseURL string       // scheme+host; trailing slashes are trimmed at request time
	client  *http.Client // shared HTTP client reused across requests
}

// newNative constructs a native Ollama provider. Callers should use the
// package-level New() constructor or the v2 llm.Ollama() / llm.OllamaCloud()
// helpers.
func newNative(apiKey, baseURL string) *Provider {
	return &Provider{
		apiKey:  apiKey,
		baseURL: baseURL,
		// NOTE(review): no Timeout is set here — streaming responses can be
		// long-lived and per-request ctx handles cancellation; confirm this
		// is intentional for the non-streaming path too.
		client: &http.Client{},
	}
}

// nativeChatRequest is the JSON body POSTed to /api/chat.
type nativeChatRequest struct {
	Model    string              `json:"model"`
	Messages []nativeChatMessage `json:"messages"`
	Tools    []nativeToolDef     `json:"tools,omitempty"`
	Stream   bool                `json:"stream"`
	// Think is polymorphic — Ollama accepts true/false or "low"/"medium"/"high".
	Think   json.RawMessage `json:"think,omitempty"`
	Options map[string]any  `json:"options,omitempty"`
}

// nativeChatMessage is one entry in the messages array on the wire. It also
// carries assistant tool calls and tool-role responses.
type nativeChatMessage struct {
	Role       string           `json:"role"`
	Content    string           `json:"content,omitempty"`
	Images     []string         `json:"images,omitempty"` // base64-encoded image payloads
	ToolCalls  []nativeToolCall `json:"tool_calls,omitempty"`
	ToolCallID string           `json:"tool_call_id,omitempty"`
	Thinking   string           `json:"thinking,omitempty"` // model reasoning text, when think is enabled
}

// nativeToolCall mirrors Ollama's tool-call wire shape: a function with name
// and JSON-encoded arguments. Ollama's spec doesn't require an id, but some
// builds and some streaming chunks include one — we accept it on both wire and
// internal sides.
type nativeToolCall struct {
	ID       string             `json:"id,omitempty"`
	Function nativeFunctionCall `json:"function"`
}

type nativeFunctionCall struct {
	Index     *int            `json:"index,omitempty"` // used to correlate streamed argument deltas
	Name      string          `json:"name,omitempty"`
	Arguments json.RawMessage `json:"arguments,omitempty"`
}

// nativeChatResponse is the JSON body returned from a non-streaming /api/chat
// call (and is also the per-line shape during streaming).
type nativeChatResponse struct {
	Model           string            `json:"model,omitempty"`
	Message         nativeChatMessage `json:"message"`
	Done            bool              `json:"done"`
	DoneReason      string            `json:"done_reason,omitempty"`
	PromptEvalCount int               `json:"prompt_eval_count,omitempty"` // input token count
	EvalCount       int               `json:"eval_count,omitempty"`        // output token count
	TotalDuration   int64             `json:"total_duration,omitempty"`
}

// nativeToolDef is the wire shape of a tool definition sent to Ollama.
type nativeToolDef struct {
	Type     string            `json:"type"` // always "function" (set in buildChatRequest)
	Function nativeFunctionDef `json:"function"`
}

type nativeFunctionDef struct {
	Name        string         `json:"name"`
	Description string         `json:"description,omitempty"`
	Parameters  map[string]any `json:"parameters,omitempty"` // JSON-Schema-shaped parameter map
}

// encodeThink converts a go-llm Reasoning string ("", "low", "medium",
// "high", or the literal strings "true"/"false") into Ollama's polymorphic
// `think` field. Returns nil for the empty string so the field is omitted.
func encodeThink(reasoning string) json.RawMessage { switch reasoning { case "": return nil case "true": return json.RawMessage(`true`) case "false": return json.RawMessage(`false`) default: // "low" / "medium" / "high" — encode as a JSON string. b, _ := json.Marshal(reasoning) return b } } // Complete performs a non-streaming chat completion via /api/chat. func (p *Provider) Complete(ctx context.Context, req provider.Request) (provider.Response, error) { body, err := p.buildChatRequest(req, false) if err != nil { return provider.Response{}, err } httpResp, err := p.doChatRequest(ctx, body) if err != nil { return provider.Response{}, err } defer httpResp.Body.Close() if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 { b, _ := io.ReadAll(httpResp.Body) return provider.Response{}, fmt.Errorf("ollama: HTTP %d: %s", httpResp.StatusCode, string(b)) } var chat nativeChatResponse if err := json.NewDecoder(httpResp.Body).Decode(&chat); err != nil { return provider.Response{}, fmt.Errorf("ollama: decode response: %w", err) } resp := provider.Response{ Text: chat.Message.Content, Thinking: chat.Message.Thinking, } for i, tc := range chat.Message.ToolCalls { resp.ToolCalls = append(resp.ToolCalls, provider.ToolCall{ ID: toolCallID(tc, i), Name: tc.Function.Name, Arguments: rawMessageToArgString(tc.Function.Arguments), }) } if chat.PromptEvalCount > 0 || chat.EvalCount > 0 { resp.Usage = &provider.Usage{ InputTokens: chat.PromptEvalCount, OutputTokens: chat.EvalCount, TotalTokens: chat.PromptEvalCount + chat.EvalCount, } } return resp, nil } // Stream performs a streaming chat completion via /api/chat with // `stream: true`, parsing NDJSON line-by-line. Tool-call argument deltas are // accumulated across chunks keyed by id (or function index) and finalized // when the upstream Done flag arrives. 
func (p *Provider) Stream(ctx context.Context, req provider.Request, events chan<- provider.StreamEvent) error {
	// Close the events channel on every exit path so consumers can range
	// over it safely.
	defer close(events)
	body, err := p.buildChatRequest(req, true)
	if err != nil {
		return err
	}
	httpResp, err := p.doChatRequest(ctx, body)
	if err != nil {
		return err
	}
	defer httpResp.Body.Close()
	if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 {
		b, _ := io.ReadAll(httpResp.Body)
		return fmt.Errorf("ollama: HTTP %d: %s", httpResp.StatusCode, string(b))
	}
	scanner := bufio.NewScanner(httpResp.Body)
	// Ollama can emit multi-KB lines on tool-call deltas. Generous buffer.
	const maxLineSize = 4 * 1024 * 1024
	scanner.Buffer(make([]byte, 0, 64*1024), maxLineSize)
	// toolAcc accumulates one tool call's identity and argument fragments
	// across stream chunks.
	type toolAcc struct {
		id    string
		name  string
		args  strings.Builder
		index int // ToolIndex emitted on stream events
	}
	tools := map[string]*toolAcc{} // keyed by streamToolKey, correlates deltas
	var toolOrder []*toolAcc       // first-seen order, used for finalization
	var (
		fullText     strings.Builder
		fullThinking strings.Builder
		usage        *provider.Usage
		streamErr    error
	)
	for scanner.Scan() {
		line := scanner.Bytes()
		// Skip keep-alive / blank lines between NDJSON records.
		if len(bytes.TrimSpace(line)) == 0 {
			continue
		}
		var chunk nativeChatResponse
		if err := json.Unmarshal(line, &chunk); err != nil {
			streamErr = fmt.Errorf("ollama: decode stream chunk: %w", err)
			break
		}
		// Reasoning ("thinking") deltas are forwarded and accumulated
		// separately from regular content.
		if chunk.Message.Thinking != "" {
			fullThinking.WriteString(chunk.Message.Thinking)
			events <- provider.StreamEvent{
				Type: provider.StreamEventThinking,
				Text: chunk.Message.Thinking,
			}
		}
		if chunk.Message.Content != "" {
			fullText.WriteString(chunk.Message.Content)
			events <- provider.StreamEvent{
				Type: provider.StreamEventText,
				Text: chunk.Message.Content,
			}
		}
		for pos, tc := range chunk.Message.ToolCalls {
			key := streamToolKey(tc, pos)
			acc, exists := tools[key]
			if !exists {
				// First sighting of this tool call: register an accumulator
				// and emit ToolStart.
				acc = &toolAcc{
					id:    tc.ID,
					name:  tc.Function.Name,
					index: len(toolOrder),
				}
				if acc.id == "" {
					// Ollama often omits ids; synthesize one from the index.
					acc.id = fmt.Sprintf("tc_%d", acc.index)
				}
				tools[key] = acc
				toolOrder = append(toolOrder, acc)
				events <- provider.StreamEvent{
					Type:      provider.StreamEventToolStart,
					ToolIndex: acc.index,
					ToolCall: &provider.ToolCall{
						ID:   acc.id,
						Name: acc.name,
					},
				}
			} else {
				// Continuation chunk may carry the tool's name late; capture it.
				if tc.Function.Name != "" && acc.name == "" {
					acc.name = tc.Function.Name
				}
			}
			delta := decodeArgumentDelta(tc.Function.Arguments)
			if delta != "" {
				acc.args.WriteString(delta)
				events <- provider.StreamEvent{
					Type:      provider.StreamEventToolDelta,
					ToolIndex: acc.index,
					ToolCall: &provider.ToolCall{
						Arguments: delta,
					},
				}
			}
		}
		if chunk.Done {
			// Final chunk carries token counts; surface them as Usage when
			// present. Anything after Done is ignored.
			if chunk.PromptEvalCount > 0 || chunk.EvalCount > 0 {
				usage = &provider.Usage{
					InputTokens:  chunk.PromptEvalCount,
					OutputTokens: chunk.EvalCount,
					TotalTokens:  chunk.PromptEvalCount + chunk.EvalCount,
				}
			}
			break
		}
	}
	// A scanner error (e.g. line exceeding maxLineSize) is reported only if
	// no decode error already occurred.
	if err := scanner.Err(); err != nil && streamErr == nil {
		streamErr = fmt.Errorf("ollama: stream read: %w", err)
	}
	if streamErr != nil {
		events <- provider.StreamEvent{
			Type:  provider.StreamEventError,
			Error: streamErr,
		}
		return streamErr
	}
	// Finalize accumulated tool calls.
	finalCalls := make([]provider.ToolCall, 0, len(toolOrder))
	for _, acc := range toolOrder {
		args := acc.args.String()
		if args == "" {
			// No argument deltas arrived; default to an empty JSON object.
			args = "{}"
		}
		final := provider.ToolCall{
			ID:        acc.id,
			Name:      acc.name,
			Arguments: args,
		}
		finalCalls = append(finalCalls, final)
		events <- provider.StreamEvent{
			Type:      provider.StreamEventToolEnd,
			ToolIndex: acc.index,
			ToolCall:  &final,
		}
	}
	events <- provider.StreamEvent{
		Type: provider.StreamEventDone,
		Response: &provider.Response{
			Text:      fullText.String(),
			Thinking:  fullThinking.String(),
			ToolCalls: finalCalls,
			Usage:     usage,
		},
	}
	return nil
}

// streamToolKey computes a stable map key correlating tool-call deltas
// across stream chunks. Prefer the wire id, fall back to function index,
// finally fall back to the tool's position in the chunk's tool_calls array
// (a single-tool stream collapses cleanly under any strategy).
func streamToolKey(tc nativeToolCall, position int) string { if tc.ID != "" { return "id:" + tc.ID } if tc.Function.Index != nil { return fmt.Sprintf("idx:%d", *tc.Function.Index) } return fmt.Sprintf("pos:%d", position) } // decodeArgumentDelta returns the string fragment to append when a streamed // tool-call chunk includes arguments. Ollama may emit arguments either as a // JSON-encoded string fragment (chunk-by-chunk concatenation, openaicompat // style) or as a complete object value (one-shot delivery). We accept both: // strings are unwrapped, objects/arrays pass through verbatim. func decodeArgumentDelta(raw json.RawMessage) string { if len(raw) == 0 { return "" } trimmed := bytes.TrimSpace(raw) if len(trimmed) == 0 || string(trimmed) == "null" { return "" } if trimmed[0] == '"' { var s string if err := json.Unmarshal(trimmed, &s); err == nil { return s } } return string(trimmed) } // buildChatRequest converts a provider.Request into the native wire body // JSON. stream toggles the stream flag (true for /api/chat streaming). 
func (p *Provider) buildChatRequest(req provider.Request, stream bool) ([]byte, error) { wire := nativeChatRequest{ Model: req.Model, Stream: stream, Think: encodeThink(req.Reasoning), } for _, msg := range req.Messages { m, err := convertMessage(msg) if err != nil { return nil, err } wire.Messages = append(wire.Messages, m) } for _, t := range req.Tools { wire.Tools = append(wire.Tools, nativeToolDef{ Type: "function", Function: nativeFunctionDef{ Name: t.Name, Description: t.Description, Parameters: t.Schema, }, }) } if req.Temperature != nil || req.MaxTokens != nil || req.TopP != nil || len(req.Stop) > 0 { wire.Options = map[string]any{} if req.Temperature != nil { wire.Options["temperature"] = *req.Temperature } if req.TopP != nil { wire.Options["top_p"] = *req.TopP } if req.MaxTokens != nil { wire.Options["num_predict"] = *req.MaxTokens } if len(req.Stop) > 0 { wire.Options["stop"] = req.Stop } } return json.Marshal(wire) } // doChatRequest POSTs the wire body to /api/chat and returns the raw HTTP // response. The caller is responsible for closing the response body. func (p *Provider) doChatRequest(ctx context.Context, body []byte) (*http.Response, error) { url := strings.TrimRight(p.baseURL, "/") + "/api/chat" httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(body)) if err != nil { return nil, fmt.Errorf("ollama: build request: %w", err) } httpReq.Header.Set("Content-Type", "application/json") if p.apiKey != "" { httpReq.Header.Set("Authorization", "Bearer "+p.apiKey) } resp, err := p.client.Do(httpReq) if err != nil { return nil, fmt.Errorf("ollama: HTTP request: %w", err) } return resp, nil } // convertMessage maps a provider.Message into a native wire message. 
func convertMessage(msg provider.Message) (nativeChatMessage, error) { out := nativeChatMessage{ Role: msg.Role, Content: msg.Content, ToolCallID: msg.ToolCallID, } for _, img := range msg.Images { b64, err := imageToBase64(img) if err != nil { return nativeChatMessage{}, err } if b64 != "" { out.Images = append(out.Images, b64) } } for i, tc := range msg.ToolCalls { raw := json.RawMessage(strings.TrimSpace(tc.Arguments)) if len(raw) == 0 { raw = json.RawMessage(`{}`) } // Preserve a stable index so streaming peers can correlate deltas. idx := i out.ToolCalls = append(out.ToolCalls, nativeToolCall{ ID: tc.ID, Function: nativeFunctionCall{ Index: &idx, Name: tc.Name, Arguments: raw, }, }) } return out, nil } // imageToBase64 returns the base64-encoded payload of an image, fetching // URL-only images over HTTP if no inline base64 is supplied. func imageToBase64(img provider.Image) (string, error) { if img.Base64 != "" { return img.Base64, nil } if img.URL == "" { return "", nil } resp, err := http.Get(img.URL) if err != nil { return "", fmt.Errorf("ollama: fetch image %q: %w", img.URL, err) } defer resp.Body.Close() if resp.StatusCode < 200 || resp.StatusCode >= 300 { return "", fmt.Errorf("ollama: fetch image %q: HTTP %d", img.URL, resp.StatusCode) } data, err := io.ReadAll(resp.Body) if err != nil { return "", fmt.Errorf("ollama: read image %q: %w", img.URL, err) } return base64.StdEncoding.EncodeToString(data), nil } // rawMessageToArgString converts a JSON-encoded arguments value into the // string form the provider package uses for ToolCall.Arguments. Object/array // values pass through verbatim; bare string values (some Ollama builds emit // pre-stringified arguments) are unwrapped. 
func rawMessageToArgString(raw json.RawMessage) string { if len(raw) == 0 { return "{}" } trimmed := strings.TrimSpace(string(raw)) if len(trimmed) == 0 { return "{}" } if trimmed[0] == '"' { var s string if err := json.Unmarshal([]byte(trimmed), &s); err == nil { return s } } return trimmed } // toolCallID returns a stable identifier for a tool call. Ollama's native // API typically does not include an id, so we synthesize one from the index // when missing. func toolCallID(tc nativeToolCall, index int) string { if tc.ID != "" { return tc.ID } if tc.Function.Index != nil { return fmt.Sprintf("tc_%d", *tc.Function.Index) } return fmt.Sprintf("tc_%d", index) }