feat(v2/ollama): implement native Complete() with tools, vision, thinking

Non-streaming /api/chat support including:
- Vision via images: []base64
- Tool calls on assistant + tool-role response messages
- think field accepting string reasoning levels (or "true"/"false")
- Authorization header when apiKey is non-empty (cloud mode)

Tool-call arguments are sent as JSON objects on the wire and surfaced
as JSON-string Arguments on provider.ToolCall. Tool calls are assigned
synthetic IDs (tc_<index>) when Ollama omits one, so the round-trip
back as an assistant tool_calls + tool-role message remains correlated.
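
For illustration (wire field names taken from this diff's types; values
hypothetical), a response tool call like

  {"function": {"name": "get_weather", "arguments": {"city": "Paris"}}}

surfaces as provider.ToolCall{ID: "tc_0", Name: "get_weather",
Arguments: `{"city":"Paris"}`}.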

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-05-01 18:24:02 +00:00
parent 0e358148eb
commit 583f8724b2
2 changed files with 555 additions and 5 deletions
@@ -5,10 +5,14 @@
package ollama
import (
"bytes"
"context"
"encoding/base64"
"encoding/json"
"errors"
"fmt"
"io"
"net/http"
"strings"
"gitea.stevedudenhoeffer.com/steve/go-llm/v2/provider"
)
@@ -119,15 +123,214 @@ func encodeThink(reasoning string) json.RawMessage {
}
}
// Complete performs a non-streaming chat completion via /api/chat.
func (p *Provider) Complete(ctx context.Context, req provider.Request) (provider.Response, error) {
body, err := p.buildChatRequest(req, false)
if err != nil {
return provider.Response{}, err
}
httpResp, err := p.doChatRequest(ctx, body)
if err != nil {
return provider.Response{}, err
}
defer httpResp.Body.Close()
if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 {
b, _ := io.ReadAll(httpResp.Body)
return provider.Response{}, fmt.Errorf("ollama: HTTP %d: %s", httpResp.StatusCode, string(b))
}
var chat nativeChatResponse
if err := json.NewDecoder(httpResp.Body).Decode(&chat); err != nil {
return provider.Response{}, fmt.Errorf("ollama: decode response: %w", err)
}
resp := provider.Response{
Text: chat.Message.Content,
Thinking: chat.Message.Thinking,
}
for i, tc := range chat.Message.ToolCalls {
resp.ToolCalls = append(resp.ToolCalls, provider.ToolCall{
ID: toolCallID(tc, i),
Name: tc.Function.Name,
Arguments: rawMessageToArgString(tc.Function.Arguments),
})
}
if chat.PromptEvalCount > 0 || chat.EvalCount > 0 {
resp.Usage = &provider.Usage{
InputTokens: chat.PromptEvalCount,
OutputTokens: chat.EvalCount,
TotalTokens: chat.PromptEvalCount + chat.EvalCount,
}
}
return resp, nil
}
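
// Illustrative sketch, not part of this commit: a minimal Complete call,
// assuming an already-constructed Provider p (model and prompt are
// hypothetical).
//
//	resp, err := p.Complete(ctx, provider.Request{
//		Model:    "llama3.2",
//		Messages: []provider.Message{{Role: "user", Content: "Hello"}},
//	})
//	if err != nil {
//		return err
//	}
//	for _, tc := range resp.ToolCalls {
//		fmt.Println(tc.ID, tc.Name, tc.Arguments)
//	}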
// Stream will perform a streaming chat completion via /api/chat with
// `stream: true`, parsing NDJSON line-by-line. It is not yet implemented.
func (p *Provider) Stream(ctx context.Context, req provider.Request, events chan<- provider.StreamEvent) error {
return fmt.Errorf("ollama native provider: Stream not implemented")
}
// buildChatRequest converts a provider.Request into the native wire body
// JSON. stream toggles the stream flag (true for /api/chat streaming).
func (p *Provider) buildChatRequest(req provider.Request, stream bool) ([]byte, error) {
wire := nativeChatRequest{
Model: req.Model,
Stream: stream,
Think: encodeThink(req.Reasoning),
}
for _, msg := range req.Messages {
m, err := convertMessage(msg)
if err != nil {
return nil, err
}
wire.Messages = append(wire.Messages, m)
}
for _, t := range req.Tools {
wire.Tools = append(wire.Tools, nativeToolDef{
Type: "function",
Function: nativeFunctionDef{
Name: t.Name,
Description: t.Description,
Parameters: t.Schema,
},
})
}
if req.Temperature != nil || req.MaxTokens != nil || req.TopP != nil || len(req.Stop) > 0 {
wire.Options = map[string]any{}
if req.Temperature != nil {
wire.Options["temperature"] = *req.Temperature
}
if req.TopP != nil {
wire.Options["top_p"] = *req.TopP
}
if req.MaxTokens != nil {
wire.Options["num_predict"] = *req.MaxTokens
}
if len(req.Stop) > 0 {
wire.Options["stop"] = req.Stop
}
}
return json.Marshal(wire)
}
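
// For illustration (shape follows the mappings above; values hypothetical),
// a request with one tool and a temperature marshals to roughly:
//
//	{
//	  "model": "llama3.2",
//	  "stream": false,
//	  "messages": [{"role": "user", "content": "What's the weather in Paris?"}],
//	  "tools": [{"type": "function",
//	    "function": {"name": "get_weather", "description": "...", "parameters": {...}}}],
//	  "options": {"temperature": 0.2}
//	}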
// doChatRequest POSTs the wire body to /api/chat and returns the raw HTTP
// response. The caller is responsible for closing the response body.
func (p *Provider) doChatRequest(ctx context.Context, body []byte) (*http.Response, error) {
url := strings.TrimRight(p.baseURL, "/") + "/api/chat"
httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(body))
if err != nil {
return nil, fmt.Errorf("ollama: build request: %w", err)
}
httpReq.Header.Set("Content-Type", "application/json")
if p.apiKey != "" {
httpReq.Header.Set("Authorization", "Bearer "+p.apiKey)
}
resp, err := p.client.Do(httpReq)
if err != nil {
return nil, fmt.Errorf("ollama: HTTP request: %w", err)
}
return resp, nil
}
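
// On the wire this amounts to (illustrative; baseURL and apiKey are the
// Provider's configured values, and the Authorization header is sent only
// when apiKey is non-empty):
//
//	POST <baseURL>/api/chat HTTP/1.1
//	Content-Type: application/json
//	Authorization: Bearer <apiKey>
//
//	{"model":"llama3.2","stream":false,"messages":[...]}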
// convertMessage maps a provider.Message into a native wire message.
func convertMessage(msg provider.Message) (nativeChatMessage, error) {
out := nativeChatMessage{
Role: msg.Role,
Content: msg.Content,
ToolCallID: msg.ToolCallID,
}
for _, img := range msg.Images {
b64, err := imageToBase64(img)
if err != nil {
return nativeChatMessage{}, err
}
if b64 != "" {
out.Images = append(out.Images, b64)
}
}
for i, tc := range msg.ToolCalls {
raw := json.RawMessage(strings.TrimSpace(tc.Arguments))
if len(raw) == 0 {
raw = json.RawMessage(`{}`)
}
// Preserve a stable index so streaming peers can correlate deltas.
idx := i
out.ToolCalls = append(out.ToolCalls, nativeToolCall{
ID: tc.ID,
Function: nativeFunctionCall{
Index: &idx,
Name: tc.Name,
Arguments: raw,
},
})
}
return out, nil
}
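
// Sketch of the mapping (values illustrative; JSON names assumed from the
// wire types): an assistant message carrying
//
//	provider.ToolCall{ID: "tc_0", Name: "get_weather", Arguments: `{"city":"Paris"}`}
//
// becomes the wire message
//
//	{"role": "assistant", "tool_calls": [{"id": "tc_0",
//	  "function": {"index": 0, "name": "get_weather", "arguments": {"city": "Paris"}}}]}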
// imageToBase64 returns the base64-encoded payload of an image, fetching
// URL-only images over HTTP if no inline base64 is supplied.
func imageToBase64(img provider.Image) (string, error) {
if img.Base64 != "" {
return img.Base64, nil
}
if img.URL == "" {
return "", nil
}
resp, err := http.Get(img.URL)
if err != nil {
return "", fmt.Errorf("ollama: fetch image %q: %w", img.URL, err)
}
defer resp.Body.Close()
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
return "", fmt.Errorf("ollama: fetch image %q: HTTP %d", img.URL, resp.StatusCode)
}
data, err := io.ReadAll(resp.Body)
if err != nil {
return "", fmt.Errorf("ollama: read image %q: %w", img.URL, err)
}
return base64.StdEncoding.EncodeToString(data), nil
}
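
// For example (values hypothetical): provider.Image{Base64: "iVBORw0..."}
// passes through unchanged, while provider.Image{URL: "https://example.com/cat.png"}
// is fetched over HTTP and its bytes base64-encoded.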
// rawMessageToArgString converts a JSON-encoded arguments value into the
// string form the provider package uses for ToolCall.Arguments. Object/array
// values pass through verbatim; bare string values (some Ollama builds emit
// pre-stringified arguments) are unwrapped.
func rawMessageToArgString(raw json.RawMessage) string {
if len(raw) == 0 {
return "{}"
}
trimmed := strings.TrimSpace(string(raw))
if len(trimmed) == 0 {
return "{}"
}
if trimmed[0] == '"' {
var s string
if err := json.Unmarshal([]byte(trimmed), &s); err == nil {
return s
}
}
return trimmed
}
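
// For example (inputs illustrative):
//
//	rawMessageToArgString(json.RawMessage(`{"city":"Paris"}`))       // `{"city":"Paris"}`
//	rawMessageToArgString(json.RawMessage(`"{\"city\":\"Paris\"}"`)) // unwrapped: `{"city":"Paris"}`
//	rawMessageToArgString(nil)                                       // "{}"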
// toolCallID returns a stable identifier for a tool call. Ollama's native
// API typically does not include an id, so we synthesize one from the index
// when missing.
func toolCallID(tc nativeToolCall, index int) string {
if tc.ID != "" {
return tc.ID
}
if tc.Function.Index != nil {
return fmt.Sprintf("tc_%d", *tc.Function.Index)
}
return fmt.Sprintf("tc_%d", index)
}
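
// For example (IDs illustrative): a call with ID "call_abc" keeps it; one
// with no ID but Function.Index = 2 yields "tc_2"; one with neither falls
// back to its position in the message, so the first call yields "tc_0".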