feat(v2/ollama): implement native Complete() with tools, vision, thinking
Non-streaming /api/chat support including:
- Vision via images: []base64
- Tool calls on assistant + tool-role response messages
- think field accepting string reasoning levels (or "true"/"false")
- Authorization header when apiKey is non-empty (cloud mode)

Tool-call arguments are passed as JSON objects to the wire and surfaced as JSON-string Arguments on provider.ToolCall. Tool calls are assigned synthetic IDs (tc_&lt;index&gt;) when Ollama omits one, so the round-trip back as an assistant tool_calls + tool-role message remains correlated.

Co-Authored-By: Claude Opus 4.6 &lt;noreply@anthropic.com&gt;
This commit is contained in:
+208
-5
@@ -5,10 +5,14 @@
|
||||
package ollama
|
||||
|
||||
import (
	"bytes"
	"context"
	"encoding/base64"
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"net/http"
	"strings"
	"time"

	"gitea.stevedudenhoeffer.com/steve/go-llm/v2/provider"
)
|
||||
@@ -119,15 +123,214 @@ func encodeThink(reasoning string) json.RawMessage {
|
||||
}
|
||||
}
|
||||
|
||||
// errNotImplemented is the sentinel error returned by provider endpoints
// that do not yet have a native implementation; callers can detect it
// with errors.Is.
var errNotImplemented = errors.New("ollama native provider: not implemented")
|
||||
|
||||
// Complete performs a non-streaming chat completion via /api/chat.
|
||||
func (p *Provider) Complete(ctx context.Context, req provider.Request) (provider.Response, error) {
|
||||
return provider.Response{}, errNotImplemented
|
||||
body, err := p.buildChatRequest(req, false)
|
||||
if err != nil {
|
||||
return provider.Response{}, err
|
||||
}
|
||||
|
||||
httpResp, err := p.doChatRequest(ctx, body)
|
||||
if err != nil {
|
||||
return provider.Response{}, err
|
||||
}
|
||||
defer httpResp.Body.Close()
|
||||
|
||||
if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 {
|
||||
b, _ := io.ReadAll(httpResp.Body)
|
||||
return provider.Response{}, fmt.Errorf("ollama: HTTP %d: %s", httpResp.StatusCode, string(b))
|
||||
}
|
||||
|
||||
var chat nativeChatResponse
|
||||
if err := json.NewDecoder(httpResp.Body).Decode(&chat); err != nil {
|
||||
return provider.Response{}, fmt.Errorf("ollama: decode response: %w", err)
|
||||
}
|
||||
|
||||
resp := provider.Response{
|
||||
Text: chat.Message.Content,
|
||||
Thinking: chat.Message.Thinking,
|
||||
}
|
||||
for i, tc := range chat.Message.ToolCalls {
|
||||
resp.ToolCalls = append(resp.ToolCalls, provider.ToolCall{
|
||||
ID: toolCallID(tc, i),
|
||||
Name: tc.Function.Name,
|
||||
Arguments: rawMessageToArgString(tc.Function.Arguments),
|
||||
})
|
||||
}
|
||||
if chat.PromptEvalCount > 0 || chat.EvalCount > 0 {
|
||||
resp.Usage = &provider.Usage{
|
||||
InputTokens: chat.PromptEvalCount,
|
||||
OutputTokens: chat.EvalCount,
|
||||
TotalTokens: chat.PromptEvalCount + chat.EvalCount,
|
||||
}
|
||||
}
|
||||
return resp, nil
|
||||
}
|
||||
|
||||
// Stream performs a streaming chat completion via /api/chat with
|
||||
// `stream: true`, parsing NDJSON line-by-line.
|
||||
func (p *Provider) Stream(ctx context.Context, req provider.Request, events chan<- provider.StreamEvent) error {
|
||||
return errNotImplemented
|
||||
return fmt.Errorf("ollama native provider: Stream not implemented")
|
||||
}
|
||||
|
||||
// buildChatRequest converts a provider.Request into the native wire body
|
||||
// JSON. stream toggles the stream flag (true for /api/chat streaming).
|
||||
func (p *Provider) buildChatRequest(req provider.Request, stream bool) ([]byte, error) {
|
||||
wire := nativeChatRequest{
|
||||
Model: req.Model,
|
||||
Stream: stream,
|
||||
Think: encodeThink(req.Reasoning),
|
||||
}
|
||||
|
||||
for _, msg := range req.Messages {
|
||||
m, err := convertMessage(msg)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
wire.Messages = append(wire.Messages, m)
|
||||
}
|
||||
|
||||
for _, t := range req.Tools {
|
||||
wire.Tools = append(wire.Tools, nativeToolDef{
|
||||
Type: "function",
|
||||
Function: nativeFunctionDef{
|
||||
Name: t.Name,
|
||||
Description: t.Description,
|
||||
Parameters: t.Schema,
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
if req.Temperature != nil || req.MaxTokens != nil || req.TopP != nil || len(req.Stop) > 0 {
|
||||
wire.Options = map[string]any{}
|
||||
if req.Temperature != nil {
|
||||
wire.Options["temperature"] = *req.Temperature
|
||||
}
|
||||
if req.TopP != nil {
|
||||
wire.Options["top_p"] = *req.TopP
|
||||
}
|
||||
if req.MaxTokens != nil {
|
||||
wire.Options["num_predict"] = *req.MaxTokens
|
||||
}
|
||||
if len(req.Stop) > 0 {
|
||||
wire.Options["stop"] = req.Stop
|
||||
}
|
||||
}
|
||||
|
||||
return json.Marshal(wire)
|
||||
}
|
||||
|
||||
// doChatRequest POSTs the wire body to /api/chat and returns the raw HTTP
|
||||
// response. The caller is responsible for closing the response body.
|
||||
func (p *Provider) doChatRequest(ctx context.Context, body []byte) (*http.Response, error) {
|
||||
url := strings.TrimRight(p.baseURL, "/") + "/api/chat"
|
||||
httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(body))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("ollama: build request: %w", err)
|
||||
}
|
||||
httpReq.Header.Set("Content-Type", "application/json")
|
||||
if p.apiKey != "" {
|
||||
httpReq.Header.Set("Authorization", "Bearer "+p.apiKey)
|
||||
}
|
||||
resp, err := p.client.Do(httpReq)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("ollama: HTTP request: %w", err)
|
||||
}
|
||||
return resp, nil
|
||||
}
|
||||
|
||||
// convertMessage maps a provider.Message into a native wire message.
|
||||
func convertMessage(msg provider.Message) (nativeChatMessage, error) {
|
||||
out := nativeChatMessage{
|
||||
Role: msg.Role,
|
||||
Content: msg.Content,
|
||||
ToolCallID: msg.ToolCallID,
|
||||
}
|
||||
|
||||
for _, img := range msg.Images {
|
||||
b64, err := imageToBase64(img)
|
||||
if err != nil {
|
||||
return nativeChatMessage{}, err
|
||||
}
|
||||
if b64 != "" {
|
||||
out.Images = append(out.Images, b64)
|
||||
}
|
||||
}
|
||||
|
||||
for i, tc := range msg.ToolCalls {
|
||||
raw := json.RawMessage(strings.TrimSpace(tc.Arguments))
|
||||
if len(raw) == 0 {
|
||||
raw = json.RawMessage(`{}`)
|
||||
}
|
||||
// Preserve a stable index so streaming peers can correlate deltas.
|
||||
idx := i
|
||||
out.ToolCalls = append(out.ToolCalls, nativeToolCall{
|
||||
ID: tc.ID,
|
||||
Function: nativeFunctionCall{
|
||||
Index: &idx,
|
||||
Name: tc.Name,
|
||||
Arguments: raw,
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
return out, nil
|
||||
}
|
||||
|
||||
// imageToBase64 returns the base64-encoded payload of an image, fetching
|
||||
// URL-only images over HTTP if no inline base64 is supplied.
|
||||
func imageToBase64(img provider.Image) (string, error) {
|
||||
if img.Base64 != "" {
|
||||
return img.Base64, nil
|
||||
}
|
||||
if img.URL == "" {
|
||||
return "", nil
|
||||
}
|
||||
resp, err := http.Get(img.URL)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("ollama: fetch image %q: %w", img.URL, err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
|
||||
return "", fmt.Errorf("ollama: fetch image %q: HTTP %d", img.URL, resp.StatusCode)
|
||||
}
|
||||
data, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("ollama: read image %q: %w", img.URL, err)
|
||||
}
|
||||
return base64.StdEncoding.EncodeToString(data), nil
|
||||
}
|
||||
|
||||
// rawMessageToArgString converts a JSON-encoded arguments value into the
// string form the provider package uses for ToolCall.Arguments. Object/array
// values pass through verbatim (whitespace-trimmed); bare string values
// (some Ollama builds emit pre-stringified arguments) are unwrapped; an
// empty or missing value yields "{}".
func rawMessageToArgString(raw json.RawMessage) string {
	trimmed := strings.TrimSpace(string(raw))
	if trimmed == "" {
		return "{}"
	}
	if strings.HasPrefix(trimmed, `"`) {
		// A quoted scalar: unwrap so callers see the inner string.
		var unwrapped string
		if json.Unmarshal([]byte(trimmed), &unwrapped) == nil {
			return unwrapped
		}
	}
	return trimmed
}
|
||||
|
||||
// toolCallID returns a stable identifier for a tool call. Ollama's native
|
||||
// API typically does not include an id, so we synthesize one from the index
|
||||
// when missing.
|
||||
func toolCallID(tc nativeToolCall, index int) string {
|
||||
if tc.ID != "" {
|
||||
return tc.ID
|
||||
}
|
||||
if tc.Function.Index != nil {
|
||||
return fmt.Sprintf("tc_%d", *tc.Function.Index)
|
||||
}
|
||||
return fmt.Sprintf("tc_%d", index)
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user