043249e0e1
Phase 3: - provider/openai: Chat Completions for OpenAI + compat endpoints (SSE streaming with by-index tool-call assembly, response_format json_schema, legacy max_tokens option, reasoning_effort) - provider/anthropic: Messages API (tool_use/tool_result, GA structured output via output_config.format, full SSE event parser, 529 transient) - provider/ollama: one native /api/chat client behind the ollama, ollama-cloud, and foreman built-ins (presets; NDJSON streaming tolerant of foreman's buffered single-object responses; object tool arguments; format-schema structured output; think mapping) - media/: capability normalization (sniff, downscale, transcode, byte ladder, ErrUnsupported), wired into the chain executor per target with penalty-free advance past incapable elements - registry: real provider + scheme wiring, WithHTTPClient option, required env-foreman TLS chat round-trip test - ADR-0009 multimodal strategy, ADR-0010 tools/structured mapping; README matrix + CLAUDE.md synced Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
493 lines
16 KiB
Go
493 lines
16 KiB
Go
package ollama
|
|
|
|
import (
|
|
"context"
|
|
"encoding/base64"
|
|
"encoding/json"
|
|
"errors"
|
|
"io"
|
|
"net/http"
|
|
"net/http/httptest"
|
|
"strings"
|
|
"testing"
|
|
|
|
"gitea.stevedudenhoeffer.com/steve/majordomo/llm"
|
|
)
|
|
|
|
// captured is the record the test server keeps of a request it received, so
// that tests can assert on what was actually sent over the wire.
type captured struct {
	// Authorization header, Content-Type header, and URL path of the request.
	auth, contentType, path string

	// Request payload: decoded as a JSON object, and as the unparsed bytes.
	body map[string]any
	raw  []byte
}
|
|
|
|
func serve(t *testing.T, status int, respond func(w http.ResponseWriter)) (*Provider, *captured) {
|
|
t.Helper()
|
|
cap := &captured{}
|
|
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
|
cap.auth = r.Header.Get("Authorization")
|
|
cap.contentType = r.Header.Get("Content-Type")
|
|
cap.path = r.URL.Path
|
|
cap.raw, _ = io.ReadAll(r.Body)
|
|
_ = json.Unmarshal(cap.raw, &cap.body)
|
|
w.WriteHeader(status)
|
|
respond(w)
|
|
}))
|
|
t.Cleanup(ts.Close)
|
|
return New(WithBaseURL(ts.URL), WithToken("test-token")), cap
|
|
}
|
|
|
|
// jsonReply builds a respond callback that writes obj verbatim as the response
// body. Write errors are deliberately ignored.
func jsonReply(obj string) func(w http.ResponseWriter) {
	writeBody := func(w http.ResponseWriter) {
		_, _ = io.WriteString(w, obj)
	}
	return writeBody
}
|
|
|
|
func basicRequest() llm.Request {
|
|
return llm.Request{Messages: []llm.Message{llm.UserText("hi")}}
|
|
}
|
|
|
|
func TestGenerateRoundTrip(t *testing.T) {
|
|
p, cap := serve(t, 200, jsonReply(`{
|
|
"model":"qwen3:30b",
|
|
"message":{"role":"assistant","content":"hello there"},
|
|
"done":true,"done_reason":"stop",
|
|
"prompt_eval_count":12,"eval_count":7
|
|
}`))
|
|
|
|
m, _ := p.Model("qwen3:30b")
|
|
temp := 0.2
|
|
resp, err := m.Generate(context.Background(), llm.Request{
|
|
System: "be terse",
|
|
Messages: []llm.Message{llm.SystemText("extra sys"), llm.UserText("hi")},
|
|
Temperature: &temp,
|
|
MaxTokens: 64,
|
|
StopSequences: []string{"END"},
|
|
})
|
|
if err != nil {
|
|
t.Fatalf("Generate: %v", err)
|
|
}
|
|
|
|
// Wire assertions.
|
|
if cap.path != "/api/chat" {
|
|
t.Errorf("path = %q", cap.path)
|
|
}
|
|
if cap.auth != "Bearer test-token" {
|
|
t.Errorf("auth = %q", cap.auth)
|
|
}
|
|
if cap.body["model"] != "qwen3:30b" {
|
|
t.Errorf("model = %v", cap.body["model"])
|
|
}
|
|
if stream, ok := cap.body["stream"].(bool); !ok || stream {
|
|
t.Errorf("stream must be explicit false, got %v", cap.body["stream"])
|
|
}
|
|
msgs := cap.body["messages"].([]any)
|
|
first := msgs[0].(map[string]any)
|
|
if first["role"] != "system" || first["content"] != "be terse\n\nextra sys" {
|
|
t.Errorf("system fold = %v", first)
|
|
}
|
|
second := msgs[1].(map[string]any)
|
|
if second["role"] != "user" || second["content"] != "hi" {
|
|
t.Errorf("user msg = %v", second)
|
|
}
|
|
opts := cap.body["options"].(map[string]any)
|
|
if opts["temperature"] != 0.2 || opts["num_predict"] != float64(64) {
|
|
t.Errorf("options = %v", opts)
|
|
}
|
|
|
|
// Response assertions.
|
|
if resp.Text() != "hello there" {
|
|
t.Errorf("text = %q", resp.Text())
|
|
}
|
|
if resp.FinishReason != llm.FinishStop {
|
|
t.Errorf("finish = %v", resp.FinishReason)
|
|
}
|
|
if resp.Usage.InputTokens != 12 || resp.Usage.OutputTokens != 7 {
|
|
t.Errorf("usage = %+v", resp.Usage)
|
|
}
|
|
if resp.Model != "ollama/qwen3:30b" {
|
|
t.Errorf("resp.Model = %q", resp.Model)
|
|
}
|
|
}
|
|
|
|
func TestImagesEncodeAsBase64(t *testing.T) {
|
|
p, cap := serve(t, 200, jsonReply(`{"message":{"role":"assistant","content":"a cat"},"done":true,"done_reason":"stop"}`))
|
|
imgBytes := []byte{0xFF, 0xD8, 0xFF, 0xE0, 1, 2, 3}
|
|
|
|
m, _ := p.Model("llava")
|
|
_, err := m.Generate(context.Background(), llm.Request{
|
|
Messages: []llm.Message{llm.UserParts(llm.Text("describe"), llm.Image("image/jpeg", imgBytes))},
|
|
})
|
|
if err != nil {
|
|
t.Fatalf("Generate: %v", err)
|
|
}
|
|
msgs := cap.body["messages"].([]any)
|
|
user := msgs[0].(map[string]any)
|
|
images := user["images"].([]any)
|
|
if len(images) != 1 || images[0] != base64.StdEncoding.EncodeToString(imgBytes) {
|
|
t.Errorf("images = %v", images)
|
|
}
|
|
if strings.Contains(images[0].(string), "data:") {
|
|
t.Error("images must be raw base64 without data: prefix")
|
|
}
|
|
}
|
|
|
|
func TestToolsAndToolCallRoundTrip(t *testing.T) {
|
|
p, cap := serve(t, 200, jsonReply(`{
|
|
"message":{"role":"assistant","content":"","tool_calls":[
|
|
{"function":{"index":0,"name":"get_weather","arguments":{"city":"Tokyo"}}}
|
|
]},
|
|
"done":true,"done_reason":"stop"
|
|
}`))
|
|
|
|
tool := llm.Tool{
|
|
Name: "get_weather", Description: "weather",
|
|
Parameters: json.RawMessage(`{"type":"object","properties":{"city":{"type":"string"}},"required":["city"]}`),
|
|
}
|
|
m, _ := p.Model("qwen3")
|
|
resp, err := m.Generate(context.Background(), basicRequest(), llm.WithTools(tool))
|
|
if err != nil {
|
|
t.Fatalf("Generate: %v", err)
|
|
}
|
|
|
|
// Tools serialize with parameters as an object.
|
|
tools := cap.body["tools"].([]any)
|
|
fn := tools[0].(map[string]any)["function"].(map[string]any)
|
|
if fn["name"] != "get_weather" {
|
|
t.Errorf("tool fn = %v", fn)
|
|
}
|
|
if _, ok := fn["parameters"].(map[string]any); !ok {
|
|
t.Errorf("parameters must be an object, got %T", fn["parameters"])
|
|
}
|
|
|
|
// Tool call comes back with arguments as a JSON object → RawMessage.
|
|
if len(resp.ToolCalls) != 1 {
|
|
t.Fatalf("tool calls = %v", resp.ToolCalls)
|
|
}
|
|
tc := resp.ToolCalls[0]
|
|
if tc.Name != "get_weather" || tc.ID == "" {
|
|
t.Errorf("call = %+v (id must be synthesized)", tc)
|
|
}
|
|
var args struct {
|
|
City string `json:"city"`
|
|
}
|
|
if err := json.Unmarshal(tc.Arguments, &args); err != nil || args.City != "Tokyo" {
|
|
t.Errorf("arguments = %s (%v)", tc.Arguments, err)
|
|
}
|
|
if resp.FinishReason != llm.FinishToolCalls {
|
|
t.Errorf("finish = %v, want tool_calls", resp.FinishReason)
|
|
}
|
|
}
|
|
|
|
func TestToolResultsAndHistoryToolCalls(t *testing.T) {
|
|
p, cap := serve(t, 200, jsonReply(`{"message":{"role":"assistant","content":"21C"},"done":true,"done_reason":"stop"}`))
|
|
|
|
m, _ := p.Model("qwen3")
|
|
_, err := m.Generate(context.Background(), llm.Request{
|
|
Messages: []llm.Message{
|
|
llm.UserText("weather?"),
|
|
{Role: llm.RoleAssistant, ToolCalls: []llm.ToolCall{
|
|
{ID: "call_0", Name: "get_weather", Arguments: json.RawMessage(`{"city":"Tokyo"}`)},
|
|
}},
|
|
llm.ToolResultsMessage(
|
|
llm.ToolResult{ID: "call_0", Name: "get_weather", Content: `{"temp":21}`},
|
|
llm.ToolResult{ID: "call_1", Name: "broken_tool", Content: "boom", IsError: true},
|
|
),
|
|
},
|
|
})
|
|
if err != nil {
|
|
t.Fatalf("Generate: %v", err)
|
|
}
|
|
msgs := cap.body["messages"].([]any)
|
|
if len(msgs) != 4 {
|
|
t.Fatalf("messages = %d, want 4 (user, assistant, 2 tool results)", len(msgs))
|
|
}
|
|
asst := msgs[1].(map[string]any)
|
|
calls := asst["tool_calls"].([]any)
|
|
args := calls[0].(map[string]any)["function"].(map[string]any)["arguments"]
|
|
if _, ok := args.(map[string]any); !ok {
|
|
t.Errorf("history tool-call arguments must be an object, got %T", args)
|
|
}
|
|
tr1 := msgs[2].(map[string]any)
|
|
if tr1["role"] != "tool" || tr1["tool_name"] != "get_weather" || tr1["content"] != `{"temp":21}` {
|
|
t.Errorf("tool result 1 = %v", tr1)
|
|
}
|
|
tr2 := msgs[3].(map[string]any)
|
|
if tr2["content"] != "ERROR: boom" {
|
|
t.Errorf("error result content = %v", tr2["content"])
|
|
}
|
|
}
|
|
|
|
func TestStructuredOutputFormat(t *testing.T) {
|
|
p, cap := serve(t, 200, jsonReply(`{"message":{"role":"assistant","content":"{\"name\":\"Ada\"}"},"done":true,"done_reason":"stop"}`))
|
|
schema := json.RawMessage(`{"type":"object","properties":{"name":{"type":"string"}},"required":["name"]}`)
|
|
|
|
m, _ := p.Model("qwen3")
|
|
resp, err := m.Generate(context.Background(), basicRequest(), llm.WithSchema(schema, "person"))
|
|
if err != nil {
|
|
t.Fatalf("Generate: %v", err)
|
|
}
|
|
format, ok := cap.body["format"].(map[string]any)
|
|
if !ok || format["type"] != "object" {
|
|
t.Errorf("format = %v, want the schema object", cap.body["format"])
|
|
}
|
|
if resp.Text() != `{"name":"Ada"}` {
|
|
t.Errorf("text = %q", resp.Text())
|
|
}
|
|
}
|
|
|
|
func TestThinkMapping(t *testing.T) {
|
|
p, cap := serve(t, 200, jsonReply(`{"message":{"role":"assistant","content":"ok"},"done":true,"done_reason":"stop"}`))
|
|
m, _ := p.Model("gpt-oss:120b")
|
|
_, err := m.Generate(context.Background(), basicRequest(), llm.WithReasoningEffort("high"))
|
|
if err != nil {
|
|
t.Fatalf("Generate: %v", err)
|
|
}
|
|
if cap.body["think"] != "high" {
|
|
t.Errorf("think = %v", cap.body["think"])
|
|
}
|
|
|
|
if _, err := m.Generate(context.Background(), basicRequest(), llm.WithReasoningEffort("max")); err == nil {
|
|
t.Error("invalid reasoning effort should error")
|
|
}
|
|
}
|
|
|
|
func TestToolChoiceNoneDropsTools(t *testing.T) {
|
|
p, cap := serve(t, 200, jsonReply(`{"message":{"role":"assistant","content":"ok"},"done":true,"done_reason":"stop"}`))
|
|
m, _ := p.Model("qwen3")
|
|
_, err := m.Generate(context.Background(), basicRequest(),
|
|
llm.WithTools(llm.Tool{Name: "t"}), llm.WithToolChoice("none"))
|
|
if err != nil {
|
|
t.Fatalf("Generate: %v", err)
|
|
}
|
|
if _, present := cap.body["tools"]; present {
|
|
t.Error("tool_choice none must omit tools")
|
|
}
|
|
}
|
|
|
|
func TestStreamingNDJSON(t *testing.T) {
|
|
p, _ := serve(t, 200, func(w http.ResponseWriter) {
|
|
w.Header().Set("Content-Type", "application/x-ndjson")
|
|
_, _ = io.WriteString(w, `{"message":{"role":"assistant","content":"Hel"},"done":false}
|
|
{"message":{"role":"assistant","content":"lo"},"done":false}
|
|
{"message":{"role":"assistant","content":"","tool_calls":[{"function":{"name":"ping","arguments":{}}}]},"done":false}
|
|
{"message":{"role":"assistant","content":""},"done":true,"done_reason":"stop","prompt_eval_count":5,"eval_count":9}
|
|
`)
|
|
})
|
|
|
|
m, _ := p.Model("qwen3")
|
|
s, err := m.Stream(context.Background(), basicRequest())
|
|
if err != nil {
|
|
t.Fatalf("Stream: %v", err)
|
|
}
|
|
defer s.Close()
|
|
|
|
var text strings.Builder
|
|
var toolCalls []llm.ToolCall
|
|
var final *llm.Response
|
|
for {
|
|
ev, err := s.Next()
|
|
if errors.Is(err, io.EOF) {
|
|
break
|
|
}
|
|
if err != nil {
|
|
t.Fatalf("Next: %v", err)
|
|
}
|
|
text.WriteString(ev.TextDelta)
|
|
if ev.ToolCall != nil {
|
|
toolCalls = append(toolCalls, *ev.ToolCall)
|
|
}
|
|
if ev.Response != nil {
|
|
final = ev.Response
|
|
}
|
|
}
|
|
if text.String() != "Hello" {
|
|
t.Errorf("text = %q", text.String())
|
|
}
|
|
if len(toolCalls) != 1 || toolCalls[0].Name != "ping" {
|
|
t.Errorf("tool calls = %+v", toolCalls)
|
|
}
|
|
if final == nil {
|
|
t.Fatal("no final response event")
|
|
}
|
|
if final.Usage.InputTokens != 5 || final.Usage.OutputTokens != 9 {
|
|
t.Errorf("final usage = %+v", final.Usage)
|
|
}
|
|
if final.FinishReason != llm.FinishToolCalls {
|
|
t.Errorf("final finish = %v", final.FinishReason)
|
|
}
|
|
if final.Text() != "Hello" {
|
|
t.Errorf("final text = %q", final.Text())
|
|
}
|
|
}
|
|
|
|
// TestStreamingForemanSingleObject: foreman returns one buffered JSON
|
|
// object to a stream:true request; the stream must still deliver the text
|
|
// and a final response.
|
|
func TestStreamingForemanSingleObject(t *testing.T) {
|
|
p, cap := serve(t, 200, func(w http.ResponseWriter) {
|
|
w.Header().Set("Content-Type", "application/json")
|
|
_, _ = io.WriteString(w, `{"message":{"role":"assistant","content":"queued answer"},"done":true,"done_reason":"stop","prompt_eval_count":3,"eval_count":4}`)
|
|
})
|
|
|
|
m, _ := p.Model("qwen3:30b")
|
|
s, err := m.Stream(context.Background(), basicRequest())
|
|
if err != nil {
|
|
t.Fatalf("Stream: %v", err)
|
|
}
|
|
defer s.Close()
|
|
|
|
if stream, ok := cap.body["stream"].(bool); !ok || !stream {
|
|
t.Errorf("stream flag = %v, want true", cap.body["stream"])
|
|
}
|
|
|
|
var text strings.Builder
|
|
var final *llm.Response
|
|
for {
|
|
ev, err := s.Next()
|
|
if errors.Is(err, io.EOF) {
|
|
break
|
|
}
|
|
if err != nil {
|
|
t.Fatalf("Next: %v", err)
|
|
}
|
|
text.WriteString(ev.TextDelta)
|
|
if ev.Response != nil {
|
|
final = ev.Response
|
|
}
|
|
}
|
|
if text.String() != "queued answer" || final == nil || final.Usage.OutputTokens != 4 {
|
|
t.Errorf("text=%q final=%+v", text.String(), final)
|
|
}
|
|
}
|
|
|
|
func TestErrorMapping(t *testing.T) {
|
|
t.Run("404 is model-not-found", func(t *testing.T) {
|
|
p, _ := serve(t, 404, jsonReply(`{"error":"model not found"}`))
|
|
m, _ := p.Model("nope")
|
|
_, err := m.Generate(context.Background(), basicRequest())
|
|
if !errors.Is(err, llm.ErrModelNotFound) {
|
|
t.Errorf("error = %v, want ErrModelNotFound", err)
|
|
}
|
|
if llm.Classify(err) != llm.ClassPermanent {
|
|
t.Error("404 must classify permanent")
|
|
}
|
|
})
|
|
|
|
t.Run("503 transient with message", func(t *testing.T) {
|
|
p, _ := serve(t, 503, jsonReply(`{"error":"request cancelled while waiting"}`))
|
|
m, _ := p.Model("qwen3")
|
|
_, err := m.Generate(context.Background(), basicRequest())
|
|
var apiErr *llm.APIError
|
|
if !errors.As(err, &apiErr) || apiErr.Status != 503 || !strings.Contains(apiErr.Message, "cancelled") {
|
|
t.Errorf("error = %v", err)
|
|
}
|
|
if llm.Classify(err) != llm.ClassTransient {
|
|
t.Error("503 must classify transient")
|
|
}
|
|
})
|
|
|
|
t.Run("non-JSON error body", func(t *testing.T) {
|
|
p, _ := serve(t, 500, jsonReply(`upstream exploded`))
|
|
m, _ := p.Model("qwen3")
|
|
_, err := m.Generate(context.Background(), basicRequest())
|
|
var apiErr *llm.APIError
|
|
if !errors.As(err, &apiErr) || !strings.Contains(apiErr.Message, "upstream exploded") {
|
|
t.Errorf("error = %v", err)
|
|
}
|
|
})
|
|
}
|
|
|
|
func TestCapabilityEnforcement(t *testing.T) {
|
|
p, _ := serve(t, 200, jsonReply(`{"message":{"content":"x"},"done":true}`))
|
|
|
|
t.Run("too many images", func(t *testing.T) {
|
|
m, _ := p.Model("llava", llm.WithCapabilities(llm.Capabilities{MaxImagesPerReq: 1, AllowedImageMIME: []string{"image/png"}}))
|
|
_, err := m.Generate(context.Background(), llm.Request{Messages: []llm.Message{
|
|
llm.UserParts(llm.Image("image/png", []byte{1}), llm.Image("image/png", []byte{2})),
|
|
}})
|
|
if !errors.Is(err, llm.ErrUnsupported) {
|
|
t.Errorf("error = %v, want ErrUnsupported", err)
|
|
}
|
|
})
|
|
|
|
t.Run("images on text-only model", func(t *testing.T) {
|
|
m, _ := p.Model("qwen3", llm.WithCapabilities(llm.Capabilities{}))
|
|
_, err := m.Generate(context.Background(), llm.Request{Messages: []llm.Message{
|
|
llm.UserParts(llm.Image("image/png", []byte{1})),
|
|
}})
|
|
if !errors.Is(err, llm.ErrUnsupported) {
|
|
t.Errorf("error = %v, want ErrUnsupported", err)
|
|
}
|
|
})
|
|
|
|
t.Run("disallowed mime", func(t *testing.T) {
|
|
m, _ := p.Model("llava") // default caps: jpeg/png only
|
|
_, err := m.Generate(context.Background(), llm.Request{Messages: []llm.Message{
|
|
llm.UserParts(llm.Image("image/tiff", []byte{1})),
|
|
}})
|
|
if !errors.Is(err, llm.ErrUnsupported) {
|
|
t.Errorf("error = %v, want ErrUnsupported", err)
|
|
}
|
|
})
|
|
}
|
|
|
|
func TestNoBaseURL(t *testing.T) {
|
|
p := New(WithBaseURL(""))
|
|
m, _ := p.Model("x")
|
|
if _, err := m.Generate(context.Background(), basicRequest()); err == nil ||
|
|
!strings.Contains(err.Error(), "no base URL") {
|
|
t.Errorf("error = %v, want a clear no-base-URL message", err)
|
|
}
|
|
}
|
|
|
|
func TestNormalizeHost(t *testing.T) {
|
|
for in, want := range map[string]string{
|
|
"myhost": "http://myhost:11434",
|
|
"myhost:8080": "http://myhost:8080",
|
|
"http://myhost:8080/": "http://myhost:8080",
|
|
"https://ollama.com": "https://ollama.com",
|
|
" 127.0.0.1:11434 ": "http://127.0.0.1:11434",
|
|
} {
|
|
if got := NormalizeHost(in); got != want {
|
|
t.Errorf("NormalizeHost(%q) = %q, want %q", in, got, want)
|
|
}
|
|
}
|
|
}
|
|
|
|
func TestPresets(t *testing.T) {
|
|
t.Run("cloud", func(t *testing.T) {
|
|
t.Setenv("OLLAMA_API_KEY", "cloud-key")
|
|
p := Cloud()
|
|
if p.Name() != "ollama-cloud" || p.baseURL != DefaultCloudBaseURL || p.token != "cloud-key" {
|
|
t.Errorf("cloud preset = %+v", p)
|
|
}
|
|
})
|
|
t.Run("local respects OLLAMA_HOST", func(t *testing.T) {
|
|
t.Setenv("OLLAMA_HOST", "box.lan:9999")
|
|
p := Local()
|
|
if p.Name() != "ollama" || p.baseURL != "http://box.lan:9999" || p.token != "" {
|
|
t.Errorf("local preset = %+v", p)
|
|
}
|
|
})
|
|
t.Run("foreman", func(t *testing.T) {
|
|
p := Foreman("http://foreman-m1:8080", "tok")
|
|
if p.Name() != "foreman" || p.baseURL != "http://foreman-m1:8080" || p.token != "tok" {
|
|
t.Errorf("foreman preset = %+v", p)
|
|
}
|
|
})
|
|
}
|
|
|
|
func TestLocalNoAuthHeader(t *testing.T) {
|
|
p, cap := serve(t, 200, jsonReply(`{"message":{"content":"x"},"done":true}`))
|
|
p.token = "" // simulate local mode on the test server
|
|
m, _ := p.Model("llama3")
|
|
if _, err := m.Generate(context.Background(), basicRequest()); err != nil {
|
|
t.Fatalf("Generate: %v", err)
|
|
}
|
|
if cap.auth != "" {
|
|
t.Errorf("auth header = %q, want none in local mode", cap.auth)
|
|
}
|
|
}
|