Files
majordomo/provider/ollama/ollama_test.go
T
steve 04b21fdad2 feat: live-validated against Ollama Cloud; schema instruction fallback for cloud
Phase 8: all six live checks pass (tier aliases, thinking-tier chat, real
tool invocation, structured Generate[T], forced failover with bench+skip,
skill agent). Discovery: ollama.com ignores the format field — the
provider now also states the schema as a system instruction (constrained
decoding locally, instruction-guided JSON on cloud), with hermetic test.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
2026-06-10 13:22:54 +02:00

501 lines
16 KiB
Go

package ollama
import (
"context"
"encoding/base64"
"encoding/json"
"errors"
"io"
"net/http"
"net/http/httptest"
"strings"
"testing"
"gitea.stevedudenhoeffer.com/steve/majordomo/llm"
)
// captured is the record of a request as seen by the stub server that serve
// starts: selected headers, the URL path, and the request body in both raw
// and JSON-decoded form.
type captured struct {
	auth, contentType, path string

	body map[string]any // request body decoded as a JSON object
	raw  []byte         // request body exactly as received
}
// serve starts an httptest server that records every incoming request into
// the returned captured value, answers with the given status, and lets
// respond produce the rest of the response. The server is shut down via
// t.Cleanup. The returned Provider is pointed at the server and configured
// with the token "test-token".
//
// The status line is committed lazily: on respond's first Write, Flush or
// WriteHeader, or after respond returns if it wrote nothing. Committing it
// up front would freeze the header map, and any header respond sets (for
// example Content-Type) would never reach the client.
func serve(t *testing.T, status int, respond func(w http.ResponseWriter)) (*Provider, *captured) {
	t.Helper()
	rec := &captured{}
	ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		rec.auth = r.Header.Get("Authorization")
		rec.contentType = r.Header.Get("Content-Type")
		rec.path = r.URL.Path
		// Best effort: a body that cannot be read or is not JSON simply
		// leaves raw/body empty for the test's own assertions to catch.
		rec.raw, _ = io.ReadAll(r.Body)
		// Decode into a fresh map; json.Unmarshal keeps the entries of a
		// non-nil map, which would leak keys from an earlier request.
		rec.body = nil
		_ = json.Unmarshal(rec.raw, &rec.body)

		dw := &deferredStatusWriter{ResponseWriter: w, status: status}
		respond(dw)
		dw.commit()
	}))
	t.Cleanup(ts.Close)
	return New(WithBaseURL(ts.URL), WithToken("test-token")), rec
}

// deferredStatusWriter wraps a ResponseWriter and postpones writing a fixed
// status code until the response is first written to, so that headers set
// before that point are still sent.
type deferredStatusWriter struct {
	http.ResponseWriter
	status    int
	committed bool
}

// commit writes the fixed status code exactly once.
func (d *deferredStatusWriter) commit() {
	if d.committed {
		return
	}
	d.committed = true
	d.ResponseWriter.WriteHeader(d.status)
}

// WriteHeader ignores the requested code: the status is fixed by serve.
func (d *deferredStatusWriter) WriteHeader(int) { d.commit() }

// Write commits the status if needed and forwards the bytes.
func (d *deferredStatusWriter) Write(b []byte) (int, error) {
	d.commit()
	return d.ResponseWriter.Write(b)
}

// Flush commits the status if needed and flushes the underlying writer when
// it supports flushing.
func (d *deferredStatusWriter) Flush() {
	d.commit()
	if f, ok := d.ResponseWriter.(http.Flusher); ok {
		f.Flush()
	}
}
// jsonReply builds a responder that writes obj verbatim as the response
// body. The write result is deliberately discarded: the responder has no
// way to report a failure.
func jsonReply(obj string) func(w http.ResponseWriter) {
	writeBody := func(w http.ResponseWriter) {
		_, _ = io.WriteString(w, obj)
	}
	return writeBody
}
// basicRequest returns the smallest request the tests need: a single user
// message with the text "hi" and every other field left at its zero value.
func basicRequest() llm.Request {
	var req llm.Request
	req.Messages = []llm.Message{llm.UserText("hi")}
	return req
}
// TestGenerateRoundTrip sends one non-streaming Generate call to a stub
// server and checks both directions: the JSON the provider put on the wire
// and the llm.Response it built from the canned reply.
//
// The decoded request body is walked with comma-ok assertions so that a
// missing or mistyped field fails the test with a message instead of
// panicking.
func TestGenerateRoundTrip(t *testing.T) {
	p, got := serve(t, 200, jsonReply(`{
"model":"qwen3:30b",
"message":{"role":"assistant","content":"hello there"},
"done":true,"done_reason":"stop",
"prompt_eval_count":12,"eval_count":7
}`))
	m, err := p.Model("qwen3:30b")
	if err != nil {
		t.Fatalf("Model: %v", err)
	}
	temp := 0.2
	resp, err := m.Generate(context.Background(), llm.Request{
		System:        "be terse",
		Messages:      []llm.Message{llm.SystemText("extra sys"), llm.UserText("hi")},
		Temperature:   &temp,
		MaxTokens:     64,
		StopSequences: []string{"END"},
	})
	if err != nil {
		t.Fatalf("Generate: %v", err)
	}

	// Wire assertions.
	if got.path != "/api/chat" {
		t.Errorf("path = %q", got.path)
	}
	if got.auth != "Bearer test-token" {
		t.Errorf("auth = %q", got.auth)
	}
	if got.body["model"] != "qwen3:30b" {
		t.Errorf("model = %v", got.body["model"])
	}
	if stream, ok := got.body["stream"].(bool); !ok || stream {
		t.Errorf("stream must be explicit false, got %v", got.body["stream"])
	}
	msgs, ok := got.body["messages"].([]any)
	if !ok || len(msgs) < 2 {
		t.Fatalf("messages = %v, want a system and a user message", got.body["messages"])
	}
	// A failed assertion yields a nil map; reading from it is safe and the
	// comparisons below then report the mismatch.
	first, _ := msgs[0].(map[string]any)
	if first["role"] != "system" || first["content"] != "be terse\n\nextra sys" {
		t.Errorf("system fold = %v", first)
	}
	second, _ := msgs[1].(map[string]any)
	if second["role"] != "user" || second["content"] != "hi" {
		t.Errorf("user msg = %v", second)
	}
	opts, _ := got.body["options"].(map[string]any)
	if opts["temperature"] != 0.2 || opts["num_predict"] != float64(64) {
		t.Errorf("options = %v", opts)
	}

	// Response assertions.
	if resp.Text() != "hello there" {
		t.Errorf("text = %q", resp.Text())
	}
	if resp.FinishReason != llm.FinishStop {
		t.Errorf("finish = %v", resp.FinishReason)
	}
	if resp.Usage.InputTokens != 12 || resp.Usage.OutputTokens != 7 {
		t.Errorf("usage = %+v", resp.Usage)
	}
	if resp.Model != "ollama/qwen3:30b" {
		t.Errorf("resp.Model = %q", resp.Model)
	}
}
// TestImagesEncodeAsBase64 checks that an image part of a user message is
// sent in the message's "images" array as plain standard base64, without a
// "data:" URI prefix.
//
// The image count is a fatal check: continuing with an empty array would
// panic on images[0] rather than report the failure.
func TestImagesEncodeAsBase64(t *testing.T) {
	p, got := serve(t, 200, jsonReply(`{"message":{"role":"assistant","content":"a cat"},"done":true,"done_reason":"stop"}`))
	imgBytes := []byte{0xFF, 0xD8, 0xFF, 0xE0, 1, 2, 3}
	m, err := p.Model("llava")
	if err != nil {
		t.Fatalf("Model: %v", err)
	}
	_, err = m.Generate(context.Background(), llm.Request{
		Messages: []llm.Message{llm.UserParts(llm.Text("describe"), llm.Image("image/jpeg", imgBytes))},
	})
	if err != nil {
		t.Fatalf("Generate: %v", err)
	}

	msgs, ok := got.body["messages"].([]any)
	if !ok || len(msgs) == 0 {
		t.Fatalf("messages = %v, want at least the user message", got.body["messages"])
	}
	user, _ := msgs[0].(map[string]any)
	images, _ := user["images"].([]any)
	if len(images) != 1 {
		t.Fatalf("images = %v", images)
	}
	encoded, ok := images[0].(string)
	if !ok || encoded != base64.StdEncoding.EncodeToString(imgBytes) {
		t.Errorf("images = %v", images)
	}
	if strings.Contains(encoded, "data:") {
		t.Error("images must be raw base64 without data: prefix")
	}
}
// TestToolsAndToolCallRoundTrip checks tool handling in both directions:
// the tool definition is serialized with its parameters as a JSON object,
// and a tool call in the reply is surfaced with a non-empty ID, its
// arguments as raw JSON, and the tool-calls finish reason.
//
// The decoded request body is walked with comma-ok assertions so that a
// missing "tools" entry fails with a message instead of panicking.
func TestToolsAndToolCallRoundTrip(t *testing.T) {
	p, got := serve(t, 200, jsonReply(`{
"message":{"role":"assistant","content":"","tool_calls":[
{"function":{"index":0,"name":"get_weather","arguments":{"city":"Tokyo"}}}
]},
"done":true,"done_reason":"stop"
}`))
	tool := llm.Tool{
		Name: "get_weather", Description: "weather",
		Parameters: json.RawMessage(`{"type":"object","properties":{"city":{"type":"string"}},"required":["city"]}`),
	}
	m, err := p.Model("qwen3")
	if err != nil {
		t.Fatalf("Model: %v", err)
	}
	resp, err := m.Generate(context.Background(), basicRequest(), llm.WithTools(tool))
	if err != nil {
		t.Fatalf("Generate: %v", err)
	}

	// Tools serialize with parameters as an object.
	tools, ok := got.body["tools"].([]any)
	if !ok || len(tools) == 0 {
		t.Fatalf("tools = %v, want one serialized tool", got.body["tools"])
	}
	entry, _ := tools[0].(map[string]any)
	fn, _ := entry["function"].(map[string]any)
	if fn["name"] != "get_weather" {
		t.Errorf("tool fn = %v", fn)
	}
	if _, ok := fn["parameters"].(map[string]any); !ok {
		t.Errorf("parameters must be an object, got %T", fn["parameters"])
	}

	// Tool call comes back with arguments as a JSON object → RawMessage.
	if len(resp.ToolCalls) != 1 {
		t.Fatalf("tool calls = %v", resp.ToolCalls)
	}
	tc := resp.ToolCalls[0]
	if tc.Name != "get_weather" || tc.ID == "" {
		t.Errorf("call = %+v (id must be synthesized)", tc)
	}
	var args struct {
		City string `json:"city"`
	}
	if err := json.Unmarshal(tc.Arguments, &args); err != nil || args.City != "Tokyo" {
		t.Errorf("arguments = %s (%v)", tc.Arguments, err)
	}
	if resp.FinishReason != llm.FinishToolCalls {
		t.Errorf("finish = %v, want tool_calls", resp.FinishReason)
	}
}
// TestToolResultsAndHistoryToolCalls replays a conversation that already
// contains an assistant tool call and two tool results, and checks the wire
// form: four messages, the historic call's arguments as a JSON object, each
// result as its own "tool" message, and an error result prefixed "ERROR: ".
//
// The decoded request body is walked with comma-ok assertions so that a
// missing "tool_calls" entry fails with a message instead of panicking.
func TestToolResultsAndHistoryToolCalls(t *testing.T) {
	p, got := serve(t, 200, jsonReply(`{"message":{"role":"assistant","content":"21C"},"done":true,"done_reason":"stop"}`))
	m, err := p.Model("qwen3")
	if err != nil {
		t.Fatalf("Model: %v", err)
	}
	_, err = m.Generate(context.Background(), llm.Request{
		Messages: []llm.Message{
			llm.UserText("weather?"),
			{Role: llm.RoleAssistant, ToolCalls: []llm.ToolCall{
				{ID: "call_0", Name: "get_weather", Arguments: json.RawMessage(`{"city":"Tokyo"}`)},
			}},
			llm.ToolResultsMessage(
				llm.ToolResult{ID: "call_0", Name: "get_weather", Content: `{"temp":21}`},
				llm.ToolResult{ID: "call_1", Name: "broken_tool", Content: "boom", IsError: true},
			),
		},
	})
	if err != nil {
		t.Fatalf("Generate: %v", err)
	}

	msgs, ok := got.body["messages"].([]any)
	if !ok || len(msgs) != 4 {
		t.Fatalf("messages = %d, want 4 (user, assistant, 2 tool results)", len(msgs))
	}

	asst, _ := msgs[1].(map[string]any)
	calls, ok := asst["tool_calls"].([]any)
	if !ok || len(calls) == 0 {
		t.Fatalf("assistant tool_calls = %v, want the historic call", asst["tool_calls"])
	}
	call, _ := calls[0].(map[string]any)
	fn, _ := call["function"].(map[string]any)
	args := fn["arguments"]
	if _, ok := args.(map[string]any); !ok {
		t.Errorf("history tool-call arguments must be an object, got %T", args)
	}

	tr1, _ := msgs[2].(map[string]any)
	if tr1["role"] != "tool" || tr1["tool_name"] != "get_weather" || tr1["content"] != `{"temp":21}` {
		t.Errorf("tool result 1 = %v", tr1)
	}
	tr2, _ := msgs[3].(map[string]any)
	if tr2["content"] != "ERROR: boom" {
		t.Errorf("error result content = %v", tr2["content"])
	}
}
// TestStructuredOutputFormat checks that a schema passed via llm.WithSchema
// is sent both as the request's "format" object and as text inside the
// leading system message, and that the reply text is returned unchanged.
//
// The system message content is read with a comma-ok assertion so that a
// missing or non-string content fails with a message instead of panicking.
func TestStructuredOutputFormat(t *testing.T) {
	p, got := serve(t, 200, jsonReply(`{"message":{"role":"assistant","content":"{\"name\":\"Ada\"}"},"done":true,"done_reason":"stop"}`))
	schema := json.RawMessage(`{"type":"object","properties":{"name":{"type":"string"}},"required":["name"]}`)
	m, err := p.Model("qwen3")
	if err != nil {
		t.Fatalf("Model: %v", err)
	}
	resp, err := m.Generate(context.Background(), basicRequest(), llm.WithSchema(schema, "person"))
	if err != nil {
		t.Fatalf("Generate: %v", err)
	}

	format, ok := got.body["format"].(map[string]any)
	if !ok || format["type"] != "object" {
		t.Errorf("format = %v, want the schema object", got.body["format"])
	}
	if resp.Text() != `{"name":"Ada"}` {
		t.Errorf("text = %q", resp.Text())
	}

	// Ollama Cloud ignores "format", so the schema must also be stated as
	// a system instruction.
	msgs, ok := got.body["messages"].([]any)
	if !ok || len(msgs) == 0 {
		t.Fatalf("messages = %v, want a leading system message", got.body["messages"])
	}
	sys, _ := msgs[0].(map[string]any)
	content, _ := sys["content"].(string)
	if sys["role"] != "system" || !strings.Contains(content, `"name"`) ||
		!strings.Contains(content, "JSON Schema") {
		t.Errorf("system fold must carry the schema instruction, got %v", sys)
	}
}
// TestThinkMapping checks that the reasoning effort "high" is sent as the
// request's "think" field and that the effort "max" is rejected with an
// error.
func TestThinkMapping(t *testing.T) {
	p, seen := serve(t, 200, jsonReply(`{"message":{"role":"assistant","content":"ok"},"done":true,"done_reason":"stop"}`))
	m, _ := p.Model("gpt-oss:120b")

	if _, err := m.Generate(context.Background(), basicRequest(), llm.WithReasoningEffort("high")); err != nil {
		t.Fatalf("Generate: %v", err)
	}
	if think := seen.body["think"]; think != "high" {
		t.Errorf("think = %v", think)
	}

	_, err := m.Generate(context.Background(), basicRequest(), llm.WithReasoningEffort("max"))
	if err == nil {
		t.Error("invalid reasoning effort should error")
	}
}
// TestToolChoiceNoneDropsTools checks that combining tools with the tool
// choice "none" leaves the "tools" key out of the request body altogether.
func TestToolChoiceNoneDropsTools(t *testing.T) {
	p, seen := serve(t, 200, jsonReply(`{"message":{"role":"assistant","content":"ok"},"done":true,"done_reason":"stop"}`))
	m, _ := p.Model("qwen3")

	withTool := llm.WithTools(llm.Tool{Name: "t"})
	noChoice := llm.WithToolChoice("none")
	if _, err := m.Generate(context.Background(), basicRequest(), withTool, noChoice); err != nil {
		t.Fatalf("Generate: %v", err)
	}

	_, present := seen.body["tools"]
	if present {
		t.Error("tool_choice none must omit tools")
	}
}
// TestStreamingNDJSON feeds the provider a four-line NDJSON reply (two text
// chunks, one tool call, one terminal line with usage) and checks what the
// stream yields: the concatenated text deltas, the tool call, and a final
// response carrying usage, the tool-calls finish reason and the full text.
func TestStreamingNDJSON(t *testing.T) {
	const reply = `{"message":{"role":"assistant","content":"Hel"},"done":false}
{"message":{"role":"assistant","content":"lo"},"done":false}
{"message":{"role":"assistant","content":"","tool_calls":[{"function":{"name":"ping","arguments":{}}}]},"done":false}
{"message":{"role":"assistant","content":""},"done":true,"done_reason":"stop","prompt_eval_count":5,"eval_count":9}
`
	p, _ := serve(t, 200, func(w http.ResponseWriter) {
		w.Header().Set("Content-Type", "application/x-ndjson")
		_, _ = io.WriteString(w, reply)
	})
	m, _ := p.Model("qwen3")
	s, err := m.Stream(context.Background(), basicRequest())
	if err != nil {
		t.Fatalf("Stream: %v", err)
	}
	defer s.Close()

	var (
		text      strings.Builder
		toolCalls []llm.ToolCall
		final     *llm.Response
	)
	// Drain the stream until io.EOF; any other error is fatal.
	for {
		ev, err := s.Next()
		if err != nil {
			if errors.Is(err, io.EOF) {
				break
			}
			t.Fatalf("Next: %v", err)
		}
		text.WriteString(ev.TextDelta)
		if call := ev.ToolCall; call != nil {
			toolCalls = append(toolCalls, *call)
		}
		if r := ev.Response; r != nil {
			final = r
		}
	}

	if streamed := text.String(); streamed != "Hello" {
		t.Errorf("text = %q", streamed)
	}
	if len(toolCalls) != 1 || toolCalls[0].Name != "ping" {
		t.Errorf("tool calls = %+v", toolCalls)
	}
	if final == nil {
		t.Fatal("no final response event")
	}
	if u := final.Usage; u.InputTokens != 5 || u.OutputTokens != 9 {
		t.Errorf("final usage = %+v", final.Usage)
	}
	if final.FinishReason != llm.FinishToolCalls {
		t.Errorf("final finish = %v", final.FinishReason)
	}
	if final.Text() != "Hello" {
		t.Errorf("final text = %q", final.Text())
	}
}
// TestStreamingForemanSingleObject covers a server that answers a
// stream:true request with one buffered JSON object (as foreman does)
// rather than NDJSON. The stream must still yield the text and a final
// response, and the request must have been sent with "stream" set to true.
func TestStreamingForemanSingleObject(t *testing.T) {
	const reply = `{"message":{"role":"assistant","content":"queued answer"},"done":true,"done_reason":"stop","prompt_eval_count":3,"eval_count":4}`
	p, seen := serve(t, 200, func(w http.ResponseWriter) {
		w.Header().Set("Content-Type", "application/json")
		_, _ = io.WriteString(w, reply)
	})
	m, _ := p.Model("qwen3:30b")
	s, err := m.Stream(context.Background(), basicRequest())
	if err != nil {
		t.Fatalf("Stream: %v", err)
	}
	defer s.Close()

	streaming, isBool := seen.body["stream"].(bool)
	if !(isBool && streaming) {
		t.Errorf("stream flag = %v, want true", seen.body["stream"])
	}

	var (
		text  strings.Builder
		final *llm.Response
	)
	// Drain the stream until io.EOF; any other error is fatal.
	for {
		ev, err := s.Next()
		if err != nil {
			if errors.Is(err, io.EOF) {
				break
			}
			t.Fatalf("Next: %v", err)
		}
		text.WriteString(ev.TextDelta)
		if r := ev.Response; r != nil {
			final = r
		}
	}

	complete := text.String() == "queued answer" && final != nil && final.Usage.OutputTokens == 4
	if !complete {
		t.Errorf("text=%q final=%+v", text.String(), final)
	}
}
// TestErrorMapping checks how non-2xx replies surface as errors: a 404
// matches llm.ErrModelNotFound and classifies as permanent, a 503 becomes
// an *llm.APIError with the server's message and classifies as transient,
// and a non-JSON error body is carried through in the APIError message.
func TestErrorMapping(t *testing.T) {
	t.Run("404 is model-not-found", func(t *testing.T) {
		p, _ := serve(t, 404, jsonReply(`{"error":"model not found"}`))
		m, _ := p.Model("nope")
		_, err := m.Generate(context.Background(), basicRequest())

		if notFound := errors.Is(err, llm.ErrModelNotFound); !notFound {
			t.Errorf("error = %v, want ErrModelNotFound", err)
		}
		if class := llm.Classify(err); class != llm.ClassPermanent {
			t.Error("404 must classify permanent")
		}
	})

	t.Run("503 transient with message", func(t *testing.T) {
		p, _ := serve(t, 503, jsonReply(`{"error":"request cancelled while waiting"}`))
		m, _ := p.Model("qwen3")
		_, err := m.Generate(context.Background(), basicRequest())

		var apiErr *llm.APIError
		matched := errors.As(err, &apiErr) &&
			apiErr.Status == 503 &&
			strings.Contains(apiErr.Message, "cancelled")
		if !matched {
			t.Errorf("error = %v", err)
		}
		if class := llm.Classify(err); class != llm.ClassTransient {
			t.Error("503 must classify transient")
		}
	})

	t.Run("non-JSON error body", func(t *testing.T) {
		p, _ := serve(t, 500, jsonReply(`upstream exploded`))
		m, _ := p.Model("qwen3")
		_, err := m.Generate(context.Background(), basicRequest())

		var apiErr *llm.APIError
		matched := errors.As(err, &apiErr) && strings.Contains(apiErr.Message, "upstream exploded")
		if !matched {
			t.Errorf("error = %v", err)
		}
	})
}
// TestCapabilityEnforcement checks that requests exceeding a model's image
// capabilities fail with llm.ErrUnsupported: more images than allowed, any
// image on a model with empty capabilities, and an image MIME type that the
// model's capabilities do not list.
func TestCapabilityEnforcement(t *testing.T) {
	p, _ := serve(t, 200, jsonReply(`{"message":{"content":"x"},"done":true}`))

	// requireUnsupported reports a failure unless err matches ErrUnsupported.
	requireUnsupported := func(t *testing.T, err error) {
		t.Helper()
		if !errors.Is(err, llm.ErrUnsupported) {
			t.Errorf("error = %v, want ErrUnsupported", err)
		}
	}

	t.Run("too many images", func(t *testing.T) {
		caps := llm.Capabilities{MaxImagesPerReq: 1, AllowedImageMIME: []string{"image/png"}}
		m, _ := p.Model("llava", llm.WithCapabilities(caps))
		req := llm.Request{Messages: []llm.Message{
			llm.UserParts(llm.Image("image/png", []byte{1}), llm.Image("image/png", []byte{2})),
		}}
		_, err := m.Generate(context.Background(), req)
		requireUnsupported(t, err)
	})

	t.Run("images on text-only model", func(t *testing.T) {
		m, _ := p.Model("qwen3", llm.WithCapabilities(llm.Capabilities{}))
		req := llm.Request{Messages: []llm.Message{
			llm.UserParts(llm.Image("image/png", []byte{1})),
		}}
		_, err := m.Generate(context.Background(), req)
		requireUnsupported(t, err)
	})

	t.Run("disallowed mime", func(t *testing.T) {
		// No capability option here: the model's defaults apply, which the
		// original author notes as jpeg/png only.
		m, _ := p.Model("llava")
		req := llm.Request{Messages: []llm.Message{
			llm.UserParts(llm.Image("image/tiff", []byte{1})),
		}}
		_, err := m.Generate(context.Background(), req)
		requireUnsupported(t, err)
	})
}
// TestNoBaseURL checks that generating through a provider built with an
// empty base URL fails with an error whose text mentions "no base URL".
func TestNoBaseURL(t *testing.T) {
	p := New(WithBaseURL(""))
	m, _ := p.Model("x")

	_, err := m.Generate(context.Background(), basicRequest())
	if err != nil && strings.Contains(err.Error(), "no base URL") {
		return
	}
	t.Errorf("error = %v, want a clear no-base-URL message", err)
}
// TestNormalizeHost checks NormalizeHost against a table of inputs: a bare
// host gains the http scheme and port 11434, an explicit port is kept, a
// trailing slash is dropped, an https URL passes through, and surrounding
// whitespace is trimmed.
func TestNormalizeHost(t *testing.T) {
	cases := []struct {
		in, want string
	}{
		{"myhost", "http://myhost:11434"},
		{"myhost:8080", "http://myhost:8080"},
		{"http://myhost:8080/", "http://myhost:8080"},
		{"https://ollama.com", "https://ollama.com"},
		{" 127.0.0.1:11434 ", "http://127.0.0.1:11434"},
	}
	for _, tc := range cases {
		got := NormalizeHost(tc.in)
		if got == tc.want {
			continue
		}
		t.Errorf("NormalizeHost(%q) = %q, want %q", tc.in, got, tc.want)
	}
}
// TestPresets checks the name, base URL and token of each preset
// constructor: Cloud takes its token from OLLAMA_API_KEY, Local derives its
// base URL from OLLAMA_HOST and carries no token, and Foreman uses the URL
// and token it is given.
func TestPresets(t *testing.T) {
	t.Run("cloud", func(t *testing.T) {
		t.Setenv("OLLAMA_API_KEY", "cloud-key")
		p := Cloud()
		ok := p.Name() == "ollama-cloud" &&
			p.baseURL == DefaultCloudBaseURL &&
			p.token == "cloud-key"
		if !ok {
			t.Errorf("cloud preset = %+v", p)
		}
	})

	t.Run("local respects OLLAMA_HOST", func(t *testing.T) {
		t.Setenv("OLLAMA_HOST", "box.lan:9999")
		p := Local()
		ok := p.Name() == "ollama" &&
			p.baseURL == "http://box.lan:9999" &&
			p.token == ""
		if !ok {
			t.Errorf("local preset = %+v", p)
		}
	})

	t.Run("foreman", func(t *testing.T) {
		p := Foreman("http://foreman-m1:8080", "tok")
		ok := p.Name() == "foreman" &&
			p.baseURL == "http://foreman-m1:8080" &&
			p.token == "tok"
		if !ok {
			t.Errorf("foreman preset = %+v", p)
		}
	})
}
// TestLocalNoAuthHeader checks that a provider with an empty token sends no
// Authorization header.
func TestLocalNoAuthHeader(t *testing.T) {
	p, seen := serve(t, 200, jsonReply(`{"message":{"content":"x"},"done":true}`))
	// serve configures a token; clear it to simulate local mode against the
	// test server.
	p.token = ""
	m, _ := p.Model("llama3")

	_, err := m.Generate(context.Background(), basicRequest())
	if err != nil {
		t.Fatalf("Generate: %v", err)
	}
	if header := seen.auth; header != "" {
		t.Errorf("auth header = %q, want none in local mode", header)
	}
}