// majordomo/provider/ollama/ollama_test.go

package ollama

import (
	"context"
	"encoding/base64"
	"encoding/json"
	"errors"
	"io"
	"net/http"
	"net/http/httptest"
	"strings"
	"testing"

	"gitea.stevedudenhoeffer.com/steve/majordomo/llm"
)

// captured is the record of a request as seen by the test server: selected
// headers, the URL path, and the request payload in both decoded and raw
// form.
type captured struct {
	// auth and contentType are the Authorization and Content-Type request
	// headers; path is the request URL path.
	auth, contentType, path string

	// body is the payload decoded as a JSON object.
	body map[string]any
	// raw is the payload exactly as it was read off the wire.
	raw []byte
}

// lazyStatusWriter postpones committing the response status until the first
// body write (or until the handler finishes). net/http ignores header
// changes made after WriteHeader, so committing late lets a respond callback
// still set headers such as Content-Type.
type lazyStatusWriter struct {
	http.ResponseWriter
	status    int
	committed bool
}

// WriteHeader commits the status configured at construction exactly once.
// The code argument is ignored so the status handed to serve always wins.
func (l *lazyStatusWriter) WriteHeader(int) {
	if l.committed {
		return
	}
	l.committed = true
	l.ResponseWriter.WriteHeader(l.status)
}

// Write commits the status if that has not happened yet, then forwards the
// bytes to the wrapped writer.
func (l *lazyStatusWriter) Write(b []byte) (int, error) {
	l.WriteHeader(l.status)
	return l.ResponseWriter.Write(b)
}

// serve starts an httptest server that records each incoming request into
// the returned *captured and answers with the given status plus whatever
// respond writes. The server is closed via t.Cleanup. The returned Provider
// is configured with the server's URL and the token "test-token".
func serve(t *testing.T, status int, respond func(w http.ResponseWriter)) (*Provider, *captured) {
	t.Helper()
	rec := &captured{}
	ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		rec.auth = r.Header.Get("Authorization")
		rec.contentType = r.Header.Get("Content-Type")
		rec.path = r.URL.Path

		// Capture is best-effort: a short or non-JSON body simply leaves
		// raw/body partial or nil, and the assertions in the test report it.
		rec.raw, _ = io.ReadAll(r.Body)
		// Reset before decoding: Unmarshal reuses a non-nil map and keeps
		// its existing entries, which would leak keys from an earlier
		// request into this one.
		rec.body = nil
		_ = json.Unmarshal(rec.raw, &rec.body)

		// Let respond set headers before the status line is committed.
		lw := &lazyStatusWriter{ResponseWriter: w, status: status}
		respond(lw)
		// Commit the status even when respond wrote no body.
		lw.WriteHeader(status)
	}))
	t.Cleanup(ts.Close)
	return New(WithBaseURL(ts.URL), WithToken("test-token")), rec
}

// jsonReply builds a respond callback that writes obj verbatim as the
// response body. No validation is performed, so obj may be any string.
func jsonReply(obj string) func(w http.ResponseWriter) {
	writeBody := func(w http.ResponseWriter) {
		// Write errors are deliberately dropped; the caller's assertions
		// surface a truncated reply.
		_, _ = io.WriteString(w, obj)
	}
	return writeBody
}

// basicRequest returns a minimal request holding a single user message with
// the text "hi".
func basicRequest() llm.Request {
	msgs := []llm.Message{llm.UserText("hi")}
	return llm.Request{Messages: msgs}
}

// TestGenerateRoundTrip drives one non-streaming Generate call against the
// test server and checks both the request that went over the wire and the
// decoded response.
func TestGenerateRoundTrip(t *testing.T) {
	p, got := serve(t, 200, jsonReply(`{
		"model":"qwen3:30b",
		"message":{"role":"assistant","content":"hello there"},
		"done":true,"done_reason":"stop",
		"prompt_eval_count":12,"eval_count":7
	}`))

	m, _ := p.Model("qwen3:30b")
	temp := 0.2
	// NOTE(review): StopSequences is sent but never asserted below; add a
	// wire assertion once the expected request key is confirmed.
	resp, err := m.Generate(context.Background(), llm.Request{
		System:        "be terse",
		Messages:      []llm.Message{llm.SystemText("extra sys"), llm.UserText("hi")},
		Temperature:   &temp,
		MaxTokens:     64,
		StopSequences: []string{"END"},
	})
	if err != nil {
		t.Fatalf("Generate: %v", err)
	}

	// Wire assertions.
	if got.path != "/api/chat" {
		t.Errorf("path = %q", got.path)
	}
	if got.auth != "Bearer test-token" {
		t.Errorf("auth = %q", got.auth)
	}
	if got.body["model"] != "qwen3:30b" {
		t.Errorf("model = %v", got.body["model"])
	}
	if stream, ok := got.body["stream"].(bool); !ok || stream {
		t.Errorf("stream must be explicit false, got %v", got.body["stream"])
	}

	// Use checked assertions throughout: a malformed request body must fail
	// this test with a message, not panic the whole test binary.
	msgs, ok := got.body["messages"].([]any)
	if !ok || len(msgs) < 2 {
		t.Fatalf("messages = %v, want at least a system and a user entry", got.body["messages"])
	}
	// Reading from a nil map is safe, so a wrongly typed entry simply fails
	// the comparisons below.
	first, _ := msgs[0].(map[string]any)
	if first["role"] != "system" || first["content"] != "be terse\n\nextra sys" {
		t.Errorf("system fold = %v", first)
	}
	second, _ := msgs[1].(map[string]any)
	if second["role"] != "user" || second["content"] != "hi" {
		t.Errorf("user msg = %v", second)
	}
	opts, _ := got.body["options"].(map[string]any)
	if opts["temperature"] != 0.2 || opts["num_predict"] != float64(64) {
		t.Errorf("options = %v", opts)
	}

	// Response assertions.
	if resp.Text() != "hello there" {
		t.Errorf("text = %q", resp.Text())
	}
	if resp.FinishReason != llm.FinishStop {
		t.Errorf("finish = %v", resp.FinishReason)
	}
	if resp.Usage.InputTokens != 12 || resp.Usage.OutputTokens != 7 {
		t.Errorf("usage = %+v", resp.Usage)
	}
	if resp.Model != "ollama/qwen3:30b" {
		t.Errorf("resp.Model = %q", resp.Model)
	}
}

// TestImagesEncodeAsBase64 checks that an image part is sent in the user
// message's "images" array as raw standard base64, without a data: prefix.
func TestImagesEncodeAsBase64(t *testing.T) {
	p, got := serve(t, 200, jsonReply(`{"message":{"role":"assistant","content":"a cat"},"done":true,"done_reason":"stop"}`))
	imgBytes := []byte{0xFF, 0xD8, 0xFF, 0xE0, 1, 2, 3}

	m, _ := p.Model("llava")
	_, err := m.Generate(context.Background(), llm.Request{
		Messages: []llm.Message{llm.UserParts(llm.Text("describe"), llm.Image("image/jpeg", imgBytes))},
	})
	if err != nil {
		t.Fatalf("Generate: %v", err)
	}

	// Checked assertions keep a malformed request from panicking the test
	// binary; each failure reports what was actually sent.
	msgs, ok := got.body["messages"].([]any)
	if !ok || len(msgs) == 0 {
		t.Fatalf("messages = %v, want one user message", got.body["messages"])
	}
	user, _ := msgs[0].(map[string]any)
	images, _ := user["images"].([]any)
	// Stop here on a wrong count: indexing images[0] below would otherwise
	// panic when the array is empty or missing.
	if len(images) != 1 {
		t.Fatalf("images = %v, want exactly one entry", user["images"])
	}
	encoded, ok := images[0].(string)
	if !ok {
		t.Fatalf("images[0] is %T, want string", images[0])
	}
	if encoded != base64.StdEncoding.EncodeToString(imgBytes) {
		t.Errorf("images = %v", images)
	}
	if strings.Contains(encoded, "data:") {
		t.Error("images must be raw base64 without data: prefix")
	}
}

// TestToolsAndToolCallRoundTrip checks that a tool definition is serialized
// with its parameters as a JSON object, and that a tool call in the reply is
// decoded with a non-empty ID, raw JSON arguments, and the tool-calls finish
// reason.
func TestToolsAndToolCallRoundTrip(t *testing.T) {
	p, got := serve(t, 200, jsonReply(`{
		"message":{"role":"assistant","content":"","tool_calls":[
			{"function":{"index":0,"name":"get_weather","arguments":{"city":"Tokyo"}}}
		]},
		"done":true,"done_reason":"stop"
	}`))

	tool := llm.Tool{
		Name: "get_weather", Description: "weather",
		Parameters: json.RawMessage(`{"type":"object","properties":{"city":{"type":"string"}},"required":["city"]}`),
	}
	m, _ := p.Model("qwen3")
	resp, err := m.Generate(context.Background(), basicRequest(), llm.WithTools(tool))
	if err != nil {
		t.Fatalf("Generate: %v", err)
	}

	// Tools serialize with parameters as an object. Checked assertions keep
	// a missing or wrongly shaped "tools" field from panicking the binary.
	tools, _ := got.body["tools"].([]any)
	if len(tools) == 0 {
		t.Fatalf("tools = %v, want one serialized tool", got.body["tools"])
	}
	entry, _ := tools[0].(map[string]any)
	fn, _ := entry["function"].(map[string]any)
	if fn["name"] != "get_weather" {
		t.Errorf("tool fn = %v", fn)
	}
	if _, ok := fn["parameters"].(map[string]any); !ok {
		t.Errorf("parameters must be an object, got %T", fn["parameters"])
	}

	// Tool call comes back with arguments as a JSON object → RawMessage.
	if len(resp.ToolCalls) != 1 {
		t.Fatalf("tool calls = %v", resp.ToolCalls)
	}
	tc := resp.ToolCalls[0]
	if tc.Name != "get_weather" || tc.ID == "" {
		t.Errorf("call = %+v (id must be synthesized)", tc)
	}
	var args struct {
		City string `json:"city"`
	}
	if err := json.Unmarshal(tc.Arguments, &args); err != nil || args.City != "Tokyo" {
		t.Errorf("arguments = %s (%v)", tc.Arguments, err)
	}
	if resp.FinishReason != llm.FinishToolCalls {
		t.Errorf("finish = %v, want tool_calls", resp.FinishReason)
	}
}

// TestToolResultsAndHistoryToolCalls checks how conversation history is put
// on the wire: an assistant tool call keeps its arguments as a JSON object,
// each tool result becomes its own "tool" message, and an error result has
// its content prefixed with "ERROR: ".
func TestToolResultsAndHistoryToolCalls(t *testing.T) {
	p, got := serve(t, 200, jsonReply(`{"message":{"role":"assistant","content":"21C"},"done":true,"done_reason":"stop"}`))

	m, _ := p.Model("qwen3")
	_, err := m.Generate(context.Background(), llm.Request{
		Messages: []llm.Message{
			llm.UserText("weather?"),
			{Role: llm.RoleAssistant, ToolCalls: []llm.ToolCall{
				{ID: "call_0", Name: "get_weather", Arguments: json.RawMessage(`{"city":"Tokyo"}`)},
			}},
			llm.ToolResultsMessage(
				llm.ToolResult{ID: "call_0", Name: "get_weather", Content: `{"temp":21}`},
				llm.ToolResult{ID: "call_1", Name: "broken_tool", Content: "boom", IsError: true},
			),
		},
	})
	if err != nil {
		t.Fatalf("Generate: %v", err)
	}

	// Checked assertions: a missing or wrongly typed field fails the test
	// with a message instead of panicking the test binary.
	msgs, _ := got.body["messages"].([]any)
	if len(msgs) != 4 {
		t.Fatalf("messages = %d, want 4 (user, assistant, 2 tool results)", len(msgs))
	}
	asst, _ := msgs[1].(map[string]any)
	calls, _ := asst["tool_calls"].([]any)
	if len(calls) == 0 {
		t.Fatalf("assistant message = %v, want its tool call echoed", asst)
	}
	call, _ := calls[0].(map[string]any)
	fn, _ := call["function"].(map[string]any)
	if _, ok := fn["arguments"].(map[string]any); !ok {
		t.Errorf("history tool-call arguments must be an object, got %T", fn["arguments"])
	}
	tr1, _ := msgs[2].(map[string]any)
	if tr1["role"] != "tool" || tr1["tool_name"] != "get_weather" || tr1["content"] != `{"temp":21}` {
		t.Errorf("tool result 1 = %v", tr1)
	}
	tr2, _ := msgs[3].(map[string]any)
	if tr2["content"] != "ERROR: boom" {
		t.Errorf("error result content = %v", tr2["content"])
	}
}

// TestStructuredOutputFormat checks that a schema option is sent both as the
// "format" request field and as an instruction folded into the leading
// system message, and that the reply text is returned unchanged.
func TestStructuredOutputFormat(t *testing.T) {
	p, got := serve(t, 200, jsonReply(`{"message":{"role":"assistant","content":"{\"name\":\"Ada\"}"},"done":true,"done_reason":"stop"}`))
	schema := json.RawMessage(`{"type":"object","properties":{"name":{"type":"string"}},"required":["name"]}`)

	m, _ := p.Model("qwen3")
	resp, err := m.Generate(context.Background(), basicRequest(), llm.WithSchema(schema, "person"))
	if err != nil {
		t.Fatalf("Generate: %v", err)
	}
	format, ok := got.body["format"].(map[string]any)
	if !ok || format["type"] != "object" {
		t.Errorf("format = %v, want the schema object", got.body["format"])
	}
	if resp.Text() != `{"name":"Ada"}` {
		t.Errorf("text = %q", resp.Text())
	}

	// Ollama Cloud ignores "format", so the schema must also be stated as
	// a system instruction. Checked assertions keep a missing system message
	// or non-string content from panicking the test binary.
	msgs, _ := got.body["messages"].([]any)
	if len(msgs) == 0 {
		t.Fatalf("messages = %v, want a leading system message", got.body["messages"])
	}
	sys, _ := msgs[0].(map[string]any)
	content, _ := sys["content"].(string)
	if sys["role"] != "system" || !strings.Contains(content, `"name"`) ||
		!strings.Contains(content, "JSON Schema") {
		t.Errorf("system fold must carry the schema instruction, got %v", sys)
	}
}

// TestThinkMapping checks that a reasoning effort of "high" is sent as the
// "think" request field and that the effort value "max" makes Generate
// return an error.
func TestThinkMapping(t *testing.T) {
	const reply = `{"message":{"role":"assistant","content":"ok"},"done":true,"done_reason":"stop"}`
	p, got := serve(t, 200, jsonReply(reply))
	m, _ := p.Model("gpt-oss:120b")

	if _, err := m.Generate(context.Background(), basicRequest(), llm.WithReasoningEffort("high")); err != nil {
		t.Fatalf("Generate: %v", err)
	}
	if think := got.body["think"]; think != "high" {
		t.Errorf("think = %v", think)
	}

	_, err := m.Generate(context.Background(), basicRequest(), llm.WithReasoningEffort("max"))
	if err == nil {
		t.Error("invalid reasoning effort should error")
	}
}

// TestToolChoiceNoneDropsTools checks that combining a tool with tool choice
// "none" leaves the "tools" key out of the request body.
func TestToolChoiceNoneDropsTools(t *testing.T) {
	const reply = `{"message":{"role":"assistant","content":"ok"},"done":true,"done_reason":"stop"}`
	p, got := serve(t, 200, jsonReply(reply))
	m, _ := p.Model("qwen3")

	if _, err := m.Generate(
		context.Background(),
		basicRequest(),
		llm.WithTools(llm.Tool{Name: "t"}),
		llm.WithToolChoice("none"),
	); err != nil {
		t.Fatalf("Generate: %v", err)
	}

	_, present := got.body["tools"]
	if present {
		t.Error("tool_choice none must omit tools")
	}
}

// TestStreamingNDJSON feeds the stream a four-line NDJSON reply (two text
// chunks, one tool call, one final record) and checks the accumulated text
// deltas, the tool-call event, and the final response's usage, finish
// reason, and text.
func TestStreamingNDJSON(t *testing.T) {
	const ndjson = `{"message":{"role":"assistant","content":"Hel"},"done":false}
{"message":{"role":"assistant","content":"lo"},"done":false}
{"message":{"role":"assistant","content":"","tool_calls":[{"function":{"name":"ping","arguments":{}}}]},"done":false}
{"message":{"role":"assistant","content":""},"done":true,"done_reason":"stop","prompt_eval_count":5,"eval_count":9}
`
	p, _ := serve(t, 200, func(w http.ResponseWriter) {
		w.Header().Set("Content-Type", "application/x-ndjson")
		_, _ = io.WriteString(w, ndjson)
	})

	m, _ := p.Model("qwen3")
	s, err := m.Stream(context.Background(), basicRequest())
	if err != nil {
		t.Fatalf("Stream: %v", err)
	}
	defer s.Close()

	var (
		deltas strings.Builder
		calls  []llm.ToolCall
		last   *llm.Response
	)
	// Drain the stream until it reports io.EOF.
	for {
		ev, nextErr := s.Next()
		if nextErr != nil {
			if errors.Is(nextErr, io.EOF) {
				break
			}
			t.Fatalf("Next: %v", nextErr)
		}
		deltas.WriteString(ev.TextDelta)
		if ev.ToolCall != nil {
			calls = append(calls, *ev.ToolCall)
		}
		if ev.Response != nil {
			last = ev.Response
		}
	}

	if streamed := deltas.String(); streamed != "Hello" {
		t.Errorf("text = %q", streamed)
	}
	if len(calls) != 1 || calls[0].Name != "ping" {
		t.Errorf("tool calls = %+v", calls)
	}
	if last == nil {
		t.Fatal("no final response event")
	}
	if usage := last.Usage; usage.InputTokens != 5 || usage.OutputTokens != 9 {
		t.Errorf("final usage = %+v", usage)
	}
	if last.FinishReason != llm.FinishToolCalls {
		t.Errorf("final finish = %v", last.FinishReason)
	}
	if last.Text() != "Hello" {
		t.Errorf("final text = %q", last.Text())
	}
}

// TestStreamingForemanSingleObject covers the foreman case, where a
// stream:true request is answered with one buffered JSON object rather than
// NDJSON lines. The stream must still yield the text and a final response.
func TestStreamingForemanSingleObject(t *testing.T) {
	const single = `{"message":{"role":"assistant","content":"queued answer"},"done":true,"done_reason":"stop","prompt_eval_count":3,"eval_count":4}`
	p, got := serve(t, 200, func(w http.ResponseWriter) {
		w.Header().Set("Content-Type", "application/json")
		_, _ = io.WriteString(w, single)
	})

	m, _ := p.Model("qwen3:30b")
	s, err := m.Stream(context.Background(), basicRequest())
	if err != nil {
		t.Fatalf("Stream: %v", err)
	}
	defer s.Close()

	// The request itself must have asked for streaming.
	streaming, isBool := got.body["stream"].(bool)
	if !isBool || !streaming {
		t.Errorf("stream flag = %v, want true", got.body["stream"])
	}

	var (
		deltas strings.Builder
		last   *llm.Response
	)
	for {
		ev, nextErr := s.Next()
		if nextErr != nil {
			if errors.Is(nextErr, io.EOF) {
				break
			}
			t.Fatalf("Next: %v", nextErr)
		}
		deltas.WriteString(ev.TextDelta)
		if ev.Response != nil {
			last = ev.Response
		}
	}

	ok := deltas.String() == "queued answer" && last != nil && last.Usage.OutputTokens == 4
	if !ok {
		t.Errorf("text=%q final=%+v", deltas.String(), last)
	}
}

// TestErrorMapping checks how non-2xx replies surface as errors: a 404 maps
// to llm.ErrModelNotFound and classifies permanent, a 503 yields an
// *llm.APIError carrying status and message and classifies transient, and a
// non-JSON error body still ends up in the APIError message.
func TestErrorMapping(t *testing.T) {
	t.Run("404 is model-not-found", func(t *testing.T) {
		p, _ := serve(t, 404, jsonReply(`{"error":"model not found"}`))
		m, _ := p.Model("nope")

		_, err := m.Generate(context.Background(), basicRequest())
		if notFound := errors.Is(err, llm.ErrModelNotFound); !notFound {
			t.Errorf("error = %v, want ErrModelNotFound", err)
		}
		if class := llm.Classify(err); class != llm.ClassPermanent {
			t.Error("404 must classify permanent")
		}
	})

	t.Run("503 transient with message", func(t *testing.T) {
		p, _ := serve(t, 503, jsonReply(`{"error":"request cancelled while waiting"}`))
		m, _ := p.Model("qwen3")

		_, err := m.Generate(context.Background(), basicRequest())
		var apiErr *llm.APIError
		matched := errors.As(err, &apiErr) &&
			apiErr.Status == 503 &&
			strings.Contains(apiErr.Message, "cancelled")
		if !matched {
			t.Errorf("error = %v", err)
		}
		if class := llm.Classify(err); class != llm.ClassTransient {
			t.Error("503 must classify transient")
		}
	})

	t.Run("non-JSON error body", func(t *testing.T) {
		p, _ := serve(t, 500, jsonReply(`upstream exploded`))
		m, _ := p.Model("qwen3")

		_, err := m.Generate(context.Background(), basicRequest())
		var apiErr *llm.APIError
		matched := errors.As(err, &apiErr) && strings.Contains(apiErr.Message, "upstream exploded")
		if !matched {
			t.Errorf("error = %v", err)
		}
	})
}

// TestCapabilityEnforcement checks that Generate returns llm.ErrUnsupported
// when a request carries more images than the model's capabilities allow,
// any image for a model with empty capabilities, or an image MIME type
// outside the allowed set.
func TestCapabilityEnforcement(t *testing.T) {
	p, _ := serve(t, 200, jsonReply(`{"message":{"content":"x"},"done":true}`))

	t.Run("too many images", func(t *testing.T) {
		caps := llm.Capabilities{MaxImagesPerReq: 1, AllowedImageMIME: []string{"image/png"}}
		m, _ := p.Model("llava", llm.WithCapabilities(caps))
		req := llm.Request{Messages: []llm.Message{
			llm.UserParts(llm.Image("image/png", []byte{1}), llm.Image("image/png", []byte{2})),
		}}
		if _, err := m.Generate(context.Background(), req); !errors.Is(err, llm.ErrUnsupported) {
			t.Errorf("error = %v, want ErrUnsupported", err)
		}
	})

	t.Run("images on text-only model", func(t *testing.T) {
		m, _ := p.Model("qwen3", llm.WithCapabilities(llm.Capabilities{}))
		req := llm.Request{Messages: []llm.Message{
			llm.UserParts(llm.Image("image/png", []byte{1})),
		}}
		if _, err := m.Generate(context.Background(), req); !errors.Is(err, llm.ErrUnsupported) {
			t.Errorf("error = %v, want ErrUnsupported", err)
		}
	})

	t.Run("disallowed mime", func(t *testing.T) {
		// No capability override here, so the model's defaults apply
		// (jpeg/png only, per the original note).
		m, _ := p.Model("llava")
		req := llm.Request{Messages: []llm.Message{
			llm.UserParts(llm.Image("image/tiff", []byte{1})),
		}}
		if _, err := m.Generate(context.Background(), req); !errors.Is(err, llm.ErrUnsupported) {
			t.Errorf("error = %v, want ErrUnsupported", err)
		}
	})
}

// TestNoBaseURL checks that Generate on a provider built with an empty base
// URL fails with an error whose text contains "no base URL".
func TestNoBaseURL(t *testing.T) {
	m, _ := New(WithBaseURL("")).Model("x")

	_, err := m.Generate(context.Background(), basicRequest())
	if err != nil && strings.Contains(err.Error(), "no base URL") {
		return
	}
	t.Errorf("error = %v, want a clear no-base-URL message", err)
}

// TestNormalizeHost runs NormalizeHost over a fixed list of inputs (bare
// host, host:port, URL with trailing slash, https URL, padded address) and
// compares each result with the expected base URL.
func TestNormalizeHost(t *testing.T) {
	cases := []struct{ in, want string }{
		{"myhost", "http://myhost:11434"},
		{"myhost:8080", "http://myhost:8080"},
		{"http://myhost:8080/", "http://myhost:8080"},
		{"https://ollama.com", "https://ollama.com"},
		{" 127.0.0.1:11434 ", "http://127.0.0.1:11434"},
	}
	for _, tc := range cases {
		got := NormalizeHost(tc.in)
		if got == tc.want {
			continue
		}
		t.Errorf("NormalizeHost(%q) = %q, want %q", tc.in, got, tc.want)
	}
}

// TestPresets checks the name, base URL, and token that each preset
// constructor (Cloud, Local, Foreman) puts on the provider it returns.
func TestPresets(t *testing.T) {
	t.Run("cloud", func(t *testing.T) {
		t.Setenv("OLLAMA_API_KEY", "cloud-key")
		p := Cloud()
		ok := p.Name() == "ollama-cloud" &&
			p.baseURL == DefaultCloudBaseURL &&
			p.token == "cloud-key"
		if !ok {
			t.Errorf("cloud preset = %+v", p)
		}
	})

	t.Run("local respects OLLAMA_HOST", func(t *testing.T) {
		t.Setenv("OLLAMA_HOST", "box.lan:9999")
		p := Local()
		ok := p.Name() == "ollama" &&
			p.baseURL == "http://box.lan:9999" &&
			p.token == ""
		if !ok {
			t.Errorf("local preset = %+v", p)
		}
	})

	t.Run("foreman", func(t *testing.T) {
		p := Foreman("http://foreman-m1:8080", "tok")
		ok := p.Name() == "foreman" &&
			p.baseURL == "http://foreman-m1:8080" &&
			p.token == "tok"
		if !ok {
			t.Errorf("foreman preset = %+v", p)
		}
	})
}

// TestLocalNoAuthHeader checks that a provider with an empty token sends no
// Authorization header.
func TestLocalNoAuthHeader(t *testing.T) {
	p, got := serve(t, 200, jsonReply(`{"message":{"content":"x"},"done":true}`))
	// Clearing the token stands in for local mode against the test server.
	p.token = ""

	m, _ := p.Model("llama3")
	_, err := m.Generate(context.Background(), basicRequest())
	if err != nil {
		t.Fatalf("Generate: %v", err)
	}
	if header := got.auth; header != "" {
		t.Errorf("auth header = %q, want none in local mode", header)
	}
}