diff --git a/README.md b/README.md index f242736..781fa71 100644 --- a/README.md +++ b/README.md @@ -281,7 +281,11 @@ streaming); majordomo's stream API works against it and delivers the response as a single delta plus final event. Notes: Ollama has no native tool_choice — `"none"` drops the tools; -`"required"`/named choices are best-effort ignored there. +`"required"`/named choices are best-effort ignored there. Ollama Cloud +ignores the `format` field (verified live), so the provider also states +the schema as an explicit system instruction — constrained decoding on +local Ollama, instruction-guided JSON on cloud, one canonical API either +way. Cross-cutting: Parse grammar ✅ · aliases/tiers ✅ · failover chains ✅ · health tracking/backoff ✅ · LLM_* env DSNs ✅ · media pipeline ✅ diff --git a/examples/live/main.go b/examples/live/main.go new file mode 100644 index 0000000..9f86258 --- /dev/null +++ b/examples/live/main.go @@ -0,0 +1,251 @@ +// Command live is the Phase 8 live-validation harness: it proves majordomo +// end to end against real Ollama Cloud models before the library goes near +// mort. It needs OLLAMA_API_KEY (loaded from ./.env when present) and is +// NOT part of the hermetic suite — run it manually: +// +// go run ./examples/live +// +// Checks: tier aliases resolve; plain chat (thinking tier); a tool call +// the model actually invokes; structured Generate[T]; a forced failover +// chain (dead head → retry → bench → fall through, then skip-on-second- +// request); and an agent with a skill attached. If a model tag is +// unavailable it falls back to the tier's alternates (that's what chains +// do) and the harness notes which model served each check. 
+package main
+
+import (
+	"bufio"
+	"context"
+	"encoding/json"
+	"fmt"
+	"os"
+	"strings"
+	"time"
+
+	"gitea.stevedudenhoeffer.com/steve/majordomo"
+	"gitea.stevedudenhoeffer.com/steve/majordomo/agent"
+	"gitea.stevedudenhoeffer.com/steve/majordomo/provider/ollama"
+	"gitea.stevedudenhoeffer.com/steve/majordomo/skill/calc"
+)
+
+// Tier specs registered as the "thinking" and "workhorse" aliases in main.
+// Each is a comma-separated list of provider/model targets.
+const (
+	thinkingSpec  = "ollama-cloud/minimax-m3:cloud,ollama-cloud/kimi-k2.6:cloud"
+	workhorseSpec = "ollama-cloud/minimax-m2.7:cloud,ollama-cloud/qwen3-coder:480b-cloud"
+)
+
+// main loads ./.env, requires a non-empty OLLAMA_API_KEY, then runs the six
+// live checks in order. Every check prints PASS or FAIL with its duration; a
+// failing check does not stop the ones after it. The process exits with
+// status 1 when the key is missing or when at least one check failed.
+func main() {
+	loadDotEnv(".env")
+	if os.Getenv("OLLAMA_API_KEY") == "" {
+		fmt.Println("FATAL: OLLAMA_API_KEY not set (and no .env found) — cannot run live validation")
+		os.Exit(1)
+	}
+
+	reg := majordomo.New()
+	reg.RegisterAlias("thinking", thinkingSpec)
+	reg.RegisterAlias("workhorse", workhorseSpec)
+	// A provider that can never answer: connection-refused is a transient
+	// error, so it exercises retry + bench + failover against a live tail.
+	reg.RegisterProvider(ollama.New(ollama.WithName("dead"), ollama.WithBaseURL("http://127.0.0.1:9")))
+
+	// check runs one named validation, times it, and counts failures in
+	// failed instead of aborting, so a single run reports on every check.
+	failed := 0
+	check := func(name string, fn func() (string, error)) {
+		fmt.Printf("\n=== %s ===\n", name)
+		start := time.Now()
+		out, err := fn()
+		if err != nil {
+			failed++
+			fmt.Printf("FAIL (%.1fs): %v\n", time.Since(start).Seconds(), err)
+			return
+		}
+		fmt.Printf("PASS (%.1fs)\n%s\n", time.Since(start).Seconds(), out)
+	}
+
+	// withTimeout gives each check its own deadline derived from one root
+	// context.
+	ctx := context.Background()
+	withTimeout := func(d time.Duration) (context.Context, context.CancelFunc) {
+		return context.WithTimeout(ctx, d)
+	}
+
+	check("1. tier aliases resolve via Parse", func() (string, error) {
+		var lines []string
+		for _, alias := range []string{"thinking", "workhorse"} {
+			m, err := reg.Parse(alias)
+			if err != nil {
+				return "", fmt.Errorf("Parse(%q): %w", alias, err)
+			}
+			lines = append(lines, fmt.Sprintf("%s -> %T (chain)", alias, m))
+		}
+		// And as a trailing chain element:
+		if _, err := reg.Parse("ollama-cloud/minimax-m3:cloud,thinking"); err != nil {
+			return "", fmt.Errorf("trailing-alias chain: %w", err)
+		}
+		lines = append(lines, `"ollama-cloud/minimax-m3:cloud,thinking" parses (inline tail expansion)`)
+		return strings.Join(lines, "\n"), nil
+	})
+
+	check("2. plain chat on the thinking tier", func() (string, error) {
+		c, cancel := withTimeout(3 * time.Minute)
+		defer cancel()
+		m, err := reg.Parse("thinking")
+		if err != nil {
+			return "", err
+		}
+		resp, err := m.Generate(c, majordomo.Request{
+			Messages: []majordomo.Message{majordomo.UserText(
+				"Reply with exactly one short sentence: what is a failover chain?")},
+		})
+		if err != nil {
+			return "", err
+		}
+		return fmt.Sprintf("served by: %s\nusage: %d in / %d out\nreply: %s",
+			resp.Model, resp.Usage.InputTokens, resp.Usage.OutputTokens, strings.TrimSpace(resp.Text())), nil
+	})
+
+	check("3. live tool call (workhorse tier)", func() (string, error) {
+		c, cancel := withTimeout(3 * time.Minute)
+		defer cancel()
+		m, err := reg.Parse("workhorse")
+		if err != nil {
+			return "", err
+		}
+
+		// called records that the handler ran, so a model that merely
+		// guesses an answer cannot pass the check.
+		called := false
+		secret := majordomo.Tool{
+			Name:        "get_launch_code",
+			Description: "Returns today's launch code. The ONLY way to know it.",
+			Parameters:  json.RawMessage(`{"type":"object","properties":{}}`),
+			Handler: func(context.Context, json.RawMessage) (any, error) {
+				called = true
+				return map[string]string{"launch_code": "PINEAPPLE-7"}, nil
+			},
+		}
+
+		a := agent.New(m, "Use your tools. Answer with just the requested value.",
+			agent.WithTools(secret), agent.WithMaxSteps(4))
+		res, err := a.Run(c, "What is today's launch code?")
+		if err != nil {
+			return "", err
+		}
+		if !called {
+			return "", fmt.Errorf("model answered %q without invoking the tool", res.Output)
+		}
+		if !strings.Contains(res.Output, "PINEAPPLE-7") {
+			return "", fmt.Errorf("tool ran but answer %q does not contain the code", res.Output)
+		}
+		return fmt.Sprintf("tool invoked: yes\nsteps: %d\nanswer: %s", len(res.Steps), strings.TrimSpace(res.Output)), nil
+	})
+
+	check("4. structured output via Generate[T]", func() (string, error) {
+		c, cancel := withTimeout(3 * time.Minute)
+		defer cancel()
+		m, err := reg.Parse("workhorse")
+		if err != nil {
+			return "", err
+		}
+		type CityFacts struct {
+			City       string  `json:"city"`
+			Country    string  `json:"country"`
+			Population int     `json:"population" description:"approximate"`
+			Latitude   float64 `json:"latitude"`
+		}
+		facts, err := majordomo.Generate[CityFacts](c, m, majordomo.Request{
+			Messages: []majordomo.Message{majordomo.UserText("Facts about Tokyo.")},
+		})
+		if err != nil {
+			return "", err
+		}
+		// Plausibility only: the exact figures vary between models.
+		if !strings.EqualFold(facts.City, "tokyo") || facts.Population < 1_000_000 {
+			return "", fmt.Errorf("implausible decode: %+v", facts)
+		}
+		return fmt.Sprintf("decoded: %+v", facts), nil
+	})
+
+	check("5. forced failover: dead head -> retry -> bench -> live tail", func() (string, error) {
+		c, cancel := withTimeout(4 * time.Minute)
+		defer cancel()
+		m, err := reg.Parse("dead/anything,workhorse")
+		if err != nil {
+			return "", err
+		}
+		resp, err := m.Generate(c, majordomo.Request{
+			Messages: []majordomo.Message{majordomo.UserText("Say OK.")},
+		})
+		if err != nil {
+			return "", fmt.Errorf("first request: %w", err)
+		}
+		if !strings.HasPrefix(resp.Model, "ollama-cloud/") {
+			return "", fmt.Errorf("expected a cloud target to serve, got %s", resp.Model)
+		}
+		if reg.Health().Available("dead/anything") {
+			return "", fmt.Errorf("dead head should be benched after repeated transient failures")
+		}
+		// Second request: the benched head must be skipped without a dial.
+		resp2, err := m.Generate(c, majordomo.Request{
+			Messages: []majordomo.Message{majordomo.UserText("Say OK again.")},
+		})
+		if err != nil {
+			return "", fmt.Errorf("second request: %w", err)
+		}
+		return fmt.Sprintf("first served by %s after head retry+bench\nhead benched: yes\nsecond served by %s (head skipped while benched)",
+			resp.Model, resp2.Model), nil
+	})
+
+	check("6. agent with a skill attached", func() (string, error) {
+		c, cancel := withTimeout(3 * time.Minute)
+		defer cancel()
+		m, err := reg.Parse("workhorse")
+		if err != nil {
+			return "", err
+		}
+		a := agent.New(m, "You are precise.", agent.WithMaxSteps(4))
+		a.AddSkill(calc.New())
+
+		res, err := a.Run(c, "Compute 1337*42+7 with your calculate tool, then answer with just the number.")
+		if err != nil {
+			return "", err
+		}
+		// Require at least one successful calculate result, not just the
+		// right number in the final text.
+		var usedCalc bool
+		for _, step := range res.Steps {
+			for _, r := range step.Results {
+				if r.Name == "calculate" && !r.IsError {
+					usedCalc = true
+				}
+			}
+		}
+		if !usedCalc {
+			return "", fmt.Errorf("calculate was never invoked; answer: %q", res.Output)
+		}
+		if !strings.Contains(res.Output, "56161") {
+			return "", fmt.Errorf("wrong answer %q (want 56161)", res.Output)
+		}
+		return fmt.Sprintf("calculate invoked: yes\nanswer: %s", strings.TrimSpace(res.Output)), nil
+	})
+
+	fmt.Printf("\n=== live validation: %d failed ===\n", failed)
+	if failed > 0 {
+		os.Exit(1)
+	}
+}
+
+// loadDotEnv reads KEY=VALUE lines from path into the process environment.
+// A variable that already has a non-empty value is never replaced (an empty
+// value counts as unset, matching the OLLAMA_API_KEY check in main). Quiet
+// on a missing or unreadable file.
+//
+// Blank lines, lines starting with '#', lines without '=', and lines with an
+// empty key are skipped. A leading "export " is accepted so shell-style
+// files load too. Values are whitespace-trimmed and then have at most one
+// matching pair of surrounding quotes removed.
+func loadDotEnv(path string) {
+	f, err := os.Open(path)
+	if err != nil {
+		return
+	}
+	defer f.Close()
+	sc := bufio.NewScanner(f)
+	for sc.Scan() {
+		line := strings.TrimSpace(sc.Text())
+		if line == "" || strings.HasPrefix(line, "#") {
+			continue
+		}
+		if strings.HasPrefix(line, "export ") {
+			line = strings.TrimSpace(strings.TrimPrefix(line, "export "))
+		}
+		k, v, ok := strings.Cut(line, "=")
+		k = strings.TrimSpace(k)
+		if !ok || k == "" {
+			continue
+		}
+		if os.Getenv(k) != "" {
+			continue
+		}
+		// Best-effort: a name the OS rejects is simply not loaded.
+		_ = os.Setenv(k, unquoteEnvValue(strings.TrimSpace(v)))
+	}
+	_ = sc.Err() // best-effort loader; a truncated .env just loads less
+}
+
+// unquoteEnvValue removes one pair of surrounding quotes when v starts and
+// ends with the same quote character (' or "). Anything else, including a
+// lone or unbalanced quote, is returned unchanged so that quote characters
+// belonging to the value are not lost.
+func unquoteEnvValue(v string) string {
+	if n := len(v); n >= 2 && (v[0] == '"' || v[0] == '\'') && v[n-1] == v[0] {
+		return v[1 : n-1]
+	}
+	return v
+}
diff --git a/progress.md b/progress.md index 6e974e0..78f327a 100644 --- a/progress.md +++ b/progress.md @@ -1,5 +1,32 @@ # progress +## 2026-06-10 — Phase 8: live validation against real Ollama Cloud + +**All six checks PASS** (examples/live harness, OLLAMA_API_KEY from .env): +1. 
Tier aliases (`thinking` = minimax-m3:cloud→kimi-k2.6:cloud, + `workhorse` = minimax-m2.7:cloud→qwen3-coder:480b-cloud) resolve via + Parse, incl. as a trailing chain element. +2. Plain chat served by ollama-cloud/minimax-m3:cloud (189 in/48 out). +3. Live tool call: the workhorse agent actually invoked get_launch_code + and answered from its result in 2 steps. +4. Structured Generate[T] decoded {City:Tokyo Country:Japan + Population:14000000 Latitude:35.6762}. +5. Forced failover: an unreachable head (connection refused = transient) + was retried, benched, and fell through to a live cloud tail; the second + request skipped the benched head without dialing it. +6. Agent with the calc skill attached invoked calculate and answered + 56161. + +**Discovery + fix:** Ollama Cloud ignores the `format` field entirely +(verified with raw curl — markdown came back despite a schema). The +ollama provider now also states the schema as an explicit system +instruction (local stays constrained-decoded; cloud becomes +instruction-guided); hermetic test added. The `:cloud`-suffixed model +names work verbatim against ollama.com — mort's tier strings carry over +unchanged. + +**Next:** Phase 9 — convert mort onto majordomo, open the PR. + ## 2026-06-10 — Phase 7: examples, migration blueprint, README finalization **Landed:** `examples/` — nine runnable programs, one per hard requirement diff --git a/provider/ollama/ollama_test.go b/provider/ollama/ollama_test.go index 197adeb..2eb4b64 100644 --- a/provider/ollama/ollama_test.go +++ b/provider/ollama/ollama_test.go @@ -235,6 +235,14 @@ func TestStructuredOutputFormat(t *testing.T) { if resp.Text() != `{"name":"Ada"}` { t.Errorf("text = %q", resp.Text()) } + // Ollama Cloud ignores "format", so the schema must also be stated as + // a system instruction. 
+ msgs := cap.body["messages"].([]any) + sys := msgs[0].(map[string]any) + if sys["role"] != "system" || !strings.Contains(sys["content"].(string), `"name"`) || + !strings.Contains(sys["content"].(string), "JSON Schema") { + t.Errorf("system fold must carry the schema instruction, got %v", sys) + } } func TestThinkMapping(t *testing.T) { diff --git a/provider/ollama/wire.go b/provider/ollama/wire.go index 72ad8f0..2eedda1 100644 --- a/provider/ollama/wire.go +++ b/provider/ollama/wire.go @@ -138,6 +138,14 @@ func (m *model) buildRequest(req llm.Request, stream bool) (*chatRequest, error) } } } + if len(req.Schema) > 0 { + // Belt and braces: local Ollama enforces the "format" schema by + // constrained decoding, but Ollama Cloud ignores the field + // (verified live 2026-06-10) — so the schema is also stated as an + // explicit instruction. Harmless where format works, essential + // where it doesn't. + sys = append(sys, "Respond with a single JSON object that validates against this JSON Schema — no markdown, no code fences, no prose before or after the JSON:\n"+string(req.Schema)) + } if len(sys) > 0 { out.Messages = append(out.Messages, chatMessage{ Role: "system", Content: strings.Join(sys, "\n\n"),