feat: live-validated against Ollama Cloud; schema instruction fallback for cloud

Phase 8: all six live checks pass (tier aliases, thinking-tier chat, real
tool invocation, structured Generate[T], forced failover with bench+skip,
skill agent). Discovery: ollama.com ignores the format field — the
provider now also states the schema as a system instruction (constrained
decoding locally, instruction-guided JSON on cloud), with hermetic test.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
2026-06-10 13:22:54 +02:00
parent 97513141dc
commit 04b21fdad2
5 changed files with 299 additions and 1 deletions
+251
View File
@@ -0,0 +1,251 @@
// Command live is the Phase 8 live-validation harness: it proves majordomo
// end to end against real Ollama Cloud models before the library goes near
// mort. It needs OLLAMA_API_KEY (loaded from ./.env when present) and is
// NOT part of the hermetic suite — run it manually:
//
//	go run ./examples/live
//
// Checks: tier aliases resolve; plain chat (thinking tier); a tool call
// the model actually invokes; structured Generate[T]; a forced failover
// chain (dead head → retry → bench → fall through, then skip-on-second-
// request); and an agent with a skill attached. If a model tag is
// unavailable it falls back to the tier's alternates (that's what chains
// do) and the harness notes which model served each check.
package main
import (
"bufio"
"context"
"encoding/json"
"fmt"
"os"
"strings"
"time"
"gitea.stevedudenhoeffer.com/steve/majordomo"
"gitea.stevedudenhoeffer.com/steve/majordomo/agent"
"gitea.stevedudenhoeffer.com/steve/majordomo/provider/ollama"
"gitea.stevedudenhoeffer.com/steve/majordomo/skill/calc"
)
// Tier chain specs. Each value is a comma-separated list of
// "provider/model" targets; main registers them under the "thinking" and
// "workhorse" aliases so the checks can refer to a tier by name.
const (
// thinkingSpec is the chain registered as the "thinking" alias.
thinkingSpec = "ollama-cloud/minimax-m3:cloud,ollama-cloud/kimi-k2.6:cloud"
// workhorseSpec is the chain registered as the "workhorse" alias.
workhorseSpec = "ollama-cloud/minimax-m2.7:cloud,ollama-cloud/qwen3-coder:480b-cloud"
)
// main runs the six live checks in order, printing a PASS or FAIL section
// for each one. It exits with status 1 when OLLAMA_API_KEY is missing or
// when at least one check failed.
func main() {
	loadDotEnv(".env")
	if key := os.Getenv("OLLAMA_API_KEY"); key == "" {
		fmt.Println("FATAL: OLLAMA_API_KEY not set (and no .env found) — cannot run live validation")
		os.Exit(1)
	}

	reg := majordomo.New()
	reg.RegisterAlias("thinking", thinkingSpec)
	reg.RegisterAlias("workhorse", workhorseSpec)

	// The "dead" provider can never answer. Connection-refused is a
	// transient error, so it drives retry + bench + failover while the rest
	// of the chain is live.
	deadProvider := ollama.New(ollama.WithName("dead"), ollama.WithBaseURL("http://127.0.0.1:9"))
	reg.RegisterProvider(deadProvider)

	failures := 0
	// check runs a single named step, times it, and prints its outcome. A
	// step that returns an error is counted in failures.
	check := func(name string, fn func() (string, error)) {
		fmt.Printf("\n=== %s ===\n", name)
		began := time.Now()
		report, err := fn()
		if err == nil {
			fmt.Printf("PASS (%.1fs)\n%s\n", time.Since(began).Seconds(), report)
			return
		}
		failures++
		fmt.Printf("FAIL (%.1fs): %v\n", time.Since(began).Seconds(), err)
	}

	// Every check derives its own deadline from this root context.
	root := context.Background()

	check("1. tier aliases resolve via Parse", func() (string, error) {
		aliases := []string{"thinking", "workhorse"}
		report := make([]string, 0, len(aliases)+1)
		for i := range aliases {
			parsed, err := reg.Parse(aliases[i])
			if err != nil {
				return "", fmt.Errorf("Parse(%q): %w", aliases[i], err)
			}
			report = append(report, fmt.Sprintf("%s -> %T (chain)", aliases[i], parsed))
		}

		// An alias must also parse when it is the last element of a chain.
		const inlineChain = "ollama-cloud/minimax-m3:cloud,thinking"
		if _, err := reg.Parse(inlineChain); err != nil {
			return "", fmt.Errorf("trailing-alias chain: %w", err)
		}
		report = append(report, `"ollama-cloud/minimax-m3:cloud,thinking" parses (inline tail expansion)`)
		return strings.Join(report, "\n"), nil
	})

	check("2. plain chat on the thinking tier", func() (string, error) {
		tctx, cancel := context.WithTimeout(root, 3*time.Minute)
		defer cancel()

		model, err := reg.Parse("thinking")
		if err != nil {
			return "", err
		}

		prompt := majordomo.UserText(
			"Reply with exactly one short sentence: what is a failover chain?")
		reply, err := model.Generate(tctx, majordomo.Request{
			Messages: []majordomo.Message{prompt},
		})
		if err != nil {
			return "", err
		}
		return fmt.Sprintf("served by: %s\nusage: %d in / %d out\nreply: %s",
			reply.Model, reply.Usage.InputTokens, reply.Usage.OutputTokens, strings.TrimSpace(reply.Text())), nil
	})

	check("3. live tool call (workhorse tier)", func() (string, error) {
		tctx, cancel := context.WithTimeout(root, 3*time.Minute)
		defer cancel()

		model, err := reg.Parse("workhorse")
		if err != nil {
			return "", err
		}

		// invoked flips to true only if the model really calls the tool;
		// the code it returns cannot be known any other way.
		var invoked bool
		launchCode := majordomo.Tool{
			Name:        "get_launch_code",
			Description: "Returns today's launch code. The ONLY way to know it.",
			Parameters:  json.RawMessage(`{"type":"object","properties":{}}`),
			Handler: func(context.Context, json.RawMessage) (any, error) {
				invoked = true
				return map[string]string{"launch_code": "PINEAPPLE-7"}, nil
			},
		}

		runner := agent.New(model, "Use your tools. Answer with just the requested value.",
			agent.WithTools(launchCode), agent.WithMaxSteps(4))
		result, err := runner.Run(tctx, "What is today's launch code?")
		switch {
		case err != nil:
			return "", err
		case !invoked:
			return "", fmt.Errorf("model answered %q without invoking the tool", result.Output)
		case !strings.Contains(result.Output, "PINEAPPLE-7"):
			return "", fmt.Errorf("tool ran but answer %q does not contain the code", result.Output)
		}
		return fmt.Sprintf("tool invoked: yes\nsteps: %d\nanswer: %s", len(result.Steps), strings.TrimSpace(result.Output)), nil
	})

	check("4. structured output via Generate[T]", func() (string, error) {
		tctx, cancel := context.WithTimeout(root, 3*time.Minute)
		defer cancel()

		model, err := reg.Parse("workhorse")
		if err != nil {
			return "", err
		}

		type CityFacts struct {
			City       string  `json:"city"`
			Country    string  `json:"country"`
			Population int     `json:"population" description:"approximate"`
			Latitude   float64 `json:"latitude"`
		}

		request := majordomo.Request{
			Messages: []majordomo.Message{majordomo.UserText("Facts about Tokyo.")},
		}
		facts, err := majordomo.Generate[CityFacts](tctx, model, request)
		if err != nil {
			return "", err
		}

		// Sanity-check the decode instead of trusting any well-formed JSON.
		plausible := strings.EqualFold(facts.City, "tokyo") && facts.Population >= 1_000_000
		if !plausible {
			return "", fmt.Errorf("implausible decode: %+v", facts)
		}
		return fmt.Sprintf("decoded: %+v", facts), nil
	})

	check("5. forced failover: dead head -> retry -> bench -> live tail", func() (string, error) {
		tctx, cancel := context.WithTimeout(root, 4*time.Minute)
		defer cancel()

		chain, err := reg.Parse("dead/anything,workhorse")
		if err != nil {
			return "", err
		}

		// request wraps a single user prompt in a Request.
		request := func(text string) majordomo.Request {
			return majordomo.Request{
				Messages: []majordomo.Message{majordomo.UserText(text)},
			}
		}

		first, err := chain.Generate(tctx, request("Say OK."))
		if err != nil {
			return "", fmt.Errorf("first request: %w", err)
		}
		if served := first.Model; !strings.HasPrefix(served, "ollama-cloud/") {
			return "", fmt.Errorf("expected a cloud target to serve, got %s", served)
		}
		if reg.Health().Available("dead/anything") {
			return "", fmt.Errorf("dead head should be benched after repeated transient failures")
		}

		// With the head benched, the second request must skip it without a dial.
		second, err := chain.Generate(tctx, request("Say OK again."))
		if err != nil {
			return "", fmt.Errorf("second request: %w", err)
		}
		return fmt.Sprintf("first served by %s after head retry+bench\nhead benched: yes\nsecond served by %s (head skipped while benched)",
			first.Model, second.Model), nil
	})

	check("6. agent with a skill attached", func() (string, error) {
		tctx, cancel := context.WithTimeout(root, 3*time.Minute)
		defer cancel()

		model, err := reg.Parse("workhorse")
		if err != nil {
			return "", err
		}

		calcAgent := agent.New(model, "You are precise.", agent.WithMaxSteps(4))
		calcAgent.AddSkill(calc.New())
		result, err := calcAgent.Run(tctx, "Compute 1337*42+7 with your calculate tool, then answer with just the number.")
		if err != nil {
			return "", err
		}

		// The right number is not enough: a successful "calculate" tool
		// result must appear somewhere in the recorded steps.
		calcUsed := false
		for _, step := range result.Steps {
			for _, call := range step.Results {
				if call.Name != "calculate" || call.IsError {
					continue
				}
				calcUsed = true
			}
		}
		if !calcUsed {
			return "", fmt.Errorf("calculate was never invoked; answer: %q", result.Output)
		}
		if !strings.Contains(result.Output, "56161") {
			return "", fmt.Errorf("wrong answer %q (want 56161)", result.Output)
		}
		return fmt.Sprintf("calculate invoked: yes\nanswer: %s", strings.TrimSpace(result.Output)), nil
	})

	fmt.Printf("\n=== live validation: %d failed ===\n", failures)
	if failures > 0 {
		os.Exit(1)
	}
}
// loadDotEnv reads KEY=VALUE lines from the file at path into the process
// environment. It is a best-effort loader: a file that cannot be opened is
// silently ignored, and a read error part-way through just loads fewer
// variables.
//
// Parsing rules:
//   - blank lines and lines starting with '#' are skipped;
//   - lines without '=' or with an empty key are skipped;
//   - a shell-style "export KEY=VALUE" line is accepted;
//   - whitespace around the key and around the value is trimmed;
//   - one pair of matching surrounding quotes (single or double) is removed
//     from the value, while unmatched quote characters are kept as data;
//   - a variable whose current value is non-empty is never overridden (a
//     variable that is set but empty is treated like an unset one).
func loadDotEnv(path string) {
	f, err := os.Open(path)
	if err != nil {
		return
	}
	defer f.Close()

	sc := bufio.NewScanner(f)
	for sc.Scan() {
		line := strings.TrimSpace(sc.Text())
		if line == "" || strings.HasPrefix(line, "#") {
			continue
		}
		rawKey, rawValue, ok := strings.Cut(line, "=")
		if !ok {
			continue
		}
		key := strings.TrimSpace(rawKey)
		// "export FOO=bar" is common in .env files that are also sourced by
		// a shell; the variable name is the word after "export".
		if words := strings.Fields(key); len(words) == 2 && words[0] == "export" {
			key = words[1]
		}
		if key == "" || os.Getenv(key) != "" {
			continue
		}
		// Setenv can only fail on a malformed name or value (for example an
		// embedded NUL); skipping such a line is the best-effort behavior.
		_ = os.Setenv(key, unquoteDotEnvValue(strings.TrimSpace(rawValue)))
	}
	_ = sc.Err() // best-effort loader; a truncated .env just loads less
}

// unquoteDotEnvValue removes one pair of matching single or double quotes
// surrounding v. Anything else, including a lone or mismatched quote, is
// returned unchanged.
func unquoteDotEnvValue(v string) string {
	if len(v) < 2 {
		return v
	}
	first, last := v[0], v[len(v)-1]
	if first == last && (first == '"' || first == '\'') {
		return v[1 : len(v)-1]
	}
	return v
}