feat: live-validated against Ollama Cloud; schema instruction fallback for cloud
Phase 8: all six live checks pass (tier aliases, thinking-tier chat, real tool invocation, structured Generate[T], forced failover with bench+skip, skill agent). Discovery: ollama.com ignores the format field — the provider now also states the schema as a system instruction (constrained decoding locally, instruction-guided JSON on cloud), with hermetic test. Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,251 @@
|
||||
// Command live is the Phase 8 live-validation harness: it proves majordomo
// end to end against real Ollama Cloud models before the library goes near
// mort. It needs OLLAMA_API_KEY (loaded from ./.env when present) and is
// NOT part of the hermetic suite — run it manually:
//
//	go run ./examples/live
//
// Checks: tier aliases resolve; plain chat (thinking tier); a tool call
// the model actually invokes; structured Generate[T]; a forced failover
// chain (dead head → retry → bench → fall through, then skip-on-second-
// request); and an agent with a skill attached. If a model tag is
// unavailable it falls back to the tier's alternates (that's what chains
// do) and the harness notes which model served each check.
|
||||
package main
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"os"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"gitea.stevedudenhoeffer.com/steve/majordomo"
|
||||
"gitea.stevedudenhoeffer.com/steve/majordomo/agent"
|
||||
"gitea.stevedudenhoeffer.com/steve/majordomo/provider/ollama"
|
||||
"gitea.stevedudenhoeffer.com/steve/majordomo/skill/calc"
|
||||
)
|
||||
|
||||
const (
|
||||
// thinkingSpec is the comma-separated model spec that main registers under
// the "thinking" alias.
thinkingSpec = "ollama-cloud/minimax-m3:cloud,ollama-cloud/kimi-k2.6:cloud"
|
||||
// workhorseSpec is the comma-separated model spec that main registers under
// the "workhorse" alias.
workhorseSpec = "ollama-cloud/minimax-m2.7:cloud,ollama-cloud/qwen3-coder:480b-cloud"
|
||||
)
|
||||
|
||||
func main() {
|
||||
loadDotEnv(".env")
|
||||
if os.Getenv("OLLAMA_API_KEY") == "" {
|
||||
fmt.Println("FATAL: OLLAMA_API_KEY not set (and no .env found) — cannot run live validation")
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
reg := majordomo.New()
|
||||
reg.RegisterAlias("thinking", thinkingSpec)
|
||||
reg.RegisterAlias("workhorse", workhorseSpec)
|
||||
// A provider that can never answer: connection-refused is a transient
|
||||
// error, so it exercises retry + bench + failover against a live tail.
|
||||
reg.RegisterProvider(ollama.New(ollama.WithName("dead"), ollama.WithBaseURL("http://127.0.0.1:9")))
|
||||
|
||||
failed := 0
|
||||
check := func(name string, fn func() (string, error)) {
|
||||
fmt.Printf("\n=== %s ===\n", name)
|
||||
start := time.Now()
|
||||
out, err := fn()
|
||||
if err != nil {
|
||||
failed++
|
||||
fmt.Printf("FAIL (%.1fs): %v\n", time.Since(start).Seconds(), err)
|
||||
return
|
||||
}
|
||||
fmt.Printf("PASS (%.1fs)\n%s\n", time.Since(start).Seconds(), out)
|
||||
}
|
||||
|
||||
ctx := context.Background()
|
||||
withTimeout := func(d time.Duration) (context.Context, context.CancelFunc) {
|
||||
return context.WithTimeout(ctx, d)
|
||||
}
|
||||
|
||||
check("1. tier aliases resolve via Parse", func() (string, error) {
|
||||
var lines []string
|
||||
for _, alias := range []string{"thinking", "workhorse"} {
|
||||
m, err := reg.Parse(alias)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("Parse(%q): %w", alias, err)
|
||||
}
|
||||
lines = append(lines, fmt.Sprintf("%s -> %T (chain)", alias, m))
|
||||
}
|
||||
// And as a trailing chain element:
|
||||
if _, err := reg.Parse("ollama-cloud/minimax-m3:cloud,thinking"); err != nil {
|
||||
return "", fmt.Errorf("trailing-alias chain: %w", err)
|
||||
}
|
||||
lines = append(lines, `"ollama-cloud/minimax-m3:cloud,thinking" parses (inline tail expansion)`)
|
||||
return strings.Join(lines, "\n"), nil
|
||||
})
|
||||
|
||||
check("2. plain chat on the thinking tier", func() (string, error) {
|
||||
c, cancel := withTimeout(3 * time.Minute)
|
||||
defer cancel()
|
||||
m, err := reg.Parse("thinking")
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
resp, err := m.Generate(c, majordomo.Request{
|
||||
Messages: []majordomo.Message{majordomo.UserText(
|
||||
"Reply with exactly one short sentence: what is a failover chain?")},
|
||||
})
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
return fmt.Sprintf("served by: %s\nusage: %d in / %d out\nreply: %s",
|
||||
resp.Model, resp.Usage.InputTokens, resp.Usage.OutputTokens, strings.TrimSpace(resp.Text())), nil
|
||||
})
|
||||
|
||||
check("3. live tool call (workhorse tier)", func() (string, error) {
|
||||
c, cancel := withTimeout(3 * time.Minute)
|
||||
defer cancel()
|
||||
m, err := reg.Parse("workhorse")
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
called := false
|
||||
secret := majordomo.Tool{
|
||||
Name: "get_launch_code",
|
||||
Description: "Returns today's launch code. The ONLY way to know it.",
|
||||
Parameters: json.RawMessage(`{"type":"object","properties":{}}`),
|
||||
Handler: func(context.Context, json.RawMessage) (any, error) {
|
||||
called = true
|
||||
return map[string]string{"launch_code": "PINEAPPLE-7"}, nil
|
||||
},
|
||||
}
|
||||
|
||||
a := agent.New(m, "Use your tools. Answer with just the requested value.",
|
||||
agent.WithTools(secret), agent.WithMaxSteps(4))
|
||||
res, err := a.Run(c, "What is today's launch code?")
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
if !called {
|
||||
return "", fmt.Errorf("model answered %q without invoking the tool", res.Output)
|
||||
}
|
||||
if !strings.Contains(res.Output, "PINEAPPLE-7") {
|
||||
return "", fmt.Errorf("tool ran but answer %q does not contain the code", res.Output)
|
||||
}
|
||||
return fmt.Sprintf("tool invoked: yes\nsteps: %d\nanswer: %s", len(res.Steps), strings.TrimSpace(res.Output)), nil
|
||||
})
|
||||
|
||||
check("4. structured output via Generate[T]", func() (string, error) {
|
||||
c, cancel := withTimeout(3 * time.Minute)
|
||||
defer cancel()
|
||||
m, err := reg.Parse("workhorse")
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
type CityFacts struct {
|
||||
City string `json:"city"`
|
||||
Country string `json:"country"`
|
||||
Population int `json:"population" description:"approximate"`
|
||||
Latitude float64 `json:"latitude"`
|
||||
}
|
||||
facts, err := majordomo.Generate[CityFacts](c, m, majordomo.Request{
|
||||
Messages: []majordomo.Message{majordomo.UserText("Facts about Tokyo.")},
|
||||
})
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
if !strings.EqualFold(facts.City, "tokyo") || facts.Population < 1_000_000 {
|
||||
return "", fmt.Errorf("implausible decode: %+v", facts)
|
||||
}
|
||||
return fmt.Sprintf("decoded: %+v", facts), nil
|
||||
})
|
||||
|
||||
check("5. forced failover: dead head -> retry -> bench -> live tail", func() (string, error) {
|
||||
c, cancel := withTimeout(4 * time.Minute)
|
||||
defer cancel()
|
||||
m, err := reg.Parse("dead/anything,workhorse")
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
resp, err := m.Generate(c, majordomo.Request{
|
||||
Messages: []majordomo.Message{majordomo.UserText("Say OK.")},
|
||||
})
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("first request: %w", err)
|
||||
}
|
||||
if !strings.HasPrefix(resp.Model, "ollama-cloud/") {
|
||||
return "", fmt.Errorf("expected a cloud target to serve, got %s", resp.Model)
|
||||
}
|
||||
if reg.Health().Available("dead/anything") {
|
||||
return "", fmt.Errorf("dead head should be benched after repeated transient failures")
|
||||
}
|
||||
// Second request: the benched head must be skipped without a dial.
|
||||
resp2, err := m.Generate(c, majordomo.Request{
|
||||
Messages: []majordomo.Message{majordomo.UserText("Say OK again.")},
|
||||
})
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("second request: %w", err)
|
||||
}
|
||||
return fmt.Sprintf("first served by %s after head retry+bench\nhead benched: yes\nsecond served by %s (head skipped while benched)",
|
||||
resp.Model, resp2.Model), nil
|
||||
})
|
||||
|
||||
check("6. agent with a skill attached", func() (string, error) {
|
||||
c, cancel := withTimeout(3 * time.Minute)
|
||||
defer cancel()
|
||||
m, err := reg.Parse("workhorse")
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
a := agent.New(m, "You are precise.", agent.WithMaxSteps(4))
|
||||
a.AddSkill(calc.New())
|
||||
|
||||
res, err := a.Run(c, "Compute 1337*42+7 with your calculate tool, then answer with just the number.")
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
var usedCalc bool
|
||||
for _, step := range res.Steps {
|
||||
for _, r := range step.Results {
|
||||
if r.Name == "calculate" && !r.IsError {
|
||||
usedCalc = true
|
||||
}
|
||||
}
|
||||
}
|
||||
if !usedCalc {
|
||||
return "", fmt.Errorf("calculate was never invoked; answer: %q", res.Output)
|
||||
}
|
||||
if !strings.Contains(res.Output, "56161") {
|
||||
return "", fmt.Errorf("wrong answer %q (want 56161)", res.Output)
|
||||
}
|
||||
return fmt.Sprintf("calculate invoked: yes\nanswer: %s", strings.TrimSpace(res.Output)), nil
|
||||
})
|
||||
|
||||
fmt.Printf("\n=== live validation: %d failed ===\n", failed)
|
||||
if failed > 0 {
|
||||
os.Exit(1)
|
||||
}
|
||||
}
|
||||
|
||||
// loadDotEnv reads KEY=VALUE lines from the file at path into the process
// environment. It is a best-effort loader and never reports an error:
//
//   - A file that cannot be opened is ignored silently.
//   - Blank lines, lines starting with '#', lines without '=', and lines with
//     an empty key are skipped.
//   - A leading "export " is dropped, so shell-style files load too.
//   - A variable that already exists in the environment is never overridden,
//     even when its current value is the empty string.
//   - Surrounding whitespace is trimmed from keys and values, and a single
//     matching pair of surrounding quotes ("..." or '...') is removed from
//     the value. Quotes that are not a matching pair are kept.
func loadDotEnv(path string) {
	f, err := os.Open(path)
	if err != nil {
		return
	}
	defer f.Close()

	sc := bufio.NewScanner(f)
	for sc.Scan() {
		line := strings.TrimSpace(sc.Text())
		if line == "" || strings.HasPrefix(line, "#") {
			continue
		}
		line = strings.TrimPrefix(line, "export ")

		key, val, ok := strings.Cut(line, "=")
		if !ok {
			continue
		}
		key = strings.TrimSpace(key)
		if key == "" {
			continue // "=value" names no variable; Setenv would reject it
		}
		// LookupEnv, not Getenv: a variable explicitly set to "" is still
		// set and must not be overridden.
		if _, exists := os.LookupEnv(key); exists {
			continue
		}

		val = strings.TrimSpace(val)
		if n := len(val); n >= 2 && (val[0] == '"' || val[0] == '\'') && val[n-1] == val[0] {
			val = val[1 : n-1]
		}
		_ = os.Setenv(key, val) // best-effort: an unsettable entry is just skipped
	}
	_ = sc.Err() // best-effort loader; a truncated .env just loads less
}
|
||||
Reference in New Issue
Block a user