feat: live-validated against Ollama Cloud; schema instruction fallback for cloud
Phase 8: all six live checks pass (tier aliases, thinking-tier chat, real tool invocation, structured Generate[T], forced failover with bench+skip, skill agent). Discovery: ollama.com ignores the format field — the provider now also states the schema as a system instruction (constrained decoding locally, instruction-guided JSON on cloud), with hermetic test. Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
@@ -281,7 +281,11 @@ streaming); majordomo's stream API works against it and delivers the
|
|||||||
response as a single delta plus final event.
|
response as a single delta plus final event.
|
||||||
|
|
||||||
Notes: Ollama has no native tool_choice — `"none"` drops the tools;
|
Notes: Ollama has no native tool_choice — `"none"` drops the tools;
|
||||||
`"required"`/named choices are best-effort ignored there.
|
`"required"`/named choices are best-effort ignored there. Ollama Cloud
|
||||||
|
ignores the `format` field (verified live), so the provider also states
|
||||||
|
the schema as an explicit system instruction — constrained decoding on
|
||||||
|
local Ollama, instruction-guided JSON on cloud, one canonical API either
|
||||||
|
way.
|
||||||
|
|
||||||
Cross-cutting: Parse grammar ✅ · aliases/tiers ✅ · failover chains ✅ ·
|
Cross-cutting: Parse grammar ✅ · aliases/tiers ✅ · failover chains ✅ ·
|
||||||
health tracking/backoff ✅ · LLM_* env DSNs ✅ · media pipeline ✅
|
health tracking/backoff ✅ · LLM_* env DSNs ✅ · media pipeline ✅
|
||||||
|
|||||||
@@ -0,0 +1,251 @@
|
|||||||
|
// Command live is the Phase 8 live-validation harness: it proves majordomo
|
||||||
|
// end to end against real Ollama Cloud models before the library goes near
|
||||||
|
// mort. It needs OLLAMA_API_KEY (loaded from ./.env when present) and is
|
||||||
|
// NOT part of the hermetic suite — run it manually:
|
||||||
|
//
|
||||||
|
// go run ./examples/live
|
||||||
|
//
|
||||||
|
// Checks: tier aliases resolve; plain chat (thinking tier); a tool call
|
||||||
|
// the model actually invokes; structured Generate[T]; a forced failover
|
||||||
|
// chain (dead head → retry → bench → fall through, then skip-on-second-
|
||||||
|
// request); and an agent with a skill attached. If a model tag is
|
||||||
|
// unavailable it falls back to the tier's alternates (that's what chains
|
||||||
|
// do) and the harness notes which model served each check.
|
||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bufio"
|
||||||
|
"context"
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"gitea.stevedudenhoeffer.com/steve/majordomo"
|
||||||
|
"gitea.stevedudenhoeffer.com/steve/majordomo/agent"
|
||||||
|
"gitea.stevedudenhoeffer.com/steve/majordomo/provider/ollama"
|
||||||
|
"gitea.stevedudenhoeffer.com/steve/majordomo/skill/calc"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Tier failover chains registered as aliases by main. Each value is a
// comma-separated chain spec: the first target is the tier's primary model
// and the second is its alternate.
const (
	// thinkingSpec backs the "thinking" alias.
	thinkingSpec = "ollama-cloud/minimax-m3:cloud,ollama-cloud/kimi-k2.6:cloud"

	// workhorseSpec backs the "workhorse" alias.
	workhorseSpec = "ollama-cloud/minimax-m2.7:cloud,ollama-cloud/qwen3-coder:480b-cloud"
)
|
||||||
|
|
||||||
|
func main() {
|
||||||
|
loadDotEnv(".env")
|
||||||
|
if os.Getenv("OLLAMA_API_KEY") == "" {
|
||||||
|
fmt.Println("FATAL: OLLAMA_API_KEY not set (and no .env found) — cannot run live validation")
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
|
||||||
|
reg := majordomo.New()
|
||||||
|
reg.RegisterAlias("thinking", thinkingSpec)
|
||||||
|
reg.RegisterAlias("workhorse", workhorseSpec)
|
||||||
|
// A provider that can never answer: connection-refused is a transient
|
||||||
|
// error, so it exercises retry + bench + failover against a live tail.
|
||||||
|
reg.RegisterProvider(ollama.New(ollama.WithName("dead"), ollama.WithBaseURL("http://127.0.0.1:9")))
|
||||||
|
|
||||||
|
failed := 0
|
||||||
|
check := func(name string, fn func() (string, error)) {
|
||||||
|
fmt.Printf("\n=== %s ===\n", name)
|
||||||
|
start := time.Now()
|
||||||
|
out, err := fn()
|
||||||
|
if err != nil {
|
||||||
|
failed++
|
||||||
|
fmt.Printf("FAIL (%.1fs): %v\n", time.Since(start).Seconds(), err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
fmt.Printf("PASS (%.1fs)\n%s\n", time.Since(start).Seconds(), out)
|
||||||
|
}
|
||||||
|
|
||||||
|
ctx := context.Background()
|
||||||
|
withTimeout := func(d time.Duration) (context.Context, context.CancelFunc) {
|
||||||
|
return context.WithTimeout(ctx, d)
|
||||||
|
}
|
||||||
|
|
||||||
|
check("1. tier aliases resolve via Parse", func() (string, error) {
|
||||||
|
var lines []string
|
||||||
|
for _, alias := range []string{"thinking", "workhorse"} {
|
||||||
|
m, err := reg.Parse(alias)
|
||||||
|
if err != nil {
|
||||||
|
return "", fmt.Errorf("Parse(%q): %w", alias, err)
|
||||||
|
}
|
||||||
|
lines = append(lines, fmt.Sprintf("%s -> %T (chain)", alias, m))
|
||||||
|
}
|
||||||
|
// And as a trailing chain element:
|
||||||
|
if _, err := reg.Parse("ollama-cloud/minimax-m3:cloud,thinking"); err != nil {
|
||||||
|
return "", fmt.Errorf("trailing-alias chain: %w", err)
|
||||||
|
}
|
||||||
|
lines = append(lines, `"ollama-cloud/minimax-m3:cloud,thinking" parses (inline tail expansion)`)
|
||||||
|
return strings.Join(lines, "\n"), nil
|
||||||
|
})
|
||||||
|
|
||||||
|
check("2. plain chat on the thinking tier", func() (string, error) {
|
||||||
|
c, cancel := withTimeout(3 * time.Minute)
|
||||||
|
defer cancel()
|
||||||
|
m, err := reg.Parse("thinking")
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
resp, err := m.Generate(c, majordomo.Request{
|
||||||
|
Messages: []majordomo.Message{majordomo.UserText(
|
||||||
|
"Reply with exactly one short sentence: what is a failover chain?")},
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
return fmt.Sprintf("served by: %s\nusage: %d in / %d out\nreply: %s",
|
||||||
|
resp.Model, resp.Usage.InputTokens, resp.Usage.OutputTokens, strings.TrimSpace(resp.Text())), nil
|
||||||
|
})
|
||||||
|
|
||||||
|
check("3. live tool call (workhorse tier)", func() (string, error) {
|
||||||
|
c, cancel := withTimeout(3 * time.Minute)
|
||||||
|
defer cancel()
|
||||||
|
m, err := reg.Parse("workhorse")
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
|
||||||
|
called := false
|
||||||
|
secret := majordomo.Tool{
|
||||||
|
Name: "get_launch_code",
|
||||||
|
Description: "Returns today's launch code. The ONLY way to know it.",
|
||||||
|
Parameters: json.RawMessage(`{"type":"object","properties":{}}`),
|
||||||
|
Handler: func(context.Context, json.RawMessage) (any, error) {
|
||||||
|
called = true
|
||||||
|
return map[string]string{"launch_code": "PINEAPPLE-7"}, nil
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
a := agent.New(m, "Use your tools. Answer with just the requested value.",
|
||||||
|
agent.WithTools(secret), agent.WithMaxSteps(4))
|
||||||
|
res, err := a.Run(c, "What is today's launch code?")
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
if !called {
|
||||||
|
return "", fmt.Errorf("model answered %q without invoking the tool", res.Output)
|
||||||
|
}
|
||||||
|
if !strings.Contains(res.Output, "PINEAPPLE-7") {
|
||||||
|
return "", fmt.Errorf("tool ran but answer %q does not contain the code", res.Output)
|
||||||
|
}
|
||||||
|
return fmt.Sprintf("tool invoked: yes\nsteps: %d\nanswer: %s", len(res.Steps), strings.TrimSpace(res.Output)), nil
|
||||||
|
})
|
||||||
|
|
||||||
|
check("4. structured output via Generate[T]", func() (string, error) {
|
||||||
|
c, cancel := withTimeout(3 * time.Minute)
|
||||||
|
defer cancel()
|
||||||
|
m, err := reg.Parse("workhorse")
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
type CityFacts struct {
|
||||||
|
City string `json:"city"`
|
||||||
|
Country string `json:"country"`
|
||||||
|
Population int `json:"population" description:"approximate"`
|
||||||
|
Latitude float64 `json:"latitude"`
|
||||||
|
}
|
||||||
|
facts, err := majordomo.Generate[CityFacts](c, m, majordomo.Request{
|
||||||
|
Messages: []majordomo.Message{majordomo.UserText("Facts about Tokyo.")},
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
if !strings.EqualFold(facts.City, "tokyo") || facts.Population < 1_000_000 {
|
||||||
|
return "", fmt.Errorf("implausible decode: %+v", facts)
|
||||||
|
}
|
||||||
|
return fmt.Sprintf("decoded: %+v", facts), nil
|
||||||
|
})
|
||||||
|
|
||||||
|
check("5. forced failover: dead head -> retry -> bench -> live tail", func() (string, error) {
|
||||||
|
c, cancel := withTimeout(4 * time.Minute)
|
||||||
|
defer cancel()
|
||||||
|
m, err := reg.Parse("dead/anything,workhorse")
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
resp, err := m.Generate(c, majordomo.Request{
|
||||||
|
Messages: []majordomo.Message{majordomo.UserText("Say OK.")},
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
return "", fmt.Errorf("first request: %w", err)
|
||||||
|
}
|
||||||
|
if !strings.HasPrefix(resp.Model, "ollama-cloud/") {
|
||||||
|
return "", fmt.Errorf("expected a cloud target to serve, got %s", resp.Model)
|
||||||
|
}
|
||||||
|
if reg.Health().Available("dead/anything") {
|
||||||
|
return "", fmt.Errorf("dead head should be benched after repeated transient failures")
|
||||||
|
}
|
||||||
|
// Second request: the benched head must be skipped without a dial.
|
||||||
|
resp2, err := m.Generate(c, majordomo.Request{
|
||||||
|
Messages: []majordomo.Message{majordomo.UserText("Say OK again.")},
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
return "", fmt.Errorf("second request: %w", err)
|
||||||
|
}
|
||||||
|
return fmt.Sprintf("first served by %s after head retry+bench\nhead benched: yes\nsecond served by %s (head skipped while benched)",
|
||||||
|
resp.Model, resp2.Model), nil
|
||||||
|
})
|
||||||
|
|
||||||
|
check("6. agent with a skill attached", func() (string, error) {
|
||||||
|
c, cancel := withTimeout(3 * time.Minute)
|
||||||
|
defer cancel()
|
||||||
|
m, err := reg.Parse("workhorse")
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
a := agent.New(m, "You are precise.", agent.WithMaxSteps(4))
|
||||||
|
a.AddSkill(calc.New())
|
||||||
|
|
||||||
|
res, err := a.Run(c, "Compute 1337*42+7 with your calculate tool, then answer with just the number.")
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
var usedCalc bool
|
||||||
|
for _, step := range res.Steps {
|
||||||
|
for _, r := range step.Results {
|
||||||
|
if r.Name == "calculate" && !r.IsError {
|
||||||
|
usedCalc = true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if !usedCalc {
|
||||||
|
return "", fmt.Errorf("calculate was never invoked; answer: %q", res.Output)
|
||||||
|
}
|
||||||
|
if !strings.Contains(res.Output, "56161") {
|
||||||
|
return "", fmt.Errorf("wrong answer %q (want 56161)", res.Output)
|
||||||
|
}
|
||||||
|
return fmt.Sprintf("calculate invoked: yes\nanswer: %s", strings.TrimSpace(res.Output)), nil
|
||||||
|
})
|
||||||
|
|
||||||
|
fmt.Printf("\n=== live validation: %d failed ===\n", failed)
|
||||||
|
if failed > 0 {
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// loadDotEnv reads KEY=VALUE lines from the file at path into the process
// environment. Blank lines and lines beginning with '#' are skipped; keys and
// values are whitespace-trimmed, and any leading or trailing quote characters
// (" or ') are stripped from the value.
//
// A variable that is already present in the environment is never overridden,
// including one explicitly set to the empty string — presence is tested with
// os.LookupEnv, not by comparing os.Getenv against "". Lines without '=' or
// with an empty key are ignored.
//
// The loader is best-effort: it is quiet on a missing or unreadable file and
// on a read error partway through, in which case it simply loads less.
func loadDotEnv(path string) {
	f, err := os.Open(path)
	if err != nil {
		return // .env is optional
	}
	defer f.Close()

	sc := bufio.NewScanner(f)
	for sc.Scan() {
		line := strings.TrimSpace(sc.Text())
		if line == "" || strings.HasPrefix(line, "#") {
			continue
		}
		k, v, ok := strings.Cut(line, "=")
		if !ok {
			continue
		}
		k = strings.TrimSpace(k)
		if k == "" {
			continue // "=value" names no variable
		}
		if _, present := os.LookupEnv(k); present {
			continue // the real environment wins, even when set to ""
		}
		// Setenv fails only for names the OS rejects; a best-effort loader
		// skips those rather than aborting the rest of the file.
		_ = os.Setenv(k, strings.Trim(strings.TrimSpace(v), `"'`))
	}
	_ = sc.Err() // best-effort loader; a truncated .env just loads less
}
|
||||||
+27
@@ -1,5 +1,32 @@
|
|||||||
# progress
|
# progress
|
||||||
|
|
||||||
|
## 2026-06-10 — Phase 8: live validation against real Ollama Cloud
|
||||||
|
|
||||||
|
**All six checks PASS** (examples/live harness, OLLAMA_API_KEY from .env):
|
||||||
|
1. Tier aliases (`thinking` = minimax-m3:cloud→kimi-k2.6:cloud,
|
||||||
|
`workhorse` = minimax-m2.7:cloud→qwen3-coder:480b-cloud) resolve via
|
||||||
|
Parse, incl. as a trailing chain element.
|
||||||
|
2. Plain chat served by ollama-cloud/minimax-m3:cloud (189 in/48 out).
|
||||||
|
3. Live tool call: the workhorse agent actually invoked get_launch_code
|
||||||
|
and answered from its result in 2 steps.
|
||||||
|
4. Structured Generate[T] decoded {City:Tokyo Country:Japan
|
||||||
|
Population:14000000 Latitude:35.6762}.
|
||||||
|
5. Forced failover: an unreachable head (connection refused = transient)
|
||||||
|
was retried, benched, and fell through to a live cloud tail; the second
|
||||||
|
request skipped the benched head without dialing it.
|
||||||
|
6. Agent with the calc skill attached invoked calculate and answered
|
||||||
|
56161.
|
||||||
|
|
||||||
|
**Discovery + fix:** Ollama Cloud ignores the `format` field entirely
|
||||||
|
(verified with raw curl — markdown came back despite a schema). The
|
||||||
|
ollama provider now also states the schema as an explicit system
|
||||||
|
instruction (local stays constrained-decoded; cloud becomes
|
||||||
|
instruction-guided); hermetic test added. The `:cloud`-suffixed model
|
||||||
|
names work verbatim against ollama.com — mort's tier strings carry over
|
||||||
|
unchanged.
|
||||||
|
|
||||||
|
**Next:** Phase 9 — convert mort onto majordomo, open the PR.
|
||||||
|
|
||||||
## 2026-06-10 — Phase 7: examples, migration blueprint, README finalization
|
## 2026-06-10 — Phase 7: examples, migration blueprint, README finalization
|
||||||
|
|
||||||
**Landed:** `examples/` — nine runnable programs, one per hard requirement
|
**Landed:** `examples/` — nine runnable programs, one per hard requirement
|
||||||
|
|||||||
@@ -235,6 +235,14 @@ func TestStructuredOutputFormat(t *testing.T) {
|
|||||||
if resp.Text() != `{"name":"Ada"}` {
|
if resp.Text() != `{"name":"Ada"}` {
|
||||||
t.Errorf("text = %q", resp.Text())
|
t.Errorf("text = %q", resp.Text())
|
||||||
}
|
}
|
||||||
|
// Ollama Cloud ignores "format", so the schema must also be stated as
|
||||||
|
// a system instruction.
|
||||||
|
msgs := cap.body["messages"].([]any)
|
||||||
|
sys := msgs[0].(map[string]any)
|
||||||
|
if sys["role"] != "system" || !strings.Contains(sys["content"].(string), `"name"`) ||
|
||||||
|
!strings.Contains(sys["content"].(string), "JSON Schema") {
|
||||||
|
t.Errorf("system fold must carry the schema instruction, got %v", sys)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestThinkMapping(t *testing.T) {
|
func TestThinkMapping(t *testing.T) {
|
||||||
|
|||||||
@@ -138,6 +138,14 @@ func (m *model) buildRequest(req llm.Request, stream bool) (*chatRequest, error)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if len(req.Schema) > 0 {
|
||||||
|
// Belt and braces: local Ollama enforces the "format" schema by
|
||||||
|
// constrained decoding, but Ollama Cloud ignores the field
|
||||||
|
// (verified live 2026-06-10) — so the schema is also stated as an
|
||||||
|
// explicit instruction. Harmless where format works, essential
|
||||||
|
// where it doesn't.
|
||||||
|
sys = append(sys, "Respond with a single JSON object that validates against this JSON Schema — no markdown, no code fences, no prose before or after the JSON:\n"+string(req.Schema))
|
||||||
|
}
|
||||||
if len(sys) > 0 {
|
if len(sys) > 0 {
|
||||||
out.Messages = append(out.Messages, chatMessage{
|
out.Messages = append(out.Messages, chatMessage{
|
||||||
Role: "system", Content: strings.Join(sys, "\n\n"),
|
Role: "system", Content: strings.Join(sys, "\n\n"),
|
||||||
|
|||||||
Reference in New Issue
Block a user