feat: OpenAI, Anthropic, and native-Ollama providers + media pipeline
Phase 3: - provider/openai: Chat Completions for OpenAI + compat endpoints (SSE streaming with by-index tool-call assembly, response_format json_schema, legacy max_tokens option, reasoning_effort) - provider/anthropic: Messages API (tool_use/tool_result, GA structured output via output_config.format, full SSE event parser, 529 transient) - provider/ollama: one native /api/chat client behind the ollama, ollama-cloud, and foreman built-ins (presets; NDJSON streaming tolerant of foreman's buffered single-object responses; object tool arguments; format-schema structured output; think mapping) - media/: capability normalization (sniff, downscale, transcode, byte ladder, ErrUnsupported), wired into the chain executor per target with penalty-free advance past incapable elements - registry: real provider + scheme wiring, WithHTTPClient option, required env-foreman TLS chat round-trip test - ADR-0009 multimodal strategy, ADR-0010 tools/structured mapping; README matrix + CLAUDE.md synced Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
@@ -1,8 +1,12 @@
|
||||
package majordomo
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"errors"
|
||||
"image"
|
||||
"image/color"
|
||||
"image/png"
|
||||
"slices"
|
||||
"strings"
|
||||
"sync"
|
||||
@@ -325,6 +329,95 @@ func TestSingleTargetGetsChainSemantics(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// pngImage returns the PNG encoding of a width×height opaque RGBA image
// for media tests. Each pixel stores its x coordinate in the red channel
// and its y coordinate in the green channel (both truncated to uint8),
// with blue fixed at 128. An encoding failure aborts the calling test.
func pngImage(t *testing.T, width, height int) []byte {
	t.Helper()

	canvas := image.NewRGBA(image.Rect(0, 0, width, height))
	for row := 0; row < height; row++ {
		for col := 0; col < width; col++ {
			px := color.RGBA{R: uint8(col), G: uint8(row), B: 128, A: 255}
			canvas.SetRGBA(col, row, px)
		}
	}

	encoded := new(bytes.Buffer)
	if err := png.Encode(encoded, canvas); err != nil {
		t.Fatalf("encode png: %v", err)
	}
	return encoded.Bytes()
}
|
||||
|
||||
// TestChainNormalizesMediaPerTarget: the request's image is downscaled to
|
||||
// the capabilities of the target that actually serves it.
|
||||
func TestChainNormalizesMediaPerTarget(t *testing.T) {
|
||||
r := newTestRegistry(t)
|
||||
fp := fake.New("fp",
|
||||
fake.WithModelCapabilities("small-vision", llm.Capabilities{
|
||||
MaxImagesPerReq: 2,
|
||||
MaxImageDimension: 32,
|
||||
AllowedImageMIME: []string{"image/png"},
|
||||
}),
|
||||
)
|
||||
r.RegisterProvider(fp)
|
||||
|
||||
m, _ := r.Parse("fp/small-vision")
|
||||
_, err := m.Generate(context.Background(), Request{Messages: []Message{
|
||||
UserParts(Text("describe"), Image("image/png", pngImage(t, 100, 50))),
|
||||
}})
|
||||
if err != nil {
|
||||
t.Fatalf("Generate: %v", err)
|
||||
}
|
||||
|
||||
calls := fp.Calls()
|
||||
if len(calls) != 1 {
|
||||
t.Fatalf("calls = %d", len(calls))
|
||||
}
|
||||
var img llm.ImagePart
|
||||
for _, part := range calls[0].Request.Messages[0].Parts {
|
||||
if ip, ok := part.(llm.ImagePart); ok {
|
||||
img = ip
|
||||
}
|
||||
}
|
||||
if img.Data == nil {
|
||||
t.Fatal("no image reached the provider")
|
||||
}
|
||||
cfg, err := png.DecodeConfig(bytes.NewReader(img.Data))
|
||||
if err != nil {
|
||||
t.Fatalf("decode delivered image: %v", err)
|
||||
}
|
||||
if cfg.Width != 32 || cfg.Height != 16 {
|
||||
t.Errorf("delivered image = %dx%d, want 32x16 (downscaled to target cap)", cfg.Width, cfg.Height)
|
||||
}
|
||||
}
|
||||
|
||||
// TestChainAdvancesPastImagelessTarget: a text-only head can't take an
|
||||
// image request; the chain advances to a vision-capable element with no
|
||||
// health penalty.
|
||||
func TestChainAdvancesPastImagelessTarget(t *testing.T) {
|
||||
r := newTestRegistry(t)
|
||||
fp := fake.New("fp",
|
||||
fake.WithModelCapabilities("text-only", llm.Capabilities{SupportsTools: true}),
|
||||
fake.WithModelCapabilities("vision", llm.Capabilities{MaxImagesPerReq: 4}),
|
||||
)
|
||||
r.RegisterProvider(fp)
|
||||
fp.Enqueue("vision", fake.Reply("a tasteful png"))
|
||||
|
||||
m, _ := r.Parse("fp/text-only,fp/vision")
|
||||
resp, err := m.Generate(context.Background(), Request{Messages: []Message{
|
||||
UserParts(Text("what is this?"), Image("image/png", pngImage(t, 8, 8))),
|
||||
}})
|
||||
if err != nil {
|
||||
t.Fatalf("Generate: %v", err)
|
||||
}
|
||||
if resp.Text() != "a tasteful png" {
|
||||
t.Errorf("text = %q", resp.Text())
|
||||
}
|
||||
if got := fp.CallCount("text-only"); got != 0 {
|
||||
t.Errorf("text-only target saw %d calls, want 0 (normalization rejects pre-send)", got)
|
||||
}
|
||||
if !r.Health().Available("fp/text-only") {
|
||||
t.Error("media rejection must not penalize health")
|
||||
}
|
||||
}
|
||||
|
||||
// TestHTTP529FailsOver: Anthropic's "overloaded" status (HTTP 529) fails
|
||||
// over like any other transient error.
|
||||
func TestHTTP529FailsOver(t *testing.T) {
|
||||
|
||||
Reference in New Issue
Block a user