feat: OpenAI, Anthropic, and native-Ollama providers + media pipeline
Phase 3:

- provider/openai: Chat Completions for OpenAI + compat endpoints (SSE streaming with by-index tool-call assembly, response_format json_schema, legacy max_tokens option, reasoning_effort)
- provider/anthropic: Messages API (tool_use/tool_result, GA structured output via output_config.format, full SSE event parser, 529 transient)
- provider/ollama: one native /api/chat client behind the ollama, ollama-cloud, and foreman built-ins (presets; NDJSON streaming tolerant of foreman's buffered single-object responses; object tool arguments; format-schema structured output; think mapping)
- media/: capability normalization (sniff, downscale, transcode, byte ladder, ErrUnsupported), wired into the chain executor per target with penalty-free advance past incapable elements
- registry: real provider + scheme wiring, WithHTTPClient option, required env-foreman TLS chat round-trip test
- ADR-0009 multimodal strategy, ADR-0010 tools/structured mapping; README matrix + CLAUDE.md synced

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
@@ -7,6 +7,7 @@ import (
|
||||
|
||||
"gitea.stevedudenhoeffer.com/steve/majordomo/health"
|
||||
"gitea.stevedudenhoeffer.com/steve/majordomo/llm"
|
||||
"gitea.stevedudenhoeffer.com/steve/majordomo/media"
|
||||
)
|
||||
|
||||
// ErrChainExhausted reports that every element of a failover chain failed
|
||||
@@ -65,8 +66,8 @@ func (c *chain) Capabilities() llm.Capabilities {
|
||||
// Generate tries each target per the chain semantics above.
|
||||
func (c *chain) Generate(ctx context.Context, req llm.Request, opts ...llm.Option) (*llm.Response, error) {
|
||||
req = req.Apply(opts...)
|
||||
return chainDo(ctx, c, func(ctx context.Context, t chainTarget) (*llm.Response, error) {
|
||||
return t.model.Generate(ctx, req)
|
||||
return chainDo(ctx, c, req, func(ctx context.Context, t chainTarget, nreq llm.Request) (*llm.Response, error) {
|
||||
return t.model.Generate(ctx, nreq)
|
||||
})
|
||||
}
|
||||
|
||||
@@ -76,14 +77,17 @@ func (c *chain) Generate(ctx context.Context, req llm.Request, opts ...llm.Optio
|
||||
// (replaying half-delivered output would duplicate content).
|
||||
func (c *chain) Stream(ctx context.Context, req llm.Request, opts ...llm.Option) (llm.Stream, error) {
|
||||
req = req.Apply(opts...)
|
||||
return chainDo(ctx, c, func(ctx context.Context, t chainTarget) (llm.Stream, error) {
|
||||
return t.model.Stream(ctx, req)
|
||||
return chainDo(ctx, c, req, func(ctx context.Context, t chainTarget, nreq llm.Request) (llm.Stream, error) {
|
||||
return t.model.Stream(ctx, nreq)
|
||||
})
|
||||
}
|
||||
|
||||
// chainDo runs the head-to-tail failover algorithm around an attempt
|
||||
// function, generic over the result type (response vs stream).
|
||||
func chainDo[T any](ctx context.Context, c *chain, attempt func(context.Context, chainTarget) (T, error)) (T, error) {
|
||||
// function, generic over the result type (response vs stream). Before each
|
||||
// target is tried, the request's media is normalized against THAT target's
|
||||
// capabilities (ADR-0008/0009) — a request that cannot be made to fit one
|
||||
// target advances to the next without a health penalty.
|
||||
func chainDo[T any](ctx context.Context, c *chain, req llm.Request, attempt func(context.Context, chainTarget, llm.Request) (T, error)) (T, error) {
|
||||
var zero T
|
||||
var failures []error
|
||||
|
||||
@@ -94,12 +98,20 @@ func chainDo[T any](ctx context.Context, c *chain, attempt func(context.Context,
|
||||
continue
|
||||
}
|
||||
|
||||
nreq, err := media.Normalize(req, t.model.Capabilities())
|
||||
if err != nil {
|
||||
// Always ErrUnsupported-wrapped: this target cannot take the
|
||||
// request by declaration. Advance, no health penalty.
|
||||
failures = append(failures, fmt.Errorf("%s: %w", t.key, err))
|
||||
continue
|
||||
}
|
||||
|
||||
retries := c.cfg.retries()
|
||||
for attemptN := 0; ; attemptN++ {
|
||||
if err := ctx.Err(); err != nil {
|
||||
return zero, err
|
||||
}
|
||||
result, err := attempt(ctx, t)
|
||||
result, err := attempt(ctx, t, nreq)
|
||||
if err == nil {
|
||||
c.tracker.ReportSuccess(t.key)
|
||||
return result, nil
|
||||
|
||||
Reference in New Issue
Block a user