Files
majordomo/provider/openai/model.go
T
steve 0147a79d18
CI / Tidy (push) Successful in 9m31s
CI / Build & Test (push) Successful in 10m13s
feat: conversion-driven extensions — resolvers, DefineTool, hooks, ops controls
Phase 9a (ADR-0014): Registry.RegisterResolver for dynamic tiers;
DefineTool[Args] typed tools; Usage cache/reasoning detail fields wired
through anthropic/openai/google; WithPromptCaching (Anthropic
cache_control); agent supervision hooks (WithMaxStepsFunc, WithSteer,
WithCompactor, WithToolErrorLimits + ErrToolLoop); health
Bench/Unbench/Snapshot; ChainConfig.Observer failover events.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
2026-06-10 13:30:06 +02:00

220 lines
7.0 KiB
Go

package openai
import (
"bufio"
"bytes"
"context"
"encoding/json"
"fmt"
"io"
"net/http"
"strings"
"gitea.stevedudenhoeffer.com/steve/majordomo/llm"
)
// model is one provider-bound target: a single model id reached through a
// specific Provider. Its methods implement llm.Model.
type model struct {
// p is the provider this model is bound to; requests read its name, API
// key, base URL, and HTTP client.
p *Provider
// id is the model identifier; it is reported in API errors and, joined
// with the provider name, in Response.Model.
id string
// caps is the capability set returned by Capabilities and enforced by
// checkRequest before a request is sent.
caps llm.Capabilities
}
// Capabilities implements llm.Model by reporting the capability set stored
// on this model.
func (m *model) Capabilities() llm.Capabilities {
	return m.caps
}
// Generate implements llm.Model. It folds opts into req, validates the
// result against the model's capabilities, performs one non-streaming
// chat-completions call, and maps the decoded body onto an llm.Response.
// A non-2xx status is returned as the error produced by apiError; a body
// that fails to decode is returned as a wrapped decode error.
func (m *model) Generate(ctx context.Context, req llm.Request, opts ...llm.Option) (*llm.Response, error) {
	req = req.Apply(opts...)
	if err := checkRequest(m.caps, req); err != nil {
		return nil, err
	}

	res, err := m.do(ctx, req, false)
	if err != nil {
		return nil, err
	}
	defer res.Body.Close()

	// Anything outside 200-299 is an API-level failure.
	if status := res.StatusCode; status < 200 || status > 299 {
		return nil, m.apiError(res)
	}

	decoded := new(chatResponse)
	if err := json.NewDecoder(res.Body).Decode(decoded); err != nil {
		return nil, fmt.Errorf("openai: decode response: %w", err)
	}
	return m.toResponse(decoded), nil
}
// Stream implements llm.Model. It folds opts into req, refuses with
// llm.ErrUnsupported when the model does not stream, validates the request
// against the model's capabilities, and starts a streaming chat-completions
// call. On success the response body is handed to the returned stream
// together with a line scanner over it; on a non-2xx status the body is
// closed here and the error produced by apiError is returned.
func (m *model) Stream(ctx context.Context, req llm.Request, opts ...llm.Option) (llm.Stream, error) {
	const (
		// initialLineBuf matches bufio.Scanner's default 64 KiB line cap.
		initialLineBuf = 64 * 1024
		// Why 16 MiB: a single SSE data line carries a whole JSON chunk, and
		// tool-call argument fragments can push a line far past the default.
		maxLineBytes = 16 << 20
	)

	req = req.Apply(opts...)
	if !m.caps.SupportsStreaming {
		return nil, fmt.Errorf("%w: streaming not supported by %s/%s", llm.ErrUnsupported, m.p.name, m.id)
	}
	if err := checkRequest(m.caps, req); err != nil {
		return nil, err
	}

	res, err := m.do(ctx, req, true)
	if err != nil {
		return nil, err
	}
	if status := res.StatusCode; status < 200 || status > 299 {
		defer res.Body.Close()
		return nil, m.apiError(res)
	}

	lines := bufio.NewScanner(res.Body)
	lines.Buffer(make([]byte, 0, initialLineBuf), maxLineBytes)
	return &stream{m: m, body: res.Body, sc: lines}, nil
}
// do encodes req as a chat-completions payload, POSTs it to the provider's
// base URL with bearer authentication, and returns the raw HTTP response
// for the caller to inspect and close. When stream is true the request also
// asks for text/event-stream.
//
// Transport failures are wrapped raw (never as *llm.APIError) so that
// llm.Classify can still see net.Error, syscall errnos, and context errors
// underneath.
func (m *model) do(ctx context.Context, req llm.Request, stream bool) (*http.Response, error) {
	key := m.p.apiKey
	if key == "" {
		// Why a synthetic 401: the constructor never fails, so a missing key
		// has to surface at request time as the auth failure it is, which
		// is permanent under llm.Classify just like a real 401.
		return nil, &llm.APIError{
			Provider: m.p.name,
			Model:    m.id,
			Status:   http.StatusUnauthorized,
			Code:     "missing_api_key",
			Message:  "no API key configured: set OPENAI_API_KEY or use WithAPIKey",
		}
	}

	payload, err := json.Marshal(m.buildRequest(req, stream))
	if err != nil {
		return nil, fmt.Errorf("openai: encode request: %w", err)
	}

	endpoint := m.p.baseURL + "/chat/completions"
	outbound, err := http.NewRequestWithContext(ctx, http.MethodPost, endpoint, bytes.NewReader(payload))
	if err != nil {
		return nil, fmt.Errorf("openai: build request: %w", err)
	}

	hdr := outbound.Header
	hdr.Set("Content-Type", "application/json")
	hdr.Set("Authorization", "Bearer "+key)
	if stream {
		hdr.Set("Accept", "text/event-stream")
	}

	res, err := m.p.client.Do(outbound)
	if err != nil {
		return nil, fmt.Errorf("openai: do request: %w", err)
	}
	return res, nil
}
// apiError turns a non-2xx response into an *llm.APIError carrying the
// provider name, model id, and HTTP status. At most 1 MiB of the body is
// read. When the body parses as the {"error":{...}} envelope and carries at
// least one of message, type, or code, the message and code are taken from
// it, with the type standing in for an empty code. Otherwise the trimmed
// raw body becomes the message. The caller remains responsible for closing
// the body.
func (m *model) apiError(httpResp *http.Response) error {
	// Best effort: a read failure just leaves less (or no) detail to report.
	raw, _ := io.ReadAll(io.LimitReader(httpResp.Body, 1<<20))

	out := &llm.APIError{
		Provider: m.p.name,
		Model:    m.id,
		Status:   httpResp.StatusCode,
	}

	var env errorEnvelope
	parsed := json.Unmarshal(raw, &env) == nil
	if !parsed || (env.Error.Message == "" && env.Error.Type == "" && env.Error.Code == "") {
		// Why: compat servers emit all sorts of error bodies; a raw snippet
		// beats silence when the canonical envelope is absent.
		out.Message = strings.TrimSpace(string(raw))
		return out
	}

	out.Message = env.Error.Message
	out.Code = env.Error.Code
	if out.Code == "" {
		out.Code = env.Error.Type
	}
	return out
}
// toResponse maps the wire response onto the canonical llm.Response.
//
// The result's Model is "<provider>/<id>" and Raw keeps the decoded wire
// value. Usage is copied when the wire response carries it. Only the first
// choice is mapped: its non-empty content becomes a single TextPart and each
// of its tool calls becomes an llm.ToolCall. A response without choices
// yields llm.FinishOther and no parts or tool calls.
func (m *model) toResponse(wire *chatResponse) *llm.Response {
	resp := &llm.Response{Model: m.p.name + "/" + m.id, Raw: wire}
	if wire.Usage != nil {
		resp.Usage = wire.Usage.toUsage()
	}
	if len(wire.Choices) == 0 {
		resp.FinishReason = llm.FinishOther
		return resp
	}

	choice := wire.Choices[0]
	if choice.Message.Content != "" {
		resp.Parts = append(resp.Parts, llm.TextPart{Text: choice.Message.Content})
	}
	for i, tc := range choice.Message.ToolCalls {
		id := tc.ID
		if id == "" {
			// Why: ToolResult.ID must echo ToolCall.ID, so calls from compat
			// servers that omit ids get synthesized ones.
			id = fmt.Sprintf("call_%d", i)
		}

		args := json.RawMessage(tc.Function.Arguments)
		if len(bytes.TrimSpace(args)) == 0 {
			// Why: an empty arguments string is not valid JSON. Left as is,
			// the zero-length RawMessage makes json.Marshal of the tool call
			// fail and json.Unmarshal into typed arguments error out, so a
			// call without arguments is normalized to an empty object.
			args = json.RawMessage("{}")
		}

		resp.ToolCalls = append(resp.ToolCalls, llm.ToolCall{
			ID:        id,
			Name:      tc.Function.Name,
			Arguments: args,
		})
	}
	resp.FinishReason = mapFinish(choice.FinishReason, len(resp.ToolCalls) > 0)
	return resp
}
// mapFinish translates a wire finish_reason into the canonical
// llm.FinishReason. The presence of tool calls overrides whatever reason was
// reported, because a forced (named tool_choice) call can finish with "stop"
// while still carrying tool_calls. Reasons that are not recognized map to
// llm.FinishOther.
func mapFinish(reason string, hasToolCalls bool) llm.FinishReason {
	switch {
	case hasToolCalls, reason == "tool_calls":
		return llm.FinishToolCalls
	case reason == "stop":
		return llm.FinishStop
	case reason == "length":
		return llm.FinishLength
	case reason == "content_filter":
		return llm.FinishContentFilter
	}
	return llm.FinishOther
}
// checkRequest enforces the model's effective capabilities and returns an
// error wrapping llm.ErrUnsupported for the first violation found, or nil
// when the request is acceptable. Checks run in this order: tools,
// structured output, then per image (image support, MIME type, byte size),
// and finally the total image count.
//
// Why enforcement rather than normalization: a separate media layer
// resizes/transcodes images BEFORE requests reach the provider; this check
// is the honest backstop that refuses what the target declaredly cannot
// serve (chains advance past it penalty-free).
func checkRequest(caps llm.Capabilities, req llm.Request) error {
	switch {
	case len(req.Tools) > 0 && !caps.SupportsTools:
		return fmt.Errorf("%w: tools not supported", llm.ErrUnsupported)
	case len(req.Schema) > 0 && !caps.SupportsStructured:
		return fmt.Errorf("%w: structured output not supported", llm.ErrUnsupported)
	}

	seen := 0
	for _, message := range req.Messages {
		for _, piece := range message.Parts {
			image, isImage := piece.(llm.ImagePart)
			if !isImage {
				continue
			}
			seen++

			size := len(image.Data)
			switch {
			case !caps.SupportsImages():
				return fmt.Errorf("%w: image input not supported", llm.ErrUnsupported)
			case !caps.MIMEAllowed(image.MIME):
				return fmt.Errorf("%w: image MIME type %q not allowed (allowed: %s)",
					llm.ErrUnsupported, image.MIME, strings.Join(caps.AllowedImageMIME, ", "))
			// A non-positive MaxImageBytes disables the per-image size check.
			case caps.MaxImageBytes > 0 && size > caps.MaxImageBytes:
				return fmt.Errorf("%w: image is %d bytes, limit is %d",
					llm.ErrUnsupported, size, caps.MaxImageBytes)
			}
		}
	}

	if limit := caps.MaxImagesPerReq; seen > limit {
		return fmt.Errorf("%w: request carries %d images, limit is %d",
			llm.ErrUnsupported, seen, limit)
	}
	return nil
}