0147a79d18
Phase 9a (ADR-0014): Registry.RegisterResolver for dynamic tiers; DefineTool[Args] typed tools; Usage cache/reasoning detail fields wired through anthropic/openai/google; WithPromptCaching (Anthropic cache_control); agent supervision hooks (WithMaxStepsFunc, WithSteer, WithCompactor, WithToolErrorLimits + ErrToolLoop); health Bench/Unbench/Snapshot; ChainConfig.Observer failover events. Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
220 lines
7.0 KiB
Go
220 lines
7.0 KiB
Go
package openai
|
|
|
|
import (
|
|
"bufio"
|
|
"bytes"
|
|
"context"
|
|
"encoding/json"
|
|
"fmt"
|
|
"io"
|
|
"net/http"
|
|
"strings"
|
|
|
|
"gitea.stevedudenhoeffer.com/steve/majordomo/llm"
|
|
)
|
|
|
|
// model is one provider-bound target.
|
|
type model struct {
|
|
p *Provider
|
|
id string
|
|
caps llm.Capabilities
|
|
}
|
|
|
|
// Capabilities implements llm.Model.
|
|
func (m *model) Capabilities() llm.Capabilities { return m.caps }
|
|
|
|
// Generate implements llm.Model.
|
|
func (m *model) Generate(ctx context.Context, req llm.Request, opts ...llm.Option) (*llm.Response, error) {
|
|
req = req.Apply(opts...)
|
|
if err := checkRequest(m.caps, req); err != nil {
|
|
return nil, err
|
|
}
|
|
httpResp, err := m.do(ctx, req, false)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer httpResp.Body.Close()
|
|
if httpResp.StatusCode/100 != 2 {
|
|
return nil, m.apiError(httpResp)
|
|
}
|
|
var wire chatResponse
|
|
if err := json.NewDecoder(httpResp.Body).Decode(&wire); err != nil {
|
|
return nil, fmt.Errorf("openai: decode response: %w", err)
|
|
}
|
|
return m.toResponse(&wire), nil
|
|
}
|
|
|
|
// Stream implements llm.Model.
|
|
func (m *model) Stream(ctx context.Context, req llm.Request, opts ...llm.Option) (llm.Stream, error) {
|
|
req = req.Apply(opts...)
|
|
if !m.caps.SupportsStreaming {
|
|
return nil, fmt.Errorf("%w: streaming not supported by %s/%s", llm.ErrUnsupported, m.p.name, m.id)
|
|
}
|
|
if err := checkRequest(m.caps, req); err != nil {
|
|
return nil, err
|
|
}
|
|
httpResp, err := m.do(ctx, req, true)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
if httpResp.StatusCode/100 != 2 {
|
|
defer httpResp.Body.Close()
|
|
return nil, m.apiError(httpResp)
|
|
}
|
|
sc := bufio.NewScanner(httpResp.Body)
|
|
// Why: a single SSE data line carries a whole JSON chunk; tool-call
|
|
// argument fragments can make lines far larger than Scanner's 64 KiB
|
|
// default cap.
|
|
sc.Buffer(make([]byte, 0, 64*1024), 16<<20)
|
|
return &stream{m: m, body: httpResp.Body, sc: sc}, nil
|
|
}
|
|
|
|
// do builds and performs the HTTP request. Transport failures are wrapped
|
|
// raw (never as *llm.APIError) so llm.Classify still sees net.Error,
|
|
// syscall errnos, and context errors underneath.
|
|
func (m *model) do(ctx context.Context, req llm.Request, stream bool) (*http.Response, error) {
|
|
if m.p.apiKey == "" {
|
|
// Why a synthetic 401: the constructor never fails, so a missing
|
|
// key must surface at request time as the auth failure it is —
|
|
// permanent under llm.Classify, like a real 401.
|
|
return nil, &llm.APIError{
|
|
Provider: m.p.name,
|
|
Model: m.id,
|
|
Status: http.StatusUnauthorized,
|
|
Code: "missing_api_key",
|
|
Message: "no API key configured: set OPENAI_API_KEY or use WithAPIKey",
|
|
}
|
|
}
|
|
body, err := json.Marshal(m.buildRequest(req, stream))
|
|
if err != nil {
|
|
return nil, fmt.Errorf("openai: encode request: %w", err)
|
|
}
|
|
httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, m.p.baseURL+"/chat/completions", bytes.NewReader(body))
|
|
if err != nil {
|
|
return nil, fmt.Errorf("openai: build request: %w", err)
|
|
}
|
|
httpReq.Header.Set("Content-Type", "application/json")
|
|
httpReq.Header.Set("Authorization", "Bearer "+m.p.apiKey)
|
|
if stream {
|
|
httpReq.Header.Set("Accept", "text/event-stream")
|
|
}
|
|
httpResp, err := m.p.client.Do(httpReq)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("openai: do request: %w", err)
|
|
}
|
|
return httpResp, nil
|
|
}
|
|
|
|
// apiError converts a non-2xx response into *llm.APIError, pulling code and
|
|
// message from the {"error":{...}} body when it parses.
|
|
func (m *model) apiError(httpResp *http.Response) error {
|
|
apiErr := &llm.APIError{Provider: m.p.name, Model: m.id, Status: httpResp.StatusCode}
|
|
body, _ := io.ReadAll(io.LimitReader(httpResp.Body, 1<<20))
|
|
var env errorEnvelope
|
|
if err := json.Unmarshal(body, &env); err == nil &&
|
|
(env.Error.Message != "" || env.Error.Type != "" || env.Error.Code != "") {
|
|
apiErr.Message = env.Error.Message
|
|
apiErr.Code = env.Error.Code
|
|
if apiErr.Code == "" {
|
|
apiErr.Code = env.Error.Type
|
|
}
|
|
} else {
|
|
// Why: compat servers emit all sorts of error bodies; a raw snippet
|
|
// beats silence when the canonical envelope is absent.
|
|
apiErr.Message = strings.TrimSpace(string(body))
|
|
}
|
|
return apiErr
|
|
}
|
|
|
|
// toResponse maps the wire response onto the canonical llm.Response.
|
|
func (m *model) toResponse(wire *chatResponse) *llm.Response {
|
|
resp := &llm.Response{Model: m.p.name + "/" + m.id, Raw: wire}
|
|
if wire.Usage != nil {
|
|
resp.Usage = wire.Usage.toUsage()
|
|
}
|
|
if len(wire.Choices) == 0 {
|
|
resp.FinishReason = llm.FinishOther
|
|
return resp
|
|
}
|
|
choice := wire.Choices[0]
|
|
if choice.Message.Content != "" {
|
|
resp.Parts = append(resp.Parts, llm.TextPart{Text: choice.Message.Content})
|
|
}
|
|
for i, tc := range choice.Message.ToolCalls {
|
|
id := tc.ID
|
|
if id == "" {
|
|
// Why: ToolResult.ID must echo ToolCall.ID, so calls from compat
|
|
// servers that omit ids get synthesized ones.
|
|
id = fmt.Sprintf("call_%d", i)
|
|
}
|
|
resp.ToolCalls = append(resp.ToolCalls, llm.ToolCall{
|
|
ID: id,
|
|
Name: tc.Function.Name,
|
|
Arguments: json.RawMessage(tc.Function.Arguments),
|
|
})
|
|
}
|
|
resp.FinishReason = mapFinish(choice.FinishReason, len(resp.ToolCalls) > 0)
|
|
return resp
|
|
}
|
|
|
|
// mapFinish maps a wire finish_reason to the canonical enum. Tool-call
|
|
// presence wins over the reported reason: a forced (named tool_choice) call
|
|
// can finish with "stop" while still carrying tool_calls.
|
|
func mapFinish(reason string, hasToolCalls bool) llm.FinishReason {
|
|
if hasToolCalls {
|
|
return llm.FinishToolCalls
|
|
}
|
|
switch reason {
|
|
case "stop":
|
|
return llm.FinishStop
|
|
case "length":
|
|
return llm.FinishLength
|
|
case "tool_calls":
|
|
return llm.FinishToolCalls
|
|
case "content_filter":
|
|
return llm.FinishContentFilter
|
|
default:
|
|
return llm.FinishOther
|
|
}
|
|
}
|
|
|
|
// checkRequest enforces the model's effective capabilities. Why enforcement
|
|
// rather than normalization: a separate media layer resizes/transcodes
|
|
// images BEFORE requests reach the provider; this check is the honest
|
|
// backstop that refuses, with llm.ErrUnsupported, what the target
|
|
// declaredly cannot serve (chains advance past it penalty-free).
|
|
func checkRequest(caps llm.Capabilities, req llm.Request) error {
|
|
if len(req.Tools) > 0 && !caps.SupportsTools {
|
|
return fmt.Errorf("%w: tools not supported", llm.ErrUnsupported)
|
|
}
|
|
if len(req.Schema) > 0 && !caps.SupportsStructured {
|
|
return fmt.Errorf("%w: structured output not supported", llm.ErrUnsupported)
|
|
}
|
|
images := 0
|
|
for _, msg := range req.Messages {
|
|
for _, part := range msg.Parts {
|
|
img, ok := part.(llm.ImagePart)
|
|
if !ok {
|
|
continue
|
|
}
|
|
images++
|
|
if !caps.SupportsImages() {
|
|
return fmt.Errorf("%w: image input not supported", llm.ErrUnsupported)
|
|
}
|
|
if !caps.MIMEAllowed(img.MIME) {
|
|
return fmt.Errorf("%w: image MIME type %q not allowed (allowed: %s)",
|
|
llm.ErrUnsupported, img.MIME, strings.Join(caps.AllowedImageMIME, ", "))
|
|
}
|
|
if caps.MaxImageBytes > 0 && len(img.Data) > caps.MaxImageBytes {
|
|
return fmt.Errorf("%w: image is %d bytes, limit is %d",
|
|
llm.ErrUnsupported, len(img.Data), caps.MaxImageBytes)
|
|
}
|
|
}
|
|
}
|
|
if images > caps.MaxImagesPerReq {
|
|
return fmt.Errorf("%w: request carries %d images, limit is %d",
|
|
llm.ErrUnsupported, images, caps.MaxImagesPerReq)
|
|
}
|
|
return nil
|
|
}
|