Files
majordomo/provider/openai/wire.go
T
steve 0147a79d18
CI / Tidy (push) Successful in 9m31s
CI / Build & Test (push) Successful in 10m13s
feat: conversion-driven extensions — resolvers, DefineTool, hooks, ops controls
Phase 9a (ADR-0014): Registry.RegisterResolver for dynamic tiers;
DefineTool[Args] typed tools; Usage cache/reasoning detail fields wired
through anthropic/openai/google; WithPromptCaching (Anthropic
cache_control); agent supervision hooks (WithMaxStepsFunc, WithSteer,
WithCompactor, WithToolErrorLimits + ErrToolLoop); health
Bench/Unbench/Snapshot; ChainConfig.Observer failover events.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
2026-06-10 13:30:06 +02:00

345 lines
10 KiB
Go

package openai
import (
"encoding/base64"
"encoding/json"
"strings"
"gitea.stevedudenhoeffer.com/steve/majordomo/llm"
)
// --- request wire shapes ---
// chatRequest is the JSON request body assembled by buildRequest. Every field
// other than model and messages is tagged omitempty, so zero values are left
// off the wire.
type chatRequest struct {
Model string `json:"model"`
Messages []wireMessage `json:"messages"`
Tools []wireTool `json:"tools,omitempty"`
// ToolChoice is "auto"/"none"/"required" (string) or a named-function
// object; any avoids two fields for one wire key.
ToolChoice any `json:"tool_choice,omitempty"`
// Temperature and TopP are pointers so that an unset value (nil) is
// omitted while an explicit 0 is still serialized.
Temperature *float64 `json:"temperature,omitempty"`
TopP *float64 `json:"top_p,omitempty"`
// MaxCompletionTokens and MaxTokens are two wire keys for the output-token
// limit; buildRequest fills at most one of them per request.
MaxCompletionTokens int `json:"max_completion_tokens,omitempty"`
MaxTokens int `json:"max_tokens,omitempty"`
Stop []string `json:"stop,omitempty"`
ReasoningEffort string `json:"reasoning_effort,omitempty"`
ResponseFormat *wireRespFormat `json:"response_format,omitempty"`
// Stream and StreamOptions are set together by buildRequest when a
// streaming response is requested.
Stream bool `json:"stream,omitempty"`
StreamOptions *wireStreamOptions `json:"stream_options,omitempty"`
}
// wireMessage is one entry of chatRequest.Messages.
type wireMessage struct {
Role string `json:"role"`
// Content is a string for text-only turns, a part array for multimodal
// turns, or nil (wire null) for assistant turns that only call tools.
// The tag has no omitempty, so the key is always present.
Content any `json:"content"`
// ToolCalls is filled by buildRequest on assistant messages only.
ToolCalls []wireToolCall `json:"tool_calls,omitempty"`
// ToolCallID is filled by buildRequest on tool messages only; it carries
// the ID of the tool result being reported.
ToolCallID string `json:"tool_call_id,omitempty"`
}
// wireTextPart is the text element of a multimodal content array.
// contentValue emits it with Type set to "text".
type wireTextPart struct {
Type string `json:"type"`
Text string `json:"text"`
}
// wireImagePart is the image element of a multimodal content array.
// contentValue emits it with Type set to "image_url".
type wireImagePart struct {
Type string `json:"type"`
ImageURL wireImageURL `json:"image_url"`
}
// wireImageURL wraps the URL of an image part. contentValue fills URL with a
// base64 "data:" URL built from the image's MIME type and bytes.
type wireImageURL struct {
URL string `json:"url"`
}
// wireToolCall is one tool invocation attached to an assistant message. It is
// used both when encoding requests (wireMessage.ToolCalls) and when decoding
// responses (wireRespMessage.ToolCalls). buildRequest always sets Type to
// "function".
type wireToolCall struct {
ID string `json:"id"`
Type string `json:"type"`
Function wireFunctionCall `json:"function"`
}
// wireFunctionCall names the function being called and carries its arguments.
// It is also embedded in streamToolCallDelta, where Arguments holds only a
// fragment of the full argument string.
type wireFunctionCall struct {
Name string `json:"name"`
// Arguments is a JSON-encoded STRING per the wire format, not an object.
Arguments string `json:"arguments"`
}
// wireTool declares one tool offered to the model in chatRequest.Tools.
// buildRequest always sets Type to "function".
type wireTool struct {
Type string `json:"type"`
Function wireToolFunction `json:"function"`
}
// wireToolFunction describes a tool's callable signature.
type wireToolFunction struct {
Name string `json:"name"`
Description string `json:"description,omitempty"`
// Parameters is forwarded as raw JSON exactly as supplied on the canonical
// tool definition (presumably a JSON Schema object — confirm with caller).
Parameters json.RawMessage `json:"parameters,omitempty"`
}
// wireNamedToolChoice is the object form of tool_choice. buildRequest uses it,
// with Type "function", when the requested choice is anything other than "",
// "auto", "none" or "required".
type wireNamedToolChoice struct {
Type string `json:"type"`
Function wireToolName `json:"function"`
}
// wireToolName wraps the tool name inside a wireNamedToolChoice.
type wireToolName struct {
Name string `json:"name"`
}
// wireRespFormat is the response_format request field. buildRequest populates
// it only when the request carries a schema, with Type "json_schema" and
// JSONSchema set.
type wireRespFormat struct {
Type string `json:"type"`
JSONSchema *wireJSONSchema `json:"json_schema,omitempty"`
}
// wireJSONSchema omits the strict flag on purpose: strict mode imposes
// schema rewrites (every property required, additionalProperties:false at
// every level) that belong to the caller, not the transport.
type wireJSONSchema struct {
// Name labels the schema; buildRequest falls back to "response" when the
// request does not supply one.
Name string `json:"name"`
// Schema is the caller's schema document, forwarded as raw JSON.
Schema json.RawMessage `json:"schema"`
}
// wireStreamOptions is the stream_options request field. buildRequest attaches
// it only for streaming requests, always with IncludeUsage set to true.
type wireStreamOptions struct {
// IncludeUsage has no omitempty, so the key is serialized whenever the
// struct itself is present.
IncludeUsage bool `json:"include_usage"`
}
// --- response wire shapes (loose: unknown fields ignored) ---
// chatResponse is the decoded body of a complete (non-streamed) response;
// streamed responses decode into streamChunk instead.
type chatResponse struct {
ID string `json:"id"`
Object string `json:"object"`
Created int64 `json:"created"`
Model string `json:"model"`
Choices []chatChoice `json:"choices"`
// Usage is a pointer so that a response without a usage object leaves it nil.
Usage *wireUsage `json:"usage"`
}
// chatChoice is one element of chatResponse.Choices: the generated message
// plus the reason generation stopped.
type chatChoice struct {
Index int `json:"index"`
Message wireRespMessage `json:"message"`
FinishReason string `json:"finish_reason"`
}
// wireRespMessage is the message carried by a chatChoice. Unlike the request
// side wireMessage, Content is decoded as a plain string only.
type wireRespMessage struct {
Role string `json:"role"`
Content string `json:"content"` // null decodes to ""
Refusal string `json:"refusal"` // tolerated, unused
ToolCalls []wireToolCall `json:"tool_calls"`
}
// wireUsage is the token accounting reported by the server, used by both
// chatResponse and streamChunk. toUsage converts it to the canonical form.
type wireUsage struct {
PromptTokens int `json:"prompt_tokens"`
CompletionTokens int `json:"completion_tokens"`
// TotalTokens is decoded but not read by toUsage.
TotalTokens int `json:"total_tokens"`
// The detail objects are pointers so that their absence decodes to nil;
// toUsage checks for nil before reading them.
PromptTokensDetails *wirePromptDetail `json:"prompt_tokens_details"`
CompletionTokensDetails *wireOutputDetail `json:"completion_tokens_details"`
}
// wirePromptDetail is the optional prompt-side usage breakdown. toUsage maps
// CachedTokens onto the canonical CacheReadTokens.
type wirePromptDetail struct {
CachedTokens int `json:"cached_tokens"`
}
// wireOutputDetail is the optional completion-side usage breakdown. toUsage
// maps ReasoningTokens onto the canonical ReasoningTokens.
type wireOutputDetail struct {
ReasoningTokens int `json:"reasoning_tokens"`
}
// toUsage converts the wire-level token accounting into the canonical
// llm.Usage. Prompt and completion counts are always copied. The cache-read
// and reasoning counts are copied only when the matching detail object was
// present on the wire (many compat servers omit them) and are zero otherwise.
// TotalTokens is not carried over.
func (u *wireUsage) toUsage() llm.Usage {
	usage := llm.Usage{
		InputTokens:  u.PromptTokens,
		OutputTokens: u.CompletionTokens,
	}
	if prompt := u.PromptTokensDetails; prompt != nil {
		usage.CacheReadTokens = prompt.CachedTokens
	}
	if completion := u.CompletionTokensDetails; completion != nil {
		usage.ReasoningTokens = completion.ReasoningTokens
	}
	return usage
}
// errorEnvelope is the outer {"error": {...}} object wrapping a wireError.
type errorEnvelope struct {
Error wireError `json:"error"`
}
// wireError is the error detail object. It appears inside errorEnvelope and,
// for errors reported mid-stream, directly on streamChunk.
//
// NOTE(review): Code is decoded as a string; a server that sends a numeric
// code would make decoding fail. Some OpenAI-compatible servers reportedly do
// this — confirm whether that case needs handling.
type wireError struct {
Message string `json:"message"`
Type string `json:"type"`
Code string `json:"code"` // null decodes to ""
}
// --- streaming wire shapes ---
// streamChunk is one decoded event of a streamed response. A chunk may carry
// choice deltas, usage, or an error; Usage and Error are pointers so that
// their absence decodes to nil.
type streamChunk struct {
Choices []streamChoice `json:"choices"`
Usage *wireUsage `json:"usage"`
Error *wireError `json:"error"` // mid-stream error event
}
// streamChoice is one element of streamChunk.Choices: an incremental delta
// plus, when present, the finish reason.
type streamChoice struct {
Index int `json:"index"`
Delta streamDelta `json:"delta"`
FinishReason string `json:"finish_reason"` // null decodes to ""
}
// streamDelta is the incremental payload of a streamChoice: a piece of text
// content and/or tool-call fragments.
type streamDelta struct {
Content string `json:"content"` // null decodes to ""
ToolCalls []streamToolCallDelta `json:"tool_calls"`
}
// streamToolCallDelta is one tool-call fragment. The id and name appear only
// on a call's first fragment; later fragments carry just index + an
// arguments substring. Accumulation keys on Index, never ID.
type streamToolCallDelta struct {
// Index identifies which tool call this fragment belongs to.
Index int `json:"index"`
// ID is empty on every fragment after the first.
ID string `json:"id"`
// Function.Name is empty after the first fragment; Function.Arguments is
// the next piece of the JSON argument string.
Function wireFunctionCall `json:"function"`
}
// --- mapping: llm.Request -> chatRequest ---
// buildRequest converts a canonical llm.Request into the chatRequest wire
// shape for model m; stream selects the streaming variant. The capability
// check has already passed by the time this runs.
//
// System text from Request.System and from every RoleSystem message is merged
// into a single leading system message (System field first, separated by a
// blank line); all other messages follow in their original order.
func (m *model) buildRequest(req llm.Request, stream bool) *chatRequest {
	wire := &chatRequest{
		Model:           m.id,
		Temperature:     req.Temperature,
		TopP:            req.TopP,
		Stop:            req.StopSequences,
		ReasoningEffort: req.ReasoningEffort,
		Stream:          stream,
	}
	if stream {
		// Why: without include_usage the stream never reports token counts;
		// the usage arrives in one extra chunk with an empty choices array.
		wire.StreamOptions = &wireStreamOptions{IncludeUsage: true}
	}

	// Partition the conversation in one pass: system text is collected for
	// folding, everything else becomes a wire turn. Why: the canonical
	// contract allows system content in both Request.System and RoleSystem
	// messages; OpenAI wants one system mechanism.
	var (
		systemTexts []string
		turns       []wireMessage
	)
	if req.System != "" {
		systemTexts = append(systemTexts, req.System)
	}
	for _, msg := range req.Messages {
		switch msg.Role {
		case llm.RoleSystem:
			if text := msg.Text(); text != "" {
				systemTexts = append(systemTexts, text)
			}

		case llm.RoleUser:
			turns = append(turns, wireMessage{Role: "user", Content: contentValue(msg.Parts)})

		case llm.RoleAssistant:
			// Content stays nil (wire null) when the turn has no text.
			reply := wireMessage{Role: "assistant"}
			if text := msg.Text(); text != "" {
				reply.Content = text
			}
			for _, call := range msg.ToolCalls {
				payload := string(call.Arguments)
				if payload == "" {
					// Why: arguments must be a valid JSON document string;
					// an empty string is not one.
					payload = "{}"
				}
				reply.ToolCalls = append(reply.ToolCalls, wireToolCall{
					ID:       call.ID,
					Type:     "function",
					Function: wireFunctionCall{Name: call.Name, Arguments: payload},
				})
			}
			turns = append(turns, reply)

		case llm.RoleTool:
			// One wire message per result: the API pairs each tool output
			// with its call via tool_call_id, one message each.
			for _, result := range msg.ToolResults {
				body := result.Content
				if result.IsError {
					body = "ERROR: " + body
				}
				turns = append(turns, wireMessage{
					Role:       "tool",
					Content:    body,
					ToolCallID: result.ID,
				})
			}
		}
	}
	// Every collected system text is non-empty, so a non-empty list always
	// joins to a non-empty string.
	if len(systemTexts) > 0 {
		wire.Messages = append(wire.Messages, wireMessage{
			Role:    "system",
			Content: strings.Join(systemTexts, "\n\n"),
		})
	}
	wire.Messages = append(wire.Messages, turns...)

	for _, tool := range req.Tools {
		fn := wireToolFunction{
			Name:        tool.Name,
			Description: tool.Description,
			Parameters:  tool.Parameters,
		}
		wire.Tools = append(wire.Tools, wireTool{Type: "function", Function: fn})
	}

	// An empty choice is omitted so the provider default applies ("auto" when
	// tools are present). The three keywords pass through as strings; any
	// other value names the one tool the model must call.
	if choice := req.ToolChoice; choice == "auto" || choice == "none" || choice == "required" {
		wire.ToolChoice = choice
	} else if choice != "" {
		wire.ToolChoice = wireNamedToolChoice{
			Type:     "function",
			Function: wireToolName{Name: choice},
		}
	}

	// The token limit goes out under exactly one key, chosen by the
	// provider's legacyMaxTokens setting; a non-positive limit is not sent.
	switch {
	case req.MaxTokens <= 0:
	case m.p.legacyMaxTokens:
		wire.MaxTokens = req.MaxTokens
	default:
		wire.MaxCompletionTokens = req.MaxTokens
	}

	if len(req.Schema) > 0 {
		schema := &wireJSONSchema{Name: req.SchemaName, Schema: req.Schema}
		if schema.Name == "" {
			schema.Name = "response"
		}
		wire.ResponseFormat = &wireRespFormat{Type: "json_schema", JSONSchema: schema}
	}

	return wire
}
// contentValue turns message parts into the value placed in a wire message's
// content field. When at least one image part is present the result is a
// []any of wireTextPart / wireImagePart values in the original order, with
// each image encoded as a base64 data URL. Otherwise the result is a single
// string (maximum compat) made by concatenating every text part; it is the
// empty string when there are none. Parts of any other type are skipped.
func contentValue(parts []llm.Part) any {
	hasImage := false
	for i := 0; i < len(parts) && !hasImage; i++ {
		_, hasImage = parts[i].(llm.ImagePart)
	}

	if hasImage {
		rendered := make([]any, 0, len(parts))
		for _, part := range parts {
			switch v := part.(type) {
			case llm.ImagePart:
				dataURL := "data:" + v.MIME + ";base64," + base64.StdEncoding.EncodeToString(v.Data)
				rendered = append(rendered, wireImagePart{
					Type:     "image_url",
					ImageURL: wireImageURL{URL: dataURL},
				})
			case llm.TextPart:
				rendered = append(rendered, wireTextPart{Type: "text", Text: v.Text})
			}
		}
		return rendered
	}

	var text strings.Builder
	for _, part := range parts {
		if tp, isText := part.(llm.TextPart); isText {
			text.WriteString(tp.Text)
		}
	}
	return text.String()
}