// majordomo/provider/openai/wire.go

package openai

import (
	"encoding/base64"
	"encoding/json"
	"strings"

	"gitea.stevedudenhoeffer.com/steve/majordomo/llm"
)

// --- request wire shapes ---

// chatRequest is the JSON request body assembled by buildRequest. Every
// field except model and messages carries omitempty, so zero values are
// left off the wire. Temperature and TopP are pointers: nil means "not set"
// and is omitted, while an explicit 0 is still encoded. buildRequest fills
// at most one of MaxCompletionTokens and MaxTokens.
type chatRequest struct {
	Model    string        `json:"model"`
	Messages []wireMessage `json:"messages"`
	Tools    []wireTool    `json:"tools,omitempty"`
	// ToolChoice is "auto"/"none"/"required" (string) or a named-function
	// object; any avoids two fields for one wire key.
	ToolChoice          any                `json:"tool_choice,omitempty"`
	Temperature         *float64           `json:"temperature,omitempty"`
	TopP                *float64           `json:"top_p,omitempty"`
	MaxCompletionTokens int                `json:"max_completion_tokens,omitempty"`
	MaxTokens           int                `json:"max_tokens,omitempty"`
	Stop                []string           `json:"stop,omitempty"`
	ReasoningEffort     string             `json:"reasoning_effort,omitempty"`
	ResponseFormat      *wireRespFormat    `json:"response_format,omitempty"`
	Stream              bool               `json:"stream,omitempty"`
	StreamOptions       *wireStreamOptions `json:"stream_options,omitempty"`
}

// wireMessage is one entry of chatRequest.Messages. buildRequest emits the
// roles "system", "user", "assistant" and "tool"; ToolCalls is filled on
// assistant turns and ToolCallID on tool turns. Content has no omitempty,
// so a nil Content is encoded as JSON null rather than dropped.
type wireMessage struct {
	Role string `json:"role"`
	// Content is a string for text-only turns, a part array for multimodal
	// turns, or nil (wire null) for assistant turns that only call tools.
	Content    any            `json:"content"`
	ToolCalls  []wireToolCall `json:"tool_calls,omitempty"`
	ToolCallID string         `json:"tool_call_id,omitempty"`
}

// wireTextPart is the text element of a multimodal content array.
// contentValue always sets Type to "text".
type wireTextPart struct {
	Type string `json:"type"`
	Text string `json:"text"`
}

// wireImagePart is the image element of a multimodal content array.
// contentValue always sets Type to "image_url".
type wireImagePart struct {
	Type     string       `json:"type"`
	ImageURL wireImageURL `json:"image_url"`
}

// wireImageURL wraps the location of an image. contentValue fills URL with
// a base64 "data:" URL built from the image part's MIME type and bytes.
type wireImageURL struct {
	URL string `json:"url"`
}

// wireToolCall is a single tool invocation. It is used in both directions:
// in outgoing assistant messages (wireMessage.ToolCalls) and in decoded
// responses (wireRespMessage.ToolCalls). buildRequest always sets Type to
// "function".
type wireToolCall struct {
	ID       string           `json:"id"`
	Type     string           `json:"type"`
	Function wireFunctionCall `json:"function"`
}

// wireFunctionCall names the function being invoked and carries its
// arguments. The same shape is reused for streamed fragments
// (streamToolCallDelta.Function), where Arguments holds only a substring.
type wireFunctionCall struct {
	Name string `json:"name"`
	// Arguments is a JSON-encoded STRING per the wire format, not an object.
	Arguments string `json:"arguments"`
}

// wireTool is one entry of chatRequest.Tools. buildRequest always sets Type
// to "function".
type wireTool struct {
	Type     string           `json:"type"`
	Function wireToolFunction `json:"function"`
}

// wireToolFunction describes a tool the model may call. Parameters is
// passed through as raw JSON without inspection (presumably a JSON Schema
// object — verify against the caller); it and Description are omitted when
// empty.
type wireToolFunction struct {
	Name        string          `json:"name"`
	Description string          `json:"description,omitempty"`
	Parameters  json.RawMessage `json:"parameters,omitempty"`
}

// wireNamedToolChoice is the object form of tool_choice that names one
// specific tool. buildRequest uses it, with Type "function", for any
// ToolChoice value other than "", "auto", "none" or "required".
type wireNamedToolChoice struct {
	Type     string       `json:"type"`
	Function wireToolName `json:"function"`
}

// wireToolName is the {"name": ...} object nested in wireNamedToolChoice.
type wireToolName struct {
	Name string `json:"name"`
}

// wireRespFormat is the response_format request object. buildRequest only
// creates it when the request carries a schema, setting Type to
// "json_schema" and filling JSONSchema.
type wireRespFormat struct {
	Type       string          `json:"type"`
	JSONSchema *wireJSONSchema `json:"json_schema,omitempty"`
}

// wireJSONSchema is the named schema nested in wireRespFormat. Schema is
// forwarded as raw JSON; buildRequest substitutes the name "response" when
// the request does not supply one.
//
// It omits the strict flag on purpose: strict mode imposes schema rewrites
// (every property required, additionalProperties:false at every level) that
// belong to the caller, not the transport.
type wireJSONSchema struct {
	Name   string          `json:"name"`
	Schema json.RawMessage `json:"schema"`
}

// wireStreamOptions is the stream_options request object. IncludeUsage has
// no omitempty, so it is always encoded once the object is present;
// buildRequest sets it to true on every streaming request.
type wireStreamOptions struct {
	IncludeUsage bool `json:"include_usage"`
}

// --- response wire shapes (loose: unknown fields ignored) ---

// chatResponse is the decoded body of a non-streaming response. Usage is a
// pointer so that an absent or null usage object stays nil instead of
// looking like a report of zero tokens.
type chatResponse struct {
	ID      string       `json:"id"`
	Object  string       `json:"object"`
	Created int64        `json:"created"`
	Model   string       `json:"model"`
	Choices []chatChoice `json:"choices"`
	Usage   *wireUsage   `json:"usage"`
}

// chatChoice is one element of chatResponse.Choices: the generated message
// plus the reason generation stopped.
type chatChoice struct {
	Index        int             `json:"index"`
	Message      wireRespMessage `json:"message"`
	FinishReason string          `json:"finish_reason"`
}

// wireRespMessage is the message inside a chatChoice. Content and Refusal
// are plain strings, so a JSON null leaves them as the empty string.
type wireRespMessage struct {
	Role      string         `json:"role"`
	Content   string         `json:"content"` // null decodes to ""
	Refusal   string         `json:"refusal"` // tolerated, unused
	ToolCalls []wireToolCall `json:"tool_calls"`
}

// wireUsage is the token accounting object found in chatResponse and
// streamChunk. The two detail objects are pointers because they may be
// missing; toUsage checks each for nil. TotalTokens is decoded but not read
// by toUsage.
type wireUsage struct {
	PromptTokens            int               `json:"prompt_tokens"`
	CompletionTokens        int               `json:"completion_tokens"`
	TotalTokens             int               `json:"total_tokens"`
	PromptTokensDetails     *wirePromptDetail `json:"prompt_tokens_details"`
	CompletionTokensDetails *wireOutputDetail `json:"completion_tokens_details"`
}

// wirePromptDetail is the prompt_tokens_details object. toUsage copies
// CachedTokens into the canonical Usage.CacheReadTokens.
type wirePromptDetail struct {
	CachedTokens int `json:"cached_tokens"`
}

// wireOutputDetail is the completion_tokens_details object. toUsage copies
// ReasoningTokens into the canonical Usage.ReasoningTokens.
type wireOutputDetail struct {
	ReasoningTokens int `json:"reasoning_tokens"`
}

// toUsage maps wire usage onto the canonical llm.Usage: prompt tokens become
// InputTokens and completion tokens become OutputTokens. The optional detail
// objects (absent on many compat servers) contribute CacheReadTokens and
// ReasoningTokens when present; otherwise those stay zero. TotalTokens is
// not carried over.
//
// A nil receiver yields the zero Usage. Why: every *wireUsage in this file
// is an optional pointer that stays nil when the server omits the usage
// object, so the method must not dereference it blindly.
func (u *wireUsage) toUsage() llm.Usage {
	if u == nil {
		return llm.Usage{}
	}
	out := llm.Usage{InputTokens: u.PromptTokens, OutputTokens: u.CompletionTokens}
	if u.PromptTokensDetails != nil {
		out.CacheReadTokens = u.PromptTokensDetails.CachedTokens
	}
	if u.CompletionTokensDetails != nil {
		out.ReasoningTokens = u.CompletionTokensDetails.ReasoningTokens
	}
	return out
}

// errorEnvelope is the {"error": {...}} wrapper around a wireError.
// NOTE(review): presumably the body of a failed HTTP response — the decoding
// site is outside this section, confirm there.
type errorEnvelope struct {
	Error wireError `json:"error"`
}

// wireError is the error object carried by errorEnvelope and by mid-stream
// error chunks (streamChunk.Error). Code is always exposed as a string, even
// when the server sent a different JSON type; see UnmarshalJSON.
type wireError struct {
	Message string `json:"message"`
	Type    string `json:"type"`
	Code    string `json:"code"` // null decodes to ""; non-strings keep their literal text
}

// UnmarshalJSON decodes an error object while tolerating a non-string code.
// Why: some OpenAI-compatible servers send code as a number (for example an
// HTTP status such as 429); decoding that straight into a string field fails
// the whole decode and loses the error message with it.
//
// A string code is unquoted as usual. A missing or null code leaves Code
// untouched, matching encoding/json's default behavior. Any other JSON value
// is stored as its literal text, so 429 becomes "429". Message and Type keep
// their strict string decoding. On error the receiver is left unmodified.
func (e *wireError) UnmarshalJSON(data []byte) error {
	// Seed with the current values so keys absent from data stay unchanged,
	// exactly as a plain struct decode would leave them.
	raw := struct {
		Message string          `json:"message"`
		Type    string          `json:"type"`
		Code    json.RawMessage `json:"code"`
	}{Message: e.Message, Type: e.Type}
	if err := json.Unmarshal(data, &raw); err != nil {
		return err
	}

	code := e.Code
	switch {
	case len(raw.Code) == 0 || string(raw.Code) == "null":
		// Absent or null: keep the existing value.
	case raw.Code[0] == '"':
		if err := json.Unmarshal(raw.Code, &code); err != nil {
			return err
		}
	default:
		code = string(raw.Code)
	}

	e.Message, e.Type, e.Code = raw.Message, raw.Type, code
	return nil
}

// --- streaming wire shapes ---

// streamChunk is one decoded streaming event. Usage is nil on chunks that
// carry no usage object; per the note in buildRequest, the counts requested
// via include_usage arrive in one extra chunk whose Choices is empty. Error
// is non-nil only when the event is an error payload.
type streamChunk struct {
	Choices []streamChoice `json:"choices"`
	Usage   *wireUsage     `json:"usage"`
	Error   *wireError     `json:"error"` // mid-stream error event
}

// streamChoice is one element of streamChunk.Choices. Delta holds the
// incremental payload; FinishReason stays "" while the wire value is null.
type streamChoice struct {
	Index        int         `json:"index"`
	Delta        streamDelta `json:"delta"`
	FinishReason string      `json:"finish_reason"` // null decodes to ""
}

// streamDelta is the incremental payload of a streamChoice: a text fragment
// in Content and/or tool-call fragments in ToolCalls.
type streamDelta struct {
	Content   string                `json:"content"` // null decodes to ""
	ToolCalls []streamToolCallDelta `json:"tool_calls"`
}

// streamToolCallDelta is one tool-call fragment. The id and name appear only
// on a call's first fragment; later fragments carry just index + an
// arguments substring. Accumulation keys on Index, never ID.
//
// Function reuses wireFunctionCall, so on continuation fragments its Name is
// "" and its Arguments is only that fragment's piece of the final string.
type streamToolCallDelta struct {
	Index    int              `json:"index"`
	ID       string           `json:"id"`
	Function wireFunctionCall `json:"function"`
}

// --- mapping: llm.Request -> chatRequest ---

// buildRequest translates the canonical request to the wire shape. The
// capability check has already passed by the time this runs.
//
// req is the canonical request. stream selects the streaming variant, which
// sets stream=true and asks for usage via stream_options. The result is a
// freshly allocated chatRequest whose Stop slice, tool Parameters and
// response Schema share backing data with req rather than copying it.
//
// Messages with a role other than system, user, assistant or tool are
// dropped, as are tool messages that carry no results.
func (m *model) buildRequest(req llm.Request, stream bool) *chatRequest {
	out := &chatRequest{
		Model:           m.id,
		Temperature:     req.Temperature,
		TopP:            req.TopP,
		Stop:            req.StopSequences,
		ReasoningEffort: req.ReasoningEffort,
	}

	// Fold Request.System and every RoleSystem message into one leading
	// system message, System field first. Why: the canonical contract allows
	// system content in both places; OpenAI wants one system mechanism.
	var sys []string
	if req.System != "" {
		sys = append(sys, req.System)
	}
	for _, msg := range req.Messages {
		if msg.Role == llm.RoleSystem {
			if t := msg.Text(); t != "" {
				sys = append(sys, t)
			}
		}
	}
	if joined := strings.Join(sys, "\n\n"); joined != "" {
		out.Messages = append(out.Messages, wireMessage{Role: "system", Content: joined})
	}

	for _, msg := range req.Messages {
		switch msg.Role {
		case llm.RoleSystem:
			// Folded above; excluded from the normal message list.
		case llm.RoleUser:
			out.Messages = append(out.Messages, wireMessage{Role: "user", Content: contentValue(msg.Parts)})
		case llm.RoleAssistant:
			wm := wireMessage{Role: "assistant"}
			// Why: null content is only valid next to tool_calls. A turn
			// with neither text nor tool calls must still send a string, so
			// Content stays nil only for tool-call-only turns.
			if text := msg.Text(); text != "" || len(msg.ToolCalls) == 0 {
				wm.Content = text
			}
			for _, tc := range msg.ToolCalls {
				args := string(tc.Arguments)
				if args == "" {
					// Why: arguments must be a valid JSON document string;
					// an empty string is not one.
					args = "{}"
				}
				wm.ToolCalls = append(wm.ToolCalls, wireToolCall{
					ID:       tc.ID,
					Type:     "function",
					Function: wireFunctionCall{Name: tc.Name, Arguments: args},
				})
			}
			out.Messages = append(out.Messages, wm)
		case llm.RoleTool:
			// One wire message per result: the API pairs each tool output
			// with its call via tool_call_id, one message each.
			for _, tr := range msg.ToolResults {
				content := tr.Content
				if tr.IsError {
					content = "ERROR: " + content
				}
				out.Messages = append(out.Messages, wireMessage{
					Role:       "tool",
					Content:    content,
					ToolCallID: tr.ID,
				})
			}
		}
	}

	// Size the tool list once; it stays nil when there are no tools.
	if len(req.Tools) > 0 {
		out.Tools = make([]wireTool, 0, len(req.Tools))
	}
	for _, t := range req.Tools {
		out.Tools = append(out.Tools, wireTool{
			Type:     "function",
			Function: wireToolFunction{Name: t.Name, Description: t.Description, Parameters: t.Parameters},
		})
	}

	switch req.ToolChoice {
	case "":
		// Omit: provider default ("auto" when tools are present).
	case "auto", "none", "required":
		out.ToolChoice = req.ToolChoice
	default:
		// Any other value names the one tool the model must call.
		out.ToolChoice = wireNamedToolChoice{Type: "function", Function: wireToolName{Name: req.ToolChoice}}
	}

	// Exactly one of the two limit fields is sent, chosen by the provider's
	// legacyMaxTokens setting; a non-positive limit sends neither.
	if req.MaxTokens > 0 {
		if m.p.legacyMaxTokens {
			out.MaxTokens = req.MaxTokens
		} else {
			out.MaxCompletionTokens = req.MaxTokens
		}
	}

	if len(req.Schema) > 0 {
		name := req.SchemaName
		if name == "" {
			name = "response"
		}
		out.ResponseFormat = &wireRespFormat{
			Type:       "json_schema",
			JSONSchema: &wireJSONSchema{Name: name, Schema: req.Schema},
		}
	}

	if stream {
		out.Stream = true
		// Why: without include_usage the stream never reports token counts;
		// the usage arrives in one extra chunk with an empty choices array.
		out.StreamOptions = &wireStreamOptions{IncludeUsage: true}
	}

	return out
}

// contentValue converts message parts into the value sent as a message's
// "content". If no part is an image, the text parts are concatenated in
// order and returned as one string, the form every compatible server
// accepts. Otherwise the result is a []any of wireTextPart and
// wireImagePart entries in the original order, each image inlined as a
// base64 data: URL. Parts of any other type are skipped in both forms.
func contentValue(parts []llm.Part) any {
	hasImage := false
	for i := range parts {
		if _, isImage := parts[i].(llm.ImagePart); isImage {
			hasImage = true
			break
		}
	}

	if hasImage {
		wire := make([]any, 0, len(parts))
		for _, part := range parts {
			switch p := part.(type) {
			case llm.ImagePart:
				encoded := base64.StdEncoding.EncodeToString(p.Data)
				wire = append(wire, wireImagePart{
					Type:     "image_url",
					ImageURL: wireImageURL{URL: "data:" + p.MIME + ";base64," + encoded},
				})
			case llm.TextPart:
				wire = append(wire, wireTextPart{Type: "text", Text: p.Text})
			}
		}
		return wire
	}

	// Text-only: flatten to a single string.
	var text strings.Builder
	for _, part := range parts {
		if tp, isText := part.(llm.TextPart); isText {
			text.WriteString(tp.Text)
		}
	}
	return text.String()
}