Files
majordomo/provider/ollama/wire.go
T
steve 043249e0e1 feat: OpenAI, Anthropic, and native-Ollama providers + media pipeline
Phase 3:
- provider/openai: Chat Completions for OpenAI + compat endpoints (SSE
  streaming with by-index tool-call assembly, response_format json_schema,
  legacy max_tokens option, reasoning_effort)
- provider/anthropic: Messages API (tool_use/tool_result, GA structured
  output via output_config.format, full SSE event parser, 529 transient)
- provider/ollama: one native /api/chat client behind the ollama,
  ollama-cloud, and foreman built-ins (presets; NDJSON streaming tolerant
  of foreman's buffered single-object responses; object tool arguments;
  format-schema structured output; think mapping)
- media/: capability normalization (sniff, downscale, transcode, byte
  ladder, ErrUnsupported), wired into the chain executor per target with
  penalty-free advance past incapable elements
- registry: real provider + scheme wiring, WithHTTPClient option, required
  env-foreman TLS chat round-trip test
- ADR-0009 multimodal strategy, ADR-0010 tools/structured mapping; README
  matrix + CLAUDE.md synced

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
2026-06-10 12:58:08 +02:00

344 lines
9.4 KiB
Go

package ollama
import (
"bytes"
"context"
"encoding/base64"
"encoding/json"
"fmt"
"io"
"net/http"
"strconv"
"strings"
"gitea.stevedudenhoeffer.com/steve/majordomo/llm"
)
// ---- wire types (field names per ollama api/types.go) ----
// chatRequest is the JSON body that do marshals and POSTs to /api/chat.
type chatRequest struct {
	Model    string          `json:"model"`
	Messages []chatMessage   `json:"messages"`
	Tools    []toolDef       `json:"tools,omitempty"`
	Format   json.RawMessage `json:"format,omitempty"`
	Options  map[string]any  `json:"options,omitempty"`

	// Stream is always serialized (no omitempty, deliberately): the server
	// treats a missing value as true, so Generate has to send an explicit
	// false.
	Stream bool `json:"stream"`

	// Think carries either a JSON bool or one of the JSON strings
	// "low"/"medium"/"high"; it is kept raw so both shapes fit.
	Think json.RawMessage `json:"think,omitempty"`
}
// chatMessage is a single entry of chatRequest.Messages.
type chatMessage struct {
	Role    string `json:"role"`
	Content string `json:"content"`

	// Images holds raw base64 payloads, without any "data:" prefix.
	Images []string `json:"images,omitempty"`

	ToolCalls []toolCall `json:"tool_calls,omitempty"`

	// ToolName is set on role:"tool" messages that carry a tool result.
	ToolName string `json:"tool_name,omitempty"`
}
// toolDef is one entry of chatRequest.Tools. buildRequest always sets Type
// to "function".
type toolDef struct {
	Type     string      `json:"type"`
	Function toolDefFunc `json:"function"`
}
// toolDefFunc describes the function advertised by a toolDef: its name, an
// optional description, and its parameter schema as raw JSON.
type toolDefFunc struct {
	Name        string          `json:"name"`
	Description string          `json:"description,omitempty"`
	Parameters  json.RawMessage `json:"parameters,omitempty"`
}
// toolCall is a tool invocation as it appears in a message's tool_calls
// list. ID is optional on the wire and is omitted when empty.
type toolCall struct {
	ID       string       `json:"id,omitempty"`
	Function toolCallFunc `json:"function"`
}
// toolCallFunc names the function a toolCall invokes and carries its
// arguments.
type toolCallFunc struct {
	Index int    `json:"index,omitempty"`
	Name  string `json:"name"`

	// Arguments travels as a JSON OBJECT (OpenAI, by contrast, sends a
	// string), so it is kept as raw JSON.
	Arguments json.RawMessage `json:"arguments"`
}
// chatResponse is the JSON object Generate decodes from the /api/chat
// response body. toResponse reports PromptEvalCount as input tokens and
// EvalCount as output tokens.
type chatResponse struct {
	Model           string      `json:"model"`
	Message         respMessage `json:"message"`
	Done            bool        `json:"done"`
	DoneReason      string      `json:"done_reason"`
	PromptEvalCount int         `json:"prompt_eval_count"`
	EvalCount       int         `json:"eval_count"`
}
// respMessage is the message object nested inside a chatResponse: the
// text content, any thinking text, and any tool calls.
type respMessage struct {
	Role      string     `json:"role"`
	Content   string     `json:"content"`
	Thinking  string     `json:"thinking"`
	ToolCalls []toolCall `json:"tool_calls"`
}
// errorBody is the {"error": "..."} object that do tries to decode from a
// non-2xx response body.
type errorBody struct {
	Error string `json:"error"`
}
// ---- model ----
// model binds one model id to the Provider that serves it.
type model struct {
	// provider supplies the name, base URL, token and HTTP client used by
	// the methods below.
	provider *Provider

	// id is the model name sent in the request's "model" field.
	id string

	// caps is returned by Capabilities and checked by enforceCapabilities.
	caps llm.Capabilities
}
// Capabilities reports the capability set stored on this model.
func (m *model) Capabilities() llm.Capabilities {
	return m.caps
}
// qualified returns the "<provider name>/<model id>" form used in error
// messages and as the Model of responses.
func (m *model) qualified() string {
	return m.provider.name + "/" + m.id
}
// enforceCapabilities is a backstop: the media layer is expected to have
// normalized the request already (see ADR-0009), so this only rejects what
// slipped through.
//
// Every image part in req.Messages is checked, in order, against the
// model's capabilities: image support at all, allowed MIME type, and the
// per-image byte limit (when one is set). After the scan the total image
// count is checked against the per-request limit (when one is set). Every
// failure wraps llm.ErrUnsupported; nil means the request is acceptable.
func (m *model) enforceCapabilities(req llm.Request) error {
	images := 0
	for _, msg := range req.Messages {
		for _, part := range msg.Parts {
			img, isImage := part.(llm.ImagePart)
			if !isImage {
				continue
			}
			images++

			switch {
			case !m.caps.SupportsImages():
				return fmt.Errorf("%w: %s does not accept image input", llm.ErrUnsupported, m.qualified())
			case !m.caps.MIMEAllowed(img.MIME):
				return fmt.Errorf("%w: %s does not accept %s images", llm.ErrUnsupported, m.qualified(), img.MIME)
			case m.caps.MaxImageBytes > 0 && len(img.Data) > m.caps.MaxImageBytes:
				return fmt.Errorf("%w: image of %d bytes exceeds %s limit of %d",
					llm.ErrUnsupported, len(img.Data), m.qualified(), m.caps.MaxImageBytes)
			}
		}
	}

	// A limit of zero (or less) means "no per-request cap".
	if limit := m.caps.MaxImagesPerReq; limit > 0 && images > limit {
		return fmt.Errorf("%w: %d images exceed %s limit of %d",
			llm.ErrUnsupported, images, m.qualified(), limit)
	}
	return nil
}
// buildRequest translates the canonical request into the /api/chat wire
// shape; stream is copied verbatim into the Stream field.
//
// The only error it returns is for a ReasoningEffort other than "", "low",
// "medium" or "high".
func (m *model) buildRequest(req llm.Request, stream bool) (*chatRequest, error) {
	wire := &chatRequest{Model: m.id, Stream: stream}

	// All system text becomes one leading "system" message: the dedicated
	// System field first, then the text of every RoleSystem message.
	var systemTexts []string
	if req.System != "" {
		systemTexts = append(systemTexts, req.System)
	}
	for _, msg := range req.Messages {
		if msg.Role != llm.RoleSystem {
			continue
		}
		if text := msg.Text(); text != "" {
			systemTexts = append(systemTexts, text)
		}
	}
	if len(systemTexts) > 0 {
		wire.Messages = append(wire.Messages, chatMessage{
			Role:    "system",
			Content: strings.Join(systemTexts, "\n\n"),
		})
	}

	for _, msg := range req.Messages {
		if msg.Role == llm.RoleSystem {
			// Folded into the leading system message above.
			continue
		}

		if msg.Role == llm.RoleTool {
			// Each tool result becomes its own role:"tool" message.
			for _, res := range msg.ToolResults {
				body := res.Content
				if res.IsError {
					body = "ERROR: " + body
				}
				wire.Messages = append(wire.Messages, chatMessage{
					Role:     "tool",
					Content:  body,
					ToolName: res.Name,
				})
			}
			continue
		}

		entry := chatMessage{Role: string(msg.Role), Content: msg.Text()}
		for _, part := range msg.Parts {
			img, isImage := part.(llm.ImagePart)
			if !isImage {
				continue
			}
			entry.Images = append(entry.Images, base64.StdEncoding.EncodeToString(img.Data))
		}
		for _, tc := range msg.ToolCalls {
			fn := toolCallFunc{Name: tc.Name, Arguments: tc.Arguments}
			if len(fn.Arguments) == 0 {
				// The wire expects an object, so send an empty one.
				fn.Arguments = json.RawMessage("{}")
			}
			entry.ToolCalls = append(entry.ToolCalls, toolCall{ID: tc.ID, Function: fn})
		}
		wire.Messages = append(wire.Messages, entry)
	}

	// Ollama has no tool_choice field. "none" is honored by leaving the
	// tools out entirely; "required" and named choices have no wire
	// equivalent and are ignored on a best-effort basis (documented in the
	// README support matrix).
	if req.ToolChoice != "none" {
		for _, t := range req.Tools {
			fn := toolDefFunc{Name: t.Name, Description: t.Description, Parameters: t.Parameters}
			if len(fn.Parameters) == 0 {
				fn.Parameters = json.RawMessage(`{"type":"object","properties":{}}`)
			}
			wire.Tools = append(wire.Tools, toolDef{Type: "function", Function: fn})
		}
	}

	// Structured output: the schema is passed through as "format".
	if len(req.Schema) > 0 {
		wire.Format = req.Schema
	}

	// Sampling options; the map is attached only when something was set.
	options := map[string]any{}
	if req.Temperature != nil {
		options["temperature"] = *req.Temperature
	}
	if req.TopP != nil {
		options["top_p"] = *req.TopP
	}
	if req.MaxTokens > 0 {
		options["num_predict"] = req.MaxTokens
	}
	if len(req.StopSequences) > 0 {
		options["stop"] = req.StopSequences
	}
	if len(options) != 0 {
		wire.Options = options
	}

	// Reasoning effort maps onto "think" as a JSON string.
	switch effort := req.ReasoningEffort; effort {
	case "low", "medium", "high":
		wire.Think = json.RawMessage(strconv.Quote(effort))
	case "":
		// Not requested: leave "think" off the wire.
	default:
		return nil, fmt.Errorf("ollama: invalid reasoning effort %q (want low/medium/high)", effort)
	}

	return wire, nil
}
// do marshals wireReq and POSTs it to <baseURL>/api/chat, adding a Bearer
// Authorization header when the provider has a token.
//
// On a 2xx status the *http.Response is returned with its body still open;
// the caller is responsible for closing it. On any other status the body is
// consumed and closed here and an *llm.APIError is returned. Encoding,
// request-construction and transport failures come back as wrapped errors.
func (m *model) do(ctx context.Context, wireReq *chatRequest) (*http.Response, error) {
	prov := m.provider
	if err := prov.checkReady(); err != nil {
		return nil, err
	}

	payload, err := json.Marshal(wireReq)
	if err != nil {
		return nil, fmt.Errorf("ollama: encode request: %w", err)
	}

	endpoint := prov.baseURL + "/api/chat"
	httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, endpoint, bytes.NewReader(payload))
	if err != nil {
		return nil, fmt.Errorf("ollama: build request: %w", err)
	}
	httpReq.Header.Set("Content-Type", "application/json")
	if prov.token != "" {
		httpReq.Header.Set("Authorization", "Bearer "+prov.token)
	}

	resp, err := prov.client.Do(httpReq)
	if err != nil {
		return nil, fmt.Errorf("ollama %s: do request: %w", m.qualified(), err)
	}
	if resp.StatusCode/100 == 2 {
		return resp, nil
	}

	// Failure status: build the error from at most 8 KiB of the body.
	defer resp.Body.Close()

	// Read and decode errors are ignored on purpose: whatever could be read
	// is still used as the message below.
	raw, _ := io.ReadAll(io.LimitReader(resp.Body, 8<<10))
	var decoded errorBody
	_ = json.Unmarshal(raw, &decoded)

	// Prefer the JSON "error" field; otherwise fall back to the raw text.
	detail := decoded.Error
	if detail == "" {
		detail = strings.TrimSpace(string(raw))
	}
	return nil, &llm.APIError{
		Provider: prov.name,
		Model:    m.id,
		Status:   resp.StatusCode,
		Message:  detail,
	}
}
// Generate implements llm.Model. It applies opts to req, runs the
// capability backstop, sends one non-streaming /api/chat request, and
// converts the single decoded JSON object into an llm.Response.
func (m *model) Generate(ctx context.Context, req llm.Request, opts ...llm.Option) (*llm.Response, error) {
	req = req.Apply(opts...)

	if err := m.enforceCapabilities(req); err != nil {
		return nil, err
	}

	// stream=false: exactly one response object is decoded below.
	wireReq, err := m.buildRequest(req, false)
	if err != nil {
		return nil, err
	}

	httpResp, err := m.do(ctx, wireReq)
	if err != nil {
		return nil, err
	}
	defer httpResp.Body.Close()

	decoded := new(chatResponse)
	if err := json.NewDecoder(httpResp.Body).Decode(decoded); err != nil {
		return nil, fmt.Errorf("ollama %s: decode response: %w", m.qualified(), err)
	}
	return m.toResponse(decoded), nil
}
// toResponse converts a final wire chunk into the canonical response.
//
// Non-empty message content becomes a single text part, tool calls go
// through convertToolCalls, and the finish reason comes from DoneReason
// plus whether any tool calls are present. The wire value itself is kept
// in Raw; Message.Thinking is not copied anywhere else.
func (m *model) toResponse(cr *chatResponse) *llm.Response {
	calls := convertToolCalls(cr.Message.ToolCalls)

	resp := &llm.Response{
		Model: m.qualified(),
		Usage: llm.Usage{
			InputTokens:  cr.PromptEvalCount,
			OutputTokens: cr.EvalCount,
		},
		Raw:          cr,
		ToolCalls:    calls,
		FinishReason: finishReason(cr.DoneReason, len(calls) > 0),
	}
	if text := cr.Message.Content; text != "" {
		resp.Parts = append(resp.Parts, llm.Text(text))
	}
	return resp
}
// convertToolCalls maps wire tool calls onto llm.ToolCall values.
//
// Ollama's shape makes ids optional, but our agent loop needs one to match
// results to calls, so a missing id is replaced with "call_<position>".
// Missing arguments are replaced with an empty JSON object. An empty input
// yields nil.
func convertToolCalls(calls []toolCall) []llm.ToolCall {
	if len(calls) == 0 {
		return nil
	}

	converted := make([]llm.ToolCall, len(calls))
	for i := range calls {
		src := &calls[i]
		tc := llm.ToolCall{
			ID:        src.ID,
			Name:      src.Function.Name,
			Arguments: src.Function.Arguments,
		}
		if tc.ID == "" {
			tc.ID = "call_" + strconv.Itoa(i)
		}
		if len(tc.Arguments) == 0 {
			tc.Arguments = json.RawMessage("{}")
		}
		converted[i] = tc
	}
	return converted
}
// finishReason maps the wire done_reason onto llm.FinishReason. The
// presence of tool calls takes precedence over whatever done_reason says;
// otherwise "stop" and the empty string mean a normal stop, "length" means
// the output was cut off, and anything else is reported as FinishOther.
func finishReason(doneReason string, hasToolCalls bool) llm.FinishReason {
	switch {
	case hasToolCalls:
		return llm.FinishToolCalls
	case doneReason == "" || doneReason == "stop":
		return llm.FinishStop
	case doneReason == "length":
		return llm.FinishLength
	}
	return llm.FinishOther
}