feat: OpenAI, Anthropic, and native-Ollama providers + media pipeline
Phase 3: - provider/openai: Chat Completions for OpenAI + compat endpoints (SSE streaming with by-index tool-call assembly, response_format json_schema, legacy max_tokens option, reasoning_effort) - provider/anthropic: Messages API (tool_use/tool_result, GA structured output via output_config.format, full SSE event parser, 529 transient) - provider/ollama: one native /api/chat client behind the ollama, ollama-cloud, and foreman built-ins (presets; NDJSON streaming tolerant of foreman's buffered single-object responses; object tool arguments; format-schema structured output; think mapping) - media/: capability normalization (sniff, downscale, transcode, byte ladder, ErrUnsupported), wired into the chain executor per target with penalty-free advance past incapable elements - registry: real provider + scheme wiring, WithHTTPClient option, required env-foreman TLS chat round-trip test - ADR-0009 multimodal strategy, ADR-0010 tools/structured mapping; README matrix + CLAUDE.md synced Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,343 @@
|
||||
package ollama
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/base64"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"gitea.stevedudenhoeffer.com/steve/majordomo/llm"
|
||||
)
|
||||
|
||||
// ---- wire types (field names per ollama api/types.go) ----
|
||||
|
||||
type chatRequest struct {
|
||||
Model string `json:"model"`
|
||||
Messages []chatMessage `json:"messages"`
|
||||
Tools []toolDef `json:"tools,omitempty"`
|
||||
Format json.RawMessage `json:"format,omitempty"`
|
||||
Options map[string]any `json:"options,omitempty"`
|
||||
// Stream has no omitempty on purpose: the server default is true, so
|
||||
// Generate must send an explicit false.
|
||||
Stream bool `json:"stream"`
|
||||
// Think is bool-or-string on the wire ("low"/"medium"/"high" or a bool).
|
||||
Think json.RawMessage `json:"think,omitempty"`
|
||||
}
|
||||
|
||||
type chatMessage struct {
|
||||
Role string `json:"role"`
|
||||
Content string `json:"content"`
|
||||
Images []string `json:"images,omitempty"` // raw base64, no data: prefix
|
||||
ToolCalls []toolCall `json:"tool_calls,omitempty"`
|
||||
ToolName string `json:"tool_name,omitempty"` // on role:"tool" results
|
||||
}
|
||||
|
||||
type toolDef struct {
|
||||
Type string `json:"type"`
|
||||
Function toolDefFunc `json:"function"`
|
||||
}
|
||||
|
||||
// toolDefFunc describes a callable function inside a toolDef.
type toolDefFunc struct {
	// Name is the function name.
	Name string `json:"name"`
	// Description is free-form text; omitted when empty.
	Description string `json:"description,omitempty"`
	// Parameters is the raw JSON describing the arguments; omitted when
	// empty.
	Parameters json.RawMessage `json:"parameters,omitempty"`
}
|
||||
|
||||
type toolCall struct {
|
||||
ID string `json:"id,omitempty"`
|
||||
Function toolCallFunc `json:"function"`
|
||||
}
|
||||
|
||||
// toolCallFunc is the function payload of a toolCall.
type toolCallFunc struct {
	// Index is the wire "index" field; omitted when zero.
	Index int `json:"index,omitempty"`
	// Name is the function being called.
	Name string `json:"name"`
	// Arguments is kept raw because the wire value is a JSON OBJECT, not
	// a JSON-encoded string as in OpenAI's shape.
	Arguments json.RawMessage `json:"arguments"`
}
|
||||
|
||||
type chatResponse struct {
|
||||
Model string `json:"model"`
|
||||
Message respMessage `json:"message"`
|
||||
Done bool `json:"done"`
|
||||
DoneReason string `json:"done_reason"`
|
||||
PromptEvalCount int `json:"prompt_eval_count"`
|
||||
EvalCount int `json:"eval_count"`
|
||||
}
|
||||
|
||||
type respMessage struct {
|
||||
Role string `json:"role"`
|
||||
Content string `json:"content"`
|
||||
Thinking string `json:"thinking"`
|
||||
ToolCalls []toolCall `json:"tool_calls"`
|
||||
}
|
||||
|
||||
// errorBody is the JSON shape decoded from a non-2xx response body; only
// the "error" string is read.
type errorBody struct {
	Error string `json:"error"`
}
|
||||
|
||||
// ---- model ----
|
||||
|
||||
type model struct {
|
||||
provider *Provider
|
||||
id string
|
||||
caps llm.Capabilities
|
||||
}
|
||||
|
||||
// Capabilities returns the capability set stored on the model.
func (m *model) Capabilities() llm.Capabilities {
	return m.caps
}
|
||||
|
||||
// qualified returns the "<provider name>/<model id>" form used in error
// messages and in Response.Model.
func (m *model) qualified() string {
	providerName := m.provider.name
	return providerName + "/" + m.id
}
|
||||
|
||||
// enforceCapabilities is the backstop check (the media layer normalizes
|
||||
// before requests get here; see ADR-0009).
|
||||
func (m *model) enforceCapabilities(req llm.Request) error {
|
||||
count := 0
|
||||
for _, msg := range req.Messages {
|
||||
for _, part := range msg.Parts {
|
||||
img, ok := part.(llm.ImagePart)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
count++
|
||||
if !m.caps.SupportsImages() {
|
||||
return fmt.Errorf("%w: %s does not accept image input", llm.ErrUnsupported, m.qualified())
|
||||
}
|
||||
if !m.caps.MIMEAllowed(img.MIME) {
|
||||
return fmt.Errorf("%w: %s does not accept %s images", llm.ErrUnsupported, m.qualified(), img.MIME)
|
||||
}
|
||||
if m.caps.MaxImageBytes > 0 && len(img.Data) > m.caps.MaxImageBytes {
|
||||
return fmt.Errorf("%w: image of %d bytes exceeds %s limit of %d",
|
||||
llm.ErrUnsupported, len(img.Data), m.qualified(), m.caps.MaxImageBytes)
|
||||
}
|
||||
}
|
||||
}
|
||||
if count > 0 && m.caps.MaxImagesPerReq > 0 && count > m.caps.MaxImagesPerReq {
|
||||
return fmt.Errorf("%w: %d images exceed %s limit of %d",
|
||||
llm.ErrUnsupported, count, m.qualified(), m.caps.MaxImagesPerReq)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// buildRequest maps the canonical request onto the wire shape.
|
||||
func (m *model) buildRequest(req llm.Request, stream bool) (*chatRequest, error) {
|
||||
out := &chatRequest{Model: m.id, Stream: stream}
|
||||
|
||||
// System prompt: dedicated field first, then folded RoleSystem messages.
|
||||
var sys []string
|
||||
if req.System != "" {
|
||||
sys = append(sys, req.System)
|
||||
}
|
||||
for _, msg := range req.Messages {
|
||||
if msg.Role == llm.RoleSystem {
|
||||
if t := msg.Text(); t != "" {
|
||||
sys = append(sys, t)
|
||||
}
|
||||
}
|
||||
}
|
||||
if len(sys) > 0 {
|
||||
out.Messages = append(out.Messages, chatMessage{
|
||||
Role: "system", Content: strings.Join(sys, "\n\n"),
|
||||
})
|
||||
}
|
||||
|
||||
for _, msg := range req.Messages {
|
||||
switch msg.Role {
|
||||
case llm.RoleSystem:
|
||||
// Already folded above.
|
||||
case llm.RoleTool:
|
||||
for _, res := range msg.ToolResults {
|
||||
content := res.Content
|
||||
if res.IsError {
|
||||
content = "ERROR: " + content
|
||||
}
|
||||
out.Messages = append(out.Messages, chatMessage{
|
||||
Role: "tool", Content: content, ToolName: res.Name,
|
||||
})
|
||||
}
|
||||
default:
|
||||
cm := chatMessage{Role: string(msg.Role), Content: msg.Text()}
|
||||
for _, part := range msg.Parts {
|
||||
if img, ok := part.(llm.ImagePart); ok {
|
||||
cm.Images = append(cm.Images, base64.StdEncoding.EncodeToString(img.Data))
|
||||
}
|
||||
}
|
||||
for _, tc := range msg.ToolCalls {
|
||||
args := tc.Arguments
|
||||
if len(args) == 0 {
|
||||
args = json.RawMessage("{}")
|
||||
}
|
||||
cm.ToolCalls = append(cm.ToolCalls, toolCall{
|
||||
ID: tc.ID,
|
||||
Function: toolCallFunc{Name: tc.Name, Arguments: args},
|
||||
})
|
||||
}
|
||||
out.Messages = append(out.Messages, cm)
|
||||
}
|
||||
}
|
||||
|
||||
// Tools. Ollama has no tool_choice: "none" maps to omitting the tools;
|
||||
// "required"/named choices have no wire equivalent and are best-effort
|
||||
// ignored (documented in the README support matrix).
|
||||
if req.ToolChoice != "none" {
|
||||
for _, t := range req.Tools {
|
||||
params := t.Parameters
|
||||
if len(params) == 0 {
|
||||
params = json.RawMessage(`{"type":"object","properties":{}}`)
|
||||
}
|
||||
out.Tools = append(out.Tools, toolDef{
|
||||
Type: "function",
|
||||
Function: toolDefFunc{Name: t.Name, Description: t.Description, Parameters: params},
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
if len(req.Schema) > 0 {
|
||||
out.Format = req.Schema
|
||||
}
|
||||
|
||||
opts := make(map[string]any)
|
||||
if req.Temperature != nil {
|
||||
opts["temperature"] = *req.Temperature
|
||||
}
|
||||
if req.TopP != nil {
|
||||
opts["top_p"] = *req.TopP
|
||||
}
|
||||
if req.MaxTokens > 0 {
|
||||
opts["num_predict"] = req.MaxTokens
|
||||
}
|
||||
if len(req.StopSequences) > 0 {
|
||||
opts["stop"] = req.StopSequences
|
||||
}
|
||||
if len(opts) > 0 {
|
||||
out.Options = opts
|
||||
}
|
||||
|
||||
switch req.ReasoningEffort {
|
||||
case "":
|
||||
case "low", "medium", "high":
|
||||
out.Think = json.RawMessage(strconv.Quote(req.ReasoningEffort))
|
||||
default:
|
||||
return nil, fmt.Errorf("ollama: invalid reasoning effort %q (want low/medium/high)", req.ReasoningEffort)
|
||||
}
|
||||
|
||||
return out, nil
|
||||
}
|
||||
|
||||
// do POSTs /api/chat and returns the response body on 2xx, or a classified
|
||||
// error.
|
||||
func (m *model) do(ctx context.Context, wireReq *chatRequest) (*http.Response, error) {
|
||||
p := m.provider
|
||||
if err := p.checkReady(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
body, err := json.Marshal(wireReq)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("ollama: encode request: %w", err)
|
||||
}
|
||||
httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, p.baseURL+"/api/chat", bytes.NewReader(body))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("ollama: build request: %w", err)
|
||||
}
|
||||
httpReq.Header.Set("Content-Type", "application/json")
|
||||
if p.token != "" {
|
||||
httpReq.Header.Set("Authorization", "Bearer "+p.token)
|
||||
}
|
||||
|
||||
resp, err := p.client.Do(httpReq)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("ollama %s: do request: %w", m.qualified(), err)
|
||||
}
|
||||
if resp.StatusCode/100 != 2 {
|
||||
defer resp.Body.Close()
|
||||
raw, _ := io.ReadAll(io.LimitReader(resp.Body, 8<<10))
|
||||
var eb errorBody
|
||||
_ = json.Unmarshal(raw, &eb)
|
||||
msg := eb.Error
|
||||
if msg == "" {
|
||||
msg = strings.TrimSpace(string(raw))
|
||||
}
|
||||
return nil, &llm.APIError{
|
||||
Provider: p.name, Model: m.id,
|
||||
Status: resp.StatusCode, Message: msg,
|
||||
}
|
||||
}
|
||||
return resp, nil
|
||||
}
|
||||
|
||||
// Generate implements llm.Model.
|
||||
func (m *model) Generate(ctx context.Context, req llm.Request, opts ...llm.Option) (*llm.Response, error) {
|
||||
req = req.Apply(opts...)
|
||||
if err := m.enforceCapabilities(req); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
wireReq, err := m.buildRequest(req, false)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
resp, err := m.do(ctx, wireReq)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
var cr chatResponse
|
||||
if err := json.NewDecoder(resp.Body).Decode(&cr); err != nil {
|
||||
return nil, fmt.Errorf("ollama %s: decode response: %w", m.qualified(), err)
|
||||
}
|
||||
return m.toResponse(&cr), nil
|
||||
}
|
||||
|
||||
// toResponse converts a final wire chunk into the canonical response.
|
||||
func (m *model) toResponse(cr *chatResponse) *llm.Response {
|
||||
out := &llm.Response{
|
||||
Model: m.qualified(),
|
||||
Usage: llm.Usage{InputTokens: cr.PromptEvalCount, OutputTokens: cr.EvalCount},
|
||||
Raw: cr,
|
||||
}
|
||||
if cr.Message.Content != "" {
|
||||
out.Parts = append(out.Parts, llm.Text(cr.Message.Content))
|
||||
}
|
||||
out.ToolCalls = convertToolCalls(cr.Message.ToolCalls)
|
||||
out.FinishReason = finishReason(cr.DoneReason, len(out.ToolCalls) > 0)
|
||||
return out
|
||||
}
|
||||
|
||||
// convertToolCalls maps wire tool calls, synthesizing ids where the model
|
||||
// omitted them (ids are optional in Ollama's shape but required by our
|
||||
// agent loop to match results to calls).
|
||||
func convertToolCalls(calls []toolCall) []llm.ToolCall {
|
||||
out := make([]llm.ToolCall, 0, len(calls))
|
||||
for i, tc := range calls {
|
||||
id := tc.ID
|
||||
if id == "" {
|
||||
id = "call_" + strconv.Itoa(i)
|
||||
}
|
||||
args := tc.Function.Arguments
|
||||
if len(args) == 0 {
|
||||
args = json.RawMessage("{}")
|
||||
}
|
||||
out = append(out, llm.ToolCall{ID: id, Name: tc.Function.Name, Arguments: args})
|
||||
}
|
||||
if len(out) == 0 {
|
||||
return nil
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
func finishReason(doneReason string, hasToolCalls bool) llm.FinishReason {
|
||||
if hasToolCalls {
|
||||
return llm.FinishToolCalls
|
||||
}
|
||||
switch doneReason {
|
||||
case "stop", "":
|
||||
return llm.FinishStop
|
||||
case "length":
|
||||
return llm.FinishLength
|
||||
default:
|
||||
return llm.FinishOther
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user