feat: OpenAI, Anthropic, and native-Ollama providers + media pipeline
Phase 3: - provider/openai: Chat Completions for OpenAI + compat endpoints (SSE streaming with by-index tool-call assembly, response_format json_schema, legacy max_tokens option, reasoning_effort) - provider/anthropic: Messages API (tool_use/tool_result, GA structured output via output_config.format, full SSE event parser, 529 transient) - provider/ollama: one native /api/chat client behind the ollama, ollama-cloud, and foreman built-ins (presets; NDJSON streaming tolerant of foreman's buffered single-object responses; object tool arguments; format-schema structured output; think mapping) - media/: capability normalization (sniff, downscale, transcode, byte ladder, ErrUnsupported), wired into the chain executor per target with penalty-free advance past incapable elements - registry: real provider + scheme wiring, WithHTTPClient option, required env-foreman TLS chat round-trip test - ADR-0009 multimodal strategy, ADR-0010 tools/structured mapping; README matrix + CLAUDE.md synced Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,319 @@
|
||||
// Package anthropic implements llm.Provider for the Anthropic Messages API
|
||||
// and Anthropic-compatible endpoints.
|
||||
//
|
||||
// API surface targeted: POST {base}/v1/messages with headers x-api-key,
|
||||
// anthropic-version: 2023-06-01, and content-type: application/json, per the
|
||||
// platform.claude.com Messages API reference as of June 2026. Streaming uses
|
||||
// the documented SSE event sequence (message_start, content_block_start,
|
||||
// content_block_delta, content_block_stop, message_delta, message_stop).
|
||||
// Structured output uses the GA output_config.format mechanism with
|
||||
// {"type":"json_schema"}; the result arrives as JSON text in the first text
|
||||
// content block.
|
||||
//
|
||||
// Why a hand-rolled client (no SDK): ADR-0007 — majordomo is stdlib-first,
|
||||
// and the canonical llm contract needs only a narrow slice of the API.
|
||||
package anthropic
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"os"
|
||||
"strings"
|
||||
|
||||
"gitea.stevedudenhoeffer.com/steve/majordomo/llm"
|
||||
)
|
||||
|
||||
// Provider-wide defaults.
const (
	// defaultName is the registry name a provider reports unless WithName
	// replaces it.
	defaultName = "anthropic"

	// defaultBaseURL is the first-party API origin used unless WithBaseURL
	// replaces it.
	defaultBaseURL = "https://api.anthropic.com"

	// apiVersion is sent as the anthropic-version header. 2023-06-01 is
	// still the current (and only) stable version string as of June 2026.
	apiVersion = "2023-06-01"

	// defaultMaxTokens is the max_tokens fallback for requests that leave
	// Request.MaxTokens at 0: the Messages API requires max_tokens on every
	// request.
	defaultMaxTokens = 4096
)
|
||||
|
||||
// defaultCapabilities reflects the documented first-party API image limits:
|
||||
// 100 images per request (200K-context models), 10 MB per image, 8000 px per
|
||||
// side, and the four supported media types.
|
||||
func defaultCapabilities() llm.Capabilities {
|
||||
return llm.Capabilities{
|
||||
SupportsTools: true,
|
||||
SupportsStructured: true,
|
||||
SupportsStreaming: true,
|
||||
MaxImagesPerReq: 100,
|
||||
MaxImageBytes: 10 << 20,
|
||||
MaxImageDimension: 8000,
|
||||
AllowedImageMIME: []string{
|
||||
"image/jpeg", "image/png", "image/gif", "image/webp",
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
// Provider is an llm.Provider backed by the Anthropic Messages API.
|
||||
type Provider struct {
|
||||
name string
|
||||
apiKey string
|
||||
baseURL string
|
||||
client *http.Client
|
||||
caps llm.Capabilities
|
||||
maxTokens int
|
||||
}
|
||||
|
||||
// Option configures the provider at construction.
|
||||
type Option func(*Provider)
|
||||
|
||||
// WithAPIKey sets the API key explicitly, bypassing the ANTHROPIC_API_KEY
|
||||
// environment default.
|
||||
func WithAPIKey(key string) Option {
|
||||
return func(p *Provider) { p.apiKey = key }
|
||||
}
|
||||
|
||||
// WithBaseURL points the provider at an Anthropic-compatible endpoint. A
|
||||
// trailing slash is trimmed; "/v1/messages" is appended per request.
|
||||
func WithBaseURL(u string) Option {
|
||||
return func(p *Provider) { p.baseURL = strings.TrimRight(u, "/") }
|
||||
}
|
||||
|
||||
// WithHTTPClient replaces the HTTP client (timeouts, proxies, test doubles).
|
||||
func WithHTTPClient(c *http.Client) Option {
|
||||
return func(p *Provider) { p.client = c }
|
||||
}
|
||||
|
||||
// WithName overrides the registry name. Why: an Anthropic-compatible
|
||||
// endpoint registered under its own name must surface that name in
|
||||
// Response.Model and errors, not "anthropic".
|
||||
func WithName(name string) Option {
|
||||
return func(p *Provider) { p.name = name }
|
||||
}
|
||||
|
||||
// WithDefaultCapabilities replaces the provider-default capabilities.
|
||||
func WithDefaultCapabilities(caps llm.Capabilities) Option {
|
||||
return func(p *Provider) { p.caps = caps }
|
||||
}
|
||||
|
||||
// WithDefaultMaxTokens overrides the max_tokens value used when
|
||||
// Request.MaxTokens is 0. Why: the Messages API rejects requests without
|
||||
// max_tokens, so the provider must always send something.
|
||||
func WithDefaultMaxTokens(n int) Option {
|
||||
return func(p *Provider) { p.maxTokens = n }
|
||||
}
|
||||
|
||||
// New creates an Anthropic provider. It never fails: a missing API key
|
||||
// (no WithAPIKey and no ANTHROPIC_API_KEY in the environment) surfaces as a
|
||||
// 401-style *llm.APIError at request time, not at construction.
|
||||
func New(opts ...Option) *Provider {
|
||||
p := &Provider{
|
||||
name: defaultName,
|
||||
baseURL: defaultBaseURL,
|
||||
client: http.DefaultClient,
|
||||
caps: defaultCapabilities(),
|
||||
maxTokens: defaultMaxTokens,
|
||||
}
|
||||
for _, opt := range opts {
|
||||
opt(p)
|
||||
}
|
||||
if p.apiKey == "" {
|
||||
p.apiKey = os.Getenv("ANTHROPIC_API_KEY")
|
||||
}
|
||||
return p
|
||||
}
|
||||
|
||||
// Name implements llm.Provider.
|
||||
func (p *Provider) Name() string { return p.name }
|
||||
|
||||
// Model implements llm.Provider. The id is passed through verbatim — it is
|
||||
// never validated against a catalog.
|
||||
func (p *Provider) Model(id string, opts ...llm.ModelOption) (llm.Model, error) {
|
||||
cfg := llm.ApplyModelOptions(opts)
|
||||
caps := p.caps
|
||||
if cfg.Capabilities != nil {
|
||||
caps = *cfg.Capabilities
|
||||
}
|
||||
return &model{provider: p, id: id, caps: caps}, nil
|
||||
}
|
||||
|
||||
type model struct {
|
||||
provider *Provider
|
||||
id string
|
||||
caps llm.Capabilities
|
||||
}
|
||||
|
||||
// Capabilities implements llm.Model.
|
||||
func (m *model) Capabilities() llm.Capabilities { return m.caps }
|
||||
|
||||
// fullName is the "provider/model" identifier used in Response.Model.
|
||||
func (m *model) fullName() string { return m.provider.name + "/" + m.id }
|
||||
|
||||
// Generate implements llm.Model.
|
||||
func (m *model) Generate(ctx context.Context, req llm.Request, opts ...llm.Option) (*llm.Response, error) {
|
||||
req = req.Apply(opts...)
|
||||
if err := m.enforceCapabilities(req); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
httpResp, err := m.do(ctx, req, false)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer httpResp.Body.Close()
|
||||
if httpResp.StatusCode/100 != 2 {
|
||||
return nil, m.apiError(httpResp)
|
||||
}
|
||||
var wr wireResponse
|
||||
if err := json.NewDecoder(httpResp.Body).Decode(&wr); err != nil {
|
||||
return nil, fmt.Errorf("%s: decode response: %w", m.provider.name, err)
|
||||
}
|
||||
return m.toResponse(&wr), nil
|
||||
}
|
||||
|
||||
// Stream implements llm.Model. A non-2xx status is returned as an error from
|
||||
// Stream itself, before any events are delivered.
|
||||
func (m *model) Stream(ctx context.Context, req llm.Request, opts ...llm.Option) (llm.Stream, error) {
|
||||
req = req.Apply(opts...)
|
||||
if err := m.enforceCapabilities(req); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
httpResp, err := m.do(ctx, req, true)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if httpResp.StatusCode/100 != 2 {
|
||||
defer httpResp.Body.Close()
|
||||
return nil, m.apiError(httpResp)
|
||||
}
|
||||
return newStream(m, httpResp.Body), nil
|
||||
}
|
||||
|
||||
// enforceCapabilities is the honest backstop behind the media layer: it
|
||||
// rejects (rather than silently mutates) requests the target cannot serve.
|
||||
// Why: a separate media layer resizes/transcodes images BEFORE requests
|
||||
// reach the provider, so anything still out of bounds here is a real error.
|
||||
func (m *model) enforceCapabilities(req llm.Request) error {
|
||||
images := 0
|
||||
for _, msg := range req.Messages {
|
||||
for _, part := range msg.Parts {
|
||||
img, ok := part.(llm.ImagePart)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
images++
|
||||
if !m.caps.SupportsImages() {
|
||||
return fmt.Errorf("%w: %s does not accept image input", llm.ErrUnsupported, m.fullName())
|
||||
}
|
||||
if !m.caps.MIMEAllowed(img.MIME) {
|
||||
return fmt.Errorf("%w: %s does not accept image MIME %q", llm.ErrUnsupported, m.fullName(), img.MIME)
|
||||
}
|
||||
if m.caps.MaxImageBytes > 0 && len(img.Data) > m.caps.MaxImageBytes {
|
||||
return fmt.Errorf("%w: image of %d bytes exceeds %s limit of %d bytes",
|
||||
llm.ErrUnsupported, len(img.Data), m.fullName(), m.caps.MaxImageBytes)
|
||||
}
|
||||
}
|
||||
}
|
||||
if m.caps.MaxImagesPerReq > 0 && images > m.caps.MaxImagesPerReq {
|
||||
return fmt.Errorf("%w: request carries %d images, %s allows at most %d",
|
||||
llm.ErrUnsupported, images, m.fullName(), m.caps.MaxImagesPerReq)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// do builds and executes one Messages API call. Transport errors are wrapped
|
||||
// with context but NOT converted to *llm.APIError, so llm.Classify still
|
||||
// sees the underlying net.Error / syscall errno.
|
||||
func (m *model) do(ctx context.Context, req llm.Request, streaming bool) (*http.Response, error) {
|
||||
p := m.provider
|
||||
if p.apiKey == "" {
|
||||
// Why request-time, not construction-time: New never fails by
|
||||
// convention, and a 401-shaped APIError classifies permanent so
|
||||
// chains fail fast past a misconfigured target.
|
||||
return nil, &llm.APIError{
|
||||
Provider: p.name,
|
||||
Model: m.id,
|
||||
Status: http.StatusUnauthorized,
|
||||
Code: "authentication_error",
|
||||
Message: "no API key configured: set ANTHROPIC_API_KEY or use WithAPIKey",
|
||||
}
|
||||
}
|
||||
|
||||
body, err := json.Marshal(buildWireRequest(m.id, req, p.maxTokens, streaming))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("%s: encode request: %w", p.name, err)
|
||||
}
|
||||
httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, p.baseURL+"/v1/messages", bytes.NewReader(body))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("%s: build request: %w", p.name, err)
|
||||
}
|
||||
httpReq.Header.Set("x-api-key", p.apiKey)
|
||||
httpReq.Header.Set("anthropic-version", apiVersion)
|
||||
httpReq.Header.Set("content-type", "application/json")
|
||||
if streaming {
|
||||
httpReq.Header.Set("accept", "text/event-stream")
|
||||
}
|
||||
|
||||
resp, err := p.client.Do(httpReq)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("%s: do request: %w", p.name, err)
|
||||
}
|
||||
return resp, nil
|
||||
}
|
||||
|
||||
// apiError converts a non-2xx response into *llm.APIError, filling Code and
|
||||
// Message from the documented {"type":"error","error":{...}} body when it
|
||||
// parses, and falling back to the raw body text when it does not.
|
||||
func (m *model) apiError(resp *http.Response) error {
|
||||
apiErr := &llm.APIError{
|
||||
Provider: m.provider.name,
|
||||
Model: m.id,
|
||||
Status: resp.StatusCode,
|
||||
}
|
||||
body, err := io.ReadAll(io.LimitReader(resp.Body, 1<<20))
|
||||
if err != nil {
|
||||
return apiErr
|
||||
}
|
||||
var we wireErrorEnvelope
|
||||
if json.Unmarshal(body, &we) == nil && we.Error.Type != "" {
|
||||
apiErr.Code = we.Error.Type
|
||||
apiErr.Message = we.Error.Message
|
||||
} else {
|
||||
apiErr.Message = strings.TrimSpace(string(body))
|
||||
}
|
||||
return apiErr
|
||||
}
|
||||
|
||||
// toResponse maps a wire response onto the canonical llm.Response. Thinking
|
||||
// and other unrecognized block types are tolerated and skipped — they are
|
||||
// not part of the canonical content vocabulary.
|
||||
func (m *model) toResponse(wr *wireResponse) *llm.Response {
|
||||
resp := &llm.Response{
|
||||
FinishReason: mapStopReason(wr.StopReason),
|
||||
Usage: wr.Usage.toUsage(),
|
||||
Model: m.fullName(),
|
||||
Raw: wr,
|
||||
}
|
||||
for _, block := range wr.Content {
|
||||
switch block.Type {
|
||||
case "text":
|
||||
resp.Parts = append(resp.Parts, llm.TextPart{Text: block.Text})
|
||||
case "tool_use":
|
||||
args := block.Input
|
||||
if len(args) == 0 {
|
||||
args = json.RawMessage("{}")
|
||||
}
|
||||
resp.ToolCalls = append(resp.ToolCalls, llm.ToolCall{
|
||||
ID: block.ID,
|
||||
Name: block.Name,
|
||||
Arguments: args,
|
||||
})
|
||||
default:
|
||||
// thinking, redacted_thinking, server-tool blocks, and any
|
||||
// future types are skipped, not surfaced as parts.
|
||||
}
|
||||
}
|
||||
return resp
|
||||
}
|
||||
@@ -0,0 +1,774 @@
|
||||
package anthropic
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/base64"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"io"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"sync"
|
||||
"testing"
|
||||
|
||||
"gitea.stevedudenhoeffer.com/steve/majordomo/llm"
|
||||
)
|
||||
|
||||
// okBody is the smallest successful Messages API response the tests need: a
// single "ok" text block, stop_reason end_turn, and a usage object.
const okBody = `{
	"id": "msg_01",
	"type": "message",
	"role": "assistant",
	"model": "claude-test",
	"content": [{"type": "text", "text": "ok"}],
	"stop_reason": "end_turn",
	"usage": {"input_tokens": 3, "output_tokens": 5}
}`
|
||||
|
||||
// capture remembers the most recent request seen by the test server, plus a
// running hit count. All fields are written under mu by the handler.
type capture struct {
	mu     sync.Mutex
	hits   int
	method string
	path   string
	header http.Header
	body   []byte
}

// handler returns an http.HandlerFunc that records each incoming request
// into c and then answers with the given status and respBody, served as
// application/json.
func (c *capture) handler(status int, respBody string) http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		payload, _ := io.ReadAll(r.Body)

		func() {
			c.mu.Lock()
			defer c.mu.Unlock()
			c.hits++
			c.method, c.path = r.Method, r.URL.Path
			c.header, c.body = r.Header.Clone(), payload
		}()

		w.Header().Set("Content-Type", "application/json")
		w.WriteHeader(status)
		_, _ = w.Write([]byte(respBody))
	}
}

// bodyMap decodes the captured request body into a generic map so tests can
// assert on key presence. It fails the test when the body is not valid JSON.
func (c *capture) bodyMap(t *testing.T) map[string]any {
	t.Helper()

	c.mu.Lock()
	defer c.mu.Unlock()

	var decoded map[string]any
	if err := json.Unmarshal(c.body, &decoded); err != nil {
		t.Fatalf("decode captured body: %v\nbody: %s", err, c.body)
	}
	return decoded
}
|
||||
|
||||
// newTestProvider spins up an httptest server and a provider pointed at it.
|
||||
func newTestProvider(t *testing.T, h http.Handler, opts ...Option) *Provider {
|
||||
t.Helper()
|
||||
srv := httptest.NewServer(h)
|
||||
t.Cleanup(srv.Close)
|
||||
return New(append([]Option{WithAPIKey("test-key"), WithBaseURL(srv.URL)}, opts...)...)
|
||||
}
|
||||
|
||||
func mustModel(t *testing.T, p *Provider, id string, opts ...llm.ModelOption) llm.Model {
|
||||
t.Helper()
|
||||
m, err := p.Model(id, opts...)
|
||||
if err != nil {
|
||||
t.Fatalf("Model(%q): %v", id, err)
|
||||
}
|
||||
return m
|
||||
}
|
||||
|
||||
func generate(t *testing.T, m llm.Model, req llm.Request, opts ...llm.Option) *llm.Response {
|
||||
t.Helper()
|
||||
resp, err := m.Generate(context.Background(), req, opts...)
|
||||
if err != nil {
|
||||
t.Fatalf("Generate: %v", err)
|
||||
}
|
||||
return resp
|
||||
}
|
||||
|
||||
func TestRequestHeadersAndPath(t *testing.T) {
|
||||
var c capture
|
||||
p := newTestProvider(t, c.handler(http.StatusOK, okBody))
|
||||
m := mustModel(t, p, "claude-test")
|
||||
|
||||
generate(t, m, llm.Request{Messages: []llm.Message{llm.UserText("hi")}})
|
||||
|
||||
if c.method != http.MethodPost {
|
||||
t.Errorf("method = %q, want POST", c.method)
|
||||
}
|
||||
if c.path != "/v1/messages" {
|
||||
t.Errorf("path = %q, want /v1/messages", c.path)
|
||||
}
|
||||
for header, want := range map[string]string{
|
||||
"x-api-key": "test-key",
|
||||
"anthropic-version": "2023-06-01",
|
||||
"content-type": "application/json",
|
||||
} {
|
||||
if got := c.header.Get(header); got != want {
|
||||
t.Errorf("header %s = %q, want %q", header, got, want)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestSystemFold(t *testing.T) {
|
||||
var c capture
|
||||
p := newTestProvider(t, c.handler(http.StatusOK, okBody))
|
||||
m := mustModel(t, p, "claude-test")
|
||||
|
||||
generate(t, m, llm.Request{
|
||||
System: "base prompt",
|
||||
Messages: []llm.Message{
|
||||
llm.SystemText("first extra"),
|
||||
llm.UserText("hi"),
|
||||
llm.SystemText("second extra"),
|
||||
},
|
||||
})
|
||||
|
||||
body := c.bodyMap(t)
|
||||
if got, want := body["system"], "base prompt\n\nfirst extra\n\nsecond extra"; got != want {
|
||||
t.Errorf("system = %q, want %q", got, want)
|
||||
}
|
||||
msgs := body["messages"].([]any)
|
||||
if len(msgs) != 1 {
|
||||
t.Fatalf("messages length = %d, want 1 (system messages must be excluded)", len(msgs))
|
||||
}
|
||||
if role := msgs[0].(map[string]any)["role"]; role != "user" {
|
||||
t.Errorf("remaining message role = %q, want user", role)
|
||||
}
|
||||
}
|
||||
|
||||
func TestNoSystemOmitsField(t *testing.T) {
|
||||
var c capture
|
||||
p := newTestProvider(t, c.handler(http.StatusOK, okBody))
|
||||
m := mustModel(t, p, "claude-test")
|
||||
|
||||
generate(t, m, llm.Request{Messages: []llm.Message{llm.UserText("hi")}})
|
||||
|
||||
if _, ok := c.bodyMap(t)["system"]; ok {
|
||||
t.Error("system key present, want omitted when empty")
|
||||
}
|
||||
}
|
||||
|
||||
func TestMaxTokens(t *testing.T) {
|
||||
t.Run("default 4096", func(t *testing.T) {
|
||||
var c capture
|
||||
p := newTestProvider(t, c.handler(http.StatusOK, okBody))
|
||||
generate(t, mustModel(t, p, "m"), llm.Request{Messages: []llm.Message{llm.UserText("hi")}})
|
||||
if got := c.bodyMap(t)["max_tokens"].(float64); got != 4096 {
|
||||
t.Errorf("max_tokens = %v, want 4096", got)
|
||||
}
|
||||
})
|
||||
t.Run("explicit wins", func(t *testing.T) {
|
||||
var c capture
|
||||
p := newTestProvider(t, c.handler(http.StatusOK, okBody))
|
||||
generate(t, mustModel(t, p, "m"), llm.Request{
|
||||
Messages: []llm.Message{llm.UserText("hi")},
|
||||
MaxTokens: 123,
|
||||
})
|
||||
if got := c.bodyMap(t)["max_tokens"].(float64); got != 123 {
|
||||
t.Errorf("max_tokens = %v, want 123", got)
|
||||
}
|
||||
})
|
||||
t.Run("WithDefaultMaxTokens overrides default", func(t *testing.T) {
|
||||
var c capture
|
||||
p := newTestProvider(t, c.handler(http.StatusOK, okBody), WithDefaultMaxTokens(99))
|
||||
generate(t, mustModel(t, p, "m"), llm.Request{Messages: []llm.Message{llm.UserText("hi")}})
|
||||
if got := c.bodyMap(t)["max_tokens"].(float64); got != 99 {
|
||||
t.Errorf("max_tokens = %v, want 99", got)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func TestImageBlock(t *testing.T) {
|
||||
var c capture
|
||||
p := newTestProvider(t, c.handler(http.StatusOK, okBody))
|
||||
m := mustModel(t, p, "claude-test")
|
||||
|
||||
raw := []byte{0x01, 0x02, 0x03}
|
||||
generate(t, m, llm.Request{Messages: []llm.Message{
|
||||
llm.UserParts(llm.Text("look at this"), llm.Image("image/png", raw)),
|
||||
}})
|
||||
|
||||
msgs := c.bodyMap(t)["messages"].([]any)
|
||||
content := msgs[0].(map[string]any)["content"].([]any)
|
||||
if len(content) != 2 {
|
||||
t.Fatalf("content blocks = %d, want 2", len(content))
|
||||
}
|
||||
img := content[1].(map[string]any)
|
||||
if img["type"] != "image" {
|
||||
t.Fatalf("block type = %v, want image", img["type"])
|
||||
}
|
||||
src := img["source"].(map[string]any)
|
||||
if src["type"] != "base64" {
|
||||
t.Errorf("source type = %v, want base64", src["type"])
|
||||
}
|
||||
if src["media_type"] != "image/png" {
|
||||
t.Errorf("media_type = %v, want image/png", src["media_type"])
|
||||
}
|
||||
if want := base64.StdEncoding.EncodeToString(raw); src["data"] != want {
|
||||
t.Errorf("data = %v, want %q", src["data"], want)
|
||||
}
|
||||
}
|
||||
|
||||
func TestToolUseToolResultRoundTrip(t *testing.T) {
|
||||
var c capture
|
||||
p := newTestProvider(t, c.handler(http.StatusOK, okBody))
|
||||
m := mustModel(t, p, "claude-test")
|
||||
|
||||
generate(t, m, llm.Request{Messages: []llm.Message{
|
||||
llm.UserText("weather?"),
|
||||
{
|
||||
Role: llm.RoleAssistant,
|
||||
Parts: []llm.Part{llm.Text("checking")},
|
||||
ToolCalls: []llm.ToolCall{
|
||||
{ID: "toolu_1", Name: "get_weather", Arguments: json.RawMessage(`{"location":"Paris"}`)},
|
||||
{ID: "toolu_2", Name: "noop"}, // empty args must become {}
|
||||
},
|
||||
},
|
||||
llm.ToolResultsMessage(
|
||||
llm.ToolResult{ID: "toolu_1", Name: "get_weather", Content: "72F and sunny"},
|
||||
llm.ToolResult{ID: "toolu_2", Name: "noop", Content: "boom", IsError: true},
|
||||
),
|
||||
}})
|
||||
|
||||
msgs := c.bodyMap(t)["messages"].([]any)
|
||||
if len(msgs) != 3 {
|
||||
t.Fatalf("messages = %d, want 3", len(msgs))
|
||||
}
|
||||
|
||||
asst := msgs[1].(map[string]any)
|
||||
if asst["role"] != "assistant" {
|
||||
t.Errorf("messages[1].role = %v, want assistant", asst["role"])
|
||||
}
|
||||
asstContent := asst["content"].([]any)
|
||||
if len(asstContent) != 3 {
|
||||
t.Fatalf("assistant blocks = %d, want 3 (text + 2 tool_use)", len(asstContent))
|
||||
}
|
||||
tu := asstContent[1].(map[string]any)
|
||||
if tu["type"] != "tool_use" || tu["id"] != "toolu_1" || tu["name"] != "get_weather" {
|
||||
t.Errorf("tool_use block = %v", tu)
|
||||
}
|
||||
if loc := tu["input"].(map[string]any)["location"]; loc != "Paris" {
|
||||
t.Errorf("tool_use input.location = %v, want Paris", loc)
|
||||
}
|
||||
if input := asstContent[2].(map[string]any)["input"].(map[string]any); len(input) != 0 {
|
||||
t.Errorf("empty-args tool_use input = %v, want {}", input)
|
||||
}
|
||||
|
||||
// RoleTool → ONE user message with one tool_result block per result.
|
||||
toolMsg := msgs[2].(map[string]any)
|
||||
if toolMsg["role"] != "user" {
|
||||
t.Errorf("messages[2].role = %v, want user", toolMsg["role"])
|
||||
}
|
||||
results := toolMsg["content"].([]any)
|
||||
if len(results) != 2 {
|
||||
t.Fatalf("tool_result blocks = %d, want 2", len(results))
|
||||
}
|
||||
first := results[0].(map[string]any)
|
||||
if first["type"] != "tool_result" || first["tool_use_id"] != "toolu_1" || first["content"] != "72F and sunny" {
|
||||
t.Errorf("first tool_result = %v", first)
|
||||
}
|
||||
if _, ok := first["is_error"]; ok {
|
||||
t.Error("first tool_result has is_error, want omitted when false")
|
||||
}
|
||||
second := results[1].(map[string]any)
|
||||
if second["tool_use_id"] != "toolu_2" || second["is_error"] != true {
|
||||
t.Errorf("second tool_result = %v, want is_error true", second)
|
||||
}
|
||||
}
|
||||
|
||||
func TestToolDefinitions(t *testing.T) {
|
||||
var c capture
|
||||
p := newTestProvider(t, c.handler(http.StatusOK, okBody))
|
||||
m := mustModel(t, p, "claude-test")
|
||||
|
||||
schema := json.RawMessage(`{"type":"object","properties":{"q":{"type":"string"}},"required":["q"]}`)
|
||||
generate(t, m, llm.Request{
|
||||
Messages: []llm.Message{llm.UserText("hi")},
|
||||
Tools: []llm.Tool{
|
||||
{Name: "search", Description: "Search the web.", Parameters: schema},
|
||||
{Name: "ping"}, // nil Parameters → default empty object schema
|
||||
},
|
||||
})
|
||||
|
||||
tools := c.bodyMap(t)["tools"].([]any)
|
||||
if len(tools) != 2 {
|
||||
t.Fatalf("tools = %d, want 2", len(tools))
|
||||
}
|
||||
search := tools[0].(map[string]any)
|
||||
if search["name"] != "search" || search["description"] != "Search the web." {
|
||||
t.Errorf("tool[0] = %v", search)
|
||||
}
|
||||
if typ := search["input_schema"].(map[string]any)["type"]; typ != "object" {
|
||||
t.Errorf("input_schema.type = %v, want object", typ)
|
||||
}
|
||||
ping := tools[1].(map[string]any)
|
||||
if typ := ping["input_schema"].(map[string]any)["type"]; typ != "object" {
|
||||
t.Errorf("nil-Parameters input_schema.type = %v, want object", typ)
|
||||
}
|
||||
}
|
||||
|
||||
func TestToolChoiceForms(t *testing.T) {
|
||||
cases := []struct {
|
||||
choice string
|
||||
wantType string // "" means the field must be absent
|
||||
wantName string
|
||||
}{
|
||||
{choice: "", wantType: ""},
|
||||
{choice: "auto", wantType: "auto"},
|
||||
{choice: "required", wantType: "any"},
|
||||
{choice: "none", wantType: "none"},
|
||||
{choice: "get_weather", wantType: "tool", wantName: "get_weather"},
|
||||
}
|
||||
for _, tc := range cases {
|
||||
t.Run("choice="+tc.choice, func(t *testing.T) {
|
||||
var c capture
|
||||
p := newTestProvider(t, c.handler(http.StatusOK, okBody))
|
||||
generate(t, mustModel(t, p, "m"), llm.Request{
|
||||
Messages: []llm.Message{llm.UserText("hi")},
|
||||
ToolChoice: tc.choice,
|
||||
})
|
||||
body := c.bodyMap(t)
|
||||
raw, present := body["tool_choice"]
|
||||
if tc.wantType == "" {
|
||||
if present {
|
||||
t.Fatalf("tool_choice present (%v), want omitted", raw)
|
||||
}
|
||||
return
|
||||
}
|
||||
choice := raw.(map[string]any)
|
||||
if choice["type"] != tc.wantType {
|
||||
t.Errorf("tool_choice.type = %v, want %q", choice["type"], tc.wantType)
|
||||
}
|
||||
if tc.wantName != "" && choice["name"] != tc.wantName {
|
||||
t.Errorf("tool_choice.name = %v, want %q", choice["name"], tc.wantName)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestOutputConfigFormat(t *testing.T) {
|
||||
var c capture
|
||||
p := newTestProvider(t, c.handler(http.StatusOK, okBody))
|
||||
m := mustModel(t, p, "claude-test")
|
||||
|
||||
schema := json.RawMessage(`{"type":"object","properties":{"name":{"type":"string"}},"required":["name"],"additionalProperties":false}`)
|
||||
generate(t, m, llm.Request{Messages: []llm.Message{llm.UserText("hi")}},
|
||||
llm.WithSchema(schema, "person"))
|
||||
|
||||
body := c.bodyMap(t)
|
||||
format := body["output_config"].(map[string]any)["format"].(map[string]any)
|
||||
if format["type"] != "json_schema" {
|
||||
t.Errorf("output_config.format.type = %v, want json_schema", format["type"])
|
||||
}
|
||||
// Normalize both sides through any → Marshal (sorted keys) to compare.
|
||||
got, _ := json.Marshal(format["schema"])
|
||||
var want any
|
||||
_ = json.Unmarshal(schema, &want)
|
||||
wantJSON, _ := json.Marshal(want)
|
||||
if string(got) != string(wantJSON) {
|
||||
t.Errorf("schema = %s, want %s", got, wantJSON)
|
||||
}
|
||||
}
|
||||
|
||||
func TestOutputConfigOmittedWithoutSchema(t *testing.T) {
|
||||
var c capture
|
||||
p := newTestProvider(t, c.handler(http.StatusOK, okBody))
|
||||
generate(t, mustModel(t, p, "m"), llm.Request{Messages: []llm.Message{llm.UserText("hi")}})
|
||||
if _, ok := c.bodyMap(t)["output_config"]; ok {
|
||||
t.Error("output_config present, want omitted when Schema is nil")
|
||||
}
|
||||
}
|
||||
|
||||
func TestSamplingKnobs(t *testing.T) {
|
||||
t.Run("omitted when unset", func(t *testing.T) {
|
||||
var c capture
|
||||
p := newTestProvider(t, c.handler(http.StatusOK, okBody))
|
||||
generate(t, mustModel(t, p, "m"), llm.Request{Messages: []llm.Message{llm.UserText("hi")}})
|
||||
body := c.bodyMap(t)
|
||||
if _, ok := body["temperature"]; ok {
|
||||
t.Error("temperature present, want omitted when unset")
|
||||
}
|
||||
if _, ok := body["top_p"]; ok {
|
||||
t.Error("top_p present, want omitted when unset")
|
||||
}
|
||||
if _, ok := body["stop_sequences"]; ok {
|
||||
t.Error("stop_sequences present, want omitted when unset")
|
||||
}
|
||||
})
|
||||
t.Run("present when set", func(t *testing.T) {
|
||||
var c capture
|
||||
p := newTestProvider(t, c.handler(http.StatusOK, okBody))
|
||||
generate(t, mustModel(t, p, "m"),
|
||||
llm.Request{Messages: []llm.Message{llm.UserText("hi")}},
|
||||
llm.WithTemperature(0), // explicit zero must still be sent
|
||||
llm.WithTopP(0.9),
|
||||
llm.WithStopSequences("END"))
|
||||
body := c.bodyMap(t)
|
||||
if got, ok := body["temperature"]; !ok || got.(float64) != 0 {
|
||||
t.Errorf("temperature = %v (present=%v), want explicit 0", got, ok)
|
||||
}
|
||||
if got := body["top_p"].(float64); got != 0.9 {
|
||||
t.Errorf("top_p = %v, want 0.9", got)
|
||||
}
|
||||
stops := body["stop_sequences"].([]any)
|
||||
if len(stops) != 1 || stops[0] != "END" {
|
||||
t.Errorf("stop_sequences = %v, want [END]", stops)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func TestStreamFieldOmittedOnGenerate(t *testing.T) {
|
||||
var c capture
|
||||
p := newTestProvider(t, c.handler(http.StatusOK, okBody))
|
||||
generate(t, mustModel(t, p, "m"), llm.Request{Messages: []llm.Message{llm.UserText("hi")}})
|
||||
if _, ok := c.bodyMap(t)["stream"]; ok {
|
||||
t.Error("stream key present on Generate, want omitted")
|
||||
}
|
||||
}
|
||||
|
||||
func TestResponseParse(t *testing.T) {
|
||||
const body = `{
|
||||
"id": "msg_02",
|
||||
"type": "message",
|
||||
"role": "assistant",
|
||||
"model": "claude-test",
|
||||
"content": [
|
||||
{"type": "thinking", "thinking": "pondering...", "signature": "sig"},
|
||||
{"type": "text", "text": "I'll check the weather."},
|
||||
{"type": "tool_use", "id": "toolu_9", "name": "get_weather", "input": {"location": "Paris"}}
|
||||
],
|
||||
"stop_reason": "tool_use",
|
||||
"usage": {
|
||||
"input_tokens": 3,
|
||||
"output_tokens": 7,
|
||||
"cache_creation_input_tokens": 10,
|
||||
"cache_read_input_tokens": 20
|
||||
}
|
||||
}`
|
||||
var c capture
|
||||
p := newTestProvider(t, c.handler(http.StatusOK, body))
|
||||
resp := generate(t, mustModel(t, p, "claude-test"), llm.Request{Messages: []llm.Message{llm.UserText("hi")}})
|
||||
|
||||
if len(resp.Parts) != 1 {
|
||||
t.Fatalf("parts = %d, want 1 (thinking blocks must be skipped)", len(resp.Parts))
|
||||
}
|
||||
if got := resp.Text(); got != "I'll check the weather." {
|
||||
t.Errorf("text = %q", got)
|
||||
}
|
||||
if len(resp.ToolCalls) != 1 {
|
||||
t.Fatalf("tool calls = %d, want 1", len(resp.ToolCalls))
|
||||
}
|
||||
call := resp.ToolCalls[0]
|
||||
if call.ID != "toolu_9" || call.Name != "get_weather" {
|
||||
t.Errorf("tool call = %+v", call)
|
||||
}
|
||||
var args map[string]any
|
||||
if err := json.Unmarshal(call.Arguments, &args); err != nil || args["location"] != "Paris" {
|
||||
t.Errorf("arguments = %s (err %v), want location Paris", call.Arguments, err)
|
||||
}
|
||||
if resp.FinishReason != llm.FinishToolCalls {
|
||||
t.Errorf("finish = %q, want %q", resp.FinishReason, llm.FinishToolCalls)
|
||||
}
|
||||
// Total real input = input + cache_creation + cache_read.
|
||||
if resp.Usage.InputTokens != 33 || resp.Usage.OutputTokens != 7 {
|
||||
t.Errorf("usage = %+v, want {33 7}", resp.Usage)
|
||||
}
|
||||
if resp.Model != "anthropic/claude-test" {
|
||||
t.Errorf("model = %q, want anthropic/claude-test", resp.Model)
|
||||
}
|
||||
if resp.Raw == nil {
|
||||
t.Error("Raw = nil, want wire response")
|
||||
}
|
||||
}
|
||||
|
||||
func TestStopReasonMapping(t *testing.T) {
|
||||
cases := map[string]llm.FinishReason{
|
||||
"end_turn": llm.FinishStop,
|
||||
"stop_sequence": llm.FinishStop,
|
||||
"max_tokens": llm.FinishLength,
|
||||
"model_context_window_exceeded": llm.FinishLength,
|
||||
"tool_use": llm.FinishToolCalls,
|
||||
"refusal": llm.FinishContentFilter,
|
||||
"pause_turn": llm.FinishOther,
|
||||
"some_future_reason": llm.FinishOther,
|
||||
}
|
||||
for stop, want := range cases {
|
||||
if got := mapStopReason(stop); got != want {
|
||||
t.Errorf("mapStopReason(%q) = %q, want %q", stop, got, want)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestHTTPErrorMapping(t *testing.T) {
|
||||
cases := []struct {
|
||||
name string
|
||||
status int
|
||||
body string
|
||||
wantCode string
|
||||
wantClass llm.ErrorClass
|
||||
}{
|
||||
{
|
||||
name: "429 rate limit is transient",
|
||||
status: http.StatusTooManyRequests,
|
||||
body: `{"type":"error","error":{"type":"rate_limit_error","message":"slow down"}}`,
|
||||
wantCode: "rate_limit_error", wantClass: llm.ClassTransient,
|
||||
},
|
||||
{
|
||||
name: "529 overloaded is transient",
|
||||
status: 529,
|
||||
body: `{"type":"error","error":{"type":"overloaded_error","message":"Overloaded"}}`,
|
||||
wantCode: "overloaded_error", wantClass: llm.ClassTransient,
|
||||
},
|
||||
{
|
||||
name: "401 auth is permanent",
|
||||
status: http.StatusUnauthorized,
|
||||
body: `{"type":"error","error":{"type":"authentication_error","message":"invalid x-api-key"}}`,
|
||||
wantCode: "authentication_error", wantClass: llm.ClassPermanent,
|
||||
},
|
||||
{
|
||||
name: "404 is permanent",
|
||||
status: http.StatusNotFound,
|
||||
body: `{"type":"error","error":{"type":"not_found_error","message":"model: nope"}}`,
|
||||
wantCode: "not_found_error", wantClass: llm.ClassPermanent,
|
||||
},
|
||||
}
|
||||
for _, tc := range cases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
var c capture
|
||||
p := newTestProvider(t, c.handler(tc.status, tc.body))
|
||||
_, err := mustModel(t, p, "claude-test").Generate(context.Background(),
|
||||
llm.Request{Messages: []llm.Message{llm.UserText("hi")}})
|
||||
if err == nil {
|
||||
t.Fatal("Generate succeeded, want error")
|
||||
}
|
||||
apiErr, ok := errors.AsType[*llm.APIError](err)
|
||||
if !ok {
|
||||
t.Fatalf("error %T (%v), want *llm.APIError", err, err)
|
||||
}
|
||||
if apiErr.Provider != "anthropic" || apiErr.Model != "claude-test" {
|
||||
t.Errorf("provider/model = %s/%s", apiErr.Provider, apiErr.Model)
|
||||
}
|
||||
if apiErr.Status != tc.status {
|
||||
t.Errorf("status = %d, want %d", apiErr.Status, tc.status)
|
||||
}
|
||||
if apiErr.Code != tc.wantCode {
|
||||
t.Errorf("code = %q, want %q", apiErr.Code, tc.wantCode)
|
||||
}
|
||||
if apiErr.Message == "" {
|
||||
t.Error("message empty, want provider message")
|
||||
}
|
||||
if got := llm.Classify(err); got != tc.wantClass {
|
||||
t.Errorf("Classify = %v, want %v", got, tc.wantClass)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
t.Run("404 unwraps to ErrModelNotFound", func(t *testing.T) {
|
||||
var c capture
|
||||
p := newTestProvider(t, c.handler(http.StatusNotFound,
|
||||
`{"type":"error","error":{"type":"not_found_error","message":"model: nope"}}`))
|
||||
_, err := mustModel(t, p, "missing").Generate(context.Background(),
|
||||
llm.Request{Messages: []llm.Message{llm.UserText("hi")}})
|
||||
if !errors.Is(err, llm.ErrModelNotFound) {
|
||||
t.Errorf("errors.Is(err, ErrModelNotFound) = false for %v", err)
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("non-JSON error body falls back to raw text", func(t *testing.T) {
|
||||
var c capture
|
||||
p := newTestProvider(t, c.handler(http.StatusBadGateway, "upstream exploded"))
|
||||
_, err := mustModel(t, p, "m").Generate(context.Background(),
|
||||
llm.Request{Messages: []llm.Message{llm.UserText("hi")}})
|
||||
apiErr, ok := errors.AsType[*llm.APIError](err)
|
||||
if !ok {
|
||||
t.Fatalf("error %T, want *llm.APIError", err)
|
||||
}
|
||||
if apiErr.Status != http.StatusBadGateway || apiErr.Message != "upstream exploded" {
|
||||
t.Errorf("apiErr = %+v", apiErr)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func TestMissingAPIKey(t *testing.T) {
|
||||
t.Setenv("ANTHROPIC_API_KEY", "") // isolate from any real environment
|
||||
|
||||
var c capture
|
||||
srv := httptest.NewServer(c.handler(http.StatusOK, okBody))
|
||||
t.Cleanup(srv.Close)
|
||||
|
||||
p := New(WithBaseURL(srv.URL)) // construction must not fail
|
||||
_, err := mustModel(t, p, "claude-test").Generate(context.Background(),
|
||||
llm.Request{Messages: []llm.Message{llm.UserText("hi")}})
|
||||
apiErr, ok := errors.AsType[*llm.APIError](err)
|
||||
if !ok {
|
||||
t.Fatalf("error %T (%v), want *llm.APIError", err, err)
|
||||
}
|
||||
if apiErr.Status != http.StatusUnauthorized || apiErr.Code != "authentication_error" {
|
||||
t.Errorf("apiErr = %+v, want 401 authentication_error", apiErr)
|
||||
}
|
||||
if llm.Classify(err) != llm.ClassPermanent {
|
||||
t.Error("missing key must classify permanent")
|
||||
}
|
||||
if c.hits != 0 {
|
||||
t.Errorf("server hits = %d, want 0 (no request without a key)", c.hits)
|
||||
}
|
||||
}
|
||||
|
||||
func TestAPIKeyFromEnv(t *testing.T) {
|
||||
t.Setenv("ANTHROPIC_API_KEY", "env-key")
|
||||
|
||||
var c capture
|
||||
srv := httptest.NewServer(c.handler(http.StatusOK, okBody))
|
||||
t.Cleanup(srv.Close)
|
||||
|
||||
p := New(WithBaseURL(srv.URL))
|
||||
generate(t, mustModel(t, p, "m"), llm.Request{Messages: []llm.Message{llm.UserText("hi")}})
|
||||
if got := c.header.Get("x-api-key"); got != "env-key" {
|
||||
t.Errorf("x-api-key = %q, want env-key", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestCapabilityEnforcement(t *testing.T) {
|
||||
img := func(mime string, n int) llm.Part { return llm.Image(mime, make([]byte, n)) }
|
||||
|
||||
cases := []struct {
|
||||
name string
|
||||
caps *llm.Capabilities // nil = provider defaults
|
||||
req llm.Request
|
||||
}{
|
||||
{
|
||||
name: "images unsupported",
|
||||
caps: &llm.Capabilities{}, // MaxImagesPerReq 0 = no images
|
||||
req: llm.Request{Messages: []llm.Message{llm.UserParts(img("image/png", 4))}},
|
||||
},
|
||||
{
|
||||
name: "too many images",
|
||||
caps: &llm.Capabilities{MaxImagesPerReq: 1},
|
||||
req: llm.Request{Messages: []llm.Message{
|
||||
llm.UserParts(img("image/png", 4), img("image/png", 4)),
|
||||
}},
|
||||
},
|
||||
{
|
||||
name: "disallowed MIME",
|
||||
req: llm.Request{Messages: []llm.Message{llm.UserParts(img("image/bmp", 4))}},
|
||||
},
|
||||
{
|
||||
name: "image too large",
|
||||
caps: &llm.Capabilities{MaxImagesPerReq: 1, MaxImageBytes: 2},
|
||||
req: llm.Request{Messages: []llm.Message{llm.UserParts(img("image/png", 3))}},
|
||||
},
|
||||
}
|
||||
for _, tc := range cases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
var c capture
|
||||
p := newTestProvider(t, c.handler(http.StatusOK, okBody))
|
||||
var opts []llm.ModelOption
|
||||
if tc.caps != nil {
|
||||
opts = append(opts, llm.WithCapabilities(*tc.caps))
|
||||
}
|
||||
m := mustModel(t, p, "claude-test", opts...)
|
||||
|
||||
_, err := m.Generate(context.Background(), tc.req)
|
||||
if !errors.Is(err, llm.ErrUnsupported) {
|
||||
t.Errorf("Generate err = %v, want ErrUnsupported", err)
|
||||
}
|
||||
_, err = m.Stream(context.Background(), tc.req)
|
||||
if !errors.Is(err, llm.ErrUnsupported) {
|
||||
t.Errorf("Stream err = %v, want ErrUnsupported", err)
|
||||
}
|
||||
if c.hits != 0 {
|
||||
t.Errorf("server hits = %d, want 0 (rejected before sending)", c.hits)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
t.Run("within limits passes", func(t *testing.T) {
|
||||
var c capture
|
||||
p := newTestProvider(t, c.handler(http.StatusOK, okBody))
|
||||
generate(t, mustModel(t, p, "m"), llm.Request{
|
||||
Messages: []llm.Message{llm.UserParts(llm.Text("ok"), img("image/jpeg", 16))},
|
||||
})
|
||||
if c.hits != 1 {
|
||||
t.Errorf("server hits = %d, want 1", c.hits)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func TestCompatEndpointWithNameAndBaseURL(t *testing.T) {
|
||||
var c capture
|
||||
p := newTestProvider(t, c.handler(http.StatusOK, okBody), WithName("compat"))
|
||||
if p.Name() != "compat" {
|
||||
t.Errorf("Name() = %q, want compat", p.Name())
|
||||
}
|
||||
resp := generate(t, mustModel(t, p, "claude-test"), llm.Request{Messages: []llm.Message{llm.UserText("hi")}})
|
||||
if resp.Model != "compat/claude-test" {
|
||||
t.Errorf("resp.Model = %q, want compat/claude-test", resp.Model)
|
||||
}
|
||||
|
||||
var ec capture
|
||||
pe := newTestProvider(t, ec.handler(http.StatusTooManyRequests,
|
||||
`{"type":"error","error":{"type":"rate_limit_error","message":"x"}}`), WithName("compat"))
|
||||
_, err := mustModel(t, pe, "m").Generate(context.Background(),
|
||||
llm.Request{Messages: []llm.Message{llm.UserText("hi")}})
|
||||
apiErr, ok := errors.AsType[*llm.APIError](err)
|
||||
if !ok || apiErr.Provider != "compat" {
|
||||
t.Errorf("error provider = %v, want compat (err %v)", apiErr, err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestCapabilitiesDefaultsAndOverrides(t *testing.T) {
|
||||
p := New(WithAPIKey("k"))
|
||||
m := mustModel(t, p, "m")
|
||||
caps := m.Capabilities()
|
||||
if !caps.SupportsTools || !caps.SupportsStructured || !caps.SupportsStreaming {
|
||||
t.Errorf("default feature flags = %+v, want all true", caps)
|
||||
}
|
||||
if caps.MaxImagesPerReq != 100 || caps.MaxImageBytes != 10<<20 || caps.MaxImageDimension != 8000 {
|
||||
t.Errorf("default image limits = %+v", caps)
|
||||
}
|
||||
wantMIME := []string{"image/jpeg", "image/png", "image/gif", "image/webp"}
|
||||
if len(caps.AllowedImageMIME) != len(wantMIME) {
|
||||
t.Fatalf("AllowedImageMIME = %v, want %v", caps.AllowedImageMIME, wantMIME)
|
||||
}
|
||||
for i, mime := range wantMIME {
|
||||
if caps.AllowedImageMIME[i] != mime {
|
||||
t.Errorf("AllowedImageMIME[%d] = %q, want %q", i, caps.AllowedImageMIME[i], mime)
|
||||
}
|
||||
}
|
||||
|
||||
custom := llm.Capabilities{SupportsStreaming: true, MaxImagesPerReq: 1}
|
||||
p2 := New(WithAPIKey("k"), WithDefaultCapabilities(custom))
|
||||
if got := mustModel(t, p2, "m").Capabilities(); got.MaxImagesPerReq != 1 || got.SupportsTools {
|
||||
t.Errorf("WithDefaultCapabilities not applied: %+v", got)
|
||||
}
|
||||
|
||||
perModel := llm.Capabilities{SupportsTools: true}
|
||||
if got := mustModel(t, p2, "m", llm.WithCapabilities(perModel)).Capabilities(); !got.SupportsTools || got.MaxImagesPerReq != 0 {
|
||||
t.Errorf("per-model capabilities not applied: %+v", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestTransportErrorNotAPIError(t *testing.T) {
|
||||
// Point at a server that is immediately closed: the connection failure
|
||||
// must surface as a wrapped transport error, not *llm.APIError.
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(http.ResponseWriter, *http.Request) {}))
|
||||
url := srv.URL
|
||||
srv.Close()
|
||||
|
||||
p := New(WithAPIKey("k"), WithBaseURL(url))
|
||||
_, err := mustModel(t, p, "m").Generate(context.Background(),
|
||||
llm.Request{Messages: []llm.Message{llm.UserText("hi")}})
|
||||
if err == nil {
|
||||
t.Fatal("Generate succeeded, want transport error")
|
||||
}
|
||||
if _, ok := errors.AsType[*llm.APIError](err); ok {
|
||||
t.Errorf("transport error wrapped in APIError: %v", err)
|
||||
}
|
||||
if llm.Classify(err) != llm.ClassTransient {
|
||||
t.Errorf("connection failure must classify transient: %v", err)
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,247 @@
|
||||
package anthropic
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
"gitea.stevedudenhoeffer.com/steve/majordomo/llm"
|
||||
)
|
||||
|
||||
// wireStreamEvent is the union of all SSE data payloads the Messages API
|
||||
// emits. Dispatch is on Type (the data always carries one), so the SSE
|
||||
// "event:" line is informational only.
|
||||
type wireStreamEvent struct {
|
||||
Type string `json:"type"`
|
||||
Index int `json:"index"`
|
||||
|
||||
// message_start
|
||||
Message *struct {
|
||||
Usage wireUsage `json:"usage"`
|
||||
} `json:"message"`
|
||||
|
||||
// content_block_start
|
||||
ContentBlock *struct {
|
||||
Type string `json:"type"`
|
||||
ID string `json:"id"`
|
||||
Name string `json:"name"`
|
||||
} `json:"content_block"`
|
||||
|
||||
// content_block_delta / message_delta
|
||||
Delta struct {
|
||||
Type string `json:"type"`
|
||||
Text string `json:"text"`
|
||||
PartialJSON string `json:"partial_json"`
|
||||
StopReason string `json:"stop_reason"`
|
||||
} `json:"delta"`
|
||||
|
||||
// message_delta
|
||||
Usage *wireUsage `json:"usage"`
|
||||
|
||||
// error
|
||||
Error *struct {
|
||||
Type string `json:"type"`
|
||||
Message string `json:"message"`
|
||||
} `json:"error"`
|
||||
}
|
||||
|
||||
// stream adapts the Messages API SSE stream to llm.Stream.
|
||||
//
|
||||
// Why single-threaded pull (no reader goroutine): Next is already the
|
||||
// consumer's pull point, so parsing lazily inside Next keeps cancellation,
|
||||
// buffering, and error propagation trivial — Close just closes the body and
|
||||
// the next read fails.
|
||||
type stream struct {
|
||||
provider string
|
||||
model string
|
||||
full string // provider/model
|
||||
body io.ReadCloser
|
||||
scanner *bufio.Scanner
|
||||
|
||||
// accumulated response
|
||||
parts []llm.Part
|
||||
toolCalls []llm.ToolCall
|
||||
usage llm.Usage
|
||||
finish llm.FinishReason
|
||||
|
||||
// current content block state
|
||||
blockType string
|
||||
textBuf strings.Builder
|
||||
toolID string
|
||||
toolName string
|
||||
argsBuf strings.Builder
|
||||
|
||||
done bool // final Response event emitted
|
||||
closeOnce sync.Once
|
||||
closeErr error
|
||||
}
|
||||
|
||||
func newStream(m *model, body io.ReadCloser) *stream {
|
||||
sc := bufio.NewScanner(body)
|
||||
// Why a large limit: one SSE line carries one whole delta; default 64K
|
||||
// can be exceeded by large structured-output or tool-argument deltas.
|
||||
sc.Buffer(make([]byte, 0, 64*1024), 10*1024*1024)
|
||||
return &stream{
|
||||
provider: m.provider.name,
|
||||
model: m.id,
|
||||
full: m.fullName(),
|
||||
body: body,
|
||||
scanner: sc,
|
||||
finish: llm.FinishOther,
|
||||
}
|
||||
}
|
||||
|
||||
// Close implements llm.Stream. Safe to call at any time and more than once.
|
||||
func (s *stream) Close() error {
|
||||
s.closeOnce.Do(func() { s.closeErr = s.body.Close() })
|
||||
return s.closeErr
|
||||
}
|
||||
|
||||
// Next implements llm.Stream. It emits TextDelta fragments as they arrive,
|
||||
// fully-assembled ToolCalls at content_block_stop, exactly one final
|
||||
// Response event at message_stop, then io.EOF.
|
||||
func (s *stream) Next() (llm.StreamEvent, error) {
|
||||
if s.done {
|
||||
return llm.StreamEvent{}, io.EOF
|
||||
}
|
||||
for {
|
||||
data, err := s.nextData()
|
||||
if err != nil {
|
||||
return llm.StreamEvent{}, err
|
||||
}
|
||||
var ev wireStreamEvent
|
||||
if err := json.Unmarshal([]byte(data), &ev); err != nil {
|
||||
return llm.StreamEvent{}, fmt.Errorf("%s: decode stream event: %w", s.provider, err)
|
||||
}
|
||||
|
||||
switch ev.Type {
|
||||
case "message_start":
|
||||
if ev.Message != nil {
|
||||
s.usage = ev.Message.Usage.toUsage()
|
||||
}
|
||||
|
||||
case "content_block_start":
|
||||
s.blockType = ""
|
||||
s.textBuf.Reset()
|
||||
s.argsBuf.Reset()
|
||||
if ev.ContentBlock != nil {
|
||||
s.blockType = ev.ContentBlock.Type
|
||||
if s.blockType == "tool_use" {
|
||||
s.toolID = ev.ContentBlock.ID
|
||||
s.toolName = ev.ContentBlock.Name
|
||||
}
|
||||
}
|
||||
|
||||
case "content_block_delta":
|
||||
switch ev.Delta.Type {
|
||||
case "text_delta":
|
||||
s.textBuf.WriteString(ev.Delta.Text)
|
||||
return llm.StreamEvent{TextDelta: ev.Delta.Text}, nil
|
||||
case "input_json_delta":
|
||||
// Buffer partial JSON internally; consumers never see it.
|
||||
s.argsBuf.WriteString(ev.Delta.PartialJSON)
|
||||
default:
|
||||
// thinking_delta / signature_delta: tolerated, skipped.
|
||||
}
|
||||
|
||||
case "content_block_stop":
|
||||
if event, ok := s.finishBlock(); ok {
|
||||
return event, nil
|
||||
}
|
||||
|
||||
case "message_delta":
|
||||
if ev.Delta.StopReason != "" {
|
||||
s.finish = mapStopReason(ev.Delta.StopReason)
|
||||
}
|
||||
if ev.Usage != nil {
|
||||
// Output tokens arrive cumulatively in the final delta;
|
||||
// input tokens were reported in message_start.
|
||||
s.usage.OutputTokens = ev.Usage.OutputTokens
|
||||
}
|
||||
|
||||
case "message_stop":
|
||||
s.done = true
|
||||
return llm.StreamEvent{Response: &llm.Response{
|
||||
Parts: s.parts,
|
||||
ToolCalls: s.toolCalls,
|
||||
FinishReason: s.finish,
|
||||
Usage: s.usage,
|
||||
Model: s.full,
|
||||
}}, nil
|
||||
|
||||
case "error":
|
||||
// Mid-stream failure after the 200 (e.g. overloaded_error).
|
||||
// Status stays 0: there is no HTTP status for it, and the
|
||||
// default Classify treats it as transient, which fits overload.
|
||||
apiErr := &llm.APIError{Provider: s.provider, Model: s.model}
|
||||
if ev.Error != nil {
|
||||
apiErr.Code = ev.Error.Type
|
||||
apiErr.Message = ev.Error.Message
|
||||
}
|
||||
return llm.StreamEvent{}, apiErr
|
||||
|
||||
default:
|
||||
// ping and unknown event types: ignored.
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// finishBlock closes out the current content block, appending its result to
|
||||
// the accumulated response. Tool-use blocks produce a stream event.
|
||||
func (s *stream) finishBlock() (llm.StreamEvent, bool) {
|
||||
defer func() {
|
||||
s.blockType = ""
|
||||
s.textBuf.Reset()
|
||||
s.argsBuf.Reset()
|
||||
}()
|
||||
switch s.blockType {
|
||||
case "text":
|
||||
if s.textBuf.Len() > 0 {
|
||||
s.parts = append(s.parts, llm.TextPart{Text: s.textBuf.String()})
|
||||
}
|
||||
case "tool_use":
|
||||
args := s.argsBuf.String()
|
||||
if args == "" {
|
||||
// A tool called with no arguments streams zero (or empty)
|
||||
// input_json_delta fragments; the canonical form is "{}".
|
||||
args = "{}"
|
||||
}
|
||||
call := llm.ToolCall{ID: s.toolID, Name: s.toolName, Arguments: json.RawMessage(args)}
|
||||
s.toolCalls = append(s.toolCalls, call)
|
||||
return llm.StreamEvent{ToolCall: &call}, true
|
||||
}
|
||||
return llm.StreamEvent{}, false
|
||||
}
|
||||
|
||||
// nextData reads SSE lines until one complete event's data is assembled
|
||||
// (multi-line data fields are joined with "\n" per the SSE spec). "event:"
|
||||
// lines and comments are ignored; dispatch keys off the JSON "type" field.
|
||||
func (s *stream) nextData() (string, error) {
|
||||
var data strings.Builder
|
||||
for s.scanner.Scan() {
|
||||
line := s.scanner.Text()
|
||||
if line == "" {
|
||||
if data.Len() > 0 {
|
||||
return data.String(), nil
|
||||
}
|
||||
continue
|
||||
}
|
||||
if rest, ok := strings.CutPrefix(line, "data:"); ok {
|
||||
if data.Len() > 0 {
|
||||
data.WriteByte('\n')
|
||||
}
|
||||
data.WriteString(strings.TrimPrefix(rest, " "))
|
||||
}
|
||||
}
|
||||
if err := s.scanner.Err(); err != nil {
|
||||
return "", fmt.Errorf("%s: read stream: %w", s.provider, err)
|
||||
}
|
||||
if data.Len() > 0 {
|
||||
return data.String(), nil
|
||||
}
|
||||
// EOF before message_stop: the connection dropped mid-response.
|
||||
return "", fmt.Errorf("%s: stream ended before message_stop: %w", s.provider, io.ErrUnexpectedEOF)
|
||||
}
|
||||
@@ -0,0 +1,324 @@
|
||||
package anthropic
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"io"
|
||||
"net/http"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"gitea.stevedudenhoeffer.com/steve/majordomo/llm"
|
||||
)
|
||||
|
||||
// sse renders each payload as one SSE event and concatenates them into a
// response body. Every event uses the fixed name "event"; the real API names
// events after the JSON type field, but the client dispatches on the data, so
// the name does not matter here.
func sse(payloads ...string) string {
	frames := make([]string, len(payloads))
	for i, payload := range payloads {
		frames[i] = "event: event\n" + "data: " + payload + "\n\n"
	}
	return strings.Join(frames, "")
}
|
||||
|
||||
func sseServer(t *testing.T, c *capture, body string) *Provider {
|
||||
t.Helper()
|
||||
return newTestProvider(t, http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
raw, _ := io.ReadAll(r.Body)
|
||||
c.mu.Lock()
|
||||
c.hits++
|
||||
c.header = r.Header.Clone()
|
||||
c.body = raw
|
||||
c.mu.Unlock()
|
||||
w.Header().Set("Content-Type", "text/event-stream")
|
||||
_, _ = io.WriteString(w, body)
|
||||
}))
|
||||
}
|
||||
|
||||
// drain collects all events until io.EOF, failing the test on any error.
|
||||
func drain(t *testing.T, s llm.Stream) []llm.StreamEvent {
|
||||
t.Helper()
|
||||
var events []llm.StreamEvent
|
||||
for {
|
||||
ev, err := s.Next()
|
||||
if err == io.EOF {
|
||||
return events
|
||||
}
|
||||
if err != nil {
|
||||
t.Fatalf("Next: %v", err)
|
||||
}
|
||||
events = append(events, ev)
|
||||
}
|
||||
}
|
||||
|
||||
func openStream(t *testing.T, p *Provider, modelID string) llm.Stream {
|
||||
t.Helper()
|
||||
s, err := mustModel(t, p, modelID).Stream(context.Background(),
|
||||
llm.Request{Messages: []llm.Message{llm.UserText("hi")}})
|
||||
if err != nil {
|
||||
t.Fatalf("Stream: %v", err)
|
||||
}
|
||||
t.Cleanup(func() { _ = s.Close() })
|
||||
return s
|
||||
}
|
||||
|
||||
func TestStreamTextDeltas(t *testing.T) {
|
||||
body := sse(
|
||||
`{"type":"message_start","message":{"id":"msg_1","type":"message","role":"assistant","content":[],"model":"m","usage":{"input_tokens":10,"cache_creation_input_tokens":2,"cache_read_input_tokens":3,"output_tokens":1}}}`,
|
||||
`{"type":"content_block_start","index":0,"content_block":{"type":"text","text":""}}`,
|
||||
`{"type":"ping"}`,
|
||||
`{"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"Hel"}}`,
|
||||
`{"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"lo"}}`,
|
||||
`{"type":"content_block_stop","index":0}`,
|
||||
`{"type":"content_block_start","index":1,"content_block":{"type":"text","text":""}}`,
|
||||
`{"type":"content_block_delta","index":1,"delta":{"type":"text_delta","text":" world"}}`,
|
||||
`{"type":"content_block_stop","index":1}`,
|
||||
`{"type":"message_delta","delta":{"stop_reason":"end_turn","stop_sequence":null},"usage":{"output_tokens":12}}`,
|
||||
`{"type":"message_stop"}`,
|
||||
)
|
||||
var c capture
|
||||
p := sseServer(t, &c, body)
|
||||
s := openStream(t, p, "claude-test")
|
||||
events := drain(t, s)
|
||||
|
||||
if len(events) != 4 {
|
||||
t.Fatalf("events = %d, want 4 (3 deltas + final response)", len(events))
|
||||
}
|
||||
for i, want := range []string{"Hel", "lo", " world"} {
|
||||
if events[i].TextDelta != want {
|
||||
t.Errorf("event[%d].TextDelta = %q, want %q", i, events[i].TextDelta, want)
|
||||
}
|
||||
}
|
||||
|
||||
final := events[3].Response
|
||||
if final == nil {
|
||||
t.Fatal("last event has no Response")
|
||||
}
|
||||
if len(final.Parts) != 2 {
|
||||
t.Fatalf("final parts = %d, want 2 (one per text block)", len(final.Parts))
|
||||
}
|
||||
if final.Text() != "Hello world" {
|
||||
t.Errorf("final text = %q, want %q", final.Text(), "Hello world")
|
||||
}
|
||||
if final.FinishReason != llm.FinishStop {
|
||||
t.Errorf("finish = %q, want stop", final.FinishReason)
|
||||
}
|
||||
// Input = 10+2+3 from message_start; output = 12 from message_delta.
|
||||
if final.Usage.InputTokens != 15 || final.Usage.OutputTokens != 12 {
|
||||
t.Errorf("usage = %+v, want {15 12}", final.Usage)
|
||||
}
|
||||
if final.Model != "anthropic/claude-test" {
|
||||
t.Errorf("model = %q, want anthropic/claude-test", final.Model)
|
||||
}
|
||||
|
||||
// Past EOF, Next keeps returning io.EOF.
|
||||
if _, err := s.Next(); err != io.EOF {
|
||||
t.Errorf("Next after EOF = %v, want io.EOF", err)
|
||||
}
|
||||
|
||||
// The request must carry "stream": true.
|
||||
if streamFlag := c.bodyMap(t)["stream"]; streamFlag != true {
|
||||
t.Errorf("request stream = %v, want true", streamFlag)
|
||||
}
|
||||
}
|
||||
|
||||
func TestStreamToolCallAssembly(t *testing.T) {
|
||||
body := sse(
|
||||
`{"type":"message_start","message":{"id":"msg_1","usage":{"input_tokens":8,"output_tokens":1}}}`,
|
||||
`{"type":"content_block_start","index":0,"content_block":{"type":"text","text":""}}`,
|
||||
`{"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"Checking."}}`,
|
||||
`{"type":"content_block_stop","index":0}`,
|
||||
`{"type":"content_block_start","index":1,"content_block":{"type":"tool_use","id":"toolu_9","name":"get_weather","input":{}}}`,
|
||||
`{"type":"content_block_delta","index":1,"delta":{"type":"input_json_delta","partial_json":""}}`,
|
||||
`{"type":"content_block_delta","index":1,"delta":{"type":"input_json_delta","partial_json":"{\"location\":"}}`,
|
||||
`{"type":"content_block_delta","index":1,"delta":{"type":"input_json_delta","partial_json":" \"San Francisco, CA\"}"}}`,
|
||||
`{"type":"content_block_stop","index":1}`,
|
||||
`{"type":"content_block_start","index":2,"content_block":{"type":"tool_use","id":"toolu_10","name":"noop","input":{}}}`,
|
||||
`{"type":"content_block_stop","index":2}`,
|
||||
`{"type":"message_delta","delta":{"stop_reason":"tool_use","stop_sequence":null},"usage":{"output_tokens":21}}`,
|
||||
`{"type":"message_stop"}`,
|
||||
)
|
||||
var c capture
|
||||
p := sseServer(t, &c, body)
|
||||
events := drain(t, openStream(t, p, "claude-test"))
|
||||
|
||||
if len(events) != 4 {
|
||||
t.Fatalf("events = %d, want 4 (text, 2 tool calls, final)", len(events))
|
||||
}
|
||||
if events[0].TextDelta != "Checking." {
|
||||
t.Errorf("event[0] = %+v, want text delta", events[0])
|
||||
}
|
||||
|
||||
call := events[1].ToolCall
|
||||
if call == nil {
|
||||
t.Fatal("event[1] has no ToolCall")
|
||||
}
|
||||
if call.ID != "toolu_9" || call.Name != "get_weather" {
|
||||
t.Errorf("tool call = %+v", call)
|
||||
}
|
||||
var args map[string]any
|
||||
if err := json.Unmarshal(call.Arguments, &args); err != nil {
|
||||
t.Fatalf("assembled arguments invalid JSON: %v (%s)", err, call.Arguments)
|
||||
}
|
||||
if args["location"] != "San Francisco, CA" {
|
||||
t.Errorf("arguments = %v", args)
|
||||
}
|
||||
|
||||
empty := events[2].ToolCall
|
||||
if empty == nil || empty.ID != "toolu_10" {
|
||||
t.Fatalf("event[2] = %+v, want second tool call", events[2])
|
||||
}
|
||||
if string(empty.Arguments) != "{}" {
|
||||
t.Errorf("empty tool call arguments = %s, want {}", empty.Arguments)
|
||||
}
|
||||
|
||||
final := events[3].Response
|
||||
if final == nil {
|
||||
t.Fatal("last event has no Response")
|
||||
}
|
||||
if len(final.ToolCalls) != 2 {
|
||||
t.Errorf("final tool calls = %d, want 2", len(final.ToolCalls))
|
||||
}
|
||||
if final.FinishReason != llm.FinishToolCalls {
|
||||
t.Errorf("finish = %q, want tool_calls", final.FinishReason)
|
||||
}
|
||||
if final.Text() != "Checking." {
|
||||
t.Errorf("final text = %q", final.Text())
|
||||
}
|
||||
if final.Usage.InputTokens != 8 || final.Usage.OutputTokens != 21 {
|
||||
t.Errorf("usage = %+v, want {8 21}", final.Usage)
|
||||
}
|
||||
}
|
||||
|
||||
func TestStreamThinkingSkipped(t *testing.T) {
|
||||
body := sse(
|
||||
`{"type":"message_start","message":{"id":"msg_1","usage":{"input_tokens":5,"output_tokens":1}}}`,
|
||||
`{"type":"content_block_start","index":0,"content_block":{"type":"thinking","thinking":""}}`,
|
||||
`{"type":"content_block_delta","index":0,"delta":{"type":"thinking_delta","thinking":"hmm"}}`,
|
||||
`{"type":"content_block_delta","index":0,"delta":{"type":"signature_delta","signature":"sig"}}`,
|
||||
`{"type":"content_block_stop","index":0}`,
|
||||
`{"type":"content_block_start","index":1,"content_block":{"type":"text","text":""}}`,
|
||||
`{"type":"content_block_delta","index":1,"delta":{"type":"text_delta","text":"hi"}}`,
|
||||
`{"type":"content_block_stop","index":1}`,
|
||||
`{"type":"message_delta","delta":{"stop_reason":"end_turn","stop_sequence":null},"usage":{"output_tokens":2}}`,
|
||||
`{"type":"message_stop"}`,
|
||||
)
|
||||
var c capture
|
||||
p := sseServer(t, &c, body)
|
||||
events := drain(t, openStream(t, p, "claude-test"))
|
||||
|
||||
if len(events) != 2 {
|
||||
t.Fatalf("events = %d, want 2 (thinking produces none)", len(events))
|
||||
}
|
||||
if events[0].TextDelta != "hi" {
|
||||
t.Errorf("event[0] = %+v, want TextDelta hi", events[0])
|
||||
}
|
||||
final := events[1].Response
|
||||
if final == nil || len(final.Parts) != 1 || final.Text() != "hi" {
|
||||
t.Errorf("final = %+v, want single text part %q", final, "hi")
|
||||
}
|
||||
}
|
||||
|
||||
func TestStreamMidStreamError(t *testing.T) {
|
||||
body := sse(
|
||||
`{"type":"message_start","message":{"id":"msg_1","usage":{"input_tokens":5,"output_tokens":1}}}`,
|
||||
`{"type":"content_block_start","index":0,"content_block":{"type":"text","text":""}}`,
|
||||
`{"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"par"}}`,
|
||||
`{"type":"error","error":{"type":"overloaded_error","message":"Overloaded"}}`,
|
||||
)
|
||||
var c capture
|
||||
p := sseServer(t, &c, body)
|
||||
s := openStream(t, p, "claude-test")
|
||||
|
||||
ev, err := s.Next()
|
||||
if err != nil || ev.TextDelta != "par" {
|
||||
t.Fatalf("first Next = (%+v, %v), want text delta", ev, err)
|
||||
}
|
||||
_, err = s.Next()
|
||||
if err == nil {
|
||||
t.Fatal("second Next succeeded, want mid-stream error")
|
||||
}
|
||||
apiErr, ok := errors.AsType[*llm.APIError](err)
|
||||
if !ok {
|
||||
t.Fatalf("error %T (%v), want *llm.APIError", err, err)
|
||||
}
|
||||
if apiErr.Code != "overloaded_error" || apiErr.Message != "Overloaded" || apiErr.Status != 0 {
|
||||
t.Errorf("apiErr = %+v", apiErr)
|
||||
}
|
||||
if llm.Classify(err) != llm.ClassTransient {
|
||||
t.Error("overloaded_error must classify transient")
|
||||
}
|
||||
}
|
||||
|
||||
func TestStreamHTTPErrorBeforeEvents(t *testing.T) {
|
||||
var c capture
|
||||
p := newTestProvider(t, c.handler(529,
|
||||
`{"type":"error","error":{"type":"overloaded_error","message":"Overloaded"}}`))
|
||||
_, err := mustModel(t, p, "claude-test").Stream(context.Background(),
|
||||
llm.Request{Messages: []llm.Message{llm.UserText("hi")}})
|
||||
if err == nil {
|
||||
t.Fatal("Stream succeeded, want APIError before any events")
|
||||
}
|
||||
apiErr, ok := errors.AsType[*llm.APIError](err)
|
||||
if !ok {
|
||||
t.Fatalf("error %T (%v), want *llm.APIError", err, err)
|
||||
}
|
||||
if apiErr.Status != 529 || apiErr.Code != "overloaded_error" {
|
||||
t.Errorf("apiErr = %+v, want 529 overloaded_error", apiErr)
|
||||
}
|
||||
if llm.Classify(err) != llm.ClassTransient {
|
||||
t.Error("529 must classify transient")
|
||||
}
|
||||
}
|
||||
|
||||
func TestStreamTruncatedBody(t *testing.T) {
|
||||
// Stream ends without message_stop: Next must surface unexpected EOF.
|
||||
body := sse(
|
||||
`{"type":"message_start","message":{"id":"msg_1","usage":{"input_tokens":5,"output_tokens":1}}}`,
|
||||
`{"type":"content_block_start","index":0,"content_block":{"type":"text","text":""}}`,
|
||||
`{"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"hi"}}`,
|
||||
)
|
||||
var c capture
|
||||
p := sseServer(t, &c, body)
|
||||
s := openStream(t, p, "claude-test")
|
||||
|
||||
if ev, err := s.Next(); err != nil || ev.TextDelta != "hi" {
|
||||
t.Fatalf("first Next = (%+v, %v)", ev, err)
|
||||
}
|
||||
if _, err := s.Next(); !errors.Is(err, io.ErrUnexpectedEOF) {
|
||||
t.Errorf("Next on truncated stream = %v, want io.ErrUnexpectedEOF", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestStreamCloseIsSafe(t *testing.T) {
|
||||
body := sse(
|
||||
`{"type":"message_start","message":{"id":"msg_1","usage":{"input_tokens":5,"output_tokens":1}}}`,
|
||||
`{"type":"content_block_start","index":0,"content_block":{"type":"text","text":""}}`,
|
||||
`{"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"hi"}}`,
|
||||
`{"type":"content_block_stop","index":0}`,
|
||||
`{"type":"message_delta","delta":{"stop_reason":"end_turn","stop_sequence":null},"usage":{"output_tokens":2}}`,
|
||||
`{"type":"message_stop"}`,
|
||||
)
|
||||
var c capture
|
||||
p := sseServer(t, &c, body)
|
||||
s := openStream(t, p, "claude-test")
|
||||
|
||||
if err := s.Close(); err != nil {
|
||||
t.Errorf("first Close: %v", err)
|
||||
}
|
||||
if err := s.Close(); err != nil {
|
||||
t.Errorf("second Close: %v", err)
|
||||
}
|
||||
|
||||
// After EOF, Close is still fine.
|
||||
s2 := openStream(t, p, "claude-test")
|
||||
drain(t, s2)
|
||||
if err := s2.Close(); err != nil {
|
||||
t.Errorf("Close after EOF: %v", err)
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,299 @@
|
||||
package anthropic
|
||||
|
||||
import (
|
||||
"encoding/base64"
|
||||
"encoding/json"
|
||||
"strings"
|
||||
|
||||
"gitea.stevedudenhoeffer.com/steve/majordomo/llm"
|
||||
)
|
||||
|
||||
// Wire types mirror the Messages API JSON shapes (June 2026 docs). Only the
|
||||
// fields majordomo uses are modeled; unknown response fields are ignored by
|
||||
// encoding/json.
|
||||
|
||||
type wireRequest struct {
|
||||
Model string `json:"model"`
|
||||
MaxTokens int `json:"max_tokens"`
|
||||
System string `json:"system,omitempty"`
|
||||
Messages []wireMessage `json:"messages"`
|
||||
Stream bool `json:"stream,omitempty"`
|
||||
Tools []wireTool `json:"tools,omitempty"`
|
||||
ToolChoice *wireToolChoice `json:"tool_choice,omitempty"`
|
||||
Temperature *float64 `json:"temperature,omitempty"`
|
||||
TopP *float64 `json:"top_p,omitempty"`
|
||||
StopSequences []string `json:"stop_sequences,omitempty"`
|
||||
OutputConfig *wireOutputConfig `json:"output_config,omitempty"`
|
||||
}
|
||||
|
||||
type wireMessage struct {
|
||||
Role string `json:"role"`
|
||||
Content []wireBlock `json:"content"`
|
||||
}
|
||||
|
||||
// wireBlock is a request-side content block. Exactly one shape is populated
|
||||
// per block, keyed by Type: text, image, tool_use, or tool_result.
|
||||
type wireBlock struct {
|
||||
Type string `json:"type"`
|
||||
|
||||
// text
|
||||
Text string `json:"text,omitempty"`
|
||||
|
||||
// image
|
||||
Source *wireImageSource `json:"source,omitempty"`
|
||||
|
||||
// tool_use
|
||||
ID string `json:"id,omitempty"`
|
||||
Name string `json:"name,omitempty"`
|
||||
Input json.RawMessage `json:"input,omitempty"`
|
||||
|
||||
// tool_result
|
||||
ToolUseID string `json:"tool_use_id,omitempty"`
|
||||
Content string `json:"content,omitempty"`
|
||||
IsError bool `json:"is_error,omitempty"`
|
||||
}
|
||||
|
||||
// wireImageSource is the "source" object of an image block: the source kind,
// the image media type, and the image data as a string.
type wireImageSource struct {
	Type      string `json:"type"`
	MediaType string `json:"media_type"`
	Data      string `json:"data"`
}
|
||||
|
||||
// wireTool is one tool definition in wireRequest.Tools. InputSchema is
// passed through as raw JSON and is always serialized; Description is
// omitted when empty.
type wireTool struct {
	Name        string          `json:"name"`
	Description string          `json:"description,omitempty"`
	InputSchema json.RawMessage `json:"input_schema"`
}
|
||||
|
||||
// wireToolChoice is the request's tool_choice object. Name is only
// serialized when non-empty.
type wireToolChoice struct {
	Type string `json:"type"`
	Name string `json:"name,omitempty"`
}
|
||||
|
||||
type wireOutputConfig struct {
|
||||
Format *wireOutputFormat `json:"format,omitempty"`
|
||||
}
|
||||
|
||||
// wireOutputFormat is the output_config.format object: a format type plus
// the schema, passed through as raw JSON.
type wireOutputFormat struct {
	Type   string          `json:"type"`
	Schema json.RawMessage `json:"schema"`
}
|
||||
|
||||
type wireResponse struct {
|
||||
ID string `json:"id"`
|
||||
Type string `json:"type"`
|
||||
Role string `json:"role"`
|
||||
Model string `json:"model"`
|
||||
Content []wireRespBlock `json:"content"`
|
||||
StopReason string `json:"stop_reason"`
|
||||
Usage wireUsage `json:"usage"`
|
||||
}
|
||||
|
||||
// wireRespBlock is one content block of a response. Text holds a block's
// text; ID, Name and Input hold a tool call's identity and its raw JSON
// input.
type wireRespBlock struct {
	Type  string          `json:"type"`
	Text  string          `json:"text"`
	ID    string          `json:"id"`
	Name  string          `json:"name"`
	Input json.RawMessage `json:"input"`
}
|
||||
|
||||
// wireUsage is the token accounting object of a response. The two cache
// counters are reported separately from InputTokens.
type wireUsage struct {
	InputTokens              int `json:"input_tokens"`
	OutputTokens             int `json:"output_tokens"`
	CacheCreationInputTokens int `json:"cache_creation_input_tokens"`
	CacheReadInputTokens     int `json:"cache_read_input_tokens"`
}
|
||||
|
||||
// toUsage maps API token accounting onto the canonical Usage. Why the sum:
|
||||
// the API's input_tokens counts only tokens after the last cache breakpoint;
|
||||
// real total input is input + cache_creation + cache_read.
|
||||
func (u wireUsage) toUsage() llm.Usage {
|
||||
return llm.Usage{
|
||||
InputTokens: u.InputTokens + u.CacheCreationInputTokens + u.CacheReadInputTokens,
|
||||
OutputTokens: u.OutputTokens,
|
||||
}
|
||||
}
|
||||
|
||||
// wireErrorEnvelope is the JSON shape of an error payload: an outer type
// plus a nested error object carrying the error's own type and message.
type wireErrorEnvelope struct {
	Type  string `json:"type"`
	Error struct {
		Type    string `json:"type"`
		Message string `json:"message"`
	} `json:"error"`
}
|
||||
|
||||
// buildWireRequest translates the canonical request into the Messages API
|
||||
// shape.
|
||||
//
|
||||
// Request.ReasoningEffort is intentionally ignored: the current Messages API
|
||||
// has no low/medium/high reasoning knob — thinking is adaptive on current
|
||||
// models, and the legacy budget/disable parameters 400 on them. The llm
|
||||
// contract says providers ignore ReasoningEffort where no mapping exists.
|
||||
//
|
||||
// Request.SchemaName is likewise ignored: output_config.format takes a bare
|
||||
// schema with no name field.
|
||||
func buildWireRequest(modelID string, req llm.Request, defaultMax int, stream bool) wireRequest {
|
||||
maxTokens := req.MaxTokens
|
||||
if maxTokens == 0 {
|
||||
// max_tokens is required by the API; 0 means "provider default".
|
||||
maxTokens = defaultMax
|
||||
}
|
||||
|
||||
wr := wireRequest{
|
||||
Model: modelID,
|
||||
MaxTokens: maxTokens,
|
||||
System: foldSystem(req),
|
||||
Messages: toWireMessages(req.Messages),
|
||||
Stream: stream,
|
||||
Tools: toWireTools(req.Tools),
|
||||
ToolChoice: toWireToolChoice(req.ToolChoice),
|
||||
Temperature: req.Temperature,
|
||||
TopP: req.TopP,
|
||||
StopSequences: req.StopSequences,
|
||||
}
|
||||
if req.Schema != nil {
|
||||
wr.OutputConfig = &wireOutputConfig{Format: &wireOutputFormat{
|
||||
Type: "json_schema",
|
||||
Schema: req.Schema,
|
||||
}}
|
||||
}
|
||||
return wr
|
||||
}
|
||||
|
||||
// foldSystem joins Request.System with the text of every RoleSystem message
|
||||
// (System field first, original order, "\n\n" separators). Why: the API
|
||||
// takes the system prompt as a top-level field and rejects system roles
|
||||
// inside messages, so canonical RoleSystem messages must fold in here.
|
||||
func foldSystem(req llm.Request) string {
|
||||
parts := make([]string, 0, 2)
|
||||
if req.System != "" {
|
||||
parts = append(parts, req.System)
|
||||
}
|
||||
for _, msg := range req.Messages {
|
||||
if msg.Role != llm.RoleSystem {
|
||||
continue
|
||||
}
|
||||
if text := msg.Text(); text != "" {
|
||||
parts = append(parts, text)
|
||||
}
|
||||
}
|
||||
return strings.Join(parts, "\n\n")
|
||||
}
|
||||
|
||||
func toWireMessages(msgs []llm.Message) []wireMessage {
|
||||
out := make([]wireMessage, 0, len(msgs))
|
||||
for _, msg := range msgs {
|
||||
switch msg.Role {
|
||||
case llm.RoleSystem:
|
||||
// Folded into the top-level system field by foldSystem.
|
||||
continue
|
||||
|
||||
case llm.RoleTool:
|
||||
// One user message carrying one tool_result block per result.
|
||||
blocks := make([]wireBlock, 0, len(msg.ToolResults))
|
||||
for _, res := range msg.ToolResults {
|
||||
blocks = append(blocks, wireBlock{
|
||||
Type: "tool_result",
|
||||
ToolUseID: res.ID,
|
||||
Content: res.Content,
|
||||
IsError: res.IsError,
|
||||
})
|
||||
}
|
||||
out = append(out, wireMessage{Role: "user", Content: blocks})
|
||||
|
||||
case llm.RoleAssistant:
|
||||
blocks := toWireBlocks(msg.Parts)
|
||||
for _, call := range msg.ToolCalls {
|
||||
args := call.Arguments
|
||||
if len(args) == 0 {
|
||||
// The API requires input to be a JSON object.
|
||||
args = json.RawMessage("{}")
|
||||
}
|
||||
blocks = append(blocks, wireBlock{
|
||||
Type: "tool_use",
|
||||
ID: call.ID,
|
||||
Name: call.Name,
|
||||
Input: args,
|
||||
})
|
||||
}
|
||||
out = append(out, wireMessage{Role: "assistant", Content: blocks})
|
||||
|
||||
default: // llm.RoleUser and anything unrecognized
|
||||
out = append(out, wireMessage{Role: "user", Content: toWireBlocks(msg.Parts)})
|
||||
}
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
func toWireBlocks(parts []llm.Part) []wireBlock {
|
||||
blocks := make([]wireBlock, 0, len(parts))
|
||||
for _, part := range parts {
|
||||
switch p := part.(type) {
|
||||
case llm.TextPart:
|
||||
blocks = append(blocks, wireBlock{Type: "text", Text: p.Text})
|
||||
case llm.ImagePart:
|
||||
blocks = append(blocks, wireBlock{Type: "image", Source: &wireImageSource{
|
||||
Type: "base64",
|
||||
MediaType: p.MIME,
|
||||
Data: base64.StdEncoding.EncodeToString(p.Data),
|
||||
}})
|
||||
}
|
||||
}
|
||||
return blocks
|
||||
}
|
||||
|
||||
func toWireTools(tools []llm.Tool) []wireTool {
|
||||
if len(tools) == 0 {
|
||||
return nil
|
||||
}
|
||||
out := make([]wireTool, 0, len(tools))
|
||||
for _, t := range tools {
|
||||
schema := t.Parameters
|
||||
if len(schema) == 0 {
|
||||
// Why: input_schema is required by the API; a tool with no
|
||||
// arguments still needs an (empty) object schema.
|
||||
schema = json.RawMessage(`{"type":"object","properties":{}}`)
|
||||
}
|
||||
out = append(out, wireTool{
|
||||
Name: t.Name,
|
||||
Description: t.Description,
|
||||
InputSchema: schema,
|
||||
})
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
// toWireToolChoice maps the canonical tool-choice policy. "" omits the field
|
||||
// (API default is auto); any value other than the three keywords names the
|
||||
// one tool the model must call.
|
||||
func toWireToolChoice(choice string) *wireToolChoice {
|
||||
switch choice {
|
||||
case "":
|
||||
return nil
|
||||
case "auto":
|
||||
return &wireToolChoice{Type: "auto"}
|
||||
case "required":
|
||||
return &wireToolChoice{Type: "any"}
|
||||
case "none":
|
||||
return &wireToolChoice{Type: "none"}
|
||||
default:
|
||||
return &wireToolChoice{Type: "tool", Name: choice}
|
||||
}
|
||||
}
|
||||
|
||||
// mapStopReason maps the API stop_reason onto the canonical FinishReason.
|
||||
func mapStopReason(stop string) llm.FinishReason {
|
||||
switch stop {
|
||||
case "end_turn", "stop_sequence":
|
||||
return llm.FinishStop
|
||||
case "max_tokens", "model_context_window_exceeded":
|
||||
return llm.FinishLength
|
||||
case "tool_use":
|
||||
return llm.FinishToolCalls
|
||||
case "refusal":
|
||||
return llm.FinishContentFilter
|
||||
default:
|
||||
// pause_turn and any future provider-specific reasons.
|
||||
return llm.FinishOther
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,168 @@
|
||||
// Package ollama implements majordomo's provider contract over Ollama's
|
||||
// native chat API (POST {base}/api/chat), targeted at three backends that
|
||||
// share one wire protocol:
|
||||
//
|
||||
// - a local Ollama instance (preset Local: OLLAMA_HOST or
|
||||
// http://localhost:11434, no auth),
|
||||
// - Ollama Cloud (preset Cloud: https://ollama.com, bearer key from
|
||||
// OLLAMA_API_KEY), and
|
||||
// - foreman, Steve's native-Ollama queue daemon (preset Foreman: explicit
|
||||
// base URL + bearer token).
|
||||
//
|
||||
// Wire surface verified against docs.ollama.com and ollama/ollama
|
||||
// docs/api.md + api/types.go (June 2026): NDJSON streaming (stream defaults
|
||||
// true server-side — Generate always sends stream:false explicitly);
|
||||
// tool_calls carry arguments as a JSON OBJECT (not a string); tool results
|
||||
// return as {"role":"tool","content",...,"tool_name"}; structured output
|
||||
// via "format" (a full JSON-schema object); thinking via the bool-or-string
|
||||
// "think" field; errors as {"error":"message"} with a non-2xx status.
|
||||
//
|
||||
// foreman deviation (verified in its source): sync /api/chat does not
|
||||
// stream — a stream:true request yields ONE buffered application/json
|
||||
// object. The NDJSON reader here handles that transparently (a single JSON
|
||||
// line parses as the final chunk).
|
||||
package ollama
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"net/http"
|
||||
"os"
|
||||
"strings"
|
||||
|
||||
"gitea.stevedudenhoeffer.com/steve/majordomo/llm"
|
||||
)
|
||||
|
||||
// Default base URLs used by the presets.
const (
	// DefaultLocalBaseURL is the default base URL for a locally-running Ollama.
	DefaultLocalBaseURL = "http://localhost:11434"

	// DefaultCloudBaseURL is the base URL for Ollama Cloud.
	DefaultCloudBaseURL = "https://ollama.com"
)
|
||||
|
||||
// defaultCapabilities is the conservative provider-wide default; individual
|
||||
// models (e.g. high-resolution vision tags) override via llm.WithCapabilities.
|
||||
var defaultCapabilities = llm.Capabilities{
|
||||
SupportsTools: true,
|
||||
SupportsStructured: true,
|
||||
SupportsStreaming: true,
|
||||
MaxImagesPerReq: 8,
|
||||
MaxImageBytes: 20 << 20,
|
||||
MaxImageDimension: 2048,
|
||||
AllowedImageMIME: []string{"image/jpeg", "image/png"},
|
||||
}
|
||||
|
||||
// Provider is a native-Ollama chat client bound to one base URL.
|
||||
type Provider struct {
|
||||
name string
|
||||
baseURL string
|
||||
token string
|
||||
client *http.Client
|
||||
caps llm.Capabilities
|
||||
}
|
||||
|
||||
// Option configures the provider.
|
||||
type Option func(*Provider)
|
||||
|
||||
// WithName overrides the registry name (default "ollama").
|
||||
func WithName(name string) Option { return func(p *Provider) { p.name = name } }
|
||||
|
||||
// WithBaseURL sets the backend base URL (scheme://host[:port][/path]).
|
||||
func WithBaseURL(u string) Option {
|
||||
return func(p *Provider) { p.baseURL = strings.TrimRight(u, "/") }
|
||||
}
|
||||
|
||||
// WithToken sets the bearer token (Ollama Cloud key / foreman token).
|
||||
// Empty means no Authorization header (local mode).
|
||||
func WithToken(token string) Option { return func(p *Provider) { p.token = token } }
|
||||
|
||||
// WithHTTPClient overrides the HTTP client (proxies, test TLS, timeouts —
|
||||
// note foreman sync chat long-polls; prefer context deadlines over client
|
||||
// timeouts).
|
||||
func WithHTTPClient(c *http.Client) Option { return func(p *Provider) { p.client = c } }
|
||||
|
||||
// WithDefaultCapabilities overrides the provider-wide default capabilities.
|
||||
func WithDefaultCapabilities(caps llm.Capabilities) Option {
|
||||
return func(p *Provider) { p.caps = caps }
|
||||
}
|
||||
|
||||
// New creates a generic native-Ollama provider. Most callers want one of
|
||||
// the presets (Local, Cloud, Foreman) or an LLM_* env DSN instead.
|
||||
// Construction never fails; a missing base URL surfaces at request time.
|
||||
func New(opts ...Option) *Provider {
|
||||
p := &Provider{
|
||||
name: "ollama",
|
||||
client: &http.Client{},
|
||||
caps: defaultCapabilities,
|
||||
}
|
||||
for _, opt := range opts {
|
||||
opt(p)
|
||||
}
|
||||
return p
|
||||
}
|
||||
|
||||
// Local returns the local-Ollama preset: name "ollama", base URL from
|
||||
// OLLAMA_HOST (normalized per Ollama conventions) or localhost:11434.
|
||||
func Local(opts ...Option) *Provider {
|
||||
base := DefaultLocalBaseURL
|
||||
if h := os.Getenv("OLLAMA_HOST"); h != "" {
|
||||
base = NormalizeHost(h)
|
||||
}
|
||||
return New(append([]Option{WithBaseURL(base)}, opts...)...)
|
||||
}
|
||||
|
||||
// Cloud returns the Ollama Cloud preset: name "ollama-cloud",
|
||||
// https://ollama.com, bearer key from OLLAMA_API_KEY.
|
||||
func Cloud(opts ...Option) *Provider {
|
||||
return New(append([]Option{
|
||||
WithName("ollama-cloud"),
|
||||
WithBaseURL(DefaultCloudBaseURL),
|
||||
WithToken(os.Getenv("OLLAMA_API_KEY")),
|
||||
}, opts...)...)
|
||||
}
|
||||
|
||||
// Foreman returns a foreman preset bound to the given daemon.
|
||||
func Foreman(baseURL, token string, opts ...Option) *Provider {
|
||||
return New(append([]Option{
|
||||
WithName("foreman"),
|
||||
WithBaseURL(baseURL),
|
||||
WithToken(token),
|
||||
}, opts...)...)
|
||||
}
|
||||
|
||||
// NormalizeHost turns an OLLAMA_HOST-style value into a base URL.
// Surrounding whitespace and trailing slashes are removed first, then:
//
//	"host"        → http://host:11434
//	"host:port"   → http://host:port
//	"[::1]"       → http://[::1]:11434
//	"[::1]:port"  → http://[::1]:port
//	"::1"         → http://[::1]:11434
//	full URLs     → returned as-is (anything containing "://")
//
// Why IPv6 is handled separately: an IPv6 literal always contains colons, so
// "has a colon" cannot be used to decide whether a port is present, and a
// bare literal has to be bracketed to form a valid URL host.
func NormalizeHost(h string) string {
	h = strings.TrimRight(strings.TrimSpace(h), "/")
	if strings.Contains(h, "://") {
		return h
	}

	switch {
	case strings.HasPrefix(h, "["):
		// Bracketed IPv6: a value ending in "]" carries no port.
		if strings.HasSuffix(h, "]") {
			h += ":11434"
		}
	case strings.Count(h, ":") > 1:
		// Bare IPv6 literal: more than one colon cannot be host:port.
		h = "[" + h + "]:11434"
	case !strings.Contains(h, ":"):
		h += ":11434"
	}
	return "http://" + h
}
|
||||
|
||||
// Name implements llm.Provider.
|
||||
func (p *Provider) Name() string { return p.name }
|
||||
|
||||
// BaseURL reports the configured backend base URL (diagnostics).
|
||||
func (p *Provider) BaseURL() string { return p.baseURL }
|
||||
|
||||
// Model implements llm.Provider; the id passes through verbatim.
|
||||
func (p *Provider) Model(id string, opts ...llm.ModelOption) (llm.Model, error) {
|
||||
cfg := llm.ApplyModelOptions(opts)
|
||||
caps := p.caps
|
||||
if cfg.Capabilities != nil {
|
||||
caps = *cfg.Capabilities
|
||||
}
|
||||
return &model{provider: p, id: id, caps: caps}, nil
|
||||
}
|
||||
|
||||
// checkReady reports a usable configuration (a base URL is the only hard
|
||||
// requirement; auth problems surface as 401s from the backend).
|
||||
func (p *Provider) checkReady() error {
|
||||
if p.baseURL == "" {
|
||||
return fmt.Errorf("ollama provider %q: no base URL configured (set one via the preset, WithBaseURL, or an LLM_* env DSN)", p.name)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
@@ -0,0 +1,492 @@
|
||||
package ollama
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/base64"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"io"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"gitea.stevedudenhoeffer.com/steve/majordomo/llm"
|
||||
)
|
||||
|
||||
// captured holds what the test server built by serve recorded about the
// request it received.
type captured struct {
	auth        string         // Authorization request header
	contentType string         // Content-Type request header
	path        string         // request URL path
	body        map[string]any // request body decoded as JSON
	raw         []byte         // request body bytes as received
}
|
||||
|
||||
func serve(t *testing.T, status int, respond func(w http.ResponseWriter)) (*Provider, *captured) {
|
||||
t.Helper()
|
||||
cap := &captured{}
|
||||
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
cap.auth = r.Header.Get("Authorization")
|
||||
cap.contentType = r.Header.Get("Content-Type")
|
||||
cap.path = r.URL.Path
|
||||
cap.raw, _ = io.ReadAll(r.Body)
|
||||
_ = json.Unmarshal(cap.raw, &cap.body)
|
||||
w.WriteHeader(status)
|
||||
respond(w)
|
||||
}))
|
||||
t.Cleanup(ts.Close)
|
||||
return New(WithBaseURL(ts.URL), WithToken("test-token")), cap
|
||||
}
|
||||
|
||||
// jsonReply returns a respond callback for serve that writes obj verbatim as
// the response body. The write error is ignored.
func jsonReply(obj string) func(w http.ResponseWriter) {
	write := func(w http.ResponseWriter) {
		_, _ = io.WriteString(w, obj)
	}
	return write
}
|
||||
|
||||
func basicRequest() llm.Request {
|
||||
return llm.Request{Messages: []llm.Message{llm.UserText("hi")}}
|
||||
}
|
||||
|
||||
func TestGenerateRoundTrip(t *testing.T) {
|
||||
p, cap := serve(t, 200, jsonReply(`{
|
||||
"model":"qwen3:30b",
|
||||
"message":{"role":"assistant","content":"hello there"},
|
||||
"done":true,"done_reason":"stop",
|
||||
"prompt_eval_count":12,"eval_count":7
|
||||
}`))
|
||||
|
||||
m, _ := p.Model("qwen3:30b")
|
||||
temp := 0.2
|
||||
resp, err := m.Generate(context.Background(), llm.Request{
|
||||
System: "be terse",
|
||||
Messages: []llm.Message{llm.SystemText("extra sys"), llm.UserText("hi")},
|
||||
Temperature: &temp,
|
||||
MaxTokens: 64,
|
||||
StopSequences: []string{"END"},
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("Generate: %v", err)
|
||||
}
|
||||
|
||||
// Wire assertions.
|
||||
if cap.path != "/api/chat" {
|
||||
t.Errorf("path = %q", cap.path)
|
||||
}
|
||||
if cap.auth != "Bearer test-token" {
|
||||
t.Errorf("auth = %q", cap.auth)
|
||||
}
|
||||
if cap.body["model"] != "qwen3:30b" {
|
||||
t.Errorf("model = %v", cap.body["model"])
|
||||
}
|
||||
if stream, ok := cap.body["stream"].(bool); !ok || stream {
|
||||
t.Errorf("stream must be explicit false, got %v", cap.body["stream"])
|
||||
}
|
||||
msgs := cap.body["messages"].([]any)
|
||||
first := msgs[0].(map[string]any)
|
||||
if first["role"] != "system" || first["content"] != "be terse\n\nextra sys" {
|
||||
t.Errorf("system fold = %v", first)
|
||||
}
|
||||
second := msgs[1].(map[string]any)
|
||||
if second["role"] != "user" || second["content"] != "hi" {
|
||||
t.Errorf("user msg = %v", second)
|
||||
}
|
||||
opts := cap.body["options"].(map[string]any)
|
||||
if opts["temperature"] != 0.2 || opts["num_predict"] != float64(64) {
|
||||
t.Errorf("options = %v", opts)
|
||||
}
|
||||
|
||||
// Response assertions.
|
||||
if resp.Text() != "hello there" {
|
||||
t.Errorf("text = %q", resp.Text())
|
||||
}
|
||||
if resp.FinishReason != llm.FinishStop {
|
||||
t.Errorf("finish = %v", resp.FinishReason)
|
||||
}
|
||||
if resp.Usage.InputTokens != 12 || resp.Usage.OutputTokens != 7 {
|
||||
t.Errorf("usage = %+v", resp.Usage)
|
||||
}
|
||||
if resp.Model != "ollama/qwen3:30b" {
|
||||
t.Errorf("resp.Model = %q", resp.Model)
|
||||
}
|
||||
}
|
||||
|
||||
func TestImagesEncodeAsBase64(t *testing.T) {
|
||||
p, cap := serve(t, 200, jsonReply(`{"message":{"role":"assistant","content":"a cat"},"done":true,"done_reason":"stop"}`))
|
||||
imgBytes := []byte{0xFF, 0xD8, 0xFF, 0xE0, 1, 2, 3}
|
||||
|
||||
m, _ := p.Model("llava")
|
||||
_, err := m.Generate(context.Background(), llm.Request{
|
||||
Messages: []llm.Message{llm.UserParts(llm.Text("describe"), llm.Image("image/jpeg", imgBytes))},
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("Generate: %v", err)
|
||||
}
|
||||
msgs := cap.body["messages"].([]any)
|
||||
user := msgs[0].(map[string]any)
|
||||
images := user["images"].([]any)
|
||||
if len(images) != 1 || images[0] != base64.StdEncoding.EncodeToString(imgBytes) {
|
||||
t.Errorf("images = %v", images)
|
||||
}
|
||||
if strings.Contains(images[0].(string), "data:") {
|
||||
t.Error("images must be raw base64 without data: prefix")
|
||||
}
|
||||
}
|
||||
|
||||
func TestToolsAndToolCallRoundTrip(t *testing.T) {
|
||||
p, cap := serve(t, 200, jsonReply(`{
|
||||
"message":{"role":"assistant","content":"","tool_calls":[
|
||||
{"function":{"index":0,"name":"get_weather","arguments":{"city":"Tokyo"}}}
|
||||
]},
|
||||
"done":true,"done_reason":"stop"
|
||||
}`))
|
||||
|
||||
tool := llm.Tool{
|
||||
Name: "get_weather", Description: "weather",
|
||||
Parameters: json.RawMessage(`{"type":"object","properties":{"city":{"type":"string"}},"required":["city"]}`),
|
||||
}
|
||||
m, _ := p.Model("qwen3")
|
||||
resp, err := m.Generate(context.Background(), basicRequest(), llm.WithTools(tool))
|
||||
if err != nil {
|
||||
t.Fatalf("Generate: %v", err)
|
||||
}
|
||||
|
||||
// Tools serialize with parameters as an object.
|
||||
tools := cap.body["tools"].([]any)
|
||||
fn := tools[0].(map[string]any)["function"].(map[string]any)
|
||||
if fn["name"] != "get_weather" {
|
||||
t.Errorf("tool fn = %v", fn)
|
||||
}
|
||||
if _, ok := fn["parameters"].(map[string]any); !ok {
|
||||
t.Errorf("parameters must be an object, got %T", fn["parameters"])
|
||||
}
|
||||
|
||||
// Tool call comes back with arguments as a JSON object → RawMessage.
|
||||
if len(resp.ToolCalls) != 1 {
|
||||
t.Fatalf("tool calls = %v", resp.ToolCalls)
|
||||
}
|
||||
tc := resp.ToolCalls[0]
|
||||
if tc.Name != "get_weather" || tc.ID == "" {
|
||||
t.Errorf("call = %+v (id must be synthesized)", tc)
|
||||
}
|
||||
var args struct {
|
||||
City string `json:"city"`
|
||||
}
|
||||
if err := json.Unmarshal(tc.Arguments, &args); err != nil || args.City != "Tokyo" {
|
||||
t.Errorf("arguments = %s (%v)", tc.Arguments, err)
|
||||
}
|
||||
if resp.FinishReason != llm.FinishToolCalls {
|
||||
t.Errorf("finish = %v, want tool_calls", resp.FinishReason)
|
||||
}
|
||||
}
|
||||
|
||||
func TestToolResultsAndHistoryToolCalls(t *testing.T) {
|
||||
p, cap := serve(t, 200, jsonReply(`{"message":{"role":"assistant","content":"21C"},"done":true,"done_reason":"stop"}`))
|
||||
|
||||
m, _ := p.Model("qwen3")
|
||||
_, err := m.Generate(context.Background(), llm.Request{
|
||||
Messages: []llm.Message{
|
||||
llm.UserText("weather?"),
|
||||
{Role: llm.RoleAssistant, ToolCalls: []llm.ToolCall{
|
||||
{ID: "call_0", Name: "get_weather", Arguments: json.RawMessage(`{"city":"Tokyo"}`)},
|
||||
}},
|
||||
llm.ToolResultsMessage(
|
||||
llm.ToolResult{ID: "call_0", Name: "get_weather", Content: `{"temp":21}`},
|
||||
llm.ToolResult{ID: "call_1", Name: "broken_tool", Content: "boom", IsError: true},
|
||||
),
|
||||
},
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("Generate: %v", err)
|
||||
}
|
||||
msgs := cap.body["messages"].([]any)
|
||||
if len(msgs) != 4 {
|
||||
t.Fatalf("messages = %d, want 4 (user, assistant, 2 tool results)", len(msgs))
|
||||
}
|
||||
asst := msgs[1].(map[string]any)
|
||||
calls := asst["tool_calls"].([]any)
|
||||
args := calls[0].(map[string]any)["function"].(map[string]any)["arguments"]
|
||||
if _, ok := args.(map[string]any); !ok {
|
||||
t.Errorf("history tool-call arguments must be an object, got %T", args)
|
||||
}
|
||||
tr1 := msgs[2].(map[string]any)
|
||||
if tr1["role"] != "tool" || tr1["tool_name"] != "get_weather" || tr1["content"] != `{"temp":21}` {
|
||||
t.Errorf("tool result 1 = %v", tr1)
|
||||
}
|
||||
tr2 := msgs[3].(map[string]any)
|
||||
if tr2["content"] != "ERROR: boom" {
|
||||
t.Errorf("error result content = %v", tr2["content"])
|
||||
}
|
||||
}
|
||||
|
||||
func TestStructuredOutputFormat(t *testing.T) {
|
||||
p, cap := serve(t, 200, jsonReply(`{"message":{"role":"assistant","content":"{\"name\":\"Ada\"}"},"done":true,"done_reason":"stop"}`))
|
||||
schema := json.RawMessage(`{"type":"object","properties":{"name":{"type":"string"}},"required":["name"]}`)
|
||||
|
||||
m, _ := p.Model("qwen3")
|
||||
resp, err := m.Generate(context.Background(), basicRequest(), llm.WithSchema(schema, "person"))
|
||||
if err != nil {
|
||||
t.Fatalf("Generate: %v", err)
|
||||
}
|
||||
format, ok := cap.body["format"].(map[string]any)
|
||||
if !ok || format["type"] != "object" {
|
||||
t.Errorf("format = %v, want the schema object", cap.body["format"])
|
||||
}
|
||||
if resp.Text() != `{"name":"Ada"}` {
|
||||
t.Errorf("text = %q", resp.Text())
|
||||
}
|
||||
}
|
||||
|
||||
func TestThinkMapping(t *testing.T) {
|
||||
p, cap := serve(t, 200, jsonReply(`{"message":{"role":"assistant","content":"ok"},"done":true,"done_reason":"stop"}`))
|
||||
m, _ := p.Model("gpt-oss:120b")
|
||||
_, err := m.Generate(context.Background(), basicRequest(), llm.WithReasoningEffort("high"))
|
||||
if err != nil {
|
||||
t.Fatalf("Generate: %v", err)
|
||||
}
|
||||
if cap.body["think"] != "high" {
|
||||
t.Errorf("think = %v", cap.body["think"])
|
||||
}
|
||||
|
||||
if _, err := m.Generate(context.Background(), basicRequest(), llm.WithReasoningEffort("max")); err == nil {
|
||||
t.Error("invalid reasoning effort should error")
|
||||
}
|
||||
}
|
||||
|
||||
func TestToolChoiceNoneDropsTools(t *testing.T) {
|
||||
p, cap := serve(t, 200, jsonReply(`{"message":{"role":"assistant","content":"ok"},"done":true,"done_reason":"stop"}`))
|
||||
m, _ := p.Model("qwen3")
|
||||
_, err := m.Generate(context.Background(), basicRequest(),
|
||||
llm.WithTools(llm.Tool{Name: "t"}), llm.WithToolChoice("none"))
|
||||
if err != nil {
|
||||
t.Fatalf("Generate: %v", err)
|
||||
}
|
||||
if _, present := cap.body["tools"]; present {
|
||||
t.Error("tool_choice none must omit tools")
|
||||
}
|
||||
}
|
||||
|
||||
func TestStreamingNDJSON(t *testing.T) {
|
||||
p, _ := serve(t, 200, func(w http.ResponseWriter) {
|
||||
w.Header().Set("Content-Type", "application/x-ndjson")
|
||||
_, _ = io.WriteString(w, `{"message":{"role":"assistant","content":"Hel"},"done":false}
|
||||
{"message":{"role":"assistant","content":"lo"},"done":false}
|
||||
{"message":{"role":"assistant","content":"","tool_calls":[{"function":{"name":"ping","arguments":{}}}]},"done":false}
|
||||
{"message":{"role":"assistant","content":""},"done":true,"done_reason":"stop","prompt_eval_count":5,"eval_count":9}
|
||||
`)
|
||||
})
|
||||
|
||||
m, _ := p.Model("qwen3")
|
||||
s, err := m.Stream(context.Background(), basicRequest())
|
||||
if err != nil {
|
||||
t.Fatalf("Stream: %v", err)
|
||||
}
|
||||
defer s.Close()
|
||||
|
||||
var text strings.Builder
|
||||
var toolCalls []llm.ToolCall
|
||||
var final *llm.Response
|
||||
for {
|
||||
ev, err := s.Next()
|
||||
if errors.Is(err, io.EOF) {
|
||||
break
|
||||
}
|
||||
if err != nil {
|
||||
t.Fatalf("Next: %v", err)
|
||||
}
|
||||
text.WriteString(ev.TextDelta)
|
||||
if ev.ToolCall != nil {
|
||||
toolCalls = append(toolCalls, *ev.ToolCall)
|
||||
}
|
||||
if ev.Response != nil {
|
||||
final = ev.Response
|
||||
}
|
||||
}
|
||||
if text.String() != "Hello" {
|
||||
t.Errorf("text = %q", text.String())
|
||||
}
|
||||
if len(toolCalls) != 1 || toolCalls[0].Name != "ping" {
|
||||
t.Errorf("tool calls = %+v", toolCalls)
|
||||
}
|
||||
if final == nil {
|
||||
t.Fatal("no final response event")
|
||||
}
|
||||
if final.Usage.InputTokens != 5 || final.Usage.OutputTokens != 9 {
|
||||
t.Errorf("final usage = %+v", final.Usage)
|
||||
}
|
||||
if final.FinishReason != llm.FinishToolCalls {
|
||||
t.Errorf("final finish = %v", final.FinishReason)
|
||||
}
|
||||
if final.Text() != "Hello" {
|
||||
t.Errorf("final text = %q", final.Text())
|
||||
}
|
||||
}
|
||||
|
||||
// TestStreamingForemanSingleObject: foreman returns one buffered JSON
|
||||
// object to a stream:true request; the stream must still deliver the text
|
||||
// and a final response.
|
||||
func TestStreamingForemanSingleObject(t *testing.T) {
|
||||
p, cap := serve(t, 200, func(w http.ResponseWriter) {
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
_, _ = io.WriteString(w, `{"message":{"role":"assistant","content":"queued answer"},"done":true,"done_reason":"stop","prompt_eval_count":3,"eval_count":4}`)
|
||||
})
|
||||
|
||||
m, _ := p.Model("qwen3:30b")
|
||||
s, err := m.Stream(context.Background(), basicRequest())
|
||||
if err != nil {
|
||||
t.Fatalf("Stream: %v", err)
|
||||
}
|
||||
defer s.Close()
|
||||
|
||||
if stream, ok := cap.body["stream"].(bool); !ok || !stream {
|
||||
t.Errorf("stream flag = %v, want true", cap.body["stream"])
|
||||
}
|
||||
|
||||
var text strings.Builder
|
||||
var final *llm.Response
|
||||
for {
|
||||
ev, err := s.Next()
|
||||
if errors.Is(err, io.EOF) {
|
||||
break
|
||||
}
|
||||
if err != nil {
|
||||
t.Fatalf("Next: %v", err)
|
||||
}
|
||||
text.WriteString(ev.TextDelta)
|
||||
if ev.Response != nil {
|
||||
final = ev.Response
|
||||
}
|
||||
}
|
||||
if text.String() != "queued answer" || final == nil || final.Usage.OutputTokens != 4 {
|
||||
t.Errorf("text=%q final=%+v", text.String(), final)
|
||||
}
|
||||
}
|
||||
|
||||
func TestErrorMapping(t *testing.T) {
|
||||
t.Run("404 is model-not-found", func(t *testing.T) {
|
||||
p, _ := serve(t, 404, jsonReply(`{"error":"model not found"}`))
|
||||
m, _ := p.Model("nope")
|
||||
_, err := m.Generate(context.Background(), basicRequest())
|
||||
if !errors.Is(err, llm.ErrModelNotFound) {
|
||||
t.Errorf("error = %v, want ErrModelNotFound", err)
|
||||
}
|
||||
if llm.Classify(err) != llm.ClassPermanent {
|
||||
t.Error("404 must classify permanent")
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("503 transient with message", func(t *testing.T) {
|
||||
p, _ := serve(t, 503, jsonReply(`{"error":"request cancelled while waiting"}`))
|
||||
m, _ := p.Model("qwen3")
|
||||
_, err := m.Generate(context.Background(), basicRequest())
|
||||
var apiErr *llm.APIError
|
||||
if !errors.As(err, &apiErr) || apiErr.Status != 503 || !strings.Contains(apiErr.Message, "cancelled") {
|
||||
t.Errorf("error = %v", err)
|
||||
}
|
||||
if llm.Classify(err) != llm.ClassTransient {
|
||||
t.Error("503 must classify transient")
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("non-JSON error body", func(t *testing.T) {
|
||||
p, _ := serve(t, 500, jsonReply(`upstream exploded`))
|
||||
m, _ := p.Model("qwen3")
|
||||
_, err := m.Generate(context.Background(), basicRequest())
|
||||
var apiErr *llm.APIError
|
||||
if !errors.As(err, &apiErr) || !strings.Contains(apiErr.Message, "upstream exploded") {
|
||||
t.Errorf("error = %v", err)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func TestCapabilityEnforcement(t *testing.T) {
|
||||
p, _ := serve(t, 200, jsonReply(`{"message":{"content":"x"},"done":true}`))
|
||||
|
||||
t.Run("too many images", func(t *testing.T) {
|
||||
m, _ := p.Model("llava", llm.WithCapabilities(llm.Capabilities{MaxImagesPerReq: 1, AllowedImageMIME: []string{"image/png"}}))
|
||||
_, err := m.Generate(context.Background(), llm.Request{Messages: []llm.Message{
|
||||
llm.UserParts(llm.Image("image/png", []byte{1}), llm.Image("image/png", []byte{2})),
|
||||
}})
|
||||
if !errors.Is(err, llm.ErrUnsupported) {
|
||||
t.Errorf("error = %v, want ErrUnsupported", err)
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("images on text-only model", func(t *testing.T) {
|
||||
m, _ := p.Model("qwen3", llm.WithCapabilities(llm.Capabilities{}))
|
||||
_, err := m.Generate(context.Background(), llm.Request{Messages: []llm.Message{
|
||||
llm.UserParts(llm.Image("image/png", []byte{1})),
|
||||
}})
|
||||
if !errors.Is(err, llm.ErrUnsupported) {
|
||||
t.Errorf("error = %v, want ErrUnsupported", err)
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("disallowed mime", func(t *testing.T) {
|
||||
m, _ := p.Model("llava") // default caps: jpeg/png only
|
||||
_, err := m.Generate(context.Background(), llm.Request{Messages: []llm.Message{
|
||||
llm.UserParts(llm.Image("image/tiff", []byte{1})),
|
||||
}})
|
||||
if !errors.Is(err, llm.ErrUnsupported) {
|
||||
t.Errorf("error = %v, want ErrUnsupported", err)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func TestNoBaseURL(t *testing.T) {
|
||||
p := New(WithBaseURL(""))
|
||||
m, _ := p.Model("x")
|
||||
if _, err := m.Generate(context.Background(), basicRequest()); err == nil ||
|
||||
!strings.Contains(err.Error(), "no base URL") {
|
||||
t.Errorf("error = %v, want a clear no-base-URL message", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestNormalizeHost(t *testing.T) {
|
||||
for in, want := range map[string]string{
|
||||
"myhost": "http://myhost:11434",
|
||||
"myhost:8080": "http://myhost:8080",
|
||||
"http://myhost:8080/": "http://myhost:8080",
|
||||
"https://ollama.com": "https://ollama.com",
|
||||
" 127.0.0.1:11434 ": "http://127.0.0.1:11434",
|
||||
} {
|
||||
if got := NormalizeHost(in); got != want {
|
||||
t.Errorf("NormalizeHost(%q) = %q, want %q", in, got, want)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestPresets(t *testing.T) {
|
||||
t.Run("cloud", func(t *testing.T) {
|
||||
t.Setenv("OLLAMA_API_KEY", "cloud-key")
|
||||
p := Cloud()
|
||||
if p.Name() != "ollama-cloud" || p.baseURL != DefaultCloudBaseURL || p.token != "cloud-key" {
|
||||
t.Errorf("cloud preset = %+v", p)
|
||||
}
|
||||
})
|
||||
t.Run("local respects OLLAMA_HOST", func(t *testing.T) {
|
||||
t.Setenv("OLLAMA_HOST", "box.lan:9999")
|
||||
p := Local()
|
||||
if p.Name() != "ollama" || p.baseURL != "http://box.lan:9999" || p.token != "" {
|
||||
t.Errorf("local preset = %+v", p)
|
||||
}
|
||||
})
|
||||
t.Run("foreman", func(t *testing.T) {
|
||||
p := Foreman("http://foreman-m1:8080", "tok")
|
||||
if p.Name() != "foreman" || p.baseURL != "http://foreman-m1:8080" || p.token != "tok" {
|
||||
t.Errorf("foreman preset = %+v", p)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func TestLocalNoAuthHeader(t *testing.T) {
|
||||
p, cap := serve(t, 200, jsonReply(`{"message":{"content":"x"},"done":true}`))
|
||||
p.token = "" // simulate local mode on the test server
|
||||
m, _ := p.Model("llama3")
|
||||
if _, err := m.Generate(context.Background(), basicRequest()); err != nil {
|
||||
t.Fatalf("Generate: %v", err)
|
||||
}
|
||||
if cap.auth != "" {
|
||||
t.Errorf("auth header = %q, want none in local mode", cap.auth)
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,140 @@
|
||||
package ollama
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"strconv"
|
||||
"sync"
|
||||
|
||||
"gitea.stevedudenhoeffer.com/steve/majordomo/llm"
|
||||
)
|
||||
|
||||
// Stream implements llm.Model over Ollama's NDJSON streaming. It also
|
||||
// transparently handles foreman's non-streaming degradation (a single
|
||||
// buffered JSON object): one JSON line parses as the final chunk.
|
||||
func (m *model) Stream(ctx context.Context, req llm.Request, opts ...llm.Option) (llm.Stream, error) {
|
||||
req = req.Apply(opts...)
|
||||
if err := m.enforceCapabilities(req); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
wireReq, err := m.buildRequest(req, true)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
resp, err := m.do(ctx, wireReq)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
sc := bufio.NewScanner(resp.Body)
|
||||
// Single NDJSON lines can far exceed the 64KB default (thinking dumps,
|
||||
// tool payloads, foreman's whole-response-as-one-line degradation).
|
||||
sc.Buffer(make([]byte, 64<<10), 16<<20)
|
||||
|
||||
return &stream{model: m, body: resp.Body, scanner: sc}, nil
|
||||
}
|
||||
|
||||
// stream is the value Stream returns: it reads the response body line by
// line and turns each decoded chunk into queued stream events.
type stream struct {
	model   *model         // supplies the qualified name for errors and the final response
	body    io.Closer      // response body; closed by Close
	scanner *bufio.Scanner // line reader over the response body

	mu         sync.Mutex        // held for the whole of Next and Close
	closed     bool              // set by Close so a repeated Close is a no-op
	finished   bool              // set by queueFinal; once pending drains, Next returns io.EOF
	toolCalls  []llm.ToolCall    // every tool call seen so far, in arrival order
	text       []byte            // concatenation of all content deltas seen so far
	pending    []llm.StreamEvent // events decoded but not yet handed out by Next
	usage      llm.Usage         // token counts copied from the chunk with done=true
	doneReason string            // done_reason copied from the chunk with done=true
}
|
||||
|
||||
func (s *stream) Next() (llm.StreamEvent, error) {
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
|
||||
for {
|
||||
if len(s.pending) > 0 {
|
||||
ev := s.pending[0]
|
||||
s.pending = s.pending[1:]
|
||||
return ev, nil
|
||||
}
|
||||
if s.finished {
|
||||
return llm.StreamEvent{}, io.EOF
|
||||
}
|
||||
if !s.scanner.Scan() {
|
||||
if err := s.scanner.Err(); err != nil {
|
||||
return llm.StreamEvent{}, fmt.Errorf("ollama %s: read stream: %w", s.model.qualified(), err)
|
||||
}
|
||||
// EOF without a done chunk: synthesize the final response from
|
||||
// what we accumulated rather than losing it.
|
||||
s.queueFinal()
|
||||
continue
|
||||
}
|
||||
line := s.scanner.Bytes()
|
||||
if len(line) == 0 {
|
||||
continue
|
||||
}
|
||||
|
||||
var chunk chatResponse
|
||||
if err := json.Unmarshal(line, &chunk); err != nil {
|
||||
return llm.StreamEvent{}, fmt.Errorf("ollama %s: decode stream chunk: %w", s.model.qualified(), err)
|
||||
}
|
||||
|
||||
if chunk.Message.Content != "" {
|
||||
s.text = append(s.text, chunk.Message.Content...)
|
||||
s.pending = append(s.pending, llm.StreamEvent{TextDelta: chunk.Message.Content})
|
||||
}
|
||||
// Tool calls arrive complete per chunk (no partial-argument deltas
|
||||
// in the native protocol).
|
||||
base := len(s.toolCalls)
|
||||
for i, tc := range chunk.Message.ToolCalls {
|
||||
id := tc.ID
|
||||
if id == "" {
|
||||
id = "call_" + strconv.Itoa(base+i)
|
||||
}
|
||||
args := tc.Function.Arguments
|
||||
if len(args) == 0 {
|
||||
args = json.RawMessage("{}")
|
||||
}
|
||||
call := llm.ToolCall{ID: id, Name: tc.Function.Name, Arguments: args}
|
||||
s.toolCalls = append(s.toolCalls, call)
|
||||
s.pending = append(s.pending, llm.StreamEvent{ToolCall: &s.toolCalls[len(s.toolCalls)-1]})
|
||||
}
|
||||
if chunk.Done {
|
||||
s.usage = llm.Usage{InputTokens: chunk.PromptEvalCount, OutputTokens: chunk.EvalCount}
|
||||
s.doneReason = chunk.DoneReason
|
||||
s.queueFinal()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// queueFinal appends the final Response event and marks the stream done.
|
||||
func (s *stream) queueFinal() {
|
||||
resp := &llm.Response{
|
||||
Model: s.model.qualified(),
|
||||
Usage: s.usage,
|
||||
FinishReason: finishReason(s.doneReason, len(s.toolCalls) > 0),
|
||||
}
|
||||
if len(s.text) > 0 {
|
||||
resp.Parts = append(resp.Parts, llm.Text(string(s.text)))
|
||||
}
|
||||
if len(s.toolCalls) > 0 {
|
||||
resp.ToolCalls = append([]llm.ToolCall(nil), s.toolCalls...)
|
||||
}
|
||||
s.pending = append(s.pending, llm.StreamEvent{Response: resp})
|
||||
s.finished = true
|
||||
}
|
||||
|
||||
func (s *stream) Close() error {
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
if s.closed {
|
||||
return nil
|
||||
}
|
||||
s.closed = true
|
||||
return s.body.Close()
|
||||
}
|
||||
@@ -0,0 +1,343 @@
|
||||
package ollama
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/base64"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"gitea.stevedudenhoeffer.com/steve/majordomo/llm"
|
||||
)
|
||||
|
||||
// ---- wire types (field names per ollama api/types.go) ----
|
||||
|
||||
// chatRequest is the JSON body that do marshals and POSTs to /api/chat.
// buildRequest fills it from the canonical llm.Request.
type chatRequest struct {
	Model    string        `json:"model"`
	Messages []chatMessage `json:"messages"`
	// Tools is left empty when the request's tool choice is "none".
	Tools []toolDef `json:"tools,omitempty"`
	// Format carries the request's JSON schema verbatim when one is set.
	Format json.RawMessage `json:"format,omitempty"`
	// Options holds the sampling settings buildRequest maps across:
	// temperature, top_p, num_predict, stop.
	Options map[string]any `json:"options,omitempty"`
	// Stream has no omitempty on purpose: the server default is true, so
	// Generate must send an explicit false.
	Stream bool `json:"stream"`
	// Think is bool-or-string on the wire ("low"/"medium"/"high" or a bool).
	Think json.RawMessage `json:"think,omitempty"`
}
|
||||
|
||||
// chatMessage is one entry of chatRequest.Messages: a system, tool, or
// conversational turn as built by buildRequest.
type chatMessage struct {
	Role      string     `json:"role"`
	Content   string     `json:"content"`
	Images    []string   `json:"images,omitempty"` // raw base64, no data: prefix
	ToolCalls []toolCall `json:"tool_calls,omitempty"`
	ToolName  string     `json:"tool_name,omitempty"` // on role:"tool" results
}
|
||||
|
||||
// toolDef is one entry of chatRequest.Tools. buildRequest always sets Type
// to "function".
type toolDef struct {
	Type     string      `json:"type"`
	Function toolDefFunc `json:"function"`
}
|
||||
|
||||
// toolDefFunc describes a callable tool inside a toolDef.
type toolDefFunc struct {
	Name        string `json:"name"`
	Description string `json:"description,omitempty"`
	// Parameters is the tool's parameter schema; buildRequest substitutes an
	// empty object schema when the tool declares none.
	Parameters json.RawMessage `json:"parameters,omitempty"`
}
|
||||
|
||||
// toolCall is a tool invocation on the wire, used both in outgoing history
// (chatMessage.ToolCalls) and in responses (respMessage.ToolCalls).
type toolCall struct {
	// ID may be absent in responses; the conversion code then synthesizes
	// a "call_<n>" id.
	ID       string       `json:"id,omitempty"`
	Function toolCallFunc `json:"function"`
}
|
||||
|
||||
// toolCallFunc names the function of a toolCall and carries its arguments.
type toolCallFunc struct {
	Index int    `json:"index,omitempty"`
	Name  string `json:"name"`
	// Arguments is a JSON OBJECT on the wire (unlike OpenAI's string).
	Arguments json.RawMessage `json:"arguments"`
}
|
||||
|
||||
// chatResponse is one decoded reply object: the whole body for Generate, or
// a single NDJSON line when streaming.
type chatResponse struct {
	Model   string      `json:"model"`
	Message respMessage `json:"message"`
	// Done marks the final chunk; the stream reader takes usage and the
	// done reason from that chunk.
	Done            bool   `json:"done"`
	DoneReason      string `json:"done_reason"`
	PromptEvalCount int    `json:"prompt_eval_count"` // mapped to Usage.InputTokens
	EvalCount       int    `json:"eval_count"`        // mapped to Usage.OutputTokens
}
|
||||
|
||||
// respMessage is the message object inside a chatResponse.
type respMessage struct {
	Role    string `json:"role"`
	Content string `json:"content"`
	// NOTE(review): Thinking is decoded, but neither toResponse nor the
	// stream reader in this package appears to surface it — confirm intent.
	Thinking  string     `json:"thinking"`
	ToolCalls []toolCall `json:"tool_calls"`
}
|
||||
|
||||
// errorBody is the JSON shape do tries to decode from a non-2xx reply; when
// Error comes back empty, do falls back to the raw body text.
type errorBody struct {
	Error string `json:"error"`
}
|
||||
|
||||
// ---- model ----
|
||||
|
||||
// model is one provider-bound target: a model id, the provider whose
// connection settings requests use, and the capabilities enforced on them.
type model struct {
	provider *Provider        // supplies name, base URL, token, and HTTP client
	id       string           // sent as the wire "model" field
	caps     llm.Capabilities // checked by enforceCapabilities before each request
}
|
||||
|
||||
// Capabilities reports the capabilities this model enforces on requests.
func (m *model) Capabilities() llm.Capabilities {
	return m.caps
}
|
||||
|
||||
// qualified returns the "provider/model" name used in error messages and in
// Response.Model.
func (m *model) qualified() string {
	providerName := m.provider.name
	return providerName + "/" + m.id
}
|
||||
|
||||
// enforceCapabilities is the backstop check (the media layer normalizes
|
||||
// before requests get here; see ADR-0009).
|
||||
func (m *model) enforceCapabilities(req llm.Request) error {
|
||||
count := 0
|
||||
for _, msg := range req.Messages {
|
||||
for _, part := range msg.Parts {
|
||||
img, ok := part.(llm.ImagePart)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
count++
|
||||
if !m.caps.SupportsImages() {
|
||||
return fmt.Errorf("%w: %s does not accept image input", llm.ErrUnsupported, m.qualified())
|
||||
}
|
||||
if !m.caps.MIMEAllowed(img.MIME) {
|
||||
return fmt.Errorf("%w: %s does not accept %s images", llm.ErrUnsupported, m.qualified(), img.MIME)
|
||||
}
|
||||
if m.caps.MaxImageBytes > 0 && len(img.Data) > m.caps.MaxImageBytes {
|
||||
return fmt.Errorf("%w: image of %d bytes exceeds %s limit of %d",
|
||||
llm.ErrUnsupported, len(img.Data), m.qualified(), m.caps.MaxImageBytes)
|
||||
}
|
||||
}
|
||||
}
|
||||
if count > 0 && m.caps.MaxImagesPerReq > 0 && count > m.caps.MaxImagesPerReq {
|
||||
return fmt.Errorf("%w: %d images exceed %s limit of %d",
|
||||
llm.ErrUnsupported, count, m.qualified(), m.caps.MaxImagesPerReq)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// buildRequest maps the canonical request onto the wire shape.
|
||||
func (m *model) buildRequest(req llm.Request, stream bool) (*chatRequest, error) {
|
||||
out := &chatRequest{Model: m.id, Stream: stream}
|
||||
|
||||
// System prompt: dedicated field first, then folded RoleSystem messages.
|
||||
var sys []string
|
||||
if req.System != "" {
|
||||
sys = append(sys, req.System)
|
||||
}
|
||||
for _, msg := range req.Messages {
|
||||
if msg.Role == llm.RoleSystem {
|
||||
if t := msg.Text(); t != "" {
|
||||
sys = append(sys, t)
|
||||
}
|
||||
}
|
||||
}
|
||||
if len(sys) > 0 {
|
||||
out.Messages = append(out.Messages, chatMessage{
|
||||
Role: "system", Content: strings.Join(sys, "\n\n"),
|
||||
})
|
||||
}
|
||||
|
||||
for _, msg := range req.Messages {
|
||||
switch msg.Role {
|
||||
case llm.RoleSystem:
|
||||
// Already folded above.
|
||||
case llm.RoleTool:
|
||||
for _, res := range msg.ToolResults {
|
||||
content := res.Content
|
||||
if res.IsError {
|
||||
content = "ERROR: " + content
|
||||
}
|
||||
out.Messages = append(out.Messages, chatMessage{
|
||||
Role: "tool", Content: content, ToolName: res.Name,
|
||||
})
|
||||
}
|
||||
default:
|
||||
cm := chatMessage{Role: string(msg.Role), Content: msg.Text()}
|
||||
for _, part := range msg.Parts {
|
||||
if img, ok := part.(llm.ImagePart); ok {
|
||||
cm.Images = append(cm.Images, base64.StdEncoding.EncodeToString(img.Data))
|
||||
}
|
||||
}
|
||||
for _, tc := range msg.ToolCalls {
|
||||
args := tc.Arguments
|
||||
if len(args) == 0 {
|
||||
args = json.RawMessage("{}")
|
||||
}
|
||||
cm.ToolCalls = append(cm.ToolCalls, toolCall{
|
||||
ID: tc.ID,
|
||||
Function: toolCallFunc{Name: tc.Name, Arguments: args},
|
||||
})
|
||||
}
|
||||
out.Messages = append(out.Messages, cm)
|
||||
}
|
||||
}
|
||||
|
||||
// Tools. Ollama has no tool_choice: "none" maps to omitting the tools;
|
||||
// "required"/named choices have no wire equivalent and are best-effort
|
||||
// ignored (documented in the README support matrix).
|
||||
if req.ToolChoice != "none" {
|
||||
for _, t := range req.Tools {
|
||||
params := t.Parameters
|
||||
if len(params) == 0 {
|
||||
params = json.RawMessage(`{"type":"object","properties":{}}`)
|
||||
}
|
||||
out.Tools = append(out.Tools, toolDef{
|
||||
Type: "function",
|
||||
Function: toolDefFunc{Name: t.Name, Description: t.Description, Parameters: params},
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
if len(req.Schema) > 0 {
|
||||
out.Format = req.Schema
|
||||
}
|
||||
|
||||
opts := make(map[string]any)
|
||||
if req.Temperature != nil {
|
||||
opts["temperature"] = *req.Temperature
|
||||
}
|
||||
if req.TopP != nil {
|
||||
opts["top_p"] = *req.TopP
|
||||
}
|
||||
if req.MaxTokens > 0 {
|
||||
opts["num_predict"] = req.MaxTokens
|
||||
}
|
||||
if len(req.StopSequences) > 0 {
|
||||
opts["stop"] = req.StopSequences
|
||||
}
|
||||
if len(opts) > 0 {
|
||||
out.Options = opts
|
||||
}
|
||||
|
||||
switch req.ReasoningEffort {
|
||||
case "":
|
||||
case "low", "medium", "high":
|
||||
out.Think = json.RawMessage(strconv.Quote(req.ReasoningEffort))
|
||||
default:
|
||||
return nil, fmt.Errorf("ollama: invalid reasoning effort %q (want low/medium/high)", req.ReasoningEffort)
|
||||
}
|
||||
|
||||
return out, nil
|
||||
}
|
||||
|
||||
// do POSTs /api/chat and returns the response body on 2xx, or a classified
|
||||
// error.
|
||||
func (m *model) do(ctx context.Context, wireReq *chatRequest) (*http.Response, error) {
|
||||
p := m.provider
|
||||
if err := p.checkReady(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
body, err := json.Marshal(wireReq)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("ollama: encode request: %w", err)
|
||||
}
|
||||
httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, p.baseURL+"/api/chat", bytes.NewReader(body))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("ollama: build request: %w", err)
|
||||
}
|
||||
httpReq.Header.Set("Content-Type", "application/json")
|
||||
if p.token != "" {
|
||||
httpReq.Header.Set("Authorization", "Bearer "+p.token)
|
||||
}
|
||||
|
||||
resp, err := p.client.Do(httpReq)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("ollama %s: do request: %w", m.qualified(), err)
|
||||
}
|
||||
if resp.StatusCode/100 != 2 {
|
||||
defer resp.Body.Close()
|
||||
raw, _ := io.ReadAll(io.LimitReader(resp.Body, 8<<10))
|
||||
var eb errorBody
|
||||
_ = json.Unmarshal(raw, &eb)
|
||||
msg := eb.Error
|
||||
if msg == "" {
|
||||
msg = strings.TrimSpace(string(raw))
|
||||
}
|
||||
return nil, &llm.APIError{
|
||||
Provider: p.name, Model: m.id,
|
||||
Status: resp.StatusCode, Message: msg,
|
||||
}
|
||||
}
|
||||
return resp, nil
|
||||
}
|
||||
|
||||
// Generate implements llm.Model.
|
||||
func (m *model) Generate(ctx context.Context, req llm.Request, opts ...llm.Option) (*llm.Response, error) {
|
||||
req = req.Apply(opts...)
|
||||
if err := m.enforceCapabilities(req); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
wireReq, err := m.buildRequest(req, false)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
resp, err := m.do(ctx, wireReq)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
var cr chatResponse
|
||||
if err := json.NewDecoder(resp.Body).Decode(&cr); err != nil {
|
||||
return nil, fmt.Errorf("ollama %s: decode response: %w", m.qualified(), err)
|
||||
}
|
||||
return m.toResponse(&cr), nil
|
||||
}
|
||||
|
||||
// toResponse converts a final wire chunk into the canonical response.
|
||||
func (m *model) toResponse(cr *chatResponse) *llm.Response {
|
||||
out := &llm.Response{
|
||||
Model: m.qualified(),
|
||||
Usage: llm.Usage{InputTokens: cr.PromptEvalCount, OutputTokens: cr.EvalCount},
|
||||
Raw: cr,
|
||||
}
|
||||
if cr.Message.Content != "" {
|
||||
out.Parts = append(out.Parts, llm.Text(cr.Message.Content))
|
||||
}
|
||||
out.ToolCalls = convertToolCalls(cr.Message.ToolCalls)
|
||||
out.FinishReason = finishReason(cr.DoneReason, len(out.ToolCalls) > 0)
|
||||
return out
|
||||
}
|
||||
|
||||
// convertToolCalls maps wire tool calls, synthesizing ids where the model
|
||||
// omitted them (ids are optional in Ollama's shape but required by our
|
||||
// agent loop to match results to calls).
|
||||
func convertToolCalls(calls []toolCall) []llm.ToolCall {
|
||||
out := make([]llm.ToolCall, 0, len(calls))
|
||||
for i, tc := range calls {
|
||||
id := tc.ID
|
||||
if id == "" {
|
||||
id = "call_" + strconv.Itoa(i)
|
||||
}
|
||||
args := tc.Function.Arguments
|
||||
if len(args) == 0 {
|
||||
args = json.RawMessage("{}")
|
||||
}
|
||||
out = append(out, llm.ToolCall{ID: id, Name: tc.Function.Name, Arguments: args})
|
||||
}
|
||||
if len(out) == 0 {
|
||||
return nil
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
func finishReason(doneReason string, hasToolCalls bool) llm.FinishReason {
|
||||
if hasToolCalls {
|
||||
return llm.FinishToolCalls
|
||||
}
|
||||
switch doneReason {
|
||||
case "stop", "":
|
||||
return llm.FinishStop
|
||||
case "length":
|
||||
return llm.FinishLength
|
||||
default:
|
||||
return llm.FinishOther
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,222 @@
|
||||
package openai
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"strings"
|
||||
|
||||
"gitea.stevedudenhoeffer.com/steve/majordomo/llm"
|
||||
)
|
||||
|
||||
// model is one provider-bound target: the model id to send, the provider
// whose endpoint and credentials it uses, and the capabilities checked
// against each request.
type model struct {
	p    *Provider        // supplies name, API key, base URL, and HTTP client
	id   string           // model id as passed to Provider.Model
	caps llm.Capabilities // checked by checkRequest before each call
}
|
||||
|
||||
// Capabilities implements llm.Model.
|
||||
func (m *model) Capabilities() llm.Capabilities { return m.caps }
|
||||
|
||||
// Generate implements llm.Model.
|
||||
func (m *model) Generate(ctx context.Context, req llm.Request, opts ...llm.Option) (*llm.Response, error) {
|
||||
req = req.Apply(opts...)
|
||||
if err := checkRequest(m.caps, req); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
httpResp, err := m.do(ctx, req, false)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer httpResp.Body.Close()
|
||||
if httpResp.StatusCode/100 != 2 {
|
||||
return nil, m.apiError(httpResp)
|
||||
}
|
||||
var wire chatResponse
|
||||
if err := json.NewDecoder(httpResp.Body).Decode(&wire); err != nil {
|
||||
return nil, fmt.Errorf("openai: decode response: %w", err)
|
||||
}
|
||||
return m.toResponse(&wire), nil
|
||||
}
|
||||
|
||||
// Stream implements llm.Model.
|
||||
func (m *model) Stream(ctx context.Context, req llm.Request, opts ...llm.Option) (llm.Stream, error) {
|
||||
req = req.Apply(opts...)
|
||||
if !m.caps.SupportsStreaming {
|
||||
return nil, fmt.Errorf("%w: streaming not supported by %s/%s", llm.ErrUnsupported, m.p.name, m.id)
|
||||
}
|
||||
if err := checkRequest(m.caps, req); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
httpResp, err := m.do(ctx, req, true)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if httpResp.StatusCode/100 != 2 {
|
||||
defer httpResp.Body.Close()
|
||||
return nil, m.apiError(httpResp)
|
||||
}
|
||||
sc := bufio.NewScanner(httpResp.Body)
|
||||
// Why: a single SSE data line carries a whole JSON chunk; tool-call
|
||||
// argument fragments can make lines far larger than Scanner's 64 KiB
|
||||
// default cap.
|
||||
sc.Buffer(make([]byte, 0, 64*1024), 16<<20)
|
||||
return &stream{m: m, body: httpResp.Body, sc: sc}, nil
|
||||
}
|
||||
|
||||
// do builds and performs the HTTP request. Transport failures are wrapped
|
||||
// raw (never as *llm.APIError) so llm.Classify still sees net.Error,
|
||||
// syscall errnos, and context errors underneath.
|
||||
func (m *model) do(ctx context.Context, req llm.Request, stream bool) (*http.Response, error) {
|
||||
if m.p.apiKey == "" {
|
||||
// Why a synthetic 401: the constructor never fails, so a missing
|
||||
// key must surface at request time as the auth failure it is —
|
||||
// permanent under llm.Classify, like a real 401.
|
||||
return nil, &llm.APIError{
|
||||
Provider: m.p.name,
|
||||
Model: m.id,
|
||||
Status: http.StatusUnauthorized,
|
||||
Code: "missing_api_key",
|
||||
Message: "no API key configured: set OPENAI_API_KEY or use WithAPIKey",
|
||||
}
|
||||
}
|
||||
body, err := json.Marshal(m.buildRequest(req, stream))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("openai: encode request: %w", err)
|
||||
}
|
||||
httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, m.p.baseURL+"/chat/completions", bytes.NewReader(body))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("openai: build request: %w", err)
|
||||
}
|
||||
httpReq.Header.Set("Content-Type", "application/json")
|
||||
httpReq.Header.Set("Authorization", "Bearer "+m.p.apiKey)
|
||||
if stream {
|
||||
httpReq.Header.Set("Accept", "text/event-stream")
|
||||
}
|
||||
httpResp, err := m.p.client.Do(httpReq)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("openai: do request: %w", err)
|
||||
}
|
||||
return httpResp, nil
|
||||
}
|
||||
|
||||
// apiError converts a non-2xx response into *llm.APIError, pulling code and
|
||||
// message from the {"error":{...}} body when it parses.
|
||||
func (m *model) apiError(httpResp *http.Response) error {
|
||||
apiErr := &llm.APIError{Provider: m.p.name, Model: m.id, Status: httpResp.StatusCode}
|
||||
body, _ := io.ReadAll(io.LimitReader(httpResp.Body, 1<<20))
|
||||
var env errorEnvelope
|
||||
if err := json.Unmarshal(body, &env); err == nil &&
|
||||
(env.Error.Message != "" || env.Error.Type != "" || env.Error.Code != "") {
|
||||
apiErr.Message = env.Error.Message
|
||||
apiErr.Code = env.Error.Code
|
||||
if apiErr.Code == "" {
|
||||
apiErr.Code = env.Error.Type
|
||||
}
|
||||
} else {
|
||||
// Why: compat servers emit all sorts of error bodies; a raw snippet
|
||||
// beats silence when the canonical envelope is absent.
|
||||
apiErr.Message = strings.TrimSpace(string(body))
|
||||
}
|
||||
return apiErr
|
||||
}
|
||||
|
||||
// toResponse maps the wire response onto the canonical llm.Response.
|
||||
func (m *model) toResponse(wire *chatResponse) *llm.Response {
|
||||
resp := &llm.Response{Model: m.p.name + "/" + m.id, Raw: wire}
|
||||
if wire.Usage != nil {
|
||||
resp.Usage = llm.Usage{
|
||||
InputTokens: wire.Usage.PromptTokens,
|
||||
OutputTokens: wire.Usage.CompletionTokens,
|
||||
}
|
||||
}
|
||||
if len(wire.Choices) == 0 {
|
||||
resp.FinishReason = llm.FinishOther
|
||||
return resp
|
||||
}
|
||||
choice := wire.Choices[0]
|
||||
if choice.Message.Content != "" {
|
||||
resp.Parts = append(resp.Parts, llm.TextPart{Text: choice.Message.Content})
|
||||
}
|
||||
for i, tc := range choice.Message.ToolCalls {
|
||||
id := tc.ID
|
||||
if id == "" {
|
||||
// Why: ToolResult.ID must echo ToolCall.ID, so calls from compat
|
||||
// servers that omit ids get synthesized ones.
|
||||
id = fmt.Sprintf("call_%d", i)
|
||||
}
|
||||
resp.ToolCalls = append(resp.ToolCalls, llm.ToolCall{
|
||||
ID: id,
|
||||
Name: tc.Function.Name,
|
||||
Arguments: json.RawMessage(tc.Function.Arguments),
|
||||
})
|
||||
}
|
||||
resp.FinishReason = mapFinish(choice.FinishReason, len(resp.ToolCalls) > 0)
|
||||
return resp
|
||||
}
|
||||
|
||||
// mapFinish maps a wire finish_reason to the canonical enum. Tool-call
|
||||
// presence wins over the reported reason: a forced (named tool_choice) call
|
||||
// can finish with "stop" while still carrying tool_calls.
|
||||
func mapFinish(reason string, hasToolCalls bool) llm.FinishReason {
|
||||
if hasToolCalls {
|
||||
return llm.FinishToolCalls
|
||||
}
|
||||
switch reason {
|
||||
case "stop":
|
||||
return llm.FinishStop
|
||||
case "length":
|
||||
return llm.FinishLength
|
||||
case "tool_calls":
|
||||
return llm.FinishToolCalls
|
||||
case "content_filter":
|
||||
return llm.FinishContentFilter
|
||||
default:
|
||||
return llm.FinishOther
|
||||
}
|
||||
}
|
||||
|
||||
// checkRequest enforces the model's effective capabilities. Why enforcement
|
||||
// rather than normalization: a separate media layer resizes/transcodes
|
||||
// images BEFORE requests reach the provider; this check is the honest
|
||||
// backstop that refuses, with llm.ErrUnsupported, what the target
|
||||
// declaredly cannot serve (chains advance past it penalty-free).
|
||||
func checkRequest(caps llm.Capabilities, req llm.Request) error {
|
||||
if len(req.Tools) > 0 && !caps.SupportsTools {
|
||||
return fmt.Errorf("%w: tools not supported", llm.ErrUnsupported)
|
||||
}
|
||||
if len(req.Schema) > 0 && !caps.SupportsStructured {
|
||||
return fmt.Errorf("%w: structured output not supported", llm.ErrUnsupported)
|
||||
}
|
||||
images := 0
|
||||
for _, msg := range req.Messages {
|
||||
for _, part := range msg.Parts {
|
||||
img, ok := part.(llm.ImagePart)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
images++
|
||||
if !caps.SupportsImages() {
|
||||
return fmt.Errorf("%w: image input not supported", llm.ErrUnsupported)
|
||||
}
|
||||
if !caps.MIMEAllowed(img.MIME) {
|
||||
return fmt.Errorf("%w: image MIME type %q not allowed (allowed: %s)",
|
||||
llm.ErrUnsupported, img.MIME, strings.Join(caps.AllowedImageMIME, ", "))
|
||||
}
|
||||
if caps.MaxImageBytes > 0 && len(img.Data) > caps.MaxImageBytes {
|
||||
return fmt.Errorf("%w: image is %d bytes, limit is %d",
|
||||
llm.ErrUnsupported, len(img.Data), caps.MaxImageBytes)
|
||||
}
|
||||
}
|
||||
}
|
||||
if images > caps.MaxImagesPerReq {
|
||||
return fmt.Errorf("%w: request carries %d images, limit is %d",
|
||||
llm.ErrUnsupported, images, caps.MaxImagesPerReq)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
@@ -0,0 +1,133 @@
|
||||
// Package openai implements llm.Provider for the OpenAI Chat Completions
|
||||
// API and, via WithBaseURL/WithName, any OpenAI-compatible endpoint
|
||||
// (vLLM, Groq, Together, LM Studio, Ollama's /v1 shim, ...).
|
||||
//
|
||||
// Targeted API surface (verified against developers.openai.com, June 2026):
|
||||
// POST {base}/chat/completions with
|
||||
// - messages: plain-string content for text-only turns, part arrays with
|
||||
// base64 data-URL image_url entries for multimodal turns, assistant
|
||||
// tool_calls history, and {"role":"tool","tool_call_id",...} results;
|
||||
// - tools as {"type":"function","function":{...}} with tool_choice
|
||||
// "auto"/"none"/"required" or a named-function object;
|
||||
// - response_format {"type":"json_schema",...} structured output;
|
||||
// - max_completion_tokens (or legacy max_tokens via WithLegacyMaxTokens
|
||||
// for compat servers), temperature, top_p, stop, reasoning_effort;
|
||||
// - data-only SSE streaming with stream_options.include_usage, the
|
||||
// "data: [DONE]" sentinel, and tool-call deltas accumulated by index.
|
||||
//
|
||||
// Newer response fields (refusal, annotations, usage *_details, delta
|
||||
// obfuscation) are tolerated and ignored so both api.openai.com and older
|
||||
// compat servers decode cleanly.
|
||||
package openai
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
"os"
|
||||
"strings"
|
||||
|
||||
"gitea.stevedudenhoeffer.com/steve/majordomo/llm"
|
||||
)
|
||||
|
||||
// defaultBaseURL is the endpoint root New uses unless WithBaseURL replaces
// it; requests are sent to this root plus "/chat/completions".
const defaultBaseURL = "https://api.openai.com/v1"
|
||||
|
||||
// Provider is an llm.Provider backed by an OpenAI Chat Completions endpoint.
// Build one with New; every field is set there or by an Option.
type Provider struct {
	name    string           // registry name; "openai" unless WithName overrides it
	apiKey  string           // sent as a Bearer token; empty makes requests fail with a synthetic 401
	baseURL string           // endpoint root with trailing slashes trimmed by New
	client  *http.Client     // HTTP client used for every request
	caps    llm.Capabilities // default capabilities for models created without an override
	// legacyMaxTokens is set by WithLegacyMaxTokens; per that option's
	// contract it selects "max_tokens" over "max_completion_tokens".
	legacyMaxTokens bool
}
|
||||
|
||||
// Option configures the provider at construction. New applies options in
// the order given, after the defaults are in place, so a later option
// overrides an earlier one.
type Option func(*Provider)
|
||||
|
||||
// WithAPIKey sets the API key. When absent, New reads OPENAI_API_KEY from
|
||||
// the environment at construction time.
|
||||
func WithAPIKey(key string) Option {
|
||||
return func(p *Provider) { p.apiKey = key }
|
||||
}
|
||||
|
||||
// WithBaseURL points the client at a different endpoint (compat servers).
|
||||
// The path "/chat/completions" is appended; a trailing slash is trimmed.
|
||||
func WithBaseURL(u string) Option {
|
||||
return func(p *Provider) { p.baseURL = u }
|
||||
}
|
||||
|
||||
// WithHTTPClient substitutes the HTTP client (timeouts, proxies, tests).
|
||||
func WithHTTPClient(c *http.Client) Option {
|
||||
return func(p *Provider) {
|
||||
if c != nil {
|
||||
p.client = c
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// WithName overrides the registry name ("openai" by default). Why: the same
|
||||
// client serves many OpenAI-compatible endpoints, and each needs a distinct
|
||||
// name in "provider/model" specs and error reporting.
|
||||
func WithName(name string) Option {
|
||||
return func(p *Provider) { p.name = name }
|
||||
}
|
||||
|
||||
// WithDefaultCapabilities replaces the provider-default capabilities.
|
||||
// Per-model overrides via llm.WithCapabilities still take precedence.
|
||||
func WithDefaultCapabilities(caps llm.Capabilities) Option {
|
||||
return func(p *Provider) { p.caps = caps }
|
||||
}
|
||||
|
||||
// WithLegacyMaxTokens sends Request.MaxTokens as "max_tokens" instead of
|
||||
// "max_completion_tokens". Why: OpenAI deprecated max_tokens, but many
|
||||
// third-party compat servers still only honor the legacy field.
|
||||
func WithLegacyMaxTokens() Option {
|
||||
return func(p *Provider) { p.legacyMaxTokens = true }
|
||||
}
|
||||
|
||||
// defaultCapabilities reflects OpenAI's current vision-capable chat models.
|
||||
// Why these limits: the published per-request caps (1500 images, 512 MB)
|
||||
// are far beyond what compat servers accept; 100 images / 20 MB each is a
|
||||
// conservative envelope, and the MIME list is the documented set (PNG,
|
||||
// JPEG, WEBP, non-animated GIF).
|
||||
func defaultCapabilities() llm.Capabilities {
|
||||
return llm.Capabilities{
|
||||
SupportsTools: true,
|
||||
SupportsStructured: true,
|
||||
SupportsStreaming: true,
|
||||
MaxImagesPerReq: 100,
|
||||
MaxImageBytes: 20 << 20,
|
||||
AllowedImageMIME: []string{"image/jpeg", "image/png", "image/webp", "image/gif"},
|
||||
}
|
||||
}
|
||||
|
||||
// New creates a Provider. It never fails: a missing API key surfaces as a
|
||||
// 401-style *llm.APIError at request time, not at construction.
|
||||
func New(opts ...Option) *Provider {
|
||||
p := &Provider{
|
||||
name: "openai",
|
||||
apiKey: os.Getenv("OPENAI_API_KEY"),
|
||||
baseURL: defaultBaseURL,
|
||||
client: http.DefaultClient,
|
||||
caps: defaultCapabilities(),
|
||||
}
|
||||
for _, opt := range opts {
|
||||
opt(p)
|
||||
}
|
||||
p.baseURL = strings.TrimRight(p.baseURL, "/")
|
||||
return p
|
||||
}
|
||||
|
||||
// Name implements llm.Provider.
|
||||
func (p *Provider) Name() string { return p.name }
|
||||
|
||||
// Model implements llm.Provider. The id is passed through verbatim — no
|
||||
// catalog validation; unknown models fail at request time with the
|
||||
// backend's own error.
|
||||
func (p *Provider) Model(id string, opts ...llm.ModelOption) (llm.Model, error) {
|
||||
cfg := llm.ApplyModelOptions(opts)
|
||||
caps := p.caps
|
||||
if cfg.Capabilities != nil {
|
||||
caps = *cfg.Capabilities
|
||||
}
|
||||
return &model{p: p, id: id, caps: caps}, nil
|
||||
}
|
||||
@@ -0,0 +1,614 @@
|
||||
package openai
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"io"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"reflect"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"gitea.stevedudenhoeffer.com/steve/majordomo/llm"
|
||||
)
|
||||
|
||||
var (
|
||||
_ llm.Provider = (*Provider)(nil)
|
||||
_ llm.Model = (*model)(nil)
|
||||
_ llm.Stream = (*stream)(nil)
|
||||
)
|
||||
|
||||
// textResponse is a canned non-streaming chat.completion body: one assistant
// choice with the text "hello", finish_reason "stop", and usage of 19 prompt
// and 10 completion tokens.
const textResponse = `{
	"id": "chatcmpl-1", "object": "chat.completion", "created": 1741570283, "model": "gpt-test",
	"choices": [{
		"index": 0,
		"message": {"role": "assistant", "content": "hello", "refusal": null, "annotations": []},
		"logprobs": null,
		"finish_reason": "stop"
	}],
	"usage": {
		"prompt_tokens": 19, "completion_tokens": 10, "total_tokens": 29,
		"prompt_tokens_details": {"cached_tokens": 0, "audio_tokens": 0},
		"completion_tokens_details": {"reasoning_tokens": 0}
	},
	"service_tier": "default", "system_fingerprint": "fp_x"
}`
|
||||
|
||||
// recorded holds what a test server saw on its most recent request, plus a
// running count of requests handled.
type recorded struct {
	body   map[string]any // request body decoded from JSON; untouched when the body was empty
	header http.Header    // clone of the request headers
	path   string         // request URL path
	hits   int            // number of requests handled so far
}
|
||||
|
||||
// newServer starts a test server that records the request and replies with
|
||||
// a fixed status and body.
|
||||
func newServer(t *testing.T, status int, respBody string) (*httptest.Server, *recorded) {
|
||||
t.Helper()
|
||||
rec := &recorded{}
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
rec.hits++
|
||||
rec.header = r.Header.Clone()
|
||||
rec.path = r.URL.Path
|
||||
raw, err := io.ReadAll(r.Body)
|
||||
if err != nil {
|
||||
t.Errorf("read request body: %v", err)
|
||||
}
|
||||
if len(raw) > 0 {
|
||||
if err := json.Unmarshal(raw, &rec.body); err != nil {
|
||||
t.Errorf("request body is not JSON: %v\n%s", err, raw)
|
||||
}
|
||||
}
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
w.WriteHeader(status)
|
||||
io.WriteString(w, respBody)
|
||||
}))
|
||||
t.Cleanup(srv.Close)
|
||||
return srv, rec
|
||||
}
|
||||
|
||||
func testModel(t *testing.T, srv *httptest.Server, popts []Option, mopts ...llm.ModelOption) llm.Model {
|
||||
t.Helper()
|
||||
opts := append([]Option{WithAPIKey("test-key"), WithBaseURL(srv.URL)}, popts...)
|
||||
m, err := New(opts...).Model("gpt-test", mopts...)
|
||||
if err != nil {
|
||||
t.Fatalf("Model: %v", err)
|
||||
}
|
||||
return m
|
||||
}
|
||||
|
||||
// fptr returns a pointer to a fresh copy of f; handy for optional float
// request fields such as Temperature and TopP.
func fptr(f float64) *float64 {
	v := f
	return &v
}
|
||||
|
||||
func TestGenerateRequestShape(t *testing.T) {
|
||||
srv, rec := newServer(t, http.StatusOK, textResponse)
|
||||
m := testModel(t, srv, nil)
|
||||
|
||||
req := llm.Request{
|
||||
System: "base system",
|
||||
Messages: []llm.Message{
|
||||
llm.SystemText("folded system"),
|
||||
llm.UserParts(llm.Text("look:"), llm.Image("image/png", []byte{1, 2, 3})),
|
||||
{
|
||||
Role: llm.RoleAssistant,
|
||||
Parts: []llm.Part{llm.Text("checking")},
|
||||
ToolCalls: []llm.ToolCall{
|
||||
{ID: "call_1", Name: "get_weather", Arguments: json.RawMessage(`{"city":"Boston"}`)},
|
||||
},
|
||||
},
|
||||
llm.ToolResultsMessage(
|
||||
llm.ToolResult{ID: "call_1", Name: "get_weather", Content: "72F"},
|
||||
llm.ToolResult{ID: "call_2", Name: "get_weather", Content: "boom", IsError: true},
|
||||
),
|
||||
llm.UserText("thanks"),
|
||||
},
|
||||
Tools: []llm.Tool{{
|
||||
Name: "get_weather",
|
||||
Description: "Get current weather",
|
||||
Parameters: json.RawMessage(`{"type":"object","properties":{"city":{"type":"string"}}}`),
|
||||
}},
|
||||
ToolChoice: "auto",
|
||||
Temperature: fptr(0.5),
|
||||
TopP: fptr(0.9),
|
||||
MaxTokens: 256,
|
||||
StopSequences: []string{"END"},
|
||||
ReasoningEffort: "high",
|
||||
Schema: json.RawMessage(`{"type":"object","properties":{"ok":{"type":"boolean"}}}`),
|
||||
SchemaName: "verdict",
|
||||
}
|
||||
if _, err := m.Generate(context.Background(), req); err != nil {
|
||||
t.Fatalf("Generate: %v", err)
|
||||
}
|
||||
|
||||
want := map[string]any{
|
||||
"model": "gpt-test",
|
||||
"messages": []any{
|
||||
map[string]any{"role": "system", "content": "base system\n\nfolded system"},
|
||||
map[string]any{"role": "user", "content": []any{
|
||||
map[string]any{"type": "text", "text": "look:"},
|
||||
map[string]any{"type": "image_url", "image_url": map[string]any{"url": "data:image/png;base64,AQID"}},
|
||||
}},
|
||||
map[string]any{"role": "assistant", "content": "checking", "tool_calls": []any{
|
||||
map[string]any{"id": "call_1", "type": "function", "function": map[string]any{
|
||||
"name": "get_weather", "arguments": `{"city":"Boston"}`,
|
||||
}},
|
||||
}},
|
||||
map[string]any{"role": "tool", "content": "72F", "tool_call_id": "call_1"},
|
||||
map[string]any{"role": "tool", "content": "ERROR: boom", "tool_call_id": "call_2"},
|
||||
map[string]any{"role": "user", "content": "thanks"},
|
||||
},
|
||||
"tools": []any{
|
||||
map[string]any{"type": "function", "function": map[string]any{
|
||||
"name": "get_weather",
|
||||
"description": "Get current weather",
|
||||
"parameters": map[string]any{"type": "object", "properties": map[string]any{"city": map[string]any{"type": "string"}}},
|
||||
}},
|
||||
},
|
||||
"tool_choice": "auto",
|
||||
"temperature": 0.5,
|
||||
"top_p": 0.9,
|
||||
"max_completion_tokens": float64(256),
|
||||
"stop": []any{"END"},
|
||||
"reasoning_effort": "high",
|
||||
"response_format": map[string]any{"type": "json_schema", "json_schema": map[string]any{
|
||||
"name": "verdict",
|
||||
"schema": map[string]any{"type": "object", "properties": map[string]any{"ok": map[string]any{"type": "boolean"}}},
|
||||
}},
|
||||
}
|
||||
if !reflect.DeepEqual(rec.body, want) {
|
||||
got, _ := json.MarshalIndent(rec.body, "", " ")
|
||||
exp, _ := json.MarshalIndent(want, "", " ")
|
||||
t.Errorf("request body mismatch\ngot:\n%s\nwant:\n%s", got, exp)
|
||||
}
|
||||
}
|
||||
|
||||
func TestToolChoiceForms(t *testing.T) {
|
||||
tests := []struct {
|
||||
choice string
|
||||
want any // nil = key absent
|
||||
}{
|
||||
{"", nil},
|
||||
{"auto", "auto"},
|
||||
{"none", "none"},
|
||||
{"required", "required"},
|
||||
{"get_weather", map[string]any{"type": "function", "function": map[string]any{"name": "get_weather"}}},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run("choice="+tt.choice, func(t *testing.T) {
|
||||
srv, rec := newServer(t, http.StatusOK, textResponse)
|
||||
m := testModel(t, srv, nil)
|
||||
req := llm.Request{
|
||||
Messages: []llm.Message{llm.UserText("hi")},
|
||||
Tools: []llm.Tool{{Name: "get_weather"}},
|
||||
ToolChoice: tt.choice,
|
||||
}
|
||||
if _, err := m.Generate(context.Background(), req); err != nil {
|
||||
t.Fatalf("Generate: %v", err)
|
||||
}
|
||||
got, present := rec.body["tool_choice"]
|
||||
if tt.want == nil {
|
||||
if present {
|
||||
t.Errorf("tool_choice present, want omitted: %v", got)
|
||||
}
|
||||
return
|
||||
}
|
||||
if !reflect.DeepEqual(got, tt.want) {
|
||||
t.Errorf("tool_choice = %v, want %v", got, tt.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestMaxTokensField(t *testing.T) {
|
||||
t.Run("default uses max_completion_tokens", func(t *testing.T) {
|
||||
srv, rec := newServer(t, http.StatusOK, textResponse)
|
||||
m := testModel(t, srv, nil)
|
||||
req := llm.Request{Messages: []llm.Message{llm.UserText("hi")}, MaxTokens: 64}
|
||||
if _, err := m.Generate(context.Background(), req); err != nil {
|
||||
t.Fatalf("Generate: %v", err)
|
||||
}
|
||||
if got := rec.body["max_completion_tokens"]; got != float64(64) {
|
||||
t.Errorf("max_completion_tokens = %v, want 64", got)
|
||||
}
|
||||
if _, present := rec.body["max_tokens"]; present {
|
||||
t.Error("max_tokens present, want omitted")
|
||||
}
|
||||
})
|
||||
t.Run("WithLegacyMaxTokens uses max_tokens", func(t *testing.T) {
|
||||
srv, rec := newServer(t, http.StatusOK, textResponse)
|
||||
m := testModel(t, srv, []Option{WithLegacyMaxTokens()})
|
||||
req := llm.Request{Messages: []llm.Message{llm.UserText("hi")}, MaxTokens: 64}
|
||||
if _, err := m.Generate(context.Background(), req); err != nil {
|
||||
t.Fatalf("Generate: %v", err)
|
||||
}
|
||||
if got := rec.body["max_tokens"]; got != float64(64) {
|
||||
t.Errorf("max_tokens = %v, want 64", got)
|
||||
}
|
||||
if _, present := rec.body["max_completion_tokens"]; present {
|
||||
t.Error("max_completion_tokens present, want omitted")
|
||||
}
|
||||
})
|
||||
t.Run("zero omits both", func(t *testing.T) {
|
||||
srv, rec := newServer(t, http.StatusOK, textResponse)
|
||||
m := testModel(t, srv, nil)
|
||||
req := llm.Request{Messages: []llm.Message{llm.UserText("hi")}}
|
||||
if _, err := m.Generate(context.Background(), req); err != nil {
|
||||
t.Fatalf("Generate: %v", err)
|
||||
}
|
||||
if _, present := rec.body["max_tokens"]; present {
|
||||
t.Error("max_tokens present, want omitted")
|
||||
}
|
||||
if _, present := rec.body["max_completion_tokens"]; present {
|
||||
t.Error("max_completion_tokens present, want omitted")
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func TestSchemaNameDefault(t *testing.T) {
|
||||
srv, rec := newServer(t, http.StatusOK, textResponse)
|
||||
m := testModel(t, srv, nil)
|
||||
req := llm.Request{
|
||||
Messages: []llm.Message{llm.UserText("hi")},
|
||||
Schema: json.RawMessage(`{"type":"object"}`),
|
||||
}
|
||||
if _, err := m.Generate(context.Background(), req); err != nil {
|
||||
t.Fatalf("Generate: %v", err)
|
||||
}
|
||||
rf, ok := rec.body["response_format"].(map[string]any)
|
||||
if !ok {
|
||||
t.Fatalf("response_format missing: %v", rec.body)
|
||||
}
|
||||
js, ok := rf["json_schema"].(map[string]any)
|
||||
if !ok {
|
||||
t.Fatalf("json_schema missing: %v", rf)
|
||||
}
|
||||
if js["name"] != "response" {
|
||||
t.Errorf("schema name = %v, want %q", js["name"], "response")
|
||||
}
|
||||
}
|
||||
|
||||
func TestGenerateTextResponse(t *testing.T) {
|
||||
srv, _ := newServer(t, http.StatusOK, textResponse)
|
||||
m := testModel(t, srv, nil)
|
||||
resp, err := m.Generate(context.Background(), llm.Request{Messages: []llm.Message{llm.UserText("hi")}})
|
||||
if err != nil {
|
||||
t.Fatalf("Generate: %v", err)
|
||||
}
|
||||
if got := resp.Text(); got != "hello" {
|
||||
t.Errorf("Text = %q, want %q", got, "hello")
|
||||
}
|
||||
if resp.FinishReason != llm.FinishStop {
|
||||
t.Errorf("FinishReason = %v, want %v", resp.FinishReason, llm.FinishStop)
|
||||
}
|
||||
if resp.Usage != (llm.Usage{InputTokens: 19, OutputTokens: 10}) {
|
||||
t.Errorf("Usage = %+v, want {19 10}", resp.Usage)
|
||||
}
|
||||
if resp.Model != "openai/gpt-test" {
|
||||
t.Errorf("Model = %q, want %q", resp.Model, "openai/gpt-test")
|
||||
}
|
||||
if len(resp.ToolCalls) != 0 {
|
||||
t.Errorf("ToolCalls = %v, want none", resp.ToolCalls)
|
||||
}
|
||||
if resp.Raw == nil {
|
||||
t.Error("Raw is nil, want wire response")
|
||||
}
|
||||
}
|
||||
|
||||
func TestGenerateToolCallResponse(t *testing.T) {
|
||||
const body = `{
|
||||
"id": "chatcmpl-2", "object": "chat.completion", "created": 1, "model": "gpt-test",
|
||||
"choices": [{
|
||||
"index": 0,
|
||||
"message": {"role": "assistant", "content": null, "tool_calls": [
|
||||
{"id": "call_9", "type": "function", "function": {"name": "get_weather", "arguments": "{\"city\":\"Boston\"}"}},
|
||||
{"id": "", "type": "function", "function": {"name": "get_time", "arguments": "{}"}}
|
||||
]},
|
||||
"finish_reason": "stop"
|
||||
}],
|
||||
"usage": {"prompt_tokens": 3, "completion_tokens": 4, "total_tokens": 7}
|
||||
}`
|
||||
srv, _ := newServer(t, http.StatusOK, body)
|
||||
m := testModel(t, srv, nil)
|
||||
resp, err := m.Generate(context.Background(), llm.Request{Messages: []llm.Message{llm.UserText("hi")}})
|
||||
if err != nil {
|
||||
t.Fatalf("Generate: %v", err)
|
||||
}
|
||||
if len(resp.ToolCalls) != 2 {
|
||||
t.Fatalf("ToolCalls = %d, want 2", len(resp.ToolCalls))
|
||||
}
|
||||
tc := resp.ToolCalls[0]
|
||||
if tc.ID != "call_9" || tc.Name != "get_weather" || string(tc.Arguments) != `{"city":"Boston"}` {
|
||||
t.Errorf("ToolCalls[0] = %+v", tc)
|
||||
}
|
||||
if resp.ToolCalls[1].ID != "call_1" {
|
||||
t.Errorf("synthesized ID = %q, want %q", resp.ToolCalls[1].ID, "call_1")
|
||||
}
|
||||
// finish_reason "stop" with tool_calls present: presence wins.
|
||||
if resp.FinishReason != llm.FinishToolCalls {
|
||||
t.Errorf("FinishReason = %v, want %v", resp.FinishReason, llm.FinishToolCalls)
|
||||
}
|
||||
if len(resp.Parts) != 0 {
|
||||
t.Errorf("Parts = %v, want none", resp.Parts)
|
||||
}
|
||||
}
|
||||
|
||||
func TestFinishReasonMapping(t *testing.T) {
|
||||
tests := []struct {
|
||||
wire string
|
||||
want llm.FinishReason
|
||||
}{
|
||||
{"stop", llm.FinishStop},
|
||||
{"length", llm.FinishLength},
|
||||
{"tool_calls", llm.FinishToolCalls},
|
||||
{"content_filter", llm.FinishContentFilter},
|
||||
{"function_call", llm.FinishOther},
|
||||
{"weird_new_reason", llm.FinishOther},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.wire, func(t *testing.T) {
|
||||
body := `{"choices":[{"index":0,"message":{"role":"assistant","content":"x"},"finish_reason":"` + tt.wire + `"}],"usage":{"prompt_tokens":1,"completion_tokens":1,"total_tokens":2}}`
|
||||
srv, _ := newServer(t, http.StatusOK, body)
|
||||
m := testModel(t, srv, nil)
|
||||
resp, err := m.Generate(context.Background(), llm.Request{Messages: []llm.Message{llm.UserText("hi")}})
|
||||
if err != nil {
|
||||
t.Fatalf("Generate: %v", err)
|
||||
}
|
||||
if resp.FinishReason != tt.want {
|
||||
t.Errorf("FinishReason = %v, want %v", resp.FinishReason, tt.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestAPIErrorMapping(t *testing.T) {
|
||||
t.Run("429 rate limit is transient", func(t *testing.T) {
|
||||
const body = `{"error":{"message":"Rate limit reached","type":"rate_limit_error","param":null,"code":"rate_limit_exceeded"}}`
|
||||
srv, _ := newServer(t, http.StatusTooManyRequests, body)
|
||||
m := testModel(t, srv, nil)
|
||||
_, err := m.Generate(context.Background(), llm.Request{Messages: []llm.Message{llm.UserText("hi")}})
|
||||
apiErr, ok := errors.AsType[*llm.APIError](err)
|
||||
if !ok {
|
||||
t.Fatalf("err = %v (%T), want *llm.APIError", err, err)
|
||||
}
|
||||
if apiErr.Status != http.StatusTooManyRequests {
|
||||
t.Errorf("Status = %d, want 429", apiErr.Status)
|
||||
}
|
||||
if apiErr.Code != "rate_limit_exceeded" {
|
||||
t.Errorf("Code = %q, want %q", apiErr.Code, "rate_limit_exceeded")
|
||||
}
|
||||
if apiErr.Message != "Rate limit reached" {
|
||||
t.Errorf("Message = %q", apiErr.Message)
|
||||
}
|
||||
if apiErr.Provider != "openai" || apiErr.Model != "gpt-test" {
|
||||
t.Errorf("Provider/Model = %q/%q", apiErr.Provider, apiErr.Model)
|
||||
}
|
||||
if got := llm.Classify(err); got != llm.ClassTransient {
|
||||
t.Errorf("Classify = %v, want transient", got)
|
||||
}
|
||||
})
|
||||
t.Run("401 code null falls back to type, permanent", func(t *testing.T) {
|
||||
const body = `{"error":{"message":"Incorrect API key provided","type":"authentication_error","param":null,"code":null}}`
|
||||
srv, _ := newServer(t, http.StatusUnauthorized, body)
|
||||
m := testModel(t, srv, nil)
|
||||
_, err := m.Generate(context.Background(), llm.Request{Messages: []llm.Message{llm.UserText("hi")}})
|
||||
apiErr, ok := errors.AsType[*llm.APIError](err)
|
||||
if !ok {
|
||||
t.Fatalf("err = %v (%T), want *llm.APIError", err, err)
|
||||
}
|
||||
if apiErr.Status != http.StatusUnauthorized || apiErr.Code != "authentication_error" {
|
||||
t.Errorf("Status/Code = %d/%q", apiErr.Status, apiErr.Code)
|
||||
}
|
||||
if got := llm.Classify(err); got != llm.ClassPermanent {
|
||||
t.Errorf("Classify = %v, want permanent", got)
|
||||
}
|
||||
})
|
||||
t.Run("non-JSON body becomes message", func(t *testing.T) {
|
||||
srv, _ := newServer(t, http.StatusServiceUnavailable, "upstream exploded\n")
|
||||
m := testModel(t, srv, nil)
|
||||
_, err := m.Generate(context.Background(), llm.Request{Messages: []llm.Message{llm.UserText("hi")}})
|
||||
apiErr, ok := errors.AsType[*llm.APIError](err)
|
||||
if !ok {
|
||||
t.Fatalf("err = %v (%T), want *llm.APIError", err, err)
|
||||
}
|
||||
if apiErr.Status != http.StatusServiceUnavailable || apiErr.Message != "upstream exploded" {
|
||||
t.Errorf("Status/Message = %d/%q", apiErr.Status, apiErr.Message)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func TestMissingAPIKey(t *testing.T) {
|
||||
t.Setenv("OPENAI_API_KEY", "")
|
||||
srv, rec := newServer(t, http.StatusOK, textResponse)
|
||||
m, err := New(WithBaseURL(srv.URL)).Model("gpt-test")
|
||||
if err != nil {
|
||||
t.Fatalf("Model: %v", err)
|
||||
}
|
||||
_, err = m.Generate(context.Background(), llm.Request{Messages: []llm.Message{llm.UserText("hi")}})
|
||||
apiErr, ok := errors.AsType[*llm.APIError](err)
|
||||
if !ok {
|
||||
t.Fatalf("err = %v (%T), want *llm.APIError", err, err)
|
||||
}
|
||||
if apiErr.Status != http.StatusUnauthorized || apiErr.Code != "missing_api_key" {
|
||||
t.Errorf("Status/Code = %d/%q, want 401/missing_api_key", apiErr.Status, apiErr.Code)
|
||||
}
|
||||
if rec.hits != 0 {
|
||||
t.Errorf("server hit %d times, want 0", rec.hits)
|
||||
}
|
||||
}
|
||||
|
||||
func TestEnvAPIKeyReadAtConstruction(t *testing.T) {
|
||||
srv, rec := newServer(t, http.StatusOK, textResponse)
|
||||
t.Setenv("OPENAI_API_KEY", "env-secret")
|
||||
p := New(WithBaseURL(srv.URL))
|
||||
t.Setenv("OPENAI_API_KEY", "changed-later") // must not affect p
|
||||
m, err := p.Model("gpt-test")
|
||||
if err != nil {
|
||||
t.Fatalf("Model: %v", err)
|
||||
}
|
||||
if _, err := m.Generate(context.Background(), llm.Request{Messages: []llm.Message{llm.UserText("hi")}}); err != nil {
|
||||
t.Fatalf("Generate: %v", err)
|
||||
}
|
||||
if got := rec.header.Get("Authorization"); got != "Bearer env-secret" {
|
||||
t.Errorf("Authorization = %q, want %q", got, "Bearer env-secret")
|
||||
}
|
||||
}
|
||||
|
||||
func TestAuthAndContentTypeHeaders(t *testing.T) {
|
||||
srv, rec := newServer(t, http.StatusOK, textResponse)
|
||||
m := testModel(t, srv, nil)
|
||||
if _, err := m.Generate(context.Background(), llm.Request{Messages: []llm.Message{llm.UserText("hi")}}); err != nil {
|
||||
t.Fatalf("Generate: %v", err)
|
||||
}
|
||||
if got := rec.header.Get("Authorization"); got != "Bearer test-key" {
|
||||
t.Errorf("Authorization = %q, want %q", got, "Bearer test-key")
|
||||
}
|
||||
if got := rec.header.Get("Content-Type"); got != "application/json" {
|
||||
t.Errorf("Content-Type = %q, want application/json", got)
|
||||
}
|
||||
if rec.path != "/chat/completions" {
|
||||
t.Errorf("path = %q, want /chat/completions", rec.path)
|
||||
}
|
||||
}
|
||||
|
||||
func TestCompatEndpointNameAndBaseURL(t *testing.T) {
|
||||
srv, rec := newServer(t, http.StatusOK, textResponse)
|
||||
p := New(WithName("groq"), WithAPIKey("k"), WithBaseURL(srv.URL+"/openai/v1/"))
|
||||
if p.Name() != "groq" {
|
||||
t.Errorf("Name = %q, want groq", p.Name())
|
||||
}
|
||||
m, err := p.Model("llama-3.3-70b")
|
||||
if err != nil {
|
||||
t.Fatalf("Model: %v", err)
|
||||
}
|
||||
resp, err := m.Generate(context.Background(), llm.Request{Messages: []llm.Message{llm.UserText("hi")}})
|
||||
if err != nil {
|
||||
t.Fatalf("Generate: %v", err)
|
||||
}
|
||||
if rec.path != "/openai/v1/chat/completions" {
|
||||
t.Errorf("path = %q, want /openai/v1/chat/completions (trailing slash trimmed)", rec.path)
|
||||
}
|
||||
if resp.Model != "groq/llama-3.3-70b" {
|
||||
t.Errorf("Model = %q, want groq/llama-3.3-70b", resp.Model)
|
||||
}
|
||||
if rec.body["model"] != "llama-3.3-70b" {
|
||||
t.Errorf("wire model = %v, want llama-3.3-70b (verbatim)", rec.body["model"])
|
||||
}
|
||||
}
|
||||
|
||||
func TestCapabilityEnforcement(t *testing.T) {
|
||||
img := func(mime string, n int) llm.Part { return llm.Image(mime, make([]byte, n)) }
|
||||
tests := []struct {
|
||||
name string
|
||||
caps *llm.Capabilities // nil = provider defaults
|
||||
msg llm.Message
|
||||
}{
|
||||
{
|
||||
name: "images unsupported",
|
||||
caps: &llm.Capabilities{SupportsTools: true, SupportsStreaming: true},
|
||||
msg: llm.UserParts(img("image/png", 4)),
|
||||
},
|
||||
{
|
||||
name: "too many images",
|
||||
caps: &llm.Capabilities{MaxImagesPerReq: 1},
|
||||
msg: llm.UserParts(img("image/png", 4), img("image/png", 4)),
|
||||
},
|
||||
{
|
||||
name: "disallowed MIME under defaults",
|
||||
msg: llm.UserParts(img("image/bmp", 4)),
|
||||
},
|
||||
{
|
||||
name: "image too large",
|
||||
caps: &llm.Capabilities{MaxImagesPerReq: 4, MaxImageBytes: 2},
|
||||
msg: llm.UserParts(img("image/png", 3)),
|
||||
},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
srv, rec := newServer(t, http.StatusOK, textResponse)
|
||||
var mopts []llm.ModelOption
|
||||
if tt.caps != nil {
|
||||
mopts = append(mopts, llm.WithCapabilities(*tt.caps))
|
||||
}
|
||||
m := testModel(t, srv, nil, mopts...)
|
||||
_, err := m.Generate(context.Background(), llm.Request{Messages: []llm.Message{tt.msg}})
|
||||
if !errors.Is(err, llm.ErrUnsupported) {
|
||||
t.Fatalf("err = %v, want ErrUnsupported", err)
|
||||
}
|
||||
if got := llm.Classify(err); got != llm.ClassPermanent {
|
||||
t.Errorf("Classify = %v, want permanent", got)
|
||||
}
|
||||
if rec.hits != 0 {
|
||||
t.Errorf("server hit %d times, want 0 (must refuse before sending)", rec.hits)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
t.Run("streaming unsupported", func(t *testing.T) {
|
||||
srv, rec := newServer(t, http.StatusOK, textResponse)
|
||||
m := testModel(t, srv, nil, llm.WithCapabilities(llm.Capabilities{SupportsTools: true}))
|
||||
_, err := m.Stream(context.Background(), llm.Request{Messages: []llm.Message{llm.UserText("hi")}})
|
||||
if !errors.Is(err, llm.ErrUnsupported) {
|
||||
t.Fatalf("err = %v, want ErrUnsupported", err)
|
||||
}
|
||||
if rec.hits != 0 {
|
||||
t.Errorf("server hit %d times, want 0", rec.hits)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func TestModelCapabilitiesOverride(t *testing.T) {
|
||||
p := New(WithAPIKey("k"))
|
||||
def, err := p.Model("a")
|
||||
if err != nil {
|
||||
t.Fatalf("Model: %v", err)
|
||||
}
|
||||
if caps := def.Capabilities(); !caps.SupportsTools || caps.MaxImagesPerReq != 100 || caps.MaxImageBytes != 20<<20 {
|
||||
t.Errorf("default caps = %+v", caps)
|
||||
}
|
||||
custom := llm.Capabilities{SupportsStreaming: true, ContextWindow: 8192}
|
||||
ovr, err := p.Model("b", llm.WithCapabilities(custom))
|
||||
if err != nil {
|
||||
t.Fatalf("Model: %v", err)
|
||||
}
|
||||
if got := ovr.Capabilities(); !reflect.DeepEqual(got, custom) {
|
||||
t.Errorf("override caps = %+v, want %+v", got, custom)
|
||||
}
|
||||
}
|
||||
|
||||
func TestTransportErrorIsNotAPIError(t *testing.T) {
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {}))
|
||||
url := srv.URL
|
||||
srv.Close() // guarantee connection refused
|
||||
p := New(WithAPIKey("k"), WithBaseURL(url))
|
||||
m, err := p.Model("gpt-test")
|
||||
if err != nil {
|
||||
t.Fatalf("Model: %v", err)
|
||||
}
|
||||
_, err = m.Generate(context.Background(), llm.Request{Messages: []llm.Message{llm.UserText("hi")}})
|
||||
if err == nil {
|
||||
t.Fatal("Generate succeeded against closed server")
|
||||
}
|
||||
if _, ok := errors.AsType[*llm.APIError](err); ok {
|
||||
t.Errorf("transport error wrapped in APIError: %v", err)
|
||||
}
|
||||
if !strings.Contains(err.Error(), "openai: do request") {
|
||||
t.Errorf("err = %v, want openai: do request context", err)
|
||||
}
|
||||
if got := llm.Classify(err); got != llm.ClassTransient {
|
||||
t.Errorf("Classify = %v, want transient (net error must stay visible)", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestDecodeErrorWrapped(t *testing.T) {
|
||||
srv, _ := newServer(t, http.StatusOK, "{not json")
|
||||
m := testModel(t, srv, nil)
|
||||
_, err := m.Generate(context.Background(), llm.Request{Messages: []llm.Message{llm.UserText("hi")}})
|
||||
if err == nil || !strings.Contains(err.Error(), "openai: decode response") {
|
||||
t.Errorf("err = %v, want decode response context", err)
|
||||
}
|
||||
if _, ok := errors.AsType[*llm.APIError](err); ok {
|
||||
t.Errorf("decode error wrapped in APIError: %v", err)
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,183 @@
|
||||
package openai
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
"gitea.stevedudenhoeffer.com/steve/majordomo/llm"
|
||||
)
|
||||
|
||||
// stream consumes the data-only SSE stream of chat.completion.chunk events.
|
||||
//
|
||||
// Delivery contract: TextDelta events as content fragments arrive; ToolCall
|
||||
// events only once fully assembled (fragments are buffered internally and
|
||||
// flushed at stream end — simplest correct handling of interleaved parallel
|
||||
// calls); exactly one final Response event; then io.EOF.
|
||||
type stream struct {
|
||||
m *model
|
||||
body io.ReadCloser
|
||||
sc *bufio.Scanner
|
||||
|
||||
closeOnce sync.Once
|
||||
closeErr error
|
||||
|
||||
queue []llm.StreamEvent
|
||||
done bool // finalize ran; drain queue then io.EOF
|
||||
|
||||
text strings.Builder
|
||||
calls []*toolCallAcc // first-appearance order
|
||||
byIndex map[int]*toolCallAcc
|
||||
finish string
|
||||
usage llm.Usage
|
||||
}
|
||||
|
||||
// toolCallAcc collects the streamed fragments of one tool call. The id and
// name normally arrive on the first fragment for a given index, while the
// arguments arrive as string pieces that are concatenated in order.
type toolCallAcc struct {
	id   string          // tool-call id; may stay empty on some compat servers
	name string          // function name
	args strings.Builder // concatenated argument text
}
|
||||
|
||||
// Next implements llm.Stream.
|
||||
func (s *stream) Next() (llm.StreamEvent, error) {
|
||||
for {
|
||||
if len(s.queue) > 0 {
|
||||
ev := s.queue[0]
|
||||
s.queue = s.queue[1:]
|
||||
return ev, nil
|
||||
}
|
||||
if s.done {
|
||||
return llm.StreamEvent{}, io.EOF
|
||||
}
|
||||
if !s.sc.Scan() {
|
||||
if err := s.sc.Err(); err != nil {
|
||||
return llm.StreamEvent{}, fmt.Errorf("openai: read stream: %w", err)
|
||||
}
|
||||
// Why: some compat servers close the body without a [DONE]
|
||||
// sentinel; a clean EOF still finalizes with what arrived.
|
||||
s.finalize()
|
||||
continue
|
||||
}
|
||||
line := strings.TrimSpace(s.sc.Text())
|
||||
if !strings.HasPrefix(line, "data:") {
|
||||
continue // SSE comments, event:/id: fields, blank separators
|
||||
}
|
||||
payload := strings.TrimSpace(strings.TrimPrefix(line, "data:"))
|
||||
if payload == "" {
|
||||
continue
|
||||
}
|
||||
if payload == "[DONE]" {
|
||||
s.finalize()
|
||||
continue
|
||||
}
|
||||
if err := s.handleChunk([]byte(payload)); err != nil {
|
||||
return llm.StreamEvent{}, err
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// handleChunk folds one chat.completion.chunk into the stream state,
|
||||
// queueing any events it produces.
|
||||
func (s *stream) handleChunk(data []byte) error {
|
||||
var chunk streamChunk
|
||||
if err := json.Unmarshal(data, &chunk); err != nil {
|
||||
return fmt.Errorf("openai: decode stream chunk: %w", err)
|
||||
}
|
||||
if chunk.Error != nil {
|
||||
// Mid-stream error event on an otherwise-200 stream. Status stays 0:
|
||||
// there is no failing HTTP status to report.
|
||||
apiErr := &llm.APIError{
|
||||
Provider: s.m.p.name,
|
||||
Model: s.m.id,
|
||||
Code: chunk.Error.Code,
|
||||
Message: chunk.Error.Message,
|
||||
}
|
||||
if apiErr.Code == "" {
|
||||
apiErr.Code = chunk.Error.Type
|
||||
}
|
||||
return apiErr
|
||||
}
|
||||
if chunk.Usage != nil {
|
||||
s.usage = llm.Usage{
|
||||
InputTokens: chunk.Usage.PromptTokens,
|
||||
OutputTokens: chunk.Usage.CompletionTokens,
|
||||
}
|
||||
}
|
||||
// Why the guard: the include_usage chunk arrives with an EMPTY choices
|
||||
// array; indexing choices[0] unconditionally would panic on it.
|
||||
if len(chunk.Choices) == 0 {
|
||||
return nil
|
||||
}
|
||||
choice := chunk.Choices[0]
|
||||
if choice.FinishReason != "" {
|
||||
s.finish = choice.FinishReason
|
||||
}
|
||||
if choice.Delta.Content != "" {
|
||||
s.text.WriteString(choice.Delta.Content)
|
||||
s.queue = append(s.queue, llm.StreamEvent{TextDelta: choice.Delta.Content})
|
||||
}
|
||||
for _, tc := range choice.Delta.ToolCalls {
|
||||
acc := s.byIndex[tc.Index]
|
||||
if acc == nil {
|
||||
if s.byIndex == nil {
|
||||
s.byIndex = make(map[int]*toolCallAcc)
|
||||
}
|
||||
acc = &toolCallAcc{}
|
||||
s.byIndex[tc.Index] = acc
|
||||
s.calls = append(s.calls, acc)
|
||||
}
|
||||
if tc.ID != "" {
|
||||
acc.id = tc.ID
|
||||
}
|
||||
if tc.Function.Name != "" {
|
||||
acc.name = tc.Function.Name
|
||||
}
|
||||
acc.args.WriteString(tc.Function.Arguments)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// finalize assembles the buffered tool calls and the final Response, queues
|
||||
// them (ToolCall events first, Response last), and marks the stream done.
|
||||
func (s *stream) finalize() {
|
||||
if s.done {
|
||||
return
|
||||
}
|
||||
s.done = true
|
||||
resp := &llm.Response{Model: s.m.p.name + "/" + s.m.id, Usage: s.usage}
|
||||
if s.text.Len() > 0 {
|
||||
resp.Parts = []llm.Part{llm.TextPart{Text: s.text.String()}}
|
||||
}
|
||||
for i, acc := range s.calls {
|
||||
id := acc.id
|
||||
if id == "" {
|
||||
// Why: ToolResult.ID must echo ToolCall.ID; synthesize for
|
||||
// compat servers that stream calls without ids.
|
||||
id = fmt.Sprintf("call_%d", i)
|
||||
}
|
||||
resp.ToolCalls = append(resp.ToolCalls, llm.ToolCall{
|
||||
ID: id,
|
||||
Name: acc.name,
|
||||
Arguments: json.RawMessage(acc.args.String()),
|
||||
})
|
||||
}
|
||||
resp.FinishReason = mapFinish(s.finish, len(resp.ToolCalls) > 0)
|
||||
for i := range resp.ToolCalls {
|
||||
tc := resp.ToolCalls[i] // copy so the event doesn't alias the slice
|
||||
s.queue = append(s.queue, llm.StreamEvent{ToolCall: &tc})
|
||||
}
|
||||
s.queue = append(s.queue, llm.StreamEvent{Response: resp})
|
||||
}
|
||||
|
||||
// Close implements llm.Stream. Closing the body unblocks any in-flight read
|
||||
// and aborts the HTTP stream; safe to call at any time, including twice.
|
||||
func (s *stream) Close() error {
|
||||
s.closeOnce.Do(func() { s.closeErr = s.body.Close() })
|
||||
return s.closeErr
|
||||
}
|
||||
@@ -0,0 +1,267 @@
|
||||
package openai
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"io"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"testing"
|
||||
|
||||
"gitea.stevedudenhoeffer.com/steve/majordomo/llm"
|
||||
)
|
||||
|
||||
// sseServer streams each payload as one "data: <payload>" SSE event and
|
||||
// records the request like newServer.
|
||||
func sseServer(t *testing.T, payloads ...string) (*httptest.Server, *recorded) {
|
||||
t.Helper()
|
||||
rec := &recorded{}
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
rec.hits++
|
||||
rec.header = r.Header.Clone()
|
||||
rec.path = r.URL.Path
|
||||
raw, err := io.ReadAll(r.Body)
|
||||
if err != nil {
|
||||
t.Errorf("read request body: %v", err)
|
||||
}
|
||||
if len(raw) > 0 {
|
||||
if err := json.Unmarshal(raw, &rec.body); err != nil {
|
||||
t.Errorf("request body is not JSON: %v\n%s", err, raw)
|
||||
}
|
||||
}
|
||||
w.Header().Set("Content-Type", "text/event-stream")
|
||||
for _, p := range payloads {
|
||||
io.WriteString(w, "data: "+p+"\n\n")
|
||||
}
|
||||
}))
|
||||
t.Cleanup(srv.Close)
|
||||
return srv, rec
|
||||
}
|
||||
|
||||
// collect drains a stream to io.EOF, failing the test on any other error.
|
||||
func collect(t *testing.T, s llm.Stream) []llm.StreamEvent {
|
||||
t.Helper()
|
||||
var events []llm.StreamEvent
|
||||
for {
|
||||
ev, err := s.Next()
|
||||
if err == io.EOF {
|
||||
return events
|
||||
}
|
||||
if err != nil {
|
||||
t.Fatalf("Next: %v", err)
|
||||
}
|
||||
events = append(events, ev)
|
||||
}
|
||||
}
|
||||
|
||||
func TestStreamText(t *testing.T) {
|
||||
srv, rec := sseServer(t,
|
||||
`{"id":"c1","object":"chat.completion.chunk","created":1,"model":"gpt-test","choices":[{"index":0,"delta":{"role":"assistant","content":"","refusal":null},"finish_reason":null}]}`,
|
||||
`{"choices":[{"index":0,"delta":{"content":"Hel"},"finish_reason":null}],"obfuscation":"xK9q"}`,
|
||||
`{"choices":[{"index":0,"delta":{"content":"lo"},"finish_reason":null}]}`,
|
||||
`{"choices":[{"index":0,"delta":{},"finish_reason":"stop"}]}`,
|
||||
`{"choices":[],"usage":{"prompt_tokens":5,"completion_tokens":2,"total_tokens":7}}`,
|
||||
`[DONE]`,
|
||||
)
|
||||
m := testModel(t, srv, nil)
|
||||
s, err := m.Stream(context.Background(), llm.Request{Messages: []llm.Message{llm.UserText("hi")}})
|
||||
if err != nil {
|
||||
t.Fatalf("Stream: %v", err)
|
||||
}
|
||||
defer s.Close()
|
||||
events := collect(t, s)
|
||||
|
||||
// Request shape: stream flag, usage opt-in, SSE accept header.
|
||||
if rec.body["stream"] != true {
|
||||
t.Errorf("stream = %v, want true", rec.body["stream"])
|
||||
}
|
||||
so, _ := rec.body["stream_options"].(map[string]any)
|
||||
if so == nil || so["include_usage"] != true {
|
||||
t.Errorf("stream_options = %v, want include_usage true", rec.body["stream_options"])
|
||||
}
|
||||
if got := rec.header.Get("Accept"); got != "text/event-stream" {
|
||||
t.Errorf("Accept = %q, want text/event-stream", got)
|
||||
}
|
||||
|
||||
if len(events) != 3 {
|
||||
t.Fatalf("got %d events, want 3: %+v", len(events), events)
|
||||
}
|
||||
if events[0].TextDelta != "Hel" || events[1].TextDelta != "lo" {
|
||||
t.Errorf("deltas = %q, %q, want Hel, lo", events[0].TextDelta, events[1].TextDelta)
|
||||
}
|
||||
final := events[2].Response
|
||||
if final == nil {
|
||||
t.Fatal("last event has no Response")
|
||||
}
|
||||
if got := final.Text(); got != "Hello" {
|
||||
t.Errorf("final text = %q, want Hello", got)
|
||||
}
|
||||
if final.FinishReason != llm.FinishStop {
|
||||
t.Errorf("FinishReason = %v, want stop", final.FinishReason)
|
||||
}
|
||||
if final.Usage != (llm.Usage{InputTokens: 5, OutputTokens: 2}) {
|
||||
t.Errorf("Usage = %+v, want {5 2}", final.Usage)
|
||||
}
|
||||
if final.Model != "openai/gpt-test" {
|
||||
t.Errorf("Model = %q, want openai/gpt-test", final.Model)
|
||||
}
|
||||
|
||||
// Next after EOF keeps returning EOF; Close is idempotent.
|
||||
if _, err := s.Next(); err != io.EOF {
|
||||
t.Errorf("Next after EOF = %v, want io.EOF", err)
|
||||
}
|
||||
if err := s.Close(); err != nil {
|
||||
t.Errorf("first Close: %v", err)
|
||||
}
|
||||
if err := s.Close(); err != nil {
|
||||
t.Errorf("second Close: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestStreamParallelToolCalls(t *testing.T) {
|
||||
// Two interleaved calls with distinct indexes; id/name only on the first
|
||||
// fragment of each; arguments split across fragments.
|
||||
srv, _ := sseServer(t,
|
||||
`{"choices":[{"index":0,"delta":{"role":"assistant","content":null,"tool_calls":[{"index":0,"id":"call_a","type":"function","function":{"name":"get_weather","arguments":""}}]},"finish_reason":null}]}`,
|
||||
`{"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"{\"city\":"}}]},"finish_reason":null}]}`,
|
||||
`{"choices":[{"index":0,"delta":{"tool_calls":[{"index":1,"id":"call_b","type":"function","function":{"name":"get_time","arguments":"{\"tz\":"}}]},"finish_reason":null}]}`,
|
||||
`{"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"\"Boston\"}"}}]},"finish_reason":null}]}`,
|
||||
`{"choices":[{"index":0,"delta":{"tool_calls":[{"index":1,"function":{"arguments":"\"EST\"}"}}]},"finish_reason":null}]}`,
|
||||
`{"choices":[{"index":0,"delta":{},"finish_reason":"tool_calls"}]}`,
|
||||
`{"choices":[],"usage":{"prompt_tokens":11,"completion_tokens":9,"total_tokens":20}}`,
|
||||
`[DONE]`,
|
||||
)
|
||||
m := testModel(t, srv, nil)
|
||||
s, err := m.Stream(context.Background(), llm.Request{Messages: []llm.Message{llm.UserText("hi")}})
|
||||
if err != nil {
|
||||
t.Fatalf("Stream: %v", err)
|
||||
}
|
||||
defer s.Close()
|
||||
events := collect(t, s)
|
||||
|
||||
if len(events) != 3 {
|
||||
t.Fatalf("got %d events, want 3 (two tool calls + response): %+v", len(events), events)
|
||||
}
|
||||
a, b := events[0].ToolCall, events[1].ToolCall
|
||||
if a == nil || b == nil {
|
||||
t.Fatalf("events 0/1 are not tool calls: %+v", events)
|
||||
}
|
||||
if a.ID != "call_a" || a.Name != "get_weather" || string(a.Arguments) != `{"city":"Boston"}` {
|
||||
t.Errorf("first call = %+v", a)
|
||||
}
|
||||
if b.ID != "call_b" || b.Name != "get_time" || string(b.Arguments) != `{"tz":"EST"}` {
|
||||
t.Errorf("second call = %+v", b)
|
||||
}
|
||||
final := events[2].Response
|
||||
if final == nil {
|
||||
t.Fatal("last event has no Response")
|
||||
}
|
||||
if len(final.ToolCalls) != 2 {
|
||||
t.Fatalf("final ToolCalls = %d, want 2", len(final.ToolCalls))
|
||||
}
|
||||
if final.ToolCalls[0].ID != "call_a" || final.ToolCalls[1].ID != "call_b" {
|
||||
t.Errorf("final ToolCalls order = %q, %q", final.ToolCalls[0].ID, final.ToolCalls[1].ID)
|
||||
}
|
||||
if final.FinishReason != llm.FinishToolCalls {
|
||||
t.Errorf("FinishReason = %v, want tool_calls", final.FinishReason)
|
||||
}
|
||||
if final.Usage != (llm.Usage{InputTokens: 11, OutputTokens: 9}) {
|
||||
t.Errorf("Usage = %+v, want {11 9}", final.Usage)
|
||||
}
|
||||
}
|
||||
|
||||
func TestStreamMidStreamError(t *testing.T) {
|
||||
srv, _ := sseServer(t,
|
||||
`{"choices":[{"index":0,"delta":{"content":"par"},"finish_reason":null}]}`,
|
||||
`{"error":{"message":"The server had an error while processing your request","type":"server_error","param":null,"code":null}}`,
|
||||
)
|
||||
m := testModel(t, srv, nil)
|
||||
s, err := m.Stream(context.Background(), llm.Request{Messages: []llm.Message{llm.UserText("hi")}})
|
||||
if err != nil {
|
||||
t.Fatalf("Stream: %v", err)
|
||||
}
|
||||
defer s.Close()
|
||||
|
||||
ev, err := s.Next()
|
||||
if err != nil || ev.TextDelta != "par" {
|
||||
t.Fatalf("first event = %+v, %v; want TextDelta par", ev, err)
|
||||
}
|
||||
_, err = s.Next()
|
||||
apiErr, ok := errors.AsType[*llm.APIError](err)
|
||||
if !ok {
|
||||
t.Fatalf("err = %v (%T), want *llm.APIError", err, err)
|
||||
}
|
||||
if apiErr.Code != "server_error" {
|
||||
t.Errorf("Code = %q, want server_error", apiErr.Code)
|
||||
}
|
||||
if apiErr.Message != "The server had an error while processing your request" {
|
||||
t.Errorf("Message = %q", apiErr.Message)
|
||||
}
|
||||
if apiErr.Status != 0 {
|
||||
t.Errorf("Status = %d, want 0 (the HTTP stream was 200)", apiErr.Status)
|
||||
}
|
||||
}
|
||||
|
||||
func TestStreamHTTPError(t *testing.T) {
|
||||
srv, _ := newServer(t, http.StatusTooManyRequests,
|
||||
`{"error":{"message":"Rate limit reached","type":"rate_limit_error","param":null,"code":"rate_limit_exceeded"}}`)
|
||||
m := testModel(t, srv, nil)
|
||||
_, err := m.Stream(context.Background(), llm.Request{Messages: []llm.Message{llm.UserText("hi")}})
|
||||
apiErr, ok := errors.AsType[*llm.APIError](err)
|
||||
if !ok {
|
||||
t.Fatalf("err = %v (%T), want *llm.APIError from Stream itself", err, err)
|
||||
}
|
||||
if apiErr.Status != http.StatusTooManyRequests || apiErr.Code != "rate_limit_exceeded" {
|
||||
t.Errorf("Status/Code = %d/%q", apiErr.Status, apiErr.Code)
|
||||
}
|
||||
if got := llm.Classify(err); got != llm.ClassTransient {
|
||||
t.Errorf("Classify = %v, want transient", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestStreamWithoutDoneSentinel(t *testing.T) {
|
||||
// Why: some compat servers close the connection without "data: [DONE]";
|
||||
// a clean EOF must still produce the final Response.
|
||||
srv, _ := sseServer(t,
|
||||
`{"choices":[{"index":0,"delta":{"content":"ok"},"finish_reason":null}]}`,
|
||||
`{"choices":[{"index":0,"delta":{},"finish_reason":"stop"}]}`,
|
||||
)
|
||||
m := testModel(t, srv, nil)
|
||||
s, err := m.Stream(context.Background(), llm.Request{Messages: []llm.Message{llm.UserText("hi")}})
|
||||
if err != nil {
|
||||
t.Fatalf("Stream: %v", err)
|
||||
}
|
||||
defer s.Close()
|
||||
events := collect(t, s)
|
||||
if len(events) != 2 {
|
||||
t.Fatalf("got %d events, want 2: %+v", len(events), events)
|
||||
}
|
||||
final := events[1].Response
|
||||
if final == nil || final.Text() != "ok" || final.FinishReason != llm.FinishStop {
|
||||
t.Errorf("final = %+v", final)
|
||||
}
|
||||
}
|
||||
|
||||
func TestStreamCloseEarly(t *testing.T) {
|
||||
srv, _ := sseServer(t,
|
||||
`{"choices":[{"index":0,"delta":{"content":"a"},"finish_reason":null}]}`,
|
||||
`{"choices":[{"index":0,"delta":{"content":"b"},"finish_reason":null}]}`,
|
||||
`{"choices":[{"index":0,"delta":{},"finish_reason":"stop"}]}`,
|
||||
`[DONE]`,
|
||||
)
|
||||
m := testModel(t, srv, nil)
|
||||
s, err := m.Stream(context.Background(), llm.Request{Messages: []llm.Message{llm.UserText("hi")}})
|
||||
if err != nil {
|
||||
t.Fatalf("Stream: %v", err)
|
||||
}
|
||||
if _, err := s.Next(); err != nil {
|
||||
t.Fatalf("Next: %v", err)
|
||||
}
|
||||
if err := s.Close(); err != nil {
|
||||
t.Errorf("Close mid-stream: %v", err)
|
||||
}
|
||||
if err := s.Close(); err != nil {
|
||||
t.Errorf("Close again: %v", err)
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,321 @@
|
||||
package openai
|
||||
|
||||
import (
|
||||
"encoding/base64"
|
||||
"encoding/json"
|
||||
"strings"
|
||||
|
||||
"gitea.stevedudenhoeffer.com/steve/majordomo/llm"
|
||||
)
|
||||
|
||||
// --- request wire shapes ---
|
||||
|
||||
// Request-side wire types. Field order and JSON tags define the bytes sent
// on the wire, so they are kept exactly as the API expects them.
type (
	// chatRequest is the JSON body of a chat-completions call. Optional
	// settings are tagged omitempty so unset values stay off the wire.
	chatRequest struct {
		Model    string        `json:"model"`
		Messages []wireMessage `json:"messages"`
		Tools    []wireTool    `json:"tools,omitempty"`
		// ToolChoice holds either one of the strings "auto", "none",
		// "required" or a wireNamedToolChoice object. It is typed any so a
		// single field can serve the one wire key in both forms.
		ToolChoice  any      `json:"tool_choice,omitempty"`
		Temperature *float64 `json:"temperature,omitempty"`
		TopP        *float64 `json:"top_p,omitempty"`
		// MaxCompletionTokens and MaxTokens are two spellings of the output
		// limit; buildRequest sets at most one of them.
		MaxCompletionTokens int                `json:"max_completion_tokens,omitempty"`
		MaxTokens           int                `json:"max_tokens,omitempty"`
		Stop                []string           `json:"stop,omitempty"`
		ReasoningEffort     string             `json:"reasoning_effort,omitempty"`
		ResponseFormat      *wireRespFormat    `json:"response_format,omitempty"`
		Stream              bool               `json:"stream,omitempty"`
		StreamOptions       *wireStreamOptions `json:"stream_options,omitempty"`
	}

	// wireMessage is one conversation turn as sent to the API.
	wireMessage struct {
		Role string `json:"role"`
		// Content is a string for text-only turns, a part array for
		// multimodal turns, or nil (wire null) for assistant turns that
		// only call tools. It has no omitempty, so nil is sent as null.
		Content    any            `json:"content"`
		ToolCalls  []wireToolCall `json:"tool_calls,omitempty"`
		ToolCallID string         `json:"tool_call_id,omitempty"`
	}

	// wireTextPart is a text element of a multimodal content array.
	wireTextPart struct {
		Type string `json:"type"`
		Text string `json:"text"`
	}

	// wireImagePart is an image element of a multimodal content array.
	wireImagePart struct {
		Type     string       `json:"type"`
		ImageURL wireImageURL `json:"image_url"`
	}

	// wireImageURL wraps the URL of an image part.
	wireImageURL struct {
		URL string `json:"url"`
	}

	// wireToolCall is a tool invocation attached to an assistant message.
	wireToolCall struct {
		ID       string           `json:"id"`
		Type     string           `json:"type"`
		Function wireFunctionCall `json:"function"`
	}

	// wireFunctionCall names the function being called and its arguments.
	wireFunctionCall struct {
		Name string `json:"name"`
		// Arguments is a JSON-encoded STRING per the wire format, not an
		// object.
		Arguments string `json:"arguments"`
	}

	// wireTool is one entry of the request's tools array.
	wireTool struct {
		Type     string           `json:"type"`
		Function wireToolFunction `json:"function"`
	}

	// wireToolFunction declares a callable function; Parameters carries
	// the caller's schema through verbatim as raw JSON.
	wireToolFunction struct {
		Name        string          `json:"name"`
		Description string          `json:"description,omitempty"`
		Parameters  json.RawMessage `json:"parameters,omitempty"`
	}

	// wireNamedToolChoice is the object form of tool_choice, selecting one
	// tool by name.
	wireNamedToolChoice struct {
		Type     string       `json:"type"`
		Function wireToolName `json:"function"`
	}

	// wireToolName carries just the name inside wireNamedToolChoice.
	wireToolName struct {
		Name string `json:"name"`
	}

	// wireRespFormat is the response_format object used for structured
	// output.
	wireRespFormat struct {
		Type       string          `json:"type"`
		JSONSchema *wireJSONSchema `json:"json_schema,omitempty"`
	}

	// wireJSONSchema omits the strict flag on purpose: strict mode imposes
	// schema rewrites (every property required, additionalProperties:false
	// at every level) that belong to the caller, not the transport.
	wireJSONSchema struct {
		Name   string          `json:"name"`
		Schema json.RawMessage `json:"schema"`
	}

	// wireStreamOptions is the stream_options object; IncludeUsage has no
	// omitempty, so it is always serialized when the object is present.
	wireStreamOptions struct {
		IncludeUsage bool `json:"include_usage"`
	}
)
|
||||
|
||||
// --- response wire shapes (loose: unknown fields ignored) ---
|
||||
|
||||
type chatResponse struct {
|
||||
ID string `json:"id"`
|
||||
Object string `json:"object"`
|
||||
Created int64 `json:"created"`
|
||||
Model string `json:"model"`
|
||||
Choices []chatChoice `json:"choices"`
|
||||
Usage *wireUsage `json:"usage"`
|
||||
}
|
||||
|
||||
type chatChoice struct {
|
||||
Index int `json:"index"`
|
||||
Message wireRespMessage `json:"message"`
|
||||
FinishReason string `json:"finish_reason"`
|
||||
}
|
||||
|
||||
type wireRespMessage struct {
|
||||
Role string `json:"role"`
|
||||
Content string `json:"content"` // null decodes to ""
|
||||
Refusal string `json:"refusal"` // tolerated, unused
|
||||
ToolCalls []wireToolCall `json:"tool_calls"`
|
||||
}
|
||||
|
||||
type wireUsage struct {
|
||||
PromptTokens int `json:"prompt_tokens"`
|
||||
CompletionTokens int `json:"completion_tokens"`
|
||||
TotalTokens int `json:"total_tokens"`
|
||||
}
|
||||
|
||||
type errorEnvelope struct {
|
||||
Error wireError `json:"error"`
|
||||
}
|
||||
|
||||
type wireError struct {
|
||||
Message string `json:"message"`
|
||||
Type string `json:"type"`
|
||||
Code string `json:"code"` // null decodes to ""
|
||||
}
|
||||
|
||||
// --- streaming wire shapes ---
|
||||
|
||||
type streamChunk struct {
|
||||
Choices []streamChoice `json:"choices"`
|
||||
Usage *wireUsage `json:"usage"`
|
||||
Error *wireError `json:"error"` // mid-stream error event
|
||||
}
|
||||
|
||||
type streamChoice struct {
|
||||
Index int `json:"index"`
|
||||
Delta streamDelta `json:"delta"`
|
||||
FinishReason string `json:"finish_reason"` // null decodes to ""
|
||||
}
|
||||
|
||||
type streamDelta struct {
|
||||
Content string `json:"content"` // null decodes to ""
|
||||
ToolCalls []streamToolCallDelta `json:"tool_calls"`
|
||||
}
|
||||
|
||||
// streamToolCallDelta is one tool-call fragment. The id and name appear only
|
||||
// on a call's first fragment; later fragments carry just index + an
|
||||
// arguments substring. Accumulation keys on Index, never ID.
|
||||
type streamToolCallDelta struct {
|
||||
Index int `json:"index"`
|
||||
ID string `json:"id"`
|
||||
Function wireFunctionCall `json:"function"`
|
||||
}
|
||||
|
||||
// --- mapping: llm.Request -> chatRequest ---
|
||||
|
||||
// buildRequest translates the canonical request to the wire shape. The
|
||||
// capability check has already passed by the time this runs.
|
||||
func (m *model) buildRequest(req llm.Request, stream bool) *chatRequest {
|
||||
out := &chatRequest{
|
||||
Model: m.id,
|
||||
Temperature: req.Temperature,
|
||||
TopP: req.TopP,
|
||||
Stop: req.StopSequences,
|
||||
ReasoningEffort: req.ReasoningEffort,
|
||||
}
|
||||
|
||||
// Fold Request.System and every RoleSystem message into one leading
|
||||
// system message, System field first. Why: the canonical contract allows
|
||||
// system content in both places; OpenAI wants one system mechanism.
|
||||
var sys []string
|
||||
if req.System != "" {
|
||||
sys = append(sys, req.System)
|
||||
}
|
||||
for _, msg := range req.Messages {
|
||||
if msg.Role == llm.RoleSystem {
|
||||
if t := msg.Text(); t != "" {
|
||||
sys = append(sys, t)
|
||||
}
|
||||
}
|
||||
}
|
||||
if joined := strings.Join(sys, "\n\n"); joined != "" {
|
||||
out.Messages = append(out.Messages, wireMessage{Role: "system", Content: joined})
|
||||
}
|
||||
|
||||
for _, msg := range req.Messages {
|
||||
switch msg.Role {
|
||||
case llm.RoleSystem:
|
||||
// Folded above; excluded from the normal message list.
|
||||
case llm.RoleUser:
|
||||
out.Messages = append(out.Messages, wireMessage{Role: "user", Content: contentValue(msg.Parts)})
|
||||
case llm.RoleAssistant:
|
||||
wm := wireMessage{Role: "assistant"}
|
||||
if text := msg.Text(); text != "" {
|
||||
wm.Content = text
|
||||
}
|
||||
for _, tc := range msg.ToolCalls {
|
||||
args := string(tc.Arguments)
|
||||
if args == "" {
|
||||
// Why: arguments must be a valid JSON document string;
|
||||
// an empty string is not one.
|
||||
args = "{}"
|
||||
}
|
||||
wm.ToolCalls = append(wm.ToolCalls, wireToolCall{
|
||||
ID: tc.ID,
|
||||
Type: "function",
|
||||
Function: wireFunctionCall{Name: tc.Name, Arguments: args},
|
||||
})
|
||||
}
|
||||
out.Messages = append(out.Messages, wm)
|
||||
case llm.RoleTool:
|
||||
// One wire message per result: the API pairs each tool output
|
||||
// with its call via tool_call_id, one message each.
|
||||
for _, tr := range msg.ToolResults {
|
||||
content := tr.Content
|
||||
if tr.IsError {
|
||||
content = "ERROR: " + content
|
||||
}
|
||||
out.Messages = append(out.Messages, wireMessage{
|
||||
Role: "tool",
|
||||
Content: content,
|
||||
ToolCallID: tr.ID,
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for _, t := range req.Tools {
|
||||
out.Tools = append(out.Tools, wireTool{
|
||||
Type: "function",
|
||||
Function: wireToolFunction{Name: t.Name, Description: t.Description, Parameters: t.Parameters},
|
||||
})
|
||||
}
|
||||
|
||||
switch req.ToolChoice {
|
||||
case "":
|
||||
// Omit: provider default ("auto" when tools are present).
|
||||
case "auto", "none", "required":
|
||||
out.ToolChoice = req.ToolChoice
|
||||
default:
|
||||
// Any other value names the one tool the model must call.
|
||||
out.ToolChoice = wireNamedToolChoice{Type: "function", Function: wireToolName{Name: req.ToolChoice}}
|
||||
}
|
||||
|
||||
if req.MaxTokens > 0 {
|
||||
if m.p.legacyMaxTokens {
|
||||
out.MaxTokens = req.MaxTokens
|
||||
} else {
|
||||
out.MaxCompletionTokens = req.MaxTokens
|
||||
}
|
||||
}
|
||||
|
||||
if len(req.Schema) > 0 {
|
||||
name := req.SchemaName
|
||||
if name == "" {
|
||||
name = "response"
|
||||
}
|
||||
out.ResponseFormat = &wireRespFormat{
|
||||
Type: "json_schema",
|
||||
JSONSchema: &wireJSONSchema{Name: name, Schema: req.Schema},
|
||||
}
|
||||
}
|
||||
|
||||
if stream {
|
||||
out.Stream = true
|
||||
// Why: without include_usage the stream never reports token counts;
|
||||
// the usage arrives in one extra chunk with an empty choices array.
|
||||
out.StreamOptions = &wireStreamOptions{IncludeUsage: true}
|
||||
}
|
||||
|
||||
return out
|
||||
}
|
||||
|
||||
// contentValue renders message parts as the wire content value: a plain
|
||||
// string when text-only (maximum compat), a part array when images are
|
||||
// present.
|
||||
func contentValue(parts []llm.Part) any {
|
||||
multimodal := false
|
||||
for _, p := range parts {
|
||||
if _, ok := p.(llm.ImagePart); ok {
|
||||
multimodal = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if !multimodal {
|
||||
var b strings.Builder
|
||||
for _, p := range parts {
|
||||
if t, ok := p.(llm.TextPart); ok {
|
||||
b.WriteString(t.Text)
|
||||
}
|
||||
}
|
||||
return b.String()
|
||||
}
|
||||
out := make([]any, 0, len(parts))
|
||||
for _, p := range parts {
|
||||
switch v := p.(type) {
|
||||
case llm.TextPart:
|
||||
out = append(out, wireTextPart{Type: "text", Text: v.Text})
|
||||
case llm.ImagePart:
|
||||
url := "data:" + v.MIME + ";base64," + base64.StdEncoding.EncodeToString(v.Data)
|
||||
out = append(out, wireImagePart{Type: "image_url", ImageURL: wireImageURL{URL: url}})
|
||||
}
|
||||
}
|
||||
return out
|
||||
}
|
||||
Reference in New Issue
Block a user