feat(llamaswap): add llama-swap provider + canonical imagegen interface

Add provider/llamaswap, a tailored provider for llama-swap (the model-swapping proxy over llama.cpp / stable-diffusion.cpp). Its chat path delegates to provider/openai at {base}/v1 — no duplicated wire client (ADR-0007) — with legacy max_tokens, a Bearer no-key placeholder for keyless local instances, and a timeout-free client so cold model swaps rely on context deadlines. The "tailored" surface is concrete management methods (ListModels / Running / Unload) that don't belong on the canonical llm.Provider interface. The llama-swap:// DSN scheme builds an http base URL (local-first); a no-URL built-in errors clearly on use, mirroring foreman. Add imagegen, a new canonical text-to-image interface separate from llm (Request/Result/Model/Provider; Image = llm.ImagePart so generated images feed straight back into chat). First backend is llama-swap via OpenAI /v1/images/generations (b64_json, bytes-only). Re-exported from the root. v1 is txt2img only. Hermetic httptest coverage for chat delegation, management endpoints, image decode, and scheme wiring. ADR-0015 + ADR-0016, README support matrix + image-gen section, CLAUDE.md package map, and progress.md updated in the same commit. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-27 15:01:54 -04:00
parent 1fd7109a42
commit 96c612e707
14 changed files with 994 additions and 7 deletions
@@ -0,0 +1,104 @@
+package llamaswap
+
+import (
+	"context"
+	"encoding/base64"
+	"fmt"
+	"net/http"
+	"strings"
+
+	"gitea.stevedudenhoeffer.com/steve/majordomo/imagegen"
+	"gitea.stevedudenhoeffer.com/steve/majordomo/llm"
+)
+
+// ImageModel implements imagegen.Provider. It returns a model handle bound to
+// this provider and the given id; the id is stored verbatim and later sent as
+// the "model" field of the image-generation request, which is how llama-swap
+// is told which upstream (e.g. stable-diffusion.cpp) to load.
+//
+// It returns an error, and no model, when the provider has no base URL.
+func (p *Provider) ImageModel(id string, opts ...imagegen.ModelOption) (imagegen.Model, error) {
+	if len(p.baseURL) == 0 {
+		err := fmt.Errorf("llama-swap provider %q: no base URL configured (set one via WithBaseURL or an LLM_* env DSN)", p.name)
+		return nil, err
+	}
+
+	// The resolved options are intentionally discarded: nothing in this
+	// backend consumes them. The call is kept so option handling still runs.
+	_ = imagegen.ApplyModelOptions(opts)
+
+	bound := &imageModel{id: id, p: p}
+	return bound, nil
+}
+
+// imageModel is the value returned by Provider.ImageModel: a provider paired
+// with the model id that Generate sends as the request's "model" field.
+type imageModel struct {
+	p  *Provider // provider used to issue the request (doJSON) and to label errors (name)
+	id string    // model id, stored exactly as passed to ImageModel
+}
+
+// imageRequest is the JSON body sent to the OpenAI-style /v1/images/generations
+// endpoint. Generate always asks for b64_json so the bytes come back inline (no second fetch).
+type imageRequest struct {
+	Model          string `json:"model"`           // model id held by imageModel
+	Prompt         string `json:"prompt"`          // copied from imagegen.Request.Prompt
+	N              int    `json:"n,omitempty"`     // left out of the JSON when zero
+	Size           string `json:"size,omitempty"`  // left out of the JSON when empty
+	ResponseFormat string `json:"response_format"` // Generate always sets "b64_json"
+}
+
+// imageResponse receives the decoded JSON body of a /v1/images/generations
+// call. Generate reads only Data[i].B64JSON; the whole value is also handed
+// back to the caller through imagegen.Result.Raw.
+type imageResponse struct {
+	Created int64 `json:"created"` // decoded but not read by Generate
+	Data    []struct {
+		B64JSON string `json:"b64_json"` // standard-base64 image bytes; Generate errors when empty
+		URL     string `json:"url"`      // decoded but never read or fetched by Generate
+	} `json:"data"`
+}
+
+// Generate implements imagegen.Model. It applies opts to req, POSTs the prompt
+// to {base}/v1/images/generations via the provider's doJSON helper, and
+// decodes every returned image into an llm.ImagePart.
+//
+// Errors:
+//   - a blank (whitespace-only) prompt yields an error wrapping llm.ErrUnsupported;
+//   - any error from doJSON is returned unchanged;
+//   - an entry without inline b64_json data, or a response with no entries,
+//     yields an *llm.APIError tagged with the provider name and model id;
+//   - invalid base64 yields a wrapped decode error.
+//
+// On success Result.Raw points at the decoded response.
+func (m *imageModel) Generate(ctx context.Context, req imagegen.Request, opts ...imagegen.Option) (*imagegen.Result, error) {
+	req = req.Apply(opts...)
+	if prompt := strings.TrimSpace(req.Prompt); prompt == "" {
+		return nil, fmt.Errorf("%w: image generation requires a prompt", llm.ErrUnsupported)
+	}
+
+	// apiErr builds the provider/model-tagged error used for malformed responses.
+	apiErr := func(msg string) error {
+		return &llm.APIError{Provider: m.p.name, Model: m.id, Message: msg}
+	}
+
+	payload := imageRequest{
+		ResponseFormat: "b64_json",
+		Model:          m.id,
+		Prompt:         req.Prompt,
+		Size:           req.Size,
+		N:              req.N,
+	}
+	var decoded imageResponse
+	err := m.p.doJSON(ctx, http.MethodPost, "/v1/images/generations", &payload, &decoded)
+	if err != nil {
+		return nil, err
+	}
+
+	result := &imagegen.Result{Raw: &decoded}
+	for idx := range decoded.Data {
+		encoded := decoded.Data[idx].B64JSON
+		if encoded == "" {
+			// Fail instead of skipping: an entry carrying only a url means
+			// the backend ignored response_format. Remote content is never
+			// fetched here (same bytes-only contract as llm.ImagePart), so
+			// the problem is reported to the caller.
+			return nil, apiErr(fmt.Sprintf("image %d returned no inline b64_json data", idx))
+		}
+		blob, decodeErr := base64.StdEncoding.DecodeString(encoded)
+		if decodeErr != nil {
+			return nil, fmt.Errorf("llama-swap: decode image %d: %w", idx, decodeErr)
+		}
+		part := llm.ImagePart{MIME: sniffImageMIME(blob), Data: blob}
+		result.Images = append(result.Images, part)
+	}
+
+	if len(result.Images) == 0 {
+		return nil, apiErr("image response contained no images")
+	}
+	return result, nil
+}
+
+// sniffImageMIME identifies the image format from its leading bytes, defaulting
+// to image/png (stable-diffusion.cpp emits PNG) when detection is inconclusive.
+func sniffImageMIME(data []byte) string {
+	mime := http.DetectContentType(data)
+	if !strings.HasPrefix(mime, "image/") {
+		return "image/png"
+	}
+	return mime
+}