96c612e707
Add provider/llamaswap, a tailored provider for llama-swap (the model-swapping
proxy over llama.cpp / stable-diffusion.cpp). Its chat path delegates to
provider/openai at {base}/v1 — no duplicated wire client (ADR-0007) — with
legacy max_tokens, a Bearer no-key placeholder for keyless local instances, and
a timeout-free client so cold model swaps rely on context deadlines. The
"tailored" surface is concrete management methods (ListModels / Running /
Unload) that don't belong on the canonical llm.Provider interface. The
llama-swap:// DSN scheme builds an http base URL (local-first); a no-URL
built-in errors clearly on use, mirroring foreman.
Add imagegen, a new canonical text-to-image interface separate from llm
(Request/Result/Model/Provider; Image = llm.ImagePart so generated images feed
straight back into chat). First backend is llama-swap via OpenAI
/v1/images/generations (b64_json, bytes-only). Re-exported from the root. v1 is
txt2img only.
Hermetic httptest coverage for chat delegation, management endpoints, image
decode, and scheme wiring. ADR-0015 + ADR-0016, README support matrix +
image-gen section, CLAUDE.md package map, and progress.md updated in the same
commit.
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
105 lines
3.1 KiB
Go
105 lines
3.1 KiB
Go
package llamaswap
|
|
|
|
import (
|
|
"context"
|
|
"encoding/base64"
|
|
"fmt"
|
|
"net/http"
|
|
"strings"
|
|
|
|
"gitea.stevedudenhoeffer.com/steve/majordomo/imagegen"
|
|
"gitea.stevedudenhoeffer.com/steve/majordomo/llm"
|
|
)
|
|
|
|
// ImageModel implements imagegen.Provider, binding an image-generation model
|
|
// served by llama-swap (routed to a stable-diffusion.cpp upstream). The id is
|
|
// passed through verbatim and selects which upstream llama-swap loads.
|
|
func (p *Provider) ImageModel(id string, opts ...imagegen.ModelOption) (imagegen.Model, error) {
|
|
if p.baseURL == "" {
|
|
return nil, fmt.Errorf("llama-swap provider %q: no base URL configured (set one via WithBaseURL or an LLM_* env DSN)", p.name)
|
|
}
|
|
_ = imagegen.ApplyModelOptions(opts)
|
|
return &imageModel{p: p, id: id}, nil
|
|
}
|
|
|
|
type imageModel struct {
|
|
p *Provider
|
|
id string
|
|
}
|
|
|
|
// imageRequest mirrors the JSON body accepted by the OpenAI-style
// /v1/images/generations endpoint. Callers in this package always set
// ResponseFormat to "b64_json" so image bytes arrive inline in the response
// and no follow-up download is needed.
type imageRequest struct {
	// Model names the model the request is addressed to.
	Model string `json:"model"`
	// Prompt is the text description to render.
	Prompt string `json:"prompt"`
	// N is the requested image count; left out of the JSON when zero.
	N int `json:"n,omitempty"`
	// Size is the requested dimensions string; left out of the JSON when empty.
	Size string `json:"size,omitempty"`
	// ResponseFormat is always serialized, even when empty.
	ResponseFormat string `json:"response_format"`
}
|
|
|
|
// imageResponse is the decoded reply from /v1/images/generations.
type imageResponse struct {
	// Created is the "created" value reported by the server.
	// NOTE(review): presumably a Unix timestamp, as in the OpenAI API — confirm.
	Created int64 `json:"created"`
	// Data holds one entry per generated image.
	Data []struct {
		// B64JSON is the base64-encoded image payload.
		B64JSON string `json:"b64_json"`
		// URL is decoded when the server sends one; it is never fetched by
		// this package.
		URL string `json:"url"`
	} `json:"data"`
}
|
|
|
|
// Generate implements imagegen.Model via POST {base}/v1/images/generations.
|
|
func (m *imageModel) Generate(ctx context.Context, req imagegen.Request, opts ...imagegen.Option) (*imagegen.Result, error) {
|
|
req = req.Apply(opts...)
|
|
if strings.TrimSpace(req.Prompt) == "" {
|
|
return nil, fmt.Errorf("%w: image generation requires a prompt", llm.ErrUnsupported)
|
|
}
|
|
|
|
wire := imageRequest{
|
|
Model: m.id,
|
|
Prompt: req.Prompt,
|
|
N: req.N,
|
|
Size: req.Size,
|
|
ResponseFormat: "b64_json",
|
|
}
|
|
|
|
var resp imageResponse
|
|
if err := m.p.doJSON(ctx, http.MethodPost, "/v1/images/generations", &wire, &resp); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
out := &imagegen.Result{Raw: &resp}
|
|
for i, d := range resp.Data {
|
|
if d.B64JSON == "" {
|
|
// Why error rather than skip: a url-only entry means the backend
|
|
// ignored response_format; we don't fetch remote content (mirrors
|
|
// llm.ImagePart's bytes-only contract), so surface it.
|
|
return nil, &llm.APIError{
|
|
Provider: m.p.name,
|
|
Model: m.id,
|
|
Message: fmt.Sprintf("image %d returned no inline b64_json data", i),
|
|
}
|
|
}
|
|
raw, err := base64.StdEncoding.DecodeString(d.B64JSON)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("llama-swap: decode image %d: %w", i, err)
|
|
}
|
|
out.Images = append(out.Images, llm.ImagePart{MIME: sniffImageMIME(raw), Data: raw})
|
|
}
|
|
if len(out.Images) == 0 {
|
|
return nil, &llm.APIError{
|
|
Provider: m.p.name,
|
|
Model: m.id,
|
|
Message: "image response contained no images",
|
|
}
|
|
}
|
|
return out, nil
|
|
}
|
|
|
|
// sniffImageMIME reports the MIME type of data as detected from its leading
// bytes by http.DetectContentType. Whenever the detected type is not an
// image/* type (including for empty input), it falls back to image/png, the
// format stable-diffusion.cpp emits.
func sniffImageMIME(data []byte) string {
	const fallback = "image/png"

	if detected := http.DetectContentType(data); strings.HasPrefix(detected, "image/") {
		return detected
	}
	return fallback
}
|