feat(llamaswap): add llama-swap provider + canonical imagegen interface
Add provider/llamaswap, a tailored provider for llama-swap (the model-swapping
proxy over llama.cpp / stable-diffusion.cpp). Its chat path delegates to
provider/openai at {base}/v1 — no duplicated wire client (ADR-0007) — with
legacy max_tokens, a Bearer no-key placeholder for keyless local instances, and
a timeout-free client so cold model swaps rely on context deadlines. The
"tailored" surface is concrete management methods (ListModels / Running /
Unload) that don't belong on the canonical llm.Provider interface. The
llama-swap:// DSN scheme builds an http base URL (local-first); a no-URL
built-in errors clearly on use, mirroring foreman.
Add imagegen, a new canonical text-to-image interface separate from llm
(Request/Result/Model/Provider; Image = llm.ImagePart so generated images feed
straight back into chat). First backend is llama-swap via OpenAI
/v1/images/generations (b64_json, bytes-only). Re-exported from the root. v1 is
txt2img only.
Hermetic httptest coverage for chat delegation, management endpoints, image
decode, and scheme wiring. ADR-0015 + ADR-0016, README support matrix +
image-gen section, CLAUDE.md package map, and progress.md updated in the same
commit.
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,105 @@
|
||||
// Package imagegen is majordomo's canonical text-to-image surface. It is a
|
||||
// deliberately separate contract from the llm package: image generation does
|
||||
// not fit the chat Request/Response shape (no messages, tools, streaming, or
|
||||
// failover chains in v1), so it gets its own small Provider/Model interface
|
||||
// rather than overloading llm.Model.
|
||||
//
|
||||
// Generated images are carried as llm.ImagePart (bytes + MIME), so a result
|
||||
// drops straight back into a chat turn:
|
||||
//
|
||||
//	res, _ := im.Generate(ctx, imagegen.Request{Prompt: "a red bicycle"})
//	msg := llm.UserParts(llm.Text("describe this"), res.Images[0])
|
||||
//
|
||||
// The first implementation is provider/llamaswap, which targets the OpenAI
|
||||
// /v1/images/generations endpoint routed to a stable-diffusion.cpp backend.
|
||||
package imagegen
|
||||
|
||||
import (
|
||||
"context"
|
||||
|
||||
"gitea.stevedudenhoeffer.com/steve/majordomo/llm"
|
||||
)
|
||||
|
||||
// Image is one generated image: raw bytes plus a MIME type. Aliased to
|
||||
// llm.ImagePart so generated images are interchangeable with chat content and
|
||||
// can be fed into llm.UserParts without conversion.
|
||||
type Image = llm.ImagePart
|
||||
|
||||
// Request describes one text-to-image generation call. No field is a pointer:
// a field left at its zero value asks for the provider default, so N == 0
// yields the backend's default image count (usually one) and Size == "" keeps
// the backend's default resolution.
type Request struct {
	// Prompt is the text description of the image to generate.
	Prompt string

	// N is how many images to generate. Zero selects the provider default.
	N int

	// Size is the requested resolution, such as "512x512" or "1024x1024".
	// The empty string selects the provider default.
	Size string
}
|
||||
|
||||
// Result is the canonical image-generation result.
|
||||
type Result struct {
|
||||
// Images are the generated images, in the order the backend returned them.
|
||||
Images []Image
|
||||
|
||||
// Raw is the provider-native response object, an escape hatch for
|
||||
// provider-specific fields. May be nil; never required for normal use.
|
||||
Raw any
|
||||
}
|
||||
|
||||
// Option mutates a Request before it is sent. Options passed to Generate are
|
||||
// applied to a copy of the request, so a Request value can be reused.
|
||||
type Option func(*Request)
|
||||
|
||||
// WithN sets the number of images to generate.
|
||||
func WithN(n int) Option { return func(r *Request) { r.N = n } }
|
||||
|
||||
// WithSize sets the requested resolution (e.g. "1024x1024").
|
||||
func WithSize(size string) Option { return func(r *Request) { r.Size = size } }
|
||||
|
||||
// Apply returns a copy of the request with all options applied. Providers call
|
||||
// this once at the top of Generate.
|
||||
func (r Request) Apply(opts ...Option) Request {
|
||||
for _, opt := range opts {
|
||||
opt(&r)
|
||||
}
|
||||
return r
|
||||
}
|
||||
|
||||
// Model generates images from a text prompt. It is intentionally narrower than
|
||||
// llm.Model — no Stream, no Capabilities, no tool calls.
|
||||
type Model interface {
|
||||
// Generate produces one or more images for the request's prompt.
|
||||
Generate(ctx context.Context, req Request, opts ...Option) (*Result, error)
|
||||
}
|
||||
|
||||
// ModelOption configures a Model when it is constructed through
// Provider.ImageModel. The type is reserved for future per-model settings (for
// example a default size) and exists now so the interface stays
// forward-compatible.
type ModelOption func(*ModelConfig)

// ModelConfig carries per-model construction settings. It currently has no
// fields.
type ModelConfig struct{}

// ApplyModelOptions folds opts, in order, into a zero-valued ModelConfig and
// returns the result. A nil or empty slice yields the zero ModelConfig.
//
// Nil entries are skipped: calling a nil func value panics at run time, and a
// conditionally built option list should not crash model construction.
func ApplyModelOptions(opts []ModelOption) ModelConfig {
	var cfg ModelConfig
	for _, opt := range opts {
		if opt == nil {
			continue
		}
		opt(&cfg)
	}
	return cfg
}
|
||||
|
||||
// Provider mints image Models bound to one backend. It mirrors llm.Provider
|
||||
// but for image generation.
|
||||
type Provider interface {
|
||||
// Name is the registry identifier for the provider.
|
||||
Name() string
|
||||
|
||||
// ImageModel returns a Model bound to the given id (passed through to the
|
||||
// backend verbatim; no catalog validation).
|
||||
ImageModel(id string, opts ...ModelOption) (Model, error)
|
||||
}
|
||||
Reference in New Issue
Block a user