db8d455bd8
Add Steps, CFGScale, NegativePrompt, Sampler, Seed to imagegen.Request (nil pointer/empty string = leave the backend's per-model default), with matching With* options, and forward them in the llamaswap wire payload as the stable-diffusion.cpp fields (steps/cfg_scale/negative_prompt/sample_method/seed). Unset fields are omitted so sd-server keeps its baked defaults. Lets callers (e.g. mort drawbots) override only what they explicitly set.
144 lines
5.2 KiB
Go
144 lines
5.2 KiB
Go
// Package imagegen is majordomo's canonical text-to-image surface. It is a
|
|
// deliberately separate contract from the llm package: image generation does
|
|
// not fit the chat Request/Response shape (no messages, tools, streaming, or
|
|
// failover chains in v1), so it gets its own small Provider/Model interface
|
|
// rather than overloading llm.Model.
|
|
//
|
|
// Generated images are carried as llm.ImagePart (bytes + MIME), so a result
|
|
// drops straight back into a chat turn:
|
|
//
|
|
// res, _ := im.Generate(ctx, imagegen.Request{Prompt: "a red bicycle"})
|
|
// msg := llm.UserParts(llm.Text("describe this"), res.Images[0])
|
|
//
|
|
// The first implementation is provider/llamaswap, which targets the OpenAI
|
|
// /v1/images/generations endpoint routed to a stable-diffusion.cpp backend.
|
|
package imagegen
|
|
|
|
import (
|
|
"context"
|
|
|
|
"gitea.stevedudenhoeffer.com/steve/majordomo/llm"
|
|
)
|
|
|
|
// Image is one generated image: raw bytes plus a MIME type. It is a type
// alias for llm.ImagePart (not a distinct type), so generated images are
// interchangeable with chat content and can be fed into llm.UserParts without
// conversion.
type Image = llm.ImagePart
|
|
|
|
// Request is a text-to-image generation request. Apart from Prompt, every
// field is optional and its zero value means "provider default": N == 0 yields
// the backend's default count (usually one), an empty Size leaves the
// backend's default resolution, and a nil pointer or empty string in the
// override fields leaves the backend's own setting in place.
type Request struct {
	// Prompt is the text description of the image to generate.
	Prompt string

	// N is the number of images to generate; 0 = provider default.
	N int

	// Size is the requested resolution, e.g. "512x512" or "1024x1024";
	// "" = provider default.
	Size string

	// The fields below are optional per-request overrides. Their zero value
	// (nil pointer or empty string) means "leave the backend's own default" —
	// for stable-diffusion.cpp that is the per-model default baked into the
	// llama-swap launch flags. A caller overrides only what it explicitly sets.
	// The numeric overrides are pointers so that an explicit zero can be told
	// apart from "not set".

	// Steps is the number of diffusion steps; nil = backend default.
	Steps *int

	// CFGScale is the classifier-free-guidance scale; nil = backend default.
	// Architecture-sensitive (SDXL likes ~7, Flux wants 1), so prefer leaving
	// it nil unless the caller knows the target model.
	CFGScale *float64

	// NegativePrompt steers generation away from concepts; "" = none.
	NegativePrompt string

	// Sampler selects the sampling method (e.g. "euler", "euler_a");
	// "" = backend default.
	Sampler string

	// Seed fixes the RNG seed for reproducible output; nil = random.
	Seed *int64
}
|
|
|
|
// Result is the canonical image-generation result returned by Model.Generate.
type Result struct {
	// Images are the generated images, in the order the backend returned them.
	Images []Image

	// Raw is the provider-native response object, an escape hatch for
	// provider-specific fields. May be nil; never required for normal use.
	// Its dynamic type is provider-defined.
	Raw any
}
|
|
|
|
// Option mutates a Request before it is sent. Options are folded into a
// request by Request.Apply, which works on a copy; options passed to Generate
// are therefore applied to a copy of the request, so a Request value can be
// reused across calls.
type Option func(*Request)
|
|
|
|
// WithN sets the number of images to generate.
|
|
func WithN(n int) Option { return func(r *Request) { r.N = n } }
|
|
|
|
// WithSize sets the requested resolution (e.g. "1024x1024").
|
|
func WithSize(size string) Option { return func(r *Request) { r.Size = size } }
|
|
|
|
// WithSteps overrides the number of diffusion steps.
|
|
func WithSteps(n int) Option { return func(r *Request) { r.Steps = &n } }
|
|
|
|
// WithCFGScale overrides the classifier-free-guidance scale.
|
|
func WithCFGScale(s float64) Option { return func(r *Request) { r.CFGScale = &s } }
|
|
|
|
// WithNegativePrompt sets a negative prompt.
|
|
func WithNegativePrompt(s string) Option { return func(r *Request) { r.NegativePrompt = s } }
|
|
|
|
// WithSampler overrides the sampling method (e.g. "euler", "euler_a").
|
|
func WithSampler(s string) Option { return func(r *Request) { r.Sampler = s } }
|
|
|
|
// WithSeed fixes the RNG seed for reproducible output.
|
|
func WithSeed(seed int64) Option { return func(r *Request) { r.Seed = &seed } }
|
|
|
|
// Apply returns a copy of the request with all options applied. Providers call
|
|
// this once at the top of Generate.
|
|
func (r Request) Apply(opts ...Option) Request {
|
|
for _, opt := range opts {
|
|
opt(&r)
|
|
}
|
|
return r
|
|
}
|
|
|
|
// Model generates images from a text prompt. It is intentionally narrower than
// llm.Model — no Stream, no Capabilities, no tool calls.
type Model interface {
	// Generate produces one or more images for the request's prompt. The
	// per-call opts adjust req for this call only; implementations are
	// expected to fold them in with Request.Apply, which works on a copy.
	Generate(ctx context.Context, req Request, opts ...Option) (*Result, error)
}
|
|
|
|
// ModelOption configures a Model at construction time (Provider.ImageModel).
// Reserved for future per-model settings (e.g. a default size); present now so
// the interface is forward-compatible.
type ModelOption func(*ModelConfig)

// ModelConfig carries per-model construction settings. It currently has no
// fields.
type ModelConfig struct{}

// ApplyModelOptions folds options into a config, starting from the zero
// ModelConfig and applying each option in order. Nil options are skipped
// instead of invoked, so a conditionally-built option list cannot panic.
func ApplyModelOptions(opts []ModelOption) ModelConfig {
	var cfg ModelConfig
	for _, opt := range opts {
		if opt == nil {
			// Calling a nil func value would panic; treat it as a no-op.
			continue
		}
		opt(&cfg)
	}
	return cfg
}
|
|
|
|
// Provider mints image Models bound to one backend. It mirrors llm.Provider
// but for image generation.
type Provider interface {
	// Name is the registry identifier for the provider.
	Name() string

	// ImageModel returns a Model bound to the given id (passed through to the
	// backend verbatim; no catalog validation). The opts are construction-time
	// ModelOptions; ApplyModelOptions folds them into a ModelConfig.
	ImageModel(id string, opts ...ModelOption) (Model, error)
}
|