// Package imagegen is majordomo's canonical text-to-image surface. It is a // deliberately separate contract from the llm package: image generation does // not fit the chat Request/Response shape (no messages, tools, streaming, or // failover chains in v1), so it gets its own small Provider/Model interface // rather than overloading llm.Model. // // Generated images are carried as llm.ImagePart (bytes + MIME), so a result // drops straight back into a chat turn: // // res, _ := im.Generate(ctx, imagegen.Request{Prompt: "a red bicycle"}) // msg := llm.UserParts(llm.Text("describe this"), res.Images[0]) // // The first implementation is provider/llamaswap, which targets the OpenAI // /v1/images/generations endpoint routed to a stable-diffusion.cpp backend. package imagegen import ( "context" "gitea.stevedudenhoeffer.com/steve/majordomo/llm" ) // Image is one generated image: raw bytes plus a MIME type. Aliased to // llm.ImagePart so generated images are interchangeable with chat content and // can be fed into llm.UserParts without conversion. type Image = llm.ImagePart // Request is a text-to-image generation request. Pointer-free zero values mean // "provider default": N == 0 yields the backend's default count (usually one), // and an empty Size leaves the backend's default resolution. type Request struct { // Prompt is the text description of the image to generate. Prompt string // N is the number of images to generate; 0 = provider default. N int // Size is the requested resolution, e.g. "512x512" or "1024x1024"; // "" = provider default. Size string // The fields below are optional per-request overrides. Their zero value // (nil pointer or empty string) means "leave the backend's own default" — // for stable-diffusion.cpp that is the per-model default baked into the // llama-swap launch flags. A caller overrides only what it explicitly sets. // Steps is the number of diffusion steps; nil = backend default. Steps *int // CFGScale is the classifier-free-guidance scale; nil = backend default. // Architecture-sensitive (SDXL likes ~7, Flux wants 1), so prefer leaving // it nil unless the caller knows the target model. CFGScale *float64 // NegativePrompt steers generation away from concepts; "" = none. NegativePrompt string // Sampler selects the sampling method (e.g. "euler", "euler_a"); // "" = backend default. Sampler string // Seed fixes the RNG seed for reproducible output; nil = random. Seed *int64 } // Result is the canonical image-generation result. type Result struct { // Images are the generated images, in the order the backend returned them. Images []Image // Raw is the provider-native response object, an escape hatch for // provider-specific fields. May be nil; never required for normal use. Raw any } // Option mutates a Request before it is sent. Options passed to Generate are // applied to a copy of the request, so a Request value can be reused. type Option func(*Request) // WithN sets the number of images to generate. func WithN(n int) Option { return func(r *Request) { r.N = n } } // WithSize sets the requested resolution (e.g. "1024x1024"). func WithSize(size string) Option { return func(r *Request) { r.Size = size } } // WithSteps overrides the number of diffusion steps. func WithSteps(n int) Option { return func(r *Request) { r.Steps = &n } } // WithCFGScale overrides the classifier-free-guidance scale. func WithCFGScale(s float64) Option { return func(r *Request) { r.CFGScale = &s } } // WithNegativePrompt sets a negative prompt. func WithNegativePrompt(s string) Option { return func(r *Request) { r.NegativePrompt = s } } // WithSampler overrides the sampling method (e.g. "euler", "euler_a"). func WithSampler(s string) Option { return func(r *Request) { r.Sampler = s } } // WithSeed fixes the RNG seed for reproducible output. func WithSeed(seed int64) Option { return func(r *Request) { r.Seed = &seed } } // Apply returns a copy of the request with all options applied. Providers call // this once at the top of Generate. func (r Request) Apply(opts ...Option) Request { for _, opt := range opts { opt(&r) } return r } // Model generates images from a text prompt. It is intentionally narrower than // llm.Model — no Stream, no Capabilities, no tool calls. type Model interface { // Generate produces one or more images for the request's prompt. Generate(ctx context.Context, req Request, opts ...Option) (*Result, error) } // ModelOption configures a Model at construction time (Provider.ImageModel). // Reserved for future per-model settings (e.g. a default size); present now so // the interface is forward-compatible. type ModelOption func(*ModelConfig) // ModelConfig carries per-model construction settings. type ModelConfig struct{} // ApplyModelOptions folds options into a config. func ApplyModelOptions(opts []ModelOption) ModelConfig { var cfg ModelConfig for _, opt := range opts { opt(&cfg) } return cfg } // Provider mints image Models bound to one backend. It mirrors llm.Provider // but for image generation. type Provider interface { // Name is the registry identifier for the provider. Name() string // ImageModel returns a Model bound to the given id (passed through to the // backend verbatim; no catalog validation). ImageModel(id string, opts ...ModelOption) (Model, error) }