Files
majordomo/provider/llamaswap/image.go
T
steve a744cdc335
CI / Tidy (pull_request) Successful in 9m27s
CI / Build & Test (pull_request) Successful in 9m47s
feat(imagegen): optional per-request generation settings
Add Steps, CFGScale, NegativePrompt, Sampler, Seed to imagegen.Request
(pointer/empty = leave the backend's per-model default), with mirror
options, and forward them in the llamaswap wire payload as the
stable-diffusion.cpp fields (steps/cfg_scale/negative_prompt/
sample_method/seed). Unset fields are omitted so sd-server keeps its
baked defaults.

Lets callers (e.g. mort drawbots) override only what they explicitly set.
2026-06-28 19:05:49 -04:00

122 lines
4.0 KiB
Go

package llamaswap
import (
"context"
"encoding/base64"
"fmt"
"net/http"
"strings"
"gitea.stevedudenhoeffer.com/steve/majordomo/imagegen"
"gitea.stevedudenhoeffer.com/steve/majordomo/llm"
)
// ImageModel implements imagegen.Provider. It returns a handle for the
// image-generation model named id as served by llama-swap (which routes the
// call to a stable-diffusion.cpp upstream). The id is not interpreted here: it
// is forwarded verbatim and tells llama-swap which upstream to load.
//
// An error is returned when the provider has no base URL configured. The
// supplied model options are run through imagegen.ApplyModelOptions, but the
// resulting value is not consulted by this provider.
func (p *Provider) ImageModel(id string, opts ...imagegen.ModelOption) (imagegen.Model, error) {
	if p.baseURL == "" {
		err := fmt.Errorf("llama-swap provider %q: no base URL configured (set one via WithBaseURL or an LLM_* env DSN)", p.name)
		return nil, err
	}

	// The applied options are intentionally discarded; nothing in the bound
	// model reads them.
	_ = imagegen.ApplyModelOptions(opts)

	bound := &imageModel{id: id, p: p}
	return bound, nil
}
// imageModel is the imagegen.Model handed out by Provider.ImageModel: one
// model id bound to the provider that serves it.
type imageModel struct {
	// p is the owning provider; Generate sends requests through it and uses
	// its name when reporting API errors.
	p *Provider
	// id is the model identifier given to ImageModel, sent as the "model"
	// field of every request.
	id string
}
// imageRequest is the JSON body sent to /v1/images/generations. It follows the
// OpenAI request shape and adds the stable-diffusion.cpp extras that llama-swap
// forwards to sd-server.
//
// ResponseFormat is always sent (Generate sets it to b64_json) so the image
// bytes come back inline and no second fetch is needed. Every optional setting
// is either a pointer or a string tagged omitempty: when unset it is dropped
// from the payload entirely and sd-server falls back to the model's own
// default. A field name that a given sd-server build does not recognize is
// simply ignored, which is harmless.
type imageRequest struct {
	// OpenAI-compatible fields.
	Model          string `json:"model"`
	Prompt         string `json:"prompt"`
	N              int    `json:"n,omitempty"`
	Size           string `json:"size,omitempty"`
	ResponseFormat string `json:"response_format"`

	// stable-diffusion.cpp extras. Pointers distinguish "unset" (nil, omitted)
	// from an explicit zero, which is still serialized.
	Steps          *int     `json:"steps,omitempty"`
	CFGScale       *float64 `json:"cfg_scale,omitempty"`
	NegativePrompt string   `json:"negative_prompt,omitempty"`
	SampleMethod   string   `json:"sample_method,omitempty"`
	Seed           *int64   `json:"seed,omitempty"`
}
// imageResponse is the subset of the /v1/images/generations reply that this
// package decodes. Generate also exposes the decoded value as Result.Raw.
type imageResponse struct {
	// Created is decoded from the "created" field of the reply.
	Created int64 `json:"created"`
	// Data holds one entry per returned image.
	Data []struct {
		// B64JSON is the base64-encoded image payload. Generate rejects any
		// entry where it is empty.
		B64JSON string `json:"b64_json"`
		// URL is decoded from the "url" field; Generate does not read it.
		URL string `json:"url"`
	} `json:"data"`
}
// Generate implements imagegen.Model via POST {base}/v1/images/generations.
//
// The per-call opts are folded into req first. A blank prompt or a negative N
// is rejected, before the request is issued, with an error wrapping
// llm.ErrUnsupported. The optional generation settings (Steps, CFGScale,
// NegativePrompt, Sampler, Seed) are copied onto the wire payload unchanged,
// so unset values are dropped from the JSON by the payload's omitempty tags.
//
// On success the result carries every returned image decoded from base64,
// each tagged with a sniffed MIME type, and the decoded reply in Raw. Errors:
// a failure from the round trip is returned unchanged; a reply with no
// entries, or an entry lacking inline b64_json data, yields an *llm.APIError;
// malformed base64 yields a wrapped decode error.
func (m *imageModel) Generate(ctx context.Context, req imagegen.Request, opts ...imagegen.Option) (*imagegen.Result, error) {
	req = req.Apply(opts...)

	switch {
	case strings.TrimSpace(req.Prompt) == "":
		return nil, fmt.Errorf("%w: image generation requires a prompt", llm.ErrUnsupported)
	case req.N < 0:
		return nil, fmt.Errorf("%w: image count N must be >= 0, got %d", llm.ErrUnsupported, req.N)
	}

	var (
		reply   imageResponse
		payload = imageRequest{
			Model:  m.id,
			Prompt: req.Prompt,
			N:      req.N,
			Size:   req.Size,
			// Always ask for inline bytes; remote URLs are never fetched.
			ResponseFormat: "b64_json",
			Steps:          req.Steps,
			CFGScale:       req.CFGScale,
			NegativePrompt: req.NegativePrompt,
			SampleMethod:   req.Sampler,
			Seed:           req.Seed,
		}
	)
	err := m.p.doJSON(ctx, http.MethodPost, "/v1/images/generations", m.id, &payload, &reply)
	if err != nil {
		return nil, err
	}

	// Every entry below either fails the call or contributes an image, so an
	// empty result can only come from an empty Data list.
	if len(reply.Data) == 0 {
		return nil, &llm.APIError{
			Provider: m.p.name,
			Model:    m.id,
			Message:  "image response contained no images",
		}
	}

	result := &imagegen.Result{Raw: &reply}
	for idx := range reply.Data {
		encoded := reply.Data[idx].B64JSON
		if encoded == "" {
			// Fail instead of skipping: a url-only entry means the backend
			// disregarded response_format, and remote content is never
			// fetched here (same bytes-only contract as llm.ImagePart).
			return nil, &llm.APIError{
				Provider: m.p.name,
				Model:    m.id,
				Message:  fmt.Sprintf("image %d returned no inline b64_json data", idx),
			}
		}

		decoded, decodeErr := base64.StdEncoding.DecodeString(encoded)
		if decodeErr != nil {
			return nil, fmt.Errorf("llama-swap: decode image %d: %w", idx, decodeErr)
		}

		part := llm.ImagePart{MIME: sniffImageMIME(decoded), Data: decoded}
		result.Images = append(result.Images, part)
	}
	return result, nil
}
// sniffImageMIME reports the MIME type of data as detected from its leading
// bytes by http.DetectContentType. Any result that is not an image/* type
// (including the result for empty input) is replaced by image/png, the format
// stable-diffusion.cpp emits.
func sniffImageMIME(data []byte) string {
	if detected := http.DetectContentType(data); strings.HasPrefix(detected, "image/") {
		return detected
	}
	return "image/png"
}