Files
majordomo/media/media_test.go
steve 70b7aebd86
CI / Tidy (pull_request) Successful in 9m25s
CI / Build & Test (pull_request) Successful in 9m49s
test(media): match the overflow placeholder by const, not substring (gadfly #8)
ragnaros/qwen3.6-27b noted TestNormalizeOverCount matched 'omitted' by substring;
the test is in-package, so assert == imageOverflowPlaceholder instead — robust to
wording changes. No behavior change.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-28 18:33:01 -04:00

544 lines
17 KiB
Go

package media
import (
"bytes"
"errors"
"image"
"image/color"
"image/gif"
"image/jpeg"
"image/png"
"math/rand/v2"
"strings"
"testing"
"gitea.stevedudenhoeffer.com/steve/majordomo/llm"
)
// --- test image builders -------------------------------------------------

// gradient returns a w x h RGBA image whose red channel grows with x and
// whose green channel grows with y (each scaled so the last column/row hits
// 255), with blue fixed at 128 and alpha at 255. Smooth content like this
// compresses well.
func gradient(w, h int) *image.RGBA {
	out := image.NewRGBA(image.Rect(0, 0, w, h))
	// Clamp the divisors to 1 so a one-pixel-wide or one-pixel-tall image
	// does not divide by zero.
	spanX, spanY := max(w-1, 1), max(h-1, 1)
	for row := 0; row < h; row++ {
		green := uint8(row * 255 / spanY)
		for col := 0; col < w; col++ {
			px := color.RGBA{R: uint8(col * 255 / spanX), G: green, B: 128, A: 255}
			out.SetRGBA(col, row, px)
		}
	}
	return out
}
// noisy builds a w x h image of deterministic random pixels (compresses
// terribly — ideal for exercising the byte-budget ladder).
func noisy(w, h int) *image.RGBA {
rng := rand.New(rand.NewPCG(1, 2))
img := image.NewRGBA(image.Rect(0, 0, w, h))
for i := range img.Pix {
img.Pix[i] = uint8(rng.UintN(256))
}
return img
}
// encPNG serializes img as PNG and returns the encoded bytes. An encoder
// error fails the calling test immediately.
func encPNG(t *testing.T, img image.Image) []byte {
	t.Helper()
	out := new(bytes.Buffer)
	err := png.Encode(out, img)
	if err != nil {
		t.Fatalf("png encode: %v", err)
	}
	return out.Bytes()
}
// encJPEG serializes img as a quality-90 JPEG and returns the encoded bytes.
// An encoder error fails the calling test immediately.
func encJPEG(t *testing.T, img image.Image) []byte {
	t.Helper()
	opts := jpeg.Options{Quality: 90}
	out := new(bytes.Buffer)
	err := jpeg.Encode(out, img, &opts)
	if err != nil {
		t.Fatalf("jpeg encode: %v", err)
	}
	return out.Bytes()
}
// encGIF serializes img as GIF using the encoder's default options and
// returns the encoded bytes. An encoder error fails the calling test
// immediately.
func encGIF(t *testing.T, img image.Image) []byte {
	t.Helper()
	out := new(bytes.Buffer)
	err := gif.Encode(out, img, nil)
	if err != nil {
		t.Fatalf("gif encode: %v", err)
	}
	return out.Bytes()
}
// webpBlob returns a 34-byte sequence that carries the RIFF/WEBP signature:
// "RIFF", four bytes encoding 26 little-endian (the number of bytes that
// follow them), "WEBPVP8 ", and 18 zero bytes. The stdlib cannot decode
// webp, so sniffing is all that ever reads it.
func webpBlob() []byte {
	const zeroTail = 18
	container, form := "RIFF", "WEBPVP8 "
	size := []byte{0x1a, 0x00, 0x00, 0x00}

	blob := make([]byte, 0, len(container)+len(size)+len(form)+zeroTail)
	blob = append(blob, container...)
	blob = append(blob, size...)
	blob = append(blob, form...)
	return append(blob, make([]byte, zeroTail)...)
}
// imgReq wraps parts in one user message and returns a request whose
// Messages holds only that message.
func imgReq(parts ...llm.Part) llm.Request {
	msg := llm.UserParts(parts...)
	return llm.Request{Messages: []llm.Message{msg}}
}
// firstImage scans the request's messages in order and returns the first
// part that is an llm.ImagePart. It fails the calling test if no message
// contains one.
func firstImage(t *testing.T, req llm.Request) llm.ImagePart {
	t.Helper()
	for i := range req.Messages {
		for _, part := range req.Messages[i].Parts {
			img, isImage := part.(llm.ImagePart)
			if !isImage {
				continue
			}
			return img
		}
	}
	t.Fatal("no image part in request")
	return llm.ImagePart{} // unreachable: t.Fatal stops the test
}
// --- fast paths -----------------------------------------------------------

// TestNormalizeFastPathNoImages checks that a text-only request comes back
// from Normalize without error and still sharing the input's messages and
// parts storage, even when the capabilities are the zero value.
func TestNormalizeFastPathNoImages(t *testing.T) {
	in := llm.Request{Messages: []llm.Message{llm.UserText("hello")}}
	// Zero-value capabilities: even a no-image target.
	out, err := Normalize(in, llm.Capabilities{})
	if err != nil {
		t.Fatalf("Normalize: %v", err)
	}
	// Two slices yield the same element address only when they share a
	// backing array, so pointer equality detects a copy.
	inMsg, outMsg := &in.Messages[0], &out.Messages[0]
	if outMsg != inMsg {
		t.Error("messages slice was copied on the no-image fast path")
	}
	if &outMsg.Parts[0] != &inMsg.Parts[0] {
		t.Error("parts slice was copied on the no-image fast path")
	}
}
// TestNormalizeFastPathFittingImages checks that a request whose only image
// is within every configured limit (count, byte size, dimension, allowed
// MIME) comes back still sharing the input's messages and parts storage.
func TestNormalizeFastPathFittingImages(t *testing.T) {
	payload := encPNG(t, gradient(20, 10))
	in := imgReq(llm.Text("look:"), llm.Image("image/png", payload))
	// Every limit is loose enough for the 20x10 png above.
	limits := llm.Capabilities{
		AllowedImageMIME:  []string{"image/png"},
		MaxImageDimension: 64,
		MaxImageBytes:     len(payload) + 100,
		MaxImagesPerReq:   4,
	}
	out, err := Normalize(in, limits)
	if err != nil {
		t.Fatalf("Normalize: %v", err)
	}
	// Shared element addresses mean the slices were not copied.
	inMsg, outMsg := &in.Messages[0], &out.Messages[0]
	if outMsg != inMsg {
		t.Error("messages slice was copied although every image already fits")
	}
	if &outMsg.Parts[1] != &inMsg.Parts[1] {
		t.Error("parts slice was copied although every image already fits")
	}
}
// --- rejection paths ------------------------------------------------------
func TestNormalizeImagesUnsupported(t *testing.T) {
req := imgReq(llm.Image("image/png", encPNG(t, gradient(4, 4))))
_, err := Normalize(req, llm.Capabilities{MaxImagesPerReq: 0})
if !errors.Is(err, llm.ErrUnsupported) {
t.Fatalf("err = %v, want ErrUnsupported", err)
}
if !strings.Contains(err.Error(), "does not accept image input") {
t.Errorf("err message %q lacks explanation", err)
}
}
// TestNormalizeOverCount checks what happens when a request carries more
// images than MaxImagesPerReq allows: Normalize must succeed, keep the
// most-recent images in order, substitute imageOverflowPlaceholder text for
// the elided oldest one, and leave the input request unmodified.
func TestNormalizeOverCount(t *testing.T) {
	// 3 distinguishable images across 2 messages; cap = 2. Over-count no longer
	// errors — the OLDEST image is replaced with a placeholder and the most-recent
	// two (the relevant ones in an iterative run) are kept, in order.
	a := llm.Image("image/png", encPNG(t, gradient(2, 2))).(llm.ImagePart)
	b := llm.Image("image/png", encPNG(t, gradient(4, 4))).(llm.ImagePart)
	c := llm.Image("image/png", encPNG(t, gradient(8, 8))).(llm.ImagePart)
	req := llm.Request{Messages: []llm.Message{llm.UserParts(a, b), llm.UserParts(c)}}
	caps := llm.Capabilities{
		MaxImagesPerReq:   2,
		MaxImageDimension: 64,
		MaxImageBytes:     1 << 20,
		AllowedImageMIME:  []string{"image/png"},
	}

	out, err := Normalize(req, caps)
	if err != nil {
		t.Fatalf("over-count should not error: %v", err)
	}

	// Collect surviving images (in order) and count placeholder text parts.
	var kept []llm.ImagePart
	var placeholders int
	for _, msg := range out.Messages {
		for _, part := range msg.Parts {
			if img, isImage := part.(llm.ImagePart); isImage {
				kept = append(kept, img)
				continue
			}
			if txt, isText := part.(llm.TextPart); isText && txt.Text == imageOverflowPlaceholder {
				placeholders++
			}
		}
	}

	// The exact survivors are the most-recent two, in order: b then c (a elided).
	survivorsOK := len(kept) == 2 &&
		bytes.Equal(kept[0].Data, b.Data) &&
		bytes.Equal(kept[1].Data, c.Data)
	if !survivorsOK {
		t.Fatalf("kept %d images; want exactly [b, c] (the most-recent two)", len(kept))
	}
	if placeholders != 1 {
		t.Errorf("placeholders = %d, want 1 for the elided oldest image", placeholders)
	}

	// Input request untouched: the first part is still image a, not a
	// placeholder — a len check alone wouldn't catch in-place substitution.
	head := req.Messages[0].Parts[0]
	if first, ok := head.(llm.ImagePart); !ok || !bytes.Equal(first.Data, a.Data) {
		t.Errorf("input request was mutated; first part = %+v", head)
	}
}
func TestNormalizeGarbageBytes(t *testing.T) {
req := imgReq(llm.Image("image/png", []byte("certainly not an image")))
_, err := Normalize(req, llm.Capabilities{MaxImagesPerReq: 1})
if !errors.Is(err, llm.ErrUnsupported) {
t.Fatalf("err = %v, want ErrUnsupported", err)
}
if !strings.Contains(err.Error(), "no known format") {
t.Errorf("err message %q lacks a clear explanation", err)
}
}
// --- MIME sniffing & correction --------------------------------------------

// TestNormalizeMIMECorrection checks that png bytes labelled "image/jpeg"
// come back labelled "image/png" with the bytes unchanged, and that the
// input request keeps its original (wrong) label.
func TestNormalizeMIMECorrection(t *testing.T) {
	pngBytes := encPNG(t, gradient(8, 8))
	in := imgReq(llm.Image("image/jpeg", pngBytes)) // caller lies: bytes are png
	out, err := Normalize(in, llm.Capabilities{MaxImagesPerReq: 1})
	if err != nil {
		t.Fatalf("Normalize: %v", err)
	}

	fixed := firstImage(t, out)
	if fixed.MIME != "image/png" {
		t.Errorf("MIME = %q, want sniff-corrected %q", fixed.MIME, "image/png")
	}
	if !bytes.Equal(fixed.Data, pngBytes) {
		t.Error("image bytes changed although only the MIME needed correcting")
	}

	orig := firstImage(t, in)
	if orig.MIME != "image/jpeg" {
		t.Errorf("input request mutated: MIME now %q", orig.MIME)
	}
}
// TestNormalizeCopyOnWrite checks storage sharing when one part has to
// change (png bytes labelled "image/jpeg"): the output needs its own
// messages slice and its own parts slice for the changed message, while the
// parts slice of the untouched text-only message stays shared with the input.
func TestNormalizeCopyOnWrite(t *testing.T) {
	pngBytes := encPNG(t, gradient(8, 8))
	in := llm.Request{Messages: []llm.Message{
		llm.UserText("untouched message"),
		llm.UserParts(llm.Text("untouched part"), llm.Image("image/jpeg", pngBytes)),
	}}
	out, err := Normalize(in, llm.Capabilities{MaxImagesPerReq: 1})
	if err != nil {
		t.Fatalf("Normalize: %v", err)
	}

	// Equal element addresses mean shared backing storage.
	if sharedMsgs := &out.Messages[0] == &in.Messages[0]; sharedMsgs {
		t.Error("messages slice shared although a part changed (mutation hazard)")
	}
	if sharedUntouched := &out.Messages[0].Parts[0] == &in.Messages[0].Parts[0]; !sharedUntouched {
		t.Error("parts slice of the untouched message was copied")
	}
	if sharedChanged := &out.Messages[1].Parts[0] == &in.Messages[1].Parts[0]; sharedChanged {
		t.Error("parts slice of the changed message is still shared (mutation hazard)")
	}
}
// --- dimension capping ------------------------------------------------------
func TestNormalizeDownscale(t *testing.T) {
req := imgReq(llm.Image("image/png", encPNG(t, gradient(200, 100))))
caps := llm.Capabilities{MaxImagesPerReq: 1, MaxImageDimension: 50}
got, err := Normalize(req, caps)
if err != nil {
t.Fatalf("Normalize: %v", err)
}
format, w, h, err := Info(firstImage(t, got))
if err != nil {
t.Fatalf("Info: %v", err)
}
if format != "png" {
t.Errorf("format = %q, want original format %q preserved", format, "png")
}
if w != 50 || h != 25 {
t.Errorf("dimensions = %dx%d, want 50x25 (aspect preserved)", w, h)
}
}
func TestNormalizeDownscalePortrait(t *testing.T) {
req := imgReq(llm.Image("image/png", encPNG(t, gradient(100, 200))))
caps := llm.Capabilities{MaxImagesPerReq: 1, MaxImageDimension: 50}
got, err := Normalize(req, caps)
if err != nil {
t.Fatalf("Normalize: %v", err)
}
_, w, h, err := Info(firstImage(t, got))
if err != nil {
t.Fatalf("Info: %v", err)
}
if w != 25 || h != 50 {
t.Errorf("dimensions = %dx%d, want 25x50 (aspect preserved)", w, h)
}
}
// --- transcoding -------------------------------------------------------------

// TestNormalizeTranscode is a table-driven check that an image whose format
// is not in AllowedImageMIME is re-encoded into the allowed format: the
// output part's MIME and its sniffed format must both match the target, and
// the 16x16 dimensions must be unchanged.
func TestNormalizeTranscode(t *testing.T) {
	cases := []struct {
		name    string
		data    []byte
		mime    string
		allowed []string
		want    string // expected output format, without the "image/" prefix
	}{
		{name: "png to jpeg", data: encPNG(t, gradient(16, 16)), mime: "image/png", allowed: []string{"image/jpeg"}, want: "jpeg"},
		{name: "jpeg to png", data: encJPEG(t, gradient(16, 16)), mime: "image/jpeg", allowed: []string{"image/png"}, want: "png"},
		{name: "gif to png", data: encGIF(t, gradient(16, 16)), mime: "image/gif", allowed: []string{"image/png"}, want: "png"},
		{name: "png to gif fallback", data: encPNG(t, gradient(16, 16)), mime: "image/png", allowed: []string{"image/gif"}, want: "gif"},
	}
	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			limits := llm.Capabilities{MaxImagesPerReq: 1, AllowedImageMIME: tc.allowed}
			out, err := Normalize(imgReq(llm.Image(tc.mime, tc.data)), limits)
			if err != nil {
				t.Fatalf("Normalize: %v", err)
			}

			img := firstImage(t, out)
			if wantMIME := "image/" + tc.want; img.MIME != wantMIME {
				t.Errorf("MIME = %q, want %q", img.MIME, wantMIME)
			}

			format, gotW, gotH, err := Info(img)
			if err != nil {
				t.Fatalf("Info: %v", err)
			}
			if format != tc.want {
				t.Errorf("sniffed format = %q, want %q", format, tc.want)
			}
			if sized := gotW == 16 && gotH == 16; !sized {
				t.Errorf("dimensions = %dx%d, want 16x16 (no resize needed)", gotW, gotH)
			}
		})
	}
}
func TestNormalizeNoEncodableAllowedType(t *testing.T) {
// png needs transcoding but the only allowed type is webp, which the
// stdlib cannot encode.
req := imgReq(llm.Image("image/png", encPNG(t, gradient(8, 8))))
caps := llm.Capabilities{MaxImagesPerReq: 1, AllowedImageMIME: []string{"image/webp"}}
_, err := Normalize(req, caps)
if !errors.Is(err, llm.ErrUnsupported) {
t.Fatalf("err = %v, want ErrUnsupported", err)
}
if !strings.Contains(err.Error(), "image/webp") {
t.Errorf("err message %q does not name the unencodable allowed types", err)
}
}
// --- byte budget ---------------------------------------------------------------
func TestNormalizeByteBudgetFits(t *testing.T) {
// Random noise defeats q85 jpeg at full size; the ladder must walk down
// quality and then resolution until the encoding fits.
req := imgReq(llm.Image("image/png", encPNG(t, noisy(256, 256))))
caps := llm.Capabilities{
MaxImagesPerReq: 1,
AllowedImageMIME: []string{"image/jpeg"},
MaxImageBytes: 8 * 1024,
}
got, err := Normalize(req, caps)
if err != nil {
t.Fatalf("Normalize: %v", err)
}
ip := firstImage(t, got)
if len(ip.Data) > caps.MaxImageBytes {
t.Errorf("len(Data) = %d, exceeds budget %d", len(ip.Data), caps.MaxImageBytes)
}
if format, _, _, err := Info(ip); err != nil || format != "jpeg" {
t.Errorf("Info = %q, %v; want jpeg, nil", format, err)
}
}
func TestNormalizeByteBudgetImpossible(t *testing.T) {
req := imgReq(llm.Image("image/png", encPNG(t, noisy(256, 256))))
caps := llm.Capabilities{
MaxImagesPerReq: 1,
AllowedImageMIME: []string{"image/jpeg"},
MaxImageBytes: 10, // no image fits in 10 bytes
}
_, err := Normalize(req, caps)
if !errors.Is(err, llm.ErrUnsupported) {
t.Fatalf("err = %v, want ErrUnsupported", err)
}
if !strings.Contains(err.Error(), "10-byte limit") {
t.Errorf("err message %q lacks the budget", err)
}
if !strings.Contains(err.Error(), "smallest achievable") {
t.Errorf("err message %q lacks the achieved size", err)
}
}
// --- webp ---------------------------------------------------------------------

// TestNormalizeWebPPassThrough checks that a webp image sent to a target
// that allows webp comes back with its bytes unchanged and the request's
// message storage still shared with the input, even though a dimension
// limit is configured.
func TestNormalizeWebPPassThrough(t *testing.T) {
	blob := webpBlob()
	in := imgReq(llm.Image("image/webp", blob))
	limits := llm.Capabilities{
		AllowedImageMIME:  []string{"image/webp"},
		MaxImageDimension: 50, // unverifiable for webp; must not force a transform
		MaxImageBytes:     1024,
		MaxImagesPerReq:   1,
	}
	out, err := Normalize(in, limits)
	if err != nil {
		t.Fatalf("Normalize: %v", err)
	}
	// Equal element addresses mean the messages slice was not copied.
	if &out.Messages[0] != &in.Messages[0] {
		t.Error("request copied although the webp image passes through")
	}
	passed := firstImage(t, out)
	if !bytes.Equal(passed.Data, blob) {
		t.Error("webp bytes changed on pass-through")
	}
}
func TestNormalizeWebPNeedsTransform(t *testing.T) {
req := imgReq(llm.Image("image/webp", webpBlob()))
caps := llm.Capabilities{MaxImagesPerReq: 1, AllowedImageMIME: []string{"image/jpeg"}}
_, err := Normalize(req, caps)
if !errors.Is(err, llm.ErrUnsupported) {
t.Fatalf("err = %v, want ErrUnsupported", err)
}
if !strings.Contains(err.Error(), "webp") {
t.Errorf("err message %q does not name the format", err)
}
if !strings.Contains(err.Error(), "jpeg, png, or gif") {
t.Errorf("err message %q does not say what to provide instead", err)
}
}
// --- input immutability ----------------------------------------------------------

// TestNormalizeInputNotMutated checks that a Normalize call which has to
// transform an image (a 200x100 png labelled "image/jpeg", sent to a
// jpeg-only target with MaxImageDimension 50) leaves the input request as
// it was: same MIME label, same image bytes, same text part. It also
// checks that the output image really differs from the input bytes.
func TestNormalizeInputNotMutated(t *testing.T) {
	data := encPNG(t, gradient(200, 100))
	// Independent copy, so a write into data's backing array is detectable.
	snapshot := bytes.Clone(data)
	req := llm.Request{
		System: "sys",
		Messages: []llm.Message{
			llm.UserParts(llm.Text("scale me"), llm.Image("image/jpeg", data)),
		},
	}
	caps := llm.Capabilities{
		MaxImagesPerReq:   1,
		MaxImageDimension: 50,
		AllowedImageMIME:  []string{"image/jpeg"},
	}
	got, err := Normalize(req, caps)
	if err != nil {
		t.Fatalf("Normalize: %v", err)
	}

	orig := firstImage(t, req)
	if orig.MIME != "image/jpeg" {
		t.Errorf("input MIME mutated to %q", orig.MIME)
	}
	if !bytes.Equal(orig.Data, snapshot) {
		t.Error("input image bytes mutated")
	}

	// Use the comma-ok form: if Normalize replaced the first input part with
	// a different type, report a test failure instead of panicking on the
	// type assertion.
	firstPart := req.Messages[0].Parts[0]
	if txt, ok := firstPart.(llm.TextPart); !ok {
		t.Errorf("input text part replaced: first part is now %T", firstPart)
	} else if txt.Text != "scale me" {
		t.Errorf("input text part mutated: %q", txt.Text)
	}

	if ip := firstImage(t, got); bytes.Equal(ip.Data, snapshot) {
		t.Error("output image was expected to transform but is byte-identical")
	}
}
// --- alpha handling ----------------------------------------------------------------
func TestNormalizeAlphaPNGToJPEG(t *testing.T) {
img := image.NewRGBA(image.Rect(0, 0, 32, 32))
for y := 0; y < 32; y++ {
for x := 0; x < 32; x++ {
img.SetRGBA(x, y, color.RGBA{R: 200, G: 60, B: 30, A: uint8(x * 8)})
}
}
req := imgReq(llm.Image("image/png", encPNG(t, img)))
caps := llm.Capabilities{MaxImagesPerReq: 1, AllowedImageMIME: []string{"image/jpeg"}}
got, err := Normalize(req, caps)
if err != nil {
t.Fatalf("Normalize: %v", err)
}
ip := firstImage(t, got)
decoded, err := jpeg.Decode(bytes.NewReader(ip.Data))
if err != nil {
t.Fatalf("decoding transcoded jpeg: %v", err)
}
if b := decoded.Bounds(); b.Dx() != 32 || b.Dy() != 32 {
t.Errorf("decoded dimensions = %dx%d, want 32x32", b.Dx(), b.Dy())
}
}
// --- Info ----------------------------------------------------------------------------

// TestInfo is a table-driven check of Info's reported format and dimensions
// for png, jpeg, and gif payloads, a png mislabelled as jpeg (the bytes
// win over the label), a webp blob (format reported, dimensions 0x0), and
// garbage bytes (an error is required).
func TestInfo(t *testing.T) {
	pngData := encPNG(t, gradient(10, 7))
	jpegData := encJPEG(t, gradient(5, 9))
	gifData := encGIF(t, gradient(6, 4))

	cases := []struct {
		name    string
		part    llm.ImagePart
		format  string
		w, h    int
		wantErr bool
	}{
		{name: "png", part: llm.ImagePart{MIME: "image/png", Data: pngData}, format: "png", w: 10, h: 7},
		{name: "jpeg", part: llm.ImagePart{MIME: "image/jpeg", Data: jpegData}, format: "jpeg", w: 5, h: 9},
		{name: "gif", part: llm.ImagePart{MIME: "image/gif", Data: gifData}, format: "gif", w: 6, h: 4},
		{name: "mislabeled png", part: llm.ImagePart{MIME: "image/jpeg", Data: pngData}, format: "png", w: 10, h: 7},
		{name: "webp", part: llm.ImagePart{MIME: "image/webp", Data: webpBlob()}, format: "webp"},
		{name: "garbage", part: llm.ImagePart{MIME: "image/png", Data: []byte("nope")}, wantErr: true},
	}
	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			format, gotW, gotH, err := Info(tc.part)
			switch {
			case tc.wantErr && err == nil:
				t.Fatal("Info: expected error, got nil")
			case tc.wantErr:
				// Error seen as expected; the other results are not checked.
				return
			case err != nil:
				t.Fatalf("Info: %v", err)
			}
			if matches := format == tc.format && gotW == tc.w && gotH == tc.h; !matches {
				t.Errorf("Info = %q, %d, %d; want %q, %d, %d", format, gotW, gotH, tc.format, tc.w, tc.h)
			}
		})
	}
}
// --- byte-cap pass-through interplay ----------------------------------------------------

// TestNormalizeOversizeBytesTriggersTransform checks that an image whose
// MIME is allowed and whose dimensions are unrestricted, but whose payload
// is twice the byte budget, comes back within the budget.
func TestNormalizeOversizeBytesTriggersTransform(t *testing.T) {
	// A fitting MIME and dimension but an over-budget payload must re-encode,
	// not pass through.
	src := encPNG(t, noisy(64, 64))
	caps := llm.Capabilities{
		AllowedImageMIME: []string{"image/png", "image/jpeg"},
		MaxImageBytes:    len(src) / 2,
		MaxImagesPerReq:  1,
	}
	out, err := Normalize(imgReq(llm.Image("image/png", src)), caps)
	if err != nil {
		t.Fatalf("Normalize: %v", err)
	}

	img := firstImage(t, out)
	if size := len(img.Data); size > caps.MaxImageBytes {
		t.Errorf("len(Data) = %d, exceeds budget %d", size, caps.MaxImageBytes)
	}
}