From e0adc406619e10b41db239f34dac00cdf0410adb Mon Sep 17 00:00:00 2001 From: Steve Dudenhoeffer Date: Mon, 21 Jul 2025 22:53:11 -0400 Subject: [PATCH] fix junie's bad idea --- anthropic.go | 42 +++---------- go.mod | 5 +- go.sum | 10 ++-- utils/compress_image.go | 129 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 145 insertions(+), 41 deletions(-) create mode 100644 utils/compress_image.go diff --git a/anthropic.go b/anthropic.go index 9e6df9f..faaa164 100644 --- a/anthropic.go +++ b/anthropic.go @@ -1,20 +1,17 @@ package go_llm import ( - "bytes" "context" "encoding/base64" "encoding/json" "fmt" - "image" - "image/gif" - "image/jpeg" - "image/png" "io" "log" "log/slog" "net/http" + "gitea.stevedudenhoeffer.com/steve/go-llm/utils" + anth "github.com/liushuangls/go-anthropic/v2" ) @@ -83,7 +80,6 @@ func (a anthropic) requestToAnthropicRequest(req Request) anth.MessagesRequest { } if img.Base64 != "" { - // Anthropic models expect images to be < 5MiB in size raw, err := base64.StdEncoding.DecodeString(img.Base64) @@ -93,39 +89,15 @@ func (a anthropic) requestToAnthropicRequest(req Request) anth.MessagesRequest { // Check if image size exceeds 5MiB (5242880 bytes) if len(raw) >= 5242880 { - // Decode the image - imgData, format, err := image.Decode(bytes.NewReader(raw)) + + compressed, err := utils.CompressImage(img.Base64, 5*1024*1024) + + // just replace the image with the compressed one if err != nil { - log.Println("failed to decode image", err) continue } - var buf bytes.Buffer - - switch format { - case "jpeg", "jpg": - err = jpeg.Encode(&buf, imgData, &jpeg.Options{Quality: 60}) - case "png": - // For PNG, use a higher compression level - enc := &png.Encoder{ - CompressionLevel: png.BestCompression, - } - err = enc.Encode(&buf, imgData) - case "gif": - err = gif.Encode(&buf, imgData, &gif.Options{ - NumColors: 128, - }) - default: - continue - } - - if err != nil { - log.Println("failed to encode image", err) - continue - } - - // Update the base64 
string - img.Base64 = base64.StdEncoding.EncodeToString(buf.Bytes()) + img.Base64 = compressed } m.Content = append(m.Content, anth.NewImageMessageContent( diff --git a/go.mod b/go.mod index e9596d7..987fed4 100644 --- a/go.mod +++ b/go.mod @@ -6,6 +6,7 @@ require ( github.com/google/generative-ai-go v0.19.0 github.com/liushuangls/go-anthropic/v2 v2.15.0 github.com/openai/openai-go v0.1.0-beta.9 + golang.org/x/image v0.29.0 google.golang.org/api v0.228.0 ) @@ -36,9 +37,9 @@ require ( golang.org/x/crypto v0.37.0 // indirect golang.org/x/net v0.39.0 // indirect golang.org/x/oauth2 v0.29.0 // indirect - golang.org/x/sync v0.13.0 // indirect + golang.org/x/sync v0.16.0 // indirect golang.org/x/sys v0.32.0 // indirect - golang.org/x/text v0.24.0 // indirect + golang.org/x/text v0.27.0 // indirect golang.org/x/time v0.11.0 // indirect google.golang.org/genproto/googleapis/api v0.0.0-20250409194420-de1ac958c67a // indirect google.golang.org/genproto/googleapis/rpc v0.0.0-20250409194420-de1ac958c67a // indirect diff --git a/go.sum b/go.sum index 86c95a7..4434996 100644 --- a/go.sum +++ b/go.sum @@ -69,16 +69,18 @@ go.opentelemetry.io/otel/trace v1.35.0 h1:dPpEfJu1sDIqruz7BHFG3c7528f6ddfSWfFDVt go.opentelemetry.io/otel/trace v1.35.0/go.mod h1:WUk7DtFp1Aw2MkvqGdwiXYDZZNvA/1J8o6xRXLrIkyc= golang.org/x/crypto v0.37.0 h1:kJNSjF/Xp7kU0iB2Z+9viTPMW4EqqsrywMXLJOOsXSE= golang.org/x/crypto v0.37.0/go.mod h1:vg+k43peMZ0pUMhYmVAWysMK35e6ioLh3wB8ZCAfbVc= +golang.org/x/image v0.29.0 h1:HcdsyR4Gsuys/Axh0rDEmlBmB68rW1U9BUdB3UVHsas= +golang.org/x/image v0.29.0/go.mod h1:RVJROnf3SLK8d26OW91j4FrIHGbsJ8QnbEocVTOWQDA= golang.org/x/net v0.39.0 h1:ZCu7HMWDxpXpaiKdhzIfaltL9Lp31x/3fCP11bc6/fY= golang.org/x/net v0.39.0/go.mod h1:X7NRbYVEA+ewNkCNyJ513WmMdQ3BineSwVtN2zD/d+E= golang.org/x/oauth2 v0.29.0 h1:WdYw2tdTK1S8olAzWHdgeqfy+Mtm9XNhv/xJsY65d98= golang.org/x/oauth2 v0.29.0/go.mod h1:onh5ek6nERTohokkhCD/y2cV4Do3fxFHFuAejCkRWT8= -golang.org/x/sync v0.13.0 
h1:AauUjRAJ9OSnvULf/ARrrVywoJDy0YS2AwQ98I37610=
-golang.org/x/sync v0.13.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA=
+golang.org/x/sync v0.16.0 h1:ycBJEhp9p4vXvUZNszeOq0kGTPghopOL8q0fq3vstxw=
+golang.org/x/sync v0.16.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA=
 golang.org/x/sys v0.32.0 h1:s77OFDvIQeibCmezSnk/q6iAfkdiQaJi4VzroCFrN20=
 golang.org/x/sys v0.32.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k=
-golang.org/x/text v0.24.0 h1:dd5Bzh4yt5KYA8f9CJHCP4FB4D51c2c6JvN37xJJkJ0=
-golang.org/x/text v0.24.0/go.mod h1:L8rBsPeo2pSS+xqN0d5u2ikmjtmoJbDBT1b7nHvFCdU=
+golang.org/x/text v0.27.0 h1:4fGWRpyh641NLlecmyl4LOe6yDdfaYNrGb2zdfo4JV4=
+golang.org/x/text v0.27.0/go.mod h1:1D28KMCvyooCX9hBiosv5Tz/+YLxj0j7XhWjpSUF7CU=
 golang.org/x/time v0.11.0 h1:/bpjEDfN9tkoN/ryeYHnv5hcMlc8ncjMcM4XBk5NWV0=
 golang.org/x/time v0.11.0/go.mod h1:CDIdPxbZBQxdj6cxyCIdrNogrJKMJ7pr37NYpMcMDSg=
 google.golang.org/api v0.228.0 h1:X2DJ/uoWGnY5obVjewbp8icSL5U4FzuCfy9OjbLSnLs=
diff --git a/utils/compress_image.go b/utils/compress_image.go
new file mode 100644
index 0000000..b42b14c
--- /dev/null
+++ b/utils/compress_image.go
@@ -0,0 +1,192 @@
+package utils
+
+import (
+	"bytes"
+	"encoding/base64"
+	"fmt"
+	"image"
+	"image/gif"
+	"image/jpeg"
+	"image/png"
+	"net/http"
+
+	"golang.org/x/image/draw"
+)
+
+const (
+	// shrinkFactor is the per-round scale applied to width and height once
+	// re-encoding alone cannot reach the size budget.
+	shrinkFactor = 0.8
+
+	// minDimension is the smallest width or height that is still
+	// down-scaled; below it the compressors give up with an error.
+	minDimension = 100
+)
+
+// CompressImage takes a base64-encoded image (JPEG, PNG or GIF) and returns
+// a base64-encoded version whose decoded size is at most maxLength bytes,
+// or an error.
+//
+// Input that already fits is returned unchanged. GIF input stays a GIF;
+// everything else is re-encoded as JPEG, so the media type of the result
+// can differ from the media type of the input.
+func CompressImage(b64 string, maxLength int) (string, error) {
+	raw, err := base64.StdEncoding.DecodeString(b64)
+	if err != nil {
+		return "", fmt.Errorf("base64 decode: %w", err)
+	}
+	if len(raw) <= maxLength {
+		return b64, nil // small enough already
+	}
+
+	if http.DetectContentType(raw) == "image/gif" {
+		return compressGIF(raw, maxLength)
+	}
+	// Anything else goes through image.Decode, which only understands the
+	// formats registered by this file's imports (gif, jpeg, png).
+	return compressRaster(raw, maxLength)
+}
+
+// ---------- Raster path (jpeg / png) ----------
+
+// compressRaster decodes src and re-encodes it as JPEG until the output is
+// at most maxLength bytes. Quality steps down from 95 to 20 in steps of 5;
+// when that is not enough the image is shrunk by shrinkFactor and the
+// quality ladder restarts. It fails once either side is below minDimension.
+func compressRaster(src []byte, maxLength int) (string, error) {
+	img, _, err := image.Decode(bytes.NewReader(src))
+	if err != nil {
+		return "", fmt.Errorf("decode raster: %w", err)
+	}
+
+	quality := 95
+	for {
+		var buf bytes.Buffer
+		if err := jpeg.Encode(&buf, img, &jpeg.Options{Quality: quality}); err != nil {
+			return "", fmt.Errorf("jpeg encode: %w", err)
+		}
+		if buf.Len() <= maxLength {
+			return base64.StdEncoding.EncodeToString(buf.Bytes()), nil
+		}
+
+		if quality > 20 {
+			quality -= 5
+			continue
+		}
+
+		// Quality ladder exhausted: shrink and start over.
+		b := img.Bounds()
+		if b.Dx() < minDimension || b.Dy() < minDimension {
+			return "", fmt.Errorf("cannot compress below %.02fMiB without destroying image", float64(maxLength)/1048576.0)
+		}
+		dst := image.NewRGBA(image.Rect(0, 0, int(float64(b.Dx())*shrinkFactor), int(float64(b.Dy())*shrinkFactor)))
+		draw.ApproxBiLinear.Scale(dst, dst.Bounds(), img, b, draw.Over, nil)
+		img = dst
+		quality = 95 // restart ladder
+	}
+}
+
+// ---------- GIF path (single-frame and animated) ----------
+
+// compressGIF re-encodes the GIF in src, shrinking the canvas and every
+// frame by shrinkFactor per round until the output is at most maxLength
+// bytes. It fails once the canvas is below minDimension on either side.
+func compressGIF(src []byte, maxLength int) (string, error) {
+	g, err := gif.DecodeAll(bytes.NewReader(src))
+	if err != nil {
+		return "", fmt.Errorf("gif decode: %w", err)
+	}
+
+	for {
+		var buf bytes.Buffer
+		if err := gif.EncodeAll(&buf, g); err != nil {
+			return "", fmt.Errorf("gif encode: %w", err)
+		}
+		if buf.Len() <= maxLength {
+			return base64.StdEncoding.EncodeToString(buf.Bytes()), nil
+		}
+
+		w, h := g.Config.Width, g.Config.Height
+		if w < minDimension || h < minDimension {
+			return "", fmt.Errorf("cannot compress animated GIF below %.02fMiB without excessive quality loss", float64(maxLength)/1048576.0)
+		}
+
+		nw, nh := int(float64(w)*shrinkFactor), int(float64(h)*shrinkFactor)
+		for i, frm := range g.Image {
+			g.Image[i] = scaleFrame(frm, w, h, nw, nh)
+		}
+		g.Config.Width, g.Config.Height = nw, nh
+		// loop back and re-check the encoded size
+	}
+}
+
+// scaleFrame returns frm resampled for a canvas that shrank from w-by-h to
+// nw-by-nh. The frame keeps its relative position on the canvas and its
+// own palette.
+func scaleFrame(frm *image.Paletted, w, h, nw, nh int) *image.Paletted {
+	b := frm.Bounds()
+
+	// Paletted to RGBA so the scaler interpolates colors, not palette indices.
+	rgba := image.NewRGBA(b)
+	draw.Draw(rgba, b, frm, b.Min, draw.Src)
+
+	target := scaleRect(b, w, h, nw, nh)
+	scaled := image.NewRGBA(target)
+	draw.ApproxBiLinear.Scale(scaled, target, rgba, b, draw.Over, nil)
+
+	// Quantize back with the frame's own palette. A nil palette would make
+	// FloydSteinberg index an empty color table and panic, and the GIF
+	// encoder rejects frames that have no palette.
+	out := image.NewPaletted(target, frm.Palette)
+	draw.FloydSteinberg.Draw(out, target, scaled, target.Min)
+	return out
+}
+
+// scaleRect maps r from a w-by-h canvas onto an nw-by-nh canvas. The
+// minimum corner rounds down and the maximum corner rounds up so that
+// adjacent frames do not leave gaps; the result stays inside the new
+// canvas and is never empty.
+func scaleRect(r image.Rectangle, w, h, nw, nh int) image.Rectangle {
+	x0, x1 := scaleSpan(r.Min.X, r.Max.X, w, nw)
+	y0, y1 := scaleSpan(r.Min.Y, r.Max.Y, h, nh)
+	return image.Rect(x0, y0, x1, y1)
+}
+
+// scaleSpan maps the half-open interval [lo, hi) on an axis of length size
+// onto an axis of length newSize, keeping at least one pixel within
+// [0, newSize]. size must be positive.
+func scaleSpan(lo, hi, size, newSize int) (int, int) {
+	// Floor the lower edge, ceil the upper edge.
+	a := lo * newSize / size
+	b := (hi*newSize + size - 1) / size
+	if b > newSize {
+		b = newSize
+	}
+	if b <= a {
+		b = a + 1
+		if b > newSize {
+			a, b = newSize-1, newSize
+		}
+	}
+	return a, b
+}
+
+// ---------- Helpers for precise MIME decodes (optional) ----------
+
+// decode reads one image from r with the decoder that matches mime and
+// falls back to image.Decode's format sniffing for any other value. For
+// "image/gif" only the first frame is returned; use gif.DecodeAll for
+// animations. Nothing in this file calls it.
+func decode(r *bytes.Reader, mime string) (image.Image, error) {
+	switch mime {
+	case "image/jpeg":
+		return jpeg.Decode(r)
+	case "image/png":
+		return png.Decode(r)
+	case "image/gif":
+		return gif.Decode(r)
+	default:
+		i, _, err := image.Decode(r)
+		return i, err // let the stdlib guess the format
+	}
+}