feat: OpenAI, Anthropic, and native-Ollama providers + media pipeline
Phase 3: - provider/openai: Chat Completions for OpenAI + compat endpoints (SSE streaming with by-index tool-call assembly, response_format json_schema, legacy max_tokens option, reasoning_effort) - provider/anthropic: Messages API (tool_use/tool_result, GA structured output via output_config.format, full SSE event parser, 529 transient) - provider/ollama: one native /api/chat client behind the ollama, ollama-cloud, and foreman built-ins (presets; NDJSON streaming tolerant of foreman's buffered single-object responses; object tool arguments; format-schema structured output; think mapping) - media/: capability normalization (sniff, downscale, transcode, byte ladder, ErrUnsupported), wired into the chain executor per target with penalty-free advance past incapable elements - registry: real provider + scheme wiring, WithHTTPClient option, required env-foreman TLS chat round-trip test - ADR-0009 multimodal strategy, ADR-0010 tools/structured mapping; README matrix + CLAUDE.md synced Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,54 @@
|
||||
package media
|
||||
|
||||
import "image"
|
||||
|
||||
// fitDims scales (w, h) so the longer side equals limit, preserving aspect
// ratio with round-half-up on the shorter side, floored at 1 pixel.
//
// The first result is the new width, the second the new height. When
// w == h the width is treated as the longer side, so a square maps to
// (limit, limit).
//
// A degenerate input whose longer side is not positive (for example a 0x0
// image) has no aspect ratio to preserve; it is treated as square and
// yields (limit, limit) rather than dividing by zero.
func fitDims(w, h, limit int) (int, int) {
	longer := max(w, h)
	if longer <= 0 {
		return limit, limit
	}
	// Adding longer/2 before the integer division rounds half up; the outer
	// max keeps extreme aspect ratios from collapsing the short side to 0.
	shorter := max((min(w, h)*limit+longer/2)/longer, 1)
	if w >= h {
		return limit, shorter
	}
	return shorter, limit
}
|
||||
|
||||
// downscale resizes src to dw x dh using area averaging (a box filter): each
// destination pixel is the mean of its corresponding source region.
//
// Why hand-rolled: the stdlib has no scaler and ADR-0007 bars
// golang.org/x/image without a new ADR. Area averaging is dependency-free,
// alias-resistant when shrinking (every source pixel contributes exactly
// once), and entirely adequate quality for vision-model input. It is only
// ever called to shrink — Normalize never upscales.
//
// The result always has its origin at (0, 0), regardless of src's bounds.
// If dw or dh is not positive no pixels are written and the image returned
// by image.NewRGBA is handed back untouched.
func downscale(src image.Image, dw, dh int) *image.RGBA {
	b := src.Bounds()
	sw, sh := b.Dx(), b.Dy()
	dst := image.NewRGBA(image.Rect(0, 0, dw, dh))

	// Images that implement RGBA64Image (the standard library's concrete
	// types do) can be sampled without boxing a color.Color per source
	// pixel. Anything else falls back to At, which yields the same values.
	src64, fast := src.(image.RGBA64Image)

	for dy := 0; dy < dh; dy++ {
		// Integer box edges: destination pixel dy covers source rows
		// [dy*sh/dh, (dy+1)*sh/dh), widened to at least one row.
		sy0 := dy * sh / dh
		sy1 := max((dy+1)*sh/dh, sy0+1)
		for dx := 0; dx < dw; dx++ {
			// Same construction for the source columns of pixel dx.
			sx0 := dx * sw / dw
			sx1 := max((dx+1)*sw/dw, sx0+1)

			// Accumulate the 16-bit channel values of the whole box.
			var r, g, bl, a uint64
			for sy := sy0; sy < sy1; sy++ {
				y := b.Min.Y + sy
				for sx := sx0; sx < sx1; sx++ {
					x := b.Min.X + sx
					if fast {
						c := src64.RGBA64At(x, y)
						r += uint64(c.R)
						g += uint64(c.G)
						bl += uint64(c.B)
						a += uint64(c.A)
						continue
					}
					pr, pg, pb, pa := src.At(x, y).RGBA()
					r += uint64(pr)
					g += uint64(pg)
					bl += uint64(pb)
					a += uint64(pa)
				}
			}

			// The box is at least 1x1, so n is never zero.
			n := uint64((sy1 - sy0) * (sx1 - sx0))
			i := dst.PixOffset(dx, dy)
			// Channels are 16-bit; average, then drop to 8 bits.
			dst.Pix[i+0] = uint8((r / n) >> 8)
			dst.Pix[i+1] = uint8((g / n) >> 8)
			dst.Pix[i+2] = uint8((bl / n) >> 8)
			dst.Pix[i+3] = uint8((a / n) >> 8)
		}
	}
	return dst
}
|
||||
Reference in New Issue
Block a user