fix(run): harden input-file staging per gadfly #18 validation pass
executus CI / test (pull_request) Successful in 48s
executus CI / test (pull_request) Successful in 48s
Second-pass findings on the security fix: - Mime sanitized ONCE and passed to BOTH StageInputFile and the descriptor (was passing raw f.MimeType to the host store while only the descriptor sanitized) — 3 models. - sanitizeField now also strips Unicode format chars (category Cf, incl. the bidi overrides U+202A–U+202E that can reorder how the descriptor renders); IsControl already covers \n\r\t so the explicit checks are dropped. - fileID is sanitized before inlining + an empty file_id drops the file (defense vs a misbehaving stager). - humanizeBytes clamps the prefix index so an absurd size (≥1024^6) can't index past "KMGTPE" and panic — a no-panic guarantee independent of the per-file cap. - Docs sync: README Ports list gains InputFiles; tool.InputFile.Name doc now says the executor reduces an untrusted name to a safe base name (was claiming the field is already safe). Tests: bidi/control stripping; mime sanitized in staged value + descriptor; empty file_id drop; humanizeBytes no-panic across sizes up to 1<<62. Suite green (-race). Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -37,7 +37,7 @@ bot) — mort and gadfly are the first two consumers (heavy and light). See
|
||||
tool registry, majordomo's agent loop, context compaction, run-bounding, and
|
||||
step/audit instrumentation into one `Run(ctx, RunnableAgent, inv) Result`, with
|
||||
every host concern behind a nil-safe `run.Ports` (Audit/Budget/Critic/
|
||||
Checkpointer/PaletteSource/Delivery). See `examples/minimal`.
|
||||
Checkpointer/PaletteSource/Delivery/InputFiles). See `examples/minimal`.
|
||||
- `model/` — config-driven tier resolution + failover over majordomo, with
|
||||
pluggable `UsageSink`/`TraceSink` and `GenerateWith[T]` structured output.
|
||||
- `tool/` — the tool registry + 3-stage permission model + SSRF guard.
|
||||
|
||||
+23
-8
@@ -68,13 +68,23 @@ func (e *Executor) stageInputFiles(ctx context.Context, runID, agentID string, f
|
||||
// Then disambiguate colliding base names so two attachments don't both map
|
||||
// to /workspace/<name> (the second would clobber the first).
|
||||
name := uniqueName(sanitizeName(f.Name), seenNames)
|
||||
fileID, err := e.cfg.Ports.InputFiles.StageInputFile(ctx, runID, agentID, name, f.MimeType, f.Data)
|
||||
// Sanitize the mime ONCE and pass the clean value to both the host store
|
||||
// and the descriptor (don't hand the raw value to StageInputFile).
|
||||
mime := sanitizeField(f.MimeType)
|
||||
fileID, err := e.cfg.Ports.InputFiles.StageInputFile(ctx, runID, agentID, name, mime, f.Data)
|
||||
if err != nil {
|
||||
slog.Warn("run: failed to stage input file",
|
||||
"agent", agentID, "run_id", runID, "name", name, "error", err)
|
||||
continue
|
||||
}
|
||||
staged = append(staged, stagedFile{name: name, mime: sanitizeField(f.MimeType), fileID: fileID, size: len(f.Data)})
|
||||
if fileID == "" {
|
||||
slog.Warn("run: stager returned empty file_id, skipping",
|
||||
"agent", agentID, "run_id", runID, "name", name)
|
||||
continue
|
||||
}
|
||||
// fileID is host-generated, but sanitize it too before inlining — the
|
||||
// descriptor must never carry control chars no matter the stager impl.
|
||||
staged = append(staged, stagedFile{name: name, mime: mime, fileID: sanitizeField(fileID), size: len(f.Data)})
|
||||
}
|
||||
if len(staged) == 0 {
|
||||
return prompt
|
||||
@@ -113,12 +123,14 @@ func sanitizeName(name string) string {
|
||||
return base
|
||||
}
|
||||
|
||||
// sanitizeField strips control characters (incl. newlines/tabs) from a value
|
||||
// that gets inlined verbatim into the prompt descriptor, so it can't break out
|
||||
// of its line or inject instructions.
|
||||
// sanitizeField strips characters that could let a value inlined verbatim into
|
||||
// the prompt descriptor break out of its line or visually mislead: control
|
||||
// characters (IsControl covers newlines/tabs) AND Unicode format characters
|
||||
// (category Cf — e.g. the bidi overrides U+202A–U+202E, which can reorder how
|
||||
// the descriptor renders).
|
||||
func sanitizeField(s string) string {
|
||||
return strings.Map(func(r rune) rune {
|
||||
if r == '\n' || r == '\r' || r == '\t' || unicode.IsControl(r) {
|
||||
if unicode.IsControl(r) || unicode.Is(unicode.Cf, r) {
|
||||
return -1
|
||||
}
|
||||
return r
|
||||
@@ -155,10 +167,13 @@ func humanizeBytes(n int) string {
|
||||
if n < unit {
|
||||
return fmt.Sprintf("%d B", n)
|
||||
}
|
||||
const prefixes = "KMGTPE"
|
||||
div, exp := int64(unit), 0
|
||||
for v := int64(n) / unit; v >= unit; v /= unit {
|
||||
// Clamp exp to the last prefix so an absurd size (≥1024^7) can't index past
|
||||
// "KMGTPE" and panic — a no-panic guarantee independent of the per-file cap.
|
||||
for v := int64(n) / unit; v >= unit && exp < len(prefixes)-1; v /= unit {
|
||||
div *= unit
|
||||
exp++
|
||||
}
|
||||
return fmt.Sprintf("%.1f %cB", float64(n)/float64(div), "KMGTPE"[exp])
|
||||
return fmt.Sprintf("%.1f %cB", float64(n)/float64(div), prefixes[exp])
|
||||
}
|
||||
|
||||
@@ -186,3 +186,58 @@ func TestStageInputFilesSanitizesTraversal(t *testing.T) {
|
||||
t.Errorf("descriptor leaked the traversal path:\n%s", out)
|
||||
}
|
||||
}
|
||||
|
||||
// TestSanitizeFieldStripsBidiAndControl: control chars AND Unicode format/bidi
|
||||
// overrides are removed from inlined values.
|
||||
func TestSanitizeFieldStripsBidiAndControl(t *testing.T) {
|
||||
in := "audio/mpg\n; rm -rf" // bidi override + newline
|
||||
got := sanitizeField(in)
|
||||
if strings.ContainsAny(got, "\n\r\t") || strings.ContainsRune(got, '') {
|
||||
t.Errorf("sanitizeField left control/bidi chars: %q", got)
|
||||
}
|
||||
}
|
||||
|
||||
// TestStageInputFilesSanitizesMime: a mime with a control char is cleaned in BOTH
|
||||
// the staged value and the descriptor.
|
||||
func TestStageInputFilesSanitizesMime(t *testing.T) {
|
||||
st := &stagerFunc{}
|
||||
ex := newStagerExecutor(st)
|
||||
out := ex.stageInputFiles(context.Background(), "r", "a",
|
||||
[]tool.InputFile{{Name: "c.wav", MimeType: "audio/wav\ninjected", Data: []byte("x")}}, "go")
|
||||
if len(st.staged) != 1 || strings.ContainsAny(st.staged[0].mime, "\n\r") {
|
||||
t.Errorf("mime not sanitized before staging: %+v", st.staged)
|
||||
}
|
||||
if strings.Contains(out, "\ninjected") {
|
||||
t.Errorf("descriptor carried an unsanitized mime newline:\n%s", out)
|
||||
}
|
||||
}
|
||||
|
||||
// TestStageInputFilesEmptyFileID: a stager returning an empty file_id drops the
|
||||
// file (no blank file_id in the descriptor).
|
||||
func TestStageInputFilesEmptyFileID(t *testing.T) {
|
||||
ex := newStagerExecutor(emptyIDStager{})
|
||||
out := ex.stageInputFiles(context.Background(), "r", "a",
|
||||
[]tool.InputFile{{Name: "x.bin", Data: []byte("z")}}, "p")
|
||||
if out != "p" {
|
||||
t.Errorf("empty file_id should drop the file, got %q", out)
|
||||
}
|
||||
}
|
||||
|
||||
type emptyIDStager struct{}
|
||||
|
||||
func (emptyIDStager) StageInputFile(context.Context, string, string, string, string, []byte) (string, error) {
|
||||
return "", nil
|
||||
}
|
||||
|
||||
// TestHumanizeBytesNoPanic: an absurd size clamps to the last prefix instead of
|
||||
// indexing past "KMGTPE".
|
||||
func TestHumanizeBytesNoPanic(t *testing.T) {
|
||||
defer func() {
|
||||
if r := recover(); r != nil {
|
||||
t.Fatalf("humanizeBytes panicked: %v", r)
|
||||
}
|
||||
}()
|
||||
for _, n := range []int{0, 512, 2048, 5_000_000, 1 << 62} {
|
||||
_ = humanizeBytes(n)
|
||||
}
|
||||
}
|
||||
|
||||
+4
-3
@@ -154,9 +154,10 @@ type ContinuationContext struct {
|
||||
|
||||
// InputFile is a non-image file the user supplied with a run (audio,
|
||||
// etc.). The executor stages it into the file store under run scope and
|
||||
// surfaces its file_id to the agent. Name is a safe base name (no path
|
||||
// separators) suitable for /workspace/<name>; MimeType is the resolved
|
||||
// content type; Data is the raw bytes.
|
||||
// surfaces its file_id to the agent. Name may be an untrusted attachment
|
||||
// filename — the executor reduces it to a safe base name (stripping path
|
||||
// separators + control chars) before staging or exposing it as
|
||||
// /workspace/<name>; MimeType is the resolved content type; Data is the raw bytes.
|
||||
type InputFile struct {
|
||||
Name string
|
||||
MimeType string
|
||||
|
||||
Reference in New Issue
Block a user