fix(run): harden input-file staging per gadfly #18 validation pass
executus CI / test (pull_request) Successful in 48s
executus CI / test (pull_request) Successful in 48s
Second-pass findings on the security fix: - Mime sanitized ONCE and passed to BOTH StageInputFile and the descriptor (was passing raw f.MimeType to the host store while only the descriptor sanitized) — 3 models. - sanitizeField now also strips Unicode format chars (category Cf, incl. the bidi overrides U+202A–U+202E that can reorder how the descriptor renders); IsControl already covers \n\r\t so the explicit checks are dropped. - fileID is sanitized before inlining + an empty file_id drops the file (defense vs a misbehaving stager). - humanizeBytes clamps the prefix index so an absurd size (≥1024^6) can't index past "KMGTPE" and panic — a no-panic guarantee independent of the per-file cap. - Docs sync: README Ports list gains InputFiles; tool.InputFile.Name doc now says the executor reduces an untrusted name to a safe base name (was claiming the field is already safe). Tests: bidi/control stripping; mime sanitized in staged value + descriptor; empty file_id drop; humanizeBytes no-panic across sizes up to 1<<62. Suite green (-race). Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -37,7 +37,7 @@ bot) — mort and gadfly are the first two consumers (heavy and light). See
|
|||||||
tool registry, majordomo's agent loop, context compaction, run-bounding, and
|
tool registry, majordomo's agent loop, context compaction, run-bounding, and
|
||||||
step/audit instrumentation into one `Run(ctx, RunnableAgent, inv) Result`, with
|
step/audit instrumentation into one `Run(ctx, RunnableAgent, inv) Result`, with
|
||||||
every host concern behind a nil-safe `run.Ports` (Audit/Budget/Critic/
|
every host concern behind a nil-safe `run.Ports` (Audit/Budget/Critic/
|
||||||
Checkpointer/PaletteSource/Delivery). See `examples/minimal`.
|
Checkpointer/PaletteSource/Delivery/InputFiles). See `examples/minimal`.
|
||||||
- `model/` — config-driven tier resolution + failover over majordomo, with
|
- `model/` — config-driven tier resolution + failover over majordomo, with
|
||||||
pluggable `UsageSink`/`TraceSink` and `GenerateWith[T]` structured output.
|
pluggable `UsageSink`/`TraceSink` and `GenerateWith[T]` structured output.
|
||||||
- `tool/` — the tool registry + 3-stage permission model + SSRF guard.
|
- `tool/` — the tool registry + 3-stage permission model + SSRF guard.
|
||||||
|
|||||||
+23
-8
@@ -68,13 +68,23 @@ func (e *Executor) stageInputFiles(ctx context.Context, runID, agentID string, f
|
|||||||
// Then disambiguate colliding base names so two attachments don't both map
|
// Then disambiguate colliding base names so two attachments don't both map
|
||||||
// to /workspace/<name> (the second would clobber the first).
|
// to /workspace/<name> (the second would clobber the first).
|
||||||
name := uniqueName(sanitizeName(f.Name), seenNames)
|
name := uniqueName(sanitizeName(f.Name), seenNames)
|
||||||
fileID, err := e.cfg.Ports.InputFiles.StageInputFile(ctx, runID, agentID, name, f.MimeType, f.Data)
|
// Sanitize the mime ONCE and pass the clean value to both the host store
|
||||||
|
// and the descriptor (don't hand the raw value to StageInputFile).
|
||||||
|
mime := sanitizeField(f.MimeType)
|
||||||
|
fileID, err := e.cfg.Ports.InputFiles.StageInputFile(ctx, runID, agentID, name, mime, f.Data)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
slog.Warn("run: failed to stage input file",
|
slog.Warn("run: failed to stage input file",
|
||||||
"agent", agentID, "run_id", runID, "name", name, "error", err)
|
"agent", agentID, "run_id", runID, "name", name, "error", err)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
staged = append(staged, stagedFile{name: name, mime: sanitizeField(f.MimeType), fileID: fileID, size: len(f.Data)})
|
if fileID == "" {
|
||||||
|
slog.Warn("run: stager returned empty file_id, skipping",
|
||||||
|
"agent", agentID, "run_id", runID, "name", name)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
// fileID is host-generated, but sanitize it too before inlining — the
|
||||||
|
// descriptor must never carry control chars no matter the stager impl.
|
||||||
|
staged = append(staged, stagedFile{name: name, mime: mime, fileID: sanitizeField(fileID), size: len(f.Data)})
|
||||||
}
|
}
|
||||||
if len(staged) == 0 {
|
if len(staged) == 0 {
|
||||||
return prompt
|
return prompt
|
||||||
@@ -113,12 +123,14 @@ func sanitizeName(name string) string {
|
|||||||
return base
|
return base
|
||||||
}
|
}
|
||||||
|
|
||||||
// sanitizeField strips control characters (incl. newlines/tabs) from a value
|
// sanitizeField strips characters that could let a value inlined verbatim into
|
||||||
// that gets inlined verbatim into the prompt descriptor, so it can't break out
|
// the prompt descriptor break out of its line or visually mislead: control
|
||||||
// of its line or inject instructions.
|
// characters (IsControl covers newlines/tabs) AND Unicode format characters
|
||||||
|
// (category Cf — e.g. the bidi overrides U+202A–U+202E, which can reorder how
|
||||||
|
// the descriptor renders).
|
||||||
func sanitizeField(s string) string {
|
func sanitizeField(s string) string {
|
||||||
return strings.Map(func(r rune) rune {
|
return strings.Map(func(r rune) rune {
|
||||||
if r == '\n' || r == '\r' || r == '\t' || unicode.IsControl(r) {
|
if unicode.IsControl(r) || unicode.Is(unicode.Cf, r) {
|
||||||
return -1
|
return -1
|
||||||
}
|
}
|
||||||
return r
|
return r
|
||||||
@@ -155,10 +167,13 @@ func humanizeBytes(n int) string {
|
|||||||
if n < unit {
|
if n < unit {
|
||||||
return fmt.Sprintf("%d B", n)
|
return fmt.Sprintf("%d B", n)
|
||||||
}
|
}
|
||||||
|
const prefixes = "KMGTPE"
|
||||||
div, exp := int64(unit), 0
|
div, exp := int64(unit), 0
|
||||||
for v := int64(n) / unit; v >= unit; v /= unit {
|
// Clamp exp to the last prefix so an absurd size (≥1024^7) can't index past
|
||||||
|
// "KMGTPE" and panic — a no-panic guarantee independent of the per-file cap.
|
||||||
|
for v := int64(n) / unit; v >= unit && exp < len(prefixes)-1; v /= unit {
|
||||||
div *= unit
|
div *= unit
|
||||||
exp++
|
exp++
|
||||||
}
|
}
|
||||||
return fmt.Sprintf("%.1f %cB", float64(n)/float64(div), "KMGTPE"[exp])
|
return fmt.Sprintf("%.1f %cB", float64(n)/float64(div), prefixes[exp])
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -186,3 +186,58 @@ func TestStageInputFilesSanitizesTraversal(t *testing.T) {
|
|||||||
t.Errorf("descriptor leaked the traversal path:\n%s", out)
|
t.Errorf("descriptor leaked the traversal path:\n%s", out)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TestSanitizeFieldStripsBidiAndControl: control chars AND Unicode format/bidi
|
||||||
|
// overrides are removed from inlined values.
|
||||||
|
func TestSanitizeFieldStripsBidiAndControl(t *testing.T) {
|
||||||
|
in := "audio/mpg\n; rm -rf" // bidi override + newline
|
||||||
|
got := sanitizeField(in)
|
||||||
|
if strings.ContainsAny(got, "\n\r\t") || strings.ContainsRune(got, '') {
|
||||||
|
t.Errorf("sanitizeField left control/bidi chars: %q", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestStageInputFilesSanitizesMime: a mime with a control char is cleaned in BOTH
|
||||||
|
// the staged value and the descriptor.
|
||||||
|
func TestStageInputFilesSanitizesMime(t *testing.T) {
|
||||||
|
st := &stagerFunc{}
|
||||||
|
ex := newStagerExecutor(st)
|
||||||
|
out := ex.stageInputFiles(context.Background(), "r", "a",
|
||||||
|
[]tool.InputFile{{Name: "c.wav", MimeType: "audio/wav\ninjected", Data: []byte("x")}}, "go")
|
||||||
|
if len(st.staged) != 1 || strings.ContainsAny(st.staged[0].mime, "\n\r") {
|
||||||
|
t.Errorf("mime not sanitized before staging: %+v", st.staged)
|
||||||
|
}
|
||||||
|
if strings.Contains(out, "\ninjected") {
|
||||||
|
t.Errorf("descriptor carried an unsanitized mime newline:\n%s", out)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestStageInputFilesEmptyFileID: a stager returning an empty file_id drops the
|
||||||
|
// file (no blank file_id in the descriptor).
|
||||||
|
func TestStageInputFilesEmptyFileID(t *testing.T) {
|
||||||
|
ex := newStagerExecutor(emptyIDStager{})
|
||||||
|
out := ex.stageInputFiles(context.Background(), "r", "a",
|
||||||
|
[]tool.InputFile{{Name: "x.bin", Data: []byte("z")}}, "p")
|
||||||
|
if out != "p" {
|
||||||
|
t.Errorf("empty file_id should drop the file, got %q", out)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
type emptyIDStager struct{}
|
||||||
|
|
||||||
|
func (emptyIDStager) StageInputFile(context.Context, string, string, string, string, []byte) (string, error) {
|
||||||
|
return "", nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestHumanizeBytesNoPanic: an absurd size clamps to the last prefix instead of
|
||||||
|
// indexing past "KMGTPE".
|
||||||
|
func TestHumanizeBytesNoPanic(t *testing.T) {
|
||||||
|
defer func() {
|
||||||
|
if r := recover(); r != nil {
|
||||||
|
t.Fatalf("humanizeBytes panicked: %v", r)
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
for _, n := range []int{0, 512, 2048, 5_000_000, 1 << 62} {
|
||||||
|
_ = humanizeBytes(n)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
+4
-3
@@ -154,9 +154,10 @@ type ContinuationContext struct {
|
|||||||
|
|
||||||
// InputFile is a non-image file the user supplied with a run (audio,
|
// InputFile is a non-image file the user supplied with a run (audio,
|
||||||
// etc.). The executor stages it into the file store under run scope and
|
// etc.). The executor stages it into the file store under run scope and
|
||||||
// surfaces its file_id to the agent. Name is a safe base name (no path
|
// surfaces its file_id to the agent. Name may be an untrusted attachment
|
||||||
// separators) suitable for /workspace/<name>; MimeType is the resolved
|
// filename — the executor reduces it to a safe base name (stripping path
|
||||||
// content type; Data is the raw bytes.
|
// separators + control chars) before staging or exposing it as
|
||||||
|
// /workspace/<name>; MimeType is the resolved content type; Data is the raw bytes.
|
||||||
type InputFile struct {
|
type InputFile struct {
|
||||||
Name string
|
Name string
|
||||||
MimeType string
|
MimeType string
|
||||||
|
|||||||
Reference in New Issue
Block a user