fix(run): harden input-file staging per gadfly #18 validation pass
executus CI / test (pull_request) Successful in 48s
executus CI / test (pull_request) Successful in 48s
Second-pass findings on the security fix: - Mime sanitized ONCE and passed to BOTH StageInputFile and the descriptor (was passing raw f.MimeType to the host store while only the descriptor sanitized) — 3 models. - sanitizeField now also strips Unicode format chars (category Cf, incl. the bidi overrides U+202A–U+202E that can reorder how the descriptor renders); IsControl already covers \n\r\t so the explicit checks are dropped. - fileID is sanitized before inlining + an empty file_id drops the file (defense vs a misbehaving stager). - humanizeBytes clamps the prefix index so an absurd size (≥1024^7) can't index past "KMGTPE" and panic — a no-panic guarantee independent of the per-file cap. - Docs sync: README Ports list gains InputFiles; tool.InputFile.Name doc now says the executor reduces an untrusted name to a safe base name (was claiming the field is already safe). Tests: bidi/control stripping; mime sanitized in staged value + descriptor; empty file_id drop; humanizeBytes no-panic across sizes up to 1<<62. Suite green (-race). Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
+23
-8
@@ -68,13 +68,23 @@ func (e *Executor) stageInputFiles(ctx context.Context, runID, agentID string, f
|
||||
// Then disambiguate colliding base names so two attachments don't both map
|
||||
// to /workspace/<name> (the second would clobber the first).
|
||||
name := uniqueName(sanitizeName(f.Name), seenNames)
|
||||
fileID, err := e.cfg.Ports.InputFiles.StageInputFile(ctx, runID, agentID, name, f.MimeType, f.Data)
|
||||
// Sanitize the mime ONCE and pass the clean value to both the host store
|
||||
// and the descriptor (don't hand the raw value to StageInputFile).
|
||||
mime := sanitizeField(f.MimeType)
|
||||
fileID, err := e.cfg.Ports.InputFiles.StageInputFile(ctx, runID, agentID, name, mime, f.Data)
|
||||
if err != nil {
|
||||
slog.Warn("run: failed to stage input file",
|
||||
"agent", agentID, "run_id", runID, "name", name, "error", err)
|
||||
continue
|
||||
}
|
||||
staged = append(staged, stagedFile{name: name, mime: sanitizeField(f.MimeType), fileID: fileID, size: len(f.Data)})
|
||||
if fileID == "" {
|
||||
slog.Warn("run: stager returned empty file_id, skipping",
|
||||
"agent", agentID, "run_id", runID, "name", name)
|
||||
continue
|
||||
}
|
||||
// fileID is host-generated, but sanitize it too before inlining — the
|
||||
// descriptor must never carry control chars no matter the stager impl.
|
||||
staged = append(staged, stagedFile{name: name, mime: mime, fileID: sanitizeField(fileID), size: len(f.Data)})
|
||||
}
|
||||
if len(staged) == 0 {
|
||||
return prompt
|
||||
@@ -113,12 +123,14 @@ func sanitizeName(name string) string {
|
||||
return base
|
||||
}
|
||||
|
||||
// sanitizeField strips control characters (incl. newlines/tabs) from a value
|
||||
// that gets inlined verbatim into the prompt descriptor, so it can't break out
|
||||
// of its line or inject instructions.
|
||||
// sanitizeField strips characters that could let a value inlined verbatim into
|
||||
// the prompt descriptor break out of its line or visually mislead: control
|
||||
// characters (IsControl covers newlines/tabs) AND Unicode format characters
|
||||
// (category Cf — e.g. the bidi overrides U+202A–U+202E, which can reorder how
|
||||
// the descriptor renders).
|
||||
func sanitizeField(s string) string {
|
||||
return strings.Map(func(r rune) rune {
|
||||
if r == '\n' || r == '\r' || r == '\t' || unicode.IsControl(r) {
|
||||
if unicode.IsControl(r) || unicode.Is(unicode.Cf, r) {
|
||||
return -1
|
||||
}
|
||||
return r
|
||||
@@ -155,10 +167,13 @@ func humanizeBytes(n int) string {
|
||||
if n < unit {
|
||||
return fmt.Sprintf("%d B", n)
|
||||
}
|
||||
const prefixes = "KMGTPE"
|
||||
div, exp := int64(unit), 0
|
||||
for v := int64(n) / unit; v >= unit; v /= unit {
|
||||
// Clamp exp to the last prefix so an absurd size (≥1024^7) can't index past
|
||||
// "KMGTPE" and panic — a no-panic guarantee independent of the per-file cap.
|
||||
for v := int64(n) / unit; v >= unit && exp < len(prefixes)-1; v /= unit {
|
||||
div *= unit
|
||||
exp++
|
||||
}
|
||||
return fmt.Sprintf("%.1f %cB", float64(n)/float64(div), "KMGTPE"[exp])
|
||||
return fmt.Sprintf("%.1f %cB", float64(n)/float64(div), prefixes[exp])
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user