df4033f42e
executus CI / test (pull_request) Successful in 48s
Second-pass findings on the security fix: - Mime sanitized ONCE and passed to BOTH StageInputFile and the descriptor (was passing raw f.MimeType to the host store while only the descriptor sanitized) — 3 models. - sanitizeField now also strips Unicode format chars (category Cf, incl. the bidi overrides U+202A–U+202E that can reorder how the descriptor renders); IsControl already covers \n\r\t so the explicit checks are dropped. - fileID is sanitized before inlining + an empty file_id drops the file (defense vs a misbehaving stager). - humanizeBytes clamps the prefix index so an absurd size (≥1024^6) can't index past "KMGTPE" and panic — a no-panic guarantee independent of the per-file cap. - Docs sync: README Ports list gains InputFiles; tool.InputFile.Name doc now says the executor reduces an untrusted name to a safe base name (was claiming the field is already safe). Tests: bidi/control stripping; mime sanitized in staged value + descriptor; empty file_id drop; humanizeBytes no-panic across sizes up to 1<<62. Suite green (-race). Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
180 lines
6.8 KiB
Go
180 lines
6.8 KiB
Go
package run
|
||
|
||
import (
|
||
"context"
|
||
"fmt"
|
||
"log/slog"
|
||
"path"
|
||
"strings"
|
||
"unicode"
|
||
|
||
"gitea.stevedudenhoeffer.com/steve/executus/tool"
|
||
)
|
||
|
||
// maxInputFileBytes is a defense-in-depth cap at the staging boundary. A host's
|
||
// extraction path may already cap downloads, but stageInputFiles is the trust
|
||
// boundary for the InputFiles seam: a call site or bug that populates InputFiles
|
||
// directly must not write an unbounded blob to the host file store.
|
||
const maxInputFileBytes = 50_000_000
|
||
|
||
// maxInputFiles bounds how many attachments a single run stages, independent of
|
||
// the per-file byte cap — defense-in-depth against a flood of tiny files.
|
||
const maxInputFiles = 32
|
||
|
||
// stageInputFiles persists each non-image input attachment into the host file
|
||
// store (Ports.InputFiles) under run scope and appends a descriptor block to the
|
||
// prompt so the agent knows the file_ids it can pass to a worker tool. The bytes
|
||
// are NOT inlined into the model context — the LLM can't read raw audio/binary —
|
||
// so the agent reaches them via a file_id-aware tool (e.g. code_exec files_in,
|
||
// which writes the file to /workspace/<name>).
|
||
//
|
||
// Best-effort: a nil stager, no files, or a per-file save error degrades to
|
||
// "skip that file" — the run still proceeds. Returns the (possibly augmented)
|
||
// prompt.
|
||
func (e *Executor) stageInputFiles(ctx context.Context, runID, agentID string, files []tool.InputFile, prompt string) string {
|
||
if e.cfg.Ports.InputFiles == nil || len(files) == 0 {
|
||
return prompt
|
||
}
|
||
// Count cap: bound how many attachments one run can stage, independent of the
|
||
// per-file byte cap (defense-in-depth against a flood of tiny files).
|
||
if len(files) > maxInputFiles {
|
||
slog.Warn("run: too many input files, truncating",
|
||
"agent", agentID, "run_id", runID, "count", len(files), "cap", maxInputFiles)
|
||
files = files[:maxInputFiles]
|
||
}
|
||
|
||
type stagedFile struct {
|
||
name, mime, fileID string
|
||
size int
|
||
}
|
||
var staged []stagedFile
|
||
seenNames := make(map[string]int, len(files))
|
||
for _, f := range files {
|
||
if len(f.Data) == 0 {
|
||
slog.Warn("run: skipping empty input file",
|
||
"agent", agentID, "run_id", runID, "name", f.Name)
|
||
continue
|
||
}
|
||
if len(f.Data) > maxInputFileBytes {
|
||
slog.Warn("run: skipping oversized input file",
|
||
"agent", agentID, "run_id", runID, "name", f.Name,
|
||
"size", len(f.Data), "cap", maxInputFileBytes)
|
||
continue
|
||
}
|
||
// Reduce the untrusted filename to a safe base name BEFORE staging or
|
||
// inlining: strips ../ and absolute-path components (so it can't escape
|
||
// the host store or /workspace/<name>) and drops control chars/newlines
|
||
// (so a crafted name can't inject text into the descriptor block below).
|
||
// Then disambiguate colliding base names so two attachments don't both map
|
||
// to /workspace/<name> (the second would clobber the first).
|
||
name := uniqueName(sanitizeName(f.Name), seenNames)
|
||
// Sanitize the mime ONCE and pass the clean value to both the host store
|
||
// and the descriptor (don't hand the raw value to StageInputFile).
|
||
mime := sanitizeField(f.MimeType)
|
||
fileID, err := e.cfg.Ports.InputFiles.StageInputFile(ctx, runID, agentID, name, mime, f.Data)
|
||
if err != nil {
|
||
slog.Warn("run: failed to stage input file",
|
||
"agent", agentID, "run_id", runID, "name", name, "error", err)
|
||
continue
|
||
}
|
||
if fileID == "" {
|
||
slog.Warn("run: stager returned empty file_id, skipping",
|
||
"agent", agentID, "run_id", runID, "name", name)
|
||
continue
|
||
}
|
||
// fileID is host-generated, but sanitize it too before inlining — the
|
||
// descriptor must never carry control chars no matter the stager impl.
|
||
staged = append(staged, stagedFile{name: name, mime: mime, fileID: sanitizeField(fileID), size: len(f.Data)})
|
||
}
|
||
if len(staged) == 0 {
|
||
return prompt
|
||
}
|
||
|
||
var b strings.Builder
|
||
b.WriteString("[ATTACHED FILES]\n")
|
||
b.WriteString("The user attached the following file(s). Their contents are NOT included in this prompt and you cannot read them directly. ")
|
||
b.WriteString("To work with one, call the code_exec tool with a files_in entry — e.g. ")
|
||
b.WriteString(`files_in: [{"name": "<name>", "file_id": "<file_id>"}]`)
|
||
b.WriteString(" — which writes it to /workspace/<name> inside the Python sandbox. You may also pass a file_id to any other tool that accepts one.\n")
|
||
for _, s := range staged {
|
||
fmt.Fprintf(&b, "- %s (%s, %s) → file_id: %s\n", s.name, s.mime, humanizeBytes(s.size), s.fileID)
|
||
}
|
||
|
||
if strings.TrimSpace(prompt) == "" {
|
||
return b.String()
|
||
}
|
||
return prompt + "\n\n" + b.String()
|
||
}
|
||
|
||
// sanitizeName reduces an untrusted attachment filename to a safe base name. It
|
||
// drops control characters / newlines (which would otherwise let a crafted name
|
||
// inject text into the [ATTACHED FILES] descriptor) and strips every directory
|
||
// component — defeating ../ traversal, nested dirs, and absolute / drive paths
|
||
// both in the host file store and at /workspace/<name>. Returns "attachment"
|
||
// when nothing usable remains (empty, ".", "..").
|
||
func sanitizeName(name string) string {
|
||
name = sanitizeField(name)
|
||
// Normalize backslashes so a Windows-style path also reduces to its base.
|
||
base := path.Base(strings.ReplaceAll(name, `\`, "/"))
|
||
base = strings.TrimSpace(base)
|
||
if base == "" || base == "." || base == ".." {
|
||
return "attachment"
|
||
}
|
||
return base
|
||
}
|
||
|
||
// sanitizeField strips characters that could let a value inlined verbatim into
|
||
// the prompt descriptor break out of its line or visually mislead: control
|
||
// characters (IsControl covers newlines/tabs) AND Unicode format characters
|
||
// (category Cf — e.g. the bidi overrides U+202A–U+202E, which can reorder how
|
||
// the descriptor renders).
|
||
func sanitizeField(s string) string {
|
||
return strings.Map(func(r rune) rune {
|
||
if unicode.IsControl(r) || unicode.Is(unicode.Cf, r) {
|
||
return -1
|
||
}
|
||
return r
|
||
}, s)
|
||
}
|
||
|
||
// uniqueName returns name unchanged the first time it's seen, then name-2,
|
||
// name-3, … (suffix inserted before the extension) on repeats, recording each
|
||
// result in seen so later collisions keep counting up.
|
||
func uniqueName(name string, seen map[string]int) string {
|
||
if seen[name] == 0 {
|
||
seen[name]++
|
||
return name
|
||
}
|
||
ext := path.Ext(name)
|
||
base := strings.TrimSuffix(name, ext)
|
||
for {
|
||
seen[name]++
|
||
candidate := fmt.Sprintf("%s-%d%s", base, seen[name], ext)
|
||
if seen[candidate] == 0 {
|
||
seen[candidate]++
|
||
return candidate
|
||
}
|
||
}
|
||
}
|
||
|
||
// humanizeBytes renders a byte count as a short human-readable string (e.g.
|
||
// "2.1 MB") for the attached-files descriptor block.
|
||
func humanizeBytes(n int) string {
|
||
if n < 0 {
|
||
n = 0
|
||
}
|
||
const unit = 1024
|
||
if n < unit {
|
||
return fmt.Sprintf("%d B", n)
|
||
}
|
||
const prefixes = "KMGTPE"
|
||
div, exp := int64(unit), 0
|
||
// Clamp exp to the last prefix so an absurd size (≥1024^7) can't index past
|
||
// "KMGTPE" and panic — a no-panic guarantee independent of the per-file cap.
|
||
for v := int64(n) / unit; v >= unit && exp < len(prefixes)-1; v /= unit {
|
||
div *= unit
|
||
exp++
|
||
}
|
||
return fmt.Sprintf("%.1f %cB", float64(n)/float64(div), prefixes[exp])
|
||
}
|