// executus/run/input_files.go

package run

import (
	"context"
	"fmt"
	"log/slog"
	"path"
	"strings"
	"unicode"

	"gitea.stevedudenhoeffer.com/steve/executus/tool"
)

// Limits enforced by stageInputFiles. Staging is the trust boundary for the
// InputFiles seam, so both limits are applied there as defense-in-depth even
// if whatever produced the attachments has limits of its own.
const (
	// maxInputFileBytes is the largest single attachment that will be staged.
	// A caller (or a bug) that fills InputFiles directly must not be able to
	// push an unbounded blob into the host file store.
	maxInputFileBytes = 50_000_000

	// maxInputFiles is the most attachments one run will stage. It is
	// independent of the per-file byte cap and guards against a flood of
	// tiny files.
	maxInputFiles = 32
)

// stageInputFiles hands each usable attachment in files to the host file store
// (Ports.InputFiles) for this run, then appends an [ATTACHED FILES] block to
// the prompt listing every staged file's name, MIME type, size and file_id.
// Only that listing reaches the prompt — file contents are never inlined — so
// the agent has to pass a file_id to a tool (the block text points it at
// code_exec's files_in) to work with a file.
//
// Everything here is best-effort and the function cannot fail: with no stager
// configured or no files it returns prompt untouched; files beyond
// maxInputFiles are dropped with a warning; empty, oversized, or unstageable
// files are logged and skipped. If nothing ends up staged, prompt is returned
// as-is. Otherwise the result is the descriptor block alone (blank prompt) or
// prompt, a blank line, and the block.
func (e *Executor) stageInputFiles(ctx context.Context, runID, agentID string, files []tool.InputFile, prompt string) string {
	stager := e.cfg.Ports.InputFiles
	if stager == nil || len(files) == 0 {
		return prompt
	}
	// Enforce the count cap first so the loop below never sees more than
	// maxInputFiles entries.
	if total := len(files); total > maxInputFiles {
		slog.Warn("run: too many input files, truncating",
			"agent", agentID, "run_id", runID, "count", total, "cap", maxInputFiles)
		files = files[:maxInputFiles]
	}

	// Fixed preamble of the descriptor block; one "- name (...)" line per
	// staged file is appended after it.
	const header = "[ATTACHED FILES]\n" +
		"The user attached the following file(s). Their contents are NOT included in this prompt and you cannot read them directly. " +
		"To work with one, call the code_exec tool with a files_in entry — e.g. " +
		`files_in: [{"name": "<name>", "file_id": "<file_id>"}]` +
		" — which writes it to /workspace/<name> inside the Python sandbox. You may also pass a file_id to any other tool that accepts one.\n"

	var entries strings.Builder
	taken := make(map[string]int, len(files))
	for i := range files {
		in := files[i]
		size := len(in.Data)
		switch {
		case size == 0:
			slog.Warn("run: skipping empty input file",
				"agent", agentID, "run_id", runID, "name", in.Name)
			continue
		case size > maxInputFileBytes:
			slog.Warn("run: skipping oversized input file",
				"agent", agentID, "run_id", runID, "name", in.Name,
				"size", size, "cap", maxInputFileBytes)
			continue
		}

		// The filename is untrusted: reduce it to a safe base name before it
		// is used for staging or written into the prompt, then make it unique
		// within this run so two attachments never share one name.
		safeName := uniqueName(sanitizeName(in.Name), taken)
		id, err := stager.StageInputFile(ctx, runID, agentID, safeName, in.MimeType, in.Data)
		if err != nil {
			slog.Warn("run: failed to stage input file",
				"agent", agentID, "run_id", runID, "name", safeName, "error", err)
			continue
		}
		// The MIME type is sanitized only for the prompt line; the stager
		// above received it unmodified.
		fmt.Fprintf(&entries, "- %s (%s, %s) → file_id: %s\n",
			safeName, sanitizeField(in.MimeType), humanizeBytes(size), id)
	}
	if entries.Len() == 0 {
		// Nothing was staged, so there is nothing to describe.
		return prompt
	}

	block := header + entries.String()
	if strings.TrimSpace(prompt) == "" {
		return block
	}
	return prompt + "\n\n" + block
}

// sanitizeName turns an untrusted attachment filename into a bare file name
// with no directory part. The input is first passed through sanitizeField,
// backslashes are then treated as path separators so Windows-style paths are
// handled like slash-separated ones, and only the final path element is kept,
// with surrounding whitespace trimmed. That discards ../ segments, nested
// directories and absolute-path prefixes. If what remains is empty, "." or
// "..", the placeholder "attachment" is returned instead.
func sanitizeName(name string) string {
	slashed := strings.ReplaceAll(sanitizeField(name), `\`, "/")
	switch leaf := strings.TrimSpace(path.Base(slashed)); leaf {
	case "", ".", "..":
		// Nothing usable survived; fall back to a fixed placeholder.
		return "attachment"
	default:
		return leaf
	}
}

// sanitizeField removes every rune that could end or split the single
// descriptor line a value is inlined into, so a crafted value can't break out
// of its line or inject extra lines of text. Two groups are dropped:
//
//   - all Unicode control characters (unicode.IsControl), which already
//     includes newline, carriage return and tab;
//   - the Unicode line and paragraph separators U+2028 and U+2029, which are
//     not control characters as far as unicode.IsControl is concerned but are
//     line breaks nonetheless.
//
// All other runes are kept unchanged.
func sanitizeField(s string) string {
	return strings.Map(func(r rune) rune {
		switch {
		case unicode.IsControl(r):
			return -1
		case r == '\u2028', r == '\u2029':
			// Line / paragraph separator: not Cc, but still a line break.
			return -1
		}
		return r
	}, s)
}

// uniqueName returns a name that has not been handed out before for the given
// seen map. The first time a name is requested it is returned unchanged; on
// repeats a numeric suffix is inserted before the extension (name-2.ext,
// name-3.ext, …). Every returned name is recorded in seen, and candidates that
// are already recorded are skipped, so results never collide with each other
// or with names that were requested literally.
//
// A name whose only dot is the leading one (a dotfile such as ".bashrc") is
// treated as having no extension, so its duplicates become ".bashrc-2" rather
// than "-2.bashrc" — the latter would stop being a dotfile and would start
// with a dash.
//
// seen must be non-nil; it is mutated.
func uniqueName(name string, seen map[string]int) string {
	if seen[name] == 0 {
		seen[name]++
		return name
	}
	ext := path.Ext(name)
	if ext == name {
		// path.Ext reports the whole of ".bashrc" as the extension; using it
		// would leave an empty base, so append the suffix at the end instead.
		ext = ""
	}
	base := strings.TrimSuffix(name, ext)
	for {
		// seen[name] doubles as the next suffix to try for this name.
		seen[name]++
		candidate := fmt.Sprintf("%s-%d%s", base, seen[name], ext)
		if seen[candidate] == 0 {
			seen[candidate]++
			return candidate
		}
	}
}

// humanizeBytes renders a byte count as a short human-readable string for the
// attached-files descriptor block, e.g. "512 B", "1.5 KB", "47.7 MB". Units
// step by 1024 and go up to EB; negative counts are treated as zero. Values
// below 1024 are printed as whole bytes, everything else with one decimal.
//
// The unit is chosen after accounting for rounding: a value that would print
// as "1024.0" in one unit (for example 1048575 bytes) is shown as "1.0" of the
// next unit instead.
func humanizeBytes(n int) string {
	if n < 0 {
		n = 0
	}
	const unit = 1024
	if n < unit {
		return fmt.Sprintf("%d B", n)
	}
	const prefixes = "KMGTPE"
	value, exp := float64(n)/unit, 0
	// unit-0.05 is the smallest value that "%.1f" rounds up to 1024.0, so
	// anything at or above it belongs to the next unit. Dividing by 1024 is
	// exact in floating point, so no precision is lost by stepping this way.
	for value >= unit-0.05 && exp < len(prefixes)-1 {
		value /= unit
		exp++
	}
	return fmt.Sprintf("%.1f %cB", value, prefixes[exp])
}