2ef88f2a73
executus's tool.Invocation already carried InputFiles (audio/PDF/binary), but the executor never staged them — only Images were folded into the run. This adds the host seam mort's chat/chatbot surfaces need for audio-input parity with agentexec. - run.Ports gains InputFiles InputFileStager (nil-safe; nil = input files silently ignored, run still proceeds text-only). The interface mirrors mort's skill FileStorage: StageInputFile(ctx, runID, agentID, name, mime, content) → file_id. - run/input_files.go (ported from mort agentexec/input_files.go): stageInputFiles persists each file under run scope and appends an [ATTACHED FILES] descriptor block to the prompt so the agent can reach them by file_id (e.g. code_exec files_in → /workspace/<name>). Bytes are NEVER inlined into model context. Best-effort: empty/oversized(>50MB)/save-error files are skipped; colliding base names are disambiguated (name-2, name-3) so they don't clobber at /workspace/<name>. - Executor.Run calls it after the model/toolbox build, before the loop, so the descriptor rides the first user turn (alongside the existing Images folding). Tests: stages + builds the block; nil stager / no files leave the prompt intact; dedup; empty/save-error skipping. Full suite green. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
121 lines
4.1 KiB
Go
121 lines
4.1 KiB
Go
package run
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"log/slog"
|
|
"path"
|
|
"strings"
|
|
|
|
"gitea.stevedudenhoeffer.com/steve/executus/tool"
|
|
)
|
|
|
|
// maxInputFileBytes is a defense-in-depth cap at the staging boundary. A host's
// extraction path may already cap downloads, but stageInputFiles is the trust
// boundary for the InputFiles seam: a call site or bug that populates InputFiles
// directly must not write an unbounded blob to the host file store.
//
// The limit is 50,000,000 bytes (decimal, not 50 MiB). stageInputFiles skips,
// with a warning, any file whose data is strictly larger than this value; a
// file of exactly this size is still staged.
const maxInputFileBytes = 50_000_000
|
|
|
|
// stageInputFiles persists each non-image input attachment into the host file
|
|
// store (Ports.InputFiles) under run scope and appends a descriptor block to the
|
|
// prompt so the agent knows the file_ids it can pass to a worker tool. The bytes
|
|
// are NOT inlined into the model context — the LLM can't read raw audio/binary —
|
|
// so the agent reaches them via a file_id-aware tool (e.g. code_exec files_in,
|
|
// which writes the file to /workspace/<name>).
|
|
//
|
|
// Best-effort: a nil stager, no files, or a per-file save error degrades to
|
|
// "skip that file" — the run still proceeds. Returns the (possibly augmented)
|
|
// prompt.
|
|
func (e *Executor) stageInputFiles(ctx context.Context, runID, agentID string, files []tool.InputFile, prompt string) string {
|
|
if e.cfg.Ports.InputFiles == nil || len(files) == 0 {
|
|
return prompt
|
|
}
|
|
|
|
type staged struct {
|
|
name, mime, fileID string
|
|
size int
|
|
}
|
|
var ok []staged
|
|
seenNames := make(map[string]int, len(files))
|
|
for _, f := range files {
|
|
if len(f.Data) == 0 {
|
|
slog.Warn("run: skipping empty input file",
|
|
"agent", agentID, "run_id", runID, "name", f.Name)
|
|
continue
|
|
}
|
|
if len(f.Data) > maxInputFileBytes {
|
|
slog.Warn("run: skipping oversized input file",
|
|
"agent", agentID, "run_id", runID, "name", f.Name,
|
|
"size", len(f.Data), "cap", maxInputFileBytes)
|
|
continue
|
|
}
|
|
// Disambiguate colliding base names so two attachments with the same
|
|
// name don't both map to /workspace/<name> (the second would clobber the
|
|
// first when the agent writes them via code_exec).
|
|
name := uniqueName(f.Name, seenNames)
|
|
fileID, err := e.cfg.Ports.InputFiles.StageInputFile(ctx, runID, agentID, name, f.MimeType, f.Data)
|
|
if err != nil {
|
|
slog.Warn("run: failed to stage input file",
|
|
"agent", agentID, "run_id", runID, "name", name, "error", err)
|
|
continue
|
|
}
|
|
ok = append(ok, staged{name: name, mime: f.MimeType, fileID: fileID, size: len(f.Data)})
|
|
}
|
|
if len(ok) == 0 {
|
|
return prompt
|
|
}
|
|
|
|
var b strings.Builder
|
|
b.WriteString("[ATTACHED FILES]\n")
|
|
b.WriteString("The user attached the following file(s). Their contents are NOT included in this prompt and you cannot read them directly. ")
|
|
b.WriteString("To work with one, call the code_exec tool with a files_in entry — e.g. ")
|
|
b.WriteString(`files_in: [{"name": "<name>", "file_id": "<file_id>"}]`)
|
|
b.WriteString(" — which writes it to /workspace/<name> inside the Python sandbox. You may also pass a file_id to any other tool that accepts one.\n")
|
|
for _, s := range ok {
|
|
fmt.Fprintf(&b, "- %s (%s, %s) → file_id: %s\n", s.name, s.mime, humanizeBytes(s.size), s.fileID)
|
|
}
|
|
|
|
if strings.TrimSpace(prompt) == "" {
|
|
return b.String()
|
|
}
|
|
return prompt + "\n\n" + b.String()
|
|
}
|
|
|
|
// uniqueName returns a name that has not been handed out before, using seen
// as the record of names already taken (and as the per-name repeat counter).
//
// The first time a name is seen it is returned unchanged. On repeats a numeric
// suffix is inserted before the extension — "a.txt" becomes "a-2.txt", then
// "a-3.txt", … — and every returned candidate is itself recorded in seen, so a
// generated name can never collide with a literal attachment of the same name
// that arrived earlier (the counter simply keeps advancing until a free name
// is found).
//
// Dotfiles are treated as having no extension: path.Ext reports the whole of
// ".env" as the extension, which would otherwise yield "-2.env" (losing the
// leading dot and producing a name that starts with '-'). For such names —
// including ones behind a directory prefix like "dir/.env" — the suffix is
// appended to the full name instead, giving ".env-2".
//
// seen must be non-nil; it is mutated on every call.
func uniqueName(name string, seen map[string]int) string {
	if seen[name] == 0 {
		seen[name]++
		return name
	}

	ext := path.Ext(name)
	base := strings.TrimSuffix(name, ext)
	// If stripping the extension leaves no file-name stem (empty, or only a
	// directory prefix), the final element is a dotfile: keep it intact and
	// append the counter at the very end.
	if base == "" || strings.HasSuffix(base, "/") {
		base, ext = name, ""
	}

	for {
		seen[name]++
		candidate := fmt.Sprintf("%s-%d%s", base, seen[name], ext)
		if seen[candidate] == 0 {
			seen[candidate]++
			return candidate
		}
	}
}
|
|
|
|
// humanizeBytes formats a byte count for the attached-files descriptor block.
//
// Negative counts are treated as zero. Values below 1024 are printed exactly
// as "<n> B"; larger values are divided by the largest power of 1024 that
// keeps the integer quotient below 1024 and printed with one decimal place
// and a K/M/G/T/P/E prefix, e.g. "2.1 MB".
func humanizeBytes(n int) string {
	const (
		step     = 1024
		prefixes = "KMGTPE"
	)

	size := int64(n)
	if size < 0 {
		size = 0
	}
	if size < step {
		return fmt.Sprintf("%d B", size)
	}

	// Grow the divisor one power of 1024 at a time until the whole-number
	// quotient drops below 1024; idx tracks which prefix that power maps to.
	scale, idx := int64(step), 0
	for size/scale >= step {
		scale *= step
		idx++
	}
	return fmt.Sprintf("%.1f %cB", float64(size)/float64(scale), prefixes[idx])
}
|