feat(run): InputFileStager seam — stage non-image attachments into the prompt #18
@@ -318,6 +318,12 @@ func (e *Executor) Run(ctx context.Context, ra RunnableAgent, inv tool.Invocatio
|
||||
}
|
||||
|
||||
ag := agent.New(model, e.systemPrompt(ra), opts...)
|
||||
// Stage non-image input attachments (audio/PDF/binary) into the host file
|
||||
// store and fold an [ATTACHED FILES] descriptor into the prompt so the agent
|
||||
// can reach them by file_id. No-op when Ports.InputFiles is nil or there are
|
||||
// no files. Done after the model/toolbox build but before the loop, so the
|
||||
// descriptor rides the very first user turn.
|
||||
input = e.stageInputFiles(runCtx, inv.RunID, ra.ID, inv.InputFiles, input)
|
||||
// One WithSteer drains BOTH the session mailbox (a tool's AttachImages) and
|
||||
// the critic's nudges before each step.
|
||||
steer := func() []llm.Message { return append(mailbox.drain(), critic.drainSteer()...) }
|
||||
|
||||
@@ -0,0 +1,120 @@
|
||||
package run
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"path"
|
||||
"strings"
|
||||
|
||||
"gitea.stevedudenhoeffer.com/steve/executus/tool"
|
||||
)
|
||||
|
||||
// maxInputFileBytes is the largest single attachment, in bytes, that
// stageInputFiles will pass on to the host file store; anything bigger is
// logged and skipped. The host may enforce its own download limit earlier, but
// this check guards the InputFiles seam itself so that a caller (or a bug)
// filling InputFiles directly cannot push an unbounded blob into storage.
const maxInputFileBytes = 50 * 1000 * 1000
|
||||
|
||||
// stageInputFiles persists each non-image input attachment into the host file
|
||||
// store (Ports.InputFiles) under run scope and appends a descriptor block to the
|
||||
// prompt so the agent knows the file_ids it can pass to a worker tool. The bytes
|
||||
// are NOT inlined into the model context — the LLM can't read raw audio/binary —
|
||||
// so the agent reaches them via a file_id-aware tool (e.g. code_exec files_in,
|
||||
// which writes the file to /workspace/<name>).
|
||||
//
|
||||
// Best-effort: a nil stager, no files, or a per-file save error degrades to
|
||||
// "skip that file" — the run still proceeds. Returns the (possibly augmented)
|
||||
// prompt.
|
||||
func (e *Executor) stageInputFiles(ctx context.Context, runID, agentID string, files []tool.InputFile, prompt string) string {
|
||||
if e.cfg.Ports.InputFiles == nil || len(files) == 0 {
|
||||
return prompt
|
||||
}
|
||||
|
||||
type staged struct {
|
||||
name, mime, fileID string
|
||||
size int
|
||||
}
|
||||
var ok []staged
|
||||
seenNames := make(map[string]int, len(files))
|
||||
for _, f := range files {
|
||||
if len(f.Data) == 0 {
|
||||
slog.Warn("run: skipping empty input file",
|
||||
"agent", agentID, "run_id", runID, "name", f.Name)
|
||||
continue
|
||||
}
|
||||
if len(f.Data) > maxInputFileBytes {
|
||||
slog.Warn("run: skipping oversized input file",
|
||||
"agent", agentID, "run_id", runID, "name", f.Name,
|
||||
"size", len(f.Data), "cap", maxInputFileBytes)
|
||||
continue
|
||||
}
|
||||
// Disambiguate colliding base names so two attachments with the same
|
||||
// name don't both map to /workspace/<name> (the second would clobber the
|
||||
// first when the agent writes them via code_exec).
|
||||
name := uniqueName(f.Name, seenNames)
|
||||
fileID, err := e.cfg.Ports.InputFiles.StageInputFile(ctx, runID, agentID, name, f.MimeType, f.Data)
|
||||
if err != nil {
|
||||
slog.Warn("run: failed to stage input file",
|
||||
"agent", agentID, "run_id", runID, "name", name, "error", err)
|
||||
continue
|
||||
}
|
||||
ok = append(ok, staged{name: name, mime: f.MimeType, fileID: fileID, size: len(f.Data)})
|
||||
}
|
||||
if len(ok) == 0 {
|
||||
return prompt
|
||||
}
|
||||
|
||||
var b strings.Builder
|
||||
b.WriteString("[ATTACHED FILES]\n")
|
||||
b.WriteString("The user attached the following file(s). Their contents are NOT included in this prompt and you cannot read them directly. ")
|
||||
b.WriteString("To work with one, call the code_exec tool with a files_in entry — e.g. ")
|
||||
b.WriteString(`files_in: [{"name": "<name>", "file_id": "<file_id>"}]`)
|
||||
b.WriteString(" — which writes it to /workspace/<name> inside the Python sandbox. You may also pass a file_id to any other tool that accepts one.\n")
|
||||
for _, s := range ok {
|
||||
fmt.Fprintf(&b, "- %s (%s, %s) → file_id: %s\n", s.name, s.mime, humanizeBytes(s.size), s.fileID)
|
||||
}
|
||||
|
||||
if strings.TrimSpace(prompt) == "" {
|
||||
return b.String()
|
||||
}
|
||||
return prompt + "\n\n" + b.String()
|
||||
}
|
||||
|
||||
// uniqueName returns a name that has not been handed out before for this seen
// map. The first request for a name returns it unchanged; repeats return
// name-2, name-3, … with the counter inserted before the extension
// ("a.wav" → "a-2.wav"). Every returned value is recorded in seen, so a
// generated candidate that collides with a name already handed out is skipped
// and the counter keeps climbing.
//
// A name whose final path element is nothing but a leading-dot word (".env",
// "dir/.env") is treated as having no extension: path.Ext would report the
// whole element as the extension and the result would be "-2.env", which drops
// the leading dot and no longer resembles the original. Such names get the
// counter appended instead (".env-2").
//
// seen must be non-nil; it is mutated.
func uniqueName(name string, seen map[string]int) string {
	if seen[name] == 0 {
		seen[name]++
		return name
	}

	ext := path.Ext(name)
	base := strings.TrimSuffix(name, ext)
	if base == "" || strings.HasSuffix(base, "/") {
		// The final element is all "extension" (a dot-file): keep it intact
		// and append the counter at the end.
		base, ext = name, ""
	}

	for {
		seen[name]++
		candidate := fmt.Sprintf("%s-%d%s", base, seen[name], ext)
		if seen[candidate] == 0 {
			seen[candidate]++
			return candidate
		}
	}
}
|
||||
|
||||
// humanizeBytes formats a byte count for the attached-files descriptor.
// Counts below 1024 are printed exactly ("512 B"); larger counts are scaled by
// powers of 1024 and printed with one decimal and a K/M/G/T/P/E prefix
// ("1.5 KB", "47.7 MB"). Negative input is treated as zero.
func humanizeBytes(n int) string {
	const (
		unit     = 1024
		prefixes = "KMGTPE"
	)
	if n < 0 {
		n = 0
	}
	if n < unit {
		return fmt.Sprintf("%d B", n)
	}

	// Grow the divisor until the scaled value drops below one further unit;
	// idx then selects the matching prefix letter.
	total := int64(n)
	scale := int64(unit)
	idx := 0
	for total/scale >= unit {
		scale *= unit
		idx++
	}
	return fmt.Sprintf("%.1f %cB", float64(n)/float64(scale), prefixes[idx])
}
|
||||
@@ -0,0 +1,119 @@
|
||||
package run
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"gitea.stevedudenhoeffer.com/steve/majordomo/llm"
|
||||
|
||||
"gitea.stevedudenhoeffer.com/steve/executus/tool"
|
||||
)
|
||||
|
||||
// stagerFunc is a test InputFileStager: it records each staged file and returns
|
||||
// a deterministic file_id ("file_<name>"), or an error if err is set.
|
||||
type stagerFunc struct {
|
||||
staged []stagedRec
|
||||
err error
|
||||
}
|
||||
|
||||
// stagedRec captures the arguments of one StageInputFile call; size is the
// length of the content that was passed. Field order matters: the test double
// builds values of this type positionally.
type stagedRec struct {
	runID   string
	agentID string
	name    string
	mime    string
	size    int
}
|
||||
|
||||
func (s *stagerFunc) StageInputFile(_ context.Context, runID, agentID, name, mime string, content []byte) (string, error) {
|
||||
if s.err != nil {
|
||||
return "", s.err
|
||||
}
|
||||
s.staged = append(s.staged, stagedRec{runID, agentID, name, mime, len(content)})
|
||||
return "file_" + name, nil
|
||||
}
|
||||
|
||||
func newStagerExecutor(s InputFileStager) *Executor {
|
||||
return New(Config{
|
||||
Registry: tool.NewRegistry(),
|
||||
Models: func(ctx context.Context, _ string) (context.Context, llm.Model, error) { return ctx, nil, nil },
|
||||
Ports: Ports{InputFiles: s},
|
||||
})
|
||||
}
|
||||
|
||||
// TestStageInputFiles: files are staged via the port and an [ATTACHED FILES]
|
||||
// descriptor (with each file_id) is appended to the prompt.
|
||||
func TestStageInputFiles(t *testing.T) {
|
||||
st := &stagerFunc{}
|
||||
ex := newStagerExecutor(st)
|
||||
out := ex.stageInputFiles(context.Background(), "run-1", "agent-1",
|
||||
[]tool.InputFile{{Name: "clip.mp3", MimeType: "audio/mpeg", Data: []byte("abcd")}},
|
||||
"transcribe this")
|
||||
|
||||
if len(st.staged) != 1 || st.staged[0].name != "clip.mp3" {
|
||||
t.Fatalf("staged = %+v, want one clip.mp3", st.staged)
|
||||
}
|
||||
if st.staged[0].runID != "run-1" || st.staged[0].agentID != "agent-1" {
|
||||
t.Errorf("stager got runID/agentID = %q/%q, want run-1/agent-1", st.staged[0].runID, st.staged[0].agentID)
|
||||
}
|
||||
for _, want := range []string{"transcribe this", "[ATTACHED FILES]", "clip.mp3", "file_clip.mp3", "audio/mpeg"} {
|
||||
if !strings.Contains(out, want) {
|
||||
t.Errorf("output missing %q:\n%s", want, out)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TestStageInputFilesNoStager: a nil port leaves the prompt untouched and never
|
||||
// drops the run.
|
||||
func TestStageInputFilesNoStager(t *testing.T) {
|
||||
ex := newStagerExecutor(nil) // Ports.InputFiles == nil
|
||||
out := ex.stageInputFiles(context.Background(), "r", "a",
|
||||
[]tool.InputFile{{Name: "x.bin", Data: []byte("z")}}, "prompt")
|
||||
if out != "prompt" {
|
||||
t.Errorf("nil stager changed the prompt: %q", out)
|
||||
}
|
||||
}
|
||||
|
||||
// TestStageInputFilesNoFiles: no attachments leaves the prompt untouched.
|
||||
func TestStageInputFilesNoFiles(t *testing.T) {
|
||||
ex := newStagerExecutor(&stagerFunc{})
|
||||
out := ex.stageInputFiles(context.Background(), "r", "a", nil, "prompt")
|
||||
if out != "prompt" {
|
||||
t.Errorf("no files changed the prompt: %q", out)
|
||||
}
|
||||
}
|
||||
|
||||
// TestStageInputFilesDedup: colliding base names are disambiguated so they don't
|
||||
// clobber each other at /workspace/<name>.
|
||||
func TestStageInputFilesDedup(t *testing.T) {
|
||||
st := &stagerFunc{}
|
||||
ex := newStagerExecutor(st)
|
||||
out := ex.stageInputFiles(context.Background(), "r", "a", []tool.InputFile{
|
||||
{Name: "a.wav", MimeType: "audio/wav", Data: []byte("1")},
|
||||
{Name: "a.wav", MimeType: "audio/wav", Data: []byte("2")},
|
||||
}, "go")
|
||||
if len(st.staged) != 2 {
|
||||
t.Fatalf("staged %d files, want 2", len(st.staged))
|
||||
}
|
||||
if st.staged[0].name != "a.wav" || st.staged[1].name != "a-2.wav" {
|
||||
t.Errorf("dedup names = %q, %q; want a.wav, a-2.wav", st.staged[0].name, st.staged[1].name)
|
||||
}
|
||||
if !strings.Contains(out, "a-2.wav") {
|
||||
t.Errorf("output missing disambiguated name:\n%s", out)
|
||||
}
|
||||
}
|
||||
|
||||
// TestStageInputFilesSkipsBad: empty + oversized files are skipped; a save error
|
||||
// drops only that file. With nothing staged, the prompt is unchanged.
|
||||
func TestStageInputFilesSkipsBad(t *testing.T) {
|
||||
// Empty data → skipped; with no good files the prompt is returned as-is.
|
||||
ex := newStagerExecutor(&stagerFunc{})
|
||||
if out := ex.stageInputFiles(context.Background(), "r", "a",
|
||||
[]tool.InputFile{{Name: "empty.bin", Data: nil}}, "p"); out != "p" {
|
||||
t.Errorf("empty file should be skipped, got %q", out)
|
||||
}
|
||||
// A stager error → that file is dropped; nothing staged → prompt unchanged.
|
||||
exErr := newStagerExecutor(&stagerFunc{err: errors.New("disk full")})
|
||||
if out := exErr.stageInputFiles(context.Background(), "r", "a",
|
||||
[]tool.InputFile{{Name: "x.bin", Data: []byte("z")}}, "p"); out != "p" {
|
||||
t.Errorf("save error should drop the file and leave the prompt, got %q", out)
|
||||
}
|
||||
}
|
||||
@@ -42,6 +42,20 @@ type Ports struct {
|
||||
// Delivery is where the run's output + artifacts go. nil = the caller
|
||||
// reads the Result in-process (the light-host default).
|
||||
Delivery deliver.Delivery
|
||||
// InputFiles persists non-image input attachments (audio, PDF, binary)
|
||||
// carried on Invocation.InputFiles into a host file store under run scope,
|
||||
// returning file_ids the agent can hand to a worker tool. nil = input files
|
||||
// are silently ignored (the run still proceeds, text-only). The bytes are
|
||||
// never inlined into the model context — the LLM can't read raw audio/binary.
|
||||
InputFiles InputFileStager
|
||||
}
|
||||
|
||||
// InputFileStager is the storage seam for Invocation.InputFiles. An
// implementation stores one non-image attachment in a host file store under
// run scope and hands back a file_id by which the run can refer to it. Keeping
// this as an interface lets the kernel stage attachments without importing a
// storage layer; the original note names mort's skill FileStorage as the
// intended implementer, and any host blob store can fill the same role.
type InputFileStager interface {
	// StageInputFile stores content under the given run and agent, labelled
	// with name and mime, and returns the resulting file_id or an error.
	StageInputFile(
		ctx context.Context,
		runID, agentID, name, mime string,
		content []byte,
	) (fileID string, err error)
}
|
||||
|
||||
// RunInfo describes a run at start time — the attribution a recorder/critic
|
||||
|
||||
Reference in New Issue
Block a user