executus/run/input_files_test.go

package run

import (
	"context"
	"errors"
	"strings"
	"testing"

	"gitea.stevedudenhoeffer.com/steve/majordomo/llm"

	"gitea.stevedudenhoeffer.com/steve/executus/tool"
)

// stagerFunc is a test InputFileStager: it records each staged file and returns
// a deterministic file_id ("file_<name>"), or an error if err is set.
type stagerFunc struct {
	staged []stagedRec
	err    error
}

type stagedRec struct {
	runID, agentID, name, mime string
	size                       int
}

func (s *stagerFunc) StageInputFile(_ context.Context, runID, agentID, name, mime string, content []byte) (string, error) {
	if s.err != nil {
		return "", s.err
	}
	s.staged = append(s.staged, stagedRec{runID, agentID, name, mime, len(content)})
	return "file_" + name, nil
}

func newStagerExecutor(s InputFileStager) *Executor {
	return New(Config{
		Registry: tool.NewRegistry(),
		Models:   func(ctx context.Context, _ string) (context.Context, llm.Model, error) { return ctx, nil, nil },
		Ports:    Ports{InputFiles: s},
	})
}

// TestStageInputFiles: files are staged via the port and an [ATTACHED FILES]
// descriptor (with each file_id) is appended to the prompt.
func TestStageInputFiles(t *testing.T) {
	st := &stagerFunc{}
	ex := newStagerExecutor(st)
	out := ex.stageInputFiles(context.Background(), "run-1", "agent-1",
		[]tool.InputFile{{Name: "clip.mp3", MimeType: "audio/mpeg", Data: []byte("abcd")}},
		"transcribe this")

	if len(st.staged) != 1 || st.staged[0].name != "clip.mp3" {
		t.Fatalf("staged = %+v, want one clip.mp3", st.staged)
	}
	if st.staged[0].runID != "run-1" || st.staged[0].agentID != "agent-1" {
		t.Errorf("stager got runID/agentID = %q/%q, want run-1/agent-1", st.staged[0].runID, st.staged[0].agentID)
	}
	for _, want := range []string{"transcribe this", "[ATTACHED FILES]", "clip.mp3", "file_clip.mp3", "audio/mpeg"} {
		if !strings.Contains(out, want) {
			t.Errorf("output missing %q:\n%s", want, out)
		}
	}
}

// TestStageInputFilesNoStager: a nil port leaves the prompt untouched and never
// drops the run.
func TestStageInputFilesNoStager(t *testing.T) {
	ex := newStagerExecutor(nil) // Ports.InputFiles == nil
	out := ex.stageInputFiles(context.Background(), "r", "a",
		[]tool.InputFile{{Name: "x.bin", Data: []byte("z")}}, "prompt")
	if out != "prompt" {
		t.Errorf("nil stager changed the prompt: %q", out)
	}
}

// TestStageInputFilesNoFiles: no attachments leaves the prompt untouched.
func TestStageInputFilesNoFiles(t *testing.T) {
	ex := newStagerExecutor(&stagerFunc{})
	out := ex.stageInputFiles(context.Background(), "r", "a", nil, "prompt")
	if out != "prompt" {
		t.Errorf("no files changed the prompt: %q", out)
	}
}

// TestStageInputFilesDedup: colliding base names are disambiguated so they don't
// clobber each other at /workspace/<name>.
func TestStageInputFilesDedup(t *testing.T) {
	st := &stagerFunc{}
	ex := newStagerExecutor(st)
	out := ex.stageInputFiles(context.Background(), "r", "a", []tool.InputFile{
		{Name: "a.wav", MimeType: "audio/wav", Data: []byte("1")},
		{Name: "a.wav", MimeType: "audio/wav", Data: []byte("2")},
	}, "go")
	if len(st.staged) != 2 {
		t.Fatalf("staged %d files, want 2", len(st.staged))
	}
	if st.staged[0].name != "a.wav" || st.staged[1].name != "a-2.wav" {
		t.Errorf("dedup names = %q, %q; want a.wav, a-2.wav", st.staged[0].name, st.staged[1].name)
	}
	if !strings.Contains(out, "a-2.wav") {
		t.Errorf("output missing disambiguated name:\n%s", out)
	}
}

// TestStageInputFilesSkipsBad: empty + oversized files are skipped; a save error
// drops only that file. With nothing staged, the prompt is unchanged.
func TestStageInputFilesSkipsBad(t *testing.T) {
	// Empty data → skipped; with no good files the prompt is returned as-is.
	ex := newStagerExecutor(&stagerFunc{})
	if out := ex.stageInputFiles(context.Background(), "r", "a",
		[]tool.InputFile{{Name: "empty.bin", Data: nil}}, "p"); out != "p" {
		t.Errorf("empty file should be skipped, got %q", out)
	}
	// A stager error → that file is dropped; nothing staged → prompt unchanged.
	exErr := newStagerExecutor(&stagerFunc{err: errors.New("disk full")})
	if out := exErr.stageInputFiles(context.Background(), "r", "a",
		[]tool.InputFile{{Name: "x.bin", Data: []byte("z")}}, "p"); out != "p" {
		t.Errorf("save error should drop the file and leave the prompt, got %q", out)
	}
}

// TestStageInputFilesOversize: a file past the byte cap is skipped (prompt
// unchanged), exercising the size guard directly.
func TestStageInputFilesOversize(t *testing.T) {
	st := &stagerFunc{}
	ex := newStagerExecutor(st)
	big := make([]byte, maxInputFileBytes+1)
	out := ex.stageInputFiles(context.Background(), "r", "a",
		[]tool.InputFile{{Name: "huge.bin", Data: big}}, "p")
	if out != "p" || len(st.staged) != 0 {
		t.Errorf("oversized file should be skipped: out=%q staged=%d", out, len(st.staged))
	}
}

// TestStageInputFilesCountCap: more than maxInputFiles attachments are truncated
// to the cap.
func TestStageInputFilesCountCap(t *testing.T) {
	st := &stagerFunc{}
	ex := newStagerExecutor(st)
	files := make([]tool.InputFile, maxInputFiles+5)
	for i := range files {
		files[i] = tool.InputFile{Name: "f.bin", Data: []byte("x")}
	}
	ex.stageInputFiles(context.Background(), "r", "a", files, "p")
	if len(st.staged) != maxInputFiles {
		t.Errorf("count cap: staged %d, want %d", len(st.staged), maxInputFiles)
	}
}

// TestSanitizeName: traversal + absolute + control-char filenames are reduced to
// a safe base name (no path separators, no newlines), with a fallback.
func TestSanitizeName(t *testing.T) {
	cases := map[string]string{
		"../../etc/passwd":  "passwd",
		"/etc/cron.d/x":     "x",
		`..\..\windows\sys`: "sys",
		"clip.mp3":          "clip.mp3",
		"":                  "attachment",
		"..":                "attachment",
		".":                 "attachment",
		"evil\n- injected":  "evil- injected",
		"a/b/c.wav":         "c.wav",
	}
	for in, want := range cases {
		if got := sanitizeName(in); got != want {
			t.Errorf("sanitizeName(%q) = %q, want %q", in, got, want)
		}
		// A sanitized name must never carry a path separator or newline.
		got := sanitizeName(in)
		if strings.ContainsAny(got, "/\\\n\r") {
			t.Errorf("sanitizeName(%q) = %q still contains a separator/newline", in, got)
		}
	}
}

// TestStageInputFilesSanitizesTraversal: a traversal filename is staged AND
// described under its safe base name only.
func TestStageInputFilesSanitizesTraversal(t *testing.T) {
	st := &stagerFunc{}
	ex := newStagerExecutor(st)
	out := ex.stageInputFiles(context.Background(), "r", "a",
		[]tool.InputFile{{Name: "../../../etc/passwd", MimeType: "text/plain", Data: []byte("x")}}, "go")
	if len(st.staged) != 1 || st.staged[0].name != "passwd" {
		t.Fatalf("staged name = %+v, want passwd", st.staged)
	}
	if strings.Contains(out, "..") || strings.Contains(out, "/etc/") {
		t.Errorf("descriptor leaked the traversal path:\n%s", out)
	}
}

// TestSanitizeFieldStripsBidiAndControl: control chars AND Unicode format/bidi
// overrides are removed from inlined values.
func TestSanitizeFieldStripsBidiAndControl(t *testing.T) {
	in := "audio/‮mpg\n; rm -rf" // bidi override + newline
	got := sanitizeField(in)
	if strings.ContainsAny(got, "\n\r\t") || strings.ContainsRune(got, '‮') {
		t.Errorf("sanitizeField left control/bidi chars: %q", got)
	}
}

// TestStageInputFilesSanitizesMime: a mime with a control char is cleaned in BOTH
// the staged value and the descriptor.
func TestStageInputFilesSanitizesMime(t *testing.T) {
	st := &stagerFunc{}
	ex := newStagerExecutor(st)
	out := ex.stageInputFiles(context.Background(), "r", "a",
		[]tool.InputFile{{Name: "c.wav", MimeType: "audio/wav\ninjected", Data: []byte("x")}}, "go")
	if len(st.staged) != 1 || strings.ContainsAny(st.staged[0].mime, "\n\r") {
		t.Errorf("mime not sanitized before staging: %+v", st.staged)
	}
	if strings.Contains(out, "\ninjected") {
		t.Errorf("descriptor carried an unsanitized mime newline:\n%s", out)
	}
}

// TestStageInputFilesEmptyFileID: a stager returning an empty file_id drops the
// file (no blank file_id in the descriptor).
func TestStageInputFilesEmptyFileID(t *testing.T) {
	ex := newStagerExecutor(emptyIDStager{})
	out := ex.stageInputFiles(context.Background(), "r", "a",
		[]tool.InputFile{{Name: "x.bin", Data: []byte("z")}}, "p")
	if out != "p" {
		t.Errorf("empty file_id should drop the file, got %q", out)
	}
}

type emptyIDStager struct{}

func (emptyIDStager) StageInputFile(context.Context, string, string, string, string, []byte) (string, error) {
	return "", nil
}

// TestHumanizeBytesNoPanic: an absurd size clamps to the last prefix instead of
// indexing past "KMGTPE".
func TestHumanizeBytesNoPanic(t *testing.T) {
	defer func() {
		if r := recover(); r != nil {
			t.Fatalf("humanizeBytes panicked: %v", r)
		}
	}()
	for _, n := range []int{0, 512, 2048, 5_000_000, 1 << 62} {
		_ = humanizeBytes(n)
	}
}