0acaa8c9a5
executus CI / test (pull_request) Successful in 1m46s
Every reviewer flagged that runAgent appended llm.Text(input) unconditionally, so an image-only run (blank prompt) emitted an empty TextPart — inconsistent with the sibling runSession.AttachImages, which guards it. Mirror that guard (strings.TrimSpace(input) != ""). Also: (1) copy opts before appending (the variadic backing array can have spare capacity; avoid aliasing a caller's slice); (2) reword the doc comment to drop the mort-agentexec reference (executus is a standalone lib; a consumer name doesn't belong in its godoc). Tests: image + text are co-located in ONE user message; an image-only run emits no blank TextPart. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
122 lines
3.8 KiB
Go
122 lines
3.8 KiB
Go
package run_test
|
|
|
|
import (
|
|
"context"
|
|
"strings"
|
|
"testing"
|
|
|
|
"gitea.stevedudenhoeffer.com/steve/majordomo/llm"
|
|
"gitea.stevedudenhoeffer.com/steve/majordomo/provider/fake"
|
|
|
|
"gitea.stevedudenhoeffer.com/steve/executus/run"
|
|
"gitea.stevedudenhoeffer.com/steve/executus/tool"
|
|
)
|
|
|
|
// TestExecutorFoldsInitialImages: when the invocation carries Images, they're
|
|
// folded into the first user message (alongside the prompt text) instead of being
|
|
// dropped — majordomo's Run input arg is text-only, so the executor seeds the
|
|
// multimodal opening turn via history.
|
|
func TestExecutorFoldsInitialImages(t *testing.T) {
|
|
fp := fake.New("fake")
|
|
fp.Enqueue("m", fake.Reply("saw the image"))
|
|
m, _ := fp.Model("m")
|
|
|
|
img := llm.ImagePart{MIME: "image/png", Data: []byte("PNGDATA")}
|
|
inv := tool.Invocation{RunID: "r1", Images: []llm.ImagePart{img}}
|
|
ex := run.New(run.Config{
|
|
Registry: tool.NewRegistry(),
|
|
Models: func(ctx context.Context, _ string) (context.Context, llm.Model, error) { return ctx, m, nil },
|
|
})
|
|
res := ex.Run(context.Background(), run.RunnableAgent{ModelTier: "m"}, inv, "describe this")
|
|
if res.Err != nil {
|
|
t.Fatalf("run error: %v", res.Err)
|
|
}
|
|
|
|
calls := fp.Calls()
|
|
if len(calls) == 0 {
|
|
t.Fatal("no model calls recorded")
|
|
}
|
|
// The text + image must be CO-LOCATED in a single user message (not split
|
|
// across two), so the model reads them as one multimodal turn.
|
|
coLocated := false
|
|
for _, msg := range calls[0].Request.Messages {
|
|
sawImage, sawText := false, false
|
|
for _, p := range msg.Parts {
|
|
switch pp := p.(type) {
|
|
case llm.ImagePart:
|
|
if string(pp.Data) == "PNGDATA" {
|
|
sawImage = true
|
|
}
|
|
case llm.TextPart:
|
|
if strings.Contains(pp.Text, "describe this") {
|
|
sawText = true
|
|
}
|
|
}
|
|
}
|
|
if sawImage && sawText {
|
|
coLocated = true
|
|
}
|
|
}
|
|
if !coLocated {
|
|
t.Error("image + prompt text were not folded into the SAME user message")
|
|
}
|
|
}
|
|
|
|
// TestExecutorImageOnlyNoBlankText: an image-only run (blank prompt) must NOT emit
|
|
// an empty TextPart — the message carries just the image, matching
|
|
// runSession.AttachImages's guard.
|
|
func TestExecutorImageOnlyNoBlankText(t *testing.T) {
|
|
fp := fake.New("fake")
|
|
fp.Enqueue("m", fake.Reply("saw it"))
|
|
m, _ := fp.Model("m")
|
|
|
|
inv := tool.Invocation{RunID: "r3", Images: []llm.ImagePart{{MIME: "image/png", Data: []byte("IMG")}}}
|
|
ex := run.New(run.Config{
|
|
Registry: tool.NewRegistry(),
|
|
Models: func(ctx context.Context, _ string) (context.Context, llm.Model, error) { return ctx, m, nil },
|
|
})
|
|
res := ex.Run(context.Background(), run.RunnableAgent{ModelTier: "m"}, inv, " ")
|
|
if res.Err != nil {
|
|
t.Fatalf("run error: %v", res.Err)
|
|
}
|
|
for _, msg := range fp.Calls()[0].Request.Messages {
|
|
for _, p := range msg.Parts {
|
|
if tp, ok := p.(llm.TextPart); ok && strings.TrimSpace(tp.Text) == "" {
|
|
t.Error("image-only run emitted a blank TextPart")
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// TestExecutorTextOnlyUnchanged: with no Images, the prompt flows through as the
|
|
// text input (regression guard that the fold path didn't break the common case).
|
|
func TestExecutorTextOnlyUnchanged(t *testing.T) {
|
|
fp := fake.New("fake")
|
|
fp.Enqueue("m", fake.Reply("ok"))
|
|
m, _ := fp.Model("m")
|
|
|
|
ex := run.New(run.Config{
|
|
Registry: tool.NewRegistry(),
|
|
Models: func(ctx context.Context, _ string) (context.Context, llm.Model, error) { return ctx, m, nil },
|
|
})
|
|
res := ex.Run(context.Background(), run.RunnableAgent{ModelTier: "m"}, tool.Invocation{RunID: "r2"}, "plain prompt")
|
|
if res.Err != nil {
|
|
t.Fatalf("run error: %v", res.Err)
|
|
}
|
|
calls := fp.Calls()
|
|
if len(calls) == 0 {
|
|
t.Fatal("no model calls recorded")
|
|
}
|
|
sawText := false
|
|
for _, msg := range calls[0].Request.Messages {
|
|
for _, p := range msg.Parts {
|
|
if tp, ok := p.(llm.TextPart); ok && strings.Contains(tp.Text, "plain prompt") {
|
|
sawText = true
|
|
}
|
|
}
|
|
}
|
|
if !sawText {
|
|
t.Error("text-only prompt did not reach the model")
|
|
}
|
|
}
|