executus/run/session_test.go

package run_test

import (
	"context"
	"testing"

	"gitea.stevedudenhoeffer.com/steve/majordomo/llm"
	"gitea.stevedudenhoeffer.com/steve/majordomo/provider/fake"

	"gitea.stevedudenhoeffer.com/steve/executus/run"
	"gitea.stevedudenhoeffer.com/steve/executus/tool"
)

// TestSessionToolFactoryPostRun verifies that a SessionToolFactory's PostRun
// hook is handed the run output and transcript, that the value it returns is
// surfaced on Result.PostRunResult, and that the factory's Cleanup hook has
// been called by the time Run returns.
func TestSessionToolFactoryPostRun(t *testing.T) {
	// Script the fake provider with a single plain-text reply for model "m".
	fp := fake.New("fake")
	fp.Enqueue("m", fake.Reply("hello artifacts"))
	m, err := fp.Model("m")
	if err != nil {
		// Fail fast: continuing with an unusable model would only produce a
		// confusing failure further down.
		t.Fatalf("fake model: %v", err)
	}

	cleanupCalled := false
	inv := tool.Invocation{
		RunID: "r1",
		SessionToolFactory: func(_ tool.AgentSession) tool.SessionTools {
			return tool.SessionTools{
				// PostRun echoes its inputs back so the assertions below can
				// confirm both the output and the transcript reached the hook.
				PostRun: func(_ context.Context, transcript []llm.Message, output string, _ error) *tool.PostRunResult {
					return &tool.PostRunResult{
						Artifacts: []tool.Artifact{{Name: "out.txt", MimeType: "text/plain", Data: []byte(output)}},
						Metadata:  map[string]any{"transcript_len": len(transcript)},
					}
				},
				Cleanup: func() { cleanupCalled = true },
			}
		},
	}
	ex := run.New(run.Config{
		Registry: tool.NewRegistry(),
		// Always resolve to the scripted fake model, whatever tier is asked for.
		Models: func(ctx context.Context, _ string) (context.Context, llm.Model, error) { return ctx, m, nil },
	})
	res := ex.Run(context.Background(), run.RunnableAgent{ModelTier: "m"}, inv, "go")
	if res.Err != nil {
		t.Fatalf("run error: %v", res.Err)
	}
	if res.PostRunResult == nil {
		t.Fatal("Result.PostRunResult is nil — PostRun hook not invoked / not attached")
	}
	if n := len(res.PostRunResult.Artifacts); n != 1 {
		t.Fatalf("artifacts = %d, want 1", n)
	}
	a := res.PostRunResult.Artifacts[0]
	if a.Name != "out.txt" || string(a.Data) != "hello artifacts" {
		t.Errorf("artifact = {%q, %q}", a.Name, string(a.Data))
	}
	// A missing or non-int entry is treated the same as an empty transcript.
	tl, ok := res.PostRunResult.Metadata["transcript_len"].(int)
	if !ok || tl < 1 {
		t.Errorf("transcript not passed to PostRun (len=%d)", tl)
	}
	if !cleanupCalled {
		t.Error("Cleanup was not deferred/called")
	}
}

// TestSessionToolFactoryAddsTool verifies that tools returned by a
// SessionToolFactory are available to the run: the fake model is scripted to
// call the "render" tool, and the test asserts the tool's handler executed.
func TestSessionToolFactoryAddsTool(t *testing.T) {
	// Script two turns for model "m": first a call to the "render" tool, then a
	// plain-text reply.
	fp := fake.New("fake")
	fp.Enqueue("m",
		fake.ReplyWith(llm.Response{ToolCalls: []llm.ToolCall{{ID: "c1", Name: "render", Arguments: []byte(`{}`)}}}),
		fake.Reply("rendered"),
	)
	m, err := fp.Model("m")
	if err != nil {
		// Fail fast: continuing with an unusable model would only produce a
		// confusing failure further down.
		t.Fatalf("fake model: %v", err)
	}

	toolCalled := false
	renderTool := llm.DefineTool("render", "render a preview",
		func(_ context.Context, _ struct{}) (any, error) {
			toolCalled = true
			return "ok", nil
		})
	inv := tool.Invocation{
		RunID: "r2",
		// The tool is supplied only through the session factory; the registry
		// passed to the executor below is freshly created.
		SessionToolFactory: func(_ tool.AgentSession) tool.SessionTools {
			return tool.SessionTools{Tools: []llm.Tool{renderTool}}
		},
	}
	ex := run.New(run.Config{
		Registry: tool.NewRegistry(),
		// Always resolve to the scripted fake model, whatever tier is asked for.
		Models: func(ctx context.Context, _ string) (context.Context, llm.Model, error) { return ctx, m, nil },
	})
	res := ex.Run(context.Background(),
		run.RunnableAgent{ModelTier: "m", MaxIterations: 5}, inv, "go")
	if res.Err != nil {
		t.Fatalf("run error: %v", res.Err)
	}
	if !toolCalled {
		t.Error("session-factory tool was not added to the toolbox / not called")
	}
}