C0b: address verified gadfly findings (panic-safety + test honesty)

From PR #9 (minimax + deepseek):

- Run now has a top-level recover(). The "never propagates a panic" promise was previously unenforced; a panicking host Port (Critic/Audit/Palette) on the run goroutine now becomes Result.Err instead of unwinding into the caller.
- The critic deadline-watch goroutine recovers panics from a host Deadline(). It is a separate goroutine, so Run's recover can't catch them; with this change a buggy CriticHandle can't crash the process.
- The CriticHandle interface documents its concurrency contract: Record*/Steer are called on the run goroutine and Deadline()/Stop() from the watch goroutine, so impls must be concurrent-safe (the critic battery already is).
- startCritic's dead `soft <= 0 -> noop` guard (withFallbacks already coerces to 90s) is replaced with a defensive inline 90s default, so a path that bypasses withFallbacks still gets a working critic instead of silently getting none.
- Delivery tests made honest: the old "error path" test only checked the early return (no delivery). Added TestDeliverErrorOnRunFailure (in-loop model error -> DeliverError to the target) and renamed the early-return test.

Graded all #9 findings in the gadfly MCP.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-27 10:09:22 -04:00
parent 43b2471737
commit 4aa06f652e
4 changed files with 55 additions and 11 deletions
@@ -67,22 +67,48 @@ func TestNoDeliveryWithoutTarget(t *testing.T) {
 	}
 }

-func TestDeliveryErrorPath(t *testing.T) {
+// TestNoDeliveryOnEarlyResolveError: an error BEFORE the run starts (model
+// resolve) returns before delivery is reached — neither Deliver nor DeliverError
+// fires. (Delivery covers run OUTCOMES, not pre-run setup failures.)
+func TestNoDeliveryOnEarlyResolveError(t *testing.T) {
 	d := &recordingDelivery{}
 	ex := run.New(run.Config{
 		Registry: tool.NewRegistry(),
 		Models: func(ctx context.Context, _ string) (context.Context, llm.Model, error) {
-			return ctx, nil, errors.New("resolve boom") // forces a run error
+			return ctx, nil, errors.New("resolve boom")
 		},
 		Ports: run.Ports{Delivery: d},
 	})
 	ex.Run(context.Background(),
 		run.RunnableAgent{Name: "x", ModelTier: "m"},
 		tool.Invocation{RunID: "r", DeliveryKind: "channel", DeliveryID: "chan-9"}, "go")
-	// A model-resolve error returns before the run context exists, so delivery
-	// isn't reached — assert no spurious Deliver. (DeliverError on in-loop errors
-	// is exercised by the wiring; this guards the early-return path.)
-	if d.delivers != 0 {
-		t.Errorf("early failure should not Deliver, got %d", d.delivers)
+	if d.delivers != 0 || d.errored != nil {
+		t.Errorf("early resolve failure should neither Deliver nor DeliverError: delivers=%d errored=%v", d.delivers, d.errored)
+	}
+}
+
+// TestDeliverErrorOnRunFailure: an in-loop run failure (the model errors) routes
+// through DeliverError with the run error.
+func TestDeliverErrorOnRunFailure(t *testing.T) {
+	d := &recordingDelivery{}
+	fp := fake.New("fake")
+	fp.Enqueue("m", fake.Step{Err: errors.New("model boom")}) // model errors mid-run
+	m, _ := fp.Model("m")
+	ex := run.New(run.Config{
+		Registry: tool.NewRegistry(),
+		Models:   func(ctx context.Context, _ string) (context.Context, llm.Model, error) { return ctx, m, nil },
+		Ports:    run.Ports{Delivery: d},
+	})
+	res := ex.Run(context.Background(),
+		run.RunnableAgent{Name: "x", ModelTier: "m"},
+		tool.Invocation{RunID: "r", DeliveryKind: "channel", DeliveryID: "chan-9"}, "go")
+	if res.Err == nil {
+		t.Fatal("expected a run error")
+	}
+	if d.delivers != 0 {
+		t.Errorf("a failed run should not Deliver (success path), got %d", d.delivers)
+	}
+	if d.errored == nil || d.target.ID != "chan-9" {
+		t.Errorf("a failed run with a target should DeliverError to chan-9, got errored=%v target=%+v", d.errored, d.target)
 	}
 }