C0b: address verified gadfly findings (panic-safety + test honesty)
executus CI / test (pull_request) Failing after 58s
executus CI / test (pull_request) Failing after 58s
From PR #9 (minimax + deepseek): - Run now has a top-level recover() — the "never propagates a panic" promise was unenforced; a panicking host Port (Critic/Audit/Palette) on the run goroutine now becomes Result.Err instead of unwinding into the caller. - The critic deadline-watch goroutine recovers panics from a host Deadline() (it's a separate goroutine, so Run's recover can't catch it) — a buggy CriticHandle can't crash the process. - CriticHandle interface documents its concurrency contract (Record*/Steer on the run goroutine vs Deadline()/Stop() from the watch goroutine — impls must be concurrent-safe; the critic battery already is). - startCritic's dead `soft <= 0 -> noop` guard (withFallbacks already coerces to 90s) replaced with a defensive inline 90s default, so a bypass of withFallbacks still gets a working critic instead of silently none. - Delivery tests made honest: the old "error path" test only checked the early-return (no delivery); added TestDeliverErrorOnRunFailure (in-loop model error -> DeliverError to the target) + renamed the early-return test. Graded all #9 findings in the gadfly MCP. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
+5
-1
@@ -33,7 +33,7 @@ func (e *Executor) startCritic(runCtx context.Context, cancel context.CancelFunc
|
|||||||
}
|
}
|
||||||
soft := e.cfg.Defaults.CriticSoftTimeout
|
soft := e.cfg.Defaults.CriticSoftTimeout
|
||||||
if soft <= 0 {
|
if soft <= 0 {
|
||||||
return nil, noop
|
soft = 90 * time.Second // defensive: withFallbacks normally guarantees >0
|
||||||
}
|
}
|
||||||
h := e.cfg.Ports.Critic.Monitor(runCtx, info, soft)
|
h := e.cfg.Ports.Critic.Monitor(runCtx, info, soft)
|
||||||
if h == nil {
|
if h == nil {
|
||||||
@@ -41,6 +41,10 @@ func (e *Executor) startCritic(runCtx context.Context, cancel context.CancelFunc
|
|||||||
}
|
}
|
||||||
done := make(chan struct{})
|
done := make(chan struct{})
|
||||||
go func() {
|
go func() {
|
||||||
|
// A host CriticHandle.Deadline() that panics must not crash the process
|
||||||
|
// (this runs on its own goroutine, so the executor's top-level recover
|
||||||
|
// can't catch it). Log-free best-effort: just stop watching.
|
||||||
|
defer func() { _ = recover() }()
|
||||||
t := time.NewTicker(criticDeadlineCheck)
|
t := time.NewTicker(criticDeadlineCheck)
|
||||||
defer t.Stop()
|
defer t.Stop()
|
||||||
for {
|
for {
|
||||||
|
|||||||
+33
-7
@@ -67,22 +67,48 @@ func TestNoDeliveryWithoutTarget(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestDeliveryErrorPath(t *testing.T) {
|
// TestNoDeliveryOnEarlyResolveError: an error BEFORE the run starts (model
|
||||||
|
// resolve) returns before delivery is reached — neither Deliver nor DeliverError
|
||||||
|
// fires. (Delivery covers run OUTCOMES, not pre-run setup failures.)
|
||||||
|
func TestNoDeliveryOnEarlyResolveError(t *testing.T) {
|
||||||
d := &recordingDelivery{}
|
d := &recordingDelivery{}
|
||||||
ex := run.New(run.Config{
|
ex := run.New(run.Config{
|
||||||
Registry: tool.NewRegistry(),
|
Registry: tool.NewRegistry(),
|
||||||
Models: func(ctx context.Context, _ string) (context.Context, llm.Model, error) {
|
Models: func(ctx context.Context, _ string) (context.Context, llm.Model, error) {
|
||||||
return ctx, nil, errors.New("resolve boom") // forces a run error
|
return ctx, nil, errors.New("resolve boom")
|
||||||
},
|
},
|
||||||
Ports: run.Ports{Delivery: d},
|
Ports: run.Ports{Delivery: d},
|
||||||
})
|
})
|
||||||
ex.Run(context.Background(),
|
ex.Run(context.Background(),
|
||||||
run.RunnableAgent{Name: "x", ModelTier: "m"},
|
run.RunnableAgent{Name: "x", ModelTier: "m"},
|
||||||
tool.Invocation{RunID: "r", DeliveryKind: "channel", DeliveryID: "chan-9"}, "go")
|
tool.Invocation{RunID: "r", DeliveryKind: "channel", DeliveryID: "chan-9"}, "go")
|
||||||
// A model-resolve error returns before the run context exists, so delivery
|
if d.delivers != 0 || d.errored != nil {
|
||||||
// isn't reached — assert no spurious Deliver. (DeliverError on in-loop errors
|
t.Errorf("early resolve failure should neither Deliver nor DeliverError: delivers=%d errored=%v", d.delivers, d.errored)
|
||||||
// is exercised by the wiring; this guards the early-return path.)
|
}
|
||||||
if d.delivers != 0 {
|
}
|
||||||
t.Errorf("early failure should not Deliver, got %d", d.delivers)
|
|
||||||
|
// TestDeliverErrorOnRunFailure: an in-loop run failure (the model errors) routes
|
||||||
|
// through DeliverError with the run error.
|
||||||
|
func TestDeliverErrorOnRunFailure(t *testing.T) {
|
||||||
|
d := &recordingDelivery{}
|
||||||
|
fp := fake.New("fake")
|
||||||
|
fp.Enqueue("m", fake.Step{Err: errors.New("model boom")}) // model errors mid-run
|
||||||
|
m, _ := fp.Model("m")
|
||||||
|
ex := run.New(run.Config{
|
||||||
|
Registry: tool.NewRegistry(),
|
||||||
|
Models: func(ctx context.Context, _ string) (context.Context, llm.Model, error) { return ctx, m, nil },
|
||||||
|
Ports: run.Ports{Delivery: d},
|
||||||
|
})
|
||||||
|
res := ex.Run(context.Background(),
|
||||||
|
run.RunnableAgent{Name: "x", ModelTier: "m"},
|
||||||
|
tool.Invocation{RunID: "r", DeliveryKind: "channel", DeliveryID: "chan-9"}, "go")
|
||||||
|
if res.Err == nil {
|
||||||
|
t.Fatal("expected a run error")
|
||||||
|
}
|
||||||
|
if d.delivers != 0 {
|
||||||
|
t.Errorf("a failed run should not Deliver (success path), got %d", d.delivers)
|
||||||
|
}
|
||||||
|
if d.errored == nil || d.target.ID != "chan-9" {
|
||||||
|
t.Errorf("a failed run with a target should DeliverError to chan-9, got errored=%v target=%+v", d.errored, d.target)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
+12
-3
@@ -104,10 +104,19 @@ type Result struct {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Run executes ra with the given invocation + input and returns the Result. It
|
// Run executes ra with the given invocation + input and returns the Result. It
|
||||||
// never propagates a panic; failures surface in Result.Err.
|
// never propagates a panic; failures surface in Result.Err (a top-level recover
|
||||||
func (e *Executor) Run(ctx context.Context, ra RunnableAgent, inv tool.Invocation, input string) Result {
|
// converts any panic — including from a host Port — into a run error).
|
||||||
|
func (e *Executor) Run(ctx context.Context, ra RunnableAgent, inv tool.Invocation, input string) (res Result) {
|
||||||
started := time.Now()
|
started := time.Now()
|
||||||
res := Result{RunID: inv.RunID}
|
res = Result{RunID: inv.RunID}
|
||||||
|
// Enforce the no-panic contract: a panic anywhere in the run (incl. a host
|
||||||
|
// Critic/Audit/Palette callback on the main goroutine) becomes Result.Err
|
||||||
|
// rather than unwinding into the caller.
|
||||||
|
defer func() {
|
||||||
|
if r := recover(); r != nil {
|
||||||
|
res.Err = fmt.Errorf("run.Executor: recovered panic: %v", r)
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
|
||||||
tier := ra.ModelTier
|
tier := ra.ModelTier
|
||||||
if tier == "" {
|
if tier == "" {
|
||||||
|
|||||||
@@ -113,6 +113,11 @@ type Critic interface {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// CriticHandle is the executor's live link to a run's critic.
|
// CriticHandle is the executor's live link to a run's critic.
|
||||||
|
//
|
||||||
|
// Concurrency: the executor calls RecordStep/RecordToolStart/Steer from the run
|
||||||
|
// goroutine while a separate watch goroutine polls Deadline() and the run's end
|
||||||
|
// calls Stop() — so implementations MUST be safe for concurrent use across these
|
||||||
|
// methods (the critic battery's handle guards its state with a mutex).
|
||||||
type CriticHandle interface {
|
type CriticHandle interface {
|
||||||
// RecordStep / RecordToolStart keep the critic's activity clock fresh so a
|
// RecordStep / RecordToolStart keep the critic's activity clock fresh so a
|
||||||
// healthy-but-slow run is not mistaken for a hang.
|
// healthy-but-slow run is not mistaken for a hang.
|
||||||
|
|||||||
Reference in New Issue
Block a user