run: fix statusFor — don't relabel a generic error / caller-cancel as timeout (gadfly #11)
The WithCancelCause+timer rewrite made MaxRuntime surface as Canceled (not DeadlineExceeded), so statusFor's check of context.Cause for DeadlineExceeded could relabel (a) a genuine run error as 'timeout' and (b) a caller cancel/deadline as 'timeout' (previously 'cancelled'). Convergent gadfly finding (glm-5.2 + cluster). Fix: keep MaxRuntime as WithTimeout (its DeadlineExceeded propagates → 'timeout', preserving the own-timeout vs caller-cancel distinction), and add a NESTED WithCancelCause layer only for the kill. statusFor consults context.Cause ONLY for ErrCriticKill; everything else is classified by the run error itself. Tests: generic-error-not-relabeled + caller-cancel-stays-cancelled. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -155,6 +155,10 @@ func TestStatusFor(t *testing.T) {
|
||||
// context.Cause carries ErrCriticKill → "killed".
|
||||
killCtx, killCancel := context.WithCancelCause(context.Background())
|
||||
killCancel(fmt.Errorf("%w: hung", ErrCriticKill))
|
||||
// A context cancelled with a non-kill cause must NOT relabel a genuine run
|
||||
// error: a real error stays "error" even though the ctx was later cancelled.
|
||||
cancelledCtx, cc := context.WithCancelCause(context.Background())
|
||||
cc(context.DeadlineExceeded)
|
||||
cases := []struct {
|
||||
ctx context.Context
|
||||
err error
|
||||
@@ -166,6 +170,8 @@ func TestStatusFor(t *testing.T) {
|
||||
{bg, fmt.Errorf("wrapped: %w", context.DeadlineExceeded), "timeout"},
|
||||
{bg, errors.New("boom"), "error"},
|
||||
{killCtx, context.Canceled, "killed"},
|
||||
{cancelledCtx, errors.New("boom"), "error"}, // generic error not relabeled by cause
|
||||
{cancelledCtx, context.Canceled, "cancelled"}, // caller cancel stays cancelled, not timeout
|
||||
}
|
||||
for _, c := range cases {
|
||||
if got := statusFor(c.ctx, c.err); got != c.want {
|
||||
|
||||
Reference in New Issue
Block a user