fix: resolve flaky TestWebhook_LifecycleEvents caused by async delivery ordering
The test assumed webhook events arrive in wall-clock order (queued first, done last), but dispatcher.Fire spawns a goroutine per event with no ordering guarantee. On a single-core CI runner the "queued" goroutine was routinely preempted before making its HTTP POST, letting the "loading"/"working"/"done" goroutines land first.

Fix: wait until a "done" event appears in the received set (proving all prior transitions have been dispatched by the worker), then assert that "queued" and "done" each appear exactly once rather than checking positional order.

Reproduced with: `GOMAXPROCS=1 go test -race -count=100 -run TestWebhook_LifecycleEvents ./internal/server/`

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -317,14 +317,23 @@ func TestWebhook_LifecycleEvents(t *testing.T) {
|
|||||||
var submitResp jobSubmitResponse
|
var submitResp jobSubmitResponse
|
||||||
json.NewDecoder(submitRec.Body).Decode(&submitResp)
|
json.NewDecoder(submitRec.Body).Decode(&submitResp)
|
||||||
|
|
||||||
// Wait for webhooks to arrive.
|
// Wait until we see a "done" event. Since all webhooks are delivered in
|
||||||
|
// background goroutines there is no guaranteed wall-clock ordering between
|
||||||
|
// "queued", "loading"/"working", and "done". Waiting for "done" to appear
|
||||||
|
// is the only reliable signal that all prior events have been dispatched
|
||||||
|
// (the worker fires them in order before completing).
|
||||||
deadline := time.Now().Add(5 * time.Second)
|
deadline := time.Now().Add(5 * time.Second)
|
||||||
for time.Now().Before(deadline) {
|
for time.Now().Before(deadline) {
|
||||||
mu.Lock()
|
mu.Lock()
|
||||||
n := len(events)
|
found := false
|
||||||
|
for _, e := range events {
|
||||||
|
if e.State == "done" {
|
||||||
|
found = true
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
mu.Unlock()
|
mu.Unlock()
|
||||||
// We expect at least: queued, working (or loading), done.
|
if found {
|
||||||
if n >= 3 {
|
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
time.Sleep(50 * time.Millisecond)
|
time.Sleep(50 * time.Millisecond)
|
||||||
@@ -333,11 +342,12 @@ func TestWebhook_LifecycleEvents(t *testing.T) {
|
|||||||
mu.Lock()
|
mu.Lock()
|
||||||
defer mu.Unlock()
|
defer mu.Unlock()
|
||||||
|
|
||||||
|
// Verify we received at least: queued, working (or loading), done.
|
||||||
if len(events) < 3 {
|
if len(events) < 3 {
|
||||||
t.Fatalf("received %d webhook events, want >= 3", len(events))
|
t.Fatalf("received %d webhook events, want >= 3", len(events))
|
||||||
}
|
}
|
||||||
|
|
||||||
// Verify all events have the correct job_id.
|
// Verify all events have the correct job_id and model.
|
||||||
for i, e := range events {
|
for i, e := range events {
|
||||||
if e.JobID != submitResp.JobID {
|
if e.JobID != submitResp.JobID {
|
||||||
t.Errorf("event[%d].job_id = %q, want %q", i, e.JobID, submitResp.JobID)
|
t.Errorf("event[%d].job_id = %q, want %q", i, e.JobID, submitResp.JobID)
|
||||||
@@ -347,15 +357,18 @@ func TestWebhook_LifecycleEvents(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// First event should be "queued".
|
// Verify that "queued" and "done" each appear exactly once across all events.
|
||||||
if events[0].State != "queued" {
|
// We do not assert wall-clock arrival order because all deliveries are async
|
||||||
t.Errorf("first event state = %q, want %q", events[0].State, "queued")
|
// goroutines that may be scheduled in any order by the Go runtime scheduler.
|
||||||
|
stateCount := make(map[string]int)
|
||||||
|
for _, e := range events {
|
||||||
|
stateCount[e.State]++
|
||||||
}
|
}
|
||||||
|
if stateCount["queued"] != 1 {
|
||||||
// Last event should be "done".
|
t.Errorf("expected exactly 1 'queued' event, got %d", stateCount["queued"])
|
||||||
lastEvent := events[len(events)-1]
|
}
|
||||||
if lastEvent.State != "done" {
|
if stateCount["done"] != 1 {
|
||||||
t.Errorf("last event state = %q, want %q", lastEvent.State, "done")
|
t.Errorf("expected exactly 1 'done' event, got %d", stateCount["done"])
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user