6fd050855a
Replace the Phase 2 in-flight chat gate (buffered channel) with a real SQLite-backed job queue and single worker loop. Every /api/chat request now creates a job row, blocks until the worker completes it, and returns the result transparently. Key changes: - internal/store: NextJob (drain-by-model ordering), IncrementAttempt, ResetInterruptedJobs, DeleteTerminalJobsBefore; busy_timeout pragma - internal/worker: single-threaded worker loop with Notifier for sync handler completion signaling; retry on ConnectionError, terminal fail on HTTPError; crash recovery resets interrupted jobs on startup - internal/webhook: dispatcher infrastructure for async webhook delivery - internal/server: chat handler rewritten to enqueue+wait; old chatGate removed; embeddings remain direct concurrent proxies (ADR-0013) - internal/config: FOREMAN_MAX_ATTEMPTS, FOREMAN_JOB_TTL Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
191 lines
5.9 KiB
Go
191 lines
5.9 KiB
Go
// Package webhook delivers state-change events to job webhook URLs.
|
|
//
|
|
// Why: async job callers need push notification of state transitions without
|
|
// polling (ADR-0005). Delivery must never block or fail the job itself.
|
|
// What: fires HTTP POSTs with JSON payloads to configured webhook URLs, retrying
|
|
// with exponential backoff. Optionally signs payloads with HMAC-SHA256.
|
|
// Test: spin up an in-test HTTP server, fire events, verify receipt, retry on 500,
|
|
// and HMAC signature verification.
|
|
package webhook
|
|
|
|
import (
	"bytes"
	"crypto/hmac"
	"crypto/sha256"
	"encoding/hex"
	"encoding/json"
	"fmt"
	"io"
	"log/slog"
	"net/http"
	"time"
)
|
|
|
|
// Event describes a single job state transition. It is serialized to JSON and
// used as the body of the POST sent to a webhook URL.
//
// Field order is significant: encoding/json emits keys in declaration order.
// None of the fields use omitempty, so unset Result, Artifacts and Error are
// emitted as JSON null rather than being left out.
type Event struct {
	// Identity of the job and the transition being reported.
	JobID         string    `json:"job_id"`
	State         string    `json:"state"`
	PreviousState string    `json:"previous_state"`
	Timestamp     time.Time `json:"timestamp"`

	// Execution details for the job.
	Model   string `json:"model"`
	Attempt int    `json:"attempt"`

	// Outcome data. Result and Artifacts are pre-encoded JSON that is embedded
	// verbatim; Error is a pointer so "no error" can be expressed as null.
	Result    json.RawMessage `json:"result"`
	Artifacts json.RawMessage `json:"artifacts"`
	Error     *string         `json:"error"`
}
|
|
|
|
// Dispatcher posts webhook events to the URLs supplied with each job.
//
// All fields are unexported; within this file they are populated by
// NewDispatcher.
type Dispatcher struct {
	// Delivery dependencies.
	secret     string       // HMAC key; payloads are left unsigned when empty
	httpClient *http.Client // client used for every POST
	logger     *slog.Logger // receives delivery diagnostics

	// Retry policy.
	maxRetries int           // retries allowed after the first attempt
	baseDelay  time.Duration // wait before the first retry; doubles on each later retry
}
|
|
|
|
// NewDispatcher creates a new webhook dispatcher.
|
|
//
|
|
// Why: centralizes webhook delivery config (secret, retry policy) in one place.
|
|
// What: returns a Dispatcher ready to fire events asynchronously.
|
|
// Test: create with a secret, fire an event, verify HMAC header.
|
|
func NewDispatcher(secret string, logger *slog.Logger) *Dispatcher {
|
|
return &Dispatcher{
|
|
secret: secret,
|
|
httpClient: &http.Client{
|
|
Timeout: 10 * time.Second,
|
|
},
|
|
logger: logger,
|
|
maxRetries: 5,
|
|
baseDelay: 1 * time.Second,
|
|
}
|
|
}
|
|
|
|
// Fire sends a webhook event to the given URL in a background goroutine. It never
|
|
// blocks the caller and never returns an error — failed deliveries are logged and
|
|
// dropped per ADR-0005.
|
|
//
|
|
// Why: webhook failures must never block or fail the worker loop.
|
|
// What: marshals the event, spawns a goroutine that retries with backoff.
|
|
// Test: fire an event at a 500-returning server, verify retries happen then stop.
|
|
func (d *Dispatcher) Fire(url string, event Event) {
|
|
go d.deliver(url, event)
|
|
}
|
|
|
|
// deliver attempts to POST the event with retries and backoff.
|
|
func (d *Dispatcher) deliver(url string, event Event) {
|
|
body, err := json.Marshal(event)
|
|
if err != nil {
|
|
d.logger.Error("webhook marshal failed", "error", err, "job_id", event.JobID)
|
|
return
|
|
}
|
|
|
|
for attempt := 0; attempt <= d.maxRetries; attempt++ {
|
|
if attempt > 0 {
|
|
delay := d.baseDelay * (1 << (attempt - 1))
|
|
time.Sleep(delay)
|
|
}
|
|
|
|
req, err := http.NewRequest(http.MethodPost, url, bytes.NewReader(body))
|
|
if err != nil {
|
|
d.logger.Error("webhook request creation failed",
|
|
"error", err, "url", url, "job_id", event.JobID)
|
|
return
|
|
}
|
|
req.Header.Set("Content-Type", "application/json")
|
|
|
|
if d.secret != "" {
|
|
sig := computeHMAC(body, d.secret)
|
|
req.Header.Set("X-Foreman-Signature", "sha256="+sig)
|
|
}
|
|
|
|
resp, err := d.httpClient.Do(req)
|
|
if err != nil {
|
|
d.logger.Warn("webhook delivery failed",
|
|
"error", err, "url", url, "job_id", event.JobID,
|
|
"attempt", attempt+1, "max", d.maxRetries+1)
|
|
continue
|
|
}
|
|
resp.Body.Close()
|
|
|
|
if resp.StatusCode >= 200 && resp.StatusCode < 300 {
|
|
d.logger.Debug("webhook delivered",
|
|
"url", url, "job_id", event.JobID, "state", event.State)
|
|
return
|
|
}
|
|
|
|
d.logger.Warn("webhook non-2xx response",
|
|
"status", resp.StatusCode, "url", url, "job_id", event.JobID,
|
|
"attempt", attempt+1, "max", d.maxRetries+1)
|
|
}
|
|
|
|
d.logger.Error("webhook delivery exhausted retries",
|
|
"url", url, "job_id", event.JobID, "state", event.State)
|
|
}
|
|
|
|
// computeHMAC returns the HMAC-SHA256 of body keyed with key, encoded as a
// lowercase hexadecimal string.
func computeHMAC(body []byte, key string) string {
	hasher := hmac.New(sha256.New, []byte(key))
	// hash.Hash documents that Write never returns an error.
	_, _ = hasher.Write(body)
	digest := hasher.Sum(nil)
	return hex.EncodeToString(digest)
}
|
|
|
|
// VerifySignature checks that the signature header matches the HMAC-SHA256 of
|
|
// the body. Exported for use by webhook receivers.
|
|
//
|
|
// Why: webhook consumers need to verify authenticity of incoming payloads.
|
|
// What: computes HMAC and compares to the provided signature using constant-time comparison.
|
|
// Test: sign a body, verify with correct and incorrect secrets.
|
|
func VerifySignature(body []byte, signature, secret string) bool {
|
|
if len(signature) < 8 || signature[:7] != "sha256=" {
|
|
return false
|
|
}
|
|
expected := computeHMAC(body, secret)
|
|
return hmac.Equal([]byte(expected), []byte(signature[7:]))
|
|
}
|
|
|
|
// FormatArtifacts renders artifact metadata as a JSON array for inclusion in a
// webhook payload. Artifacts whose declared Size is at most 256 KiB and whose
// Data is non-nil are inlined; every other artifact is given a URL reference of
// the form /jobs/<jobID>/artifacts/<name>.
//
// Why: webhook bodies must stay bounded per ADR-0006 (~256KB threshold).
// What: returns nil when there are no artifacts, otherwise the JSON encoding of
// one object per artifact with name, content_type, size and either data or url.
// Test: create artifacts above and below threshold, verify inline vs URL in output.
//
// NOTE(review): inlined Data is converted with string(), and encoding/json
// replaces invalid UTF-8 with U+FFFD, so this looks safe only for text
// artifacts — confirm that binary artifacts never carry Data here.
func FormatArtifacts(jobID string, artifacts []ArtifactMeta) json.RawMessage {
	// Largest declared size that is still inlined.
	const inlineLimit = 256 * 1024

	// entry is the wire shape of one artifact; data and url are omitted from
	// the JSON when empty.
	type entry struct {
		Name        string `json:"name"`
		ContentType string `json:"content_type"`
		Size        int64  `json:"size"`
		Data        string `json:"data,omitempty"`
		URL         string `json:"url,omitempty"`
	}

	if len(artifacts) == 0 {
		return nil
	}

	entries := make([]entry, 0, len(artifacts))
	for _, meta := range artifacts {
		item := entry{
			Name:        meta.Name,
			ContentType: meta.ContentType,
			Size:        meta.Size,
		}
		switch {
		case meta.Data != nil && meta.Size <= inlineLimit:
			item.Data = string(meta.Data)
		default:
			item.URL = fmt.Sprintf("/jobs/%s/artifacts/%s", jobID, meta.Name)
		}
		entries = append(entries, item)
	}

	encoded, err := json.Marshal(entries)
	if err != nil {
		// entry holds only strings and an int64, so this is not expected;
		// a nil result matches what an unencodable value has always produced.
		return nil
	}
	return json.RawMessage(encoded)
}

// ArtifactMeta is the input record FormatArtifacts consumes for each artifact.
type ArtifactMeta struct {
	Name        string // used as the "name" value and as the last URL path segment
	ContentType string // copied to "content_type"
	Size        int64  // declared size; compared against the inline threshold
	Data        []byte // artifact content; nil forces a URL reference
}
|