feat: add durable queue, single worker, and drain-by-model scheduling

Replace the Phase 2 in-flight chat gate (buffered channel) with a real
SQLite-backed job queue and single worker loop. Every /api/chat request
now creates a job row, blocks until the worker completes it, and returns
the result transparently.

Key changes:
- internal/store: NextJob (drain-by-model ordering), IncrementAttempt,
  ResetInterruptedJobs, DeleteTerminalJobsBefore; busy_timeout pragma
- internal/worker: single-threaded worker loop with Notifier for sync
  handler completion signaling; retry on ConnectionError, terminal fail
  on HTTPError; crash recovery resets interrupted jobs on startup
- internal/webhook: dispatcher infrastructure for async webhook delivery
- internal/server: chat handler rewritten to enqueue+wait; old chatGate
  removed; embeddings remain direct concurrent proxies (ADR-0013)
- internal/config: FOREMAN_MAX_ATTEMPTS, FOREMAN_JOB_TTL

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-23 18:29:32 -04:00
parent 27f196d333
commit 6fd050855a
11 changed files with 1830 additions and 183 deletions
+61 -4
View File
@@ -14,11 +14,14 @@ import (
"os"
"os/signal"
"syscall"
"time"
"gitea.stevedudenhoeffer.com/steve/foreman/internal/config"
"gitea.stevedudenhoeffer.com/steve/foreman/internal/ollama"
"gitea.stevedudenhoeffer.com/steve/foreman/internal/server"
"gitea.stevedudenhoeffer.com/steve/foreman/internal/store"
"gitea.stevedudenhoeffer.com/steve/foreman/internal/webhook"
"gitea.stevedudenhoeffer.com/steve/foreman/internal/worker"
)
func main() {
@@ -53,11 +56,13 @@ func main() {
}
// runServe loads configuration, opens the store, creates the Ollama client,
// starts the model poller, warms the embedder, and starts the HTTP server.
// starts the model poller, warms the embedder, creates the webhook dispatcher
// and worker, launches the worker loop and TTL pruner, and starts the HTTP server.
//
// Why: the serve subcommand is the daemon's primary mode of operation.
// What: wires config -> store -> ollama client -> poller -> server and blocks on
// ListenAndServe. Graceful shutdown on SIGINT/SIGTERM cancels the poller.
// What: wires config -> store -> ollama client -> poller -> worker -> server and
// blocks on ListenAndServe. Graceful shutdown on SIGINT/SIGTERM stops the worker,
// poller, and pruner.
// Test: tested indirectly via integration tests; each component is unit tested.
func runServe(logger *slog.Logger) error {
cfg, err := config.Load()
@@ -72,6 +77,8 @@ func runServe(logger *slog.Logger) error {
"poll_interval", cfg.PollInterval,
"embed_model", cfg.EmbedModel,
"auth_enabled", cfg.Token != "",
"max_attempts", cfg.MaxAttempts,
"job_ttl", cfg.JobTTL,
)
st, err := store.Open(cfg.DBPath)
@@ -95,7 +102,20 @@ func runServe(logger *slog.Logger) error {
warmEmbedder(ctx, client, cfg.EmbedModel, logger)
}
srv := server.New(cfg, st, client, inventory, logger)
// Create the webhook dispatcher.
dispatcher := webhook.NewDispatcher(cfg.WebhookSecret, logger)
// Create the notifier and worker.
notifier := worker.NewNotifier()
w := worker.New(st, client, inventory, notifier, dispatcher, logger)
// Start the worker loop in a goroutine.
go w.Run(ctx)
// Start the TTL pruner in a goroutine.
go runPruner(ctx, st, cfg.JobTTL, logger)
srv := server.New(cfg, st, client, inventory, notifier, w, dispatcher, logger)
return srv.ListenAndServe()
}
@@ -122,3 +142,40 @@ func warmEmbedder(ctx context.Context, client ollama.Client, model string, logge
}
logger.Info("embedder warmed successfully", "model", model)
}
// runPruner periodically deletes terminal jobs older than the configured TTL.
//
// Why: unbounded storage growth must be prevented (ADR-0008). A ticker only
// fires after its first full interval, so a daemon that restarts more often
// than the prune interval would otherwise never prune at all.
// What: runs one prune pass immediately, then one per tick, each calling
// DeleteTerminalJobsBefore with a cutoff of now (UTC) minus the TTL. A
// non-positive ttl falls back to 24h. The interval is ttl/4, clamped to a
// minimum of one minute. Returns when ctx is cancelled. Store errors are
// logged and the loop keeps running so a transient failure does not stop
// pruning.
// Test: create old terminal jobs, run pruner, verify they are deleted.
func runPruner(ctx context.Context, st *store.Store, ttl time.Duration, logger *slog.Logger) {
	if ttl <= 0 {
		ttl = 24 * time.Hour
	}

	// Prune every ttl/4, minimum 1 minute.
	interval := ttl / 4
	if interval < time.Minute {
		interval = time.Minute
	}

	// prune performs a single pass; the cutoff is recomputed on every call so
	// it always trails the current time by exactly ttl.
	prune := func() {
		cutoff := time.Now().UTC().Add(-ttl)
		n, err := st.DeleteTerminalJobsBefore(cutoff)
		if err != nil {
			logger.Error("pruner failed", "error", err)
			return
		}
		if n > 0 {
			logger.Info("pruner deleted old jobs", "count", n, "cutoff", cutoff)
		}
	}

	// Do not touch the store if shutdown was requested before we started.
	if ctx.Err() != nil {
		return
	}

	// Initial pass: clears any backlog left over from before the restart
	// instead of waiting a full interval for the first tick.
	prune()

	ticker := time.NewTicker(interval)
	defer ticker.Stop()

	for {
		select {
		case <-ctx.Done():
			return
		case <-ticker.C:
			prune()
		}
	}
}