6fd050855a
Replace the Phase 2 in-flight chat gate (buffered channel) with a real SQLite-backed job queue and single worker loop. Every /api/chat request now creates a job row, blocks until the worker completes it, and returns the result transparently. Key changes: - internal/store: NextJob (drain-by-model ordering), IncrementAttempt, ResetInterruptedJobs, DeleteTerminalJobsBefore; busy_timeout pragma - internal/worker: single-threaded worker loop with Notifier for sync handler completion signaling; retry on ConnectionError, terminal fail on HTTPError; crash recovery resets interrupted jobs on startup - internal/webhook: dispatcher infrastructure for async webhook delivery - internal/server: chat handler rewritten to enqueue+wait; old chatGate removed; embeddings remain direct concurrent proxies (ADR-0013) - internal/config: FOREMAN_MAX_ATTEMPTS, FOREMAN_JOB_TTL Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
182 lines
5.3 KiB
Go
182 lines
5.3 KiB
Go
// Package main is the entry point for the foreman daemon.
//
// Why: foreman is a single binary with subcommands; the main package handles
// argument dispatch and wiring.
// What: parses subcommands (serve, submit, jobs, ps) and runs the selected one.
// Test: build and run with --help; test individual packages separately.
package main
|
|
|
|
import (
|
|
"context"
|
|
"encoding/json"
|
|
"fmt"
|
|
"log/slog"
|
|
"os"
|
|
"os/signal"
|
|
"syscall"
|
|
"time"
|
|
|
|
"gitea.stevedudenhoeffer.com/steve/foreman/internal/config"
|
|
"gitea.stevedudenhoeffer.com/steve/foreman/internal/ollama"
|
|
"gitea.stevedudenhoeffer.com/steve/foreman/internal/server"
|
|
"gitea.stevedudenhoeffer.com/steve/foreman/internal/store"
|
|
"gitea.stevedudenhoeffer.com/steve/foreman/internal/webhook"
|
|
"gitea.stevedudenhoeffer.com/steve/foreman/internal/worker"
|
|
)
|
|
|
|
func main() {
|
|
logger := slog.New(slog.NewJSONHandler(os.Stderr, &slog.HandlerOptions{
|
|
Level: slog.LevelInfo,
|
|
}))
|
|
|
|
cmd := "serve"
|
|
if len(os.Args) > 1 {
|
|
cmd = os.Args[1]
|
|
}
|
|
|
|
switch cmd {
|
|
case "serve":
|
|
if err := runServe(logger); err != nil {
|
|
logger.Error("serve failed", "error", err)
|
|
os.Exit(1)
|
|
}
|
|
case "submit":
|
|
fmt.Fprintln(os.Stderr, "submit: not yet implemented")
|
|
os.Exit(1)
|
|
case "jobs":
|
|
fmt.Fprintln(os.Stderr, "jobs: not yet implemented")
|
|
os.Exit(1)
|
|
case "ps":
|
|
fmt.Fprintln(os.Stderr, "ps: not yet implemented")
|
|
os.Exit(1)
|
|
default:
|
|
fmt.Fprintf(os.Stderr, "unknown command: %s\nusage: foreman [serve|submit|jobs|ps]\n", cmd)
|
|
os.Exit(1)
|
|
}
|
|
}
|
|
|
|
// runServe loads configuration, opens the store, creates the Ollama client,
|
|
// starts the model poller, warms the embedder, creates the worker, webhook
|
|
// dispatcher, and starts the HTTP server.
|
|
//
|
|
// Why: the serve subcommand is the daemon's primary mode of operation.
|
|
// What: wires config -> store -> ollama client -> poller -> worker -> server and
|
|
// blocks on ListenAndServe. Graceful shutdown on SIGINT/SIGTERM stops the worker,
|
|
// poller, and pruner.
|
|
// Test: tested indirectly via integration tests; each component is unit tested.
|
|
func runServe(logger *slog.Logger) error {
|
|
cfg, err := config.Load()
|
|
if err != nil {
|
|
return fmt.Errorf("load config: %w", err)
|
|
}
|
|
|
|
logger.Info("configuration loaded",
|
|
"addr", cfg.Addr,
|
|
"ollama_url", cfg.OllamaURL,
|
|
"db_path", cfg.DBPath,
|
|
"poll_interval", cfg.PollInterval,
|
|
"embed_model", cfg.EmbedModel,
|
|
"auth_enabled", cfg.Token != "",
|
|
"max_attempts", cfg.MaxAttempts,
|
|
"job_ttl", cfg.JobTTL,
|
|
)
|
|
|
|
st, err := store.Open(cfg.DBPath)
|
|
if err != nil {
|
|
return fmt.Errorf("open store: %w", err)
|
|
}
|
|
defer st.Close()
|
|
|
|
// Create the Ollama client.
|
|
client := ollama.NewClient(cfg.OllamaURL, cfg.OllamaToken)
|
|
|
|
// Create the model inventory and start the poller.
|
|
inventory := ollama.NewModelInventory(client, logger)
|
|
ctx, cancel := signal.NotifyContext(context.Background(), syscall.SIGINT, syscall.SIGTERM)
|
|
defer cancel()
|
|
|
|
go inventory.Start(ctx, cfg.PollInterval)
|
|
|
|
// Warm the embedder model if configured (non-blocking).
|
|
if cfg.EmbedModel != "" {
|
|
warmEmbedder(ctx, client, cfg.EmbedModel, logger)
|
|
}
|
|
|
|
// Create the webhook dispatcher.
|
|
dispatcher := webhook.NewDispatcher(cfg.WebhookSecret, logger)
|
|
|
|
// Create the notifier and worker.
|
|
notifier := worker.NewNotifier()
|
|
w := worker.New(st, client, inventory, notifier, dispatcher, logger)
|
|
|
|
// Start the worker loop in a goroutine.
|
|
go w.Run(ctx)
|
|
|
|
// Start the TTL pruner in a goroutine.
|
|
go runPruner(ctx, st, cfg.JobTTL, logger)
|
|
|
|
srv := server.New(cfg, st, client, inventory, notifier, w, dispatcher, logger)
|
|
return srv.ListenAndServe()
|
|
}
|
|
|
|
// warmEmbedder sends a trivial embed request with keep_alive=-1 to pin the
|
|
// embedder in slot 1 (ADR-0013). Does not block startup on failure.
|
|
//
|
|
// Why: the embedder must be always-resident so embedding requests are fast and
|
|
// never trigger a swap (ADR-0013).
|
|
// What: issues /api/embed with keep_alive:-1 to load and pin the model.
|
|
// Test: start foreman with FOREMAN_EMBED_MODEL set, verify the warmup call fires.
|
|
func warmEmbedder(ctx context.Context, client ollama.Client, model string, logger *slog.Logger) {
|
|
logger.Info("warming embedder model", "model", model)
|
|
|
|
req := ollama.EmbedRequest{
|
|
Model: model,
|
|
Input: json.RawMessage(`"warmup"`),
|
|
KeepAlive: json.RawMessage(`-1`),
|
|
}
|
|
|
|
_, err := client.Embed(ctx, req)
|
|
if err != nil {
|
|
logger.Warn("embedder warmup failed (non-fatal)", "model", model, "error", err)
|
|
return
|
|
}
|
|
logger.Info("embedder warmed successfully", "model", model)
|
|
}
|
|
|
|
// runPruner periodically deletes terminal jobs older than the configured TTL.
|
|
//
|
|
// Why: unbounded storage growth must be prevented (ADR-0008).
|
|
// What: runs a ticker that calls DeleteTerminalJobsBefore with the TTL cutoff.
|
|
// Test: create old terminal jobs, run pruner, verify they are deleted.
|
|
func runPruner(ctx context.Context, st *store.Store, ttl time.Duration, logger *slog.Logger) {
|
|
if ttl <= 0 {
|
|
ttl = 24 * time.Hour
|
|
}
|
|
|
|
// Prune every ttl/4, minimum 1 minute.
|
|
interval := ttl / 4
|
|
if interval < time.Minute {
|
|
interval = time.Minute
|
|
}
|
|
|
|
ticker := time.NewTicker(interval)
|
|
defer ticker.Stop()
|
|
|
|
for {
|
|
select {
|
|
case <-ctx.Done():
|
|
return
|
|
case <-ticker.C:
|
|
cutoff := time.Now().UTC().Add(-ttl)
|
|
n, err := st.DeleteTerminalJobsBefore(cutoff)
|
|
if err != nil {
|
|
logger.Error("pruner failed", "error", err)
|
|
continue
|
|
}
|
|
if n > 0 {
|
|
logger.Info("pruner deleted old jobs", "count", n, "cutoff", cutoff)
|
|
}
|
|
}
|
|
}
|
|
}
|