// foreman/cmd/foreman/main.go

// Package main is the entry point for the foreman daemon.
//
// Why: foreman is a single binary with subcommands; the main package handles
// argument dispatch and wiring.
// What: dispatches on the first argument (serve, submit, jobs, ps), defaulting
// to serve when none is given; only serve is implemented so far.
// Test: build and run with --help; test individual packages separately.
package main

import (
	"context"
	"encoding/json"
	"fmt"
	"log/slog"
	"os"
	"os/signal"
	"sync"
	"syscall"
	"time"

	"gitea.stevedudenhoeffer.com/steve/foreman/internal/config"
	"gitea.stevedudenhoeffer.com/steve/foreman/internal/ollama"
	"gitea.stevedudenhoeffer.com/steve/foreman/internal/server"
	"gitea.stevedudenhoeffer.com/steve/foreman/internal/store"
	"gitea.stevedudenhoeffer.com/steve/foreman/internal/webhook"
	"gitea.stevedudenhoeffer.com/steve/foreman/internal/worker"
)

// main selects and runs a foreman subcommand.
//
// Why: foreman ships as one binary, so the entry point has to route to the
// requested mode.
// What: reads the subcommand from os.Args[1], defaulting to "serve" when no
// argument is given. "serve" runs the daemon; "submit", "jobs", and "ps" are
// placeholders that report "not yet implemented"; "help", "-h", and "--help"
// print the usage line to stdout and exit 0. Every failure path (serve error,
// unimplemented command, unknown command) exits with status 1.
// Test: run with no arguments, with --help, and with an unknown command.
func main() {
	const usage = "usage: foreman [serve|submit|jobs|ps]"

	// Structured JSON logs go to stderr so stdout stays free for command output.
	logger := slog.New(slog.NewJSONHandler(os.Stderr, &slog.HandlerOptions{
		Level: slog.LevelInfo,
	}))

	cmd := "serve"
	if len(os.Args) > 1 {
		cmd = os.Args[1]
	}

	switch cmd {
	case "serve":
		if err := runServe(logger); err != nil {
			logger.Error("serve failed", "error", err)
			os.Exit(1)
		}
	case "help", "-h", "--help":
		// Asking for help is not an error: usage goes to stdout, exit status 0.
		fmt.Println(usage)
	case "submit", "jobs", "ps":
		// Recognized but unimplemented subcommands share one message shape.
		fmt.Fprintf(os.Stderr, "%s: not yet implemented\n", cmd)
		os.Exit(1)
	default:
		fmt.Fprintf(os.Stderr, "unknown command: %s\n%s\n", cmd, usage)
		os.Exit(1)
	}
}

// runServe wires up every daemon component and serves HTTP until the server
// stops or the process receives SIGINT/SIGTERM.
//
// Why: the serve subcommand is the daemon's primary mode of operation.
// What: wires config -> store -> ollama client -> poller -> embedder warmup ->
// worker -> pruner -> server. The poller, worker, and pruner run in goroutines
// bound to a signal-aware context. When a signal arrives or the server returns,
// that context is cancelled and runServe waits (bounded by a grace period) for
// those goroutines to finish before the deferred store close runs.
// Returns: a wrapped error if loading the config or opening the store fails,
// whatever ListenAndServe returned if the server stopped on its own, or nil
// after a signal-triggered shutdown.
// Test: tested indirectly via integration tests; each component is unit tested.
func runServe(logger *slog.Logger) error {
	// Upper bound on how long shutdown waits for background goroutines.
	const shutdownGrace = 10 * time.Second

	cfg, err := config.Load()
	if err != nil {
		return fmt.Errorf("load config: %w", err)
	}

	// The token itself is never logged; only whether auth is enabled.
	logger.Info("configuration loaded",
		"addr", cfg.Addr,
		"ollama_url", cfg.OllamaURL,
		"db_path", cfg.DBPath,
		"poll_interval", cfg.PollInterval,
		"embed_model", cfg.EmbedModel,
		"auth_enabled", cfg.Token != "",
		"max_attempts", cfg.MaxAttempts,
		"job_ttl", cfg.JobTTL,
	)

	st, err := store.Open(cfg.DBPath)
	if err != nil {
		return fmt.Errorf("open store: %w", err)
	}
	defer st.Close()

	// Create the Ollama client.
	client := ollama.NewClient(cfg.OllamaURL, cfg.OllamaToken)

	// ctx is cancelled on SIGINT/SIGTERM. While the registration is active the
	// signals no longer terminate the process by default, so this function must
	// react to ctx.Done() itself (see the select below).
	ctx, stop := signal.NotifyContext(context.Background(), syscall.SIGINT, syscall.SIGTERM)
	defer stop()

	// background tracks the long-running goroutines so shutdown can wait for
	// them before the store is closed.
	var background sync.WaitGroup
	spawn := func(task func()) {
		background.Add(1)
		go func() {
			defer background.Done()
			task()
		}()
	}

	// Create the model inventory and start the poller.
	inventory := ollama.NewModelInventory(client, logger)
	spawn(func() { inventory.Start(ctx, cfg.PollInterval) })

	// Warm the embedder model if configured. The call is synchronous; a failed
	// warmup is logged inside warmEmbedder and does not abort startup.
	if cfg.EmbedModel != "" {
		warmEmbedder(ctx, client, cfg.EmbedModel, logger)
	}

	// Create the webhook dispatcher.
	dispatcher := webhook.NewDispatcher(cfg.WebhookSecret, logger)

	// Create the notifier and worker, then start the worker loop.
	notifier := worker.NewNotifier()
	w := worker.New(st, client, inventory, notifier, dispatcher, logger)
	spawn(func() { w.Run(ctx) })

	// Start the TTL pruner.
	spawn(func() { runPruner(ctx, st, cfg.JobTTL, logger) })

	// Serve in a goroutine so a shutdown signal can be observed here. The
	// channel is buffered so the goroutine never blocks on send if we have
	// already stopped listening for its result.
	srv := server.New(cfg, st, client, inventory, notifier, w, dispatcher, logger)
	serveErr := make(chan error, 1)
	go func() { serveErr <- srv.ListenAndServe() }()

	var result error
	select {
	case result = <-serveErr:
		// Server stopped on its own; fall through to stop the background tasks.
	case <-ctx.Done():
		logger.Info("shutdown signal received")
		// NOTE(review): the server's shutdown API is not visible from this
		// file. If it offers a graceful Shutdown, call it here — TODO confirm.
	}

	// Cancel ctx (needed on the server-error path) and restore default signal
	// handling so a second SIGINT/SIGTERM terminates the process immediately.
	stop()

	drained := make(chan struct{})
	go func() {
		background.Wait()
		close(drained)
	}()

	grace := time.NewTimer(shutdownGrace)
	defer grace.Stop()
	select {
	case <-drained:
	case <-grace.C:
		logger.Warn("background tasks did not stop within grace period", "grace", shutdownGrace)
	}

	return result
}

// warmEmbedder issues a single throwaway embed request so the embedder model
// is loaded before real traffic arrives.
//
// Why: ADR-0013 wants the embedder always resident so embedding requests stay
// fast and never trigger a model swap.
// What: calls client.Embed with the input "warmup" and keep_alive set to -1.
// The call is synchronous; an error is logged at warn level and otherwise
// ignored, so a failed warmup never aborts startup.
// Test: start foreman with FOREMAN_EMBED_MODEL set, verify the warmup call fires.
func warmEmbedder(ctx context.Context, client ollama.Client, model string, logger *slog.Logger) {
	logger.Info("warming embedder model", "model", model)

	// Smallest possible payload: one short string, with keep_alive of -1.
	warmup := ollama.EmbedRequest{Model: model}
	warmup.Input = json.RawMessage(`"warmup"`)
	warmup.KeepAlive = json.RawMessage(`-1`)

	// The embedding itself is discarded; only success or failure matters.
	if _, warmErr := client.Embed(ctx, warmup); warmErr != nil {
		logger.Warn("embedder warmup failed (non-fatal)", "model", model, "error", warmErr)
		return
	}

	logger.Info("embedder warmed successfully", "model", model)
}

// runPruner deletes terminal jobs older than the configured TTL, once at
// startup and then on a fixed interval until ctx is cancelled.
//
// Why: unbounded storage growth must be prevented (ADR-0008). Pruning once
// immediately means a daemon that restarts more often than the prune interval
// still gets pruned, instead of always waiting for a first tick it never
// reaches.
// What: a non-positive ttl falls back to 24h. The interval is ttl/4 with a
// floor of one minute. Each pass calls DeleteTerminalJobsBefore with a cutoff
// of now-ttl (UTC); errors are logged and the loop carries on.
// Test: create old terminal jobs, run pruner, verify they are deleted.
func runPruner(ctx context.Context, st *store.Store, ttl time.Duration, logger *slog.Logger) {
	if ttl <= 0 {
		ttl = 24 * time.Hour
	}

	// Prune every ttl/4, minimum 1 minute.
	interval := ttl / 4
	if interval < time.Minute {
		interval = time.Minute
	}

	// prune performs one deletion pass. A failure is logged and not retried
	// until the next scheduled pass.
	prune := func() {
		cutoff := time.Now().UTC().Add(-ttl)
		n, err := st.DeleteTerminalJobsBefore(cutoff)
		if err != nil {
			logger.Error("pruner failed", "error", err)
			return
		}
		if n > 0 {
			logger.Info("pruner deleted old jobs", "count", n, "cutoff", cutoff)
		}
	}

	// Skip the startup pass if shutdown has already been requested.
	if ctx.Err() != nil {
		return
	}
	prune()

	ticker := time.NewTicker(interval)
	defer ticker.Stop()

	for {
		select {
		case <-ctx.Done():
			return
		case <-ticker.C:
			prune()
		}
	}
}