// foreman/internal/server/server.go

// Package server provides the HTTP API for the foreman daemon.
//
// Why: foreman exposes a native Ollama-compatible API plus async job endpoints;
// centralizing routing and middleware here keeps cmd/foreman thin.
// What: creates a stdlib net/http server with health checks, optional bearer-token
// auth, Ollama passthrough (chat, tags, ps, embed), /jobs async surface, and
// artifact serving.
// Test: start the server with httptest, hit /healthz, verify 200; set a token,
// verify 401 without it; test Ollama passthrough routes and /jobs lifecycle.
package server

import (
	"crypto/rand"
	"crypto/subtle"
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"log/slog"
	"net/http"
	"strings"
	"time"

	"github.com/oklog/ulid/v2"

	"gitea.stevedudenhoeffer.com/steve/foreman/internal/config"
	"gitea.stevedudenhoeffer.com/steve/foreman/internal/ollama"
	"gitea.stevedudenhoeffer.com/steve/foreman/internal/store"
	"gitea.stevedudenhoeffer.com/steve/foreman/internal/webhook"
	"gitea.stevedudenhoeffer.com/steve/foreman/internal/worker"
)

// Server holds the HTTP server and its dependencies.
//
// Why: handlers are methods on Server so they reach every injected dependency
// without package-level state.
// What: groups the configuration, job store, Ollama client, model inventory,
// completion notifier, worker handle, webhook dispatcher, route mux, and logger.
// Test: build one with New and drive it through Handler() with httptest.
type Server struct {
	// cfg is the daemon configuration; this file reads Addr, Token, and MaxAttempts.
	cfg        config.Config
	// store persists jobs; handleChat calls CreateJob and GetJob on it.
	store      *store.Store
	// client talks to the Ollama target; handleEmbed calls RawEmbed on it.
	client     ollama.Client
	// inventory is the cached model state. handleHealthz tolerates a nil value;
	// handleTags, handlePs, and handleChat dereference it unconditionally.
	inventory  *ollama.ModelInventory
	// notifier hands handleChat a channel to wait on and the finished job's result.
	notifier   *worker.Notifier
	// workerRef is woken after a job is enqueued; handleChat skips the wake when it is nil.
	workerRef  *worker.Worker
	// dispatcher is stored by New; it is not referenced by the code in this section.
	dispatcher *webhook.Dispatcher
	// mux is the route table populated by routes().
	mux        *http.ServeMux
	// logger receives the structured log lines emitted by this file.
	logger     *slog.Logger
}

// New builds a Server from its collaborators and registers every route on a
// freshly created mux before returning it.
//
// Why: passing dependencies in explicitly keeps the server easy to construct
// in tests and free of hidden globals.
// What: stores config, store, client, inventory, notifier, worker, dispatcher,
// and logger on the Server, allocates the ServeMux, then calls routes().
// Test: call New with test doubles and exercise the result via httptest.
func New(
	cfg config.Config,
	st *store.Store,
	client ollama.Client,
	inv *ollama.ModelInventory,
	notifier *worker.Notifier,
	w *worker.Worker,
	dispatcher *webhook.Dispatcher,
	logger *slog.Logger,
) *Server {
	srv := new(Server)

	// Injected collaborators.
	srv.cfg = cfg
	srv.store = st
	srv.client = client
	srv.inventory = inv
	srv.notifier = notifier
	srv.workerRef = w
	srv.dispatcher = dispatcher
	srv.logger = logger

	// Routing table owned by the server itself.
	srv.mux = http.NewServeMux()
	srv.routes()

	return srv
}

// Handler exposes the server as an http.Handler, adding bearer-token auth
// only when a token is configured.
//
// Why: tests can mount the handler on httptest without opening a real listener.
// What: returns the bare mux when cfg.Token is empty, otherwise the mux wrapped
// by authMiddleware.
// Test: call Handler(), serve it with httptest.NewServer, and hit the endpoints
// with and without a configured token.
func (s *Server) Handler() http.Handler {
	if s.cfg.Token == "" {
		// No token configured: every route is served without authentication.
		return s.mux
	}
	return s.authMiddleware(s.mux)
}

// ListenAndServe starts the HTTP server on the configured address and blocks
// until the listener fails.
//
// Why: the package-level http.ListenAndServe builds a server with no timeouts
// at all, so a client that never finishes sending its request headers can pin
// a connection open forever.
// What: logs the address, then serves Handler() from an explicit http.Server
// with a header-read deadline. No read or write timeout is set on purpose:
// /api/chat blocks until a queued job completes, which can take arbitrarily
// long.
// Test: start it on a free port, open a TCP connection that sends only a
// partial request line, and assert the server closes it after the deadline.
func (s *Server) ListenAndServe() error {
	s.logger.Info("starting server", "addr", s.cfg.Addr)

	srv := &http.Server{
		Addr:    s.cfg.Addr,
		Handler: s.Handler(),
		// Bounds only the time spent reading request headers; the deadline is
		// reset once the headers are in, so slow bodies and long-running
		// handlers are unaffected.
		ReadHeaderTimeout: 10 * time.Second,
	}
	return srv.ListenAndServe()
}

// routes wires every endpoint served by this file onto the mux and then
// delegates to registerJobRoutes for the job endpoints.
//
// Why: keeping the route table in one place makes the API surface easy to scan.
// What: registers the health check, the tags/ps inventory views, the chat
// passthrough, and both embedding paths (which share handleEmbed).
// Test: serve Handler() with httptest and confirm each method+path pair
// reaches its handler.
func (s *Server) routes() {
	endpoints := []struct {
		pattern string
		handle  func(http.ResponseWriter, *http.Request)
	}{
		{"GET /healthz", s.handleHealthz},
		{"GET /api/tags", s.handleTags},
		{"GET /api/ps", s.handlePs},
		{"POST /api/chat", s.handleChat},
		{"POST /api/embed", s.handleEmbed},
		{"POST /api/embeddings", s.handleEmbed},
	}
	for _, ep := range endpoints {
		s.mux.HandleFunc(ep.pattern, ep.handle)
	}

	s.registerJobRoutes()
}

// healthResponse is the JSON shape returned by /healthz.
//
// What: serializes as {"status": ..., "degraded": ...}; handleHealthz always
// sets Status to "ok" and copies Degraded from the model inventory when one
// is present.
type healthResponse struct {
	// Status is the overall health string.
	Status   string `json:"status"`
	// Degraded mirrors the inventory's Degraded() flag; false when no inventory is set.
	Degraded bool   `json:"degraded"`
}

// handleHealthz reports daemon health together with the inventory's degraded
// flag.
//
// Why: probes and operators get one endpoint that also reveals whether the
// inventory currently considers itself degraded.
// What: always answers 200 with {"status":"ok","degraded":<bool>}; degraded
// stays false when the server has no inventory.
// Test: serve with a degraded inventory and assert degraded=true; serve with a
// nil inventory and assert degraded=false.
func (s *Server) handleHealthz(w http.ResponseWriter, r *http.Request) {
	report := healthResponse{Status: "ok"}
	if inv := s.inventory; inv != nil {
		report.Degraded = inv.Degraded()
	}

	w.Header().Set("Content-Type", "application/json")
	w.WriteHeader(http.StatusOK)
	// Best-effort: the encode error is intentionally discarded.
	_ = json.NewEncoder(w).Encode(report)
}

// handleTags serves the inventory's cached model list in Ollama's tags format.
//
// Why: callers should be able to point an Ollama client at foreman and list
// models without knowing the difference.
// What: wraps inventory.Models() in an ollama.TagsResponse and writes it as JSON.
// Test: seed the inventory, GET /api/tags, and compare the decoded body.
func (s *Server) handleTags(w http.ResponseWriter, r *http.Request) {
	payload := ollama.TagsResponse{Models: s.inventory.Models()}

	w.Header().Set("Content-Type", "application/json")
	// Best-effort: the encode error is intentionally discarded.
	_ = json.NewEncoder(w).Encode(payload)
}

// handlePs serves the inventory's cached list of resident models in Ollama's
// ps format.
//
// Why: callers can see which models are currently loaded on the target.
// What: wraps inventory.ResidentModels() in an ollama.PsResponse and writes it
// as JSON.
// Test: seed the inventory with running models, GET /api/ps, and compare the
// decoded body.
func (s *Server) handlePs(w http.ResponseWriter, r *http.Request) {
	payload := ollama.PsResponse{Models: s.inventory.ResidentModels()}

	w.Header().Set("Content-Type", "application/json")
	// Best-effort: the encode error is intentionally discarded.
	_ = json.NewEncoder(w).Encode(payload)
}

// handleEmbed proxies embedding requests directly and concurrently to the target.
// These bypass any serialization gate per ADR-0013.
//
// Why: embeddings hit the always-resident embedder and must not wait behind chat jobs.
// What: reads the whole request body, forwards it with client.RawEmbed, and
// relays the upstream status, headers, and body. An upstream *ollama.HTTPError
// (found anywhere in the error chain) is relayed with its own status and body;
// any other failure becomes 502. A failure while streaming the response back
// is logged, because the status line has already been sent by then.
// Test: send concurrent embed requests and assert they all complete without
// serialization; return a wrapped *ollama.HTTPError from a fake client and
// assert its status code reaches the caller.
func (s *Server) handleEmbed(w http.ResponseWriter, r *http.Request) {
	body, err := io.ReadAll(r.Body)
	if err != nil {
		http.Error(w, `{"error":"failed to read request body"}`, http.StatusBadRequest)
		return
	}

	resp, err := s.client.RawEmbed(r.Context(), body)
	if err != nil {
		s.logger.Error("embed proxy failed", "error", err)
		// errors.As also matches an HTTPError that a client implementation
		// wrapped with extra context, which a direct type assertion would miss.
		var httpErr *ollama.HTTPError
		if errors.As(err, &httpErr) {
			http.Error(w, httpErr.Body, httpErr.StatusCode)
			return
		}
		http.Error(w, `{"error":"target unreachable"}`, http.StatusBadGateway)
		return
	}
	defer resp.Body.Close()

	// Relay the upstream headers, defaulting the content type to JSON when the
	// target did not send one.
	hdr := w.Header()
	for k, vv := range resp.Header {
		for _, v := range vv {
			hdr.Add(k, v)
		}
	}
	if hdr.Get("Content-Type") == "" {
		hdr.Set("Content-Type", "application/json")
	}
	w.WriteHeader(resp.StatusCode)

	// The status is already committed, so a copy failure can only be recorded.
	if _, err := io.Copy(w, resp.Body); err != nil {
		s.logger.Warn("embed response copy failed", "error", err)
	}
}

// handleChat is the synchronous passthrough for /api/chat. It enqueues a job in
// the store and blocks until the notifier signals completion, then returns the
// stored result as the response body.
//
// Why: the sync passthrough is foreman's primary API surface for go-llm (ADR-0003).
// The response blocks until done so the caller gets a transparent Ollama experience.
// What: validates the model (re-polling the inventory once on a miss), creates
// a job, registers a completion waiter, wakes the worker, and blocks until the
// job finishes or the request context is cancelled.
// Responses: 400 for an unreadable body, invalid JSON, or a missing model; 404
// for an unknown model; 500 when the job cannot be enqueued or looked up; 502
// when the job failed; 503 when the request is cancelled while waiting; 200
// with the job result otherwise. Error bodies that embed a dynamic message are
// built with encoding/json so they are always valid JSON.
// Test: verify model validation (404 on unknown), serialization (jobs execute one
// at a time), that the HTTP response matches the Ollama chat response, and that
// a failure message containing quotes still decodes as JSON.
func (s *Server) handleChat(w http.ResponseWriter, r *http.Request) {
	// jsonError sends {"error": msg} with msg properly JSON-escaped. Splicing
	// the text into a literal with %s (or Go's %q quoting) can emit invalid
	// JSON when the message contains quotes, backslashes, or control bytes.
	jsonError := func(status int, msg string) {
		payload, err := json.Marshal(struct {
			Error string `json:"error"`
		}{Error: msg})
		if err != nil {
			// Not expected for a plain string field; keep the body well-formed anyway.
			payload = []byte(`{"error":"internal error"}`)
		}
		http.Error(w, string(payload), status)
	}

	body, err := io.ReadAll(r.Body)
	if err != nil {
		http.Error(w, `{"error":"failed to read request body"}`, http.StatusBadRequest)
		return
	}

	// Parse just enough to validate the model.
	var partial struct {
		Model string `json:"model"`
	}
	if err := json.Unmarshal(body, &partial); err != nil {
		http.Error(w, `{"error":"invalid JSON body"}`, http.StatusBadRequest)
		return
	}
	if partial.Model == "" {
		http.Error(w, `{"error":"model is required"}`, http.StatusBadRequest)
		return
	}

	// Validate the model exists. One re-poll on miss (ADR-0007).
	if !s.inventory.HasModel(partial.Model) {
		if err := s.inventory.Refresh(r.Context()); err != nil {
			// A failed refresh is not fatal by itself; the second lookup decides.
			s.logger.Warn("model re-poll failed", "error", err)
		}
		if !s.inventory.HasModel(partial.Model) {
			http.Error(w, `{"error":"model not found"}`, http.StatusNotFound)
			return
		}
	}

	// Generate a job ID and enqueue.
	jobID := ulid.MustNew(ulid.Timestamp(time.Now()), rand.Reader).String()

	maxAttempts := s.cfg.MaxAttempts
	if maxAttempts == 0 {
		// An unset MaxAttempts falls back to three attempts.
		maxAttempts = 3
	}

	job := store.Job{
		ID:          jobID,
		Model:       partial.Model,
		Payload:     json.RawMessage(body),
		MaxAttempts: maxAttempts,
	}

	if _, err := s.store.CreateJob(job); err != nil {
		s.logger.Error("failed to enqueue chat job", "error", err, "job_id", jobID, "model", partial.Model)
		jsonError(http.StatusInternalServerError, fmt.Sprintf("failed to enqueue job: %s", err))
		return
	}

	// Register a completion waiter before waking the worker.
	// NOTE(review): the waiter is registered after the job row already exists.
	// This presumably relies on the notifier retaining a result that arrives
	// before Register, or on the worker not picking the job up until Wake —
	// confirm against worker.Notifier.
	waitCh := s.notifier.Register(jobID)

	// Wake the worker.
	if s.workerRef != nil {
		s.workerRef.Wake()
	}

	// Block until the job completes or the request is cancelled.
	select {
	case <-waitCh:
		// Job completed — get the result.
		state, result, errMsg, ok := s.notifier.Result(jobID)
		if !ok {
			// Should not happen, but fall back to DB.
			j, err := s.store.GetJob(jobID)
			if err != nil {
				http.Error(w, `{"error":"job lost"}`, http.StatusInternalServerError)
				return
			}
			state = j.State
			result = j.Result
			errMsg = j.Error
		}

		if state == store.JobStateFailed {
			msg := "job failed"
			if errMsg != nil {
				msg = *errMsg
			}
			jsonError(http.StatusBadGateway, msg)
			return
		}

		// Return the result as a direct Ollama response.
		w.Header().Set("Content-Type", "application/json")
		w.WriteHeader(http.StatusOK)
		w.Write(result)

	case <-r.Context().Done():
		// NOTE(review): this handler neither removes the waiter nor cancels the
		// queued job on cancellation — confirm the notifier/worker clean up.
		http.Error(w, `{"error":"request cancelled while waiting"}`, http.StatusServiceUnavailable)
	}
}

// authMiddleware validates the Authorization: Bearer <token> header on all
// requests except /healthz. Returns 401 if the token is missing or wrong.
//
// Why: a single wrapper keeps every route behind the configured token while
// leaving the health probe reachable without credentials.
// What: lets /healthz through untouched; otherwise requires an Authorization
// header using the Bearer scheme (matched case-insensitively, as RFC 9110
// defines auth schemes) whose token equals cfg.Token. The comparison uses
// subtle.ConstantTimeCompare so response timing does not reveal how many
// leading bytes of a guess were correct; the token's length is not hidden,
// since ConstantTimeCompare returns immediately on a length mismatch. Every
// 401 carries a WWW-Authenticate challenge, which RFC 9110 requires on 401
// responses.
// Test: with a token configured, assert 401 plus WWW-Authenticate for a
// missing header, a non-Bearer header, and a wrong token; assert success for
// "Bearer <token>" and "bearer <token>"; assert /healthz needs no header.
func (s *Server) authMiddleware(next http.Handler) http.Handler {
	const scheme = "Bearer "

	// deny writes a 401 with the Bearer challenge and the given JSON body.
	deny := func(w http.ResponseWriter, body string) {
		w.Header().Set("WWW-Authenticate", `Bearer realm="foreman"`)
		http.Error(w, body, http.StatusUnauthorized)
	}

	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		// /healthz is always public so load balancers and probes work without auth.
		if r.URL.Path == "/healthz" {
			next.ServeHTTP(w, r)
			return
		}

		auth := r.Header.Get("Authorization")
		if auth == "" {
			deny(w, `{"error":"missing authorization header"}`)
			return
		}

		// Only the scheme name is case-insensitive; the token itself is not.
		if len(auth) < len(scheme) || !strings.EqualFold(auth[:len(scheme)], scheme) {
			deny(w, `{"error":"invalid authorization header"}`)
			return
		}

		token := auth[len(scheme):]
		if subtle.ConstantTimeCompare([]byte(token), []byte(s.cfg.Token)) != 1 {
			deny(w, `{"error":"invalid token"}`)
			return
		}

		next.ServeHTTP(w, r)
	})
}