feat: add Ollama target client, model poller, and native passthrough
Phase 2 of foreman: the daemon now acts as a transparent Ollama proxy.

- internal/ollama: Client interface and HTTP implementation for chat (streaming + non-streaming), embed, tags, ps with auth forwarding, NDJSON streaming via bufio.Scanner, and connection vs HTTP error classification via custom error types.
- internal/ollama: ModelInventory with background poller for /api/tags and /api/ps, degraded mode on target unreachable with model retention, automatic recovery on reconnect.
- internal/server: Passthrough routes (/api/chat, /api/tags, /api/ps, /api/embed, /api/embeddings) with model validation, chat serialization gate (capacity-1 channel), concurrent embedding bypass (ADR-0013), NDJSON streaming with per-chunk flush, and degraded health reporting.
- cmd/foreman: Full serve wiring with Ollama client, poller goroutine, embedder warmup (keep_alive:-1), and signal-based shutdown.

The Mac is now usable as a go-llm target through foreman.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
+49
-3
@@ -7,11 +7,16 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"os"
|
||||
"os/signal"
|
||||
"syscall"
|
||||
|
||||
"gitea.stevedudenhoeffer.com/steve/foreman/internal/config"
|
||||
"gitea.stevedudenhoeffer.com/steve/foreman/internal/ollama"
|
||||
"gitea.stevedudenhoeffer.com/steve/foreman/internal/server"
|
||||
"gitea.stevedudenhoeffer.com/steve/foreman/internal/store"
|
||||
)
|
||||
@@ -47,10 +52,12 @@ func main() {
|
||||
}
|
||||
}
|
||||
|
||||
// runServe loads configuration, opens the store, and starts the HTTP server.
|
||||
// runServe loads configuration, opens the store, creates the Ollama client,
|
||||
// starts the model poller, warms the embedder, and starts the HTTP server.
|
||||
//
|
||||
// Why: the serve subcommand is the daemon's primary mode of operation.
|
||||
// What: wires config -> store -> server and blocks on ListenAndServe.
|
||||
// What: wires config -> store -> ollama client -> poller -> server and blocks on
|
||||
// ListenAndServe. Graceful shutdown on SIGINT/SIGTERM cancels the poller.
|
||||
// Test: tested indirectly via integration tests; each component is unit tested.
|
||||
func runServe(logger *slog.Logger) error {
|
||||
cfg, err := config.Load()
|
||||
@@ -73,6 +80,45 @@ func runServe(logger *slog.Logger) error {
|
||||
}
|
||||
defer st.Close()
|
||||
|
||||
srv := server.New(cfg, st, logger)
|
||||
// Create the Ollama client.
|
||||
client := ollama.NewClient(cfg.OllamaURL, cfg.OllamaToken)
|
||||
|
||||
// Create the model inventory and start the poller.
|
||||
inventory := ollama.NewModelInventory(client, logger)
|
||||
ctx, cancel := signal.NotifyContext(context.Background(), syscall.SIGINT, syscall.SIGTERM)
|
||||
defer cancel()
|
||||
|
||||
go inventory.Start(ctx, cfg.PollInterval)
|
||||
|
||||
// Warm the embedder model if configured (non-blocking).
|
||||
if cfg.EmbedModel != "" {
|
||||
warmEmbedder(ctx, client, cfg.EmbedModel, logger)
|
||||
}
|
||||
|
||||
srv := server.New(cfg, st, client, inventory, logger)
|
||||
return srv.ListenAndServe()
|
||||
}
|
||||
|
||||
// warmEmbedder sends a trivial embed request with keep_alive=-1 to pin the
|
||||
// embedder in slot 1 (ADR-0013). Does not block startup on failure.
|
||||
//
|
||||
// Why: the embedder must be always-resident so embedding requests are fast and
|
||||
// never trigger a swap (ADR-0013).
|
||||
// What: issues /api/embed with keep_alive:-1 to load and pin the model.
|
||||
// Test: start foreman with FOREMAN_EMBED_MODEL set, verify the warmup call fires.
|
||||
func warmEmbedder(ctx context.Context, client ollama.Client, model string, logger *slog.Logger) {
|
||||
logger.Info("warming embedder model", "model", model)
|
||||
|
||||
req := ollama.EmbedRequest{
|
||||
Model: model,
|
||||
Input: json.RawMessage(`"warmup"`),
|
||||
KeepAlive: json.RawMessage(`-1`),
|
||||
}
|
||||
|
||||
_, err := client.Embed(ctx, req)
|
||||
if err != nil {
|
||||
logger.Warn("embedder warmup failed (non-fatal)", "model", model, "error", err)
|
||||
return
|
||||
}
|
||||
logger.Info("embedder warmed successfully", "model", model)
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user