feat: add FOREMAN_KEEP_ALIVE config for worker model residency
CI / Tidy (push) Successful in 9m42s
CI / Build & Test (push) Successful in 10m28s
CI / Publish Docker Image (push) Successful in 21s

Allow configuring how long the worker model stays resident on the Ollama
target after a request via FOREMAN_KEEP_ALIVE env var. Accepts Ollama
duration strings ("-1" forever, "0" unload, "15m", "1h", etc.). Defaults
to "-1" (pin forever). The embedder warm-up is unaffected and always
uses keep_alive=-1.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-23 21:29:37 -04:00
parent c16e1af752
commit 7cd7eaff8b
9 changed files with 133 additions and 6 deletions
+36
View File
@@ -16,6 +16,7 @@ import (
"errors"
"fmt"
"log/slog"
"strconv"
"sync"
"time"
@@ -104,6 +105,11 @@ type Worker struct {
dispatcher *webhook.Dispatcher
logger *slog.Logger
// keepAlive is the JSON-encoded keep_alive value sent in outbound chat requests
// to control how long the worker model stays resident on the target. Derived from
// FOREMAN_KEEP_ALIVE config; does not affect the embedder.
keepAlive json.RawMessage
// wake is signaled when a new job is enqueued.
wake chan struct{}
}
@@ -120,6 +126,7 @@ func New(
notifier *Notifier,
dispatcher *webhook.Dispatcher,
logger *slog.Logger,
keepAlive string,
) *Worker {
return &Worker{
store: st,
@@ -128,10 +135,35 @@ func New(
notifier: notifier,
dispatcher: dispatcher,
logger: logger,
keepAlive: encodeKeepAlive(keepAlive),
wake: make(chan struct{}, 1),
}
}
// encodeKeepAlive converts a FOREMAN_KEEP_ALIVE config string to a json.RawMessage
// suitable for the Ollama ChatRequest KeepAlive field.
//
// Why: Ollama's keep_alive field accepts either a JSON number (seconds, or -1 for
// forever) or a JSON string duration ("15m", "1h"). Integer values are re-rendered
// in canonical decimal form instead of being copied verbatim, because strconv.Atoi
// also accepts spellings such as "+5" and "007" that are not valid JSON number
// literals and would yield an invalid raw message.
// What: returns a json.RawMessage holding a JSON number when val parses as an
// integer and a JSON string otherwise. An empty val is treated as "-1".
// Test: assert "-1" -> `-1`, "0" -> `0`, "15m" -> `"15m"`, "3600" -> `3600`,
// "+5" -> `5`, "007" -> `7`.
func encodeKeepAlive(val string) json.RawMessage {
	if val == "" {
		val = "-1"
	}

	// Integers ("-1", "0", "3600", ...) are emitted as JSON numbers. Formatting
	// the parsed value drops a leading '+' and leading zeros, which JSON forbids.
	if n, err := strconv.Atoi(val); err == nil {
		return json.RawMessage(strconv.Itoa(n))
	}

	// Anything else (e.g. "15m", "1h") is emitted as a JSON string. The error is
	// deliberately ignored: json.Marshal does not fail for a plain string value.
	b, _ := json.Marshal(val)
	return json.RawMessage(b)
}
// Wake signals the worker that a new job may be available. Non-blocking.
//
// Why: the HTTP handlers signal the worker to check for new work immediately
@@ -241,6 +273,10 @@ func (w *Worker) executeJob(ctx context.Context, job store.Job) {
streamFalse := false
chatReq.Stream = &streamFalse
// Override keep_alive with the configured value so the worker model stays
// resident for the desired duration. The embedder is pinned separately.
chatReq.KeepAlive = w.keepAlive
// Execute the chat request.
resp, _, err := w.client.Chat(ctx, chatReq, false)
if err != nil {
+69 -1
View File
@@ -40,7 +40,7 @@ func newTestWorker(t *testing.T, client ollama.Client) (*Worker, *store.Store, *
inv := ollama.NewModelInventory(client, logger)
notifier := NewNotifier()
dispatcher := webhook.NewDispatcher("", logger)
w := New(st, client, inv, notifier, dispatcher, logger)
w := New(st, client, inv, notifier, dispatcher, logger, "-1")
return w, st, notifier
}
@@ -755,6 +755,74 @@ func TestStore_ResetInterruptedJobs(t *testing.T) {
}
}
// TestEncodeKeepAlive checks the JSON encoding produced by encodeKeepAlive for
// integer inputs, duration strings, and the empty-string default.
func TestEncodeKeepAlive(t *testing.T) {
	type keepAliveCase struct {
		input string
		want  string
	}

	cases := []keepAliveCase{
		// Integer inputs are expected as bare JSON numbers.
		{input: "-1", want: "-1"},
		{input: "0", want: "0"},
		{input: "3600", want: "3600"},
		// Duration inputs are expected as quoted JSON strings.
		{input: "15m", want: `"15m"`},
		{input: "1h", want: `"1h"`},
		// Empty input is expected to fall back to -1.
		{input: "", want: "-1"},
	}

	for i := range cases {
		tc := cases[i]
		t.Run(tc.input, func(t *testing.T) {
			if got := string(encodeKeepAlive(tc.input)); got != tc.want {
				t.Errorf("encodeKeepAlive(%q) = %s, want %s", tc.input, got, tc.want)
			}
		})
	}
}
// TestWorker_SetsKeepAliveOnChatRequest runs a single job through a Worker built
// with keepAlive "15m" and checks that the one recorded chat call carries
// KeepAlive equal to the JSON string "15m".
func TestWorker_SetsKeepAliveOnChatRequest(t *testing.T) {
	const (
		jobID         = "01KEEPALIVE"
		wantKeepAlive = `"15m"`
	)

	client := &stubOllamaClient{}
	st := openTestDB(t)
	logger := slog.New(slog.NewJSONHandler(io.Discard, nil))
	inv := ollama.NewModelInventory(client, logger)
	notifier := NewNotifier()
	dispatcher := webhook.NewDispatcher("", logger)

	// A non-default value ("15m") shows that the configured keep_alive, not a
	// built-in default, is what reaches the outbound request.
	w := New(st, client, inv, notifier, dispatcher, logger, "15m")

	_, err := st.CreateJob(store.Job{
		ID:          jobID,
		Model:       "qwen3:30b",
		Payload:     json.RawMessage(`{"model":"qwen3:30b","messages":[{"role":"user","content":"hi"}]}`),
		MaxAttempts: 3,
	})
	if err != nil {
		t.Fatalf("CreateJob: %v", err)
	}

	// Register for the job's notification before the worker starts running.
	done := notifier.Register(jobID)

	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()
	go w.Run(ctx)

	deadline := time.NewTimer(5 * time.Second)
	defer deadline.Stop()
	select {
	case <-done:
	case <-deadline.C:
		t.Fatal("timed out waiting for job to complete")
	}

	// Inspect the recorded chat calls under the stub's lock.
	client.mu.Lock()
	defer client.mu.Unlock()

	if n := len(client.chatCalls); n != 1 {
		t.Fatalf("expected 1 chat call, got %d", n)
	}
	if gotKA := string(client.chatCalls[0].KeepAlive); gotKA != wantKeepAlive {
		t.Errorf("keep_alive = %s, want %s", gotKA, wantKeepAlive)
	}
}
func TestStore_DeleteTerminalJobsBefore(t *testing.T) {
st := openTestDB(t)