feat: add FOREMAN_KEEP_ALIVE config for worker model residency
Allow configuring, via the FOREMAN_KEEP_ALIVE env var, how long the worker
model stays resident on the Ollama target after a request. Accepts Ollama
duration strings ("-1" forever, "0" unload, "15m", "1h", etc). Defaults
to "-1" (pin forever). The embedder warm-up is unaffected and always
uses keep_alive=-1.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -40,7 +40,7 @@ func newTestWorker(t *testing.T, client ollama.Client) (*Worker, *store.Store, *
|
||||
inv := ollama.NewModelInventory(client, logger)
|
||||
notifier := NewNotifier()
|
||||
dispatcher := webhook.NewDispatcher("", logger)
|
||||
w := New(st, client, inv, notifier, dispatcher, logger)
|
||||
w := New(st, client, inv, notifier, dispatcher, logger, "-1")
|
||||
return w, st, notifier
|
||||
}
|
||||
|
||||
@@ -755,6 +755,74 @@ func TestStore_ResetInterruptedJobs(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestEncodeKeepAlive(t *testing.T) {
|
||||
tests := []struct {
|
||||
input string
|
||||
want string
|
||||
}{
|
||||
{"-1", "-1"},
|
||||
{"0", "0"},
|
||||
{"3600", "3600"},
|
||||
{"15m", `"15m"`},
|
||||
{"1h", `"1h"`},
|
||||
{"", "-1"},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.input, func(t *testing.T) {
|
||||
got := string(encodeKeepAlive(tt.input))
|
||||
if got != tt.want {
|
||||
t.Errorf("encodeKeepAlive(%q) = %s, want %s", tt.input, got, tt.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestWorker_SetsKeepAliveOnChatRequest(t *testing.T) {
|
||||
client := &stubOllamaClient{}
|
||||
st := openTestDB(t)
|
||||
logger := slog.New(slog.NewJSONHandler(io.Discard, nil))
|
||||
inv := ollama.NewModelInventory(client, logger)
|
||||
notifier := NewNotifier()
|
||||
dispatcher := webhook.NewDispatcher("", logger)
|
||||
|
||||
// Use "15m" to verify non-default keep_alive propagates to outbound requests.
|
||||
w := New(st, client, inv, notifier, dispatcher, logger, "15m")
|
||||
|
||||
job := store.Job{
|
||||
ID: "01KEEPALIVE",
|
||||
Model: "qwen3:30b",
|
||||
Payload: json.RawMessage(`{"model":"qwen3:30b","messages":[{"role":"user","content":"hi"}]}`),
|
||||
MaxAttempts: 3,
|
||||
}
|
||||
if _, err := st.CreateJob(job); err != nil {
|
||||
t.Fatalf("CreateJob: %v", err)
|
||||
}
|
||||
|
||||
waitCh := notifier.Register("01KEEPALIVE")
|
||||
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
defer cancel()
|
||||
go w.Run(ctx)
|
||||
|
||||
select {
|
||||
case <-waitCh:
|
||||
case <-time.After(5 * time.Second):
|
||||
t.Fatal("timed out waiting for job to complete")
|
||||
}
|
||||
|
||||
// Verify the chat request had keep_alive set to "15m".
|
||||
client.mu.Lock()
|
||||
defer client.mu.Unlock()
|
||||
if len(client.chatCalls) != 1 {
|
||||
t.Fatalf("expected 1 chat call, got %d", len(client.chatCalls))
|
||||
}
|
||||
gotKA := string(client.chatCalls[0].KeepAlive)
|
||||
if gotKA != `"15m"` {
|
||||
t.Errorf("keep_alive = %s, want %s", gotKA, `"15m"`)
|
||||
}
|
||||
}
|
||||
|
||||
func TestStore_DeleteTerminalJobsBefore(t *testing.T) {
|
||||
st := openTestDB(t)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user