feat: add FOREMAN_KEEP_ALIVE config for worker model residency
Allow configuring how long the worker model stays resident on the Ollama
target after a request via the FOREMAN_KEEP_ALIVE env var. Accepts Ollama
keep_alive values: integer seconds ("-1" forever, "0" unload, "3600") or duration strings ("15m", "1h"). Defaults
to "-1" (pin forever). The embedder warm-up is unaffected and always
uses keep_alive=-1.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -16,6 +16,7 @@ import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"strconv"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
@@ -104,6 +105,11 @@ type Worker struct {
|
||||
dispatcher *webhook.Dispatcher
|
||||
logger *slog.Logger
|
||||
|
||||
// keepAlive is the JSON-encoded keep_alive value sent in outbound chat requests
|
||||
// to control how long the worker model stays resident on the target. Derived from
|
||||
// FOREMAN_KEEP_ALIVE config; does not affect the embedder.
|
||||
keepAlive json.RawMessage
|
||||
|
||||
// wake is signaled when a new job is enqueued.
|
||||
wake chan struct{}
|
||||
}
|
||||
@@ -120,6 +126,7 @@ func New(
|
||||
notifier *Notifier,
|
||||
dispatcher *webhook.Dispatcher,
|
||||
logger *slog.Logger,
|
||||
keepAlive string,
|
||||
) *Worker {
|
||||
return &Worker{
|
||||
store: st,
|
||||
@@ -128,10 +135,35 @@ func New(
|
||||
notifier: notifier,
|
||||
dispatcher: dispatcher,
|
||||
logger: logger,
|
||||
keepAlive: encodeKeepAlive(keepAlive),
|
||||
wake: make(chan struct{}, 1),
|
||||
}
|
||||
}
|
||||
|
||||
// encodeKeepAlive converts a FOREMAN_KEEP_ALIVE config string to a json.RawMessage
// suitable for the Ollama ChatRequest KeepAlive field.
//
// Why: Ollama's keep_alive field accepts either a JSON number (seconds, or -1 for
// forever) or a JSON string duration ("15m", "1h"). Values that parse as an integer
// are encoded as JSON numbers; everything else is encoded as a JSON string.
// What: returns a json.RawMessage containing the appropriate JSON representation.
// An empty val is treated as "-1". Integers are re-rendered in canonical decimal
// form because strconv.Atoi accepts spellings such as "+5" or "007" that are not
// valid JSON numbers and would make marshalling the enclosing request fail.
// Test: assert "" -> `-1`, "-1" -> `-1`, "0" -> `0`, "15m" -> `"15m"`,
// "3600" -> `3600`, "+5" -> `5`, "007" -> `7`.
func encodeKeepAlive(val string) json.RawMessage {
	if val == "" {
		val = "-1"
	}

	// If the value parses as an integer, emit it as a JSON number.
	// This covers "-1", "0", "3600", etc. The parsed value (not the raw text)
	// is written back so a leading "+" or leading zeros never reach the wire.
	if n, err := strconv.Atoi(val); err == nil {
		return json.RawMessage(strconv.Itoa(n))
	}

	// Otherwise, emit it as a JSON string (e.g. "15m", "1h").
	// The error is deliberately ignored: json.Marshal cannot fail for a plain
	// string, since invalid UTF-8 is replaced rather than rejected.
	b, _ := json.Marshal(val)
	return json.RawMessage(b)
}
|
||||
|
||||
// Wake signals the worker that a new job may be available. Non-blocking.
|
||||
//
|
||||
// Why: the HTTP handlers signal the worker to check for new work immediately
|
||||
@@ -241,6 +273,10 @@ func (w *Worker) executeJob(ctx context.Context, job store.Job) {
|
||||
streamFalse := false
|
||||
chatReq.Stream = &streamFalse
|
||||
|
||||
// Override keep_alive with the configured value so the worker model stays
|
||||
// resident for the desired duration. The embedder is pinned separately.
|
||||
chatReq.KeepAlive = w.keepAlive
|
||||
|
||||
// Execute the chat request.
|
||||
resp, _, err := w.client.Chat(ctx, chatReq, false)
|
||||
if err != nil {
|
||||
|
||||
Reference in New Issue
Block a user