feat: add FOREMAN_KEEP_ALIVE config for worker model residency
Allow configuring, via the FOREMAN_KEEP_ALIVE environment variable, how
long the worker model stays resident on the Ollama target after a request.
The value is an Ollama duration string ("-1" for forever, "0" to unload,
"15m", "1h", etc.) and defaults to "-1" (pin forever). The embedder
warm-up is unaffected and always uses keep_alive=-1.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
+2
-1
@@ -79,6 +79,7 @@ func runServe(logger *slog.Logger) error {
|
||||
"auth_enabled", cfg.Token != "",
|
||||
"max_attempts", cfg.MaxAttempts,
|
||||
"job_ttl", cfg.JobTTL,
|
||||
+ "keep_alive", cfg.KeepAlive,
|
||||
)
|
||||
|
||||
st, err := store.Open(cfg.DBPath)
|
||||
@@ -107,7 +108,7 @@ func runServe(logger *slog.Logger) error {
|
||||
|
||||
// Create the notifier and worker.
|
||||
notifier := worker.NewNotifier()
|
||||
- w := worker.New(st, client, inventory, notifier, dispatcher, logger)
|
||||
+ w := worker.New(st, client, inventory, notifier, dispatcher, logger, cfg.KeepAlive)
|
||||
|
||||
// Start the worker loop in a goroutine.
|
||||
go w.Run(ctx)
|
||||
|
||||
Reference in New Issue
Block a user