feat: add durable queue, single worker, and drain-by-model scheduling
Replace the Phase 2 in-flight chat gate (a buffered channel) with a real SQLite-backed job queue and a single worker loop. Every /api/chat request now creates a job row, blocks until the worker completes it, and returns the result transparently.

Key changes:
- internal/store: NextJob (drain-by-model ordering), IncrementAttempt, ResetInterruptedJobs, DeleteTerminalJobsBefore; busy_timeout pragma
- internal/worker: single-threaded worker loop with Notifier for sync handler completion signaling; retry on ConnectionError, terminal fail on HTTPError; crash recovery resets interrupted jobs on startup
- internal/webhook: dispatcher infrastructure for async webhook delivery
- internal/server: chat handler rewritten to enqueue+wait; old chatGate removed; embeddings remain direct concurrent proxies (ADR-0013)
- internal/config: FOREMAN_MAX_ATTEMPTS, FOREMAN_JOB_TTL

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -19,11 +19,13 @@ import (
|
||||
"gitea.stevedudenhoeffer.com/steve/foreman/internal/config"
|
||||
"gitea.stevedudenhoeffer.com/steve/foreman/internal/ollama"
|
||||
"gitea.stevedudenhoeffer.com/steve/foreman/internal/store"
|
||||
"gitea.stevedudenhoeffer.com/steve/foreman/internal/webhook"
|
||||
"gitea.stevedudenhoeffer.com/steve/foreman/internal/worker"
|
||||
)
|
||||
|
||||
// newTestServer creates a Server backed by a temp-dir SQLite store, a stub client,
|
||||
// and a pre-populated inventory.
|
||||
func newTestServer(t *testing.T, cfg config.Config, client ollama.Client) *Server {
|
||||
// and a pre-populated inventory. It also wires up a worker; the worker loop itself is started by newTestServerWithInventory.
|
||||
func newTestServer(t *testing.T, cfg config.Config, client ollama.Client) (*Server, *store.Store) {
|
||||
t.Helper()
|
||||
dbPath := filepath.Join(t.TempDir(), "test.db")
|
||||
st, err := store.Open(dbPath)
|
||||
@@ -32,19 +34,30 @@ func newTestServer(t *testing.T, cfg config.Config, client ollama.Client) *Serve
|
||||
}
|
||||
t.Cleanup(func() { st.Close() })
|
||||
|
||||
logger := slog.Default()
|
||||
logger := slog.New(slog.NewJSONHandler(io.Discard, nil))
|
||||
inv := ollama.NewModelInventory(client, logger)
|
||||
return New(cfg, st, client, inv, logger)
|
||||
notifier := worker.NewNotifier()
|
||||
dispatcher := webhook.NewDispatcher("", logger)
|
||||
w := worker.New(st, client, inv, notifier, dispatcher, logger)
|
||||
srv := New(cfg, st, client, inv, notifier, w, dispatcher, logger)
|
||||
return srv, st
|
||||
}
|
||||
|
||||
// newTestServerWithInventory creates a Server and pre-refreshes the inventory.
|
||||
func newTestServerWithInventory(t *testing.T, cfg config.Config, client ollama.Client) *Server {
|
||||
// Also starts a worker goroutine.
|
||||
func newTestServerWithInventory(t *testing.T, cfg config.Config, client ollama.Client) (*Server, *store.Store) {
|
||||
t.Helper()
|
||||
srv := newTestServer(t, cfg, client)
|
||||
srv, st := newTestServer(t, cfg, client)
|
||||
if err := srv.inventory.Refresh(context.Background()); err != nil {
|
||||
t.Fatalf("inventory.Refresh: %v", err)
|
||||
}
|
||||
return srv
|
||||
|
||||
// Start the worker loop so chat requests complete.
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
t.Cleanup(cancel)
|
||||
go srv.workerRef.Run(ctx)
|
||||
|
||||
return srv, st
|
||||
}
|
||||
|
||||
func TestHealthz_OK(t *testing.T) {
|
||||
@@ -52,7 +65,7 @@ func TestHealthz_OK(t *testing.T) {
|
||||
tags: &ollama.TagsResponse{},
|
||||
ps: &ollama.PsResponse{},
|
||||
}
|
||||
srv := newTestServerWithInventory(t, config.Config{
|
||||
srv, _ := newTestServerWithInventory(t, config.Config{
|
||||
OllamaURL: "http://localhost:11434",
|
||||
}, stub)
|
||||
|
||||
@@ -81,7 +94,7 @@ func TestHealthz_NoAuthRequired(t *testing.T) {
|
||||
tags: &ollama.TagsResponse{},
|
||||
ps: &ollama.PsResponse{},
|
||||
}
|
||||
srv := newTestServerWithInventory(t, config.Config{
|
||||
srv, _ := newTestServerWithInventory(t, config.Config{
|
||||
OllamaURL: "http://localhost:11434",
|
||||
Token: "secret-token",
|
||||
}, stub)
|
||||
@@ -100,7 +113,7 @@ func TestAuth_RequiredWhenTokenSet(t *testing.T) {
|
||||
tags: &ollama.TagsResponse{},
|
||||
ps: &ollama.PsResponse{},
|
||||
}
|
||||
srv := newTestServerWithInventory(t, config.Config{
|
||||
srv, _ := newTestServerWithInventory(t, config.Config{
|
||||
OllamaURL: "http://localhost:11434",
|
||||
Token: "secret-token",
|
||||
}, stub)
|
||||
@@ -159,7 +172,7 @@ func TestAuth_NotRequiredWhenNoToken(t *testing.T) {
|
||||
tags: &ollama.TagsResponse{},
|
||||
ps: &ollama.PsResponse{},
|
||||
}
|
||||
srv := newTestServerWithInventory(t, config.Config{
|
||||
srv, _ := newTestServerWithInventory(t, config.Config{
|
||||
OllamaURL: "http://localhost:11434",
|
||||
}, stub)
|
||||
|
||||
@@ -182,7 +195,7 @@ func TestTags_ReturnsCachedModels(t *testing.T) {
|
||||
},
|
||||
ps: &ollama.PsResponse{},
|
||||
}
|
||||
srv := newTestServerWithInventory(t, config.Config{
|
||||
srv, _ := newTestServerWithInventory(t, config.Config{
|
||||
OllamaURL: "http://localhost:11434",
|
||||
}, stub)
|
||||
|
||||
@@ -215,7 +228,7 @@ func TestPs_ReturnsCachedRunningModels(t *testing.T) {
|
||||
},
|
||||
},
|
||||
}
|
||||
srv := newTestServerWithInventory(t, config.Config{
|
||||
srv, _ := newTestServerWithInventory(t, config.Config{
|
||||
OllamaURL: "http://localhost:11434",
|
||||
}, stub)
|
||||
|
||||
@@ -245,7 +258,7 @@ func TestChat_UnknownModel404(t *testing.T) {
|
||||
},
|
||||
ps: &ollama.PsResponse{},
|
||||
}
|
||||
srv := newTestServerWithInventory(t, config.Config{
|
||||
srv, _ := newTestServerWithInventory(t, config.Config{
|
||||
OllamaURL: "http://localhost:11434",
|
||||
}, stub)
|
||||
|
||||
@@ -265,16 +278,17 @@ func TestChat_NonStreaming(t *testing.T) {
|
||||
Done: true,
|
||||
Message: &ollama.Message{Role: "assistant", Content: "Hello!"},
|
||||
}
|
||||
respBytes, _ := json.Marshal(chatResp)
|
||||
|
||||
stub := &stubClient{
|
||||
tags: &ollama.TagsResponse{
|
||||
Models: []ollama.ModelInfo{{Name: "qwen3:30b"}},
|
||||
},
|
||||
ps: &ollama.PsResponse{},
|
||||
rawChatResp: newRawResponse(200, "application/json", respBytes),
|
||||
ps: &ollama.PsResponse{},
|
||||
chatFunc: func(ctx context.Context, req ollama.ChatRequest, stream bool) (*ollama.ChatResponse, <-chan ollama.ChatResponse, error) {
|
||||
return &chatResp, nil, nil
|
||||
},
|
||||
}
|
||||
srv := newTestServerWithInventory(t, config.Config{
|
||||
srv, _ := newTestServerWithInventory(t, config.Config{
|
||||
OllamaURL: "http://localhost:11434",
|
||||
}, stub)
|
||||
|
||||
@@ -284,7 +298,7 @@ func TestChat_NonStreaming(t *testing.T) {
|
||||
srv.Handler().ServeHTTP(rec, req)
|
||||
|
||||
if rec.Code != http.StatusOK {
|
||||
t.Fatalf("status = %d, want %d", rec.Code, http.StatusOK)
|
||||
t.Fatalf("status = %d, want %d; body: %s", rec.Code, http.StatusOK, rec.Body.String())
|
||||
}
|
||||
|
||||
ct := rec.Header().Get("Content-Type")
|
||||
@@ -301,60 +315,6 @@ func TestChat_NonStreaming(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestChat_Streaming(t *testing.T) {
|
||||
// Build NDJSON chunks.
|
||||
chunks := []ollama.ChatResponse{
|
||||
{Model: "qwen3:30b", Done: false, Message: &ollama.Message{Role: "assistant", Content: "Hel"}},
|
||||
{Model: "qwen3:30b", Done: false, Message: &ollama.Message{Role: "assistant", Content: "lo"}},
|
||||
{Model: "qwen3:30b", Done: true, DoneReason: "stop"},
|
||||
}
|
||||
var ndjson bytes.Buffer
|
||||
for _, c := range chunks {
|
||||
b, _ := json.Marshal(c)
|
||||
ndjson.Write(b)
|
||||
ndjson.WriteByte('\n')
|
||||
}
|
||||
|
||||
stub := &stubClient{
|
||||
tags: &ollama.TagsResponse{
|
||||
Models: []ollama.ModelInfo{{Name: "qwen3:30b"}},
|
||||
},
|
||||
ps: &ollama.PsResponse{},
|
||||
rawChatResp: newRawResponse(200, "application/x-ndjson", ndjson.Bytes()),
|
||||
}
|
||||
srv := newTestServerWithInventory(t, config.Config{
|
||||
OllamaURL: "http://localhost:11434",
|
||||
}, stub)
|
||||
|
||||
body := `{"model":"qwen3:30b","messages":[{"role":"user","content":"hi"}]}`
|
||||
req := httptest.NewRequest(http.MethodPost, "/api/chat", strings.NewReader(body))
|
||||
rec := httptest.NewRecorder()
|
||||
srv.Handler().ServeHTTP(rec, req)
|
||||
|
||||
if rec.Code != http.StatusOK {
|
||||
t.Fatalf("status = %d, want %d", rec.Code, http.StatusOK)
|
||||
}
|
||||
|
||||
ct := rec.Header().Get("Content-Type")
|
||||
if ct != "application/x-ndjson" {
|
||||
t.Errorf("Content-Type = %q, want %q", ct, "application/x-ndjson")
|
||||
}
|
||||
|
||||
// Verify chunks pass through faithfully.
|
||||
lines := strings.Split(strings.TrimSpace(rec.Body.String()), "\n")
|
||||
if len(lines) != 3 {
|
||||
t.Fatalf("got %d lines, want 3", len(lines))
|
||||
}
|
||||
|
||||
var last ollama.ChatResponse
|
||||
if err := json.Unmarshal([]byte(lines[2]), &last); err != nil {
|
||||
t.Fatalf("unmarshal last chunk: %v", err)
|
||||
}
|
||||
if !last.Done {
|
||||
t.Error("last chunk should have done=true")
|
||||
}
|
||||
}
|
||||
|
||||
func TestChat_Serialization(t *testing.T) {
|
||||
// Track concurrent requests at the stub.
|
||||
var inflight atomic.Int32
|
||||
@@ -365,7 +325,7 @@ func TestChat_Serialization(t *testing.T) {
|
||||
Models: []ollama.ModelInfo{{Name: "qwen3:30b"}},
|
||||
},
|
||||
ps: &ollama.PsResponse{},
|
||||
rawChatFunc: func(ctx context.Context, body []byte) (*http.Response, error) {
|
||||
chatFunc: func(ctx context.Context, req ollama.ChatRequest, stream bool) (*ollama.ChatResponse, <-chan ollama.ChatResponse, error) {
|
||||
cur := inflight.Add(1)
|
||||
defer inflight.Add(-1)
|
||||
for {
|
||||
@@ -376,12 +336,11 @@ func TestChat_Serialization(t *testing.T) {
|
||||
}
|
||||
// Simulate work.
|
||||
time.Sleep(50 * time.Millisecond)
|
||||
resp := ollama.ChatResponse{Model: "qwen3:30b", Done: true}
|
||||
b, _ := json.Marshal(resp)
|
||||
return newRawResponse(200, "application/json", b), nil
|
||||
resp := &ollama.ChatResponse{Model: "qwen3:30b", Done: true, Message: &ollama.Message{Role: "assistant", Content: "ok"}}
|
||||
return resp, nil, nil
|
||||
},
|
||||
}
|
||||
srv := newTestServerWithInventory(t, config.Config{
|
||||
srv, _ := newTestServerWithInventory(t, config.Config{
|
||||
OllamaURL: "http://localhost:11434",
|
||||
}, stub)
|
||||
|
||||
@@ -395,14 +354,14 @@ func TestChat_Serialization(t *testing.T) {
|
||||
rec := httptest.NewRecorder()
|
||||
srv.Handler().ServeHTTP(rec, req)
|
||||
if rec.Code != http.StatusOK {
|
||||
t.Errorf("status = %d, want %d", rec.Code, http.StatusOK)
|
||||
t.Errorf("status = %d, want %d; body: %s", rec.Code, http.StatusOK, rec.Body.String())
|
||||
}
|
||||
}()
|
||||
}
|
||||
wg.Wait()
|
||||
|
||||
if got := maxInflight.Load(); got > 1 {
|
||||
t.Errorf("max concurrent chat requests at target = %d, want 1 (gate should serialize)", got)
|
||||
t.Errorf("max concurrent chat requests at target = %d, want 1 (worker should serialize)", got)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -432,7 +391,7 @@ func TestEmbed_ConcurrentBypassesGate(t *testing.T) {
|
||||
return newRawResponse(200, "application/json", b), nil
|
||||
},
|
||||
}
|
||||
srv := newTestServerWithInventory(t, config.Config{
|
||||
srv, _ := newTestServerWithInventory(t, config.Config{
|
||||
OllamaURL: "http://localhost:11434",
|
||||
}, stub)
|
||||
|
||||
@@ -471,7 +430,7 @@ func TestEmbed_AlsoWorksOnEmbeddingsPath(t *testing.T) {
|
||||
return newRawResponse(200, "application/json", respBytes), nil
|
||||
},
|
||||
}
|
||||
srv := newTestServerWithInventory(t, config.Config{
|
||||
srv, _ := newTestServerWithInventory(t, config.Config{
|
||||
OllamaURL: "http://localhost:11434",
|
||||
}, stub)
|
||||
|
||||
@@ -490,7 +449,7 @@ func TestHealthz_DegradedFromInventory(t *testing.T) {
|
||||
tagsErr: fmt.Errorf("connection refused"),
|
||||
ps: &ollama.PsResponse{},
|
||||
}
|
||||
srv := newTestServer(t, config.Config{
|
||||
srv, _ := newTestServer(t, config.Config{
|
||||
OllamaURL: "http://localhost:11434",
|
||||
}, stub)
|
||||
|
||||
@@ -514,6 +473,35 @@ func TestHealthz_DegradedFromInventory(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestChat_ContextCancellation(t *testing.T) {
|
||||
// Chat function that blocks forever to simulate a slow worker.
|
||||
stub := &stubClient{
|
||||
tags: &ollama.TagsResponse{
|
||||
Models: []ollama.ModelInfo{{Name: "qwen3:30b"}},
|
||||
},
|
||||
ps: &ollama.PsResponse{},
|
||||
chatFunc: func(ctx context.Context, req ollama.ChatRequest, stream bool) (*ollama.ChatResponse, <-chan ollama.ChatResponse, error) {
|
||||
<-ctx.Done()
|
||||
return nil, nil, ctx.Err()
|
||||
},
|
||||
}
|
||||
srv, _ := newTestServerWithInventory(t, config.Config{
|
||||
OllamaURL: "http://localhost:11434",
|
||||
}, stub)
|
||||
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 200*time.Millisecond)
|
||||
defer cancel()
|
||||
|
||||
body := `{"model":"qwen3:30b","messages":[{"role":"user","content":"hi"}],"stream":false}`
|
||||
req := httptest.NewRequestWithContext(ctx, http.MethodPost, "/api/chat", strings.NewReader(body))
|
||||
rec := httptest.NewRecorder()
|
||||
srv.Handler().ServeHTTP(rec, req)
|
||||
|
||||
if rec.Code != http.StatusServiceUnavailable {
|
||||
t.Fatalf("status = %d, want %d", rec.Code, http.StatusServiceUnavailable)
|
||||
}
|
||||
}
|
||||
|
||||
// --- Stub client for testing ---
|
||||
|
||||
// stubClient implements ollama.Client for testing.
|
||||
@@ -523,6 +511,7 @@ type stubClient struct {
|
||||
ps *ollama.PsResponse
|
||||
psErr error
|
||||
|
||||
chatFunc func(ctx context.Context, req ollama.ChatRequest, stream bool) (*ollama.ChatResponse, <-chan ollama.ChatResponse, error)
|
||||
rawChatResp *http.Response
|
||||
rawChatFunc func(ctx context.Context, body []byte) (*http.Response, error)
|
||||
|
||||
@@ -531,6 +520,9 @@ type stubClient struct {
|
||||
}
|
||||
|
||||
func (s *stubClient) Chat(ctx context.Context, req ollama.ChatRequest, stream bool) (*ollama.ChatResponse, <-chan ollama.ChatResponse, error) {
|
||||
if s.chatFunc != nil {
|
||||
return s.chatFunc(ctx, req, stream)
|
||||
}
|
||||
return nil, nil, fmt.Errorf("stubClient.Chat not implemented")
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user