package server

import (
	"bytes"
	"context"
	"encoding/json"
	"fmt"
	"io"
	"log/slog"
	"net/http"
	"net/http/httptest"
	"path/filepath"
	"strings"
	"sync"
	"sync/atomic"
	"testing"
	"time"

	"gitea.stevedudenhoeffer.com/steve/foreman/internal/config"
	"gitea.stevedudenhoeffer.com/steve/foreman/internal/ollama"
	"gitea.stevedudenhoeffer.com/steve/foreman/internal/store"
	"gitea.stevedudenhoeffer.com/steve/foreman/internal/webhook"
	"gitea.stevedudenhoeffer.com/steve/foreman/internal/worker"
)

// newTestServer creates a Server backed by a store opened at "test.db" inside
// a per-test temp directory, the supplied client, and a freshly constructed
// model inventory. The store is closed automatically when the test finishes.
//
// It does NOT refresh the inventory and does NOT start the worker loop; use
// newTestServerWithInventory when a test needs either of those.
func newTestServer(t *testing.T, cfg config.Config, client ollama.Client) (*Server, *store.Store) {
	t.Helper()

	dbPath := filepath.Join(t.TempDir(), "test.db")
	st, err := store.Open(dbPath)
	if err != nil {
		t.Fatalf("store.Open: %v", err)
	}
	t.Cleanup(func() { st.Close() })

	// Log output is discarded so test runs stay quiet.
	logger := slog.New(slog.NewJSONHandler(io.Discard, nil))
	inv := ollama.NewModelInventory(client, logger)
	notifier := worker.NewNotifier()
	// NOTE(review): the empty dispatcher URL and the "-1" worker argument are
	// passed through as-is; their meaning is defined in the webhook/worker
	// packages — confirm there before changing them.
	dispatcher := webhook.NewDispatcher("", logger)
	w := worker.New(st, client, inv, notifier, dispatcher, logger, "-1")

	srv := New(cfg, st, client, inv, notifier, w, dispatcher, logger)
	return srv, st
}

// newTestServerWithInventory builds a Server via newTestServer, refreshes the
// inventory once (failing the test if the refresh errors), and starts the
// worker loop in a goroutine whose context is cancelled on test cleanup.
func newTestServerWithInventory(t *testing.T, cfg config.Config, client ollama.Client) (*Server, *store.Store) {
	t.Helper()

	srv, st := newTestServer(t, cfg, client)
	if err := srv.inventory.Refresh(context.Background()); err != nil {
		t.Fatalf("inventory.Refresh: %v", err)
	}

	// Start the worker loop so chat requests complete.
	ctx, cancel := context.WithCancel(context.Background())
	t.Cleanup(cancel)
	go srv.workerRef.Run(ctx)

	return srv, st
}

// TestHealthz_OK checks that /healthz answers 200 with status "ok" and
// degraded=false after a successful inventory refresh.
func TestHealthz_OK(t *testing.T) {
	stub := &stubClient{
		tags: &ollama.TagsResponse{},
		ps:   &ollama.PsResponse{},
	}
	srv, _ := newTestServerWithInventory(t, config.Config{
		OllamaURL: "http://localhost:11434",
	}, stub)

	req := httptest.NewRequest(http.MethodGet, "/healthz", nil)
	rec := httptest.NewRecorder()
	srv.Handler().ServeHTTP(rec, req)

	if rec.Code != http.StatusOK {
		t.Fatalf("status = %d, want %d", rec.Code, http.StatusOK)
	}

	var resp healthResponse
	if err := json.NewDecoder(rec.Body).Decode(&resp); err != nil {
		t.Fatalf("decode response: %v", err)
	}
	if resp.Status != "ok" {
		t.Errorf("status = %q, want %q", resp.Status, "ok")
	}
	if resp.Degraded {
		t.Error("degraded should be false")
	}
}

// TestHealthz_NoAuthRequired checks that /healthz answers 200 without an
// Authorization header even when a token is configured.
func TestHealthz_NoAuthRequired(t *testing.T) {
	stub := &stubClient{
		tags: &ollama.TagsResponse{},
		ps:   &ollama.PsResponse{},
	}
	srv, _ := newTestServerWithInventory(t, config.Config{
		OllamaURL: "http://localhost:11434",
		Token:     "secret-token",
	}, stub)

	req := httptest.NewRequest(http.MethodGet, "/healthz", nil)
	rec := httptest.NewRecorder()
	srv.Handler().ServeHTTP(rec, req)

	if rec.Code != http.StatusOK {
		t.Errorf("status = %d, want %d (healthz should bypass auth)", rec.Code, http.StatusOK)
	}
}

// TestAuth_RequiredWhenTokenSet checks, for a server configured with a token,
// which Authorization header values are rejected with 401 and which pass.
func TestAuth_RequiredWhenTokenSet(t *testing.T) {
	stub := &stubClient{
		tags: &ollama.TagsResponse{},
		ps:   &ollama.PsResponse{},
	}
	srv, _ := newTestServerWithInventory(t, config.Config{
		OllamaURL: "http://localhost:11434",
		Token:     "secret-token",
	}, stub)

	tests := []struct {
		name string
		path string
		auth string
		want int
	}{
		{
			name: "no auth header",
			path: "/some-route",
			auth: "",
			want: http.StatusUnauthorized,
		},
		{
			name: "wrong token",
			path: "/some-route",
			auth: "Bearer wrong-token",
			want: http.StatusUnauthorized,
		},
		{
			name: "correct token",
			path: "/some-route",
			auth: "Bearer secret-token",
			// Route doesn't exist so we get 404, but auth passed.
			want: http.StatusNotFound,
		},
		{
			name: "invalid scheme",
			path: "/some-route",
			auth: "Basic dXNlcjpwYXNz",
			want: http.StatusUnauthorized,
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			req := httptest.NewRequest(http.MethodGet, tt.path, nil)
			if tt.auth != "" {
				req.Header.Set("Authorization", tt.auth)
			}
			rec := httptest.NewRecorder()
			srv.Handler().ServeHTTP(rec, req)

			if rec.Code != tt.want {
				t.Errorf("status = %d, want %d", rec.Code, tt.want)
			}
		})
	}
}

// TestAuth_NotRequiredWhenNoToken checks that a request without credentials is
// not answered with 401 when no token is configured.
func TestAuth_NotRequiredWhenNoToken(t *testing.T) {
	stub := &stubClient{
		tags: &ollama.TagsResponse{},
		ps:   &ollama.PsResponse{},
	}
	srv, _ := newTestServerWithInventory(t, config.Config{
		OllamaURL: "http://localhost:11434",
	}, stub)

	req := httptest.NewRequest(http.MethodGet, "/some-route", nil)
	rec := httptest.NewRecorder()
	srv.Handler().ServeHTTP(rec, req)

	if rec.Code == http.StatusUnauthorized {
		t.Error("should not require auth when no token is configured")
	}
}

// TestTags_ReturnsCachedModels checks that /api/tags returns the models the
// stub reported during the inventory refresh, in the same order.
func TestTags_ReturnsCachedModels(t *testing.T) {
	stub := &stubClient{
		tags: &ollama.TagsResponse{
			Models: []ollama.ModelInfo{
				{Name: "qwen3:30b", Model: "qwen3:30b", Size: 19000000000},
				{Name: "nomic-embed-text", Model: "nomic-embed-text", Size: 300000000},
			},
		},
		ps: &ollama.PsResponse{},
	}
	srv, _ := newTestServerWithInventory(t, config.Config{
		OllamaURL: "http://localhost:11434",
	}, stub)

	req := httptest.NewRequest(http.MethodGet, "/api/tags", nil)
	rec := httptest.NewRecorder()
	srv.Handler().ServeHTTP(rec, req)

	if rec.Code != http.StatusOK {
		t.Fatalf("status = %d, want %d", rec.Code, http.StatusOK)
	}

	var resp ollama.TagsResponse
	if err := json.NewDecoder(rec.Body).Decode(&resp); err != nil {
		t.Fatalf("decode: %v", err)
	}
	if len(resp.Models) != 2 {
		t.Fatalf("got %d models, want 2", len(resp.Models))
	}
	if resp.Models[0].Name != "qwen3:30b" {
		t.Errorf("first model = %q, want %q", resp.Models[0].Name, "qwen3:30b")
	}
}

// TestPs_ReturnsCachedRunningModels checks that /api/ps returns the single
// running model the stub reported during the inventory refresh.
func TestPs_ReturnsCachedRunningModels(t *testing.T) {
	stub := &stubClient{
		tags: &ollama.TagsResponse{},
		ps: &ollama.PsResponse{
			Models: []ollama.RunningModel{
				{Name: "nomic-embed-text", Model: "nomic-embed-text", Size: 300000000},
			},
		},
	}
	srv, _ := newTestServerWithInventory(t, config.Config{
		OllamaURL: "http://localhost:11434",
	}, stub)

	req := httptest.NewRequest(http.MethodGet, "/api/ps", nil)
	rec := httptest.NewRecorder()
	srv.Handler().ServeHTTP(rec, req)

	if rec.Code != http.StatusOK {
		t.Fatalf("status = %d, want %d", rec.Code, http.StatusOK)
	}

	var resp ollama.PsResponse
	if err := json.NewDecoder(rec.Body).Decode(&resp); err != nil {
		t.Fatalf("decode: %v", err)
	}
	if len(resp.Models) != 1 {
		t.Fatalf("got %d models, want 1", len(resp.Models))
	}
}

// TestChat_UnknownModel404 checks that a chat request naming a model absent
// from the inventory is answered with 404.
func TestChat_UnknownModel404(t *testing.T) {
	stub := &stubClient{
		tags: &ollama.TagsResponse{
			Models: []ollama.ModelInfo{
				{Name: "qwen3:30b"},
			},
		},
		ps: &ollama.PsResponse{},
	}
	srv, _ := newTestServerWithInventory(t, config.Config{
		OllamaURL: "http://localhost:11434",
	}, stub)

	body := `{"model":"nonexistent-model","messages":[{"role":"user","content":"hi"}],"stream":false}`
	req := httptest.NewRequest(http.MethodPost, "/api/chat", strings.NewReader(body))
	rec := httptest.NewRecorder()
	srv.Handler().ServeHTTP(rec, req)

	if rec.Code != http.StatusNotFound {
		t.Fatalf("status = %d, want %d", rec.Code, http.StatusNotFound)
	}
}

// TestChat_NonStreaming checks that a non-streaming chat request returns 200,
// an application/json Content-Type, and the message produced by the stub.
func TestChat_NonStreaming(t *testing.T) {
	chatResp := ollama.ChatResponse{
		Model:   "qwen3:30b",
		Done:    true,
		Message: &ollama.Message{Role: "assistant", Content: "Hello!"},
	}
	stub := &stubClient{
		tags: &ollama.TagsResponse{
			Models: []ollama.ModelInfo{{Name: "qwen3:30b"}},
		},
		ps: &ollama.PsResponse{},
		chatFunc: func(ctx context.Context, req ollama.ChatRequest, stream bool) (*ollama.ChatResponse, <-chan ollama.ChatResponse, error) {
			return &chatResp, nil, nil
		},
	}
	srv, _ := newTestServerWithInventory(t, config.Config{
		OllamaURL: "http://localhost:11434",
	}, stub)

	body := `{"model":"qwen3:30b","messages":[{"role":"user","content":"hi"}],"stream":false}`
	req := httptest.NewRequest(http.MethodPost, "/api/chat", strings.NewReader(body))
	rec := httptest.NewRecorder()
	srv.Handler().ServeHTTP(rec, req)

	if rec.Code != http.StatusOK {
		t.Fatalf("status = %d, want %d; body: %s", rec.Code, http.StatusOK, rec.Body.String())
	}

	ct := rec.Header().Get("Content-Type")
	if ct != "application/json" {
		t.Errorf("Content-Type = %q, want %q", ct, "application/json")
	}

	var got ollama.ChatResponse
	if err := json.NewDecoder(rec.Body).Decode(&got); err != nil {
		t.Fatalf("decode: %v", err)
	}
	if got.Message == nil || got.Message.Content != "Hello!" {
		t.Errorf("response content = %v, want Hello!", got.Message)
	}
}

// TestChat_Serialization fires three chat requests concurrently and checks
// that the stub never observes more than one of them in flight at a time.
func TestChat_Serialization(t *testing.T) {
	// Track concurrent requests at the stub.
	var inflight atomic.Int32
	var maxInflight atomic.Int32

	stub := &stubClient{
		tags: &ollama.TagsResponse{
			Models: []ollama.ModelInfo{{Name: "qwen3:30b"}},
		},
		ps: &ollama.PsResponse{},
		chatFunc: func(ctx context.Context, req ollama.ChatRequest, stream bool) (*ollama.ChatResponse, <-chan ollama.ChatResponse, error) {
			cur := inflight.Add(1)
			defer inflight.Add(-1)
			// Raise the recorded high-water mark to cur; the CAS retries if
			// another goroutine updated it in between.
			for {
				old := maxInflight.Load()
				if cur <= old || maxInflight.CompareAndSwap(old, cur) {
					break
				}
			}
			// Simulate work.
			time.Sleep(50 * time.Millisecond)
			resp := &ollama.ChatResponse{Model: "qwen3:30b", Done: true, Message: &ollama.Message{Role: "assistant", Content: "ok"}}
			return resp, nil, nil
		},
	}
	srv, _ := newTestServerWithInventory(t, config.Config{
		OllamaURL: "http://localhost:11434",
	}, stub)

	var wg sync.WaitGroup
	for i := 0; i < 3; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			body := `{"model":"qwen3:30b","messages":[{"role":"user","content":"hi"}],"stream":false}`
			req := httptest.NewRequest(http.MethodPost, "/api/chat", strings.NewReader(body))
			rec := httptest.NewRecorder()
			srv.Handler().ServeHTTP(rec, req)
			// t.Errorf (not Fatalf) because this runs outside the test goroutine.
			if rec.Code != http.StatusOK {
				t.Errorf("status = %d, want %d; body: %s", rec.Code, http.StatusOK, rec.Body.String())
			}
		}()
	}
	wg.Wait()

	if got := maxInflight.Load(); got > 1 {
		t.Errorf("max concurrent chat requests at target = %d, want 1 (worker should serialize)", got)
	}
}

// TestEmbed_ConcurrentBypassesGate fires five embed requests concurrently and
// checks that the stub observes at least two of them in flight at once.
func TestEmbed_ConcurrentBypassesGate(t *testing.T) {
	// Track concurrent embed requests.
	var inflight atomic.Int32
	var maxInflight atomic.Int32

	stub := &stubClient{
		tags: &ollama.TagsResponse{
			Models: []ollama.ModelInfo{{Name: "qwen3:30b"}},
		},
		ps: &ollama.PsResponse{},
		rawEmbedFunc: func(ctx context.Context, body []byte) (*http.Response, error) {
			cur := inflight.Add(1)
			defer inflight.Add(-1)
			// Raise the recorded high-water mark to cur; the CAS retries if
			// another goroutine updated it in between.
			for {
				old := maxInflight.Load()
				if cur <= old || maxInflight.CompareAndSwap(old, cur) {
					break
				}
			}
			// Simulate some work so concurrent requests overlap.
			time.Sleep(50 * time.Millisecond)
			resp := ollama.EmbedResponse{Model: "nomic-embed-text", Embeddings: [][]float64{{0.1, 0.2}}}
			b, err := json.Marshal(resp)
			if err != nil {
				// Surface a fixture-encoding failure instead of serving an
				// empty body.
				return nil, err
			}
			return newRawResponse(200, "application/json", b), nil
		},
	}
	srv, _ := newTestServerWithInventory(t, config.Config{
		OllamaURL: "http://localhost:11434",
	}, stub)

	var wg sync.WaitGroup
	for i := 0; i < 5; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			body := `{"model":"nomic-embed-text","input":"test"}`
			req := httptest.NewRequest(http.MethodPost, "/api/embed", strings.NewReader(body))
			rec := httptest.NewRecorder()
			srv.Handler().ServeHTTP(rec, req)
			// t.Errorf (not Fatalf) because this runs outside the test goroutine.
			if rec.Code != http.StatusOK {
				t.Errorf("embed status = %d, want %d", rec.Code, http.StatusOK)
			}
		}()
	}
	wg.Wait()

	if got := maxInflight.Load(); got < 2 {
		t.Errorf("max concurrent embed requests = %d, want >= 2 (embeds should run in parallel)", got)
	}
}

// TestEmbed_AlsoWorksOnEmbeddingsPath checks that POST /api/embeddings is
// answered with 200 when the stub's RawEmbed succeeds.
func TestEmbed_AlsoWorksOnEmbeddingsPath(t *testing.T) {
	embedResp := ollama.EmbedResponse{
		Model:      "nomic-embed-text",
		Embeddings: [][]float64{{0.1, 0.2, 0.3}},
	}
	respBytes, err := json.Marshal(embedResp)
	if err != nil {
		t.Fatalf("marshal embed response: %v", err)
	}

	stub := &stubClient{
		tags: &ollama.TagsResponse{},
		ps:   &ollama.PsResponse{},
		rawEmbedFunc: func(ctx context.Context, body []byte) (*http.Response, error) {
			return newRawResponse(200, "application/json", respBytes), nil
		},
	}
	srv, _ := newTestServerWithInventory(t, config.Config{
		OllamaURL: "http://localhost:11434",
	}, stub)

	body := `{"model":"nomic-embed-text","input":"test"}`
	req := httptest.NewRequest(http.MethodPost, "/api/embeddings", strings.NewReader(body))
	rec := httptest.NewRecorder()
	srv.Handler().ServeHTTP(rec, req)

	if rec.Code != http.StatusOK {
		t.Fatalf("status = %d, want %d", rec.Code, http.StatusOK)
	}
}

// TestHealthz_DegradedFromInventory checks that /healthz still answers 200 but
// reports degraded=true after an inventory refresh against a client whose
// Tags call fails.
func TestHealthz_DegradedFromInventory(t *testing.T) {
	stub := &stubClient{
		tagsErr: fmt.Errorf("connection refused"),
		ps:      &ollama.PsResponse{},
	}
	srv, _ := newTestServer(t, config.Config{
		OllamaURL: "http://localhost:11434",
	}, stub)

	// Refresh will fail, setting degraded = true. The returned error is
	// deliberately discarded: the observable effect is asserted through the
	// /healthz response below.
	_ = srv.inventory.Refresh(context.Background())

	req := httptest.NewRequest(http.MethodGet, "/healthz", nil)
	rec := httptest.NewRecorder()
	srv.Handler().ServeHTTP(rec, req)

	if rec.Code != http.StatusOK {
		t.Fatalf("status = %d, want %d", rec.Code, http.StatusOK)
	}

	var resp healthResponse
	if err := json.NewDecoder(rec.Body).Decode(&resp); err != nil {
		t.Fatalf("decode: %v", err)
	}
	if !resp.Degraded {
		t.Error("expected degraded=true when inventory poll failed")
	}
}

// TestChat_ContextCancellation checks that a chat request whose context times
// out while the upstream call is still blocked is answered with 503.
func TestChat_ContextCancellation(t *testing.T) {
	// Chat function that blocks until its context is done, simulating an
	// upstream call that never completes on its own.
	stub := &stubClient{
		tags: &ollama.TagsResponse{
			Models: []ollama.ModelInfo{{Name: "qwen3:30b"}},
		},
		ps: &ollama.PsResponse{},
		chatFunc: func(ctx context.Context, req ollama.ChatRequest, stream bool) (*ollama.ChatResponse, <-chan ollama.ChatResponse, error) {
			<-ctx.Done()
			return nil, nil, ctx.Err()
		},
	}
	srv, _ := newTestServerWithInventory(t, config.Config{
		OllamaURL: "http://localhost:11434",
	}, stub)

	// The request context expires after 200ms, well before the stub returns.
	ctx, cancel := context.WithTimeout(context.Background(), 200*time.Millisecond)
	defer cancel()

	body := `{"model":"qwen3:30b","messages":[{"role":"user","content":"hi"}],"stream":false}`
	req := httptest.NewRequestWithContext(ctx, http.MethodPost, "/api/chat", strings.NewReader(body))
	rec := httptest.NewRecorder()
	srv.Handler().ServeHTTP(rec, req)

	if rec.Code != http.StatusServiceUnavailable {
		t.Fatalf("status = %d, want %d", rec.Code, http.StatusServiceUnavailable)
	}
}

// --- Stub client for testing ---

// stubClient implements ollama.Client for testing.
type stubClient struct { tags *ollama.TagsResponse tagsErr error ps *ollama.PsResponse psErr error chatFunc func(ctx context.Context, req ollama.ChatRequest, stream bool) (*ollama.ChatResponse, <-chan ollama.ChatResponse, error) rawChatResp *http.Response rawChatFunc func(ctx context.Context, body []byte) (*http.Response, error) rawEmbedResp *http.Response rawEmbedFunc func(ctx context.Context, body []byte) (*http.Response, error) } func (s *stubClient) Chat(ctx context.Context, req ollama.ChatRequest, stream bool) (*ollama.ChatResponse, <-chan ollama.ChatResponse, error) { if s.chatFunc != nil { return s.chatFunc(ctx, req, stream) } return nil, nil, fmt.Errorf("stubClient.Chat not implemented") } func (s *stubClient) Embed(ctx context.Context, req ollama.EmbedRequest) (*ollama.EmbedResponse, error) { return nil, fmt.Errorf("stubClient.Embed not implemented") } func (s *stubClient) Tags(ctx context.Context) (*ollama.TagsResponse, error) { if s.tagsErr != nil { return nil, s.tagsErr } return s.tags, nil } func (s *stubClient) Ps(ctx context.Context) (*ollama.PsResponse, error) { if s.psErr != nil { return nil, s.psErr } return s.ps, nil } func (s *stubClient) RawChat(ctx context.Context, body []byte) (*http.Response, error) { if s.rawChatFunc != nil { return s.rawChatFunc(ctx, body) } if s.rawChatResp != nil { return s.rawChatResp, nil } return nil, fmt.Errorf("stubClient.RawChat not configured") } func (s *stubClient) RawEmbed(ctx context.Context, body []byte) (*http.Response, error) { if s.rawEmbedFunc != nil { return s.rawEmbedFunc(ctx, body) } if s.rawEmbedResp != nil { return s.rawEmbedResp, nil } return nil, fmt.Errorf("stubClient.RawEmbed not configured") } // newRawResponse builds a minimal *http.Response for testing. func newRawResponse(status int, contentType string, body []byte) *http.Response { return &http.Response{ StatusCode: status, Header: http.Header{"Content-Type": {contentType}}, Body: io.NopCloser(bytes.NewReader(body)), } }