package server

import (
	"bytes"
	"context"
	"encoding/json"
	"fmt"
	"io"
	"log/slog"
	"net/http"
	"net/http/httptest"
	"path/filepath"
	"strings"
	"sync"
	"sync/atomic"
	"testing"
	"time"

	"gitea.stevedudenhoeffer.com/steve/foreman/internal/config"
	"gitea.stevedudenhoeffer.com/steve/foreman/internal/ollama"
	"gitea.stevedudenhoeffer.com/steve/foreman/internal/store"
)

// newTestServer creates a Server backed by a temp-dir SQLite store, the given
// client, and a freshly constructed model inventory. The inventory is NOT
// refreshed here; use newTestServerWithInventory when a test needs it populated.
// The store is closed automatically when the test finishes.
func newTestServer(t *testing.T, cfg config.Config, client ollama.Client) *Server {
	t.Helper()

	dbPath := filepath.Join(t.TempDir(), "test.db")
	st, err := store.Open(dbPath)
	if err != nil {
		t.Fatalf("store.Open: %v", err)
	}
	t.Cleanup(func() { st.Close() })

	logger := slog.Default()
	inv := ollama.NewModelInventory(client, logger)
	return New(cfg, st, client, inv, logger)
}

// newTestServerWithInventory creates a Server and pre-refreshes the inventory,
// failing the test immediately if the refresh returns an error.
func newTestServerWithInventory(t *testing.T, cfg config.Config, client ollama.Client) *Server {
	t.Helper()

	srv := newTestServer(t, cfg, client)
	if err := srv.inventory.Refresh(context.Background()); err != nil {
		t.Fatalf("inventory.Refresh: %v", err)
	}
	return srv
}

// TestHealthz_OK checks that GET /healthz answers 200 with status "ok" and
// degraded=false after a successful inventory refresh.
func TestHealthz_OK(t *testing.T) {
	stub := &stubClient{
		tags: &ollama.TagsResponse{},
		ps:   &ollama.PsResponse{},
	}
	srv := newTestServerWithInventory(t, config.Config{
		OllamaURL: "http://localhost:11434",
	}, stub)

	req := httptest.NewRequest(http.MethodGet, "/healthz", nil)
	rec := httptest.NewRecorder()
	srv.Handler().ServeHTTP(rec, req)

	if rec.Code != http.StatusOK {
		t.Fatalf("status = %d, want %d", rec.Code, http.StatusOK)
	}

	var resp healthResponse
	if err := json.NewDecoder(rec.Body).Decode(&resp); err != nil {
		t.Fatalf("decode response: %v", err)
	}
	if resp.Status != "ok" {
		t.Errorf("status = %q, want %q", resp.Status, "ok")
	}
	if resp.Degraded {
		t.Error("degraded should be false")
	}
}

// TestHealthz_NoAuthRequired checks that /healthz is reachable without an
// Authorization header even when a token is configured.
func TestHealthz_NoAuthRequired(t *testing.T) {
	stub := &stubClient{
		tags: &ollama.TagsResponse{},
		ps:   &ollama.PsResponse{},
	}
	srv := newTestServerWithInventory(t, config.Config{
		OllamaURL: "http://localhost:11434",
		Token:     "secret-token",
	}, stub)

	req := httptest.NewRequest(http.MethodGet, "/healthz", nil)
	rec := httptest.NewRecorder()
	srv.Handler().ServeHTTP(rec, req)

	if rec.Code != http.StatusOK {
		t.Errorf("status = %d, want %d (healthz should bypass auth)", rec.Code, http.StatusOK)
	}
}

// TestAuth_RequiredWhenTokenSet is a table test over Authorization header
// variants against a server configured with a token.
func TestAuth_RequiredWhenTokenSet(t *testing.T) {
	stub := &stubClient{
		tags: &ollama.TagsResponse{},
		ps:   &ollama.PsResponse{},
	}
	srv := newTestServerWithInventory(t, config.Config{
		OllamaURL: "http://localhost:11434",
		Token:     "secret-token",
	}, stub)

	tests := []struct {
		name string
		path string
		auth string
		want int
	}{
		{
			name: "no auth header",
			path: "/some-route",
			auth: "",
			want: http.StatusUnauthorized,
		},
		{
			name: "wrong token",
			path: "/some-route",
			auth: "Bearer wrong-token",
			want: http.StatusUnauthorized,
		},
		{
			name: "correct token",
			path: "/some-route",
			auth: "Bearer secret-token",
			// Route doesn't exist so we get 404, but auth passed.
			want: http.StatusNotFound,
		},
		{
			name: "invalid scheme",
			path: "/some-route",
			auth: "Basic dXNlcjpwYXNz",
			want: http.StatusUnauthorized,
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			req := httptest.NewRequest(http.MethodGet, tt.path, nil)
			// An empty auth value means "send no Authorization header at all".
			if tt.auth != "" {
				req.Header.Set("Authorization", tt.auth)
			}
			rec := httptest.NewRecorder()
			srv.Handler().ServeHTTP(rec, req)

			if rec.Code != tt.want {
				t.Errorf("status = %d, want %d", rec.Code, tt.want)
			}
		})
	}
}

// TestAuth_NotRequiredWhenNoToken checks that a request without credentials is
// not rejected with 401 when no token is configured. Any other status is
// accepted, since the requested route is arbitrary.
func TestAuth_NotRequiredWhenNoToken(t *testing.T) {
	stub := &stubClient{
		tags: &ollama.TagsResponse{},
		ps:   &ollama.PsResponse{},
	}
	srv := newTestServerWithInventory(t, config.Config{
		OllamaURL: "http://localhost:11434",
	}, stub)

	req := httptest.NewRequest(http.MethodGet, "/some-route", nil)
	rec := httptest.NewRecorder()
	srv.Handler().ServeHTTP(rec, req)

	if rec.Code == http.StatusUnauthorized {
		t.Error("should not require auth when no token is configured")
	}
}

// TestTags_ReturnsCachedModels checks that GET /api/tags returns the models
// the stub reported during the inventory refresh, in the same order.
func TestTags_ReturnsCachedModels(t *testing.T) {
	stub := &stubClient{
		tags: &ollama.TagsResponse{
			Models: []ollama.ModelInfo{
				{Name: "qwen3:30b", Model: "qwen3:30b", Size: 19000000000},
				{Name: "nomic-embed-text", Model: "nomic-embed-text", Size: 300000000},
			},
		},
		ps: &ollama.PsResponse{},
	}
	srv := newTestServerWithInventory(t, config.Config{
		OllamaURL: "http://localhost:11434",
	}, stub)

	req := httptest.NewRequest(http.MethodGet, "/api/tags", nil)
	rec := httptest.NewRecorder()
	srv.Handler().ServeHTTP(rec, req)

	if rec.Code != http.StatusOK {
		t.Fatalf("status = %d, want %d", rec.Code, http.StatusOK)
	}

	var resp ollama.TagsResponse
	if err := json.NewDecoder(rec.Body).Decode(&resp); err != nil {
		t.Fatalf("decode: %v", err)
	}
	if len(resp.Models) != 2 {
		t.Fatalf("got %d models, want 2", len(resp.Models))
	}
	if resp.Models[0].Name != "qwen3:30b" {
		t.Errorf("first model = %q, want %q", resp.Models[0].Name, "qwen3:30b")
	}
}

// TestPs_ReturnsCachedRunningModels checks that GET /api/ps returns the single
// running model the stub reported during the inventory refresh.
func TestPs_ReturnsCachedRunningModels(t *testing.T) {
	stub := &stubClient{
		tags: &ollama.TagsResponse{},
		ps: &ollama.PsResponse{
			Models: []ollama.RunningModel{
				{Name: "nomic-embed-text", Model: "nomic-embed-text", Size: 300000000},
			},
		},
	}
	srv := newTestServerWithInventory(t, config.Config{
		OllamaURL: "http://localhost:11434",
	}, stub)

	req := httptest.NewRequest(http.MethodGet, "/api/ps", nil)
	rec := httptest.NewRecorder()
	srv.Handler().ServeHTTP(rec, req)

	if rec.Code != http.StatusOK {
		t.Fatalf("status = %d, want %d", rec.Code, http.StatusOK)
	}

	var resp ollama.PsResponse
	if err := json.NewDecoder(rec.Body).Decode(&resp); err != nil {
		t.Fatalf("decode: %v", err)
	}
	if len(resp.Models) != 1 {
		t.Fatalf("got %d models, want 1", len(resp.Models))
	}
}

// TestChat_UnknownModel404 checks that POST /api/chat answers 404 when the
// requested model is not among the models reported by the stub.
func TestChat_UnknownModel404(t *testing.T) {
	stub := &stubClient{
		tags: &ollama.TagsResponse{
			Models: []ollama.ModelInfo{
				{Name: "qwen3:30b"},
			},
		},
		ps: &ollama.PsResponse{},
	}
	srv := newTestServerWithInventory(t, config.Config{
		OllamaURL: "http://localhost:11434",
	}, stub)

	body := `{"model":"nonexistent-model","messages":[{"role":"user","content":"hi"}],"stream":false}`
	req := httptest.NewRequest(http.MethodPost, "/api/chat", strings.NewReader(body))
	rec := httptest.NewRecorder()
	srv.Handler().ServeHTTP(rec, req)

	if rec.Code != http.StatusNotFound {
		t.Fatalf("status = %d, want %d", rec.Code, http.StatusNotFound)
	}
}

// TestChat_NonStreaming checks that a stream=false chat request relays the
// stub's JSON response: status 200, Content-Type application/json, and the
// assistant message content intact.
func TestChat_NonStreaming(t *testing.T) {
	chatResp := ollama.ChatResponse{
		Model:   "qwen3:30b",
		Done:    true,
		Message: &ollama.Message{Role: "assistant", Content: "Hello!"},
	}
	respBytes, err := json.Marshal(chatResp)
	if err != nil {
		t.Fatalf("marshal chat response: %v", err)
	}

	stub := &stubClient{
		tags: &ollama.TagsResponse{
			Models: []ollama.ModelInfo{{Name: "qwen3:30b"}},
		},
		ps:          &ollama.PsResponse{},
		rawChatResp: newRawResponse(200, "application/json", respBytes),
	}
	srv := newTestServerWithInventory(t, config.Config{
		OllamaURL: "http://localhost:11434",
	}, stub)

	body := `{"model":"qwen3:30b","messages":[{"role":"user","content":"hi"}],"stream":false}`
	req := httptest.NewRequest(http.MethodPost, "/api/chat", strings.NewReader(body))
	rec := httptest.NewRecorder()
	srv.Handler().ServeHTTP(rec, req)

	if rec.Code != http.StatusOK {
		t.Fatalf("status = %d, want %d", rec.Code, http.StatusOK)
	}

	ct := rec.Header().Get("Content-Type")
	if ct != "application/json" {
		t.Errorf("Content-Type = %q, want %q", ct, "application/json")
	}

	var got ollama.ChatResponse
	if err := json.NewDecoder(rec.Body).Decode(&got); err != nil {
		t.Fatalf("decode: %v", err)
	}
	if got.Message == nil || got.Message.Content != "Hello!" {
		t.Errorf("response content = %v, want Hello!", got.Message)
	}
}

// TestChat_Streaming checks that an NDJSON chat response from the stub is
// relayed line-for-line with Content-Type application/x-ndjson and that the
// final chunk carries done=true.
func TestChat_Streaming(t *testing.T) {
	// Build NDJSON chunks.
	chunks := []ollama.ChatResponse{
		{Model: "qwen3:30b", Done: false, Message: &ollama.Message{Role: "assistant", Content: "Hel"}},
		{Model: "qwen3:30b", Done: false, Message: &ollama.Message{Role: "assistant", Content: "lo"}},
		{Model: "qwen3:30b", Done: true, DoneReason: "stop"},
	}
	var ndjson bytes.Buffer
	for _, c := range chunks {
		b, err := json.Marshal(c)
		if err != nil {
			t.Fatalf("marshal chunk: %v", err)
		}
		ndjson.Write(b)
		ndjson.WriteByte('\n')
	}

	stub := &stubClient{
		tags: &ollama.TagsResponse{
			Models: []ollama.ModelInfo{{Name: "qwen3:30b"}},
		},
		ps:          &ollama.PsResponse{},
		rawChatResp: newRawResponse(200, "application/x-ndjson", ndjson.Bytes()),
	}
	srv := newTestServerWithInventory(t, config.Config{
		OllamaURL: "http://localhost:11434",
	}, stub)

	// The request body deliberately omits the "stream" field.
	body := `{"model":"qwen3:30b","messages":[{"role":"user","content":"hi"}]}`
	req := httptest.NewRequest(http.MethodPost, "/api/chat", strings.NewReader(body))
	rec := httptest.NewRecorder()
	srv.Handler().ServeHTTP(rec, req)

	if rec.Code != http.StatusOK {
		t.Fatalf("status = %d, want %d", rec.Code, http.StatusOK)
	}

	ct := rec.Header().Get("Content-Type")
	if ct != "application/x-ndjson" {
		t.Errorf("Content-Type = %q, want %q", ct, "application/x-ndjson")
	}

	// Verify chunks pass through faithfully.
	lines := strings.Split(strings.TrimSpace(rec.Body.String()), "\n")
	if len(lines) != 3 {
		t.Fatalf("got %d lines, want 3", len(lines))
	}

	var last ollama.ChatResponse
	if err := json.Unmarshal([]byte(lines[2]), &last); err != nil {
		t.Fatalf("unmarshal last chunk: %v", err)
	}
	if !last.Done {
		t.Error("last chunk should have done=true")
	}
}

// TestChat_Serialization fires three chat requests concurrently and checks
// that the stub never observes more than one of them in flight at a time.
func TestChat_Serialization(t *testing.T) {
	// Track concurrent requests at the stub.
	var inflight atomic.Int32
	var maxInflight atomic.Int32

	stub := &stubClient{
		tags: &ollama.TagsResponse{
			Models: []ollama.ModelInfo{{Name: "qwen3:30b"}},
		},
		ps: &ollama.PsResponse{},
		rawChatFunc: func(ctx context.Context, body []byte) (*http.Response, error) {
			cur := inflight.Add(1)
			defer inflight.Add(-1)

			// Raise the recorded maximum to cur unless another goroutine
			// has already recorded an equal or higher value.
			for {
				old := maxInflight.Load()
				if cur <= old || maxInflight.CompareAndSwap(old, cur) {
					break
				}
			}

			// Simulate work.
			time.Sleep(50 * time.Millisecond)

			resp := ollama.ChatResponse{Model: "qwen3:30b", Done: true}
			b, err := json.Marshal(resp)
			if err != nil {
				return nil, fmt.Errorf("marshal chat response: %w", err)
			}
			return newRawResponse(200, "application/json", b), nil
		},
	}
	srv := newTestServerWithInventory(t, config.Config{
		OllamaURL: "http://localhost:11434",
	}, stub)

	var wg sync.WaitGroup
	for i := 0; i < 3; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()

			body := `{"model":"qwen3:30b","messages":[{"role":"user","content":"hi"}],"stream":false}`
			req := httptest.NewRequest(http.MethodPost, "/api/chat", strings.NewReader(body))
			rec := httptest.NewRecorder()
			srv.Handler().ServeHTTP(rec, req)

			// t.Errorf (unlike t.Fatalf) is safe to call from a non-test goroutine.
			if rec.Code != http.StatusOK {
				t.Errorf("status = %d, want %d", rec.Code, http.StatusOK)
			}
		}()
	}
	wg.Wait()

	if got := maxInflight.Load(); got > 1 {
		t.Errorf("max concurrent chat requests at target = %d, want 1 (gate should serialize)", got)
	}
}

// TestEmbed_ConcurrentBypassesGate fires five embed requests concurrently and
// checks that the stub observes at least two of them in flight at once.
//
// NOTE(review): the stub inventory lists only "qwen3:30b" while the requests
// name "nomic-embed-text"; presumably embed requests are not validated against
// the inventory — confirm against the handler.
func TestEmbed_ConcurrentBypassesGate(t *testing.T) {
	// Track concurrent embed requests.
	var inflight atomic.Int32
	var maxInflight atomic.Int32

	stub := &stubClient{
		tags: &ollama.TagsResponse{
			Models: []ollama.ModelInfo{{Name: "qwen3:30b"}},
		},
		ps: &ollama.PsResponse{},
		rawEmbedFunc: func(ctx context.Context, body []byte) (*http.Response, error) {
			cur := inflight.Add(1)
			defer inflight.Add(-1)

			// Raise the recorded maximum to cur unless another goroutine
			// has already recorded an equal or higher value.
			for {
				old := maxInflight.Load()
				if cur <= old || maxInflight.CompareAndSwap(old, cur) {
					break
				}
			}

			// Simulate some work so concurrent requests overlap.
			time.Sleep(50 * time.Millisecond)

			resp := ollama.EmbedResponse{Model: "nomic-embed-text", Embeddings: [][]float64{{0.1, 0.2}}}
			b, err := json.Marshal(resp)
			if err != nil {
				return nil, fmt.Errorf("marshal embed response: %w", err)
			}
			return newRawResponse(200, "application/json", b), nil
		},
	}
	srv := newTestServerWithInventory(t, config.Config{
		OllamaURL: "http://localhost:11434",
	}, stub)

	var wg sync.WaitGroup
	for i := 0; i < 5; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()

			body := `{"model":"nomic-embed-text","input":"test"}`
			req := httptest.NewRequest(http.MethodPost, "/api/embed", strings.NewReader(body))
			rec := httptest.NewRecorder()
			srv.Handler().ServeHTTP(rec, req)

			// t.Errorf (unlike t.Fatalf) is safe to call from a non-test goroutine.
			if rec.Code != http.StatusOK {
				t.Errorf("embed status = %d, want %d", rec.Code, http.StatusOK)
			}
		}()
	}
	wg.Wait()

	if got := maxInflight.Load(); got < 2 {
		t.Errorf("max concurrent embed requests = %d, want >= 2 (embeds should run in parallel)", got)
	}
}

// TestEmbed_AlsoWorksOnEmbeddingsPath checks that POST /api/embeddings is
// served successfully using the stub's RawEmbed response.
func TestEmbed_AlsoWorksOnEmbeddingsPath(t *testing.T) {
	embedResp := ollama.EmbedResponse{
		Model:      "nomic-embed-text",
		Embeddings: [][]float64{{0.1, 0.2, 0.3}},
	}
	respBytes, err := json.Marshal(embedResp)
	if err != nil {
		t.Fatalf("marshal embed response: %v", err)
	}

	stub := &stubClient{
		tags: &ollama.TagsResponse{},
		ps:   &ollama.PsResponse{},
		rawEmbedFunc: func(ctx context.Context, body []byte) (*http.Response, error) {
			return newRawResponse(200, "application/json", respBytes), nil
		},
	}
	srv := newTestServerWithInventory(t, config.Config{
		OllamaURL: "http://localhost:11434",
	}, stub)

	body := `{"model":"nomic-embed-text","input":"test"}`
	req := httptest.NewRequest(http.MethodPost, "/api/embeddings", strings.NewReader(body))
	rec := httptest.NewRecorder()
	srv.Handler().ServeHTTP(rec, req)

	if rec.Code != http.StatusOK {
		t.Fatalf("status = %d, want %d", rec.Code, http.StatusOK)
	}
}

// TestHealthz_DegradedFromInventory checks that /healthz still answers 200 but
// reports degraded=true after an inventory refresh against a failing Tags call.
func TestHealthz_DegradedFromInventory(t *testing.T) {
	stub := &stubClient{
		tagsErr: fmt.Errorf("connection refused"),
		ps:      &ollama.PsResponse{},
	}
	srv := newTestServer(t, config.Config{
		OllamaURL: "http://localhost:11434",
	}, stub)

	// Refresh will fail, setting degraded = true. The returned error is only
	// logged: the contract under test is the degraded flag in the response.
	if err := srv.inventory.Refresh(context.Background()); err != nil {
		t.Logf("inventory.Refresh returned the expected failure: %v", err)
	}

	req := httptest.NewRequest(http.MethodGet, "/healthz", nil)
	rec := httptest.NewRecorder()
	srv.Handler().ServeHTTP(rec, req)

	if rec.Code != http.StatusOK {
		t.Fatalf("status = %d, want %d", rec.Code, http.StatusOK)
	}

	var resp healthResponse
	if err := json.NewDecoder(rec.Body).Decode(&resp); err != nil {
		t.Fatalf("decode: %v", err)
	}
	if !resp.Degraded {
		t.Error("expected degraded=true when inventory poll failed")
	}
}

// --- Stub client for testing ---

// stubClient implements ollama.Client for testing.
//
// Tags and Ps return the canned value unless the matching *Err field is set.
// RawChat and RawEmbed prefer the *Func callback when present and otherwise
// fall back to the canned *Resp value. A canned *http.Response is returned
// as-is on every call, so its body can only be read once; use the *Func
// callback for tests that issue more than one request.
type stubClient struct {
	tags         *ollama.TagsResponse
	tagsErr      error
	ps           *ollama.PsResponse
	psErr        error
	rawChatResp  *http.Response
	rawChatFunc  func(ctx context.Context, body []byte) (*http.Response, error)
	rawEmbedResp *http.Response
	rawEmbedFunc func(ctx context.Context, body []byte) (*http.Response, error)
}

// Compile-time check that stubClient satisfies ollama.Client.
var _ ollama.Client = (*stubClient)(nil)

// Chat is not supported by the stub and always returns an error.
func (s *stubClient) Chat(ctx context.Context, req ollama.ChatRequest, stream bool) (*ollama.ChatResponse, <-chan ollama.ChatResponse, error) {
	return nil, nil, fmt.Errorf("stubClient.Chat not implemented")
}

// Embed is not supported by the stub and always returns an error.
func (s *stubClient) Embed(ctx context.Context, req ollama.EmbedRequest) (*ollama.EmbedResponse, error) {
	return nil, fmt.Errorf("stubClient.Embed not implemented")
}

// Tags returns tagsErr when set, otherwise the canned tags response.
func (s *stubClient) Tags(ctx context.Context) (*ollama.TagsResponse, error) {
	if s.tagsErr != nil {
		return nil, s.tagsErr
	}
	return s.tags, nil
}

// Ps returns psErr when set, otherwise the canned ps response.
func (s *stubClient) Ps(ctx context.Context) (*ollama.PsResponse, error) {
	if s.psErr != nil {
		return nil, s.psErr
	}
	return s.ps, nil
}

// RawChat delegates to rawChatFunc when set, then falls back to rawChatResp,
// and returns an error when neither is configured.
func (s *stubClient) RawChat(ctx context.Context, body []byte) (*http.Response, error) {
	if s.rawChatFunc != nil {
		return s.rawChatFunc(ctx, body)
	}
	if s.rawChatResp != nil {
		return s.rawChatResp, nil
	}
	return nil, fmt.Errorf("stubClient.RawChat not configured")
}

// RawEmbed delegates to rawEmbedFunc when set, then falls back to
// rawEmbedResp, and returns an error when neither is configured.
func (s *stubClient) RawEmbed(ctx context.Context, body []byte) (*http.Response, error) {
	if s.rawEmbedFunc != nil {
		return s.rawEmbedFunc(ctx, body)
	}
	if s.rawEmbedResp != nil {
		return s.rawEmbedResp, nil
	}
	return nil, fmt.Errorf("stubClient.RawEmbed not configured")
}

// newRawResponse builds a minimal *http.Response for testing: the given status
// code, a single Content-Type header, and an in-memory body.
func newRawResponse(status int, contentType string, body []byte) *http.Response {
	return &http.Response{
		StatusCode: status,
		Header:     http.Header{"Content-Type": {contentType}},
		Body:       io.NopCloser(bytes.NewReader(body)),
	}
}