// foreman/internal/server/server_test.go

package server

import (
	"bytes"
	"context"
	"encoding/json"
	"fmt"
	"io"
	"log/slog"
	"net/http"
	"net/http/httptest"
	"path/filepath"
	"strings"
	"sync"
	"sync/atomic"
	"testing"
	"time"

	"gitea.stevedudenhoeffer.com/steve/foreman/internal/config"
	"gitea.stevedudenhoeffer.com/steve/foreman/internal/ollama"
	"gitea.stevedudenhoeffer.com/steve/foreman/internal/store"
	"gitea.stevedudenhoeffer.com/steve/foreman/internal/webhook"
	"gitea.stevedudenhoeffer.com/steve/foreman/internal/worker"
)

// newTestServer assembles a Server for tests around the supplied client.
//
// The store is opened at test.db inside t.TempDir() and closed during test
// cleanup; all logging goes to io.Discard. The inventory, notifier, webhook
// dispatcher and worker are constructed but nothing is started here: the
// inventory is not refreshed and the worker loop is not run. Use
// newTestServerWithInventory when a test needs both.
//
// It returns the Server together with the store backing it.
func newTestServer(t *testing.T, cfg config.Config, client ollama.Client) (*Server, *store.Store) {
	t.Helper()

	db, err := store.Open(filepath.Join(t.TempDir(), "test.db"))
	if err != nil {
		t.Fatalf("store.Open: %v", err)
	}
	t.Cleanup(func() { db.Close() })

	// Dependencies are built in the order the later constructors need them.
	var (
		quiet  = slog.New(slog.NewJSONHandler(io.Discard, nil))
		models = ollama.NewModelInventory(client, quiet)
		notify = worker.NewNotifier()
		hooks  = webhook.NewDispatcher("", quiet)
		jobs   = worker.New(db, client, models, notify, hooks, quiet, "-1")
	)
	return New(cfg, db, client, models, notify, jobs, hooks, quiet), db
}

// newTestServerWithInventory creates a Server via newTestServer, refreshes the
// inventory once (failing the test if the refresh returns an error), and runs
// the worker loop in a background goroutine so chat requests complete.
//
// During test cleanup the worker's context is cancelled and the cleanup then
// waits, for a bounded time, until the goroutine has returned. Cleanups run
// last-in-first-out, so this wait happens before newTestServer's cleanup
// closes the store, instead of leaving the worker running while the store is
// being closed.
func newTestServerWithInventory(t *testing.T, cfg config.Config, client ollama.Client) (*Server, *store.Store) {
	t.Helper()
	srv, st := newTestServer(t, cfg, client)
	if err := srv.inventory.Refresh(context.Background()); err != nil {
		t.Fatalf("inventory.Refresh: %v", err)
	}

	// Start the worker loop so chat requests complete. done is closed when
	// Run returns, which lets cleanup observe the goroutine's exit.
	ctx, cancel := context.WithCancel(context.Background())
	done := make(chan struct{})
	go func() {
		defer close(done)
		srv.workerRef.Run(ctx)
	}()

	t.Cleanup(func() {
		cancel()
		select {
		case <-done:
		case <-time.After(5 * time.Second):
			// Only log: a worker that ignores cancellation should not
			// hang the test binary or flip an otherwise passing test.
			t.Log("worker loop did not stop within 5s of cancellation")
		}
	})

	return srv, st
}

func TestHealthz_OK(t *testing.T) {
	stub := &stubClient{
		tags: &ollama.TagsResponse{},
		ps:   &ollama.PsResponse{},
	}
	srv, _ := newTestServerWithInventory(t, config.Config{
		OllamaURL: "http://localhost:11434",
	}, stub)

	req := httptest.NewRequest(http.MethodGet, "/healthz", nil)
	rec := httptest.NewRecorder()
	srv.Handler().ServeHTTP(rec, req)

	if rec.Code != http.StatusOK {
		t.Fatalf("status = %d, want %d", rec.Code, http.StatusOK)
	}

	var resp healthResponse
	if err := json.NewDecoder(rec.Body).Decode(&resp); err != nil {
		t.Fatalf("decode response: %v", err)
	}
	if resp.Status != "ok" {
		t.Errorf("status = %q, want %q", resp.Status, "ok")
	}
	if resp.Degraded {
		t.Error("degraded should be false")
	}
}

// TestHealthz_NoAuthRequired checks that /healthz is served with 200 to a
// request carrying no Authorization header, even though the server is
// configured with a token.
func TestHealthz_NoAuthRequired(t *testing.T) {
	client := &stubClient{tags: &ollama.TagsResponse{}, ps: &ollama.PsResponse{}}
	cfg := config.Config{
		OllamaURL: "http://localhost:11434",
		Token:     "secret-token",
	}
	srv, _ := newTestServerWithInventory(t, cfg, client)

	// Deliberately no Authorization header on this request.
	probe := httptest.NewRequest(http.MethodGet, "/healthz", nil)
	out := httptest.NewRecorder()
	srv.Handler().ServeHTTP(out, probe)

	if out.Code == http.StatusOK {
		return
	}
	t.Errorf("status = %d, want %d (healthz should bypass auth)", out.Code, http.StatusOK)
}

// TestAuth_RequiredWhenTokenSet checks bearer-token enforcement when a token
// is configured: a missing header, a wrong token, and a non-Bearer scheme are
// each expected to get 401, while the correct token is expected to get past
// auth (seen as 404 because the probed route does not exist).
func TestAuth_RequiredWhenTokenSet(t *testing.T) {
	client := &stubClient{
		tags: &ollama.TagsResponse{},
		ps:   &ollama.PsResponse{},
	}
	srv, _ := newTestServerWithInventory(t, config.Config{
		OllamaURL: "http://localhost:11434",
		Token:     "secret-token",
	}, client)

	const route = "/some-route"
	cases := []struct {
		name, path, auth string
		want             int
	}{
		{"no auth header", route, "", http.StatusUnauthorized},
		{"wrong token", route, "Bearer wrong-token", http.StatusUnauthorized},
		// Route doesn't exist so we get 404, but auth passed.
		{"correct token", route, "Bearer secret-token", http.StatusNotFound},
		{"invalid scheme", route, "Basic dXNlcjpwYXNz", http.StatusUnauthorized},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			r := httptest.NewRequest(http.MethodGet, tc.path, nil)
			// An empty auth value means "send no header at all".
			if tc.auth != "" {
				r.Header.Set("Authorization", tc.auth)
			}
			w := httptest.NewRecorder()
			srv.Handler().ServeHTTP(w, r)

			if w.Code != tc.want {
				t.Errorf("status = %d, want %d", w.Code, tc.want)
			}
		})
	}
}

// TestAuth_NotRequiredWhenNoToken checks that a request without credentials
// is not answered with 401 when the config carries no token. Any other status
// is accepted; the test only rules out 401.
func TestAuth_NotRequiredWhenNoToken(t *testing.T) {
	client := &stubClient{
		tags: &ollama.TagsResponse{},
		ps:   &ollama.PsResponse{},
	}
	cfg := config.Config{OllamaURL: "http://localhost:11434"}
	srv, _ := newTestServerWithInventory(t, cfg, client)

	r := httptest.NewRequest(http.MethodGet, "/some-route", nil)
	w := httptest.NewRecorder()
	srv.Handler().ServeHTTP(w, r)

	if unauthorized := w.Code == http.StatusUnauthorized; unauthorized {
		t.Error("should not require auth when no token is configured")
	}
}

func TestTags_ReturnsCachedModels(t *testing.T) {
	stub := &stubClient{
		tags: &ollama.TagsResponse{
			Models: []ollama.ModelInfo{
				{Name: "qwen3:30b", Model: "qwen3:30b", Size: 19000000000},
				{Name: "nomic-embed-text", Model: "nomic-embed-text", Size: 300000000},
			},
		},
		ps: &ollama.PsResponse{},
	}
	srv, _ := newTestServerWithInventory(t, config.Config{
		OllamaURL: "http://localhost:11434",
	}, stub)

	req := httptest.NewRequest(http.MethodGet, "/api/tags", nil)
	rec := httptest.NewRecorder()
	srv.Handler().ServeHTTP(rec, req)

	if rec.Code != http.StatusOK {
		t.Fatalf("status = %d, want %d", rec.Code, http.StatusOK)
	}

	var resp ollama.TagsResponse
	if err := json.NewDecoder(rec.Body).Decode(&resp); err != nil {
		t.Fatalf("decode: %v", err)
	}
	if len(resp.Models) != 2 {
		t.Fatalf("got %d models, want 2", len(resp.Models))
	}
	if resp.Models[0].Name != "qwen3:30b" {
		t.Errorf("first model = %q, want %q", resp.Models[0].Name, "qwen3:30b")
	}
}

func TestPs_ReturnsCachedRunningModels(t *testing.T) {
	stub := &stubClient{
		tags: &ollama.TagsResponse{},
		ps: &ollama.PsResponse{
			Models: []ollama.RunningModel{
				{Name: "nomic-embed-text", Model: "nomic-embed-text", Size: 300000000},
			},
		},
	}
	srv, _ := newTestServerWithInventory(t, config.Config{
		OllamaURL: "http://localhost:11434",
	}, stub)

	req := httptest.NewRequest(http.MethodGet, "/api/ps", nil)
	rec := httptest.NewRecorder()
	srv.Handler().ServeHTTP(rec, req)

	if rec.Code != http.StatusOK {
		t.Fatalf("status = %d, want %d", rec.Code, http.StatusOK)
	}

	var resp ollama.PsResponse
	if err := json.NewDecoder(rec.Body).Decode(&resp); err != nil {
		t.Fatalf("decode: %v", err)
	}
	if len(resp.Models) != 1 {
		t.Fatalf("got %d models, want 1", len(resp.Models))
	}
}

func TestChat_UnknownModel404(t *testing.T) {
	stub := &stubClient{
		tags: &ollama.TagsResponse{
			Models: []ollama.ModelInfo{
				{Name: "qwen3:30b"},
			},
		},
		ps: &ollama.PsResponse{},
	}
	srv, _ := newTestServerWithInventory(t, config.Config{
		OllamaURL: "http://localhost:11434",
	}, stub)

	body := `{"model":"nonexistent-model","messages":[{"role":"user","content":"hi"}],"stream":false}`
	req := httptest.NewRequest(http.MethodPost, "/api/chat", strings.NewReader(body))
	rec := httptest.NewRecorder()
	srv.Handler().ServeHTTP(rec, req)

	if rec.Code != http.StatusNotFound {
		t.Fatalf("status = %d, want %d", rec.Code, http.StatusNotFound)
	}
}

// TestChat_NonStreaming checks that a non-streaming POST /api/chat answers
// 200 with Content-Type application/json and a body that decodes to the
// assistant message the stub's Chat hook returned.
func TestChat_NonStreaming(t *testing.T) {
	// canned is handed back by every Chat call on the stub.
	canned := &ollama.ChatResponse{
		Model:   "qwen3:30b",
		Done:    true,
		Message: &ollama.Message{Role: "assistant", Content: "Hello!"},
	}
	reply := func(_ context.Context, _ ollama.ChatRequest, _ bool) (*ollama.ChatResponse, <-chan ollama.ChatResponse, error) {
		return canned, nil, nil
	}

	client := &stubClient{
		tags:     &ollama.TagsResponse{Models: []ollama.ModelInfo{{Name: "qwen3:30b"}}},
		ps:       &ollama.PsResponse{},
		chatFunc: reply,
	}
	cfg := config.Config{OllamaURL: "http://localhost:11434"}
	srv, _ := newTestServerWithInventory(t, cfg, client)

	const payload = `{"model":"qwen3:30b","messages":[{"role":"user","content":"hi"}],"stream":false}`
	r := httptest.NewRequest(http.MethodPost, "/api/chat", strings.NewReader(payload))
	w := httptest.NewRecorder()
	srv.Handler().ServeHTTP(w, r)

	if w.Code != http.StatusOK {
		t.Fatalf("status = %d, want %d; body: %s", w.Code, http.StatusOK, w.Body.String())
	}

	if ct := w.Header().Get("Content-Type"); ct != "application/json" {
		t.Errorf("Content-Type = %q, want %q", ct, "application/json")
	}

	var got ollama.ChatResponse
	if err := json.NewDecoder(w.Body).Decode(&got); err != nil {
		t.Fatalf("decode: %v", err)
	}
	if msg := got.Message; msg == nil || msg.Content != "Hello!" {
		t.Errorf("response content = %v, want Hello!", msg)
	}
}

func TestChat_Serialization(t *testing.T) {
	// Track concurrent requests at the stub.
	var inflight atomic.Int32
	var maxInflight atomic.Int32

	stub := &stubClient{
		tags: &ollama.TagsResponse{
			Models: []ollama.ModelInfo{{Name: "qwen3:30b"}},
		},
		ps: &ollama.PsResponse{},
		chatFunc: func(ctx context.Context, req ollama.ChatRequest, stream bool) (*ollama.ChatResponse, <-chan ollama.ChatResponse, error) {
			cur := inflight.Add(1)
			defer inflight.Add(-1)
			for {
				old := maxInflight.Load()
				if cur <= old || maxInflight.CompareAndSwap(old, cur) {
					break
				}
			}
			// Simulate work.
			time.Sleep(50 * time.Millisecond)
			resp := &ollama.ChatResponse{Model: "qwen3:30b", Done: true, Message: &ollama.Message{Role: "assistant", Content: "ok"}}
			return resp, nil, nil
		},
	}
	srv, _ := newTestServerWithInventory(t, config.Config{
		OllamaURL: "http://localhost:11434",
	}, stub)

	var wg sync.WaitGroup
	for i := 0; i < 3; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			body := `{"model":"qwen3:30b","messages":[{"role":"user","content":"hi"}],"stream":false}`
			req := httptest.NewRequest(http.MethodPost, "/api/chat", strings.NewReader(body))
			rec := httptest.NewRecorder()
			srv.Handler().ServeHTTP(rec, req)
			if rec.Code != http.StatusOK {
				t.Errorf("status = %d, want %d; body: %s", rec.Code, http.StatusOK, rec.Body.String())
			}
		}()
	}
	wg.Wait()

	if got := maxInflight.Load(); got > 1 {
		t.Errorf("max concurrent chat requests at target = %d, want 1 (worker should serialize)", got)
	}
}

func TestEmbed_ConcurrentBypassesGate(t *testing.T) {
	// Track concurrent embed requests.
	var inflight atomic.Int32
	var maxInflight atomic.Int32

	stub := &stubClient{
		tags: &ollama.TagsResponse{
			Models: []ollama.ModelInfo{{Name: "qwen3:30b"}},
		},
		ps: &ollama.PsResponse{},
		rawEmbedFunc: func(ctx context.Context, body []byte) (*http.Response, error) {
			cur := inflight.Add(1)
			defer inflight.Add(-1)
			for {
				old := maxInflight.Load()
				if cur <= old || maxInflight.CompareAndSwap(old, cur) {
					break
				}
			}
			// Simulate some work so concurrent requests overlap.
			time.Sleep(50 * time.Millisecond)
			resp := ollama.EmbedResponse{Model: "nomic-embed-text", Embeddings: [][]float64{{0.1, 0.2}}}
			b, _ := json.Marshal(resp)
			return newRawResponse(200, "application/json", b), nil
		},
	}
	srv, _ := newTestServerWithInventory(t, config.Config{
		OllamaURL: "http://localhost:11434",
	}, stub)

	var wg sync.WaitGroup
	for i := 0; i < 5; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			body := `{"model":"nomic-embed-text","input":"test"}`
			req := httptest.NewRequest(http.MethodPost, "/api/embed", strings.NewReader(body))
			rec := httptest.NewRecorder()
			srv.Handler().ServeHTTP(rec, req)
			if rec.Code != http.StatusOK {
				t.Errorf("embed status = %d, want %d", rec.Code, http.StatusOK)
			}
		}()
	}
	wg.Wait()

	if got := maxInflight.Load(); got < 2 {
		t.Errorf("max concurrent embed requests = %d, want >= 2 (embeds should run in parallel)", got)
	}
}

// TestEmbed_AlsoWorksOnEmbeddingsPath checks that POST /api/embeddings
// answers 200 when the stub's RawEmbed hook returns a 200 JSON response.
func TestEmbed_AlsoWorksOnEmbeddingsPath(t *testing.T) {
	embedResp := ollama.EmbedResponse{
		Model:      "nomic-embed-text",
		Embeddings: [][]float64{{0.1, 0.2, 0.3}},
	}
	// Fail fast on an encoding error instead of silently serving an empty
	// body from the stub.
	respBytes, err := json.Marshal(embedResp)
	if err != nil {
		t.Fatalf("marshal embed response: %v", err)
	}

	stub := &stubClient{
		tags: &ollama.TagsResponse{},
		ps:   &ollama.PsResponse{},
		rawEmbedFunc: func(ctx context.Context, body []byte) (*http.Response, error) {
			return newRawResponse(200, "application/json", respBytes), nil
		},
	}
	srv, _ := newTestServerWithInventory(t, config.Config{
		OllamaURL: "http://localhost:11434",
	}, stub)

	body := `{"model":"nomic-embed-text","input":"test"}`
	req := httptest.NewRequest(http.MethodPost, "/api/embeddings", strings.NewReader(body))
	rec := httptest.NewRecorder()
	srv.Handler().ServeHTTP(rec, req)

	if rec.Code != http.StatusOK {
		t.Fatalf("status = %d, want %d", rec.Code, http.StatusOK)
	}
}

// TestHealthz_DegradedFromInventory checks that /healthz still answers 200
// but reports degraded=true after an inventory refresh during which the
// stub's Tags call returned an error.
func TestHealthz_DegradedFromInventory(t *testing.T) {
	client := &stubClient{
		tagsErr: fmt.Errorf("connection refused"),
		ps:      &ollama.PsResponse{},
	}
	cfg := config.Config{OllamaURL: "http://localhost:11434"}
	srv, _ := newTestServer(t, cfg, client)

	// Refresh will fail, setting degraded = true. Its result is discarded on
	// purpose: the assertion below is on the health endpoint, not on Refresh.
	_ = srv.inventory.Refresh(context.Background())

	r := httptest.NewRequest(http.MethodGet, "/healthz", nil)
	w := httptest.NewRecorder()
	srv.Handler().ServeHTTP(w, r)

	if w.Code != http.StatusOK {
		t.Fatalf("status = %d, want %d", w.Code, http.StatusOK)
	}

	var health healthResponse
	if err := json.NewDecoder(w.Body).Decode(&health); err != nil {
		t.Fatalf("decode: %v", err)
	}
	if degraded := health.Degraded; !degraded {
		t.Error("expected degraded=true when inventory poll failed")
	}
}

func TestChat_ContextCancellation(t *testing.T) {
	// Chat function that blocks forever to simulate a slow worker.
	stub := &stubClient{
		tags: &ollama.TagsResponse{
			Models: []ollama.ModelInfo{{Name: "qwen3:30b"}},
		},
		ps: &ollama.PsResponse{},
		chatFunc: func(ctx context.Context, req ollama.ChatRequest, stream bool) (*ollama.ChatResponse, <-chan ollama.ChatResponse, error) {
			<-ctx.Done()
			return nil, nil, ctx.Err()
		},
	}
	srv, _ := newTestServerWithInventory(t, config.Config{
		OllamaURL: "http://localhost:11434",
	}, stub)

	ctx, cancel := context.WithTimeout(context.Background(), 200*time.Millisecond)
	defer cancel()

	body := `{"model":"qwen3:30b","messages":[{"role":"user","content":"hi"}],"stream":false}`
	req := httptest.NewRequestWithContext(ctx, http.MethodPost, "/api/chat", strings.NewReader(body))
	rec := httptest.NewRecorder()
	srv.Handler().ServeHTTP(rec, req)

	if rec.Code != http.StatusServiceUnavailable {
		t.Fatalf("status = %d, want %d", rec.Code, http.StatusServiceUnavailable)
	}
}

// --- Stub client for testing ---

// stubClient implements ollama.Client for testing. Each method either returns
// the canned value stored on the struct or calls the hook configured for it.
type stubClient struct {
	// Canned results for Tags and Ps. A non-nil tagsErr/psErr is returned
	// instead of the corresponding response.
	tags    *ollama.TagsResponse
	tagsErr error
	ps      *ollama.PsResponse
	psErr   error

	// chatFunc handles Chat when set; otherwise Chat returns an error.
	// RawChat uses rawChatFunc if set, else rawChatResp if non-nil, else
	// returns an error.
	chatFunc    func(ctx context.Context, req ollama.ChatRequest, stream bool) (*ollama.ChatResponse, <-chan ollama.ChatResponse, error)
	rawChatResp *http.Response
	rawChatFunc func(ctx context.Context, body []byte) (*http.Response, error)

	// RawEmbed uses rawEmbedFunc if set, else rawEmbedResp if non-nil, else
	// returns an error.
	rawEmbedResp *http.Response
	rawEmbedFunc func(ctx context.Context, body []byte) (*http.Response, error)
}

// Compile-time check that *stubClient satisfies ollama.Client, so a change to
// the interface is reported here rather than at each call site.
var _ ollama.Client = (*stubClient)(nil)

// Chat forwards to chatFunc when one is configured and passes its results
// through unchanged. With no chatFunc it returns a "not implemented" error
// and nil for both the response and the channel.
func (s *stubClient) Chat(ctx context.Context, req ollama.ChatRequest, stream bool) (*ollama.ChatResponse, <-chan ollama.ChatResponse, error) {
	if s.chatFunc == nil {
		return nil, nil, fmt.Errorf("stubClient.Chat not implemented")
	}
	return s.chatFunc(ctx, req, stream)
}

// Embed is not configurable on the stub: it ignores its arguments and always
// returns a nil response with a "not implemented" error.
func (s *stubClient) Embed(ctx context.Context, req ollama.EmbedRequest) (*ollama.EmbedResponse, error) {
	err := fmt.Errorf("stubClient.Embed not implemented")
	return nil, err
}

// Tags returns tagsErr (with a nil response) when it is set, and otherwise
// the canned tags value with a nil error.
func (s *stubClient) Tags(ctx context.Context) (*ollama.TagsResponse, error) {
	if err := s.tagsErr; err != nil {
		return nil, err
	}
	return s.tags, nil
}

// Ps returns psErr (with a nil response) when it is set, and otherwise the
// canned ps value with a nil error.
func (s *stubClient) Ps(ctx context.Context) (*ollama.PsResponse, error) {
	if err := s.psErr; err != nil {
		return nil, err
	}
	return s.ps, nil
}

// RawChat resolves its result in priority order: the rawChatFunc hook if
// set, then the canned rawChatResp if non-nil, and otherwise a
// "not configured" error.
func (s *stubClient) RawChat(ctx context.Context, body []byte) (*http.Response, error) {
	switch {
	case s.rawChatFunc != nil:
		return s.rawChatFunc(ctx, body)
	case s.rawChatResp != nil:
		return s.rawChatResp, nil
	default:
		return nil, fmt.Errorf("stubClient.RawChat not configured")
	}
}

// RawEmbed resolves its result in priority order: the rawEmbedFunc hook if
// set, then the canned rawEmbedResp if non-nil, and otherwise a
// "not configured" error.
func (s *stubClient) RawEmbed(ctx context.Context, body []byte) (*http.Response, error) {
	switch {
	case s.rawEmbedFunc != nil:
		return s.rawEmbedFunc(ctx, body)
	case s.rawEmbedResp != nil:
		return s.rawEmbedResp, nil
	default:
		return nil, fmt.Errorf("stubClient.RawEmbed not configured")
	}
}

// newRawResponse returns an *http.Response carrying only the given status
// code, a single Content-Type header, and a body that reads back the supplied
// bytes. Closing the body is a no-op. All other response fields are left at
// their zero values.
func newRawResponse(status int, contentType string, body []byte) *http.Response {
	hdr := make(http.Header, 1)
	hdr["Content-Type"] = []string{contentType}

	resp := new(http.Response)
	resp.StatusCode = status
	resp.Header = hdr
	resp.Body = io.NopCloser(bytes.NewReader(body))
	return resp
}