Add different timeout scenarios to Process.checkHealthEndpoint #276 (#278 )

- add a TCP connection timeout of 500ms - increase HTTP client timeout to 5000ms. In this new behaviour the upstream has 500ms to accept a TCP connection and 5000ms to respond to the HTTP request.
add /completion endpoint (#275 )
2025-08-28 22:03:14 -07:00 · 2025-08-28 21:41:02 -07:00 · 2025-08-28 21:38:40 -07:00 · 2025-08-27 08:36:05 -07:00 · 2025-08-22 16:08:37 -07:00 · 2025-08-20 13:58:24 -07:00
15 changed files with 249 additions and 139 deletions
@@ -18,9 +18,12 @@ Written in golang, it is very easy to install (single binary with no dependencie
  - `v1/completions`
  - `v1/chat/completions`
  - `v1/embeddings`
  - `v1/rerank`, `v1/reranking`, `rerank`
  - `v1/audio/speech` ([#36](https://github.com/mostlygeek/llama-swap/issues/36))
  - `v1/audio/transcriptions` ([docs](https://github.com/mostlygeek/llama-swap/issues/41#issuecomment-2722637867))
 - ✅ llama-server (llama.cpp) supported endpoints:
  - `v1/rerank`, `v1/reranking`, `/rerank`
  - `/infill` - for code infilling
  - `/completion` - for completion endpoint
 - ✅ llama-swap custom API endpoints
  - `/ui` - web UI
  - `/log` - remote log monitoring
@@ -129,6 +129,15 @@ models:
      # - recommended to stick to sampling parameters
      strip_params: "temperature, top_p, top_k"
    # concurrencyLimit: overrides the allowed number of active parallel requests to a model
    # - optional, default: 0
    # - useful for limiting the number of active parallel requests a model can process
    # - must be set per model
    # - any number greater than 0 will override the internal default value of 10
     # - any request that exceeds the limit will receive an HTTP 429 Too Many Requests response
    # - recommended to be omitted and the default used
    concurrencyLimit: 0
  # Unlisted model example:
  "qwen-unlisted":
    # unlisted: boolean, true or false
@@ -153,6 +153,19 @@ func main() {
 	})
 	// llama-server compatibility: /completion
 	r.POST("/completion", func(c *gin.Context) {
 		c.Header("Content-Type", "application/json")
 		c.JSON(http.StatusOK, gin.H{
 			"responseMessage": *responseMessage,
 			"usage": gin.H{
 				"completion_tokens": 10,
 				"prompt_tokens":     25,
 				"total_tokens":      35,
 			},
 		})
 	})
 	// issue #41
 	r.POST("/v1/audio/transcriptions", func(c *gin.Context) {
 		// Parse the multipart form
@@ -5,12 +5,20 @@ import (
 	"fmt"
 	"io"
 	"net/http"
 	"strings"
 	"time"
 	"github.com/gin-gonic/gin"
 	"github.com/tidwall/gjson"
 )
 type MetricsRecorder struct {
 	metricsMonitor *MetricsMonitor
 	realModelName  string
 	//	isStreaming    bool
 	startTime time.Time
 }
 // MetricsMiddleware sets up the MetricsResponseWriter for capturing upstream requests
 func MetricsMiddleware(pm *ProxyManager) gin.HandlerFunc {
 	return func(c *gin.Context) {
@@ -41,49 +49,47 @@ func MetricsMiddleware(pm *ProxyManager) gin.HandlerFunc {
 			metricsRecorder: &MetricsRecorder{
 				metricsMonitor: pm.metricsMonitor,
 				realModelName:  realModelName,
 				isStreaming:    gjson.GetBytes(bodyBytes, "stream").Bool(),
 				startTime:      time.Now(),
 			},
 		}
 		c.Writer = writer
 		c.Next()
-		rec := writer.metricsRecorder
+		// check for streaming response
-		rec.processBody(writer.body)
+		if strings.Contains(c.Writer.Header().Get("Content-Type"), "text/event-stream") {
-	}
+			writer.metricsRecorder.processStreamingResponse(writer.body)
-}
+		} else {
 			writer.metricsRecorder.processNonStreamingResponse(writer.body)
 		}
 type MetricsRecorder struct {
 	metricsMonitor *MetricsMonitor
 	realModelName  string
 	isStreaming    bool
 	startTime      time.Time
 }
 // processBody handles response processing after request completes
 func (rec *MetricsRecorder) processBody(body []byte) {
 	if rec.isStreaming {
 		rec.processStreamingResponse(body)
 	} else {
 		rec.processNonStreamingResponse(body)
 	}
 }
 func (rec *MetricsRecorder) parseAndRecordMetrics(jsonData gjson.Result) bool {
 	usage := jsonData.Get("usage")
-	if !usage.Exists() {
+	timings := jsonData.Get("timings")
 	if !usage.Exists() && !timings.Exists() {
 		return false
 	}
 	// default values
-	outputTokens := int(jsonData.Get("usage.completion_tokens").Int())
+	outputTokens := 0
-	inputTokens := int(jsonData.Get("usage.prompt_tokens").Int())
+	inputTokens := 0
 	// timings data
 	tokensPerSecond := -1.0
 	promptPerSecond := -1.0
 	durationMs := int(time.Since(rec.startTime).Milliseconds())
 	if usage.Exists() {
 		outputTokens = int(jsonData.Get("usage.completion_tokens").Int())
 		inputTokens = int(jsonData.Get("usage.prompt_tokens").Int())
 	}
 	// use llama-server's timing data for tok/sec and duration as it is more accurate
-	if timings := jsonData.Get("timings"); timings.Exists() {
+	if timings.Exists() {
 		inputTokens = int(jsonData.Get("timings.prompt_n").Int())
 		outputTokens = int(jsonData.Get("timings.predicted_n").Int())
 		promptPerSecond = jsonData.Get("timings.prompt_per_second").Float()
 		tokensPerSecond = jsonData.Get("timings.predicted_per_second").Float()
 		durationMs = int(jsonData.Get("timings.prompt_ms").Float() + jsonData.Get("timings.predicted_ms").Float())
@@ -5,6 +5,7 @@ import (
 	"errors"
 	"fmt"
 	"io"
 	"net"
 	"net/http"
 	"net/url"
 	"os/exec"
@@ -363,8 +364,18 @@ func (p *Process) stopCommand() {
 }
 func (p *Process) checkHealthEndpoint(healthURL string) error {
 	client := &http.Client{
-		Timeout: 500 * time.Millisecond,
+		// wait a short time for a tcp connection to be established
 		Transport: &http.Transport{
 			DialContext: (&net.Dialer{
 				Timeout: 500 * time.Millisecond,
 			}).DialContext,
 		},
 		// give a long time to respond to the health check endpoint
 		// after the connection is established. See issue: 276
 		Timeout: 5000 * time.Millisecond,
 	}
 	req, err := http.NewRequest("GET", healthURL, nil)
@@ -60,10 +60,20 @@ func (pg *ProcessGroup) ProxyRequest(modelID string, writer http.ResponseWriter,
 	if pg.swap {
 		pg.Lock()
 		if pg.lastUsedProcess != modelID {
 			// is there something already running?
 			if pg.lastUsedProcess != "" {
 				pg.processes[pg.lastUsedProcess].Stop()
 			}
 			// wait for the request to the new model to be fully handled
 			// and prevent race conditions see issue #277
 			pg.processes[modelID].ProxyRequest(writer, request)
 			pg.lastUsedProcess = modelID
 			// short circuit and exit
 			pg.Unlock()
 			return nil
 		}
 		pg.Unlock()
 	}
@@ -4,6 +4,7 @@ import (
 	"bytes"
 	"net/http"
 	"net/http/httptest"
 	"sync"
 	"testing"
 	"github.com/stretchr/testify/assert"
@@ -44,32 +45,49 @@ func TestProcessGroup_HasMember(t *testing.T) {
 	assert.False(t, pg.HasMember("model3"))
 }
-func TestProcessGroup_ProxyRequestSwapIsTrue(t *testing.T) {
+// TestProcessGroup_ProxyRequestSwapIsTrueParallel tests that when swap is true
 // and multiple requests are made in parallel, only one process is running at a time.
 func TestProcessGroup_ProxyRequestSwapIsTrueParallel(t *testing.T) {
 	var processGroupTestConfig = AddDefaultGroupToConfig(Config{
 		HealthCheckTimeout: 15,
 		Models: map[string]ModelConfig{
 			// use the same listening port so if a model is already running, it will fail
 			// this is a way to test that swap isolation is working
 			// properly when there are parallel requests made at the
 			// same time.
 			"model1": getTestSimpleResponderConfigPort("model1", 9832),
 			"model2": getTestSimpleResponderConfigPort("model2", 9832),
 			"model3": getTestSimpleResponderConfigPort("model3", 9832),
 			"model4": getTestSimpleResponderConfigPort("model4", 9832),
 			"model5": getTestSimpleResponderConfigPort("model5", 9832),
 		},
 		Groups: map[string]GroupConfig{
 			"G1": {
 				Swap:    true,
 				Members: []string{"model1", "model2", "model3", "model4", "model5"},
 			},
 		},
 	})
 	pg := NewProcessGroup("G1", processGroupTestConfig, testLogger, testLogger)
 	defer pg.StopProcesses(StopWaitForInflightRequest)
-	tests := []string{"model1", "model2"}
+	tests := []string{"model1", "model2", "model3", "model4", "model5"}
 	var wg sync.WaitGroup
 	wg.Add(len(tests))
 	for _, modelName := range tests {
-		t.Run(modelName, func(t *testing.T) {
+		go func(modelName string) {
-			reqBody := `{"x", "y"}`
+			defer wg.Done()
-			req := httptest.NewRequest("POST", "/v1/chat/completions", bytes.NewBufferString(reqBody))
+			req := httptest.NewRequest("POST", "/v1/chat/completions", nil)
 			w := httptest.NewRecorder()
 			assert.NoError(t, pg.ProxyRequest(modelName, w, req))
 			assert.Equal(t, http.StatusOK, w.Code)
 			assert.Contains(t, w.Body.String(), modelName)
-
+		}(modelName)
 			// make sure only one process is in the running state
 			count := 0
 			for _, process := range pg.processes {
 				if process.CurrentState() == StateReady {
 					count++
 				}
 			}
 			assert.Equal(t, 1, count)
 		})
 	}
 	wg.Wait()
 }
 func TestProcessGroup_ProxyRequestSwapIsFalse(t *testing.T) {
@@ -191,11 +191,20 @@ func (pm *ProxyManager) setupGinEngine() {
 	// Support legacy /v1/completions api, see issue #12
 	pm.ginEngine.POST("/v1/completions", mm, pm.proxyOAIHandler)
-	// Support embeddings
+	// Support embeddings and reranking
 	pm.ginEngine.POST("/v1/embeddings", mm, pm.proxyOAIHandler)
 	// llama-server's /reranking endpoint + aliases
 	pm.ginEngine.POST("/reranking", mm, pm.proxyOAIHandler)
 	pm.ginEngine.POST("/rerank", mm, pm.proxyOAIHandler)
 	pm.ginEngine.POST("/v1/rerank", mm, pm.proxyOAIHandler)
 	pm.ginEngine.POST("/v1/reranking", mm, pm.proxyOAIHandler)
-	pm.ginEngine.POST("/rerank", mm, pm.proxyOAIHandler)
+
 	// llama-server's /infill endpoint for code infilling
 	pm.ginEngine.POST("/infill", mm, pm.proxyOAIHandler)
 	// llama-server's /completion endpoint
 	pm.ginEngine.POST("/completion", mm, pm.proxyOAIHandler)
 	// Support audio/speech endpoint
 	pm.ginEngine.POST("/v1/audio/speech", pm.proxyOAIHandler)
@@ -42,7 +42,6 @@ func TestProxyManager_SwapProcessCorrectly(t *testing.T) {
 		assert.Contains(t, w.Body.String(), modelName)
 	}
 }
 func TestProxyManager_SwapMultiProcess(t *testing.T) {
 	config := AddDefaultGroupToConfig(Config{
 		HealthCheckTimeout: 15,
@@ -834,6 +833,28 @@ func TestProxyManager_HealthEndpoint(t *testing.T) {
 	assert.Equal(t, "OK", rec.Body.String())
 }
 // Ensure the custom llama-server /completion endpoint proxies correctly
 func TestProxyManager_CompletionEndpoint(t *testing.T) {
 	config := AddDefaultGroupToConfig(Config{
 		HealthCheckTimeout: 15,
 		Models: map[string]ModelConfig{
 			"model1": getTestSimpleResponderConfig("model1"),
 		},
 		LogLevel: "error",
 	})
 	proxy := New(config)
 	defer proxy.StopProcesses(StopWaitForInflightRequest)
 	reqBody := `{"model":"model1"}`
 	req := httptest.NewRequest("POST", "/completion", bytes.NewBufferString(reqBody))
 	w := httptest.NewRecorder()
 	proxy.ServeHTTP(w, req)
 	assert.Equal(t, http.StatusOK, w.Code)
 	assert.Contains(t, w.Body.String(), "model1")
 }
 func TestProxyManager_StartupHooks(t *testing.T) {
 	// using real YAML as the configuration has gotten more complex
@@ -1,88 +1,78 @@
 import { useEffect, useCallback } from "react";
 import { BrowserRouter as Router, Routes, Route, Navigate, NavLink } from "react-router-dom";
 import { useTheme } from "./contexts/ThemeProvider";
-import { APIProvider } from "./contexts/APIProvider";
+import { useAPI } from "./contexts/APIProvider";
 import LogViewerPage from "./pages/LogViewer";
 import ModelPage from "./pages/Models";
 import ActivityPage from "./pages/Activity";
-import ConnectionStatus from "./components/ConnectionStatus";
+import ConnectionStatusIcon from "./components/ConnectionStatus";
 import { RiSunFill, RiMoonFill } from "react-icons/ri";
 import { usePersistentState } from "./hooks/usePersistentState";
 function App() {
-  const { isNarrow, toggleTheme, isDarkMode } = useTheme();
+  const { isNarrow, toggleTheme, isDarkMode, appTitle, setAppTitle, setConnectionState } = useTheme();
  const [appTitle, setAppTitle] = usePersistentState("app-title", "llama-swap");
  const handleTitleChange = useCallback(
    (newTitle: string) => {
-      setAppTitle(newTitle);
+      setAppTitle(newTitle.replace(/\n/g, "").trim().substring(0, 64) || "llama-swap");
      document.title = newTitle;
    },
    [setAppTitle]
  );
  const { connectionStatus } = useAPI();
   // Synchronize the document.title connection state with the actual connection state
  useEffect(() => {
-    document.title = appTitle; // Set initial title
+    setConnectionState(connectionStatus);
-  }, [appTitle]);
+  }, [connectionStatus]);
  return (
    <Router basename="/ui/">
-      <APIProvider>
+      <div className="flex flex-col h-screen">
-        <div className="flex flex-col h-screen">
+        <nav className="bg-surface border-b border-border p-2 h-[75px]">
-          <nav className="bg-surface border-b border-border p-2 h-[75px]">
+          <div className="flex items-center justify-between mx-auto px-4 h-full">
-            <div className="flex items-center justify-between mx-auto px-4 h-full">
+            {!isNarrow && (
-              {!isNarrow && (
+              <h1
-                <h1
+                contentEditable
-                  contentEditable
+                suppressContentEditableWarning
-                  suppressContentEditableWarning
+                className="flex items-center p-0 outline-none hover:bg-gray-100 dark:hover:bg-gray-700 rounded px-1"
-                  className="flex items-center p-0 outline-none hover:bg-gray-100 dark:hover:bg-gray-700 rounded px-1"
+                onBlur={(e) => handleTitleChange(e.currentTarget.textContent || "(set title)")}
-                  onBlur={(e) =>
+                onKeyDown={(e) => {
-                    handleTitleChange(e.currentTarget.textContent?.replace(/\n/g, "").trim() || "llama-swap")
+                  if (e.key === "Enter") {
                    e.preventDefault();
                    handleTitleChange(e.currentTarget.textContent || "(set title)");
                    e.currentTarget.blur();
                  }
-                  onKeyDown={(e) => {
+                }}
-                    if (e.key === "Enter") {
+              >
-                      e.preventDefault();
+                {appTitle}
-                      const sanitizedText =
+              </h1>
-                        e.currentTarget.textContent?.replace(/\n/g, "").trim().substring(0, 25) || "llama-swap";
+            )}
-                      handleTitleChange(sanitizedText);
+            <div className="flex items-center space-x-4">
-                      e.currentTarget.textContent = sanitizedText;
+              <NavLink to="/" className={({ isActive }) => (isActive ? "navlink active" : "navlink")}>
-                      e.currentTarget.blur();
+                Logs
-                    }
+              </NavLink>
-                  }}
+              <NavLink to="/models" className={({ isActive }) => (isActive ? "navlink active" : "navlink")}>
-                >
+                Models
-                  {appTitle}
+              </NavLink>
-                </h1>
+              <NavLink to="/activity" className={({ isActive }) => (isActive ? "navlink active" : "navlink")}>
-              )}
+                Activity
-              <div className="flex items-center space-x-4">
+              </NavLink>
-                <NavLink to="/" className={({ isActive }) => (isActive ? "navlink active" : "navlink")}>
+              <button className="" onClick={toggleTheme}>
-                  Logs
+                {isDarkMode ? <RiMoonFill /> : <RiSunFill />}
-                </NavLink>
+              </button>
-
+              <ConnectionStatusIcon />
                <NavLink to="/models" className={({ isActive }) => (isActive ? "navlink active" : "navlink")}>
                  Models
                </NavLink>
                <NavLink to="/activity" className={({ isActive }) => (isActive ? "navlink active" : "navlink")}>
                  Activity
                </NavLink>
                <button className="" onClick={toggleTheme}>
                  {isDarkMode ? <RiMoonFill /> : <RiSunFill />}
                </button>
                <ConnectionStatus />
              </div>
            </div>
-          </nav>
+          </div>
        </nav>
-          <main className="flex-1 overflow-auto p-4">
+        <main className="flex-1 overflow-auto p-4">
-            <Routes>
+          <Routes>
-              <Route path="/" element={<LogViewerPage />} />
+            <Route path="/" element={<LogViewerPage />} />
-              <Route path="/models" element={<ModelPage />} />
+            <Route path="/models" element={<ModelPage />} />
-              <Route path="/activity" element={<ActivityPage />} />
+            <Route path="/activity" element={<ActivityPage />} />
-              <Route path="*" element={<Navigate to="/" replace />} />
+            <Route path="*" element={<Navigate to="/" replace />} />
-            </Routes>
+          </Routes>
-          </main>
+        </main>
-        </div>
+      </div>
      </APIProvider>
    </Router>
  );
 }
@@ -1,21 +1,11 @@
 import { useAPI } from "../contexts/APIProvider";
-import { useEffect, useState, useMemo } from "react";
+import { useMemo } from "react";
-type ConnectionStatus = "disconnected" | "connecting" | "connected";
+const ConnectionStatusIcon = () => {
-
+  const { connectionStatus } = useAPI();
 const ConnectionStatus = () => {
  const { getConnectionStatus } = useAPI();
  const [eventStreamStatus, setEventStreamStatus] = useState<ConnectionStatus>("disconnected");
  useEffect(() => {
    const interval = setInterval(() => {
      setEventStreamStatus(getConnectionStatus());
    }, 1000);
    return () => clearInterval(interval);
  });
  const eventStatusColor = useMemo(() => {
-    switch (eventStreamStatus) {
+    switch (connectionStatus) {
      case "connected":
        return "bg-green-500";
      case "connecting":
@@ -24,13 +14,13 @@ const ConnectionStatus = () => {
      default:
        return "bg-red-500";
    }
-  }, [eventStreamStatus]);
+  }, [connectionStatus]);
  return (
-    <div className="flex items-center" title={`event stream: ${eventStreamStatus}`}>
+    <div className="flex items-center" title={`event stream: ${connectionStatus}`}>
      <span className={`inline-block w-3 h-3 rounded-full ${eventStatusColor} mr-2`}></span>
    </div>
  );
 };
-export default ConnectionStatus;
+export default ConnectionStatusIcon;
@@ -1,4 +1,5 @@
 import { useRef, createContext, useState, useContext, useEffect, useCallback, useMemo, type ReactNode } from "react";
 import type { ConnectionState } from "../lib/types";
 type ModelStatus = "ready" | "starting" | "stopping" | "stopped" | "shutdown" | "unknown";
 const LOG_LENGTH_LIMIT = 1024 * 100; /* 100KB of log data */
@@ -20,7 +21,7 @@ interface APIProviderType {
  proxyLogs: string;
  upstreamLogs: string;
  metrics: Metrics[];
-  getConnectionStatus: () => "connected" | "connecting" | "disconnected";
+  connectionStatus: ConnectionState;
 }
 interface Metrics {
@@ -53,6 +54,7 @@ export function APIProvider({ children, autoStartAPIEvents = true }: APIProvider
  const [proxyLogs, setProxyLogs] = useState("");
  const [upstreamLogs, setUpstreamLogs] = useState("");
  const [metrics, setMetrics] = useState<Metrics[]>([]);
  const [connectionStatus, setConnectionState] = useState<ConnectionState>("disconnected");
  const apiEventSource = useRef<EventSource | null>(null);
  const [models, setModels] = useState<Model[]>([]);
@@ -64,16 +66,6 @@ export function APIProvider({ children, autoStartAPIEvents = true }: APIProvider
    });
  }, []);
  const getConnectionStatus = useCallback(() => {
    if (apiEventSource.current?.readyState === EventSource.OPEN) {
      return "connected";
    } else if (apiEventSource.current?.readyState === EventSource.CONNECTING) {
      return "connecting";
    } else {
      return "disconnected";
    }
  }, []);
  const enableAPIEvents = useCallback((enabled: boolean) => {
    if (!enabled) {
      apiEventSource.current?.close();
@@ -86,7 +78,9 @@ export function APIProvider({ children, autoStartAPIEvents = true }: APIProvider
    const initialDelay = 1000; // 1 second
    const connect = () => {
      apiEventSource.current = null;
      const eventSource = new EventSource("/api/events");
      setConnectionState("connecting");
      eventSource.onopen = () => {
        // clear everything out on connect to keep things in sync
@@ -94,6 +88,9 @@ export function APIProvider({ children, autoStartAPIEvents = true }: APIProvider
        setUpstreamLogs("");
        setMetrics([]); // clear metrics on reconnect
        setModels([]); // clear models on reconnect
        apiEventSource.current = eventSource;
        retryCount = 0;
        setConnectionState("connected");
      };
      eventSource.onmessage = (e: MessageEvent) => {
@@ -138,14 +135,14 @@ export function APIProvider({ children, autoStartAPIEvents = true }: APIProvider
          console.error(e.data, err);
        }
      };
      eventSource.onerror = () => {
        eventSource.close();
        retryCount++;
        const delay = Math.min(initialDelay * Math.pow(2, retryCount - 1), 5000);
        setConnectionState("disconnected");
        setTimeout(connect, delay);
      };
      apiEventSource.current = eventSource;
    };
    connect();
@@ -213,7 +210,7 @@ export function APIProvider({ children, autoStartAPIEvents = true }: APIProvider
      proxyLogs,
      upstreamLogs,
      metrics,
-      getConnectionStatus,
+      connectionStatus,
    }),
    [models, listModels, unloadAllModels, loadModel, enableAPIEvents, proxyLogs, upstreamLogs, metrics]
  );
@@ -1,5 +1,6 @@
 import { createContext, useContext, useEffect, type ReactNode, useMemo, useState } from "react";
 import { usePersistentState } from "../hooks/usePersistentState";
 import type { ConnectionState } from "../lib/types";
 type ScreenWidth = "xs" | "sm" | "md" | "lg" | "xl" | "2xl";
 type ThemeContextType = {
@@ -7,6 +8,11 @@ type ThemeContextType = {
  screenWidth: ScreenWidth;
  isNarrow: boolean;
  toggleTheme: () => void;
  // for managing the window title and connection state information
  appTitle: string;
  setAppTitle: (title: string) => void;
  setConnectionState: (state: ConnectionState) => void;
 };
 const ThemeContext = createContext<ThemeContextType | undefined>(undefined);
@@ -16,6 +22,17 @@ type ThemeProviderProps = {
 };
 export function ThemeProvider({ children }: ThemeProviderProps) {
  const [appTitle, setAppTitle] = usePersistentState("app-title", "llama-swap");
  const [connectionState, setConnectionState] = useState<ConnectionState>("disconnected");
  /**
    * Set document.title to the app title prefixed with a connection status icon
   */
  useEffect(() => {
    const connectionIcon = connectionState === "connecting" ? "🟡" : connectionState === "connected" ? "🟢" : "🔴";
    document.title = connectionIcon + " " + appTitle; // Set initial title
  }, [appTitle, connectionState]);
  const [isDarkMode, setIsDarkMode] = usePersistentState<boolean>("theme", false);
  const [screenWidth, setScreenWidth] = useState<ScreenWidth>("md"); // Default to md
@@ -55,7 +72,19 @@ export function ThemeProvider({ children }: ThemeProviderProps) {
  }, [screenWidth]);
  return (
-    <ThemeContext.Provider value={{ isDarkMode, toggleTheme, screenWidth, isNarrow }}>{children}</ThemeContext.Provider>
+    <ThemeContext.Provider
      value={{
        isDarkMode,
        toggleTheme,
        screenWidth,
        isNarrow,
        appTitle,
        setAppTitle,
        setConnectionState,
      }}
    >
      {children}
    </ThemeContext.Provider>
  );
 }
@@ -0,0 +1 @@
 export type ConnectionState = "connected" | "connecting" | "disconnected";
@@ -3,11 +3,14 @@ import { createRoot } from "react-dom/client";
 import "./index.css";
 import App from "./App.tsx";
 import { ThemeProvider } from "./contexts/ThemeProvider";
 import { APIProvider } from "./contexts/APIProvider";
 createRoot(document.getElementById("root")!).render(
  <StrictMode>
    <ThemeProvider>
-      <App />
+      <APIProvider>
        <App />
      </APIProvider>
    </ThemeProvider>
  </StrictMode>
 );
Author	SHA1	Message	Date
Benson Wong	831a90d3b0	Add different timeout scenarios to Process.checkHealthEndpoint #276 (#278 ) - add a TCP connection timeout of 500ms - increase HTTP client timeout to 5000ms In this new behaviour the upstream has 500ms to accept a tcp connection and 5000ms to respond to the HTTP request.	2025-08-28 22:03:14 -07:00
Yandrik	977f1856bb	add /completion endpoint (#275 ) * feat: add /completion endpoint * chore: reformat using gofmt	2025-08-28 21:41:02 -07:00
Benson Wong	52b329f7bc	Fix #277 race condition in ProcessGroup.ProxyRequest when swap=true	2025-08-28 21:38:40 -07:00
Benson Wong	57803fd3aa	Support llama-server's /infill endpoint (#272 ) Add support for llama-server's /infill endpoint and metrics gathering on the Activities page.	2025-08-27 08:36:05 -07:00
Benson Wong	c55d0cc842	Add docs for model.concurrencyLimit #263 [skip ci]	2025-08-22 16:08:37 -07:00
Benson Wong	7acbaf4712	Add connection status indicator in UI (#260 ) * show connection status as icon in UI title * make connection status event driven	2025-08-20 13:58:24 -07:00
		`@@ -0,0 +1 @@`
							`export type ConnectionState = "connected" \| "connecting" \| "disconnected";`