Compare commits

..

6 Commits

Author SHA1 Message Date
Benson Wong 831a90d3b0 Add different timeout scenarios to Process.checkHealthEndpoint #276 (#278)
- add a TCP connection timeout of 500ms
- increase HTTP client timeout to 5000ms

In this new behaviour the upstream has 500ms to accept a tcp connection
and 5000ms to respond to the HTTP request.
2025-08-28 22:03:14 -07:00
Yandrik 977f1856bb add /completion endpoint (#275)
* feat: add /completion endpoint
* chore: reformat using gofmt
2025-08-28 21:41:02 -07:00
Benson Wong 52b329f7bc Fix #277 race condition in ProcessGroup.ProxyRequest when swap=true 2025-08-28 21:38:40 -07:00
Benson Wong 57803fd3aa Support llama-server's /infill endpoint (#272)
Add support for llama-server's /infill endpoint and metrics gathering on the Activities page.
2025-08-27 08:36:05 -07:00
Benson Wong c55d0cc842 Add docs for model.concurrencyLimit #263 [skip ci] 2025-08-22 16:08:37 -07:00
Benson Wong 7acbaf4712 Add connection status indicator in UI (#260)
* show connection status as icon in UI title
* make connection status event driven
2025-08-20 13:58:24 -07:00
15 changed files with 249 additions and 139 deletions
+4 -1
View File
@@ -18,9 +18,12 @@ Written in golang, it is very easy to install (single binary with no dependencie
- `v1/completions` - `v1/completions`
- `v1/chat/completions` - `v1/chat/completions`
- `v1/embeddings` - `v1/embeddings`
- `v1/rerank`, `v1/reranking`, `rerank`
- `v1/audio/speech` ([#36](https://github.com/mostlygeek/llama-swap/issues/36)) - `v1/audio/speech` ([#36](https://github.com/mostlygeek/llama-swap/issues/36))
- `v1/audio/transcriptions` ([docs](https://github.com/mostlygeek/llama-swap/issues/41#issuecomment-2722637867)) - `v1/audio/transcriptions` ([docs](https://github.com/mostlygeek/llama-swap/issues/41#issuecomment-2722637867))
- ✅ llama-server (llama.cpp) supported endpoints:
- `v1/rerank`, `v1/reranking`, `/rerank`
- `/infill` - for code infilling
- `/completion` - llama-server's completion endpoint
- ✅ llama-swap custom API endpoints - ✅ llama-swap custom API endpoints
- `/ui` - web UI - `/ui` - web UI
- `/log` - remote log monitoring - `/log` - remote log monitoring
+9
View File
@@ -129,6 +129,15 @@ models:
# - recommended to stick to sampling parameters # - recommended to stick to sampling parameters
strip_params: "temperature, top_p, top_k" strip_params: "temperature, top_p, top_k"
# concurrencyLimit: overrides the allowed number of active parallel requests to a model
# - optional, default: 0
# - useful for limiting the number of active parallel requests a model can process
# - must be set per model
# - any number greater than 0 will override the internal default value of 10
# - any request that exceeds the limit will receive an HTTP 429 Too Many Requests response
# - recommended to be omitted and the default used
concurrencyLimit: 0
# Unlisted model example: # Unlisted model example:
"qwen-unlisted": "qwen-unlisted":
# unlisted: boolean, true or false # unlisted: boolean, true or false
+13
View File
@@ -153,6 +153,19 @@ func main() {
}) })
// llama-server compatibility: /completion
r.POST("/completion", func(c *gin.Context) {
c.Header("Content-Type", "application/json")
c.JSON(http.StatusOK, gin.H{
"responseMessage": *responseMessage,
"usage": gin.H{
"completion_tokens": 10,
"prompt_tokens": 25,
"total_tokens": 35,
},
})
})
// issue #41 // issue #41
r.POST("/v1/audio/transcriptions", func(c *gin.Context) { r.POST("/v1/audio/transcriptions", func(c *gin.Context) {
// Parse the multipart form // Parse the multipart form
+28 -22
View File
@@ -5,12 +5,20 @@ import (
"fmt" "fmt"
"io" "io"
"net/http" "net/http"
"strings"
"time" "time"
"github.com/gin-gonic/gin" "github.com/gin-gonic/gin"
"github.com/tidwall/gjson" "github.com/tidwall/gjson"
) )
// MetricsRecorder holds the per-request state that MetricsMiddleware attaches
// to its response writer so metrics can be recorded once the upstream
// response body has been captured.
type MetricsRecorder struct {
// metricsMonitor is copied from the ProxyManager (pm.metricsMonitor) by the middleware.
metricsMonitor *MetricsMonitor
// realModelName is set from the middleware's realModelName value for this request.
realModelName string
// isStreaming bool
// startTime is set to time.Now() when the middleware wraps the request; the
// time elapsed since it is the default duration when the response carries no
// "timings" object.
startTime time.Time
}
// MetricsMiddleware sets up the MetricsResponseWriter for capturing upstream requests // MetricsMiddleware sets up the MetricsResponseWriter for capturing upstream requests
func MetricsMiddleware(pm *ProxyManager) gin.HandlerFunc { func MetricsMiddleware(pm *ProxyManager) gin.HandlerFunc {
return func(c *gin.Context) { return func(c *gin.Context) {
@@ -41,49 +49,47 @@ func MetricsMiddleware(pm *ProxyManager) gin.HandlerFunc {
metricsRecorder: &MetricsRecorder{ metricsRecorder: &MetricsRecorder{
metricsMonitor: pm.metricsMonitor, metricsMonitor: pm.metricsMonitor,
realModelName: realModelName, realModelName: realModelName,
isStreaming: gjson.GetBytes(bodyBytes, "stream").Bool(),
startTime: time.Now(), startTime: time.Now(),
}, },
} }
c.Writer = writer c.Writer = writer
c.Next() c.Next()
rec := writer.metricsRecorder // check for streaming response
rec.processBody(writer.body) if strings.Contains(c.Writer.Header().Get("Content-Type"), "text/event-stream") {
} writer.metricsRecorder.processStreamingResponse(writer.body)
} } else {
writer.metricsRecorder.processNonStreamingResponse(writer.body)
}
type MetricsRecorder struct {
metricsMonitor *MetricsMonitor
realModelName string
isStreaming bool
startTime time.Time
}
// processBody handles response processing after request completes
func (rec *MetricsRecorder) processBody(body []byte) {
if rec.isStreaming {
rec.processStreamingResponse(body)
} else {
rec.processNonStreamingResponse(body)
} }
} }
func (rec *MetricsRecorder) parseAndRecordMetrics(jsonData gjson.Result) bool { func (rec *MetricsRecorder) parseAndRecordMetrics(jsonData gjson.Result) bool {
usage := jsonData.Get("usage") usage := jsonData.Get("usage")
if !usage.Exists() { timings := jsonData.Get("timings")
if !usage.Exists() && !timings.Exists() {
return false return false
} }
// default values // default values
outputTokens := int(jsonData.Get("usage.completion_tokens").Int()) outputTokens := 0
inputTokens := int(jsonData.Get("usage.prompt_tokens").Int()) inputTokens := 0
// timings data
tokensPerSecond := -1.0 tokensPerSecond := -1.0
promptPerSecond := -1.0 promptPerSecond := -1.0
durationMs := int(time.Since(rec.startTime).Milliseconds()) durationMs := int(time.Since(rec.startTime).Milliseconds())
if usage.Exists() {
outputTokens = int(jsonData.Get("usage.completion_tokens").Int())
inputTokens = int(jsonData.Get("usage.prompt_tokens").Int())
}
// use llama-server's timing data for tok/sec and duration as it is more accurate // use llama-server's timing data for tok/sec and duration as it is more accurate
if timings := jsonData.Get("timings"); timings.Exists() { if timings.Exists() {
inputTokens = int(jsonData.Get("timings.prompt_n").Int())
outputTokens = int(jsonData.Get("timings.predicted_n").Int())
promptPerSecond = jsonData.Get("timings.prompt_per_second").Float() promptPerSecond = jsonData.Get("timings.prompt_per_second").Float()
tokensPerSecond = jsonData.Get("timings.predicted_per_second").Float() tokensPerSecond = jsonData.Get("timings.predicted_per_second").Float()
durationMs = int(jsonData.Get("timings.prompt_ms").Float() + jsonData.Get("timings.predicted_ms").Float()) durationMs = int(jsonData.Get("timings.prompt_ms").Float() + jsonData.Get("timings.predicted_ms").Float())
+12 -1
View File
@@ -5,6 +5,7 @@ import (
"errors" "errors"
"fmt" "fmt"
"io" "io"
"net"
"net/http" "net/http"
"net/url" "net/url"
"os/exec" "os/exec"
@@ -363,8 +364,18 @@ func (p *Process) stopCommand() {
} }
func (p *Process) checkHealthEndpoint(healthURL string) error { func (p *Process) checkHealthEndpoint(healthURL string) error {
client := &http.Client{ client := &http.Client{
Timeout: 500 * time.Millisecond, // wait a short time for a tcp connection to be established
Transport: &http.Transport{
DialContext: (&net.Dialer{
Timeout: 500 * time.Millisecond,
}).DialContext,
},
// give a long time to respond to the health check endpoint
// after the connection is established. See issue: 276
Timeout: 5000 * time.Millisecond,
} }
req, err := http.NewRequest("GET", healthURL, nil) req, err := http.NewRequest("GET", healthURL, nil)
+10
View File
@@ -60,10 +60,20 @@ func (pg *ProcessGroup) ProxyRequest(modelID string, writer http.ResponseWriter,
if pg.swap { if pg.swap {
pg.Lock() pg.Lock()
if pg.lastUsedProcess != modelID { if pg.lastUsedProcess != modelID {
// is there something already running?
if pg.lastUsedProcess != "" { if pg.lastUsedProcess != "" {
pg.processes[pg.lastUsedProcess].Stop() pg.processes[pg.lastUsedProcess].Stop()
} }
// wait for the request to the new model to be fully handled
// and prevent race conditions see issue #277
pg.processes[modelID].ProxyRequest(writer, request)
pg.lastUsedProcess = modelID pg.lastUsedProcess = modelID
// short circuit and exit
pg.Unlock()
return nil
} }
pg.Unlock() pg.Unlock()
} }
+34 -16
View File
@@ -4,6 +4,7 @@ import (
"bytes" "bytes"
"net/http" "net/http"
"net/http/httptest" "net/http/httptest"
"sync"
"testing" "testing"
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/assert"
@@ -44,32 +45,49 @@ func TestProcessGroup_HasMember(t *testing.T) {
assert.False(t, pg.HasMember("model3")) assert.False(t, pg.HasMember("model3"))
} }
func TestProcessGroup_ProxyRequestSwapIsTrue(t *testing.T) { // TestProcessGroup_ProxyRequestSwapIsTrueParallel tests that when swap is true
// and multiple requests are made in parallel, only one process is running at a time.
func TestProcessGroup_ProxyRequestSwapIsTrueParallel(t *testing.T) {
var processGroupTestConfig = AddDefaultGroupToConfig(Config{
HealthCheckTimeout: 15,
Models: map[string]ModelConfig{
// use the same listening port so if a model is already running, it will fail
// this is a way to test that swap isolation is working
// properly when there are parallel requests made at the
// same time.
"model1": getTestSimpleResponderConfigPort("model1", 9832),
"model2": getTestSimpleResponderConfigPort("model2", 9832),
"model3": getTestSimpleResponderConfigPort("model3", 9832),
"model4": getTestSimpleResponderConfigPort("model4", 9832),
"model5": getTestSimpleResponderConfigPort("model5", 9832),
},
Groups: map[string]GroupConfig{
"G1": {
Swap: true,
Members: []string{"model1", "model2", "model3", "model4", "model5"},
},
},
})
pg := NewProcessGroup("G1", processGroupTestConfig, testLogger, testLogger) pg := NewProcessGroup("G1", processGroupTestConfig, testLogger, testLogger)
defer pg.StopProcesses(StopWaitForInflightRequest) defer pg.StopProcesses(StopWaitForInflightRequest)
tests := []string{"model1", "model2"} tests := []string{"model1", "model2", "model3", "model4", "model5"}
var wg sync.WaitGroup
wg.Add(len(tests))
for _, modelName := range tests { for _, modelName := range tests {
t.Run(modelName, func(t *testing.T) { go func(modelName string) {
reqBody := `{"x", "y"}` defer wg.Done()
req := httptest.NewRequest("POST", "/v1/chat/completions", bytes.NewBufferString(reqBody)) req := httptest.NewRequest("POST", "/v1/chat/completions", nil)
w := httptest.NewRecorder() w := httptest.NewRecorder()
assert.NoError(t, pg.ProxyRequest(modelName, w, req)) assert.NoError(t, pg.ProxyRequest(modelName, w, req))
assert.Equal(t, http.StatusOK, w.Code) assert.Equal(t, http.StatusOK, w.Code)
assert.Contains(t, w.Body.String(), modelName) assert.Contains(t, w.Body.String(), modelName)
}(modelName)
// make sure only one process is in the running state
count := 0
for _, process := range pg.processes {
if process.CurrentState() == StateReady {
count++
}
}
assert.Equal(t, 1, count)
})
} }
wg.Wait()
} }
func TestProcessGroup_ProxyRequestSwapIsFalse(t *testing.T) { func TestProcessGroup_ProxyRequestSwapIsFalse(t *testing.T) {
+11 -2
View File
@@ -191,11 +191,20 @@ func (pm *ProxyManager) setupGinEngine() {
// Support legacy /v1/completions api, see issue #12 // Support legacy /v1/completions api, see issue #12
pm.ginEngine.POST("/v1/completions", mm, pm.proxyOAIHandler) pm.ginEngine.POST("/v1/completions", mm, pm.proxyOAIHandler)
// Support embeddings // Support embeddings and reranking
pm.ginEngine.POST("/v1/embeddings", mm, pm.proxyOAIHandler) pm.ginEngine.POST("/v1/embeddings", mm, pm.proxyOAIHandler)
// llama-server's /reranking endpoint + aliases
pm.ginEngine.POST("/reranking", mm, pm.proxyOAIHandler)
pm.ginEngine.POST("/rerank", mm, pm.proxyOAIHandler)
pm.ginEngine.POST("/v1/rerank", mm, pm.proxyOAIHandler) pm.ginEngine.POST("/v1/rerank", mm, pm.proxyOAIHandler)
pm.ginEngine.POST("/v1/reranking", mm, pm.proxyOAIHandler) pm.ginEngine.POST("/v1/reranking", mm, pm.proxyOAIHandler)
pm.ginEngine.POST("/rerank", mm, pm.proxyOAIHandler)
// llama-server's /infill endpoint for code infilling
pm.ginEngine.POST("/infill", mm, pm.proxyOAIHandler)
// llama-server's /completion endpoint
pm.ginEngine.POST("/completion", mm, pm.proxyOAIHandler)
// Support audio/speech endpoint // Support audio/speech endpoint
pm.ginEngine.POST("/v1/audio/speech", pm.proxyOAIHandler) pm.ginEngine.POST("/v1/audio/speech", pm.proxyOAIHandler)
+22 -1
View File
@@ -42,7 +42,6 @@ func TestProxyManager_SwapProcessCorrectly(t *testing.T) {
assert.Contains(t, w.Body.String(), modelName) assert.Contains(t, w.Body.String(), modelName)
} }
} }
func TestProxyManager_SwapMultiProcess(t *testing.T) { func TestProxyManager_SwapMultiProcess(t *testing.T) {
config := AddDefaultGroupToConfig(Config{ config := AddDefaultGroupToConfig(Config{
HealthCheckTimeout: 15, HealthCheckTimeout: 15,
@@ -834,6 +833,28 @@ func TestProxyManager_HealthEndpoint(t *testing.T) {
assert.Equal(t, "OK", rec.Body.String()) assert.Equal(t, "OK", rec.Body.String())
} }
// TestProxyManager_CompletionEndpoint ensures the custom llama-server
// /completion endpoint proxies correctly: a POST to /completion whose JSON
// body names "model1" must return HTTP 200 with "model1" in the response body.
func TestProxyManager_CompletionEndpoint(t *testing.T) {
// Single-model configuration; only "model1" is defined.
config := AddDefaultGroupToConfig(Config{
HealthCheckTimeout: 15,
Models: map[string]ModelConfig{
"model1": getTestSimpleResponderConfig("model1"),
},
LogLevel: "error",
})
proxy := New(config)
// Stop any processes the proxy started once the test finishes.
defer proxy.StopProcesses(StopWaitForInflightRequest)
// The request body carries only the model name.
reqBody := `{"model":"model1"}`
req := httptest.NewRequest("POST", "/completion", bytes.NewBufferString(reqBody))
w := httptest.NewRecorder()
// Drive the request through the proxy's HTTP handler directly; no listener is opened by this test.
proxy.ServeHTTP(w, req)
assert.Equal(t, http.StatusOK, w.Code)
// The response body is expected to mention the requested model.
assert.Contains(t, w.Body.String(), "model1")
}
func TestProxyManager_StartupHooks(t *testing.T) { func TestProxyManager_StartupHooks(t *testing.T) {
// using real YAML as the configuration has gotten more complex // using real YAML as the configuration has gotten more complex
+53 -63
View File
@@ -1,88 +1,78 @@
import { useEffect, useCallback } from "react"; import { useEffect, useCallback } from "react";
import { BrowserRouter as Router, Routes, Route, Navigate, NavLink } from "react-router-dom"; import { BrowserRouter as Router, Routes, Route, Navigate, NavLink } from "react-router-dom";
import { useTheme } from "./contexts/ThemeProvider"; import { useTheme } from "./contexts/ThemeProvider";
import { APIProvider } from "./contexts/APIProvider"; import { useAPI } from "./contexts/APIProvider";
import LogViewerPage from "./pages/LogViewer"; import LogViewerPage from "./pages/LogViewer";
import ModelPage from "./pages/Models"; import ModelPage from "./pages/Models";
import ActivityPage from "./pages/Activity"; import ActivityPage from "./pages/Activity";
import ConnectionStatus from "./components/ConnectionStatus"; import ConnectionStatusIcon from "./components/ConnectionStatus";
import { RiSunFill, RiMoonFill } from "react-icons/ri"; import { RiSunFill, RiMoonFill } from "react-icons/ri";
import { usePersistentState } from "./hooks/usePersistentState";
function App() { function App() {
const { isNarrow, toggleTheme, isDarkMode } = useTheme(); const { isNarrow, toggleTheme, isDarkMode, appTitle, setAppTitle, setConnectionState } = useTheme();
const [appTitle, setAppTitle] = usePersistentState("app-title", "llama-swap");
const handleTitleChange = useCallback( const handleTitleChange = useCallback(
(newTitle: string) => { (newTitle: string) => {
setAppTitle(newTitle); setAppTitle(newTitle.replace(/\n/g, "").trim().substring(0, 64) || "llama-swap");
document.title = newTitle;
}, },
[setAppTitle] [setAppTitle]
); );
const { connectionStatus } = useAPI();
// Synchronize the connection state shown in document.title with the actual connection state
useEffect(() => { useEffect(() => {
document.title = appTitle; // Set initial title setConnectionState(connectionStatus);
}, [appTitle]); }, [connectionStatus]);
return ( return (
<Router basename="/ui/"> <Router basename="/ui/">
<APIProvider> <div className="flex flex-col h-screen">
<div className="flex flex-col h-screen"> <nav className="bg-surface border-b border-border p-2 h-[75px]">
<nav className="bg-surface border-b border-border p-2 h-[75px]"> <div className="flex items-center justify-between mx-auto px-4 h-full">
<div className="flex items-center justify-between mx-auto px-4 h-full"> {!isNarrow && (
{!isNarrow && ( <h1
<h1 contentEditable
contentEditable suppressContentEditableWarning
suppressContentEditableWarning className="flex items-center p-0 outline-none hover:bg-gray-100 dark:hover:bg-gray-700 rounded px-1"
className="flex items-center p-0 outline-none hover:bg-gray-100 dark:hover:bg-gray-700 rounded px-1" onBlur={(e) => handleTitleChange(e.currentTarget.textContent || "(set title)")}
onBlur={(e) => onKeyDown={(e) => {
handleTitleChange(e.currentTarget.textContent?.replace(/\n/g, "").trim() || "llama-swap") if (e.key === "Enter") {
e.preventDefault();
handleTitleChange(e.currentTarget.textContent || "(set title)");
e.currentTarget.blur();
} }
onKeyDown={(e) => { }}
if (e.key === "Enter") { >
e.preventDefault(); {appTitle}
const sanitizedText = </h1>
e.currentTarget.textContent?.replace(/\n/g, "").trim().substring(0, 25) || "llama-swap"; )}
handleTitleChange(sanitizedText); <div className="flex items-center space-x-4">
e.currentTarget.textContent = sanitizedText; <NavLink to="/" className={({ isActive }) => (isActive ? "navlink active" : "navlink")}>
e.currentTarget.blur(); Logs
} </NavLink>
}} <NavLink to="/models" className={({ isActive }) => (isActive ? "navlink active" : "navlink")}>
> Models
{appTitle} </NavLink>
</h1> <NavLink to="/activity" className={({ isActive }) => (isActive ? "navlink active" : "navlink")}>
)} Activity
<div className="flex items-center space-x-4"> </NavLink>
<NavLink to="/" className={({ isActive }) => (isActive ? "navlink active" : "navlink")}> <button className="" onClick={toggleTheme}>
Logs {isDarkMode ? <RiMoonFill /> : <RiSunFill />}
</NavLink> </button>
<ConnectionStatusIcon />
<NavLink to="/models" className={({ isActive }) => (isActive ? "navlink active" : "navlink")}>
Models
</NavLink>
<NavLink to="/activity" className={({ isActive }) => (isActive ? "navlink active" : "navlink")}>
Activity
</NavLink>
<button className="" onClick={toggleTheme}>
{isDarkMode ? <RiMoonFill /> : <RiSunFill />}
</button>
<ConnectionStatus />
</div>
</div> </div>
</nav> </div>
</nav>
<main className="flex-1 overflow-auto p-4"> <main className="flex-1 overflow-auto p-4">
<Routes> <Routes>
<Route path="/" element={<LogViewerPage />} /> <Route path="/" element={<LogViewerPage />} />
<Route path="/models" element={<ModelPage />} /> <Route path="/models" element={<ModelPage />} />
<Route path="/activity" element={<ActivityPage />} /> <Route path="/activity" element={<ActivityPage />} />
<Route path="*" element={<Navigate to="/" replace />} /> <Route path="*" element={<Navigate to="/" replace />} />
</Routes> </Routes>
</main> </main>
</div> </div>
</APIProvider>
</Router> </Router>
); );
} }
+7 -17
View File
@@ -1,21 +1,11 @@
import { useAPI } from "../contexts/APIProvider"; import { useAPI } from "../contexts/APIProvider";
import { useEffect, useState, useMemo } from "react"; import { useMemo } from "react";
type ConnectionStatus = "disconnected" | "connecting" | "connected"; const ConnectionStatusIcon = () => {
const { connectionStatus } = useAPI();
const ConnectionStatus = () => {
const { getConnectionStatus } = useAPI();
const [eventStreamStatus, setEventStreamStatus] = useState<ConnectionStatus>("disconnected");
useEffect(() => {
const interval = setInterval(() => {
setEventStreamStatus(getConnectionStatus());
}, 1000);
return () => clearInterval(interval);
});
const eventStatusColor = useMemo(() => { const eventStatusColor = useMemo(() => {
switch (eventStreamStatus) { switch (connectionStatus) {
case "connected": case "connected":
return "bg-green-500"; return "bg-green-500";
case "connecting": case "connecting":
@@ -24,13 +14,13 @@ const ConnectionStatus = () => {
default: default:
return "bg-red-500"; return "bg-red-500";
} }
}, [eventStreamStatus]); }, [connectionStatus]);
return ( return (
<div className="flex items-center" title={`event stream: ${eventStreamStatus}`}> <div className="flex items-center" title={`event stream: ${connectionStatus}`}>
<span className={`inline-block w-3 h-3 rounded-full ${eventStatusColor} mr-2`}></span> <span className={`inline-block w-3 h-3 rounded-full ${eventStatusColor} mr-2`}></span>
</div> </div>
); );
}; };
export default ConnectionStatus; export default ConnectionStatusIcon;
+11 -14
View File
@@ -1,4 +1,5 @@
import { useRef, createContext, useState, useContext, useEffect, useCallback, useMemo, type ReactNode } from "react"; import { useRef, createContext, useState, useContext, useEffect, useCallback, useMemo, type ReactNode } from "react";
import type { ConnectionState } from "../lib/types";
type ModelStatus = "ready" | "starting" | "stopping" | "stopped" | "shutdown" | "unknown"; type ModelStatus = "ready" | "starting" | "stopping" | "stopped" | "shutdown" | "unknown";
const LOG_LENGTH_LIMIT = 1024 * 100; /* 100KB of log data */ const LOG_LENGTH_LIMIT = 1024 * 100; /* 100KB of log data */
@@ -20,7 +21,7 @@ interface APIProviderType {
proxyLogs: string; proxyLogs: string;
upstreamLogs: string; upstreamLogs: string;
metrics: Metrics[]; metrics: Metrics[];
getConnectionStatus: () => "connected" | "connecting" | "disconnected"; connectionStatus: ConnectionState;
} }
interface Metrics { interface Metrics {
@@ -53,6 +54,7 @@ export function APIProvider({ children, autoStartAPIEvents = true }: APIProvider
const [proxyLogs, setProxyLogs] = useState(""); const [proxyLogs, setProxyLogs] = useState("");
const [upstreamLogs, setUpstreamLogs] = useState(""); const [upstreamLogs, setUpstreamLogs] = useState("");
const [metrics, setMetrics] = useState<Metrics[]>([]); const [metrics, setMetrics] = useState<Metrics[]>([]);
const [connectionStatus, setConnectionState] = useState<ConnectionState>("disconnected");
const apiEventSource = useRef<EventSource | null>(null); const apiEventSource = useRef<EventSource | null>(null);
const [models, setModels] = useState<Model[]>([]); const [models, setModels] = useState<Model[]>([]);
@@ -64,16 +66,6 @@ export function APIProvider({ children, autoStartAPIEvents = true }: APIProvider
}); });
}, []); }, []);
const getConnectionStatus = useCallback(() => {
if (apiEventSource.current?.readyState === EventSource.OPEN) {
return "connected";
} else if (apiEventSource.current?.readyState === EventSource.CONNECTING) {
return "connecting";
} else {
return "disconnected";
}
}, []);
const enableAPIEvents = useCallback((enabled: boolean) => { const enableAPIEvents = useCallback((enabled: boolean) => {
if (!enabled) { if (!enabled) {
apiEventSource.current?.close(); apiEventSource.current?.close();
@@ -86,7 +78,9 @@ export function APIProvider({ children, autoStartAPIEvents = true }: APIProvider
const initialDelay = 1000; // 1 second const initialDelay = 1000; // 1 second
const connect = () => { const connect = () => {
apiEventSource.current = null;
const eventSource = new EventSource("/api/events"); const eventSource = new EventSource("/api/events");
setConnectionState("connecting");
eventSource.onopen = () => { eventSource.onopen = () => {
// clear everything out on connect to keep things in sync // clear everything out on connect to keep things in sync
@@ -94,6 +88,9 @@ export function APIProvider({ children, autoStartAPIEvents = true }: APIProvider
setUpstreamLogs(""); setUpstreamLogs("");
setMetrics([]); // clear metrics on reconnect setMetrics([]); // clear metrics on reconnect
setModels([]); // clear models on reconnect setModels([]); // clear models on reconnect
apiEventSource.current = eventSource;
retryCount = 0;
setConnectionState("connected");
}; };
eventSource.onmessage = (e: MessageEvent) => { eventSource.onmessage = (e: MessageEvent) => {
@@ -138,14 +135,14 @@ export function APIProvider({ children, autoStartAPIEvents = true }: APIProvider
console.error(e.data, err); console.error(e.data, err);
} }
}; };
eventSource.onerror = () => { eventSource.onerror = () => {
eventSource.close(); eventSource.close();
retryCount++; retryCount++;
const delay = Math.min(initialDelay * Math.pow(2, retryCount - 1), 5000); const delay = Math.min(initialDelay * Math.pow(2, retryCount - 1), 5000);
setConnectionState("disconnected");
setTimeout(connect, delay); setTimeout(connect, delay);
}; };
apiEventSource.current = eventSource;
}; };
connect(); connect();
@@ -213,7 +210,7 @@ export function APIProvider({ children, autoStartAPIEvents = true }: APIProvider
proxyLogs, proxyLogs,
upstreamLogs, upstreamLogs,
metrics, metrics,
getConnectionStatus, connectionStatus,
}), }),
[models, listModels, unloadAllModels, loadModel, enableAPIEvents, proxyLogs, upstreamLogs, metrics] [models, listModels, unloadAllModels, loadModel, enableAPIEvents, proxyLogs, upstreamLogs, metrics]
); );
+30 -1
View File
@@ -1,5 +1,6 @@
import { createContext, useContext, useEffect, type ReactNode, useMemo, useState } from "react"; import { createContext, useContext, useEffect, type ReactNode, useMemo, useState } from "react";
import { usePersistentState } from "../hooks/usePersistentState"; import { usePersistentState } from "../hooks/usePersistentState";
import type { ConnectionState } from "../lib/types";
type ScreenWidth = "xs" | "sm" | "md" | "lg" | "xl" | "2xl"; type ScreenWidth = "xs" | "sm" | "md" | "lg" | "xl" | "2xl";
type ThemeContextType = { type ThemeContextType = {
@@ -7,6 +8,11 @@ type ThemeContextType = {
screenWidth: ScreenWidth; screenWidth: ScreenWidth;
isNarrow: boolean; isNarrow: boolean;
toggleTheme: () => void; toggleTheme: () => void;
// for managing the window title and connection state information
appTitle: string;
setAppTitle: (title: string) => void;
setConnectionState: (state: ConnectionState) => void;
}; };
const ThemeContext = createContext<ThemeContextType | undefined>(undefined); const ThemeContext = createContext<ThemeContextType | undefined>(undefined);
@@ -16,6 +22,17 @@ type ThemeProviderProps = {
}; };
export function ThemeProvider({ children }: ThemeProviderProps) { export function ThemeProvider({ children }: ThemeProviderProps) {
const [appTitle, setAppTitle] = usePersistentState("app-title", "llama-swap");
const [connectionState, setConnectionState] = useState<ConnectionState>("disconnected");
/**
* Set document.title to the app title prefixed with a connection status icon
*/
useEffect(() => {
const connectionIcon = connectionState === "connecting" ? "🟡" : connectionState === "connected" ? "🟢" : "🔴";
document.title = connectionIcon + " " + appTitle; // Set initial title
}, [appTitle, connectionState]);
const [isDarkMode, setIsDarkMode] = usePersistentState<boolean>("theme", false); const [isDarkMode, setIsDarkMode] = usePersistentState<boolean>("theme", false);
const [screenWidth, setScreenWidth] = useState<ScreenWidth>("md"); // Default to md const [screenWidth, setScreenWidth] = useState<ScreenWidth>("md"); // Default to md
@@ -55,7 +72,19 @@ export function ThemeProvider({ children }: ThemeProviderProps) {
}, [screenWidth]); }, [screenWidth]);
return ( return (
<ThemeContext.Provider value={{ isDarkMode, toggleTheme, screenWidth, isNarrow }}>{children}</ThemeContext.Provider> <ThemeContext.Provider
value={{
isDarkMode,
toggleTheme,
screenWidth,
isNarrow,
appTitle,
setAppTitle,
setConnectionState,
}}
>
{children}
</ThemeContext.Provider>
); );
} }
+1
View File
@@ -0,0 +1 @@
// ConnectionState is the set of states used for the /api/events EventSource
// connection: "connecting" while a new EventSource is being opened,
// "connected" once it has opened, and "disconnected" after an error.
export type ConnectionState = "connected" | "connecting" | "disconnected";
+4 -1
View File
@@ -3,11 +3,14 @@ import { createRoot } from "react-dom/client";
import "./index.css"; import "./index.css";
import App from "./App.tsx"; import App from "./App.tsx";
import { ThemeProvider } from "./contexts/ThemeProvider"; import { ThemeProvider } from "./contexts/ThemeProvider";
import { APIProvider } from "./contexts/APIProvider";
createRoot(document.getElementById("root")!).render( createRoot(document.getElementById("root")!).render(
<StrictMode> <StrictMode>
<ThemeProvider> <ThemeProvider>
<App /> <APIProvider>
<App />
</APIProvider>
</ThemeProvider> </ThemeProvider>
</StrictMode> </StrictMode>
); );