Compare commits

...

3 Commits

Author SHA1 Message Date
Benson Wong 7acbaf4712 Add connection status indicator in UI (#260)
* show connection status as icon in UI title
* make connection status event driven
2025-08-20 13:58:24 -07:00
Benson Wong fcc5ad135a UI: Allow editing of title (#246)
- make <h1> title contentEditable
- title setting persists across reloads in localStorage
2025-08-17 09:42:06 -07:00
Benson Wong 305e5a0031 improve example config [skip ci] 2025-08-17 09:19:04 -07:00
7 changed files with 150 additions and 96 deletions
+34 -26
View File
@@ -3,14 +3,15 @@
# #
# 💡 Tip - Use an LLM with this file! # 💡 Tip - Use an LLM with this file!
# ==================================== # ====================================
# This example configuration is written to be LLM friendly! Try # This example configuration is written to be LLM friendly. Try
# copying this file into an LLM and asking it to explain or generate # copying this file into an LLM and asking it to explain or generate
# sections for you. # sections for you.
# ==================================== # ====================================
#
# Usage notes:
# - Below are all the available configuration options for llama-swap. # - Below are all the available configuration options for llama-swap.
# - Settings with a default value, or noted as optional can be omitted. # - Settings noted as "required" must be in your configuration file
# - Settings that are marked required must be in your configuration file # - Settings noted as "optional" can be omitted
# healthCheckTimeout: number of seconds to wait for a model to be ready to serve requests # healthCheckTimeout: number of seconds to wait for a model to be ready to serve requests
# - optional, default: 120 # - optional, default: 120
@@ -34,9 +35,9 @@ metricsMaxInMemory: 1000
# - it is automatically incremented for every model that uses it # - it is automatically incremented for every model that uses it
startPort: 10001 startPort: 10001
# macros: sets a dictionary of string:string pairs # macros: a dictionary of string substitutions
# - optional, default: empty dictionary # - optional, default: empty dictionary
# - these are reusable snippets # - macros are reusable snippets
# - used in a model's cmd, cmdStop, proxy and checkEndpoint # - used in a model's cmd, cmdStop, proxy and checkEndpoint
# - useful for reducing common configuration settings # - useful for reducing common configuration settings
macros: macros:
@@ -99,44 +100,46 @@ models:
# checkEndpoint: URL path to check if the server is ready # checkEndpoint: URL path to check if the server is ready
# - optional, default: /health # - optional, default: /health
# - use "none" to skip endpoint ready checking
# - endpoint is expected to return an HTTP 200 response # - endpoint is expected to return an HTTP 200 response
# - all requests wait until the endpoint is ready (or fails) # - all requests wait until the endpoint is ready or fails
# - use "none" to skip endpoint health checking
checkEndpoint: /custom-endpoint checkEndpoint: /custom-endpoint
# ttl: automatically unload the model after this many seconds # ttl: automatically unload the model after ttl seconds
# - optional, default: 0 # - optional, default: 0
# - ttl values must be a value greater than 0 # - ttl values must be a value greater than 0
# - a value of 0 disables automatic unloading of the model # - a value of 0 disables automatic unloading of the model
ttl: 60 ttl: 60
# useModelName: overrides the model name that is sent to upstream server # useModelName: override the model name that is sent to upstream server
# - optional, default: "" # - optional, default: ""
# - useful when the upstream server expects a specific model name or format # - useful for when the upstream server expects a specific model name that
# is different from the model's ID
useModelName: "qwen:qwq" useModelName: "qwen:qwq"
# filters: a dictionary of filter settings # filters: a dictionary of filter settings
# - optional, default: empty dictionary # - optional, default: empty dictionary
# - only strip_params is currently supported
filters: filters:
# strip_params: a comma separated list of parameters to remove from the request # strip_params: a comma separated list of parameters to remove from the request
# - optional, default: "" # - optional, default: ""
# - useful for preventing overriding of default server params by requests # - useful for server side enforcement of sampling parameters
# - `model` parameter is never removed # - the `model` parameter can never be removed
# - can be any JSON key in the request body # - can be any JSON key in the request body
# - recommended to stick to sampling parameters # - recommended to stick to sampling parameters
strip_params: "temperature, top_p, top_k" strip_params: "temperature, top_p, top_k"
# Unlisted model example: # Unlisted model example:
"qwen-unlisted": "qwen-unlisted":
# unlisted: true or false # unlisted: boolean, true or false
# - optional, default: false # - optional, default: false
# - unlisted models do not show up in /v1/models or /upstream lists # - unlisted models do not show up in /v1/models api requests
# - can be requested as normal through all apis # - can be requested as normal through all apis
unlisted: true unlisted: true
cmd: llama-server --port ${PORT} -m Llama-3.2-1B-Instruct-Q4_K_M.gguf -ngl 0 cmd: llama-server --port ${PORT} -m Llama-3.2-1B-Instruct-Q4_K_M.gguf -ngl 0
# Docker example: # Docker example:
# container run times like Docker and Podman can also be used with a # container run times like Docker and Podman can be used reliably with a
# combination of cmd and cmdStop. # combination of cmd and cmdStop.
"docker-llama": "docker-llama":
proxy: "http://127.0.0.1:${PORT}" proxy: "http://127.0.0.1:${PORT}"
@@ -149,24 +152,26 @@ models:
# cmdStop: command to run to stop the model gracefully # cmdStop: command to run to stop the model gracefully
# - optional, default: "" # - optional, default: ""
# - useful for stopping commands managed by another system # - useful for stopping commands managed by another system
# - on POSIX systems: a SIGTERM is sent for graceful shutdown
# - on Windows, taskkill is used
# - processes are given 5 seconds to shutdown until they are forcefully killed
# - the upstream's process id is available in the ${PID} macro # - the upstream's process id is available in the ${PID} macro
#
# When empty, llama-swap has this default behaviour:
# - on POSIX systems: a SIGTERM signal is sent
# - on Windows, calls taskkill to stop the process
# - processes have 5 seconds to shutdown until forceful termination is attempted
cmdStop: docker stop dockertest cmdStop: docker stop dockertest
# groups: a dictionary of group settings # groups: a dictionary of group settings
# - optional, default: empty dictionary # - optional, default: empty dictionary
# - provide advanced controls over model swapping behaviour. # - provides advanced controls over model swapping behaviour
# - Using groups some models can be kept loaded indefinitely, while others are swapped out. # - using groups some models can be kept loaded indefinitely, while others are swapped out
# - model ids must be defined in the Models section # - model IDs must be defined in the Models section
# - a model can only be a member of one group # - a model can only be a member of one group
# - group behaviour is controlled via the `swap`, `exclusive` and `persistent` fields # - group behaviour is controlled via the `swap`, `exclusive` and `persistent` fields
# - see issue #109 for details # - see issue #109 for details
# #
# NOTE: the example below uses model names that are not defined above for demonstration purposes # NOTE: the example below uses model names that are not defined above for demonstration purposes
groups: groups:
# group1 is same as the default behaviour of llama-swap where only one model is allowed # group1 works the same as the default behaviour of llama-swap where only one model is allowed
# to run at a time across the whole llama-swap instance # to run at a time across the whole llama-swap instance
"group1": "group1":
# swap: controls the model swapping behaviour within the group # swap: controls the model swapping behaviour within the group
@@ -188,10 +193,13 @@ groups:
- "qwen-unlisted" - "qwen-unlisted"
# Example: # Example:
# - in this group all the models can run at the same time # - in group2 all models can run at the same time
# - when a different group loads all running models in this group are unloaded # - when a different group is loaded it causes all running models in this group to unload
"group2": "group2":
swap: false swap: false
# exclusive: false does not unload other groups when a model in group2 is requested
# - the models in group2 will be loaded but will not unload any other groups
exclusive: false exclusive: false
members: members:
- "docker-llama" - "docker-llama"
@@ -220,7 +228,7 @@ groups:
# - the only supported hook is on_startup # - the only supported hook is on_startup
hooks: hooks:
# on_startup: a dictionary of actions to perform on startup # on_startup: a dictionary of actions to perform on startup
# - optional, default: empty dictionar # - optional, default: empty dictionary
# - the only supported action is preload # - the only supported action is preload
on_startup: on_startup:
# preload: a list of model ids to load on startup # preload: a list of model ids to load on startup
+62 -36
View File
@@ -1,52 +1,78 @@
import { useEffect, useCallback } from "react";
import { BrowserRouter as Router, Routes, Route, Navigate, NavLink } from "react-router-dom"; import { BrowserRouter as Router, Routes, Route, Navigate, NavLink } from "react-router-dom";
import { useTheme } from "./contexts/ThemeProvider"; import { useTheme } from "./contexts/ThemeProvider";
import { APIProvider } from "./contexts/APIProvider"; import { useAPI } from "./contexts/APIProvider";
import LogViewerPage from "./pages/LogViewer"; import LogViewerPage from "./pages/LogViewer";
import ModelPage from "./pages/Models"; import ModelPage from "./pages/Models";
import ActivityPage from "./pages/Activity"; import ActivityPage from "./pages/Activity";
import ConnectionStatus from "./components/ConnectionStatus"; import ConnectionStatusIcon from "./components/ConnectionStatus";
import { RiSunFill, RiMoonFill } from "react-icons/ri"; import { RiSunFill, RiMoonFill } from "react-icons/ri";
function App() { function App() {
const { isNarrow, toggleTheme, isDarkMode } = useTheme(); const { isNarrow, toggleTheme, isDarkMode, appTitle, setAppTitle, setConnectionState } = useTheme();
const handleTitleChange = useCallback(
(newTitle: string) => {
setAppTitle(newTitle.replace(/\n/g, "").trim().substring(0, 64) || "llama-swap");
},
[setAppTitle]
);
const { connectionStatus } = useAPI();
// Keep the connection state shown in document.title in sync with the API connection status
useEffect(() => {
setConnectionState(connectionStatus);
}, [connectionStatus]);
return ( return (
<Router basename="/ui/"> <Router basename="/ui/">
<APIProvider> <div className="flex flex-col h-screen">
<div className="flex flex-col h-screen"> <nav className="bg-surface border-b border-border p-2 h-[75px]">
<nav className="bg-surface border-b border-border p-2 h-[75px]"> <div className="flex items-center justify-between mx-auto px-4 h-full">
<div className="flex items-center justify-between mx-auto px-4 h-full"> {!isNarrow && (
{!isNarrow && <h1 className="flex items-center p-0">llama-swap</h1>} <h1
<div className="flex items-center space-x-4"> contentEditable
<NavLink to="/" className={({ isActive }) => (isActive ? "navlink active" : "navlink")}> suppressContentEditableWarning
Logs className="flex items-center p-0 outline-none hover:bg-gray-100 dark:hover:bg-gray-700 rounded px-1"
</NavLink> onBlur={(e) => handleTitleChange(e.currentTarget.textContent || "(set title)")}
onKeyDown={(e) => {
<NavLink to="/models" className={({ isActive }) => (isActive ? "navlink active" : "navlink")}> if (e.key === "Enter") {
Models e.preventDefault();
</NavLink> handleTitleChange(e.currentTarget.textContent || "(set title)");
e.currentTarget.blur();
<NavLink to="/activity" className={({ isActive }) => (isActive ? "navlink active" : "navlink")}> }
Activity }}
</NavLink> >
<button className="" onClick={toggleTheme}> {appTitle}
{isDarkMode ? <RiMoonFill /> : <RiSunFill />} </h1>
</button> )}
<ConnectionStatus /> <div className="flex items-center space-x-4">
</div> <NavLink to="/" className={({ isActive }) => (isActive ? "navlink active" : "navlink")}>
Logs
</NavLink>
<NavLink to="/models" className={({ isActive }) => (isActive ? "navlink active" : "navlink")}>
Models
</NavLink>
<NavLink to="/activity" className={({ isActive }) => (isActive ? "navlink active" : "navlink")}>
Activity
</NavLink>
<button className="" onClick={toggleTheme}>
{isDarkMode ? <RiMoonFill /> : <RiSunFill />}
</button>
<ConnectionStatusIcon />
</div> </div>
</nav> </div>
</nav>
<main className="flex-1 overflow-auto p-4"> <main className="flex-1 overflow-auto p-4">
<Routes> <Routes>
<Route path="/" element={<LogViewerPage />} /> <Route path="/" element={<LogViewerPage />} />
<Route path="/models" element={<ModelPage />} /> <Route path="/models" element={<ModelPage />} />
<Route path="/activity" element={<ActivityPage />} /> <Route path="/activity" element={<ActivityPage />} />
<Route path="*" element={<Navigate to="/" replace />} /> <Route path="*" element={<Navigate to="/" replace />} />
</Routes> </Routes>
</main> </main>
</div> </div>
</APIProvider>
</Router> </Router>
); );
} }
+7 -17
View File
@@ -1,21 +1,11 @@
import { useAPI } from "../contexts/APIProvider"; import { useAPI } from "../contexts/APIProvider";
import { useEffect, useState, useMemo } from "react"; import { useMemo } from "react";
type ConnectionStatus = "disconnected" | "connecting" | "connected"; const ConnectionStatusIcon = () => {
const { connectionStatus } = useAPI();
const ConnectionStatus = () => {
const { getConnectionStatus } = useAPI();
const [eventStreamStatus, setEventStreamStatus] = useState<ConnectionStatus>("disconnected");
useEffect(() => {
const interval = setInterval(() => {
setEventStreamStatus(getConnectionStatus());
}, 1000);
return () => clearInterval(interval);
});
const eventStatusColor = useMemo(() => { const eventStatusColor = useMemo(() => {
switch (eventStreamStatus) { switch (connectionStatus) {
case "connected": case "connected":
return "bg-green-500"; return "bg-green-500";
case "connecting": case "connecting":
@@ -24,13 +14,13 @@ const ConnectionStatus = () => {
default: default:
return "bg-red-500"; return "bg-red-500";
} }
}, [eventStreamStatus]); }, [connectionStatus]);
return ( return (
<div className="flex items-center" title={`event stream: ${eventStreamStatus}`}> <div className="flex items-center" title={`event stream: ${connectionStatus}`}>
<span className={`inline-block w-3 h-3 rounded-full ${eventStatusColor} mr-2`}></span> <span className={`inline-block w-3 h-3 rounded-full ${eventStatusColor} mr-2`}></span>
</div> </div>
); );
}; };
export default ConnectionStatus; export default ConnectionStatusIcon;
+11 -14
View File
@@ -1,4 +1,5 @@
import { useRef, createContext, useState, useContext, useEffect, useCallback, useMemo, type ReactNode } from "react"; import { useRef, createContext, useState, useContext, useEffect, useCallback, useMemo, type ReactNode } from "react";
import type { ConnectionState } from "../lib/types";
type ModelStatus = "ready" | "starting" | "stopping" | "stopped" | "shutdown" | "unknown"; type ModelStatus = "ready" | "starting" | "stopping" | "stopped" | "shutdown" | "unknown";
const LOG_LENGTH_LIMIT = 1024 * 100; /* 100KB of log data */ const LOG_LENGTH_LIMIT = 1024 * 100; /* 100KB of log data */
@@ -20,7 +21,7 @@ interface APIProviderType {
proxyLogs: string; proxyLogs: string;
upstreamLogs: string; upstreamLogs: string;
metrics: Metrics[]; metrics: Metrics[];
getConnectionStatus: () => "connected" | "connecting" | "disconnected"; connectionStatus: ConnectionState;
} }
interface Metrics { interface Metrics {
@@ -53,6 +54,7 @@ export function APIProvider({ children, autoStartAPIEvents = true }: APIProvider
const [proxyLogs, setProxyLogs] = useState(""); const [proxyLogs, setProxyLogs] = useState("");
const [upstreamLogs, setUpstreamLogs] = useState(""); const [upstreamLogs, setUpstreamLogs] = useState("");
const [metrics, setMetrics] = useState<Metrics[]>([]); const [metrics, setMetrics] = useState<Metrics[]>([]);
const [connectionStatus, setConnectionState] = useState<ConnectionState>("disconnected");
const apiEventSource = useRef<EventSource | null>(null); const apiEventSource = useRef<EventSource | null>(null);
const [models, setModels] = useState<Model[]>([]); const [models, setModels] = useState<Model[]>([]);
@@ -64,16 +66,6 @@ export function APIProvider({ children, autoStartAPIEvents = true }: APIProvider
}); });
}, []); }, []);
const getConnectionStatus = useCallback(() => {
if (apiEventSource.current?.readyState === EventSource.OPEN) {
return "connected";
} else if (apiEventSource.current?.readyState === EventSource.CONNECTING) {
return "connecting";
} else {
return "disconnected";
}
}, []);
const enableAPIEvents = useCallback((enabled: boolean) => { const enableAPIEvents = useCallback((enabled: boolean) => {
if (!enabled) { if (!enabled) {
apiEventSource.current?.close(); apiEventSource.current?.close();
@@ -86,7 +78,9 @@ export function APIProvider({ children, autoStartAPIEvents = true }: APIProvider
const initialDelay = 1000; // 1 second const initialDelay = 1000; // 1 second
const connect = () => { const connect = () => {
apiEventSource.current = null;
const eventSource = new EventSource("/api/events"); const eventSource = new EventSource("/api/events");
setConnectionState("connecting");
eventSource.onopen = () => { eventSource.onopen = () => {
// clear everything out on connect to keep things in sync // clear everything out on connect to keep things in sync
@@ -94,6 +88,9 @@ export function APIProvider({ children, autoStartAPIEvents = true }: APIProvider
setUpstreamLogs(""); setUpstreamLogs("");
setMetrics([]); // clear metrics on reconnect setMetrics([]); // clear metrics on reconnect
setModels([]); // clear models on reconnect setModels([]); // clear models on reconnect
apiEventSource.current = eventSource;
retryCount = 0;
setConnectionState("connected");
}; };
eventSource.onmessage = (e: MessageEvent) => { eventSource.onmessage = (e: MessageEvent) => {
@@ -138,14 +135,14 @@ export function APIProvider({ children, autoStartAPIEvents = true }: APIProvider
console.error(e.data, err); console.error(e.data, err);
} }
}; };
eventSource.onerror = () => { eventSource.onerror = () => {
eventSource.close(); eventSource.close();
retryCount++; retryCount++;
const delay = Math.min(initialDelay * Math.pow(2, retryCount - 1), 5000); const delay = Math.min(initialDelay * Math.pow(2, retryCount - 1), 5000);
setConnectionState("disconnected");
setTimeout(connect, delay); setTimeout(connect, delay);
}; };
apiEventSource.current = eventSource;
}; };
connect(); connect();
@@ -213,7 +210,7 @@ export function APIProvider({ children, autoStartAPIEvents = true }: APIProvider
proxyLogs, proxyLogs,
upstreamLogs, upstreamLogs,
metrics, metrics,
getConnectionStatus, connectionStatus,
}), }),
[models, listModels, unloadAllModels, loadModel, enableAPIEvents, proxyLogs, upstreamLogs, metrics] [models, listModels, unloadAllModels, loadModel, enableAPIEvents, proxyLogs, upstreamLogs, metrics]
); );
+30 -1
View File
@@ -1,5 +1,6 @@
import { createContext, useContext, useEffect, type ReactNode, useMemo, useState } from "react"; import { createContext, useContext, useEffect, type ReactNode, useMemo, useState } from "react";
import { usePersistentState } from "../hooks/usePersistentState"; import { usePersistentState } from "../hooks/usePersistentState";
import type { ConnectionState } from "../lib/types";
type ScreenWidth = "xs" | "sm" | "md" | "lg" | "xl" | "2xl"; type ScreenWidth = "xs" | "sm" | "md" | "lg" | "xl" | "2xl";
type ThemeContextType = { type ThemeContextType = {
@@ -7,6 +8,11 @@ type ThemeContextType = {
screenWidth: ScreenWidth; screenWidth: ScreenWidth;
isNarrow: boolean; isNarrow: boolean;
toggleTheme: () => void; toggleTheme: () => void;
// for managing the window title and connection state information
appTitle: string;
setAppTitle: (title: string) => void;
setConnectionState: (state: ConnectionState) => void;
}; };
const ThemeContext = createContext<ThemeContextType | undefined>(undefined); const ThemeContext = createContext<ThemeContextType | undefined>(undefined);
@@ -16,6 +22,17 @@ type ThemeProviderProps = {
}; };
export function ThemeProvider({ children }: ThemeProviderProps) { export function ThemeProvider({ children }: ThemeProviderProps) {
const [appTitle, setAppTitle] = usePersistentState("app-title", "llama-swap");
const [connectionState, setConnectionState] = useState<ConnectionState>("disconnected");
/**
* Keep document.title in sync: a connection status icon followed by the app title
*/
useEffect(() => {
const connectionIcon = connectionState === "connecting" ? "🟡" : connectionState === "connected" ? "🟢" : "🔴";
document.title = connectionIcon + " " + appTitle; // re-applied whenever the title or connection state changes
}, [appTitle, connectionState]);
const [isDarkMode, setIsDarkMode] = usePersistentState<boolean>("theme", false); const [isDarkMode, setIsDarkMode] = usePersistentState<boolean>("theme", false);
const [screenWidth, setScreenWidth] = useState<ScreenWidth>("md"); // Default to md const [screenWidth, setScreenWidth] = useState<ScreenWidth>("md"); // Default to md
@@ -55,7 +72,19 @@ export function ThemeProvider({ children }: ThemeProviderProps) {
}, [screenWidth]); }, [screenWidth]);
return ( return (
<ThemeContext.Provider value={{ isDarkMode, toggleTheme, screenWidth, isNarrow }}>{children}</ThemeContext.Provider> <ThemeContext.Provider
value={{
isDarkMode,
toggleTheme,
screenWidth,
isNarrow,
appTitle,
setAppTitle,
setConnectionState,
}}
>
{children}
</ThemeContext.Provider>
); );
} }
+1
View File
@@ -0,0 +1 @@
export type ConnectionState = "connected" | "connecting" | "disconnected";
+4 -1
View File
@@ -3,11 +3,14 @@ import { createRoot } from "react-dom/client";
import "./index.css"; import "./index.css";
import App from "./App.tsx"; import App from "./App.tsx";
import { ThemeProvider } from "./contexts/ThemeProvider"; import { ThemeProvider } from "./contexts/ThemeProvider";
import { APIProvider } from "./contexts/APIProvider";
createRoot(document.getElementById("root")!).render( createRoot(document.getElementById("root")!).render(
<StrictMode> <StrictMode>
<ThemeProvider> <ThemeProvider>
<App /> <APIProvider>
<App />
</APIProvider>
</ThemeProvider> </ThemeProvider>
</StrictMode> </StrictMode>
); );