Compare commits
3 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 57803fd3aa | |||
| c55d0cc842 | |||
| 7acbaf4712 |
@@ -18,9 +18,11 @@ Written in golang, it is very easy to install (single binary with no dependencie
|
||||
- `v1/completions`
|
||||
- `v1/chat/completions`
|
||||
- `v1/embeddings`
|
||||
- `v1/rerank`, `v1/reranking`, `rerank`
|
||||
- `v1/audio/speech` ([#36](https://github.com/mostlygeek/llama-swap/issues/36))
|
||||
- `v1/audio/transcriptions` ([docs](https://github.com/mostlygeek/llama-swap/issues/41#issuecomment-2722637867))
|
||||
- ✅ llama-server (llama.cpp) supported endpoints:
|
||||
- `v1/rerank`, `v1/reranking`, `/rerank`
|
||||
- `/infill` - for code infilling
|
||||
- ✅ llama-swap custom API endpoints
|
||||
- `/ui` - web UI
|
||||
- `/log` - remote log monitoring
|
||||
|
||||
@@ -129,6 +129,15 @@ models:
|
||||
# - recommended to stick to sampling parameters
|
||||
strip_params: "temperature, top_p, top_k"
|
||||
|
||||
# concurrencyLimit: overrides the allowed number of active parallel requests to a model
|
||||
# - optional, default: 0
|
||||
# - useful for limiting the number of active parallel requests a model can process
|
||||
# - must be set per model
|
||||
# - any number greater than 0 will override the internal default value of 10
|
||||
# - any requests that exceeds the limit will receive an HTTP 429 Too Many Requests response
|
||||
# - recommended to be omitted and the default used
|
||||
concurrencyLimit: 0
|
||||
|
||||
# Unlisted model example:
|
||||
"qwen-unlisted":
|
||||
# unlisted: boolean, true or false
|
||||
|
||||
+28
-22
@@ -5,12 +5,20 @@ import (
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/tidwall/gjson"
|
||||
)
|
||||
|
||||
type MetricsRecorder struct {
|
||||
metricsMonitor *MetricsMonitor
|
||||
realModelName string
|
||||
// isStreaming bool
|
||||
startTime time.Time
|
||||
}
|
||||
|
||||
// MetricsMiddleware sets up the MetricsResponseWriter for capturing upstream requests
|
||||
func MetricsMiddleware(pm *ProxyManager) gin.HandlerFunc {
|
||||
return func(c *gin.Context) {
|
||||
@@ -41,49 +49,47 @@ func MetricsMiddleware(pm *ProxyManager) gin.HandlerFunc {
|
||||
metricsRecorder: &MetricsRecorder{
|
||||
metricsMonitor: pm.metricsMonitor,
|
||||
realModelName: realModelName,
|
||||
isStreaming: gjson.GetBytes(bodyBytes, "stream").Bool(),
|
||||
startTime: time.Now(),
|
||||
},
|
||||
}
|
||||
c.Writer = writer
|
||||
c.Next()
|
||||
|
||||
rec := writer.metricsRecorder
|
||||
rec.processBody(writer.body)
|
||||
}
|
||||
}
|
||||
// check for streaming response
|
||||
if strings.Contains(c.Writer.Header().Get("Content-Type"), "text/event-stream") {
|
||||
writer.metricsRecorder.processStreamingResponse(writer.body)
|
||||
} else {
|
||||
writer.metricsRecorder.processNonStreamingResponse(writer.body)
|
||||
}
|
||||
|
||||
type MetricsRecorder struct {
|
||||
metricsMonitor *MetricsMonitor
|
||||
realModelName string
|
||||
isStreaming bool
|
||||
startTime time.Time
|
||||
}
|
||||
|
||||
// processBody handles response processing after request completes
|
||||
func (rec *MetricsRecorder) processBody(body []byte) {
|
||||
if rec.isStreaming {
|
||||
rec.processStreamingResponse(body)
|
||||
} else {
|
||||
rec.processNonStreamingResponse(body)
|
||||
}
|
||||
}
|
||||
|
||||
func (rec *MetricsRecorder) parseAndRecordMetrics(jsonData gjson.Result) bool {
|
||||
usage := jsonData.Get("usage")
|
||||
if !usage.Exists() {
|
||||
timings := jsonData.Get("timings")
|
||||
if !usage.Exists() && !timings.Exists() {
|
||||
return false
|
||||
}
|
||||
|
||||
// default values
|
||||
outputTokens := int(jsonData.Get("usage.completion_tokens").Int())
|
||||
inputTokens := int(jsonData.Get("usage.prompt_tokens").Int())
|
||||
outputTokens := 0
|
||||
inputTokens := 0
|
||||
|
||||
// timings data
|
||||
tokensPerSecond := -1.0
|
||||
promptPerSecond := -1.0
|
||||
durationMs := int(time.Since(rec.startTime).Milliseconds())
|
||||
|
||||
if usage.Exists() {
|
||||
outputTokens = int(jsonData.Get("usage.completion_tokens").Int())
|
||||
inputTokens = int(jsonData.Get("usage.prompt_tokens").Int())
|
||||
}
|
||||
|
||||
// use llama-server's timing data for tok/sec and duration as it is more accurate
|
||||
if timings := jsonData.Get("timings"); timings.Exists() {
|
||||
if timings.Exists() {
|
||||
inputTokens = int(jsonData.Get("timings.prompt_n").Int())
|
||||
outputTokens = int(jsonData.Get("timings.predicted_n").Int())
|
||||
promptPerSecond = jsonData.Get("timings.prompt_per_second").Float()
|
||||
tokensPerSecond = jsonData.Get("timings.predicted_per_second").Float()
|
||||
durationMs = int(jsonData.Get("timings.prompt_ms").Float() + jsonData.Get("timings.predicted_ms").Float())
|
||||
|
||||
@@ -191,11 +191,17 @@ func (pm *ProxyManager) setupGinEngine() {
|
||||
// Support legacy /v1/completions api, see issue #12
|
||||
pm.ginEngine.POST("/v1/completions", mm, pm.proxyOAIHandler)
|
||||
|
||||
// Support embeddings
|
||||
// Support embeddings and reranking
|
||||
pm.ginEngine.POST("/v1/embeddings", mm, pm.proxyOAIHandler)
|
||||
|
||||
// llama-server's /reranking endpoint + aliases
|
||||
pm.ginEngine.POST("/reranking", mm, pm.proxyOAIHandler)
|
||||
pm.ginEngine.POST("/rerank", mm, pm.proxyOAIHandler)
|
||||
pm.ginEngine.POST("/v1/rerank", mm, pm.proxyOAIHandler)
|
||||
pm.ginEngine.POST("/v1/reranking", mm, pm.proxyOAIHandler)
|
||||
pm.ginEngine.POST("/rerank", mm, pm.proxyOAIHandler)
|
||||
|
||||
// llama-server's /infill endpoint for code infilling
|
||||
pm.ginEngine.POST("/infill", mm, pm.proxyOAIHandler)
|
||||
|
||||
// Support audio/speech endpoint
|
||||
pm.ginEngine.POST("/v1/audio/speech", pm.proxyOAIHandler)
|
||||
|
||||
+53
-63
@@ -1,88 +1,78 @@
|
||||
import { useEffect, useCallback } from "react";
|
||||
import { BrowserRouter as Router, Routes, Route, Navigate, NavLink } from "react-router-dom";
|
||||
import { useTheme } from "./contexts/ThemeProvider";
|
||||
import { APIProvider } from "./contexts/APIProvider";
|
||||
import { useAPI } from "./contexts/APIProvider";
|
||||
import LogViewerPage from "./pages/LogViewer";
|
||||
import ModelPage from "./pages/Models";
|
||||
import ActivityPage from "./pages/Activity";
|
||||
import ConnectionStatus from "./components/ConnectionStatus";
|
||||
import ConnectionStatusIcon from "./components/ConnectionStatus";
|
||||
import { RiSunFill, RiMoonFill } from "react-icons/ri";
|
||||
import { usePersistentState } from "./hooks/usePersistentState";
|
||||
|
||||
function App() {
|
||||
const { isNarrow, toggleTheme, isDarkMode } = useTheme();
|
||||
const [appTitle, setAppTitle] = usePersistentState("app-title", "llama-swap");
|
||||
|
||||
const { isNarrow, toggleTheme, isDarkMode, appTitle, setAppTitle, setConnectionState } = useTheme();
|
||||
const handleTitleChange = useCallback(
|
||||
(newTitle: string) => {
|
||||
setAppTitle(newTitle);
|
||||
document.title = newTitle;
|
||||
setAppTitle(newTitle.replace(/\n/g, "").trim().substring(0, 64) || "llama-swap");
|
||||
},
|
||||
[setAppTitle]
|
||||
);
|
||||
|
||||
const { connectionStatus } = useAPI();
|
||||
|
||||
// Synchronize the window.title connections state with the actual connection state
|
||||
useEffect(() => {
|
||||
document.title = appTitle; // Set initial title
|
||||
}, [appTitle]);
|
||||
setConnectionState(connectionStatus);
|
||||
}, [connectionStatus]);
|
||||
|
||||
return (
|
||||
<Router basename="/ui/">
|
||||
<APIProvider>
|
||||
<div className="flex flex-col h-screen">
|
||||
<nav className="bg-surface border-b border-border p-2 h-[75px]">
|
||||
<div className="flex items-center justify-between mx-auto px-4 h-full">
|
||||
{!isNarrow && (
|
||||
<h1
|
||||
contentEditable
|
||||
suppressContentEditableWarning
|
||||
className="flex items-center p-0 outline-none hover:bg-gray-100 dark:hover:bg-gray-700 rounded px-1"
|
||||
onBlur={(e) =>
|
||||
handleTitleChange(e.currentTarget.textContent?.replace(/\n/g, "").trim() || "llama-swap")
|
||||
<div className="flex flex-col h-screen">
|
||||
<nav className="bg-surface border-b border-border p-2 h-[75px]">
|
||||
<div className="flex items-center justify-between mx-auto px-4 h-full">
|
||||
{!isNarrow && (
|
||||
<h1
|
||||
contentEditable
|
||||
suppressContentEditableWarning
|
||||
className="flex items-center p-0 outline-none hover:bg-gray-100 dark:hover:bg-gray-700 rounded px-1"
|
||||
onBlur={(e) => handleTitleChange(e.currentTarget.textContent || "(set title)")}
|
||||
onKeyDown={(e) => {
|
||||
if (e.key === "Enter") {
|
||||
e.preventDefault();
|
||||
handleTitleChange(e.currentTarget.textContent || "(set title)");
|
||||
e.currentTarget.blur();
|
||||
}
|
||||
onKeyDown={(e) => {
|
||||
if (e.key === "Enter") {
|
||||
e.preventDefault();
|
||||
const sanitizedText =
|
||||
e.currentTarget.textContent?.replace(/\n/g, "").trim().substring(0, 25) || "llama-swap";
|
||||
handleTitleChange(sanitizedText);
|
||||
e.currentTarget.textContent = sanitizedText;
|
||||
e.currentTarget.blur();
|
||||
}
|
||||
}}
|
||||
>
|
||||
{appTitle}
|
||||
</h1>
|
||||
)}
|
||||
<div className="flex items-center space-x-4">
|
||||
<NavLink to="/" className={({ isActive }) => (isActive ? "navlink active" : "navlink")}>
|
||||
Logs
|
||||
</NavLink>
|
||||
|
||||
<NavLink to="/models" className={({ isActive }) => (isActive ? "navlink active" : "navlink")}>
|
||||
Models
|
||||
</NavLink>
|
||||
|
||||
<NavLink to="/activity" className={({ isActive }) => (isActive ? "navlink active" : "navlink")}>
|
||||
Activity
|
||||
</NavLink>
|
||||
<button className="" onClick={toggleTheme}>
|
||||
{isDarkMode ? <RiMoonFill /> : <RiSunFill />}
|
||||
</button>
|
||||
<ConnectionStatus />
|
||||
</div>
|
||||
}}
|
||||
>
|
||||
{appTitle}
|
||||
</h1>
|
||||
)}
|
||||
<div className="flex items-center space-x-4">
|
||||
<NavLink to="/" className={({ isActive }) => (isActive ? "navlink active" : "navlink")}>
|
||||
Logs
|
||||
</NavLink>
|
||||
<NavLink to="/models" className={({ isActive }) => (isActive ? "navlink active" : "navlink")}>
|
||||
Models
|
||||
</NavLink>
|
||||
<NavLink to="/activity" className={({ isActive }) => (isActive ? "navlink active" : "navlink")}>
|
||||
Activity
|
||||
</NavLink>
|
||||
<button className="" onClick={toggleTheme}>
|
||||
{isDarkMode ? <RiMoonFill /> : <RiSunFill />}
|
||||
</button>
|
||||
<ConnectionStatusIcon />
|
||||
</div>
|
||||
</nav>
|
||||
</div>
|
||||
</nav>
|
||||
|
||||
<main className="flex-1 overflow-auto p-4">
|
||||
<Routes>
|
||||
<Route path="/" element={<LogViewerPage />} />
|
||||
<Route path="/models" element={<ModelPage />} />
|
||||
<Route path="/activity" element={<ActivityPage />} />
|
||||
<Route path="*" element={<Navigate to="/" replace />} />
|
||||
</Routes>
|
||||
</main>
|
||||
</div>
|
||||
</APIProvider>
|
||||
<main className="flex-1 overflow-auto p-4">
|
||||
<Routes>
|
||||
<Route path="/" element={<LogViewerPage />} />
|
||||
<Route path="/models" element={<ModelPage />} />
|
||||
<Route path="/activity" element={<ActivityPage />} />
|
||||
<Route path="*" element={<Navigate to="/" replace />} />
|
||||
</Routes>
|
||||
</main>
|
||||
</div>
|
||||
</Router>
|
||||
);
|
||||
}
|
||||
|
||||
@@ -1,21 +1,11 @@
|
||||
import { useAPI } from "../contexts/APIProvider";
|
||||
import { useEffect, useState, useMemo } from "react";
|
||||
import { useMemo } from "react";
|
||||
|
||||
type ConnectionStatus = "disconnected" | "connecting" | "connected";
|
||||
|
||||
const ConnectionStatus = () => {
|
||||
const { getConnectionStatus } = useAPI();
|
||||
const [eventStreamStatus, setEventStreamStatus] = useState<ConnectionStatus>("disconnected");
|
||||
|
||||
useEffect(() => {
|
||||
const interval = setInterval(() => {
|
||||
setEventStreamStatus(getConnectionStatus());
|
||||
}, 1000);
|
||||
return () => clearInterval(interval);
|
||||
});
|
||||
const ConnectionStatusIcon = () => {
|
||||
const { connectionStatus } = useAPI();
|
||||
|
||||
const eventStatusColor = useMemo(() => {
|
||||
switch (eventStreamStatus) {
|
||||
switch (connectionStatus) {
|
||||
case "connected":
|
||||
return "bg-green-500";
|
||||
case "connecting":
|
||||
@@ -24,13 +14,13 @@ const ConnectionStatus = () => {
|
||||
default:
|
||||
return "bg-red-500";
|
||||
}
|
||||
}, [eventStreamStatus]);
|
||||
}, [connectionStatus]);
|
||||
|
||||
return (
|
||||
<div className="flex items-center" title={`event stream: ${eventStreamStatus}`}>
|
||||
<div className="flex items-center" title={`event stream: ${connectionStatus}`}>
|
||||
<span className={`inline-block w-3 h-3 rounded-full ${eventStatusColor} mr-2`}></span>
|
||||
</div>
|
||||
);
|
||||
};
|
||||
|
||||
export default ConnectionStatus;
|
||||
export default ConnectionStatusIcon;
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import { useRef, createContext, useState, useContext, useEffect, useCallback, useMemo, type ReactNode } from "react";
|
||||
import type { ConnectionState } from "../lib/types";
|
||||
|
||||
type ModelStatus = "ready" | "starting" | "stopping" | "stopped" | "shutdown" | "unknown";
|
||||
const LOG_LENGTH_LIMIT = 1024 * 100; /* 100KB of log data */
|
||||
@@ -20,7 +21,7 @@ interface APIProviderType {
|
||||
proxyLogs: string;
|
||||
upstreamLogs: string;
|
||||
metrics: Metrics[];
|
||||
getConnectionStatus: () => "connected" | "connecting" | "disconnected";
|
||||
connectionStatus: ConnectionState;
|
||||
}
|
||||
|
||||
interface Metrics {
|
||||
@@ -53,6 +54,7 @@ export function APIProvider({ children, autoStartAPIEvents = true }: APIProvider
|
||||
const [proxyLogs, setProxyLogs] = useState("");
|
||||
const [upstreamLogs, setUpstreamLogs] = useState("");
|
||||
const [metrics, setMetrics] = useState<Metrics[]>([]);
|
||||
const [connectionStatus, setConnectionState] = useState<ConnectionState>("disconnected");
|
||||
const apiEventSource = useRef<EventSource | null>(null);
|
||||
|
||||
const [models, setModels] = useState<Model[]>([]);
|
||||
@@ -64,16 +66,6 @@ export function APIProvider({ children, autoStartAPIEvents = true }: APIProvider
|
||||
});
|
||||
}, []);
|
||||
|
||||
const getConnectionStatus = useCallback(() => {
|
||||
if (apiEventSource.current?.readyState === EventSource.OPEN) {
|
||||
return "connected";
|
||||
} else if (apiEventSource.current?.readyState === EventSource.CONNECTING) {
|
||||
return "connecting";
|
||||
} else {
|
||||
return "disconnected";
|
||||
}
|
||||
}, []);
|
||||
|
||||
const enableAPIEvents = useCallback((enabled: boolean) => {
|
||||
if (!enabled) {
|
||||
apiEventSource.current?.close();
|
||||
@@ -86,7 +78,9 @@ export function APIProvider({ children, autoStartAPIEvents = true }: APIProvider
|
||||
const initialDelay = 1000; // 1 second
|
||||
|
||||
const connect = () => {
|
||||
apiEventSource.current = null;
|
||||
const eventSource = new EventSource("/api/events");
|
||||
setConnectionState("connecting");
|
||||
|
||||
eventSource.onopen = () => {
|
||||
// clear everything out on connect to keep things in sync
|
||||
@@ -94,6 +88,9 @@ export function APIProvider({ children, autoStartAPIEvents = true }: APIProvider
|
||||
setUpstreamLogs("");
|
||||
setMetrics([]); // clear metrics on reconnect
|
||||
setModels([]); // clear models on reconnect
|
||||
apiEventSource.current = eventSource;
|
||||
retryCount = 0;
|
||||
setConnectionState("connected");
|
||||
};
|
||||
|
||||
eventSource.onmessage = (e: MessageEvent) => {
|
||||
@@ -138,14 +135,14 @@ export function APIProvider({ children, autoStartAPIEvents = true }: APIProvider
|
||||
console.error(e.data, err);
|
||||
}
|
||||
};
|
||||
|
||||
eventSource.onerror = () => {
|
||||
eventSource.close();
|
||||
retryCount++;
|
||||
const delay = Math.min(initialDelay * Math.pow(2, retryCount - 1), 5000);
|
||||
setConnectionState("disconnected");
|
||||
setTimeout(connect, delay);
|
||||
};
|
||||
|
||||
apiEventSource.current = eventSource;
|
||||
};
|
||||
|
||||
connect();
|
||||
@@ -213,7 +210,7 @@ export function APIProvider({ children, autoStartAPIEvents = true }: APIProvider
|
||||
proxyLogs,
|
||||
upstreamLogs,
|
||||
metrics,
|
||||
getConnectionStatus,
|
||||
connectionStatus,
|
||||
}),
|
||||
[models, listModels, unloadAllModels, loadModel, enableAPIEvents, proxyLogs, upstreamLogs, metrics]
|
||||
);
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
import { createContext, useContext, useEffect, type ReactNode, useMemo, useState } from "react";
|
||||
import { usePersistentState } from "../hooks/usePersistentState";
|
||||
import type { ConnectionState } from "../lib/types";
|
||||
|
||||
type ScreenWidth = "xs" | "sm" | "md" | "lg" | "xl" | "2xl";
|
||||
type ThemeContextType = {
|
||||
@@ -7,6 +8,11 @@ type ThemeContextType = {
|
||||
screenWidth: ScreenWidth;
|
||||
isNarrow: boolean;
|
||||
toggleTheme: () => void;
|
||||
|
||||
// for managing the window title and connection state information
|
||||
appTitle: string;
|
||||
setAppTitle: (title: string) => void;
|
||||
setConnectionState: (state: ConnectionState) => void;
|
||||
};
|
||||
|
||||
const ThemeContext = createContext<ThemeContextType | undefined>(undefined);
|
||||
@@ -16,6 +22,17 @@ type ThemeProviderProps = {
|
||||
};
|
||||
|
||||
export function ThemeProvider({ children }: ThemeProviderProps) {
|
||||
const [appTitle, setAppTitle] = usePersistentState("app-title", "llama-swap");
|
||||
const [connectionState, setConnectionState] = useState<ConnectionState>("disconnected");
|
||||
|
||||
/**
|
||||
* Set the document.title with informative information
|
||||
*/
|
||||
useEffect(() => {
|
||||
const connectionIcon = connectionState === "connecting" ? "🟡" : connectionState === "connected" ? "🟢" : "🔴";
|
||||
document.title = connectionIcon + " " + appTitle; // Set initial title
|
||||
}, [appTitle, connectionState]);
|
||||
|
||||
const [isDarkMode, setIsDarkMode] = usePersistentState<boolean>("theme", false);
|
||||
const [screenWidth, setScreenWidth] = useState<ScreenWidth>("md"); // Default to md
|
||||
|
||||
@@ -55,7 +72,19 @@ export function ThemeProvider({ children }: ThemeProviderProps) {
|
||||
}, [screenWidth]);
|
||||
|
||||
return (
|
||||
<ThemeContext.Provider value={{ isDarkMode, toggleTheme, screenWidth, isNarrow }}>{children}</ThemeContext.Provider>
|
||||
<ThemeContext.Provider
|
||||
value={{
|
||||
isDarkMode,
|
||||
toggleTheme,
|
||||
screenWidth,
|
||||
isNarrow,
|
||||
appTitle,
|
||||
setAppTitle,
|
||||
setConnectionState,
|
||||
}}
|
||||
>
|
||||
{children}
|
||||
</ThemeContext.Provider>
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
@@ -0,0 +1 @@
|
||||
export type ConnectionState = "connected" | "connecting" | "disconnected";
|
||||
+4
-1
@@ -3,11 +3,14 @@ import { createRoot } from "react-dom/client";
|
||||
import "./index.css";
|
||||
import App from "./App.tsx";
|
||||
import { ThemeProvider } from "./contexts/ThemeProvider";
|
||||
import { APIProvider } from "./contexts/APIProvider";
|
||||
|
||||
createRoot(document.getElementById("root")!).render(
|
||||
<StrictMode>
|
||||
<ThemeProvider>
|
||||
<App />
|
||||
<APIProvider>
|
||||
<App />
|
||||
</APIProvider>
|
||||
</ThemeProvider>
|
||||
</StrictMode>
|
||||
);
|
||||
|
||||
Reference in New Issue
Block a user