Compare commits

...

5 Commits

Author SHA1 Message Date
Benson Wong c36986fef6 upstream handler support for model names with forward slash (#298)
The upstream handler would break on model IDs that contained a forward
slash. Model IDs like "aaa/bbb" called at upstream/aaa/bbb would result
in an error. This commit adds support for model IDs with a forward slash
by iteratively searching the path for a match.

Fixes: #229
2025-09-13 13:37:03 -07:00
Artur Podsiadły 558801db1a Fix nginx proxy buffering for streaming endpoints (#295)
* Fix nginx proxy buffering for streaming endpoints

- Add X-Accel-Buffering: no header to SSE endpoints (/api/events, /logs/stream)
- Add X-Accel-Buffering: no header to proxied text/event-stream responses
- Add nginx reverse proxy configuration section to README
- Add tests for X-Accel-Buffering header on streaming endpoints

Fixes #236

* Fix goroutine cleanup in streaming endpoints test

Add context cancellation to TestProxyManager_StreamingEndpointsReturnNoBufferingHeader
to ensure the goroutine is properly cleaned up when the test completes.
2025-09-09 16:07:46 -07:00
Benson Wong b21dee27c1 Fix #288 Vite hot module reloading creating multiple SSE connections (#290)
- move SSE (EventSource) connection to module level
- manage EventSource as a singleton, closing open connection before
  reopening a new one
2025-09-07 21:48:58 -07:00
Benson Wong f58c8c8ec5 Support llama.cpp's cache_n in timings info (#287)
Capture prompt cache metrics and surface them on Activities page in UI
2025-09-06 13:58:02 -07:00
Benson Wong 954e2dee73 Remove cmdStart from README [skip ci]
cmdStart was in the README but it doesn't exist. Fixed the typo. Oops.
2025-09-04 11:57:28 -07:00
10 changed files with 217 additions and 29 deletions
+25 -1
View File
@@ -34,7 +34,7 @@ Written in golang, it is very easy to install (single binary with no dependencie
- ✅ Run multiple models at once with `Groups` ([#107](https://github.com/mostlygeek/llama-swap/issues/107)) - ✅ Run multiple models at once with `Groups` ([#107](https://github.com/mostlygeek/llama-swap/issues/107))
- ✅ Automatic unloading of models after timeout by setting a `ttl` - ✅ Automatic unloading of models after timeout by setting a `ttl`
- ✅ Use any local OpenAI compatible server (llama.cpp, vllm, tabbyAPI, etc) - ✅ Use any local OpenAI compatible server (llama.cpp, vllm, tabbyAPI, etc)
- ✅ Reliable Docker and Podman support with `cmdStart` and `cmdStop` - ✅ Reliable Docker and Podman support using `cmd` and `cmdStop` together
- ✅ Full control over server settings per model - ✅ Full control over server settings per model
- ✅ Preload models on startup with `hooks` ([#235](https://github.com/mostlygeek/llama-swap/pull/235)) - ✅ Preload models on startup with `hooks` ([#235](https://github.com/mostlygeek/llama-swap/pull/235))
@@ -73,6 +73,30 @@ However, there are many more capabilities that llama-swap supports:
See the [configuration documentation](https://github.com/mostlygeek/llama-swap/wiki/Configuration) in the wiki all options and examples. See the [configuration documentation](https://github.com/mostlygeek/llama-swap/wiki/Configuration) in the wiki all options and examples.
## Reverse Proxy Configuration (nginx)
If you deploy llama-swap behind nginx, disable response buffering for streaming endpoints. By default, nginx buffers responses which breaks ServerSent Events (SSE) and streaming chat completion. ([#236](https://github.com/mostlygeek/llama-swap/issues/236))
Recommended nginx configuration snippets:
```nginx
# SSE for UI events/logs
location /api/events {
proxy_pass http://your-llama-swap-backend;
proxy_buffering off;
proxy_cache off;
}
# Streaming chat completions (stream=true)
location /v1/chat/completions {
proxy_pass http://your-llama-swap-backend;
proxy_buffering off;
proxy_cache off;
}
```
As a safeguard, llama-swap also sets `X-Accel-Buffering: no` on SSE responses. However, explicitly disabling `proxy_buffering` at your reverse proxy is still recommended for reliable streaming behavior.
## Web UI ## Web UI
llama-swap includes a real time web interface for monitoring logs and models: llama-swap includes a real time web interface for monitoring logs and models:
+6 -1
View File
@@ -61,7 +61,6 @@ func MetricsMiddleware(pm *ProxyManager) gin.HandlerFunc {
} else { } else {
writer.metricsRecorder.processNonStreamingResponse(writer.body) writer.metricsRecorder.processNonStreamingResponse(writer.body)
} }
} }
} }
@@ -73,6 +72,7 @@ func (rec *MetricsRecorder) parseAndRecordMetrics(jsonData gjson.Result) bool {
} }
// default values // default values
cachedTokens := -1 // unknown or missing data
outputTokens := 0 outputTokens := 0
inputTokens := 0 inputTokens := 0
@@ -93,11 +93,16 @@ func (rec *MetricsRecorder) parseAndRecordMetrics(jsonData gjson.Result) bool {
promptPerSecond = jsonData.Get("timings.prompt_per_second").Float() promptPerSecond = jsonData.Get("timings.prompt_per_second").Float()
tokensPerSecond = jsonData.Get("timings.predicted_per_second").Float() tokensPerSecond = jsonData.Get("timings.predicted_per_second").Float()
durationMs = int(jsonData.Get("timings.prompt_ms").Float() + jsonData.Get("timings.predicted_ms").Float()) durationMs = int(jsonData.Get("timings.prompt_ms").Float() + jsonData.Get("timings.predicted_ms").Float())
if cachedValue := jsonData.Get("timings.cache_n"); cachedValue.Exists() {
cachedTokens = int(cachedValue.Int())
}
} }
rec.metricsMonitor.addMetrics(TokenMetrics{ rec.metricsMonitor.addMetrics(TokenMetrics{
Timestamp: time.Now(), Timestamp: time.Now(),
Model: rec.realModelName, Model: rec.realModelName,
CachedTokens: cachedTokens,
InputTokens: inputTokens, InputTokens: inputTokens,
OutputTokens: outputTokens, OutputTokens: outputTokens,
PromptPerSecond: promptPerSecond, PromptPerSecond: promptPerSecond,
+1 -1
View File
@@ -13,6 +13,7 @@ type TokenMetrics struct {
ID int `json:"id"` ID int `json:"id"`
Timestamp time.Time `json:"timestamp"` Timestamp time.Time `json:"timestamp"`
Model string `json:"model"` Model string `json:"model"`
CachedTokens int `json:"cache_tokens"`
InputTokens int `json:"input_tokens"` InputTokens int `json:"input_tokens"`
OutputTokens int `json:"output_tokens"` OutputTokens int `json:"output_tokens"`
PromptPerSecond float64 `json:"prompt_per_second"` PromptPerSecond float64 `json:"prompt_per_second"`
@@ -61,7 +62,6 @@ func (mp *MetricsMonitor) addMetrics(metric TokenMetrics) {
if len(mp.metrics) > mp.maxMetrics { if len(mp.metrics) > mp.maxMetrics {
mp.metrics = mp.metrics[len(mp.metrics)-mp.maxMetrics:] mp.metrics = mp.metrics[len(mp.metrics)-mp.maxMetrics:]
} }
event.Emit(TokenMetricsEvent{Metrics: metric}) event.Emit(TokenMetricsEvent{Metrics: metric})
} }
+4
View File
@@ -458,6 +458,10 @@ func (p *Process) ProxyRequest(w http.ResponseWriter, r *http.Request) {
w.Header().Add(k, v) w.Header().Add(k, v)
} }
} }
// prevent nginx from buffering streaming responses (e.g., SSE)
if strings.Contains(strings.ToLower(resp.Header.Get("Content-Type")), "text/event-stream") {
w.Header().Set("X-Accel-Buffering", "no")
}
w.WriteHeader(resp.StatusCode) w.WriteHeader(resp.StatusCode)
// faster than io.Copy when streaming // faster than io.Copy when streaming
+34 -6
View File
@@ -227,7 +227,7 @@ func (pm *ProxyManager) setupGinEngine() {
pm.ginEngine.GET("/upstream", func(c *gin.Context) { pm.ginEngine.GET("/upstream", func(c *gin.Context) {
c.Redirect(http.StatusFound, "/ui/models") c.Redirect(http.StatusFound, "/ui/models")
}) })
pm.ginEngine.Any("/upstream/:model_id/*upstreamPath", pm.proxyToUpstream) pm.ginEngine.Any("/upstream/*upstreamPath", pm.proxyToUpstream)
pm.ginEngine.GET("/unload", pm.unloadAllModelsHandler) pm.ginEngine.GET("/unload", pm.unloadAllModelsHandler)
pm.ginEngine.GET("/running", pm.listRunningProcessesHandler) pm.ginEngine.GET("/running", pm.listRunningProcessesHandler)
@@ -393,24 +393,52 @@ func (pm *ProxyManager) listModelsHandler(c *gin.Context) {
} }
func (pm *ProxyManager) proxyToUpstream(c *gin.Context) { func (pm *ProxyManager) proxyToUpstream(c *gin.Context) {
requestedModel := c.Param("model_id") upstreamPath := c.Param("upstreamPath")
if requestedModel == "" { // split the upstream path by / and search for the model name
parts := strings.Split(strings.TrimSpace(upstreamPath), "/")
if len(parts) == 0 {
pm.sendErrorResponse(c, http.StatusBadRequest, "model id required in path") pm.sendErrorResponse(c, http.StatusBadRequest, "model id required in path")
return return
} }
processGroup, realModelName, err := pm.swapProcessGroup(requestedModel) modelFound := false
searchModelName := ""
var modelName, remainingPath string
for i, part := range parts {
if parts[i] == "" {
continue
}
if searchModelName == "" {
searchModelName = part
} else {
searchModelName = searchModelName + "/" + parts[i]
}
if real, ok := pm.config.RealModelName(searchModelName); ok {
modelName = real
remainingPath = "/" + strings.Join(parts[i+1:], "/")
modelFound = true
break
}
}
if !modelFound {
pm.sendErrorResponse(c, http.StatusBadRequest, "model id required in path")
return
}
processGroup, realModelName, err := pm.swapProcessGroup(modelName)
if err != nil { if err != nil {
pm.sendErrorResponse(c, http.StatusInternalServerError, fmt.Sprintf("error swapping process group: %s", err.Error())) pm.sendErrorResponse(c, http.StatusInternalServerError, fmt.Sprintf("error swapping process group: %s", err.Error()))
return return
} }
// rewrite the path // rewrite the path
c.Request.URL.Path = c.Param("upstreamPath") c.Request.URL.Path = remainingPath
processGroup.ProxyRequest(realModelName, c.Writer, c.Request) processGroup.ProxyRequest(realModelName, c.Writer, c.Request)
} }
func (pm *ProxyManager) proxyOAIHandler(c *gin.Context) { func (pm *ProxyManager) proxyOAIHandler(c *gin.Context) {
bodyBytes, err := io.ReadAll(c.Request.Body) bodyBytes, err := io.ReadAll(c.Request.Body)
if err != nil { if err != nil {
+2
View File
@@ -100,6 +100,8 @@ func (pm *ProxyManager) apiSendEvents(c *gin.Context) {
c.Header("Cache-Control", "no-cache") c.Header("Cache-Control", "no-cache")
c.Header("Connection", "keep-alive") c.Header("Connection", "keep-alive")
c.Header("X-Content-Type-Options", "nosniff") c.Header("X-Content-Type-Options", "nosniff")
// prevent nginx from buffering SSE
c.Header("X-Accel-Buffering", "no")
sendBuffer := make(chan messageEnvelope, 25) sendBuffer := make(chan messageEnvelope, 25)
ctx, cancel := context.WithCancel(c.Request.Context()) ctx, cancel := context.WithCancel(c.Request.Context())
+2
View File
@@ -28,6 +28,8 @@ func (pm *ProxyManager) streamLogsHandler(c *gin.Context) {
c.Header("Content-Type", "text/plain") c.Header("Content-Type", "text/plain")
c.Header("Transfer-Encoding", "chunked") c.Header("Transfer-Encoding", "chunked")
c.Header("X-Content-Type-Options", "nosniff") c.Header("X-Content-Type-Options", "nosniff")
// prevent nginx from buffering streamed logs
c.Header("X-Accel-Buffering", "no")
logMonitorId := c.Param("logMonitorID") logMonitorId := c.Param("logMonitorID")
logger, err := pm.getLogger(logMonitorId) logger, err := pm.getLogger(logMonitorId)
+65
View File
@@ -2,6 +2,7 @@ package proxy
import ( import (
"bytes" "bytes"
"context"
"encoding/json" "encoding/json"
"fmt" "fmt"
"math/rand" "math/rand"
@@ -913,3 +914,67 @@ models:
assert.Equal(t, StateReady, proxy.processGroups["preloadTestGroup"].processes["model1"].CurrentState()) assert.Equal(t, StateReady, proxy.processGroups["preloadTestGroup"].processes["model1"].CurrentState())
assert.Equal(t, StateReady, proxy.processGroups["preloadTestGroup"].processes["model2"].CurrentState()) assert.Equal(t, StateReady, proxy.processGroups["preloadTestGroup"].processes["model2"].CurrentState())
} }
func TestProxyManager_StreamingEndpointsReturnNoBufferingHeader(t *testing.T) {
config := AddDefaultGroupToConfig(Config{
HealthCheckTimeout: 15,
Models: map[string]ModelConfig{
"model1": getTestSimpleResponderConfig("model1"),
},
LogLevel: "error",
})
proxy := New(config)
defer proxy.StopProcesses(StopWaitForInflightRequest)
endpoints := []string{
"/api/events",
"/logs/stream",
"/logs/stream/proxy",
"/logs/stream/upstream",
}
for _, endpoint := range endpoints {
t.Run(endpoint, func(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
req := httptest.NewRequest("GET", endpoint, nil)
req = req.WithContext(ctx)
rec := httptest.NewRecorder()
// We don't need the handler to fully complete, just to set the headers
// so run it in a goroutine and check the headers after a short delay
go proxy.ServeHTTP(rec, req)
time.Sleep(10 * time.Millisecond) // give it time to start and write headers
assert.Equal(t, http.StatusOK, rec.Code)
assert.Equal(t, "no", rec.Header().Get("X-Accel-Buffering"))
})
}
}
func TestProxyManager_ProxiedStreamingEndpointReturnsNoBufferingHeader(t *testing.T) {
config := AddDefaultGroupToConfig(Config{
HealthCheckTimeout: 15,
Models: map[string]ModelConfig{
"streaming-model": getTestSimpleResponderConfig("streaming-model"),
},
LogLevel: "error",
})
proxy := New(config)
defer proxy.StopProcesses(StopWaitForInflightRequest)
// Make a streaming request
reqBody := `{"model":"streaming-model"}`
// simple-responder will return text/event-stream when stream=true is in the query
req := httptest.NewRequest("POST", "/v1/chat/completions?stream=true", bytes.NewBufferString(reqBody))
rec := httptest.NewRecorder()
proxy.ServeHTTP(rec, req)
assert.Equal(t, http.StatusOK, rec.Code)
assert.Equal(t, "no", rec.Header().Get("X-Accel-Buffering"))
assert.Contains(t, rec.Header().Get("Content-Type"), "text/event-stream")
}
+14 -11
View File
@@ -1,4 +1,4 @@
import { useRef, createContext, useState, useContext, useEffect, useCallback, useMemo, type ReactNode } from "react"; import { createContext, useState, useContext, useEffect, useCallback, useMemo, type ReactNode } from "react";
import type { ConnectionState } from "../lib/types"; import type { ConnectionState } from "../lib/types";
type ModelStatus = "ready" | "starting" | "stopping" | "stopped" | "shutdown" | "unknown"; type ModelStatus = "ready" | "starting" | "stopping" | "stopped" | "shutdown" | "unknown";
@@ -28,6 +28,7 @@ interface Metrics {
id: number; id: number;
timestamp: string; timestamp: string;
model: string; model: string;
cache_tokens: number;
input_tokens: number; input_tokens: number;
output_tokens: number; output_tokens: number;
prompt_per_second: number; prompt_per_second: number;
@@ -50,12 +51,14 @@ type APIProviderProps = {
autoStartAPIEvents?: boolean; autoStartAPIEvents?: boolean;
}; };
let apiEventSource: EventSource | null = null;
export function APIProvider({ children, autoStartAPIEvents = true }: APIProviderProps) { export function APIProvider({ children, autoStartAPIEvents = true }: APIProviderProps) {
const [proxyLogs, setProxyLogs] = useState(""); const [proxyLogs, setProxyLogs] = useState("");
const [upstreamLogs, setUpstreamLogs] = useState(""); const [upstreamLogs, setUpstreamLogs] = useState("");
const [metrics, setMetrics] = useState<Metrics[]>([]); const [metrics, setMetrics] = useState<Metrics[]>([]);
const [connectionStatus, setConnectionState] = useState<ConnectionState>("disconnected"); const [connectionStatus, setConnectionState] = useState<ConnectionState>("disconnected");
const apiEventSource = useRef<EventSource | null>(null); //const apiEventSource = useRef<EventSource | null>(null);
const [models, setModels] = useState<Model[]>([]); const [models, setModels] = useState<Model[]>([]);
@@ -68,8 +71,8 @@ export function APIProvider({ children, autoStartAPIEvents = true }: APIProvider
const enableAPIEvents = useCallback((enabled: boolean) => { const enableAPIEvents = useCallback((enabled: boolean) => {
if (!enabled) { if (!enabled) {
apiEventSource.current?.close(); apiEventSource?.close();
apiEventSource.current = null; apiEventSource = null;
setMetrics([]); setMetrics([]);
return; return;
} }
@@ -78,22 +81,22 @@ export function APIProvider({ children, autoStartAPIEvents = true }: APIProvider
const initialDelay = 1000; // 1 second const initialDelay = 1000; // 1 second
const connect = () => { const connect = () => {
apiEventSource.current = null; apiEventSource?.close();
const eventSource = new EventSource("/api/events"); apiEventSource = new EventSource("/api/events");
setConnectionState("connecting"); setConnectionState("connecting");
eventSource.onopen = () => { apiEventSource.onopen = () => {
// clear everything out on connect to keep things in sync // clear everything out on connect to keep things in sync
setProxyLogs(""); setProxyLogs("");
setUpstreamLogs(""); setUpstreamLogs("");
setMetrics([]); // clear metrics on reconnect setMetrics([]); // clear metrics on reconnect
setModels([]); // clear models on reconnect setModels([]); // clear models on reconnect
apiEventSource.current = eventSource;
retryCount = 0; retryCount = 0;
setConnectionState("connected"); setConnectionState("connected");
}; };
eventSource.onmessage = (e: MessageEvent) => { apiEventSource.onmessage = (e: MessageEvent) => {
try { try {
const message = JSON.parse(e.data) as APIEventEnvelope; const message = JSON.parse(e.data) as APIEventEnvelope;
switch (message.type) { switch (message.type) {
@@ -136,8 +139,8 @@ export function APIProvider({ children, autoStartAPIEvents = true }: APIProvider
} }
}; };
eventSource.onerror = () => { apiEventSource.onerror = () => {
eventSource.close(); apiEventSource?.close();
retryCount++; retryCount++;
const delay = Math.min(initialDelay * Math.pow(2, retryCount - 1), 5000); const delay = Math.min(initialDelay * Math.pow(2, retryCount - 1), 5000);
setConnectionState("disconnected"); setConnectionState("disconnected");
+64 -9
View File
@@ -1,10 +1,6 @@
import { useMemo } from "react"; import { useMemo } from "react";
import { useAPI } from "../contexts/APIProvider"; import { useAPI } from "../contexts/APIProvider";
const formatTimestamp = (timestamp: string): string => {
return new Date(timestamp).toLocaleString();
};
const formatSpeed = (speed: number): string => { const formatSpeed = (speed: number): string => {
return speed < 0 ? "unknown" : speed.toFixed(2) + " t/s"; return speed < 0 ? "unknown" : speed.toFixed(2) + " t/s";
}; };
@@ -13,6 +9,33 @@ const formatDuration = (ms: number): string => {
return (ms / 1000).toFixed(2) + "s"; return (ms / 1000).toFixed(2) + "s";
}; };
const formatRelativeTime = (timestamp: string): string => {
const now = new Date();
const date = new Date(timestamp);
const diffInSeconds = Math.floor((now.getTime() - date.getTime()) / 1000);
// Handle future dates by returning "just now"
if (diffInSeconds < 5) {
return "now";
}
if (diffInSeconds < 60) {
return `${diffInSeconds}s ago`;
}
const diffInMinutes = Math.floor(diffInSeconds / 60);
if (diffInMinutes < 60) {
return `${diffInMinutes}m ago`;
}
const diffInHours = Math.floor(diffInMinutes / 60);
if (diffInHours < 24) {
return `${diffInHours}h ago`;
}
return "a while ago";
};
const ActivityPage = () => { const ActivityPage = () => {
const { metrics } = useAPI(); const { metrics } = useAPI();
const sortedMetrics = useMemo(() => { const sortedMetrics = useMemo(() => {
@@ -32,11 +55,16 @@ const ActivityPage = () => {
<table className="min-w-full divide-y"> <table className="min-w-full divide-y">
<thead> <thead>
<tr> <tr>
<th className="px-4 py-3 text-left text-xs font-medium uppercase tracking-wider">Id</th> <th className="px-4 py-3 text-left text-xs font-medium uppercase tracking-wider">ID</th>
<th className="px-6 py-3 text-left text-xs font-medium uppercase tracking-wider">Timestamp</th> <th className="px-6 py-3 text-left text-xs font-medium uppercase tracking-wider">Time</th>
<th className="px-6 py-3 text-left text-xs font-medium uppercase tracking-wider">Model</th> <th className="px-6 py-3 text-left text-xs font-medium uppercase tracking-wider">Model</th>
<th className="px-6 py-3 text-left text-xs font-medium uppercase tracking-wider">Input Tokens</th> <th className="px-6 py-3 text-left text-xs font-medium uppercase tracking-wider">
<th className="px-6 py-3 text-left text-xs font-medium uppercase tracking-wider">Output Tokens</th> Cached <Tooltip content="prompt tokens from cache" />
</th>
<th className="px-6 py-3 text-left text-xs font-medium uppercase tracking-wider">
Prompt <Tooltip content="new prompt tokens processed" />
</th>
<th className="px-6 py-3 text-left text-xs font-medium uppercase tracking-wider">Generated</th>
<th className="px-6 py-3 text-left text-xs font-medium uppercase tracking-wider">Prompt Processing</th> <th className="px-6 py-3 text-left text-xs font-medium uppercase tracking-wider">Prompt Processing</th>
<th className="px-6 py-3 text-left text-xs font-medium uppercase tracking-wider">Generation Speed</th> <th className="px-6 py-3 text-left text-xs font-medium uppercase tracking-wider">Generation Speed</th>
<th className="px-6 py-3 text-left text-xs font-medium uppercase tracking-wider">Duration</th> <th className="px-6 py-3 text-left text-xs font-medium uppercase tracking-wider">Duration</th>
@@ -46,8 +74,11 @@ const ActivityPage = () => {
{sortedMetrics.map((metric) => ( {sortedMetrics.map((metric) => (
<tr key={`metric_${metric.id}`}> <tr key={`metric_${metric.id}`}>
<td className="px-4 py-4 whitespace-nowrap text-sm">{metric.id + 1 /* un-zero index */}</td> <td className="px-4 py-4 whitespace-nowrap text-sm">{metric.id + 1 /* un-zero index */}</td>
<td className="px-6 py-4 whitespace-nowrap text-sm">{formatTimestamp(metric.timestamp)}</td> <td className="px-6 py-4 whitespace-nowrap text-sm">{formatRelativeTime(metric.timestamp)}</td>
<td className="px-6 py-4 whitespace-nowrap text-sm">{metric.model}</td> <td className="px-6 py-4 whitespace-nowrap text-sm">{metric.model}</td>
<td className="px-6 py-4 whitespace-nowrap text-sm">
{metric.cache_tokens > 0 ? metric.cache_tokens.toLocaleString() : "-"}
</td>
<td className="px-6 py-4 whitespace-nowrap text-sm">{metric.input_tokens.toLocaleString()}</td> <td className="px-6 py-4 whitespace-nowrap text-sm">{metric.input_tokens.toLocaleString()}</td>
<td className="px-6 py-4 whitespace-nowrap text-sm">{metric.output_tokens.toLocaleString()}</td> <td className="px-6 py-4 whitespace-nowrap text-sm">{metric.output_tokens.toLocaleString()}</td>
<td className="px-6 py-4 whitespace-nowrap text-sm">{formatSpeed(metric.prompt_per_second)}</td> <td className="px-6 py-4 whitespace-nowrap text-sm">{formatSpeed(metric.prompt_per_second)}</td>
@@ -63,4 +94,28 @@ const ActivityPage = () => {
); );
}; };
interface TooltipProps {
content: string;
}
const Tooltip: React.FC<TooltipProps> = ({ content }) => {
return (
<div className="relative group inline-block">
<div
className="absolute top-full left-1/2 transform -translate-x-1/2 mt-2
px-3 py-2 bg-gray-900 text-white text-sm rounded-md
opacity-0 group-hover:opacity-100 transition-opacity
duration-200 pointer-events-none whitespace-nowrap z-50 normal-case"
>
{content}
<div
className="absolute bottom-full left-1/2 transform -translate-x-1/2
border-4 border-transparent border-b-gray-900"
></div>
</div>
</div>
);
};
export default ActivityPage; export default ActivityPage;