Compare commits
9 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| f58c8c8ec5 | |||
| 954e2dee73 | |||
| a533aec736 | |||
| 97b17fc47d | |||
| 2457840698 | |||
| 7f55494151 | |||
| 831a90d3b0 | |||
| 977f1856bb | |||
| 52b329f7bc |
@@ -7,7 +7,7 @@
|
||||
|
||||
llama-swap is a light weight, transparent proxy server that provides automatic model swapping to llama.cpp's server.
|
||||
|
||||
Written in golang, it is very easy to install (single binary with no dependencies) and configure (single yaml file). To get started, download a pre-built binary or use the provided docker images.
|
||||
Written in golang, it is very easy to install (single binary with no dependencies) and configure (single yaml file). To get started, download a pre-built binary, a provided docker images or Homebrew.
|
||||
|
||||
## Features:
|
||||
|
||||
@@ -23,6 +23,7 @@ Written in golang, it is very easy to install (single binary with no dependencie
|
||||
- ✅ llama-server (llama.cpp) supported endpoints:
|
||||
- `v1/rerank`, `v1/reranking`, `/rerank`
|
||||
- `/infill` - for code infilling
|
||||
- `/completion` - for completion endpoint
|
||||
- ✅ llama-swap custom API endpoints
|
||||
- `/ui` - web UI
|
||||
- `/log` - remote log monitoring
|
||||
@@ -33,7 +34,7 @@ Written in golang, it is very easy to install (single binary with no dependencie
|
||||
- ✅ Run multiple models at once with `Groups` ([#107](https://github.com/mostlygeek/llama-swap/issues/107))
|
||||
- ✅ Automatic unloading of models after timeout by setting a `ttl`
|
||||
- ✅ Use any local OpenAI compatible server (llama.cpp, vllm, tabbyAPI, etc)
|
||||
- ✅ Reliable Docker and Podman support with `cmdStart` and `cmdStop`
|
||||
- ✅ Reliable Docker and Podman support using `cmd` and `cmdStop` together
|
||||
- ✅ Full control over server settings per model
|
||||
- ✅ Preload models on startup with `hooks` ([#235](https://github.com/mostlygeek/llama-swap/pull/235))
|
||||
|
||||
@@ -206,4 +207,7 @@ For Python based inference servers like vllm or tabbyAPI it is recommended to ru
|
||||
|
||||
## Star History
|
||||
|
||||
> [!NOTE]
|
||||
> ⭐️ Star this project to help others discover it!
|
||||
|
||||
[](https://www.star-history.com/#mostlygeek/llama-swap&Date)
|
||||
|
||||
+6
-6
@@ -49,8 +49,8 @@ macros:
|
||||
# - required
|
||||
# - each key is the model's ID, used in API requests
|
||||
# - model settings have default values that are used if they are not defined here
|
||||
# - below are examples of the various settings a model can have:
|
||||
# - available model settings: env, cmd, cmdStop, proxy, aliases, checkEndpoint, ttl, unlisted
|
||||
# - the model's ID is available in the ${MODEL_ID} macro, also available in macros defined above
|
||||
# - below are examples of the all the settings a model can have
|
||||
models:
|
||||
|
||||
# keys are the model names used in API requests
|
||||
@@ -148,12 +148,12 @@ models:
|
||||
cmd: llama-server --port ${PORT} -m Llama-3.2-1B-Instruct-Q4_K_M.gguf -ngl 0
|
||||
|
||||
# Docker example:
|
||||
# container run times like Docker and Podman can be used reliably with a
|
||||
# a combination of cmd and cmdStop.
|
||||
# container runtimes like Docker and Podman can be used reliably with
|
||||
# a combination of cmd, cmdStop, and ${MODEL_ID}
|
||||
"docker-llama":
|
||||
proxy: "http://127.0.0.1:${PORT}"
|
||||
cmd: |
|
||||
docker run --name dockertest
|
||||
docker run --name ${MODEL_ID}
|
||||
--init --rm -p ${PORT}:8080 -v /mnt/nvme/models:/models
|
||||
ghcr.io/ggml-org/llama.cpp:server
|
||||
--model '/models/Qwen2.5-Coder-0.5B-Instruct-Q4_K_M.gguf'
|
||||
@@ -167,7 +167,7 @@ models:
|
||||
# - on POSIX systems: a SIGTERM signal is sent
|
||||
# - on Windows, calls taskkill to stop the process
|
||||
# - processes have 5 seconds to shutdown until forceful termination is attempted
|
||||
cmdStop: docker stop dockertest
|
||||
cmdStop: docker stop ${MODEL_ID}
|
||||
|
||||
# groups: a dictionary of group settings
|
||||
# - optional, default: empty dictionary
|
||||
|
||||
@@ -153,6 +153,19 @@ func main() {
|
||||
|
||||
})
|
||||
|
||||
// llama-server compatibility: /completion
|
||||
r.POST("/completion", func(c *gin.Context) {
|
||||
c.Header("Content-Type", "application/json")
|
||||
c.JSON(http.StatusOK, gin.H{
|
||||
"responseMessage": *responseMessage,
|
||||
"usage": gin.H{
|
||||
"completion_tokens": 10,
|
||||
"prompt_tokens": 25,
|
||||
"total_tokens": 35,
|
||||
},
|
||||
})
|
||||
})
|
||||
|
||||
// issue #41
|
||||
r.POST("/v1/audio/transcriptions", func(c *gin.Context) {
|
||||
// Parse the multipart form
|
||||
|
||||
+7
-1
@@ -237,7 +237,7 @@ func LoadConfigFromReader(r io.Reader) (Config, error) {
|
||||
|
||||
- name must fit the regex ^[a-zA-Z0-9_-]+$
|
||||
- names must be less than 64 characters (no reason, just cause)
|
||||
- name can not be any reserved macros: PORT
|
||||
- name can not be any reserved macros: PORT, MODEL_ID
|
||||
- macro values must be less than 1024 characters
|
||||
*/
|
||||
macroNameRegex := regexp.MustCompile(`^[a-zA-Z0-9_-]+$`)
|
||||
@@ -253,6 +253,7 @@ func LoadConfigFromReader(r io.Reader) (Config, error) {
|
||||
}
|
||||
switch macroName {
|
||||
case "PORT":
|
||||
case "MODEL_ID":
|
||||
return Config{}, fmt.Errorf("macro name '%s' is reserved and cannot be used", macroName)
|
||||
}
|
||||
}
|
||||
@@ -296,6 +297,11 @@ func LoadConfigFromReader(r io.Reader) (Config, error) {
|
||||
nextPort++
|
||||
}
|
||||
|
||||
if strings.Contains(modelConfig.Cmd, "${MODEL_ID}") || strings.Contains(modelConfig.CmdStop, "${MODEL_ID}") {
|
||||
modelConfig.Cmd = strings.ReplaceAll(modelConfig.Cmd, "${MODEL_ID}", modelId)
|
||||
modelConfig.CmdStop = strings.ReplaceAll(modelConfig.CmdStop, "${MODEL_ID}", modelId)
|
||||
}
|
||||
|
||||
// make sure there are no unknown macros that have not been replaced
|
||||
macroPattern := regexp.MustCompile(`\$\{([a-zA-Z0-9_-]+)\}`)
|
||||
fieldMap := map[string]string{
|
||||
|
||||
@@ -440,3 +440,44 @@ models:
|
||||
expectedCmd := "/user/llama.cpp/build/bin/llama-server --port 9990 --model /path/to/model.gguf -ngl 99"
|
||||
assert.Equal(t, expectedCmd, cmdStr, "Final command does not match expected structure")
|
||||
}
|
||||
|
||||
func TestConfig_MacroModelId(t *testing.T) {
|
||||
content := `
|
||||
startPort: 9000
|
||||
macros:
|
||||
"docker-llama": docker run --name ${MODEL_ID} -p ${PORT}:8080 docker_img
|
||||
"docker-stop": docker stop ${MODEL_ID}
|
||||
|
||||
models:
|
||||
model1:
|
||||
cmd: /path/to/server -p ${PORT} -hf ${MODEL_ID}
|
||||
|
||||
model2:
|
||||
cmd: ${docker-llama}
|
||||
cmdStop: ${docker-stop}
|
||||
|
||||
author/model:F16:
|
||||
cmd: /path/to/server -p ${PORT} -hf ${MODEL_ID}
|
||||
cmdStop: stop
|
||||
`
|
||||
|
||||
config, err := LoadConfigFromReader(strings.NewReader(content))
|
||||
assert.NoError(t, err)
|
||||
sanitizedCmd, err := SanitizeCommand(config.Models["model1"].Cmd)
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, "/path/to/server -p 9001 -hf model1", strings.Join(sanitizedCmd, " "))
|
||||
|
||||
assert.Equal(t, "docker stop ${MODEL_ID}", config.Macros["docker-stop"])
|
||||
|
||||
sanitizedCmd2, err := SanitizeCommand(config.Models["model2"].Cmd)
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, "docker run --name model2 -p 9002:8080 docker_img", strings.Join(sanitizedCmd2, " "))
|
||||
|
||||
sanitizedCmdStop, err := SanitizeCommand(config.Models["model2"].CmdStop)
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, "docker stop model2", strings.Join(sanitizedCmdStop, " "))
|
||||
|
||||
sanitizedCmd3, err := SanitizeCommand(config.Models["author/model:F16"].Cmd)
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, "/path/to/server -p 9000 -hf author/model:F16", strings.Join(sanitizedCmd3, " "))
|
||||
}
|
||||
|
||||
@@ -61,7 +61,6 @@ func MetricsMiddleware(pm *ProxyManager) gin.HandlerFunc {
|
||||
} else {
|
||||
writer.metricsRecorder.processNonStreamingResponse(writer.body)
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
@@ -73,6 +72,7 @@ func (rec *MetricsRecorder) parseAndRecordMetrics(jsonData gjson.Result) bool {
|
||||
}
|
||||
|
||||
// default values
|
||||
cachedTokens := -1 // unknown or missing data
|
||||
outputTokens := 0
|
||||
inputTokens := 0
|
||||
|
||||
@@ -93,11 +93,16 @@ func (rec *MetricsRecorder) parseAndRecordMetrics(jsonData gjson.Result) bool {
|
||||
promptPerSecond = jsonData.Get("timings.prompt_per_second").Float()
|
||||
tokensPerSecond = jsonData.Get("timings.predicted_per_second").Float()
|
||||
durationMs = int(jsonData.Get("timings.prompt_ms").Float() + jsonData.Get("timings.predicted_ms").Float())
|
||||
|
||||
if cachedValue := jsonData.Get("timings.cache_n"); cachedValue.Exists() {
|
||||
cachedTokens = int(cachedValue.Int())
|
||||
}
|
||||
}
|
||||
|
||||
rec.metricsMonitor.addMetrics(TokenMetrics{
|
||||
Timestamp: time.Now(),
|
||||
Model: rec.realModelName,
|
||||
CachedTokens: cachedTokens,
|
||||
InputTokens: inputTokens,
|
||||
OutputTokens: outputTokens,
|
||||
PromptPerSecond: promptPerSecond,
|
||||
|
||||
@@ -13,6 +13,7 @@ type TokenMetrics struct {
|
||||
ID int `json:"id"`
|
||||
Timestamp time.Time `json:"timestamp"`
|
||||
Model string `json:"model"`
|
||||
CachedTokens int `json:"cache_tokens"`
|
||||
InputTokens int `json:"input_tokens"`
|
||||
OutputTokens int `json:"output_tokens"`
|
||||
PromptPerSecond float64 `json:"prompt_per_second"`
|
||||
@@ -61,7 +62,6 @@ func (mp *MetricsMonitor) addMetrics(metric TokenMetrics) {
|
||||
if len(mp.metrics) > mp.maxMetrics {
|
||||
mp.metrics = mp.metrics[len(mp.metrics)-mp.maxMetrics:]
|
||||
}
|
||||
|
||||
event.Emit(TokenMetricsEvent{Metrics: metric})
|
||||
}
|
||||
|
||||
|
||||
+12
-1
@@ -5,6 +5,7 @@ import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"net"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"os/exec"
|
||||
@@ -363,8 +364,18 @@ func (p *Process) stopCommand() {
|
||||
}
|
||||
|
||||
func (p *Process) checkHealthEndpoint(healthURL string) error {
|
||||
|
||||
client := &http.Client{
|
||||
Timeout: 500 * time.Millisecond,
|
||||
// wait a short time for a tcp connection to be established
|
||||
Transport: &http.Transport{
|
||||
DialContext: (&net.Dialer{
|
||||
Timeout: 500 * time.Millisecond,
|
||||
}).DialContext,
|
||||
},
|
||||
|
||||
// give a long time to respond to the health check endpoint
|
||||
// after the connection is established. See issue: 276
|
||||
Timeout: 5000 * time.Millisecond,
|
||||
}
|
||||
|
||||
req, err := http.NewRequest("GET", healthURL, nil)
|
||||
|
||||
@@ -60,10 +60,20 @@ func (pg *ProcessGroup) ProxyRequest(modelID string, writer http.ResponseWriter,
|
||||
if pg.swap {
|
||||
pg.Lock()
|
||||
if pg.lastUsedProcess != modelID {
|
||||
|
||||
// is there something already running?
|
||||
if pg.lastUsedProcess != "" {
|
||||
pg.processes[pg.lastUsedProcess].Stop()
|
||||
}
|
||||
|
||||
// wait for the request to the new model to be fully handled
|
||||
// and prevent race conditions see issue #277
|
||||
pg.processes[modelID].ProxyRequest(writer, request)
|
||||
pg.lastUsedProcess = modelID
|
||||
|
||||
// short circuit and exit
|
||||
pg.Unlock()
|
||||
return nil
|
||||
}
|
||||
pg.Unlock()
|
||||
}
|
||||
|
||||
+34
-16
@@ -4,6 +4,7 @@ import (
|
||||
"bytes"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"sync"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
@@ -44,32 +45,49 @@ func TestProcessGroup_HasMember(t *testing.T) {
|
||||
assert.False(t, pg.HasMember("model3"))
|
||||
}
|
||||
|
||||
func TestProcessGroup_ProxyRequestSwapIsTrue(t *testing.T) {
|
||||
// TestProcessGroup_ProxyRequestSwapIsTrueParallel tests that when swap is true
|
||||
// and multiple requests are made in parallel, only one process is running at a time.
|
||||
func TestProcessGroup_ProxyRequestSwapIsTrueParallel(t *testing.T) {
|
||||
var processGroupTestConfig = AddDefaultGroupToConfig(Config{
|
||||
HealthCheckTimeout: 15,
|
||||
Models: map[string]ModelConfig{
|
||||
// use the same listening so if a model is already running, it will fail
|
||||
// this is a way to test that swap isolation is working
|
||||
// properly when there are parallel requests made at the
|
||||
// same time.
|
||||
"model1": getTestSimpleResponderConfigPort("model1", 9832),
|
||||
"model2": getTestSimpleResponderConfigPort("model2", 9832),
|
||||
"model3": getTestSimpleResponderConfigPort("model3", 9832),
|
||||
"model4": getTestSimpleResponderConfigPort("model4", 9832),
|
||||
"model5": getTestSimpleResponderConfigPort("model5", 9832),
|
||||
},
|
||||
Groups: map[string]GroupConfig{
|
||||
"G1": {
|
||||
Swap: true,
|
||||
Members: []string{"model1", "model2", "model3", "model4", "model5"},
|
||||
},
|
||||
},
|
||||
})
|
||||
|
||||
pg := NewProcessGroup("G1", processGroupTestConfig, testLogger, testLogger)
|
||||
defer pg.StopProcesses(StopWaitForInflightRequest)
|
||||
|
||||
tests := []string{"model1", "model2"}
|
||||
tests := []string{"model1", "model2", "model3", "model4", "model5"}
|
||||
|
||||
var wg sync.WaitGroup
|
||||
|
||||
wg.Add(len(tests))
|
||||
for _, modelName := range tests {
|
||||
t.Run(modelName, func(t *testing.T) {
|
||||
reqBody := `{"x", "y"}`
|
||||
req := httptest.NewRequest("POST", "/v1/chat/completions", bytes.NewBufferString(reqBody))
|
||||
go func(modelName string) {
|
||||
defer wg.Done()
|
||||
req := httptest.NewRequest("POST", "/v1/chat/completions", nil)
|
||||
w := httptest.NewRecorder()
|
||||
|
||||
assert.NoError(t, pg.ProxyRequest(modelName, w, req))
|
||||
assert.Equal(t, http.StatusOK, w.Code)
|
||||
assert.Contains(t, w.Body.String(), modelName)
|
||||
|
||||
// make sure only one process is in the running state
|
||||
count := 0
|
||||
for _, process := range pg.processes {
|
||||
if process.CurrentState() == StateReady {
|
||||
count++
|
||||
}
|
||||
}
|
||||
assert.Equal(t, 1, count)
|
||||
})
|
||||
}(modelName)
|
||||
}
|
||||
wg.Wait()
|
||||
}
|
||||
|
||||
func TestProcessGroup_ProxyRequestSwapIsFalse(t *testing.T) {
|
||||
|
||||
@@ -203,6 +203,9 @@ func (pm *ProxyManager) setupGinEngine() {
|
||||
// llama-server's /infill endpoint for code infilling
|
||||
pm.ginEngine.POST("/infill", mm, pm.proxyOAIHandler)
|
||||
|
||||
// llama-server's /completion endpoint
|
||||
pm.ginEngine.POST("/completion", mm, pm.proxyOAIHandler)
|
||||
|
||||
// Support audio/speech endpoint
|
||||
pm.ginEngine.POST("/v1/audio/speech", pm.proxyOAIHandler)
|
||||
pm.ginEngine.POST("/v1/audio/transcriptions", pm.proxyOAIPostFormHandler)
|
||||
|
||||
@@ -42,7 +42,6 @@ func TestProxyManager_SwapProcessCorrectly(t *testing.T) {
|
||||
assert.Contains(t, w.Body.String(), modelName)
|
||||
}
|
||||
}
|
||||
|
||||
func TestProxyManager_SwapMultiProcess(t *testing.T) {
|
||||
config := AddDefaultGroupToConfig(Config{
|
||||
HealthCheckTimeout: 15,
|
||||
@@ -834,6 +833,28 @@ func TestProxyManager_HealthEndpoint(t *testing.T) {
|
||||
assert.Equal(t, "OK", rec.Body.String())
|
||||
}
|
||||
|
||||
// Ensure the custom llama-server /completion endpoint proxies correctly
|
||||
func TestProxyManager_CompletionEndpoint(t *testing.T) {
|
||||
config := AddDefaultGroupToConfig(Config{
|
||||
HealthCheckTimeout: 15,
|
||||
Models: map[string]ModelConfig{
|
||||
"model1": getTestSimpleResponderConfig("model1"),
|
||||
},
|
||||
LogLevel: "error",
|
||||
})
|
||||
|
||||
proxy := New(config)
|
||||
defer proxy.StopProcesses(StopWaitForInflightRequest)
|
||||
|
||||
reqBody := `{"model":"model1"}`
|
||||
req := httptest.NewRequest("POST", "/completion", bytes.NewBufferString(reqBody))
|
||||
w := httptest.NewRecorder()
|
||||
|
||||
proxy.ServeHTTP(w, req)
|
||||
assert.Equal(t, http.StatusOK, w.Code)
|
||||
assert.Contains(t, w.Body.String(), "model1")
|
||||
}
|
||||
|
||||
func TestProxyManager_StartupHooks(t *testing.T) {
|
||||
|
||||
// using real YAML as the configuration has gotten more complex
|
||||
|
||||
@@ -28,6 +28,7 @@ interface Metrics {
|
||||
id: number;
|
||||
timestamp: string;
|
||||
model: string;
|
||||
cache_tokens: number;
|
||||
input_tokens: number;
|
||||
output_tokens: number;
|
||||
prompt_per_second: number;
|
||||
|
||||
@@ -1,10 +1,6 @@
|
||||
import { useMemo } from "react";
|
||||
import { useAPI } from "../contexts/APIProvider";
|
||||
|
||||
const formatTimestamp = (timestamp: string): string => {
|
||||
return new Date(timestamp).toLocaleString();
|
||||
};
|
||||
|
||||
const formatSpeed = (speed: number): string => {
|
||||
return speed < 0 ? "unknown" : speed.toFixed(2) + " t/s";
|
||||
};
|
||||
@@ -13,6 +9,33 @@ const formatDuration = (ms: number): string => {
|
||||
return (ms / 1000).toFixed(2) + "s";
|
||||
};
|
||||
|
||||
const formatRelativeTime = (timestamp: string): string => {
|
||||
const now = new Date();
|
||||
const date = new Date(timestamp);
|
||||
const diffInSeconds = Math.floor((now.getTime() - date.getTime()) / 1000);
|
||||
|
||||
// Handle future dates by returning "just now"
|
||||
if (diffInSeconds < 5) {
|
||||
return "now";
|
||||
}
|
||||
|
||||
if (diffInSeconds < 60) {
|
||||
return `${diffInSeconds}s ago`;
|
||||
}
|
||||
|
||||
const diffInMinutes = Math.floor(diffInSeconds / 60);
|
||||
if (diffInMinutes < 60) {
|
||||
return `${diffInMinutes}m ago`;
|
||||
}
|
||||
|
||||
const diffInHours = Math.floor(diffInMinutes / 60);
|
||||
if (diffInHours < 24) {
|
||||
return `${diffInHours}h ago`;
|
||||
}
|
||||
|
||||
return "a while ago";
|
||||
};
|
||||
|
||||
const ActivityPage = () => {
|
||||
const { metrics } = useAPI();
|
||||
const sortedMetrics = useMemo(() => {
|
||||
@@ -32,11 +55,16 @@ const ActivityPage = () => {
|
||||
<table className="min-w-full divide-y">
|
||||
<thead>
|
||||
<tr>
|
||||
<th className="px-4 py-3 text-left text-xs font-medium uppercase tracking-wider">Id</th>
|
||||
<th className="px-6 py-3 text-left text-xs font-medium uppercase tracking-wider">Timestamp</th>
|
||||
<th className="px-4 py-3 text-left text-xs font-medium uppercase tracking-wider">ID</th>
|
||||
<th className="px-6 py-3 text-left text-xs font-medium uppercase tracking-wider">Time</th>
|
||||
<th className="px-6 py-3 text-left text-xs font-medium uppercase tracking-wider">Model</th>
|
||||
<th className="px-6 py-3 text-left text-xs font-medium uppercase tracking-wider">Input Tokens</th>
|
||||
<th className="px-6 py-3 text-left text-xs font-medium uppercase tracking-wider">Output Tokens</th>
|
||||
<th className="px-6 py-3 text-left text-xs font-medium uppercase tracking-wider">
|
||||
Cached <Tooltip content="prompt tokens from cache" />
|
||||
</th>
|
||||
<th className="px-6 py-3 text-left text-xs font-medium uppercase tracking-wider">
|
||||
Prompt <Tooltip content="new prompt tokens processed" />
|
||||
</th>
|
||||
<th className="px-6 py-3 text-left text-xs font-medium uppercase tracking-wider">Generated</th>
|
||||
<th className="px-6 py-3 text-left text-xs font-medium uppercase tracking-wider">Prompt Processing</th>
|
||||
<th className="px-6 py-3 text-left text-xs font-medium uppercase tracking-wider">Generation Speed</th>
|
||||
<th className="px-6 py-3 text-left text-xs font-medium uppercase tracking-wider">Duration</th>
|
||||
@@ -46,8 +74,11 @@ const ActivityPage = () => {
|
||||
{sortedMetrics.map((metric) => (
|
||||
<tr key={`metric_${metric.id}`}>
|
||||
<td className="px-4 py-4 whitespace-nowrap text-sm">{metric.id + 1 /* un-zero index */}</td>
|
||||
<td className="px-6 py-4 whitespace-nowrap text-sm">{formatTimestamp(metric.timestamp)}</td>
|
||||
<td className="px-6 py-4 whitespace-nowrap text-sm">{formatRelativeTime(metric.timestamp)}</td>
|
||||
<td className="px-6 py-4 whitespace-nowrap text-sm">{metric.model}</td>
|
||||
<td className="px-6 py-4 whitespace-nowrap text-sm">
|
||||
{metric.cache_tokens > 0 ? metric.cache_tokens.toLocaleString() : "-"}
|
||||
</td>
|
||||
<td className="px-6 py-4 whitespace-nowrap text-sm">{metric.input_tokens.toLocaleString()}</td>
|
||||
<td className="px-6 py-4 whitespace-nowrap text-sm">{metric.output_tokens.toLocaleString()}</td>
|
||||
<td className="px-6 py-4 whitespace-nowrap text-sm">{formatSpeed(metric.prompt_per_second)}</td>
|
||||
@@ -63,4 +94,28 @@ const ActivityPage = () => {
|
||||
);
|
||||
};
|
||||
|
||||
interface TooltipProps {
|
||||
content: string;
|
||||
}
|
||||
|
||||
const Tooltip: React.FC<TooltipProps> = ({ content }) => {
|
||||
return (
|
||||
<div className="relative group inline-block">
|
||||
ⓘ
|
||||
<div
|
||||
className="absolute top-full left-1/2 transform -translate-x-1/2 mt-2
|
||||
px-3 py-2 bg-gray-900 text-white text-sm rounded-md
|
||||
opacity-0 group-hover:opacity-100 transition-opacity
|
||||
duration-200 pointer-events-none whitespace-nowrap z-50 normal-case"
|
||||
>
|
||||
{content}
|
||||
<div
|
||||
className="absolute bottom-full left-1/2 transform -translate-x-1/2
|
||||
border-4 border-transparent border-b-gray-900"
|
||||
></div>
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
};
|
||||
|
||||
export default ActivityPage;
|
||||
|
||||
Reference in New Issue
Block a user