Compare commits
12 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 574fdfabb4 | |||
| 5172cb2e12 | |||
| 5672cb03fd | |||
| 0f583163f7 | |||
| 7905fa9ea3 | |||
| bbaf172956 | |||
| fd50932dbc | |||
| 8c693e7fcf | |||
| 8f2af26a41 | |||
| 01d4838fb3 | |||
| accd65294b | |||
| 7472a25864 |
@@ -7,6 +7,10 @@ on:
|
||||
|
||||
# Allows manual triggering of the workflow
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
tag:
|
||||
description: 'Tag version to release (e.g. v144)'
|
||||
required: true
|
||||
|
||||
permissions:
|
||||
contents: write
|
||||
@@ -20,15 +24,15 @@ jobs:
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
ref: ${{ github.event.inputs.tag || github.ref }}
|
||||
-
|
||||
name: Set up Go
|
||||
uses: actions/setup-go@v5
|
||||
|
||||
-
|
||||
name: Set up Node.js
|
||||
uses: actions/setup-node@v4
|
||||
with:
|
||||
node-version: '23' # or your preferred version
|
||||
node-version: '23'
|
||||
-
|
||||
name: Install dependencies and build UI
|
||||
run: |
|
||||
@@ -46,4 +50,30 @@ jobs:
|
||||
version: '~> v2'
|
||||
args: release --clean
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
trigger-tap-update:
|
||||
runs-on: ubuntu-latest
|
||||
needs: goreleaser
|
||||
steps:
|
||||
- name: "Resolve tag to dispatch"
|
||||
id: tag
|
||||
run: |
|
||||
if [[ "${{ github.event_name }}" == "workflow_dispatch" ]]; then
|
||||
echo "tag=${{ github.event.inputs.tag }}" >> "$GITHUB_OUTPUT"
|
||||
else
|
||||
echo "tag=${{ github.ref_name }}" >> "$GITHUB_OUTPUT"
|
||||
fi
|
||||
|
||||
- name: "Trigger tap repository update"
|
||||
uses: peter-evans/repository-dispatch@v2
|
||||
with:
|
||||
token: ${{ secrets.TAP_REPO_PAT }}
|
||||
repository: mostlygeek/homebrew-llama-swap
|
||||
event-type: new-release
|
||||
client-payload: |
|
||||
{
|
||||
"release": {
|
||||
"tag_name": "${{ steps.tag.outputs.tag }}"
|
||||
}
|
||||
}
|
||||
@@ -18,7 +18,7 @@ Written in golang, it is very easy to install (single binary with no dependencie
|
||||
- `v1/completions`
|
||||
- `v1/chat/completions`
|
||||
- `v1/embeddings`
|
||||
- `v1/rerank`
|
||||
- `v1/rerank`, `v1/reranking`, `rerank`
|
||||
- `v1/audio/speech` ([#36](https://github.com/mostlygeek/llama-swap/issues/36))
|
||||
- `v1/audio/transcriptions` ([docs](https://github.com/mostlygeek/llama-swap/issues/41#issuecomment-2722637867))
|
||||
- ✅ llama-swap custom API endpoints
|
||||
@@ -27,6 +27,7 @@ Written in golang, it is very easy to install (single binary with no dependencie
|
||||
- `/upstream/:model_id` - direct access to upstream HTTP server ([demo](https://github.com/mostlygeek/llama-swap/pull/31))
|
||||
- `/unload` - manually unload running models ([#58](https://github.com/mostlygeek/llama-swap/issues/58))
|
||||
- `/running` - list currently running models ([#61](https://github.com/mostlygeek/llama-swap/issues/61))
|
||||
- `/health` - just returns "OK"
|
||||
- ✅ Run multiple models at once with `Groups` ([#107](https://github.com/mostlygeek/llama-swap/issues/107))
|
||||
- ✅ Automatic unloading of models after timeout by setting a `ttl`
|
||||
- ✅ Use any local OpenAI compatible server (llama.cpp, vllm, tabbyAPI, etc)
|
||||
@@ -70,13 +71,22 @@ See the [configuration documentation](https://github.com/mostlygeek/llama-swap/w
|
||||
|
||||
## Web UI
|
||||
|
||||
llama-swap ships with a web based interface to make it easier to monitor logs and check the status of models.
|
||||
llama-swap ships with a real-time web interface to monitor logs and the status of models:
|
||||
|
||||
<img width="1758" alt="image" src="https://github.com/user-attachments/assets/31ae5bcd-5efd-46b0-b64b-6db9e60196d3" />
|
||||
<img width="1786" height="1334" alt="image" src="https://github.com/user-attachments/assets/d6258cb9-1dad-40db-828f-2be860aec8fe" />
|
||||
|
||||
## Docker Install ([download images](https://github.com/mostlygeek/llama-swap/pkgs/container/llama-swap))
|
||||
## Installation
|
||||
|
||||
Docker is the quickest way to try out llama-swap:
|
||||
llama-swap can be installed in multiple ways:
|
||||
|
||||
1. Docker
|
||||
2. Homebrew (macOS and Linux)
|
||||
3. From release binaries
|
||||
4. From source
|
||||
|
||||
### Docker Install ([download images](https://github.com/mostlygeek/llama-swap/pkgs/container/llama-swap))
|
||||
|
||||
Docker images with llama-swap and llama-server are built nightly.
|
||||
|
||||
```shell
|
||||
# use CPU inference; comes with the example config above
|
||||
@@ -98,7 +108,7 @@ $ curl -s http://localhost:9292/v1/chat/completions \
|
||||
```
|
||||
|
||||
<details>
|
||||
<summary>Docker images are built nightly for cuda, intel, vulcan, etc ...</summary>
|
||||
<summary>Docker images are built nightly with llama-server for cuda, intel, vulkan and musa.</summary>
|
||||
|
||||
They include:
|
||||
|
||||
@@ -121,9 +131,23 @@ $ docker run -it --rm --runtime nvidia -p 9292:8080 \
|
||||
|
||||
</details>
|
||||
|
||||
## Bare metal Install ([download](https://github.com/mostlygeek/llama-swap/releases))
|
||||
### Homebrew Install (macOS/Linux)
|
||||
|
||||
Pre-built binaries are available for Linux, Mac, Windows and FreeBSD. These are automatically published and are likely a few hours ahead of the docker releases. The baremetal install works with any OpenAI compatible server, not just llama-server.
|
||||
The latest release of `llama-swap` can be installed via [Homebrew](https://brew.sh).
|
||||
|
||||
```shell
|
||||
# Set up tap and install formula
|
||||
brew tap mostlygeek/llama-swap
|
||||
brew install llama-swap
|
||||
# Run llama-swap
|
||||
llama-swap --config path/to/config.yaml --listen localhost:8080
|
||||
```
|
||||
|
||||
This will install the `llama-swap` binary and make it available in your path. See the [configuration documentation](https://github.com/mostlygeek/llama-swap/wiki/Configuration).
|
||||
|
||||
### Pre-built Binaries ([download](https://github.com/mostlygeek/llama-swap/releases))
|
||||
|
||||
Binaries are available for Linux, Mac, Windows and FreeBSD. These are automatically published and are likely a few hours ahead of the docker releases. The binary install works with any OpenAI compatible server, not just llama-server.
|
||||
|
||||
1. Download a [release](https://github.com/mostlygeek/llama-swap/releases) appropriate for your OS and architecture.
|
||||
1. Create a configuration file, see the [configuration documentation](https://github.com/mostlygeek/llama-swap/wiki/Configuration).
|
||||
@@ -173,6 +197,13 @@ Any OpenAI compatible server would work. llama-swap was originally designed for
|
||||
|
||||
For Python based inference servers like vllm or tabbyAPI it is recommended to run them via podman or docker. This provides clean environment isolation as well as responding correctly to `SIGTERM` signals to shutdown.
|
||||
|
||||
## Contributors
|
||||
<a href="https://github.com/mostlygeek/llama-swap/graphs/contributors">
|
||||
<img src="https://contrib.rocks/image?repo=mostlygeek/llama-swap" />
|
||||
</a>
|
||||
|
||||
Made with [contrib.rocks](https://contrib.rocks).
|
||||
|
||||
## Star History
|
||||
|
||||
[Star History Chart](https://www.star-history.com/#mostlygeek/llama-swap&Date)
|
||||
|
||||
@@ -78,6 +78,14 @@ func main() {
|
||||
"prompt_tokens": 25,
|
||||
"total_tokens": 35,
|
||||
},
|
||||
// add timings to simulate llama.cpp
|
||||
"timings": gin.H{
|
||||
"prompt_n": 25,
|
||||
"prompt_ms": 13,
|
||||
"predicted_n": 10,
|
||||
"predicted_ms": 17,
|
||||
"predicted_per_second": 10,
|
||||
},
|
||||
}
|
||||
c.SSEvent("message", finalData)
|
||||
c.Writer.Flush()
|
||||
@@ -102,6 +110,13 @@ func main() {
|
||||
"prompt_tokens": 25,
|
||||
"total_tokens": 35,
|
||||
},
|
||||
"timings": gin.H{
|
||||
"prompt_n": 25,
|
||||
"prompt_ms": 13,
|
||||
"predicted_n": 10,
|
||||
"predicted_ms": 17,
|
||||
"predicted_per_second": 10,
|
||||
},
|
||||
})
|
||||
}
|
||||
})
|
||||
|
||||
+48
-31
@@ -17,6 +17,7 @@ func MetricsMiddleware(pm *ProxyManager) gin.HandlerFunc {
|
||||
bodyBytes, err := io.ReadAll(c.Request.Body)
|
||||
if err != nil {
|
||||
pm.sendErrorResponse(c, http.StatusBadRequest, "could not ready request body")
|
||||
c.Abort()
|
||||
return
|
||||
}
|
||||
c.Request.Body = io.NopCloser(bytes.NewBuffer(bodyBytes))
|
||||
@@ -24,15 +25,16 @@ func MetricsMiddleware(pm *ProxyManager) gin.HandlerFunc {
|
||||
requestedModel := gjson.GetBytes(bodyBytes, "model").String()
|
||||
if requestedModel == "" {
|
||||
pm.sendErrorResponse(c, http.StatusBadRequest, "missing or invalid 'model' key")
|
||||
c.Abort()
|
||||
return
|
||||
}
|
||||
|
||||
realModelName, found := pm.config.RealModelName(requestedModel)
|
||||
if !found {
|
||||
pm.sendErrorResponse(c, http.StatusBadRequest, fmt.Sprintf("could not find real modelID for %s", requestedModel))
|
||||
c.Abort()
|
||||
return
|
||||
}
|
||||
c.Set("ls-real-model-name", realModelName)
|
||||
|
||||
writer := &MetricsResponseWriter{
|
||||
ResponseWriter: c.Writer,
|
||||
@@ -67,51 +69,66 @@ func (rec *MetricsRecorder) processBody(body []byte) {
|
||||
}
|
||||
}
|
||||
|
||||
func (rec *MetricsRecorder) parseAndRecordMetrics(jsonData gjson.Result) {
|
||||
if !jsonData.Get("usage").Exists() {
|
||||
return
|
||||
func (rec *MetricsRecorder) parseAndRecordMetrics(jsonData gjson.Result) bool {
|
||||
usage := jsonData.Get("usage")
|
||||
if !usage.Exists() {
|
||||
return false
|
||||
}
|
||||
|
||||
// default values
|
||||
outputTokens := int(jsonData.Get("usage.completion_tokens").Int())
|
||||
inputTokens := int(jsonData.Get("usage.prompt_tokens").Int())
|
||||
tokensPerSecond := -1.0
|
||||
durationMs := int(time.Since(rec.startTime).Milliseconds())
|
||||
|
||||
if outputTokens > 0 {
|
||||
duration := time.Since(rec.startTime)
|
||||
tokensPerSecond := float64(inputTokens+outputTokens) / duration.Seconds()
|
||||
|
||||
metrics := TokenMetrics{
|
||||
Timestamp: time.Now(),
|
||||
Model: rec.realModelName,
|
||||
InputTokens: inputTokens,
|
||||
OutputTokens: outputTokens,
|
||||
TokensPerSecond: tokensPerSecond,
|
||||
DurationMs: int(duration.Milliseconds()),
|
||||
}
|
||||
rec.metricsMonitor.addMetrics(metrics)
|
||||
// use llama-server's timing data for tok/sec and duration as it is more accurate
|
||||
if timings := jsonData.Get("timings"); timings.Exists() {
|
||||
tokensPerSecond = jsonData.Get("timings.predicted_per_second").Float()
|
||||
durationMs = int(jsonData.Get("timings.prompt_ms").Float() + jsonData.Get("timings.predicted_ms").Float())
|
||||
}
|
||||
|
||||
rec.metricsMonitor.addMetrics(TokenMetrics{
|
||||
Timestamp: time.Now(),
|
||||
Model: rec.realModelName,
|
||||
InputTokens: inputTokens,
|
||||
OutputTokens: outputTokens,
|
||||
TokensPerSecond: tokensPerSecond,
|
||||
DurationMs: durationMs,
|
||||
})
|
||||
|
||||
return true
|
||||
}
|
||||
|
||||
func (rec *MetricsRecorder) processStreamingResponse(body []byte) {
|
||||
// Iterate **backwards** through the lines looking for the data payload with
|
||||
// usage data
|
||||
lines := bytes.Split(body, []byte("\n"))
|
||||
for _, line := range lines {
|
||||
line = bytes.TrimSpace(line)
|
||||
|
||||
for i := len(lines) - 1; i >= 0; i-- {
|
||||
line := bytes.TrimSpace(lines[i])
|
||||
if len(line) == 0 {
|
||||
continue
|
||||
}
|
||||
|
||||
// Check for SSE data prefix
|
||||
if data, found := bytes.CutPrefix(line, []byte("data:")); found {
|
||||
data = bytes.TrimSpace(data)
|
||||
if len(data) == 0 {
|
||||
continue
|
||||
}
|
||||
if bytes.Equal(data, []byte("[DONE]")) {
|
||||
break
|
||||
}
|
||||
// SSE payload always follows "data:"
|
||||
prefix := []byte("data:")
|
||||
if !bytes.HasPrefix(line, prefix) {
|
||||
continue
|
||||
}
|
||||
data := bytes.TrimSpace(line[len(prefix):])
|
||||
|
||||
// Parse JSON to look for usage data
|
||||
if gjson.ValidBytes(data) {
|
||||
rec.parseAndRecordMetrics(gjson.ParseBytes(data))
|
||||
if len(data) == 0 {
|
||||
continue
|
||||
}
|
||||
|
||||
if bytes.Equal(data, []byte("[DONE]")) {
|
||||
// [DONE] line itself contains nothing of interest.
|
||||
continue
|
||||
}
|
||||
|
||||
if gjson.ValidBytes(data) {
|
||||
if rec.parseAndRecordMetrics(gjson.ParseBytes(data)) {
|
||||
return // short circuit if a metric was recorded
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
+17
-5
@@ -14,6 +14,7 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/tidwall/gjson"
|
||||
"github.com/tidwall/sjson"
|
||||
)
|
||||
|
||||
@@ -160,8 +161,10 @@ func (pm *ProxyManager) setupGinEngine() {
|
||||
pm.ginEngine.POST("/v1/completions", mm, pm.proxyOAIHandler)
|
||||
|
||||
// Support embeddings
|
||||
pm.ginEngine.POST("/v1/embeddings", pm.proxyOAIHandler)
|
||||
pm.ginEngine.POST("/v1/rerank", pm.proxyOAIHandler)
|
||||
pm.ginEngine.POST("/v1/embeddings", mm, pm.proxyOAIHandler)
|
||||
pm.ginEngine.POST("/v1/rerank", mm, pm.proxyOAIHandler)
|
||||
pm.ginEngine.POST("/v1/reranking", mm, pm.proxyOAIHandler)
|
||||
pm.ginEngine.POST("/rerank", mm, pm.proxyOAIHandler)
|
||||
|
||||
// Support audio/speech endpoint
|
||||
pm.ginEngine.POST("/v1/audio/speech", pm.proxyOAIHandler)
|
||||
@@ -188,6 +191,9 @@ func (pm *ProxyManager) setupGinEngine() {
|
||||
|
||||
pm.ginEngine.GET("/unload", pm.unloadAllModelsHandler)
|
||||
pm.ginEngine.GET("/running", pm.listRunningProcessesHandler)
|
||||
pm.ginEngine.GET("/health", func(c *gin.Context) {
|
||||
c.String(http.StatusOK, "OK")
|
||||
})
|
||||
|
||||
pm.ginEngine.GET("/favicon.ico", func(c *gin.Context) {
|
||||
if data, err := reactStaticFS.ReadFile("ui_dist/favicon.ico"); err == nil {
|
||||
@@ -365,9 +371,15 @@ func (pm *ProxyManager) proxyOAIHandler(c *gin.Context) {
|
||||
return
|
||||
}
|
||||
|
||||
realModelName := c.GetString("ls-real-model-name") // Should be set in MetricsMiddleware
|
||||
if realModelName == "" {
|
||||
pm.sendErrorResponse(c, http.StatusInternalServerError, "ls-real-model-name not set")
|
||||
requestedModel := gjson.GetBytes(bodyBytes, "model").String()
|
||||
if requestedModel == "" {
|
||||
pm.sendErrorResponse(c, http.StatusBadRequest, "missing or invalid 'model' key")
|
||||
return
|
||||
}
|
||||
|
||||
realModelName, found := pm.config.RealModelName(requestedModel)
|
||||
if !found {
|
||||
pm.sendErrorResponse(c, http.StatusBadRequest, fmt.Sprintf("could not find real modelID for %s", requestedModel))
|
||||
return
|
||||
}
|
||||
|
||||
|
||||
@@ -708,7 +708,9 @@ func TestProxyManager_MiddlewareWritesMetrics_NonStreaming(t *testing.T) {
|
||||
|
||||
// Check that metrics were recorded
|
||||
metrics := proxy.metricsMonitor.GetMetrics()
|
||||
assert.NotEmpty(t, metrics, "metrics should be recorded for non-streaming request")
|
||||
if !assert.NotEmpty(t, metrics, "metrics should be recorded for non-streaming request") {
|
||||
return
|
||||
}
|
||||
|
||||
// Verify the last metric has the correct model
|
||||
lastMetric := metrics[len(metrics)-1]
|
||||
@@ -741,7 +743,9 @@ func TestProxyManager_MiddlewareWritesMetrics_Streaming(t *testing.T) {
|
||||
|
||||
// Check that metrics were recorded
|
||||
metrics := proxy.metricsMonitor.GetMetrics()
|
||||
assert.NotEmpty(t, metrics, "metrics should be recorded for streaming request")
|
||||
if !assert.NotEmpty(t, metrics, "metrics should be recorded for streaming request") {
|
||||
return
|
||||
}
|
||||
|
||||
// Verify the last metric has the correct model
|
||||
lastMetric := metrics[len(metrics)-1]
|
||||
@@ -751,3 +755,21 @@ func TestProxyManager_MiddlewareWritesMetrics_Streaming(t *testing.T) {
|
||||
assert.Greater(t, lastMetric.TokensPerSecond, 0.0, "tokens per second should be greater than 0")
|
||||
assert.Greater(t, lastMetric.DurationMs, 0, "duration should be greater than 0")
|
||||
}
|
||||
|
||||
func TestProxyManager_HealthEndpoint(t *testing.T) {
|
||||
config := AddDefaultGroupToConfig(Config{
|
||||
HealthCheckTimeout: 15,
|
||||
Models: map[string]ModelConfig{
|
||||
"model1": getTestSimpleResponderConfig("model1"),
|
||||
},
|
||||
LogLevel: "error",
|
||||
})
|
||||
|
||||
proxy := New(config)
|
||||
defer proxy.StopProcesses(StopWaitForInflightRequest)
|
||||
req := httptest.NewRequest("GET", "/health", nil)
|
||||
rec := httptest.NewRecorder()
|
||||
proxy.ServeHTTP(rec, req)
|
||||
assert.Equal(t, http.StatusOK, rec.Code)
|
||||
assert.Equal(t, "OK", rec.Body.String())
|
||||
}
|
||||
|
||||
+13
-13
@@ -1,6 +1,18 @@
|
||||
import { useState, useEffect } from "react";
|
||||
import { useAPI } from "../contexts/APIProvider";
|
||||
|
||||
const formatTimestamp = (timestamp: string): string => {
|
||||
return new Date(timestamp).toLocaleString();
|
||||
};
|
||||
|
||||
const formatSpeed = (speed: number): string => {
|
||||
return speed < 0 ? "unknown" : speed.toFixed(2) + " t/s";
|
||||
};
|
||||
|
||||
const formatDuration = (ms: number): string => {
|
||||
return (ms / 1000).toFixed(2) + "s";
|
||||
};
|
||||
|
||||
const ActivityPage = () => {
|
||||
const { metrics } = useAPI();
|
||||
const [error, setError] = useState<string | null>(null);
|
||||
@@ -11,18 +23,6 @@ const ActivityPage = () => {
|
||||
}
|
||||
}, [metrics]);
|
||||
|
||||
const formatTimestamp = (timestamp: string) => {
|
||||
return new Date(timestamp).toLocaleString();
|
||||
};
|
||||
|
||||
const formatSpeed = (speed: number) => {
|
||||
return speed.toFixed(2) + " t/s";
|
||||
};
|
||||
|
||||
const formatDuration = (ms: number) => {
|
||||
return (ms / 1000).toFixed(2) + "s";
|
||||
};
|
||||
|
||||
if (error) {
|
||||
return (
|
||||
<div className="p-6">
|
||||
@@ -51,7 +51,7 @@ const ActivityPage = () => {
|
||||
<th className="px-6 py-3 text-left text-xs font-medium uppercase tracking-wider">Model</th>
|
||||
<th className="px-6 py-3 text-left text-xs font-medium uppercase tracking-wider">Input Tokens</th>
|
||||
<th className="px-6 py-3 text-left text-xs font-medium uppercase tracking-wider">Output Tokens</th>
|
||||
<th className="px-6 py-3 text-left text-xs font-medium uppercase tracking-wider">Processing Speed</th>
|
||||
<th className="px-6 py-3 text-left text-xs font-medium uppercase tracking-wider">Generation Speed</th>
|
||||
<th className="px-6 py-3 text-left text-xs font-medium uppercase tracking-wider">Duration</th>
|
||||
</tr>
|
||||
</thead>
|
||||
|
||||
+27
-25
@@ -6,7 +6,7 @@ const LogViewer = () => {
|
||||
const { proxyLogs, upstreamLogs } = useAPI();
|
||||
|
||||
return (
|
||||
<div className="flex flex-col gap-5" style={{ height: "calc(100vh - 125px)" }}>
|
||||
<div className="flex flex-col lg:flex-row gap-5" style={{ height: "calc(100vh - 125px)" }}>
|
||||
<LogPanel id="proxy" title="Proxy Logs" logData={proxyLogs} />
|
||||
<LogPanel id="upstream" title="Upstream Logs" logData={upstreamLogs} />
|
||||
</div>
|
||||
@@ -90,34 +90,36 @@ export const LogPanel = ({ id, title, logData, className }: LogPanelProps) => {
|
||||
<div className="flex flex-col md:flex-row md:items-center md:justify-between gap-4">
|
||||
{/* Title - Always full width on mobile, normal on desktop */}
|
||||
<div className="w-full md:w-auto" onClick={() => setIsCollapsed(!isCollapsed)}>
|
||||
<h3 className="m-0 text-lg">{title}</h3>
|
||||
<h3 className="m-0 text-lg p-0">{title}</h3>
|
||||
</div>
|
||||
|
||||
<div className="flex flex-col sm:flex-row gap-4 w-full md:w-auto">
|
||||
{/* Sizing Buttons - Stacks vertically on mobile */}
|
||||
<div className="flex flex-wrap gap-2">
|
||||
<button className="btn" onClick={toggleFontSize}>
|
||||
font: {fontSize}
|
||||
</button>
|
||||
<button className="btn" onClick={() => setTextWrap((prev) => !prev)}>
|
||||
{wrapText ? "wrap" : "wrap off"}
|
||||
</button>
|
||||
</div>
|
||||
{!isCollapsed && (
|
||||
<div className="flex flex-col sm:flex-row gap-4 w-full md:w-auto">
|
||||
{/* Sizing Buttons - Stacks vertically on mobile */}
|
||||
<div className="flex flex-wrap gap-2">
|
||||
<button className="btn" onClick={toggleFontSize}>
|
||||
font: {fontSize}
|
||||
</button>
|
||||
<button className="btn" onClick={() => setTextWrap((prev) => !prev)}>
|
||||
{wrapText ? "wrap" : "wrap off"}
|
||||
</button>
|
||||
</div>
|
||||
|
||||
{/* Filtering Options - Full width on mobile, normal on desktop */}
|
||||
<div className="flex flex-1 min-w-0 gap-2">
|
||||
<input
|
||||
type="text"
|
||||
className="flex-1 min-w-[120px] text-sm border p-2 rounded"
|
||||
placeholder="Filter logs..."
|
||||
value={filterRegex}
|
||||
onChange={(e) => setFilterRegex(e.target.value)}
|
||||
/>
|
||||
<button className="btn" onClick={() => setFilterRegex("")}>
|
||||
Clear
|
||||
</button>
|
||||
{/* Filtering Options - Full width on mobile, normal on desktop */}
|
||||
<div className="flex flex-1 min-w-0 gap-2">
|
||||
<input
|
||||
type="text"
|
||||
className="flex-1 min-w-[120px] text-sm border p-2 rounded"
|
||||
placeholder="Filter logs..."
|
||||
value={filterRegex}
|
||||
onChange={(e) => setFilterRegex(e.target.value)}
|
||||
/>
|
||||
<button className="btn" onClick={() => setFilterRegex("")}>
|
||||
Clear
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
|
||||
@@ -27,10 +27,13 @@ export default function ModelsPage() {
|
||||
}, []);
|
||||
|
||||
const [totalRequests, totalTokens, avgTokensPerSecond] = useMemo(() => {
|
||||
const totalTokens = metrics.reduce((sum, m) => sum + m.input_tokens + m.output_tokens, 0);
|
||||
const totalSeconds = metrics.reduce((sum, m) => sum + m.duration_ms / 1000, 0);
|
||||
const avgTokensPerSecond = totalSeconds > 0 ? totalTokens / totalSeconds : 0;
|
||||
return [metrics.length, totalTokens, avgTokensPerSecond.toFixed(2)];
|
||||
const totalRequests = metrics.length;
|
||||
if (totalRequests === 0) {
|
||||
return [0, 0, 0];
|
||||
}
|
||||
const totalTokens = metrics.reduce((sum, m) => sum + m.output_tokens, 0);
|
||||
const avgTokensPerSecond = (metrics.reduce((sum, m) => sum + m.tokens_per_second, 0) / totalRequests).toFixed(2);
|
||||
return [totalRequests, totalTokens, avgTokensPerSecond];
|
||||
}, [metrics]);
|
||||
|
||||
return (
|
||||
|
||||
Reference in New Issue
Block a user