Compare commits

..

5 Commits

Author SHA1 Message Date
Benson Wong c867a6c9a2 Add name and description to v1/models list (#179)
* Add support for name and description in v1/models list
* add configuration example for name and description
2025-06-30 23:02:44 -07:00
Leoyzen 3bd1b23ce0 fix config hot-reload on k8s (#181)
Co-authored-by: Leoyzen <leoyzen@gmial.com>
2025-06-27 11:49:31 -07:00
srevn 10606abf89 fix config hot-reload on macos (#180)
Co-authored-by: srevn <srevn@github>
2025-06-26 09:20:50 -07:00
Benson Wong fefd14903d improve log display and add a small stats table in ui (#178) 2025-06-25 12:27:49 -07:00
Benson Wong 717d64e336 update GUI image in README [skip ci] 2025-06-24 10:38:28 -07:00
11 changed files with 153 additions and 56 deletions
+1 -3
View File
@@ -72,9 +72,7 @@ See the [configuration documentation](https://github.com/mostlygeek/llama-swap/w
llama-swap ships with a web based interface to make it easier to monitor logs and check the status of models. llama-swap ships with a web based interface to make it easier to monitor logs and check the status of models.
<img width="1854" alt="image" src="https://github.com/user-attachments/assets/ee0025f0-f031-4158-9b5d-cd98b2b9fe4d" /> <img width="1758" alt="image" src="https://github.com/user-attachments/assets/31ae5bcd-5efd-46b0-b64b-6db9e60196d3" />
## Docker Install ([download images](https://github.com/mostlygeek/llama-swap/pkgs/container/llama-swap)) ## Docker Install ([download images](https://github.com/mostlygeek/llama-swap/pkgs/container/llama-swap))
+13 -1
View File
@@ -49,7 +49,19 @@ models:
cmd: | cmd: |
# ${latest-llama} is a macro that is defined above # ${latest-llama} is a macro that is defined above
${latest-llama} ${latest-llama}
--model path/to/Qwen2.5-1.5B-Instruct-Q4_K_M.gguf --model path/to/llama-8B-Q4_K_M.gguf
# name: a display name for the model
# - optional, default: empty string
# - if set, it will be used in the v1/models API response
# - if not set, it will be omitted in the JSON model record
name: "llama 3.1 8B"
# description: a description for the model
# - optional, default: empty string
# - if set, it will be used in the v1/models API response
# - if not set, it will be omitted in the JSON model record
description: "A small but capable model used for quick testing"
# env: define an array of environment variables to inject into cmd's environment # env: define an array of environment variables to inject into cmd's environment
# - optional, default: empty array # - optional, default: empty array
+9 -2
View File
@@ -144,8 +144,8 @@ func watchConfigFileWithReload(configPath string, reloadChan chan<- *proxy.Proxy
if !ok { if !ok {
return return
} }
// We only care about writes to the specific config file // We only care about writes/creates to the specific config file
if event.Name == configPath && event.Has(fsnotify.Write) { if event.Name == configPath && (event.Has(fsnotify.Write) || event.Has(fsnotify.Create) || event.Has(fsnotify.Remove)) {
// Reset or start the debounce timer // Reset or start the debounce timer
if debounceTimer != nil { if debounceTimer != nil {
debounceTimer.Stop() debounceTimer.Stop()
@@ -176,6 +176,13 @@ func watchConfigFileWithReload(configPath string, reloadChan chan<- *proxy.Proxy
newPM := proxy.New(newConfig) newPM := proxy.New(newConfig)
reloadChan <- newPM reloadChan <- newPM
log.Println("Config reloaded successfully") log.Println("Config reloaded successfully")
if (event.Has(fsnotify.Remove)) {
// re-add watcher
err = watcher.Add(configPath)
if err != nil {
log.Printf("Could not re-add watcher for %s: %s", configPath, err)
}
}
}) })
} }
case err, ok := <-watcher.Errors: case err, ok := <-watcher.Errors:
+6
View File
@@ -28,6 +28,10 @@ type ModelConfig struct {
Unlisted bool `yaml:"unlisted"` Unlisted bool `yaml:"unlisted"`
UseModelName string `yaml:"useModelName"` UseModelName string `yaml:"useModelName"`
// #179 for /v1/models
Name string `yaml:"name"`
Description string `yaml:"description"`
// Limit concurrency of HTTP requests to process // Limit concurrency of HTTP requests to process
ConcurrencyLimit int `yaml:"concurrencyLimit"` ConcurrencyLimit int `yaml:"concurrencyLimit"`
@@ -48,6 +52,8 @@ func (m *ModelConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
Unlisted: false, Unlisted: false,
UseModelName: "", UseModelName: "",
ConcurrencyLimit: 0, ConcurrencyLimit: 0,
Name: "",
Description: "",
} }
// the default cmdStop to taskkill /f /t /pid ${PID} // the default cmdStop to taskkill /f /t /pid ${PID}
+4
View File
@@ -104,6 +104,8 @@ models:
model1: model1:
cmd: path/to/cmd --arg1 one cmd: path/to/cmd --arg1 one
proxy: "http://localhost:8080" proxy: "http://localhost:8080"
name: "Model 1"
description: "This is model 1"
aliases: aliases:
- "m1" - "m1"
- "model-one" - "model-one"
@@ -168,6 +170,8 @@ groups:
Aliases: []string{"m1", "model-one"}, Aliases: []string{"m1", "model-one"},
Env: []string{"VAR1=value1", "VAR2=value2"}, Env: []string{"VAR1=value1", "VAR2=value2"},
CheckEndpoint: "/health", CheckEndpoint: "/health",
Name: "Model 1",
Description: "This is model 1",
}, },
"model2": { "model2": {
Cmd: "path/to/server --arg1 one", Cmd: "path/to/server --arg1 one",
+22 -14
View File
@@ -2,7 +2,6 @@ package proxy
import ( import (
"bytes" "bytes"
"encoding/json"
"fmt" "fmt"
"io" "io"
"mime/multipart" "mime/multipart"
@@ -289,32 +288,41 @@ func (pm *ProxyManager) swapProcessGroup(requestedModel string) (*ProcessGroup,
} }
func (pm *ProxyManager) listModelsHandler(c *gin.Context) { func (pm *ProxyManager) listModelsHandler(c *gin.Context) {
data := []interface{}{} data := make([]gin.H, 0, len(pm.config.Models))
createdTime := time.Now().Unix()
for id, modelConfig := range pm.config.Models { for id, modelConfig := range pm.config.Models {
if modelConfig.Unlisted { if modelConfig.Unlisted {
continue continue
} }
data = append(data, map[string]interface{}{ record := gin.H{
"id": id, "id": id,
"object": "model", "object": "model",
"created": time.Now().Unix(), "created": createdTime,
"owned_by": "llama-swap", "owned_by": "llama-swap",
}) }
if name := strings.TrimSpace(modelConfig.Name); name != "" {
record["name"] = name
}
if desc := strings.TrimSpace(modelConfig.Description); desc != "" {
record["description"] = desc
}
data = append(data, record)
} }
// Set the Content-Type header to application/json // Set CORS headers if origin exists
c.Header("Content-Type", "application/json") if origin := c.GetHeader("Origin"); origin != "" {
if origin := c.Request.Header.Get("Origin"); origin != "" {
c.Header("Access-Control-Allow-Origin", origin) c.Header("Access-Control-Allow-Origin", origin)
} }
// Encode the data as JSON and write it to the response writer // Use gin's JSON method which handles content-type and encoding
if err := json.NewEncoder(c.Writer).Encode(map[string]interface{}{"object": "list", "data": data}); err != nil { c.JSON(http.StatusOK, gin.H{
pm.sendErrorResponse(c, http.StatusInternalServerError, fmt.Sprintf("error encoding JSON %s", err.Error())) "object": "list",
return "data": data,
} })
} }
func (pm *ProxyManager) proxyToUpstream(c *gin.Context) { func (pm *ProxyManager) proxyToUpstream(c *gin.Context) {
+28 -2
View File
@@ -183,11 +183,20 @@ func TestProxyManager_SwapMultiProcessParallelRequests(t *testing.T) {
} }
func TestProxyManager_ListModelsHandler(t *testing.T) { func TestProxyManager_ListModelsHandler(t *testing.T) {
model1Config := getTestSimpleResponderConfig("model1")
model1Config.Name = "Model 1"
model1Config.Description = "Model 1 description is used for testing"
model2Config := getTestSimpleResponderConfig("model2")
model2Config.Name = " " // empty whitespace only strings will get ignored
model2Config.Description = " "
config := Config{ config := Config{
HealthCheckTimeout: 15, HealthCheckTimeout: 15,
Models: map[string]ModelConfig{ Models: map[string]ModelConfig{
"model1": getTestSimpleResponderConfig("model1"), "model1": model1Config,
"model2": getTestSimpleResponderConfig("model2"), "model2": model2Config,
"model3": getTestSimpleResponderConfig("model3"), "model3": getTestSimpleResponderConfig("model3"),
}, },
LogLevel: "error", LogLevel: "error",
@@ -213,6 +222,7 @@ func TestProxyManager_ListModelsHandler(t *testing.T) {
var response struct { var response struct {
Data []map[string]interface{} `json:"data"` Data []map[string]interface{} `json:"data"`
} }
if err := json.Unmarshal(w.Body.Bytes(), &response); err != nil { if err := json.Unmarshal(w.Body.Bytes(), &response); err != nil {
t.Fatalf("Failed to parse JSON response: %v", err) t.Fatalf("Failed to parse JSON response: %v", err)
} }
@@ -227,6 +237,7 @@ func TestProxyManager_ListModelsHandler(t *testing.T) {
"model3": {}, "model3": {},
} }
// make all models
for _, model := range response.Data { for _, model := range response.Data {
modelID, ok := model["id"].(string) modelID, ok := model["id"].(string)
assert.True(t, ok, "model ID should be a string") assert.True(t, ok, "model ID should be a string")
@@ -245,6 +256,21 @@ func TestProxyManager_ListModelsHandler(t *testing.T) {
ownedBy, ok := model["owned_by"].(string) ownedBy, ok := model["owned_by"].(string)
assert.True(t, ok, "owned_by should be a string") assert.True(t, ok, "owned_by should be a string")
assert.Equal(t, "llama-swap", ownedBy) assert.Equal(t, "llama-swap", ownedBy)
// check for optional name and description
if modelID == "model1" {
name, ok := model["name"].(string)
assert.True(t, ok, "name should be a string")
assert.Equal(t, "Model 1", name)
description, ok := model["description"].(string)
assert.True(t, ok, "description should be a string")
assert.Equal(t, "Model 1 description is used for testing", description)
} else {
_, exists := model["name"]
assert.False(t, exists, "unexpected name field for model: %s", modelID)
_, exists = model["description"]
assert.False(t, exists, "unexpected description field for model: %s", modelID)
}
} }
// Ensure all expected models were returned // Ensure all expected models were returned
+4 -4
View File
@@ -10,10 +10,10 @@ function App() {
<Router basename="/ui/"> <Router basename="/ui/">
<APIProvider> <APIProvider>
<div> <div>
<nav className="bg-surface border-b border-border p-4"> <nav className="bg-surface border-b border-border p-2 h-[75px]">
<div className="flex items-center justify-between mx-auto px-4"> <div className="flex items-center justify-between mx-auto px-4 h-full">
<h1>llama-swap</h1> <h1 className="flex items-center p-0">llama-swap</h1>
<div className="flex space-x-4"> <div className="flex items-center space-x-4">
<NavLink to="/" className={({ isActive }) => (isActive ? "navlink active" : "navlink")}> <NavLink to="/" className={({ isActive }) => (isActive ? "navlink active" : "navlink")}>
Logs Logs
</NavLink> </NavLink>
+18
View File
@@ -0,0 +1,18 @@
/**
 * Summarizes the "eval time" lines found in a block of log text.
 *
 * A line is considered when it starts (after optional leading spaces) with
 * "eval time". From each such line the token count is read from the
 * "/ <n> tokens" fragment and the elapsed time from the "= <ms> ms" fragment.
 * NOTE(review): this presumably targets llama-server timing output — confirm
 * the exact log format against the upstream server.
 *
 * @param text Raw log text, possibly containing many lines.
 * @returns A tuple of:
 *   [0] the number of matching "eval time" lines,
 *   [1] the total tokens across lines where both tokens and time were parsed,
 *   [2] the average tokens per second, rounded to two decimals (0 when no
 *       time was accumulated).
 */
export function processEvalTimes(text: string): [number, number, number] {
  const lines = text.match(/^ *eval time.*$/gm) || [];

  let totalTokens = 0;
  let totalTime = 0;

  for (const line of lines) {
    const tokensMatch = line.match(/\/\s*(\d+)\s*tokens/);
    // Accept both "1234.56 ms" and "1234 ms".
    const timeMatch = line.match(/=\s*(\d+(?:\.\d+)?)\s*ms/);

    // Only count a line when both values are present; adding tokens without
    // their matching time (or vice versa) would skew the average.
    if (!tokensMatch || !timeMatch) continue;

    totalTokens += parseInt(tokensMatch[1], 10);
    totalTime += parseFloat(timeMatch[1]);
  }

  // totalTime is in milliseconds; convert to seconds for the rate.
  const avgTokensPerSecond = totalTime > 0 ? totalTokens / (totalTime / 1000) : 0;

  return [lines.length, totalTokens, Math.round(avgTokensPerSecond * 100) / 100];
}
+11 -25
View File
@@ -15,7 +15,7 @@ const LogViewer = () => {
}, []); }, []);
return ( return (
<div className="flex flex-col gap-5"> <div className="flex flex-col gap-5" style={{ height: "calc(100vh - 125px)" }}>
<LogPanel id="proxy" title="Proxy Logs" logData={proxyLogs} /> <LogPanel id="proxy" title="Proxy Logs" logData={proxyLogs} />
<LogPanel id="upstream" title="Upstream Logs" logData={upstreamLogs} /> <LogPanel id="upstream" title="Upstream Logs" logData={upstreamLogs} />
</div> </div>
@@ -30,11 +30,8 @@ interface LogPanelProps {
} }
export const LogPanel = ({ id, title, logData, className }: LogPanelProps) => { export const LogPanel = ({ id, title, logData, className }: LogPanelProps) => {
const [isCollapsed, setIsCollapsed] = usePersistentState(`logPanel-${id}-isCollapsed`, false);
const [filterRegex, setFilterRegex] = useState(""); const [filterRegex, setFilterRegex] = useState("");
const [panelState, setPanelState] = usePersistentState<"hide" | "small" | "max">(
`logPanel-${id}-panelState`,
"small"
);
const [fontSize, setFontSize] = usePersistentState<"xxs" | "xs" | "small" | "normal">( const [fontSize, setFontSize] = usePersistentState<"xxs" | "xs" | "small" | "normal">(
`logPanel-${id}-fontSize`, `logPanel-${id}-fontSize`,
"normal" "normal"
@@ -60,14 +57,6 @@ export const LogPanel = ({ id, title, logData, className }: LogPanelProps) => {
}); });
}, []); }, []);
const togglePanelState = useCallback(() => {
setPanelState((prev) => {
if (prev === "small") return "max";
if (prev === "hide") return "small";
return "hide";
});
}, []);
const fontSizeClass = useMemo(() => { const fontSizeClass = useMemo(() => {
switch (fontSize) { switch (fontSize) {
case "xxs": case "xxs":
@@ -101,20 +90,21 @@ export const LogPanel = ({ id, title, logData, className }: LogPanelProps) => {
}, [filteredLogs]); }, [filteredLogs]);
return ( return (
<div className={`bg-surface border border-border rounded-lg overflow-hidden flex flex-col ${className || ""}`}> <div
className={`bg-surface border border-border rounded-lg overflow-hidden flex flex-col ${
!isCollapsed && "h-full"
} ${className || ""}`}
>
<div className="p-4 border-b border-border bg-secondary"> <div className="p-4 border-b border-border bg-secondary">
<div className="flex flex-col md:flex-row md:items-center md:justify-between gap-4"> <div className="flex flex-col md:flex-row md:items-center md:justify-between gap-4">
{/* Title - Always full width on mobile, normal on desktop */} {/* Title - Always full width on mobile, normal on desktop */}
<div className="w-full md:w-auto" onClick={togglePanelState}> <div className="w-full md:w-auto" onClick={() => setIsCollapsed(!isCollapsed)}>
<h3 className="m-0 text-lg">{title}</h3> <h3 className="m-0 text-lg">{title}</h3>
</div> </div>
<div className="flex flex-col sm:flex-row gap-4 w-full md:w-auto"> <div className="flex flex-col sm:flex-row gap-4 w-full md:w-auto">
{/* Sizing Buttons - Stacks vertically on mobile */} {/* Sizing Buttons - Stacks vertically on mobile */}
<div className="flex flex-wrap gap-2"> <div className="flex flex-wrap gap-2">
<button className="btn" onClick={togglePanelState}>
size: {panelState}
</button>
<button className="btn" onClick={toggleFontSize}> <button className="btn" onClick={toggleFontSize}>
font: {fontSize} font: {fontSize}
</button> </button>
@@ -140,14 +130,11 @@ export const LogPanel = ({ id, title, logData, className }: LogPanelProps) => {
</div> </div>
</div> </div>
{panelState !== "hide" && ( {!isCollapsed && (
<div className="flex-1 bg-background font-mono text-sm leading-[1.4] p-3"> <div className="flex-1 bg-background font-mono text-sm p-3 overflow-hidden">
<pre <pre
ref={preTagRef} ref={preTagRef}
className={`flex-1 p-4 overflow-y-auto whitespace-pre min-h-0 ${textWrapClass} ${fontSizeClass}`} className={`h-full p-4 overflow-y-auto whitespace-pre min-h-0 ${textWrapClass} ${fontSizeClass}`}
style={{
maxHeight: panelState === "max" ? "1500px" : "500px",
}}
> >
{filteredLogs} {filteredLogs}
</pre> </pre>
@@ -156,5 +143,4 @@ export const LogPanel = ({ id, title, logData, className }: LogPanelProps) => {
</div> </div>
); );
}; };
export default LogViewer; export default LogViewer;
+37 -5
View File
@@ -1,6 +1,7 @@
import { useState, useEffect, useCallback } from "react"; import { useState, useEffect, useCallback, useMemo } from "react";
import { useAPI } from "../contexts/APIProvider"; import { useAPI } from "../contexts/APIProvider";
import { LogPanel } from "./LogViewer"; import { LogPanel } from "./LogViewer";
import { processEvalTimes } from "../lib/Utils";
export default function ModelsPage() { export default function ModelsPage() {
const { models, enableModelUpdates, unloadAllModels, loadModel, upstreamLogs, enableUpstreamLogs } = useAPI(); const { models, enableModelUpdates, unloadAllModels, loadModel, upstreamLogs, enableUpstreamLogs } = useAPI();
@@ -29,8 +30,12 @@ export default function ModelsPage() {
} }
}, []); }, []);
const [totalLines, totalTokens, avgTokensPerSecond] = useMemo(() => {
return processEvalTimes(upstreamLogs);
}, [upstreamLogs]);
return ( return (
<div className="h-screen"> <div>
<div className="flex flex-col md:flex-row gap-4"> <div className="flex flex-col md:flex-row gap-4">
{/* Left Column */} {/* Left Column */}
<div className="w-full md:w-1/2 flex items-top"> <div className="w-full md:w-1/2 flex items-top">
@@ -56,7 +61,13 @@ export default function ModelsPage() {
</a> </a>
</td> </td>
<td className="p-2"> <td className="p-2">
<button className="btn btn--sm" disabled={model.state !== "stopped"} onClick={() => loadModel(model.id)}>Load</button> <button
className="btn btn--sm"
disabled={model.state !== "stopped"}
onClick={() => loadModel(model.id)}
>
Load
</button>
</td> </td>
<td className="p-2"> <td className="p-2">
<span className={`status status--${model.state}`}>{model.state}</span> <span className={`status status--${model.state}`}>{model.state}</span>
@@ -69,8 +80,29 @@ export default function ModelsPage() {
</div> </div>
{/* Right Column */} {/* Right Column */}
<div className="w-full md:w-1/2 flex items-top"> <div className="w-full md:w-1/2 flex flex-col" style={{ height: "calc(100vh - 125px)" }}>
<LogPanel id="modelsupstream" title="Upstream Logs" logData={upstreamLogs} className="h-full" /> <div className="card mb-4 min-h-[250px]">
<h2>Log Stats</h2>
<p className="italic my-2">note: eval logs from llama-server</p>
<table className="w-full border border-gray-200">
<tbody>
<tr className="border-b border-gray-200">
<td className="py-2 px-4 font-medium border-r border-gray-200">Requests</td>
<td className="py-2 px-4 text-right">{totalLines}</td>
</tr>
<tr className="border-b border-gray-200">
<td className="py-2 px-4 font-medium border-r border-gray-200">Total Tokens Generated</td>
<td className="py-2 px-4 text-right">{totalTokens}</td>
</tr>
<tr>
<td className="py-2 px-4 font-medium border-r border-gray-200">Average Tokens/Second</td>
<td className="py-2 px-4 text-right">{avgTokensPerSecond}</td>
</tr>
</tbody>
</table>
</div>
<LogPanel id="modelsupstream" title="Upstream Logs" logData={upstreamLogs} />
</div> </div>
</div> </div>
</div> </div>