Compare commits
5 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 57803fd3aa | |||
| c55d0cc842 | |||
| 7acbaf4712 | |||
| fcc5ad135a | |||
| 305e5a0031 |
@@ -18,9 +18,11 @@ Written in golang, it is very easy to install (single binary with no dependencie
|
|||||||
- `v1/completions`
|
- `v1/completions`
|
||||||
- `v1/chat/completions`
|
- `v1/chat/completions`
|
||||||
- `v1/embeddings`
|
- `v1/embeddings`
|
||||||
- `v1/rerank`, `v1/reranking`, `rerank`
|
|
||||||
- `v1/audio/speech` ([#36](https://github.com/mostlygeek/llama-swap/issues/36))
|
- `v1/audio/speech` ([#36](https://github.com/mostlygeek/llama-swap/issues/36))
|
||||||
- `v1/audio/transcriptions` ([docs](https://github.com/mostlygeek/llama-swap/issues/41#issuecomment-2722637867))
|
- `v1/audio/transcriptions` ([docs](https://github.com/mostlygeek/llama-swap/issues/41#issuecomment-2722637867))
|
||||||
|
- ✅ llama-server (llama.cpp) supported endpoints:
|
||||||
|
- `v1/rerank`, `v1/reranking`, `/rerank`
|
||||||
|
- `/infill` - for code infilling
|
||||||
- ✅ llama-swap custom API endpoints
|
- ✅ llama-swap custom API endpoints
|
||||||
- `/ui` - web UI
|
- `/ui` - web UI
|
||||||
- `/log` - remote log monitoring
|
- `/log` - remote log monitoring
|
||||||
|
|||||||
+44
-27
@@ -3,14 +3,15 @@
|
|||||||
#
|
#
|
||||||
# 💡 Tip - Use an LLM with this file!
|
# 💡 Tip - Use an LLM with this file!
|
||||||
# ====================================
|
# ====================================
|
||||||
# This example configuration is written to be LLM friendly! Try
|
# This example configuration is written to be LLM friendly. Try
|
||||||
# copying this file into an LLM and asking it to explain or generate
|
# copying this file into an LLM and asking it to explain or generate
|
||||||
# sections for you.
|
# sections for you.
|
||||||
# ====================================
|
# ====================================
|
||||||
#
|
|
||||||
|
# Usage notes:
|
||||||
# - Below are all the available configuration options for llama-swap.
|
# - Below are all the available configuration options for llama-swap.
|
||||||
# - Settings with a default value, or noted as optional can be omitted.
|
# - Settings noted as "required" must be in your configuration file
|
||||||
# - Settings that are marked required must be in your configuration file
|
# - Settings noted as "optional" can be omitted
|
||||||
|
|
||||||
# healthCheckTimeout: number of seconds to wait for a model to be ready to serve requests
|
# healthCheckTimeout: number of seconds to wait for a model to be ready to serve requests
|
||||||
# - optional, default: 120
|
# - optional, default: 120
|
||||||
@@ -34,9 +35,9 @@ metricsMaxInMemory: 1000
|
|||||||
# - it is automatically incremented for every model that uses it
|
# - it is automatically incremented for every model that uses it
|
||||||
startPort: 10001
|
startPort: 10001
|
||||||
|
|
||||||
# macros: sets a dictionary of string:string pairs
|
# macros: a dictionary of string substitutions
|
||||||
# - optional, default: empty dictionary
|
# - optional, default: empty dictionary
|
||||||
# - these are reusable snippets
|
# - macros are reusable snippets
|
||||||
# - used in a model's cmd, cmdStop, proxy and checkEndpoint
|
# - used in a model's cmd, cmdStop, proxy and checkEndpoint
|
||||||
# - useful for reducing common configuration settings
|
# - useful for reducing common configuration settings
|
||||||
macros:
|
macros:
|
||||||
@@ -99,44 +100,55 @@ models:
|
|||||||
|
|
||||||
# checkEndpoint: URL path to check if the server is ready
|
# checkEndpoint: URL path to check if the server is ready
|
||||||
# - optional, default: /health
|
# - optional, default: /health
|
||||||
# - use "none" to skip endpoint ready checking
|
|
||||||
# - endpoint is expected to return an HTTP 200 response
|
# - endpoint is expected to return an HTTP 200 response
|
||||||
# - all requests wait until the endpoint is ready (or fails)
|
# - all requests wait until the endpoint is ready or fails
|
||||||
|
# - use "none" to skip endpoint health checking
|
||||||
checkEndpoint: /custom-endpoint
|
checkEndpoint: /custom-endpoint
|
||||||
|
|
||||||
# ttl: automatically unload the model after this many seconds
|
# ttl: automatically unload the model after ttl seconds
|
||||||
# - optional, default: 0
|
# - optional, default: 0
|
||||||
# - a value greater than 0 enables automatic unloading after that many seconds
|
# - a value greater than 0 enables automatic unloading after that many seconds
|
||||||
# - a value of 0 disables automatic unloading of the model
|
# - a value of 0 disables automatic unloading of the model
|
||||||
ttl: 60
|
ttl: 60
|
||||||
|
|
||||||
# useModelName: overrides the model name that is sent to upstream server
|
# useModelName: override the model name that is sent to upstream server
|
||||||
# - optional, default: ""
|
# - optional, default: ""
|
||||||
# - useful when the upstream server expects a specific model name or format
|
# - useful for when the upstream server expects a specific model name that
|
||||||
|
# is different from the model's ID
|
||||||
useModelName: "qwen:qwq"
|
useModelName: "qwen:qwq"
|
||||||
|
|
||||||
# filters: a dictionary of filter settings
|
# filters: a dictionary of filter settings
|
||||||
# - optional, default: empty dictionary
|
# - optional, default: empty dictionary
|
||||||
|
# - only strip_params is currently supported
|
||||||
filters:
|
filters:
|
||||||
# strip_params: a comma separated list of parameters to remove from the request
|
# strip_params: a comma separated list of parameters to remove from the request
|
||||||
# - optional, default: ""
|
# - optional, default: ""
|
||||||
# - useful for preventing overriding of default server params by requests
|
# - useful for server side enforcement of sampling parameters
|
||||||
# - `model` parameter is never removed
|
# - the `model` parameter can never be removed
|
||||||
# - can be any JSON key in the request body
|
# - can be any JSON key in the request body
|
||||||
# - recommended to stick to sampling parameters
|
# - recommended to stick to sampling parameters
|
||||||
strip_params: "temperature, top_p, top_k"
|
strip_params: "temperature, top_p, top_k"
|
||||||
|
|
||||||
|
# concurrencyLimit: overrides the allowed number of active parallel requests to a model
|
||||||
|
# - optional, default: 0
|
||||||
|
# - useful for limiting the number of active parallel requests a model can process
|
||||||
|
# - must be set per model
|
||||||
|
# - any number greater than 0 will override the internal default value of 10
|
||||||
|
# - any request that exceeds the limit will receive an HTTP 429 Too Many Requests response
|
||||||
|
# - recommended to be omitted and the default used
|
||||||
|
concurrencyLimit: 0
|
||||||
|
|
||||||
# Unlisted model example:
|
# Unlisted model example:
|
||||||
"qwen-unlisted":
|
"qwen-unlisted":
|
||||||
# unlisted: true or false
|
# unlisted: boolean, true or false
|
||||||
# - optional, default: false
|
# - optional, default: false
|
||||||
# - unlisted models do not show up in /v1/models or /upstream lists
|
# - unlisted models do not show up in /v1/models api requests
|
||||||
# - can be requested as normal through all apis
|
# - can be requested as normal through all apis
|
||||||
unlisted: true
|
unlisted: true
|
||||||
cmd: llama-server --port ${PORT} -m Llama-3.2-1B-Instruct-Q4_K_M.gguf -ngl 0
|
cmd: llama-server --port ${PORT} -m Llama-3.2-1B-Instruct-Q4_K_M.gguf -ngl 0
|
||||||
|
|
||||||
# Docker example:
|
# Docker example:
|
||||||
# container run times like Docker and Podman can also be used with a
|
# container runtimes like Docker and Podman can be used reliably with
|
||||||
# a combination of cmd and cmdStop.
|
# a combination of cmd and cmdStop.
|
||||||
"docker-llama":
|
"docker-llama":
|
||||||
proxy: "http://127.0.0.1:${PORT}"
|
proxy: "http://127.0.0.1:${PORT}"
|
||||||
@@ -149,24 +161,26 @@ models:
|
|||||||
# cmdStop: command to run to stop the model gracefully
|
# cmdStop: command to run to stop the model gracefully
|
||||||
# - optional, default: ""
|
# - optional, default: ""
|
||||||
# - useful for stopping commands managed by another system
|
# - useful for stopping commands managed by another system
|
||||||
# - on POSIX systems: a SIGTERM is sent for graceful shutdown
|
|
||||||
# - on Windows, taskkill is used
|
|
||||||
# - processes are given 5 seconds to shutdown until they are forcefully killed
|
|
||||||
# - the upstream's process id is available in the ${PID} macro
|
# - the upstream's process id is available in the ${PID} macro
|
||||||
|
#
|
||||||
|
# When empty, llama-swap has this default behaviour:
|
||||||
|
# - on POSIX systems: a SIGTERM signal is sent
|
||||||
|
# - on Windows, calls taskkill to stop the process
|
||||||
|
# - processes have 5 seconds to shutdown until forceful termination is attempted
|
||||||
cmdStop: docker stop dockertest
|
cmdStop: docker stop dockertest
|
||||||
|
|
||||||
# groups: a dictionary of group settings
|
# groups: a dictionary of group settings
|
||||||
# - optional, default: empty dictionary
|
# - optional, default: empty dictionary
|
||||||
# - provide advanced controls over model swapping behaviour.
|
# - provides advanced controls over model swapping behaviour
|
||||||
# - Using groups some models can be kept loaded indefinitely, while others are swapped out.
|
# - using groups some models can be kept loaded indefinitely, while others are swapped out
|
||||||
# - model ids must be defined in the Models section
|
# - model IDs must be defined in the Models section
|
||||||
# - a model can only be a member of one group
|
# - a model can only be a member of one group
|
||||||
# - group behaviour is controlled via the `swap`, `exclusive` and `persistent` fields
|
# - group behaviour is controlled via the `swap`, `exclusive` and `persistent` fields
|
||||||
# - see issue #109 for details
|
# - see issue #109 for details
|
||||||
#
|
#
|
||||||
# NOTE: the example below uses model names that are not defined above for demonstration purposes
|
# NOTE: the example below uses model names that are not defined above for demonstration purposes
|
||||||
groups:
|
groups:
|
||||||
# group1 is same as the default behaviour of llama-swap where only one model is allowed
|
# group1 works the same as the default behaviour of llama-swap where only one model is allowed
|
||||||
# to run at a time across the whole llama-swap instance
|
# to run at a time across the whole llama-swap instance
|
||||||
"group1":
|
"group1":
|
||||||
# swap: controls the model swapping behaviour within the group
|
# swap: controls the model swapping behaviour within the group
|
||||||
@@ -188,10 +202,13 @@ groups:
|
|||||||
- "qwen-unlisted"
|
- "qwen-unlisted"
|
||||||
|
|
||||||
# Example:
|
# Example:
|
||||||
# - in this group all the models can run at the same time
|
# - in group2 all models can run at the same time
|
||||||
# - when a different group loads all running models in this group are unloaded
|
# - when a different group is loaded it causes all running models in this group to unload
|
||||||
"group2":
|
"group2":
|
||||||
swap: false
|
swap: false
|
||||||
|
|
||||||
|
# exclusive: false does not unload other groups when a model in group2 is requested
|
||||||
|
# - the models in group2 will be loaded but will not unload any other groups
|
||||||
exclusive: false
|
exclusive: false
|
||||||
members:
|
members:
|
||||||
- "docker-llama"
|
- "docker-llama"
|
||||||
@@ -220,7 +237,7 @@ groups:
|
|||||||
# - the only supported hook is on_startup
|
# - the only supported hook is on_startup
|
||||||
hooks:
|
hooks:
|
||||||
# on_startup: a dictionary of actions to perform on startup
|
# on_startup: a dictionary of actions to perform on startup
|
||||||
# - optional, default: empty dictionar
|
# - optional, default: empty dictionary
|
||||||
# - the only supported action is preload
|
# - the only supported action is preload
|
||||||
on_startup:
|
on_startup:
|
||||||
# preload: a list of model ids to load on startup
|
# preload: a list of model ids to load on startup
|
||||||
@@ -229,4 +246,4 @@ hooks:
|
|||||||
# - when preloading multiple models at once, define a group
|
# - when preloading multiple models at once, define a group
|
||||||
# otherwise models will be loaded and swapped out
|
# otherwise models will be loaded and swapped out
|
||||||
preload:
|
preload:
|
||||||
- "llama"
|
- "llama"
|
||||||
+28
-22
@@ -5,12 +5,20 @@ import (
|
|||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"net/http"
|
"net/http"
|
||||||
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/gin-gonic/gin"
|
"github.com/gin-gonic/gin"
|
||||||
"github.com/tidwall/gjson"
|
"github.com/tidwall/gjson"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
type MetricsRecorder struct {
|
||||||
|
metricsMonitor *MetricsMonitor
|
||||||
|
realModelName string
|
||||||
|
// isStreaming bool
|
||||||
|
startTime time.Time
|
||||||
|
}
|
||||||
|
|
||||||
// MetricsMiddleware sets up the MetricsResponseWriter for capturing upstream requests
|
// MetricsMiddleware sets up the MetricsResponseWriter for capturing upstream requests
|
||||||
func MetricsMiddleware(pm *ProxyManager) gin.HandlerFunc {
|
func MetricsMiddleware(pm *ProxyManager) gin.HandlerFunc {
|
||||||
return func(c *gin.Context) {
|
return func(c *gin.Context) {
|
||||||
@@ -41,49 +49,47 @@ func MetricsMiddleware(pm *ProxyManager) gin.HandlerFunc {
|
|||||||
metricsRecorder: &MetricsRecorder{
|
metricsRecorder: &MetricsRecorder{
|
||||||
metricsMonitor: pm.metricsMonitor,
|
metricsMonitor: pm.metricsMonitor,
|
||||||
realModelName: realModelName,
|
realModelName: realModelName,
|
||||||
isStreaming: gjson.GetBytes(bodyBytes, "stream").Bool(),
|
|
||||||
startTime: time.Now(),
|
startTime: time.Now(),
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
c.Writer = writer
|
c.Writer = writer
|
||||||
c.Next()
|
c.Next()
|
||||||
|
|
||||||
rec := writer.metricsRecorder
|
// check for streaming response
|
||||||
rec.processBody(writer.body)
|
if strings.Contains(c.Writer.Header().Get("Content-Type"), "text/event-stream") {
|
||||||
}
|
writer.metricsRecorder.processStreamingResponse(writer.body)
|
||||||
}
|
} else {
|
||||||
|
writer.metricsRecorder.processNonStreamingResponse(writer.body)
|
||||||
|
}
|
||||||
|
|
||||||
type MetricsRecorder struct {
|
|
||||||
metricsMonitor *MetricsMonitor
|
|
||||||
realModelName string
|
|
||||||
isStreaming bool
|
|
||||||
startTime time.Time
|
|
||||||
}
|
|
||||||
|
|
||||||
// processBody handles response processing after request completes
|
|
||||||
func (rec *MetricsRecorder) processBody(body []byte) {
|
|
||||||
if rec.isStreaming {
|
|
||||||
rec.processStreamingResponse(body)
|
|
||||||
} else {
|
|
||||||
rec.processNonStreamingResponse(body)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (rec *MetricsRecorder) parseAndRecordMetrics(jsonData gjson.Result) bool {
|
func (rec *MetricsRecorder) parseAndRecordMetrics(jsonData gjson.Result) bool {
|
||||||
usage := jsonData.Get("usage")
|
usage := jsonData.Get("usage")
|
||||||
if !usage.Exists() {
|
timings := jsonData.Get("timings")
|
||||||
|
if !usage.Exists() && !timings.Exists() {
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
// default values
|
// default values
|
||||||
outputTokens := int(jsonData.Get("usage.completion_tokens").Int())
|
outputTokens := 0
|
||||||
inputTokens := int(jsonData.Get("usage.prompt_tokens").Int())
|
inputTokens := 0
|
||||||
|
|
||||||
|
// timings data
|
||||||
tokensPerSecond := -1.0
|
tokensPerSecond := -1.0
|
||||||
promptPerSecond := -1.0
|
promptPerSecond := -1.0
|
||||||
durationMs := int(time.Since(rec.startTime).Milliseconds())
|
durationMs := int(time.Since(rec.startTime).Milliseconds())
|
||||||
|
|
||||||
|
if usage.Exists() {
|
||||||
|
outputTokens = int(jsonData.Get("usage.completion_tokens").Int())
|
||||||
|
inputTokens = int(jsonData.Get("usage.prompt_tokens").Int())
|
||||||
|
}
|
||||||
|
|
||||||
// use llama-server's timing data for tok/sec and duration as it is more accurate
|
// use llama-server's timing data for tok/sec and duration as it is more accurate
|
||||||
if timings := jsonData.Get("timings"); timings.Exists() {
|
if timings.Exists() {
|
||||||
|
inputTokens = int(jsonData.Get("timings.prompt_n").Int())
|
||||||
|
outputTokens = int(jsonData.Get("timings.predicted_n").Int())
|
||||||
promptPerSecond = jsonData.Get("timings.prompt_per_second").Float()
|
promptPerSecond = jsonData.Get("timings.prompt_per_second").Float()
|
||||||
tokensPerSecond = jsonData.Get("timings.predicted_per_second").Float()
|
tokensPerSecond = jsonData.Get("timings.predicted_per_second").Float()
|
||||||
durationMs = int(jsonData.Get("timings.prompt_ms").Float() + jsonData.Get("timings.predicted_ms").Float())
|
durationMs = int(jsonData.Get("timings.prompt_ms").Float() + jsonData.Get("timings.predicted_ms").Float())
|
||||||
|
|||||||
@@ -191,11 +191,17 @@ func (pm *ProxyManager) setupGinEngine() {
|
|||||||
// Support legacy /v1/completions api, see issue #12
|
// Support legacy /v1/completions api, see issue #12
|
||||||
pm.ginEngine.POST("/v1/completions", mm, pm.proxyOAIHandler)
|
pm.ginEngine.POST("/v1/completions", mm, pm.proxyOAIHandler)
|
||||||
|
|
||||||
// Support embeddings
|
// Support embeddings and reranking
|
||||||
pm.ginEngine.POST("/v1/embeddings", mm, pm.proxyOAIHandler)
|
pm.ginEngine.POST("/v1/embeddings", mm, pm.proxyOAIHandler)
|
||||||
|
|
||||||
|
// llama-server's /reranking endpoint + aliases
|
||||||
|
pm.ginEngine.POST("/reranking", mm, pm.proxyOAIHandler)
|
||||||
|
pm.ginEngine.POST("/rerank", mm, pm.proxyOAIHandler)
|
||||||
pm.ginEngine.POST("/v1/rerank", mm, pm.proxyOAIHandler)
|
pm.ginEngine.POST("/v1/rerank", mm, pm.proxyOAIHandler)
|
||||||
pm.ginEngine.POST("/v1/reranking", mm, pm.proxyOAIHandler)
|
pm.ginEngine.POST("/v1/reranking", mm, pm.proxyOAIHandler)
|
||||||
pm.ginEngine.POST("/rerank", mm, pm.proxyOAIHandler)
|
|
||||||
|
// llama-server's /infill endpoint for code infilling
|
||||||
|
pm.ginEngine.POST("/infill", mm, pm.proxyOAIHandler)
|
||||||
|
|
||||||
// Support audio/speech endpoint
|
// Support audio/speech endpoint
|
||||||
pm.ginEngine.POST("/v1/audio/speech", pm.proxyOAIHandler)
|
pm.ginEngine.POST("/v1/audio/speech", pm.proxyOAIHandler)
|
||||||
|
|||||||
+62
-36
@@ -1,52 +1,78 @@
|
|||||||
|
import { useEffect, useCallback } from "react";
|
||||||
import { BrowserRouter as Router, Routes, Route, Navigate, NavLink } from "react-router-dom";
|
import { BrowserRouter as Router, Routes, Route, Navigate, NavLink } from "react-router-dom";
|
||||||
import { useTheme } from "./contexts/ThemeProvider";
|
import { useTheme } from "./contexts/ThemeProvider";
|
||||||
import { APIProvider } from "./contexts/APIProvider";
|
import { useAPI } from "./contexts/APIProvider";
|
||||||
import LogViewerPage from "./pages/LogViewer";
|
import LogViewerPage from "./pages/LogViewer";
|
||||||
import ModelPage from "./pages/Models";
|
import ModelPage from "./pages/Models";
|
||||||
import ActivityPage from "./pages/Activity";
|
import ActivityPage from "./pages/Activity";
|
||||||
import ConnectionStatus from "./components/ConnectionStatus";
|
import ConnectionStatusIcon from "./components/ConnectionStatus";
|
||||||
import { RiSunFill, RiMoonFill } from "react-icons/ri";
|
import { RiSunFill, RiMoonFill } from "react-icons/ri";
|
||||||
|
|
||||||
function App() {
|
function App() {
|
||||||
const { isNarrow, toggleTheme, isDarkMode } = useTheme();
|
const { isNarrow, toggleTheme, isDarkMode, appTitle, setAppTitle, setConnectionState } = useTheme();
|
||||||
|
const handleTitleChange = useCallback(
|
||||||
|
(newTitle: string) => {
|
||||||
|
setAppTitle(newTitle.replace(/\n/g, "").trim().substring(0, 64) || "llama-swap");
|
||||||
|
},
|
||||||
|
[setAppTitle]
|
||||||
|
);
|
||||||
|
|
||||||
|
const { connectionStatus } = useAPI();
|
||||||
|
|
||||||
|
// Synchronize the connection state shown in document.title with the actual connection state
|
||||||
|
useEffect(() => {
|
||||||
|
setConnectionState(connectionStatus);
|
||||||
|
}, [connectionStatus]);
|
||||||
|
|
||||||
return (
|
return (
|
||||||
<Router basename="/ui/">
|
<Router basename="/ui/">
|
||||||
<APIProvider>
|
<div className="flex flex-col h-screen">
|
||||||
<div className="flex flex-col h-screen">
|
<nav className="bg-surface border-b border-border p-2 h-[75px]">
|
||||||
<nav className="bg-surface border-b border-border p-2 h-[75px]">
|
<div className="flex items-center justify-between mx-auto px-4 h-full">
|
||||||
<div className="flex items-center justify-between mx-auto px-4 h-full">
|
{!isNarrow && (
|
||||||
{!isNarrow && <h1 className="flex items-center p-0">llama-swap</h1>}
|
<h1
|
||||||
<div className="flex items-center space-x-4">
|
contentEditable
|
||||||
<NavLink to="/" className={({ isActive }) => (isActive ? "navlink active" : "navlink")}>
|
suppressContentEditableWarning
|
||||||
Logs
|
className="flex items-center p-0 outline-none hover:bg-gray-100 dark:hover:bg-gray-700 rounded px-1"
|
||||||
</NavLink>
|
onBlur={(e) => handleTitleChange(e.currentTarget.textContent || "(set title)")}
|
||||||
|
onKeyDown={(e) => {
|
||||||
<NavLink to="/models" className={({ isActive }) => (isActive ? "navlink active" : "navlink")}>
|
if (e.key === "Enter") {
|
||||||
Models
|
e.preventDefault();
|
||||||
</NavLink>
|
handleTitleChange(e.currentTarget.textContent || "(set title)");
|
||||||
|
e.currentTarget.blur();
|
||||||
<NavLink to="/activity" className={({ isActive }) => (isActive ? "navlink active" : "navlink")}>
|
}
|
||||||
Activity
|
}}
|
||||||
</NavLink>
|
>
|
||||||
<button className="" onClick={toggleTheme}>
|
{appTitle}
|
||||||
{isDarkMode ? <RiMoonFill /> : <RiSunFill />}
|
</h1>
|
||||||
</button>
|
)}
|
||||||
<ConnectionStatus />
|
<div className="flex items-center space-x-4">
|
||||||
</div>
|
<NavLink to="/" className={({ isActive }) => (isActive ? "navlink active" : "navlink")}>
|
||||||
|
Logs
|
||||||
|
</NavLink>
|
||||||
|
<NavLink to="/models" className={({ isActive }) => (isActive ? "navlink active" : "navlink")}>
|
||||||
|
Models
|
||||||
|
</NavLink>
|
||||||
|
<NavLink to="/activity" className={({ isActive }) => (isActive ? "navlink active" : "navlink")}>
|
||||||
|
Activity
|
||||||
|
</NavLink>
|
||||||
|
<button className="" onClick={toggleTheme}>
|
||||||
|
{isDarkMode ? <RiMoonFill /> : <RiSunFill />}
|
||||||
|
</button>
|
||||||
|
<ConnectionStatusIcon />
|
||||||
</div>
|
</div>
|
||||||
</nav>
|
</div>
|
||||||
|
</nav>
|
||||||
|
|
||||||
<main className="flex-1 overflow-auto p-4">
|
<main className="flex-1 overflow-auto p-4">
|
||||||
<Routes>
|
<Routes>
|
||||||
<Route path="/" element={<LogViewerPage />} />
|
<Route path="/" element={<LogViewerPage />} />
|
||||||
<Route path="/models" element={<ModelPage />} />
|
<Route path="/models" element={<ModelPage />} />
|
||||||
<Route path="/activity" element={<ActivityPage />} />
|
<Route path="/activity" element={<ActivityPage />} />
|
||||||
<Route path="*" element={<Navigate to="/" replace />} />
|
<Route path="*" element={<Navigate to="/" replace />} />
|
||||||
</Routes>
|
</Routes>
|
||||||
</main>
|
</main>
|
||||||
</div>
|
</div>
|
||||||
</APIProvider>
|
|
||||||
</Router>
|
</Router>
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,21 +1,11 @@
|
|||||||
import { useAPI } from "../contexts/APIProvider";
|
import { useAPI } from "../contexts/APIProvider";
|
||||||
import { useEffect, useState, useMemo } from "react";
|
import { useMemo } from "react";
|
||||||
|
|
||||||
type ConnectionStatus = "disconnected" | "connecting" | "connected";
|
const ConnectionStatusIcon = () => {
|
||||||
|
const { connectionStatus } = useAPI();
|
||||||
const ConnectionStatus = () => {
|
|
||||||
const { getConnectionStatus } = useAPI();
|
|
||||||
const [eventStreamStatus, setEventStreamStatus] = useState<ConnectionStatus>("disconnected");
|
|
||||||
|
|
||||||
useEffect(() => {
|
|
||||||
const interval = setInterval(() => {
|
|
||||||
setEventStreamStatus(getConnectionStatus());
|
|
||||||
}, 1000);
|
|
||||||
return () => clearInterval(interval);
|
|
||||||
});
|
|
||||||
|
|
||||||
const eventStatusColor = useMemo(() => {
|
const eventStatusColor = useMemo(() => {
|
||||||
switch (eventStreamStatus) {
|
switch (connectionStatus) {
|
||||||
case "connected":
|
case "connected":
|
||||||
return "bg-green-500";
|
return "bg-green-500";
|
||||||
case "connecting":
|
case "connecting":
|
||||||
@@ -24,13 +14,13 @@ const ConnectionStatus = () => {
|
|||||||
default:
|
default:
|
||||||
return "bg-red-500";
|
return "bg-red-500";
|
||||||
}
|
}
|
||||||
}, [eventStreamStatus]);
|
}, [connectionStatus]);
|
||||||
|
|
||||||
return (
|
return (
|
||||||
<div className="flex items-center" title={`event stream: ${eventStreamStatus}`}>
|
<div className="flex items-center" title={`event stream: ${connectionStatus}`}>
|
||||||
<span className={`inline-block w-3 h-3 rounded-full ${eventStatusColor} mr-2`}></span>
|
<span className={`inline-block w-3 h-3 rounded-full ${eventStatusColor} mr-2`}></span>
|
||||||
</div>
|
</div>
|
||||||
);
|
);
|
||||||
};
|
};
|
||||||
|
|
||||||
export default ConnectionStatus;
|
export default ConnectionStatusIcon;
|
||||||
|
|||||||
@@ -1,4 +1,5 @@
|
|||||||
import { useRef, createContext, useState, useContext, useEffect, useCallback, useMemo, type ReactNode } from "react";
|
import { useRef, createContext, useState, useContext, useEffect, useCallback, useMemo, type ReactNode } from "react";
|
||||||
|
import type { ConnectionState } from "../lib/types";
|
||||||
|
|
||||||
type ModelStatus = "ready" | "starting" | "stopping" | "stopped" | "shutdown" | "unknown";
|
type ModelStatus = "ready" | "starting" | "stopping" | "stopped" | "shutdown" | "unknown";
|
||||||
const LOG_LENGTH_LIMIT = 1024 * 100; /* 100KB of log data */
|
const LOG_LENGTH_LIMIT = 1024 * 100; /* 100KB of log data */
|
||||||
@@ -20,7 +21,7 @@ interface APIProviderType {
|
|||||||
proxyLogs: string;
|
proxyLogs: string;
|
||||||
upstreamLogs: string;
|
upstreamLogs: string;
|
||||||
metrics: Metrics[];
|
metrics: Metrics[];
|
||||||
getConnectionStatus: () => "connected" | "connecting" | "disconnected";
|
connectionStatus: ConnectionState;
|
||||||
}
|
}
|
||||||
|
|
||||||
interface Metrics {
|
interface Metrics {
|
||||||
@@ -53,6 +54,7 @@ export function APIProvider({ children, autoStartAPIEvents = true }: APIProvider
|
|||||||
const [proxyLogs, setProxyLogs] = useState("");
|
const [proxyLogs, setProxyLogs] = useState("");
|
||||||
const [upstreamLogs, setUpstreamLogs] = useState("");
|
const [upstreamLogs, setUpstreamLogs] = useState("");
|
||||||
const [metrics, setMetrics] = useState<Metrics[]>([]);
|
const [metrics, setMetrics] = useState<Metrics[]>([]);
|
||||||
|
const [connectionStatus, setConnectionState] = useState<ConnectionState>("disconnected");
|
||||||
const apiEventSource = useRef<EventSource | null>(null);
|
const apiEventSource = useRef<EventSource | null>(null);
|
||||||
|
|
||||||
const [models, setModels] = useState<Model[]>([]);
|
const [models, setModels] = useState<Model[]>([]);
|
||||||
@@ -64,16 +66,6 @@ export function APIProvider({ children, autoStartAPIEvents = true }: APIProvider
|
|||||||
});
|
});
|
||||||
}, []);
|
}, []);
|
||||||
|
|
||||||
const getConnectionStatus = useCallback(() => {
|
|
||||||
if (apiEventSource.current?.readyState === EventSource.OPEN) {
|
|
||||||
return "connected";
|
|
||||||
} else if (apiEventSource.current?.readyState === EventSource.CONNECTING) {
|
|
||||||
return "connecting";
|
|
||||||
} else {
|
|
||||||
return "disconnected";
|
|
||||||
}
|
|
||||||
}, []);
|
|
||||||
|
|
||||||
const enableAPIEvents = useCallback((enabled: boolean) => {
|
const enableAPIEvents = useCallback((enabled: boolean) => {
|
||||||
if (!enabled) {
|
if (!enabled) {
|
||||||
apiEventSource.current?.close();
|
apiEventSource.current?.close();
|
||||||
@@ -86,7 +78,9 @@ export function APIProvider({ children, autoStartAPIEvents = true }: APIProvider
|
|||||||
const initialDelay = 1000; // 1 second
|
const initialDelay = 1000; // 1 second
|
||||||
|
|
||||||
const connect = () => {
|
const connect = () => {
|
||||||
|
apiEventSource.current = null;
|
||||||
const eventSource = new EventSource("/api/events");
|
const eventSource = new EventSource("/api/events");
|
||||||
|
setConnectionState("connecting");
|
||||||
|
|
||||||
eventSource.onopen = () => {
|
eventSource.onopen = () => {
|
||||||
// clear everything out on connect to keep things in sync
|
// clear everything out on connect to keep things in sync
|
||||||
@@ -94,6 +88,9 @@ export function APIProvider({ children, autoStartAPIEvents = true }: APIProvider
|
|||||||
setUpstreamLogs("");
|
setUpstreamLogs("");
|
||||||
setMetrics([]); // clear metrics on reconnect
|
setMetrics([]); // clear metrics on reconnect
|
||||||
setModels([]); // clear models on reconnect
|
setModels([]); // clear models on reconnect
|
||||||
|
apiEventSource.current = eventSource;
|
||||||
|
retryCount = 0;
|
||||||
|
setConnectionState("connected");
|
||||||
};
|
};
|
||||||
|
|
||||||
eventSource.onmessage = (e: MessageEvent) => {
|
eventSource.onmessage = (e: MessageEvent) => {
|
||||||
@@ -138,14 +135,14 @@ export function APIProvider({ children, autoStartAPIEvents = true }: APIProvider
|
|||||||
console.error(e.data, err);
|
console.error(e.data, err);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
eventSource.onerror = () => {
|
eventSource.onerror = () => {
|
||||||
eventSource.close();
|
eventSource.close();
|
||||||
retryCount++;
|
retryCount++;
|
||||||
const delay = Math.min(initialDelay * Math.pow(2, retryCount - 1), 5000);
|
const delay = Math.min(initialDelay * Math.pow(2, retryCount - 1), 5000);
|
||||||
|
setConnectionState("disconnected");
|
||||||
setTimeout(connect, delay);
|
setTimeout(connect, delay);
|
||||||
};
|
};
|
||||||
|
|
||||||
apiEventSource.current = eventSource;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
connect();
|
connect();
|
||||||
@@ -213,7 +210,7 @@ export function APIProvider({ children, autoStartAPIEvents = true }: APIProvider
|
|||||||
proxyLogs,
|
proxyLogs,
|
||||||
upstreamLogs,
|
upstreamLogs,
|
||||||
metrics,
|
metrics,
|
||||||
getConnectionStatus,
|
connectionStatus,
|
||||||
}),
|
}),
|
||||||
[models, listModels, unloadAllModels, loadModel, enableAPIEvents, proxyLogs, upstreamLogs, metrics]
|
[models, listModels, unloadAllModels, loadModel, enableAPIEvents, proxyLogs, upstreamLogs, metrics]
|
||||||
);
|
);
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
import { createContext, useContext, useEffect, type ReactNode, useMemo, useState } from "react";
|
import { createContext, useContext, useEffect, type ReactNode, useMemo, useState } from "react";
|
||||||
import { usePersistentState } from "../hooks/usePersistentState";
|
import { usePersistentState } from "../hooks/usePersistentState";
|
||||||
|
import type { ConnectionState } from "../lib/types";
|
||||||
|
|
||||||
type ScreenWidth = "xs" | "sm" | "md" | "lg" | "xl" | "2xl";
|
type ScreenWidth = "xs" | "sm" | "md" | "lg" | "xl" | "2xl";
|
||||||
type ThemeContextType = {
|
type ThemeContextType = {
|
||||||
@@ -7,6 +8,11 @@ type ThemeContextType = {
|
|||||||
screenWidth: ScreenWidth;
|
screenWidth: ScreenWidth;
|
||||||
isNarrow: boolean;
|
isNarrow: boolean;
|
||||||
toggleTheme: () => void;
|
toggleTheme: () => void;
|
||||||
|
|
||||||
|
// for managing the window title and connection state information
|
||||||
|
appTitle: string;
|
||||||
|
setAppTitle: (title: string) => void;
|
||||||
|
setConnectionState: (state: ConnectionState) => void;
|
||||||
};
|
};
|
||||||
|
|
||||||
const ThemeContext = createContext<ThemeContextType | undefined>(undefined);
|
const ThemeContext = createContext<ThemeContextType | undefined>(undefined);
|
||||||
@@ -16,6 +22,17 @@ type ThemeProviderProps = {
|
|||||||
};
|
};
|
||||||
|
|
||||||
export function ThemeProvider({ children }: ThemeProviderProps) {
|
export function ThemeProvider({ children }: ThemeProviderProps) {
|
||||||
|
const [appTitle, setAppTitle] = usePersistentState("app-title", "llama-swap");
|
||||||
|
const [connectionState, setConnectionState] = useState<ConnectionState>("disconnected");
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Set document.title to a connection status icon followed by the app title
|
||||||
|
*/
|
||||||
|
useEffect(() => {
|
||||||
|
const connectionIcon = connectionState === "connecting" ? "🟡" : connectionState === "connected" ? "🟢" : "🔴";
|
||||||
|
document.title = connectionIcon + " " + appTitle; // Set initial title
|
||||||
|
}, [appTitle, connectionState]);
|
||||||
|
|
||||||
const [isDarkMode, setIsDarkMode] = usePersistentState<boolean>("theme", false);
|
const [isDarkMode, setIsDarkMode] = usePersistentState<boolean>("theme", false);
|
||||||
const [screenWidth, setScreenWidth] = useState<ScreenWidth>("md"); // Default to md
|
const [screenWidth, setScreenWidth] = useState<ScreenWidth>("md"); // Default to md
|
||||||
|
|
||||||
@@ -55,7 +72,19 @@ export function ThemeProvider({ children }: ThemeProviderProps) {
|
|||||||
}, [screenWidth]);
|
}, [screenWidth]);
|
||||||
|
|
||||||
return (
|
return (
|
||||||
<ThemeContext.Provider value={{ isDarkMode, toggleTheme, screenWidth, isNarrow }}>{children}</ThemeContext.Provider>
|
<ThemeContext.Provider
|
||||||
|
value={{
|
||||||
|
isDarkMode,
|
||||||
|
toggleTheme,
|
||||||
|
screenWidth,
|
||||||
|
isNarrow,
|
||||||
|
appTitle,
|
||||||
|
setAppTitle,
|
||||||
|
setConnectionState,
|
||||||
|
}}
|
||||||
|
>
|
||||||
|
{children}
|
||||||
|
</ThemeContext.Provider>
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -0,0 +1 @@
|
|||||||
|
export type ConnectionState = "connected" | "connecting" | "disconnected";
|
||||||
+4
-1
@@ -3,11 +3,14 @@ import { createRoot } from "react-dom/client";
|
|||||||
import "./index.css";
|
import "./index.css";
|
||||||
import App from "./App.tsx";
|
import App from "./App.tsx";
|
||||||
import { ThemeProvider } from "./contexts/ThemeProvider";
|
import { ThemeProvider } from "./contexts/ThemeProvider";
|
||||||
|
import { APIProvider } from "./contexts/APIProvider";
|
||||||
|
|
||||||
createRoot(document.getElementById("root")!).render(
|
createRoot(document.getElementById("root")!).render(
|
||||||
<StrictMode>
|
<StrictMode>
|
||||||
<ThemeProvider>
|
<ThemeProvider>
|
||||||
<App />
|
<APIProvider>
|
||||||
|
<App />
|
||||||
|
</APIProvider>
|
||||||
</ThemeProvider>
|
</ThemeProvider>
|
||||||
</StrictMode>
|
</StrictMode>
|
||||||
);
|
);
|
||||||
|
|||||||
Reference in New Issue
Block a user