Compare commits

...

2 Commits

Author SHA1 Message Date
Benson Wong fcc5ad135a UI: Allow editing of title (#246)
- make <h1> title contentEditable
- title setting persists across reloads in localStorage
2025-08-17 09:42:06 -07:00
Benson Wong 305e5a0031 improve example config [skip ci] 2025-08-17 09:19:04 -07:00
2 changed files with 72 additions and 28 deletions
+34 -26
View File
@@ -3,14 +3,15 @@
# #
# 💡 Tip - Use an LLM with this file! # 💡 Tip - Use an LLM with this file!
# ==================================== # ====================================
# This example configuration is written to be LLM friendly! Try # This example configuration is written to be LLM friendly. Try
# copying this file into an LLM and asking it to explain or generate # copying this file into an LLM and asking it to explain or generate
# sections for you. # sections for you.
# ==================================== # ====================================
#
# Usage notes:
# - Below are all the available configuration options for llama-swap. # - Below are all the available configuration options for llama-swap.
# - Settings with a default value, or noted as optional can be omitted. # - Settings noted as "required" must be in your configuration file
# - Settings that are marked required must be in your configuration file # - Settings noted as "optional" can be omitted
# healthCheckTimeout: number of seconds to wait for a model to be ready to serve requests # healthCheckTimeout: number of seconds to wait for a model to be ready to serve requests
# - optional, default: 120 # - optional, default: 120
@@ -34,9 +35,9 @@ metricsMaxInMemory: 1000
# - it is automatically incremented for every model that uses it # - it is automatically incremented for every model that uses it
startPort: 10001 startPort: 10001
# macros: sets a dictionary of string:string pairs # macros: a dictionary of string substitutions
# - optional, default: empty dictionary # - optional, default: empty dictionary
# - these are reusable snippets # - macros are reusable snippets
# - used in a model's cmd, cmdStop, proxy and checkEndpoint # - used in a model's cmd, cmdStop, proxy and checkEndpoint
# - useful for reducing common configuration settings # - useful for reducing common configuration settings
macros: macros:
@@ -99,44 +100,46 @@ models:
# checkEndpoint: URL path to check if the server is ready # checkEndpoint: URL path to check if the server is ready
# - optional, default: /health # - optional, default: /health
# - use "none" to skip endpoint ready checking
# - endpoint is expected to return an HTTP 200 response # - endpoint is expected to return an HTTP 200 response
# - all requests wait until the endpoint is ready (or fails) # - all requests wait until the endpoint is ready or fails
# - use "none" to skip endpoint health checking
checkEndpoint: /custom-endpoint checkEndpoint: /custom-endpoint
# ttl: automatically unload the model after this many seconds # ttl: automatically unload the model after ttl seconds
# - optional, default: 0 # - optional, default: 0
# - ttl must be greater than 0 to enable automatic unloading # - ttl must be greater than 0 to enable automatic unloading
# - a value of 0 disables automatic unloading of the model # - a value of 0 disables automatic unloading of the model
ttl: 60 ttl: 60
# useModelName: overrides the model name that is sent to upstream server # useModelName: override the model name that is sent to upstream server
# - optional, default: "" # - optional, default: ""
# - useful when the upstream server expects a specific model name or format # - useful for when the upstream server expects a specific model name that
# is different from the model's ID
useModelName: "qwen:qwq" useModelName: "qwen:qwq"
# filters: a dictionary of filter settings # filters: a dictionary of filter settings
# - optional, default: empty dictionary # - optional, default: empty dictionary
# - only strip_params is currently supported
filters: filters:
# strip_params: a comma separated list of parameters to remove from the request # strip_params: a comma separated list of parameters to remove from the request
# - optional, default: "" # - optional, default: ""
# - useful for preventing overriding of default server params by requests # - useful for server side enforcement of sampling parameters
# - `model` parameter is never removed # - the `model` parameter can never be removed
# - can be any JSON key in the request body # - can be any JSON key in the request body
# - recommended to stick to sampling parameters # - recommended to stick to sampling parameters
strip_params: "temperature, top_p, top_k" strip_params: "temperature, top_p, top_k"
# Unlisted model example: # Unlisted model example:
"qwen-unlisted": "qwen-unlisted":
# unlisted: true or false # unlisted: boolean, true or false
# - optional, default: false # - optional, default: false
# - unlisted models do not show up in /v1/models or /upstream lists # - unlisted models do not show up in /v1/models api requests
# - can be requested as normal through all apis # - can be requested as normal through all apis
unlisted: true unlisted: true
cmd: llama-server --port ${PORT} -m Llama-3.2-1B-Instruct-Q4_K_M.gguf -ngl 0 cmd: llama-server --port ${PORT} -m Llama-3.2-1B-Instruct-Q4_K_M.gguf -ngl 0
# Docker example: # Docker example:
# container run times like Docker and Podman can also be used with a # container run times like Docker and Podman can be used reliably with a
# combination of cmd and cmdStop. # combination of cmd and cmdStop.
"docker-llama": "docker-llama":
proxy: "http://127.0.0.1:${PORT}" proxy: "http://127.0.0.1:${PORT}"
@@ -149,24 +152,26 @@ models:
# cmdStop: command to run to stop the model gracefully # cmdStop: command to run to stop the model gracefully
# - optional, default: "" # - optional, default: ""
# - useful for stopping commands managed by another system # - useful for stopping commands managed by another system
# - on POSIX systems: a SIGTERM is sent for graceful shutdown
# - on Windows, taskkill is used
# - processes are given 5 seconds to shutdown until they are forcefully killed
# - the upstream's process id is available in the ${PID} macro # - the upstream's process id is available in the ${PID} macro
#
# When empty, llama-swap has this default behaviour:
# - on POSIX systems: a SIGTERM signal is sent
# - on Windows, calls taskkill to stop the process
# - processes have 5 seconds to shutdown until forceful termination is attempted
cmdStop: docker stop dockertest cmdStop: docker stop dockertest
# groups: a dictionary of group settings # groups: a dictionary of group settings
# - optional, default: empty dictionary # - optional, default: empty dictionary
# - provide advanced controls over model swapping behaviour. # - provides advanced controls over model swapping behaviour
# - Using groups some models can be kept loaded indefinitely, while others are swapped out. # - using groups some models can be kept loaded indefinitely, while others are swapped out
# - model ids must be defined in the Models section # - model IDs must be defined in the Models section
# - a model can only be a member of one group # - a model can only be a member of one group
# - group behaviour is controlled via the `swap`, `exclusive` and `persistent` fields # - group behaviour is controlled via the `swap`, `exclusive` and `persistent` fields
# - see issue #109 for details # - see issue #109 for details
# #
# NOTE: the example below uses model names that are not defined above for demonstration purposes # NOTE: the example below uses model names that are not defined above for demonstration purposes
groups: groups:
# group1 is same as the default behaviour of llama-swap where only one model is allowed # group1 works the same as the default behaviour of llama-swap where only one model is allowed
# to run at a time across the whole llama-swap instance # to run at a time across the whole llama-swap instance
"group1": "group1":
# swap: controls the model swapping behaviour within the group # swap: controls the model swapping behaviour within the group
@@ -188,10 +193,13 @@ groups:
- "qwen-unlisted" - "qwen-unlisted"
# Example: # Example:
# - in this group all the models can run at the same time # - in group2 all models can run at the same time
# - when a different group loads all running models in this group are unloaded # - when a different group is loaded it causes all running models in this group to unload
"group2": "group2":
swap: false swap: false
# exclusive: false does not unload other groups when a model in group2 is requested
# - the models in group2 will be loaded but will not unload any other groups
exclusive: false exclusive: false
members: members:
- "docker-llama" - "docker-llama"
@@ -220,7 +228,7 @@ groups:
# - the only supported hook is on_startup # - the only supported hook is on_startup
hooks: hooks:
# on_startup: a dictionary of actions to perform on startup # on_startup: a dictionary of actions to perform on startup
# - optional, default: empty dictionar # - optional, default: empty dictionary
# - the only supported action is preload # - the only supported action is preload
on_startup: on_startup:
# preload: a list of model ids to load on startup # preload: a list of model ids to load on startup
+37 -1
View File
@@ -1,3 +1,4 @@
import { useEffect, useCallback } from "react";
import { BrowserRouter as Router, Routes, Route, Navigate, NavLink } from "react-router-dom"; import { BrowserRouter as Router, Routes, Route, Navigate, NavLink } from "react-router-dom";
import { useTheme } from "./contexts/ThemeProvider"; import { useTheme } from "./contexts/ThemeProvider";
import { APIProvider } from "./contexts/APIProvider"; import { APIProvider } from "./contexts/APIProvider";
@@ -6,9 +7,23 @@ import ModelPage from "./pages/Models";
import ActivityPage from "./pages/Activity"; import ActivityPage from "./pages/Activity";
import ConnectionStatus from "./components/ConnectionStatus"; import ConnectionStatus from "./components/ConnectionStatus";
import { RiSunFill, RiMoonFill } from "react-icons/ri"; import { RiSunFill, RiMoonFill } from "react-icons/ri";
import { usePersistentState } from "./hooks/usePersistentState";
function App() { function App() {
const { isNarrow, toggleTheme, isDarkMode } = useTheme(); const { isNarrow, toggleTheme, isDarkMode } = useTheme();
const [appTitle, setAppTitle] = usePersistentState("app-title", "llama-swap");
const handleTitleChange = useCallback(
(newTitle: string) => {
setAppTitle(newTitle);
document.title = newTitle;
},
[setAppTitle]
);
useEffect(() => {
document.title = appTitle; // Set initial title
}, [appTitle]);
return ( return (
<Router basename="/ui/"> <Router basename="/ui/">
@@ -16,7 +31,28 @@ function App() {
<div className="flex flex-col h-screen"> <div className="flex flex-col h-screen">
<nav className="bg-surface border-b border-border p-2 h-[75px]"> <nav className="bg-surface border-b border-border p-2 h-[75px]">
<div className="flex items-center justify-between mx-auto px-4 h-full"> <div className="flex items-center justify-between mx-auto px-4 h-full">
{!isNarrow && <h1 className="flex items-center p-0">llama-swap</h1>} {!isNarrow && (
<h1
contentEditable
suppressContentEditableWarning
className="flex items-center p-0 outline-none hover:bg-gray-100 dark:hover:bg-gray-700 rounded px-1"
onBlur={(e) =>
handleTitleChange(e.currentTarget.textContent?.replace(/\n/g, "").trim() || "llama-swap")
}
onKeyDown={(e) => {
if (e.key === "Enter") {
e.preventDefault();
const sanitizedText =
e.currentTarget.textContent?.replace(/\n/g, "").trim().substring(0, 25) || "llama-swap";
handleTitleChange(sanitizedText);
e.currentTarget.textContent = sanitizedText;
e.currentTarget.blur();
}
}}
>
{appTitle}
</h1>
)}
<div className="flex items-center space-x-4"> <div className="flex items-center space-x-4">
<NavLink to="/" className={({ isActive }) => (isActive ? "navlink active" : "navlink")}> <NavLink to="/" className={({ isActive }) => (isActive ? "navlink active" : "navlink")}>
Logs Logs