Compare commits

...

3 Commits

Author SHA1 Message Date
Benson Wong cc77139ff8 proxy,proxy/config: add global TTL feature (#554)
Add a new configuration parameter globalTTL that all models will
inherit. The default value is 0, which matches the current
behavior of never automatically unloading a model.

The model.ttl's default has changed to -1, which means use the global
TTL value. Any model.ttl >= 0 is now valid, with 0 meaning never unload.
This allows a model to override a globalTTL > 0 and be configured to
never unload.

Fixes #459
Closes #512
2026-03-01 21:02:12 -08:00
Benson Wong 390a35bf93 ui-svelte: add copy button to markdown code blocks (#537)
Add a copy-to-clipboard button that appears on hover for each code block
rendered in the chat interface assistant messages.

- Svelte action `codeBlockCopy` injects a button into every `<pre>`
element
- MutationObserver reattaches buttons as streaming content arrives
- Button shows a check icon for 2 seconds after a successful copy
- Uses clipboard API with execCommand fallback for non-secure contexts
- CSS hides button by default and reveals it on pre:hover

https://claude.ai/code/session_01PTA5ao5YQuFAS6a9juLeZW

---------

Co-authored-by: Claude <noreply@anthropic.com>
2026-03-01 09:48:56 -08:00
pdscomp 181f71ca11 .github,docker: add cuda13 architecture support (#551)
Add `cuda13` as a supported build architecture, targeting the
`ghcr.io/ggml-org/llama.cpp:server-cuda13` upstream base image.

The `server-cuda13` image ships with CUDA 13 libraries, providing
improved performance on recent NVIDIA hardware compared to the existing
`server-cuda` (CUDA 12) image. Users with newer GPUs (e.g., RTX
50-series) benefit from reduced model load latency and higher token
throughput.

- Add `cuda13` to the allowed architectures list in
`docker/build-container.sh`
- Add `cuda13` to the CI matrix in `.github/workflows/containers.yml` so
the container is built and pushed automatically
2026-03-01 09:37:08 -08:00
11 changed files with 187 additions and 28 deletions
+1 -1
View File
@@ -29,7 +29,7 @@ jobs:
runs-on: ubuntu-latest runs-on: ubuntu-latest
strategy: strategy:
matrix: matrix:
platform: [intel, cuda, vulkan, cpu, musa, rocm] platform: [intel, cuda, cuda13, vulkan, cpu, musa, rocm]
fail-fast: false fail-fast: false
steps: steps:
- name: Checkout code - name: Checkout code
+9 -3
View File
@@ -48,6 +48,12 @@
"default": 120, "default": 120,
"description": "Number of seconds to wait for a model to be ready to serve requests." "description": "Number of seconds to wait for a model to be ready to serve requests."
}, },
"globalTTL": {
"type": "integer",
"minimum": 0,
"default": 0,
"description": "Default TTL for all models in seconds, 0 means no TTL and models will never be automatically unloaded"
},
"logLevel": { "logLevel": {
"type": "string", "type": "string",
"enum": [ "enum": [
@@ -177,9 +183,9 @@
}, },
"ttl": { "ttl": {
"type": "integer", "type": "integer",
"minimum": 0, "minimum": -1,
"default": 0, "default": -1,
"description": "Automatically unload the model after ttl seconds. 0 disables unloading. Must be >0 to enable." "description": "Automatically unload the model after ttl seconds. -1 uses the global TTL value, 0 disables unloading. Must be >0 to enable."
}, },
"useModelName": { "useModelName": {
"type": "string", "type": "string",
+9 -2
View File
@@ -75,6 +75,11 @@ sendLoadingState: true
# all fields except for Id so chat UIs can use the alias equivalent to the original. # all fields except for Id so chat UIs can use the alias equivalent to the original.
includeAliasesInList: false includeAliasesInList: false
# globalTTL: the default TTL in seconds before unloading a model
# - optional, default: 0 (never automatically unload)
# - must be >= 0
globalTTL: 0
# macros: a dictionary of string substitutions # macros: a dictionary of string substitutions
# - optional, default: empty dictionary # - optional, default: empty dictionary
# - macros are reusable snippets # - macros are reusable snippets
@@ -180,8 +185,10 @@ models:
checkEndpoint: /custom-endpoint checkEndpoint: /custom-endpoint
# ttl: automatically unload the model after ttl seconds # ttl: automatically unload the model after ttl seconds
# - optional, default: 0 # - optional, default: -1 (use global default)
# - ttl values must be a value greater than 0 # - ttl values must be a value greater than or equal to 0
# - a ttl of -1 will use the global TTL value as the default
# - a ttl of 0 will mean never unload
# - a value of 0 disables automatic unloading of the model # - a value of 0 disables automatic unloading of the model
ttl: 60 ttl: 60
+1 -1
View File
@@ -27,7 +27,7 @@ ARCH=$1
PUSH_IMAGES=${2:-false} PUSH_IMAGES=${2:-false}
# List of allowed architectures # List of allowed architectures
ALLOWED_ARCHS=("intel" "vulkan" "musa" "cuda" "cpu" "rocm") ALLOWED_ARCHS=("intel" "vulkan" "musa" "cuda" "cuda13" "cpu" "rocm")
# Check if ARCH is in the allowed list # Check if ARCH is in the allowed list
if [[ ! " ${ALLOWED_ARCHS[@]} " =~ " ${ARCH} " ]]; then if [[ ! " ${ALLOWED_ARCHS[@]} " =~ " ${ARCH} " ]]; then
+15
View File
@@ -124,6 +124,7 @@ type Config struct {
LogToStdout string `yaml:"logToStdout"` LogToStdout string `yaml:"logToStdout"`
MetricsMaxInMemory int `yaml:"metricsMaxInMemory"` MetricsMaxInMemory int `yaml:"metricsMaxInMemory"`
CaptureBuffer int `yaml:"captureBuffer"` CaptureBuffer int `yaml:"captureBuffer"`
GlobalTTL int `yaml:"globalTTL"`
Models map[string]ModelConfig `yaml:"models"` /* key is model ID */ Models map[string]ModelConfig `yaml:"models"` /* key is model ID */
Profiles map[string][]string `yaml:"profiles"` Profiles map[string][]string `yaml:"profiles"`
Groups map[string]GroupConfig `yaml:"groups"` /* key is group ID */ Groups map[string]GroupConfig `yaml:"groups"` /* key is group ID */
@@ -203,6 +204,7 @@ func LoadConfigFromReader(r io.Reader) (Config, error) {
LogToStdout: LogToStdoutProxy, LogToStdout: LogToStdoutProxy,
MetricsMaxInMemory: 1000, MetricsMaxInMemory: 1000,
CaptureBuffer: 5, CaptureBuffer: 5,
GlobalTTL: 0,
} }
if err = yaml.Unmarshal([]byte(yamlStr), &config); err != nil { if err = yaml.Unmarshal([]byte(yamlStr), &config); err != nil {
return Config{}, err return Config{}, err
@@ -216,6 +218,10 @@ func LoadConfigFromReader(r io.Reader) (Config, error) {
return Config{}, fmt.Errorf("startPort must be greater than 1") return Config{}, fmt.Errorf("startPort must be greater than 1")
} }
if config.GlobalTTL < 0 {
return Config{}, fmt.Errorf("globalTTL must be >= 0")
}
switch config.LogToStdout { switch config.LogToStdout {
case LogToStdoutProxy, LogToStdoutUpstream, LogToStdoutBoth, LogToStdoutNone: case LogToStdoutProxy, LogToStdoutUpstream, LogToStdoutBoth, LogToStdoutNone:
default: default:
@@ -255,6 +261,15 @@ func LoadConfigFromReader(r io.Reader) (Config, error) {
modelConfig.Cmd = StripComments(modelConfig.Cmd) modelConfig.Cmd = StripComments(modelConfig.Cmd)
modelConfig.CmdStop = StripComments(modelConfig.CmdStop) modelConfig.CmdStop = StripComments(modelConfig.CmdStop)
// set model TTL to globalTTL if it is still the default value
if modelConfig.UnloadAfter == MODEL_CONFIG_DEFAULT_TTL {
modelConfig.UnloadAfter = config.GlobalTTL
}
if modelConfig.UnloadAfter < 0 {
return Config{}, fmt.Errorf("model %s: invalid TTL value %d", modelId, modelConfig.UnloadAfter)
}
// Validate model macros // Validate model macros
for _, macro := range modelConfig.Macros { for _, macro := range modelConfig.Macros {
if err = validateMacro(macro.Name, macro.Value); err != nil { if err = validateMacro(macro.Name, macro.Value); err != nil {
+65
View File
@@ -848,6 +848,71 @@ func TestConfig_APIKeys_EnvMacros(t *testing.T) {
}) })
} }
// TestConfig_GlobalTTL verifies how the top-level globalTTL setting interacts
// with a model's own ttl when a config is loaded via LoadConfigFromReader:
//   - a model that does not set ttl ends up with globalTTL as its UnloadAfter
//   - an explicit model ttl (including 0, "never unload") overrides globalTTL
//   - globalTTL itself defaults to 0 when omitted
//   - a negative globalTTL is rejected with a descriptive error
func TestConfig_GlobalTTL(t *testing.T) {
	cases := []struct {
		name          string
		content       string
		wantGlobalTTL int
		wantModelTTL  int
	}{
		{
			name: "globalTTL sets default for models",
			content: `
globalTTL: 300
models:
  model1:
    cmd: server --port ${PORT}
`,
			wantGlobalTTL: 300,
			wantModelTTL:  300,
		},
		{
			name: "model ttl=0 overrides globalTTL",
			content: `
globalTTL: 300
models:
  model1:
    cmd: server --port ${PORT}
    ttl: 0
`,
			wantGlobalTTL: 300,
			wantModelTTL:  0,
		},
		{
			name: "model explicit ttl overrides globalTTL",
			content: `
globalTTL: 300
models:
  model1:
    cmd: server --port ${PORT}
    ttl: 600
`,
			wantGlobalTTL: 300,
			wantModelTTL:  600,
		},
		{
			name: "globalTTL defaults to 0",
			content: `
models:
  model1:
    cmd: server --port ${PORT}
`,
			wantGlobalTTL: 0,
			wantModelTTL:  0,
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			config, err := LoadConfigFromReader(strings.NewReader(tc.content))
			// Stop here on a load failure; the remaining checks would only
			// report noise against a zero-value Config.
			if !assert.NoError(t, err) {
				return
			}
			assert.Equal(t, tc.wantGlobalTTL, config.GlobalTTL)
			assert.Equal(t, tc.wantModelTTL, config.Models["model1"].UnloadAfter)
		})
	}

	t.Run("negative globalTTL rejected", func(t *testing.T) {
		content := `
globalTTL: -1
models:
  model1:
    cmd: server --port ${PORT}
`
		_, err := LoadConfigFromReader(strings.NewReader(content))
		// Only inspect the message when an error was actually returned;
		// calling err.Error() on a nil error would panic instead of failing.
		if assert.Error(t, err) {
			assert.Contains(t, err.Error(), "globalTTL must be >= 0")
		}
	})
}
func TestConfig_EnvMacros(t *testing.T) { func TestConfig_EnvMacros(t *testing.T) {
t.Run("basic env substitution in cmd", func(t *testing.T) { t.Run("basic env substitution in cmd", func(t *testing.T) {
t.Setenv("TEST_MODEL_PATH", "/opt/models") t.Setenv("TEST_MODEL_PATH", "/opt/models")
+5 -1
View File
@@ -5,6 +5,10 @@ import (
"runtime" "runtime"
) )
const (
// MODEL_CONFIG_DEFAULT_TTL is the sentinel assigned to ModelConfig.UnloadAfter
// when a model does not set ttl. It means "use the global TTL": the config
// loader replaces it with Config.GlobalTTL, and any other negative value is
// rejected as invalid.
MODEL_CONFIG_DEFAULT_TTL = -1
)
type ModelConfig struct { type ModelConfig struct {
Cmd string `yaml:"cmd"` Cmd string `yaml:"cmd"`
CmdStop string `yaml:"cmdStop"` CmdStop string `yaml:"cmdStop"`
@@ -47,7 +51,7 @@ func (m *ModelConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
Aliases: []string{}, Aliases: []string{},
Env: []string{}, Env: []string{},
CheckEndpoint: "/health", CheckEndpoint: "/health",
UnloadAfter: 0, UnloadAfter: MODEL_CONFIG_DEFAULT_TTL, // use GlobalTTL
Unlisted: false, Unlisted: false,
UseModelName: "", UseModelName: "",
ConcurrencyLimit: 0, ConcurrencyLimit: 0,
+10 -10
View File
@@ -117,12 +117,12 @@ func TestProcess_UnloadAfterTTL(t *testing.T) {
} }
expectedMessage := "I_sense_imminent_danger" expectedMessage := "I_sense_imminent_danger"
config := getTestSimpleResponderConfig(expectedMessage) conf := getTestSimpleResponderConfig(expectedMessage)
assert.Equal(t, 0, config.UnloadAfter) assert.Equal(t, config.MODEL_CONFIG_DEFAULT_TTL, conf.UnloadAfter)
config.UnloadAfter = 3 // seconds conf.UnloadAfter = 3 // seconds
assert.Equal(t, 3, config.UnloadAfter) assert.Equal(t, 3, conf.UnloadAfter)
process := NewProcess("ttl_test", 2, config, debugLogger, debugLogger) process := NewProcess("ttl_test", 2, conf, debugLogger, debugLogger)
defer process.Stop() defer process.Stop()
// this should take 4 seconds // this should take 4 seconds
@@ -159,12 +159,12 @@ func TestProcess_LowTTLValue(t *testing.T) {
t.Skip("skipping test, edit process_test.go to run it ") t.Skip("skipping test, edit process_test.go to run it ")
} }
config := getTestSimpleResponderConfig("fast_ttl") conf := getTestSimpleResponderConfig("fast_ttl")
assert.Equal(t, 0, config.UnloadAfter) assert.Equal(t, config.MODEL_CONFIG_DEFAULT_TTL, conf.UnloadAfter)
config.UnloadAfter = 1 // second conf.UnloadAfter = 1 // second
assert.Equal(t, 1, config.UnloadAfter) assert.Equal(t, 1, conf.UnloadAfter)
process := NewProcess("ttl", 2, config, debugLogger, debugLogger) process := NewProcess("ttl", 2, conf, debugLogger, debugLogger)
defer process.Stop() defer process.Stop()
for i := 0; i < 100; i++ { for i := 0; i < 100; i++ {
+1 -1
View File
@@ -730,7 +730,7 @@ func TestProxyManager_RunningEndpoint(t *testing.T) {
// Verify extended fields are present // Verify extended fields are present
assert.NotEmpty(t, response.Running[0].Cmd, "cmd should be populated") assert.NotEmpty(t, response.Running[0].Cmd, "cmd should be populated")
assert.NotEmpty(t, response.Running[0].Proxy, "proxy should be populated") assert.NotEmpty(t, response.Running[0].Proxy, "proxy should be populated")
assert.Equal(t, 0, response.Running[0].TTL, "ttl should default to 0") assert.Equal(t, -1, response.Running[0].TTL, "ttl should default to -1 (use globalTTL)")
}) })
} }
-7
View File
@@ -925,7 +925,6 @@
"integrity": "sha512-Y1Cs7hhTc+a5E9Va/xwKlAJoariQyHY+5zBgCZg4PFWNYQ1nMN9sjK1zhw1gK69DuqVP++sht/1GZg1aRwmAXQ==", "integrity": "sha512-Y1Cs7hhTc+a5E9Va/xwKlAJoariQyHY+5zBgCZg4PFWNYQ1nMN9sjK1zhw1gK69DuqVP++sht/1GZg1aRwmAXQ==",
"dev": true, "dev": true,
"license": "MIT", "license": "MIT",
"peer": true,
"dependencies": { "dependencies": {
"@sveltejs/vite-plugin-svelte-inspector": "^4.0.1", "@sveltejs/vite-plugin-svelte-inspector": "^4.0.1",
"debug": "^4.4.1", "debug": "^4.4.1",
@@ -1308,7 +1307,6 @@
"integrity": "sha512-t7frlewr6+cbx+9Ohpl0NOTKXZNV9xHRmNOvql47BFJKcEG1CxtxlPEEe+gR9uhVWM4DwhnvTF110mIL4yP9RA==", "integrity": "sha512-t7frlewr6+cbx+9Ohpl0NOTKXZNV9xHRmNOvql47BFJKcEG1CxtxlPEEe+gR9uhVWM4DwhnvTF110mIL4yP9RA==",
"dev": true, "dev": true,
"license": "MIT", "license": "MIT",
"peer": true,
"dependencies": { "dependencies": {
"undici-types": "~7.16.0" "undici-types": "~7.16.0"
} }
@@ -1441,7 +1439,6 @@
"resolved": "https://registry.npmjs.org/acorn/-/acorn-8.15.0.tgz", "resolved": "https://registry.npmjs.org/acorn/-/acorn-8.15.0.tgz",
"integrity": "sha512-NZyJarBfL7nWwIq+FDL6Zp/yHEhePMNnnJ0y3qfieCrmNvYct8uvtiV41UvlSe6apAfk0fY1FbWx+NwfmpvtTg==", "integrity": "sha512-NZyJarBfL7nWwIq+FDL6Zp/yHEhePMNnnJ0y3qfieCrmNvYct8uvtiV41UvlSe6apAfk0fY1FbWx+NwfmpvtTg==",
"license": "MIT", "license": "MIT",
"peer": true,
"bin": { "bin": {
"acorn": "bin/acorn" "acorn": "bin/acorn"
}, },
@@ -3452,7 +3449,6 @@
"integrity": "sha512-e5lPJi/aui4TO1LpAXIRLySmwXSE8k3b9zoGfd42p67wzxog4WHjiZF3M2uheQih4DGyc25QEV4yRBbpueNiUA==", "integrity": "sha512-e5lPJi/aui4TO1LpAXIRLySmwXSE8k3b9zoGfd42p67wzxog4WHjiZF3M2uheQih4DGyc25QEV4yRBbpueNiUA==",
"dev": true, "dev": true,
"license": "MIT", "license": "MIT",
"peer": true,
"dependencies": { "dependencies": {
"@types/estree": "1.0.8" "@types/estree": "1.0.8"
}, },
@@ -3565,7 +3561,6 @@
"resolved": "https://registry.npmjs.org/svelte/-/svelte-5.48.5.tgz", "resolved": "https://registry.npmjs.org/svelte/-/svelte-5.48.5.tgz",
"integrity": "sha512-NB3o70OxfmnE5UPyLr8uH3IV02Q43qJVAuWigYmsSOYsS0s/rHxP0TF81blG0onF/xkhNvZw4G8NfzIX+By5ZQ==", "integrity": "sha512-NB3o70OxfmnE5UPyLr8uH3IV02Q43qJVAuWigYmsSOYsS0s/rHxP0TF81blG0onF/xkhNvZw4G8NfzIX+By5ZQ==",
"license": "MIT", "license": "MIT",
"peer": true,
"dependencies": { "dependencies": {
"@jridgewell/remapping": "^2.3.4", "@jridgewell/remapping": "^2.3.4",
"@jridgewell/sourcemap-codec": "^1.5.0", "@jridgewell/sourcemap-codec": "^1.5.0",
@@ -3721,7 +3716,6 @@
"integrity": "sha512-p1diW6TqL9L07nNxvRMM7hMMw4c5XOo/1ibL4aAIGmSAt9slTE1Xgw5KWuof2uTOvCg9BY7ZRi+GaF+7sfgPeQ==", "integrity": "sha512-p1diW6TqL9L07nNxvRMM7hMMw4c5XOo/1ibL4aAIGmSAt9slTE1Xgw5KWuof2uTOvCg9BY7ZRi+GaF+7sfgPeQ==",
"dev": true, "dev": true,
"license": "Apache-2.0", "license": "Apache-2.0",
"peer": true,
"bin": { "bin": {
"tsc": "bin/tsc", "tsc": "bin/tsc",
"tsserver": "bin/tsserver" "tsserver": "bin/tsserver"
@@ -3900,7 +3894,6 @@
"integrity": "sha512-+Oxm7q9hDoLMyJOYfUYBuHQo+dkAloi33apOPP56pzj+vsdJDzr+j1NISE5pyaAuKL4A3UD34qd0lx5+kfKp2g==", "integrity": "sha512-+Oxm7q9hDoLMyJOYfUYBuHQo+dkAloi33apOPP56pzj+vsdJDzr+j1NISE5pyaAuKL4A3UD34qd0lx5+kfKp2g==",
"dev": true, "dev": true,
"license": "MIT", "license": "MIT",
"peer": true,
"dependencies": { "dependencies": {
"esbuild": "^0.25.0", "esbuild": "^0.25.0",
"fdir": "^6.4.4", "fdir": "^6.4.4",
@@ -116,6 +116,47 @@
cancelEdit(); cancelEdit();
} }
} }
const COPY_SVG = `<svg xmlns="http://www.w3.org/2000/svg" width="14" height="14" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><rect width="14" height="14" x="8" y="8" rx="2" ry="2"/><path d="M4 16c-1.1 0-2-.9-2-2V4c0-1.1.9-2 2-2h10c1.1 0 2 .9 2 2"/></svg>`;
const CHECK_SVG = `<svg xmlns="http://www.w3.org/2000/svg" width="14" height="14" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M20 6 9 17l-5-5"/></svg>`;
/**
 * Svelte action that injects a "copy code" button into every <pre> element
 * inside `node`, and keeps doing so as new <pre> elements are added (for
 * example while streamed content is still arriving).
 *
 * @param node - container whose descendant <pre> elements get a button
 * @returns an action handle whose `destroy` stops observing `node`
 */
function codeBlockCopy(node: HTMLElement) {
  // Writes `text` to the clipboard. Uses the async Clipboard API in secure
  // contexts and falls back to a hidden textarea + execCommand otherwise.
  // Rejects when the copy did not succeed so the caller never shows a false
  // success state.
  async function copyText(text: string): Promise<void> {
    if (navigator.clipboard && window.isSecureContext) {
      await navigator.clipboard.writeText(text);
      return;
    }
    const ta = document.createElement('textarea');
    ta.value = text;
    ta.style.cssText = 'position:fixed;left:-9999px';
    document.body.appendChild(ta);
    try {
      ta.select();
      // execCommand reports failure through its return value, not by throwing.
      if (!document.execCommand('copy')) {
        throw new Error('execCommand copy was rejected');
      }
    } finally {
      // Always remove the helper element, even when the copy failed.
      document.body.removeChild(ta);
    }
  }

  function attachButtons() {
    // The data attribute marks <pre> elements that already have a button, so
    // repeated observer callbacks never add a second one.
    node.querySelectorAll<HTMLPreElement>('pre:not([data-copy-btn])').forEach((pre) => {
      pre.setAttribute('data-copy-btn', 'true');

      const btn = document.createElement('button');
      // Explicit type: a bare <button> would submit an enclosing form.
      btn.type = 'button';
      btn.className = 'code-copy-btn';
      btn.title = 'Copy code';
      btn.setAttribute('aria-label', 'Copy code');
      btn.innerHTML = COPY_SVG;

      // Pending "revert to copy icon" timer for this button, if any.
      let resetTimer: ReturnType<typeof setTimeout> | undefined;

      btn.addEventListener('click', async () => {
        const text = pre.querySelector('code')?.textContent ?? pre.textContent ?? '';
        try {
          await copyText(text);
          btn.innerHTML = CHECK_SVG;
          btn.classList.add('copied');
          // Restart the 2 second window on every successful click so an
          // earlier timer cannot clear the check icon prematurely.
          clearTimeout(resetTimer);
          resetTimer = setTimeout(() => {
            btn.innerHTML = COPY_SVG;
            btn.classList.remove('copied');
          }, 2000);
        } catch (e) {
          console.error('copy failed', e);
        }
      });

      pre.appendChild(btn);
    });
  }

  attachButtons();
  const mo = new MutationObserver(attachButtons);
  mo.observe(node, { childList: true, subtree: true });
  return { destroy: () => mo.disconnect() };
}
</script> </script>
<div class="flex {role === 'user' ? 'justify-end' : 'justify-start'} mb-4"> <div class="flex {role === 'user' ? 'justify-end' : 'justify-start'} mb-4">
@@ -174,7 +215,7 @@
{#if showRaw} {#if showRaw}
<div class="whitespace-pre-wrap font-mono text-sm">{textContent}</div> <div class="whitespace-pre-wrap font-mono text-sm">{textContent}</div>
{:else} {:else}
<div class="prose prose-sm dark:prose-invert max-w-none"> <div class="prose prose-sm dark:prose-invert max-w-none" use:codeBlockCopy>
{#each renderedParts.blocks as block (block.id)} {#each renderedParts.blocks as block (block.id)}
{@html block.html} {@html block.html}
{/each} {/each}
@@ -299,14 +340,42 @@
<style> <style>
.prose :global(pre) { .prose :global(pre) {
position: relative;
background-color: var(--color-surface); background-color: var(--color-surface);
border: 1px solid var(--color-border, rgba(128, 128, 128, 0.2)); border: 1px solid var(--color-border, rgba(128, 128, 128, 0.2));
border-radius: 0.375rem; border-radius: 0.375rem;
padding: 0.75rem; padding: 0.75rem;
padding-right: 2.5rem;
overflow-x: auto; overflow-x: auto;
margin: 0.5rem 0; margin: 0.5rem 0;
} }
/* Copy button that the codeBlockCopy action appends to each rendered <pre>.
   Absolutely positioned, so it is placed relative to the nearest positioned
   ancestor (the <pre> rule in this stylesheet sets position: relative). */
.prose :global(.code-copy-btn) {
position: absolute;
top: 0.375rem;
right: 0.375rem;
display: flex;
align-items: center;
justify-content: center;
padding: 0.25rem;
border-radius: 0.25rem;
border: 1px solid var(--color-border);
background: var(--color-surface);
color: var(--color-txtsecondary);
cursor: pointer;
transition: background-color 0.15s;
/* presumably collapses the line box so the button hugs the 14px icon; confirm */
line-height: 0;
}
/* Hover feedback on the button itself. */
.prose :global(.code-copy-btn:hover) {
background: var(--color-secondary);
}
/* State applied by codeBlockCopy for 2 seconds after a successful copy.
   NOTE(review): none of the rules visible here lowers the button's opacity,
   so `opacity: 1` has no effect on its own; confirm whether a
   hidden-by-default / reveal-on-pre:hover rule is missing. */
.prose :global(.code-copy-btn.copied) {
color: var(--color-success);
opacity: 1;
}
.prose :global(code) { .prose :global(code) {
font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, monospace; font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, monospace;
font-size: 0.875em; font-size: 0.875em;