Compare commits

...

4 Commits

Author SHA1 Message Date
Benson Wong c3c258a55d proxy: fix metrics capture for v1/responses (#586)
properly parse anthropic compatible usage data from streaming responses.

closes: #577
2026-03-13 16:50:12 -07:00
Benson Wong 29a38fde0d ui-svelte: upgrade to vite 8 (#585)
Upgrade vite and related dependencies to take advantage of Vite 8's
improved build times via Rolldown and Oxc.

- vite: ^6.3.5 → ^8.0.0
- @sveltejs/vite-plugin-svelte: ^5.0.3 → ^7.0.0
- svelte: ^5.19.0 → ^5.46.4
- vite-plugin-compression2: ^2.4.0 → ^2.5.1
- vitest: ^4.0.18 → ^4.1.0

---------

Co-authored-by: Claude <noreply@anthropic.com>
2026-03-13 08:45:59 -07:00
tesuri d569681daa Change model sorting to natural order (#582)
Use natural sorting for model names.

Previously the model list was sorted lexicographically, which resulted
in unintuitive ordering when numbers were included in the name.

Example:

Before
qwen3.5:2B
qwen3.5:35B-3AB
qwen3.5:9B

After
qwen3.5:2B
qwen3.5:9B
qwen3.5:35B-3AB

This change sorts models using natural order so numeric parts are
compared numerically.
2026-03-12 07:49:34 -07:00
Benson Wong 24efdb76b1 config: add macro support for name and description fields (#578)
Extend macro substitution to the name and description fields of
ModelConfig, matching the behavior already present for cmd, proxy,
checkEndpoint, and filters.

- substitute global/model macros (including MODEL_ID) in name and
description
- substitute PORT macro in name and description when allocated
- validate no unknown macros remain in name and description after
substitution
- add tests for macro substitution, MODEL_ID, and unknown macro error
2026-03-10 08:27:05 -07:00
8 changed files with 1081 additions and 1187 deletions
+6
View File
@@ -308,6 +308,8 @@ func LoadConfigFromReader(r io.Reader) (Config, error) {
modelConfig.Proxy = strings.ReplaceAll(modelConfig.Proxy, macroSlug, macroStr)
modelConfig.CheckEndpoint = strings.ReplaceAll(modelConfig.CheckEndpoint, macroSlug, macroStr)
modelConfig.Filters.StripParams = strings.ReplaceAll(modelConfig.Filters.StripParams, macroSlug, macroStr)
modelConfig.Name = strings.ReplaceAll(modelConfig.Name, macroSlug, macroStr)
modelConfig.Description = strings.ReplaceAll(modelConfig.Description, macroSlug, macroStr)
// Substitute macros in SetParamsByID keys and values
if len(modelConfig.Filters.SetParamsByID) > 0 {
@@ -351,6 +353,8 @@ func LoadConfigFromReader(r io.Reader) (Config, error) {
modelConfig.Cmd = strings.ReplaceAll(modelConfig.Cmd, macroSlug, macroStr)
modelConfig.CmdStop = strings.ReplaceAll(modelConfig.CmdStop, macroSlug, macroStr)
modelConfig.Proxy = strings.ReplaceAll(modelConfig.Proxy, macroSlug, macroStr)
modelConfig.Name = strings.ReplaceAll(modelConfig.Name, macroSlug, macroStr)
modelConfig.Description = strings.ReplaceAll(modelConfig.Description, macroSlug, macroStr)
if len(modelConfig.Metadata) > 0 {
result, err := substituteMacroInValue(modelConfig.Metadata, "PORT", nextPort)
@@ -370,6 +374,8 @@ func LoadConfigFromReader(r io.Reader) (Config, error) {
"proxy": modelConfig.Proxy,
"checkEndpoint": modelConfig.CheckEndpoint,
"filters.stripParams": modelConfig.Filters.StripParams,
"name": modelConfig.Name,
"description": modelConfig.Description,
}
for fieldName, fieldValue := range fieldMap {
+56
View File
@@ -104,6 +104,62 @@ models:
assert.Contains(t, err.Error(), "self-reference")
}
// Test macro substitution in name and description fields
func TestConfig_MacroInNameAndDescription(t *testing.T) {
content := `
startPort: 10000
macros:
"VARIANT": "Q4_K_M"
"FAMILY": "llama"
models:
my-model:
cmd: echo ok
proxy: http://localhost:8080
name: "${FAMILY} ${VARIANT}"
description: "A ${FAMILY} model in ${VARIANT} format"
`
config, err := LoadConfigFromReader(strings.NewReader(content))
assert.NoError(t, err)
assert.Equal(t, "llama Q4_K_M", config.Models["my-model"].Name)
assert.Equal(t, "A llama model in Q4_K_M format", config.Models["my-model"].Description)
}
// Test MODEL_ID macro in name and description fields
func TestConfig_ModelIDInNameAndDescription(t *testing.T) {
content := `
startPort: 10000
models:
llama-3b:
cmd: echo ok
proxy: http://localhost:8080
name: "Model: ${MODEL_ID}"
description: "Running ${MODEL_ID}"
`
config, err := LoadConfigFromReader(strings.NewReader(content))
assert.NoError(t, err)
assert.Equal(t, "Model: llama-3b", config.Models["llama-3b"].Name)
assert.Equal(t, "Running llama-3b", config.Models["llama-3b"].Description)
}
// Test unknown macro in name or description returns an error
func TestConfig_UnknownMacroInNameDescription(t *testing.T) {
content := `
startPort: 10000
models:
test:
cmd: echo ok
proxy: http://localhost:8080
name: "Model ${UNDEFINED}"
`
_, err := LoadConfigFromReader(strings.NewReader(content))
assert.Error(t, err)
assert.Contains(t, err.Error(), "UNDEFINED")
}
// Test undefined macro reference error
func TestConfig_UndefinedMacroReference(t *testing.T) {
content := `
+7 -2
View File
@@ -350,6 +350,11 @@ func processStreamingResponse(modelID string, start time.Time, body []byte) (Tok
usage := parsed.Get("usage")
timings := parsed.Get("timings")
// v1/responses format nests usage under response.usage
if !usage.Exists() {
usage = parsed.Get("response.usage")
}
if usage.Exists() || timings.Exists() {
return parseMetrics(modelID, start, usage, timings)
}
@@ -503,9 +508,9 @@ func filterAcceptEncoding(acceptEncoding string) string {
supported := map[string]bool{"gzip": true, "deflate": true}
var filtered []string
for _, part := range strings.Split(acceptEncoding, ",") {
for part := range strings.SplitSeq(acceptEncoding, ",") {
// Parse encoding and optional quality value (e.g., "gzip;q=1.0")
encoding := strings.TrimSpace(strings.Split(part, ";")[0])
encoding, _, _ := strings.Cut(strings.TrimSpace(part), ";")
if supported[strings.ToLower(encoding)] {
filtered = append(filtered, strings.TrimSpace(part))
}
+29
View File
@@ -709,6 +709,35 @@ data: [DONE]
assert.Equal(t, 0, metrics[0].OutputTokens)
})
t.Run("v1/responses format with nested response.usage", func(t *testing.T) {
mm := newMetricsMonitor(testLogger, 10, 0)
// v1/responses SSE format: usage is nested under response.usage
responseBody := "event: response.completed\n" +
`data: {"type":"response.completed","response":{"id":"resp_abc","object":"response","created_at":1773416985,"status":"completed","model":"test-model","output":[],"usage":{"input_tokens":17,"output_tokens":23,"total_tokens":40}}}` +
"\n\n"
nextHandler := func(modelID string, w http.ResponseWriter, r *http.Request) error {
w.Header().Set("Content-Type", "text/event-stream")
w.WriteHeader(http.StatusOK)
w.Write([]byte(responseBody))
return nil
}
req := httptest.NewRequest("POST", "/v1/responses", nil)
rec := httptest.NewRecorder()
ginCtx, _ := gin.CreateTestContext(rec)
err := mm.wrapHandler("test-model", ginCtx.Writer, req, nextHandler)
assert.NoError(t, err)
metrics := mm.getMetrics()
assert.Equal(t, 1, len(metrics))
assert.Equal(t, "test-model", metrics[0].Model)
assert.Equal(t, 17, metrics[0].InputTokens)
assert.Equal(t, 23, metrics[0].OutputTokens)
})
t.Run("handles empty streaming response records minimal metrics", func(t *testing.T) {
mm := newMetricsMonitor(testLogger, 10, 0)
+1
View File
@@ -0,0 +1 @@
legacy-peer-deps=true
+976 -1179
View File
File diff suppressed because it is too large Load Diff
+5 -5
View File
@@ -12,18 +12,18 @@
"test:watch": "vitest"
},
"devDependencies": {
"@sveltejs/vite-plugin-svelte": "^5.0.3",
"@sveltejs/vite-plugin-svelte": "^7.0.0",
"@tailwindcss/vite": "^4.1.8",
"@tsconfig/svelte": "^5.0.4",
"@types/hast": "^3.0.4",
"@types/node": "^25.1.0",
"svelte": "^5.19.0",
"svelte": "^5.46.4",
"svelte-check": "^4.1.4",
"tailwindcss": "^4.1.8",
"typescript": "~5.8.3",
"vite": "^6.3.5",
"vite-plugin-compression2": "^2.4.0",
"vitest": "^4.0.18"
"vite": "^8.0.0",
"vite-plugin-compression2": "^2.5.1",
"vitest": "^4.1.0"
},
"dependencies": {
"highlight.js": "^11.11.1",
+1 -1
View File
@@ -62,7 +62,7 @@ export function enableAPIEvents(enabled: boolean): void {
const newModels = JSON.parse(message.data) as Model[];
// Sort models by name and id
newModels.sort((a, b) => {
return (a.name + a.id).localeCompare(b.name + b.id);
return (a.name + a.id).localeCompare(b.name + b.id, undefined, { numeric : true} );
});
models.set(newModels);
break;