Compare commits

...

2 Commits

Author SHA1 Message Date
krzychdre b2fcc2daa1 ui-svelte: fix cached tokens total counting -1 sentinel (#760)
The backend uses cache_tokens=-1 as a sentinel for endpoints that don't
report cache stats (embeddings, vLLM). The activity table correctly
renders these as "-", but the totals widget summed the sentinels
directly, so each such request subtracted 1 from the displayed total.

- clamp cache_tokens with Math.max(0, ...) when reducing
2026-05-15 14:42:44 -07:00
cdwaage 6a9c4efc8f fix: use --loop instead of -loop for nvidia-smi (driver 540+ compat) (#759) 2026-05-15 13:20:29 -07:00
2 changed files with 2 additions and 2 deletions
+1 -1
View File
@@ -139,7 +139,7 @@ func tryNvidiaSmi(ctx context.Context, every time.Duration, logger *logmon.Monit
cmd := exec.CommandContext(ctx, "nvidia-smi",
"--query-gpu=index,name,uuid,temperature.gpu,utilization.gpu,memory.used,memory.total,fan.speed,power.draw",
"--format=csv,noheader,nounits",
"-loop", fmt.Sprintf("%d", sec),
"--loop", fmt.Sprintf("%d", sec),
)
stdout, err := cmd.StdoutPipe()
@@ -11,7 +11,7 @@
const totalRequests = $metrics.length;
const totalInputTokens = $metrics.reduce((sum, m) => sum + m.tokens.input_tokens, 0);
const totalOutputTokens = $metrics.reduce((sum, m) => sum + m.tokens.output_tokens, 0);
const totalCacheTokens = $metrics.reduce((sum, m) => sum + m.tokens.cache_tokens, 0);
const totalCacheTokens = $metrics.reduce((sum, m) => sum + Math.max(0, m.tokens.cache_tokens), 0);
const promptPerSecond = $metrics.filter((m) => m.tokens.prompt_per_second > 0).map((m) => m.tokens.prompt_per_second);