Changes and fixes before the release (docs/small tweaks) (#750)

- update README.md with new docker instructions
- update docs/configuration.md
- update .github/workflows to have pinned action versions
- gofmt events package
- fix small bugs in CI scripts
- reduce config options for internal/perf/monitor and config. A ring buffer is used to keep 1hr of entries at max 5s granularity. For long term stats use prometheus monitoring on /metrics

Fixes #744
This commit is contained in:
Benson Wong
2026-05-13 21:18:19 -07:00
committed by GitHub
parent 3e3646f9f9
commit a4b91e08cf
23 changed files with 499 additions and 569 deletions
+15 -3
View File
@@ -146,6 +146,18 @@ metricsMaxInMemory: 1000
# - set to 0 to disable
captureBuffer: 15
# performance: configuration for system monitoring statistics
# - timing values are duration strings like 1s, 1h30m, 90m, 2h10s, etc.
performance:
# disabled: boolean
# - default: false
enable: true
# every: delay between polling for new performance statistics
# - default: 5s
# - minimum duration 5s
every: 5s
# startPort: sets the starting port number for the automatic ${PORT} macro.
# - optional, default: 5800
# - the ${PORT} macro can be used in model.cmd and model.proxy settings
@@ -187,8 +199,7 @@ globalTTL: 0
macros:
# Example of a multi-line macro
"latest-llama": >
/path/to/llama-server/llama-server-ec9e0301
--port ${PORT}
/path/to/llama-server/llama-server-ec9e0301 --port ${PORT}
"default_ctx": 4096
@@ -348,7 +359,8 @@ models:
# the ${temp} macro will remain a float
temperature: ${temp}
note: "The ${MODEL_ID} is running on port ${PORT} temp=${temp}, context=${default_ctx}"
note: "The ${MODEL_ID} is running on port ${PORT} temp=${temp},
context=${default_ctx}"
a_list:
- 1