Changes and fixes before the release (docs/small tweaks) (#750)
- update README.md with new Docker instructions - update docs/configuration.md - update .github/workflows to have pinned action versions - gofmt events package - fix small bugs in CI scripts - reduce config options for internal/perf/monitor and config. A ring buffer is used to keep 1hr of entries at max 5s granularity. For long-term stats, use Prometheus monitoring on /metrics. Fixes #744
This commit is contained in:
+9
-19
@@ -58,25 +58,15 @@ captureBuffer: 15
|
||||
# performance: configuration for system monitoring statistics
|
||||
# - timing values are duration strings like 1s, 1h30m, 90m, 2h10s, etc.
|
||||
performance:
|
||||
# enabled: boolean
|
||||
# - default: true
|
||||
enable: true
|
||||
# disabled: boolean
|
||||
# - default: false
|
||||
disabled: false
|
||||
|
||||
# every: delay between polling for new performance statistics
|
||||
# - default: 15s
|
||||
# - minimum duration 1s
|
||||
# - note: setting this very low will use up more RAM as stats are kept in memory.
|
||||
# - default: 5s
|
||||
# - minimum duration 5s
|
||||
every: 15s
|
||||
|
||||
# maxAge: maximum age of a performance statistic before it is eligible for garbage collection
|
||||
# - default: 1h
|
||||
maxAge: 12h
|
||||
|
||||
# gc: garbage collection frequency, given as a duration string
|
||||
# - how often the garbage collector runs to clear old stats
|
||||
# - default: 5m
|
||||
gc: 5m
|
||||
|
||||
# startPort: sets the starting port number for the automatic ${PORT} macro.
|
||||
# - optional, default: 5800
|
||||
# - the ${PORT} macro can be used in model.cmd and model.proxy settings
|
||||
@@ -118,8 +108,7 @@ globalTTL: 0
|
||||
macros:
|
||||
# Example of a multi-line macro
|
||||
"latest-llama": >
|
||||
/path/to/llama-server/llama-server-ec9e0301
|
||||
--port ${PORT}
|
||||
/path/to/llama-server/llama-server-ec9e0301 --port ${PORT}
|
||||
|
||||
"default_ctx": 4096
|
||||
|
||||
@@ -279,7 +268,8 @@ models:
|
||||
|
||||
# the ${temp} macro will remain a float
|
||||
temperature: ${temp}
|
||||
note: "The ${MODEL_ID} is running on port ${PORT} temp=${temp}, context=${default_ctx}"
|
||||
note: "The ${MODEL_ID} is running on port ${PORT} temp=${temp},
|
||||
context=${default_ctx}"
|
||||
|
||||
a_list:
|
||||
- 1
|
||||
@@ -291,7 +281,7 @@ models:
|
||||
b: 2
|
||||
# objects can contain complex types with macro substitution
|
||||
# becomes: c: [0.7, false, "model: llama"]
|
||||
c: ["${temp}", false, "model: ${MODEL_ID}"]
|
||||
c: [ "${temp}", false, "model: ${MODEL_ID}" ]
|
||||
|
||||
# concurrencyLimit: overrides the allowed number of active parallel requests to a model
|
||||
# - optional, default: 0
|
||||
|
||||
Reference in New Issue
Block a user