Changes and fixes before the release (docs/small tweaks) (#750)

- update README.md with new Docker instructions
- update docs/configuration.md
- update .github/workflows to have pinned action versions
- gofmt events package
- fix small bugs in CI scripts
- reduce config options for internal/perf/monitor and config. A ring buffer is used to keep 1hr of entries at max 5s granularity. For long-term stats, use Prometheus monitoring on /metrics.

Fixes #744
2026-05-13 21:18:19 -07:00
parent 3e3646f9f9
commit a4b91e08cf
23 changed files with 499 additions and 569 deletions
@@ -58,25 +58,15 @@ captureBuffer: 15
 # performance: configuration for system monitoring statistics
 # - timing values are duration strings like 1s, 1h30m, 90m, 2h10s, etc.
 performance:
-  # enabled: boolean
-  # - default: true
-  enable: true
+  # disabled: boolean
+  # - default: false
+  disabled: false

  # every: delay between polling for new performance statistics
-  # - default: 15s
-  # - minimum duration 1s
-  # - note: setting this very low will use up more RAM as stats are kept in memory.
+  # - default: 5s
+  # - minimum: 5s
  every: 15s

-  # maxAge: maximum age of a performance statistics before it is eligible for garbage collection
-  # - default: 1h
-  maxAge: 12h
-
-  # gc: garbage collection frequency in seconds
-  # - how many seconds the garbage collector runs to clear old stats
-  # - default 5m
-  gc: 5m
-
 # startPort: sets the starting port number for the automatic ${PORT} macro.
 # - optional, default: 5800
 # - the ${PORT} macro can be used in model.cmd and model.proxy settings
@@ -118,8 +108,7 @@ globalTTL: 0
 macros:
   # Example of a macro written as a folded block scalar (>): the value may span several lines, which are joined with spaces
  "latest-llama": >
-    /path/to/llama-server/llama-server-ec9e0301
-    --port ${PORT}
+    /path/to/llama-server/llama-server-ec9e0301 --port ${PORT}

  "default_ctx": 4096

@@ -279,7 +268,8 @@ models:

      # the ${temp} macro will remain a float
      temperature: ${temp}
-      note: "The ${MODEL_ID} is running on port ${PORT} temp=${temp}, context=${default_ctx}"
+      note: "The ${MODEL_ID} is running on port ${PORT} temp=${temp},
+        context=${default_ctx}"

      a_list:
        - 1
@@ -291,7 +281,7 @@ models:
        b: 2
        # objects can contain complex types with macro substitution
        # becomes: c: [0.7, false, "model: llama"]
-        c: ["${temp}", false, "model: ${MODEL_ID}"]
+        c: [ "${temp}", false, "model: ${MODEL_ID}" ]

    # concurrencyLimit: overrides the allowed number of active parallel requests to a model
    # - optional, default: 0