proxy,ui: add performance monitoring with Prometheus metrics (#743)
Add a comprehensive performance monitoring system that collects CPU, memory, swap, load average, network IO, and GPU stats. Provides both a REST API for the UI and a Prometheus /metrics endpoint. Backend changes: - New internal/perf package with configurable interval-based stats collection - GPU monitoring via LACT (Unix socket) and nvidia-smi fallback on Linux - Ring buffer (internal/ring) for time-series stat storage - Prometheus /metrics endpoint with all system and GPU metrics - Moved LogMonitor to internal/logmon package - New PerformanceConfig for hot-reloadable monitoring settings - REST /api/performance endpoint replacing SSE streaming UI changes: - New Performance page with real-time charts for CPU, memory, GPU, and network - Reusable PerformanceChart component - LLAMA_SWAP_URL environment variable support - Improved capture dialog display Other: - Example Grafana dashboard for Prometheus metrics - monitor-test standalone binary - Config schema and example updates fixes #596
This commit is contained in:
@@ -142,6 +142,37 @@
|
||||
"default": 5,
|
||||
"description": "Size in megabytes of the buffer for storing request/response captures. Set to 0 to disable captures."
|
||||
},
|
||||
"performance": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"enable": {
|
||||
"type": "boolean",
|
||||
"default": true,
|
||||
"description": "Enable or disable system performance monitoring."
|
||||
},
|
||||
"every": {
|
||||
"type": "string",
|
||||
"pattern": "^[-+]?(\\d+(\\.\\d+)?(ns|us|ms|s|m|h))+$",
|
||||
"default": "15s",
|
||||
"description": "Delay between polling for new performance statistics. Minimum duration is 1s. Lower values use more RAM as stats are kept in memory."
|
||||
},
|
||||
"maxAge": {
|
||||
"type": "string",
|
||||
"pattern": "^[-+]?(\\d+(\\.\\d+)?(ns|us|ms|s|m|h))+$",
|
||||
"default": "1h",
|
||||
"description": "Maximum age of performance statistics before they are eligible for garbage collection."
|
||||
},
|
||||
"gc": {
|
||||
"type": "string",
|
||||
"pattern": "^[-+]?(\\d+(\\.\\d+)?(ns|us|ms|s|m|h))+$",
|
||||
"default": "5m",
|
||||
"description": "Garbage collection frequency for clearing old performance statistics."
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"default": {},
|
||||
"description": "Configuration for system monitoring statistics. Timing values are duration strings like 1s, 1h30m, 90m, 2h10s."
|
||||
},
|
||||
"startPort": {
|
||||
"type": "integer",
|
||||
"default": 5800,
|
||||
|
||||
Reference in New Issue
Block a user