diff --git a/Makefile b/Makefile index 243d16c3..6a97cf8a 100644 --- a/Makefile +++ b/Makefile @@ -25,15 +25,15 @@ proxy/ui_dist/placeholder.txt: # use cached test results while developing test-dev: proxy/ui_dist/placeholder.txt - go test -short ./proxy/... - staticcheck ./proxy/... || true + go test -short ./proxy/... ./internal/... + staticcheck ./proxy/... ./internal/... || true test: proxy/ui_dist/placeholder.txt - go test -short -count=1 ./proxy/... + go test -short -count=1 ./proxy/... ./internal/... # for CI - full test (takes longer) test-all: proxy/ui_dist/placeholder.txt - go test -race -count=1 ./proxy/... + go test -race -count=1 ./proxy/... ./internal/... ui/node_modules: cd ui-svelte && npm install diff --git a/README.md b/README.md index 82ea5533..a68d3393 100644 --- a/README.md +++ b/README.md @@ -52,6 +52,7 @@ Built in Go for performance and simplicity, llama-swap has zero dependencies and - `GET /logs/stream/upstream` streams upstream process logs only. - `GET /logs/stream/{model_id}` streams logs for one model (including IDs with slashes, like `author/model`). 
- `/health` - just returns "OK" + - `/metrics` - system and GPU metrics for prometheus - ✅ API Key support - define keys to restrict access to API endpoints - ✅ Customizable - Run concurrent models with a custom DSL swap matrix ([#643](https://github.com/mostlygeek/llama-swap/issues/643)) diff --git a/cmd/monitor-test/main.go b/cmd/monitor-test/main.go new file mode 100644 index 00000000..6a5b6a29 --- /dev/null +++ b/cmd/monitor-test/main.go @@ -0,0 +1,103 @@ +package main + +import ( + "context" + "errors" + "flag" + "fmt" + "strings" + "time" + + "github.com/mostlygeek/llama-swap/internal/logmon" + "github.com/mostlygeek/llama-swap/internal/perf" + "github.com/mostlygeek/llama-swap/proxy/config" +) + +// printSysStat writes one system snapshot (per-core CPU, memory, swap, load averages) to stdout. +func printSysStat(s perf.SysStat) { + cores := make([]string, len(s.CpuUtilPerCore)) + for i, v := range s.CpuUtilPerCore { + cores[i] = fmt.Sprintf("%.1f%%", v) + } + fmt.Printf("[SYS %s]\n", s.Timestamp.Format("15:04:05")) + fmt.Printf(" CPU: %s\n", strings.Join(cores, " ")) + fmt.Printf(" Mem: %d MB used / %d MB total (%d MB free)\n", s.MemUsedMB, s.MemTotalMB, s.MemFreeMB) + fmt.Printf(" Swap: %d MB used / %d MB total\n", s.SwapUsedMB, s.SwapTotalMB) + fmt.Printf(" Load: %.2f %.2f %.2f (1m 5m 15m)\n", s.LoadAvg1, s.LoadAvg5, s.LoadAvg15) +} + +// printGpuStats writes one block per entry in gpus (utilization, memory, temperature, fan, power) to stdout. +func printGpuStats(gpus []perf.GpuStat) { + for _, g := range gpus { + fmt.Printf("[GPU %d %s]\n", g.ID, g.Name) + fmt.Printf(" Util: GPU %.1f%% Mem %.1f%%\n", g.GpuUtilPct, g.MemUtilPct) + fmt.Printf(" Mem: %d MB used / %d MB total\n", g.MemUsedMB, g.MemTotalMB) + fmt.Printf(" Temp: %d°C Fan: %.1f%% Power: %.1f W\n", g.TempC, g.FanSpeedPct, g.PowerDrawW) + } +} + +// main prints one system sample and one GPU sample, then, when -stream is set, +// subscribes to a perf monitor and prints every sample until the process is killed. +func main() { + stream := flag.Bool("stream", false, "stream stats") + interval := flag.Duration("t", time.Second, "polling interval (clamped to 1s–1h)") + flag.Parse() + + // Clamp the polling interval to the 1s–1h range advertised by the -t flag. + every := *interval + if every < time.Second { + every = time.Second + } else if every > time.Hour { + every = time.Hour + } + + l := logmon.New() + l.SetLogLevel(logmon.LevelDebug) + + s, err 
:= perf.ReadSysStats() + if err != nil && !errors.Is(err, perf.ErrNotImplemented) { + fmt.Println("Sys Error:", err) + return + } + printSysStat(s) + + // Bound the one-shot GPU read to 5s so a collector that never reports cannot hang the tool. + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + + gpuCh, err := perf.GetGpuStats(ctx, every, l) + if err != nil && !errors.Is(err, perf.ErrNotImplemented) && !errors.Is(err, perf.ErrNoGpuTool) { + fmt.Println("GPU Init Error:", err) + return + } + + if gpuCh != nil { + select { + case g := <-gpuCh: + printGpuStats(g) + case <-ctx.Done(): + fmt.Println("GPU: timed out waiting for stats") + } + } + + if *stream { + m, err := perf.New(config.PerformanceConfig{Enable: true, Every: every}, l) + // Same tolerance as the GetGpuStats check above; any other error aborts instead of being dropped. + if err != nil && !errors.Is(err, perf.ErrNotImplemented) && !errors.Is(err, perf.ErrNoGpuTool) { + fmt.Println("Monitor Init Error:", err) + return + } + m.Start() + defer m.Stop() + sysCh, gpuCh, unsub := m.Subscribe() + defer unsub() + for { + select { + case s := <-sysCh: + printSysStat(s) + case g := <-gpuCh: + printGpuStats(g) + } + } + } +} diff --git a/config-schema.json b/config-schema.json index 36161d9a..7039ace2 100644 --- a/config-schema.json +++ b/config-schema.json @@ -142,6 +142,37 @@ "default": 5, "description": "Size in megabytes of the buffer for storing request/response captures. Set to 0 to disable captures." }, + "performance": { + "type": "object", + "properties": { + "enable": { + "type": "boolean", + "default": true, + "description": "Enable or disable system performance monitoring." + }, + "every": { + "type": "string", + "pattern": "^[-+]?(\\d+(\\.\\d+)?(ns|us|ms|s|m|h))+$", + "default": "15s", + "description": "Delay between polling for new performance statistics. Minimum duration is 1s. Lower values use more RAM as stats are kept in memory." + }, + "maxAge": { + "type": "string", + "pattern": "^[-+]?(\\d+(\\.\\d+)?(ns|us|ms|s|m|h))+$", + "default": "1h", + "description": "Maximum age of performance statistics before they are eligible for garbage collection." 
+ }, + "gc": { + "type": "string", + "pattern": "^[-+]?(\\d+(\\.\\d+)?(ns|us|ms|s|m|h))+$", + "default": "5m", + "description": "Garbage collection frequency for clearing old performance statistics." + } + }, + "additionalProperties": false, + "default": {}, + "description": "Configuration for system monitoring statistics. Timing values are duration strings like 1s, 1h30m, 90m, 2h10s." + }, "startPort": { "type": "integer", "default": 5800, diff --git a/config.example.yaml b/config.example.yaml index 5569dd33..060f8d23 100644 --- a/config.example.yaml +++ b/config.example.yaml @@ -55,6 +55,28 @@ metricsMaxInMemory: 1000 # - set to 0 to disable captureBuffer: 15 +# performance: configuration for system monitoring statistics +# - timing values are duration strings like 1s, 1h30m, 90m, 2h10s, etc. +performance: + # enable: boolean + # - default: true + enable: true + + # every: delay between polling for new performance statistics + # - default: 15s + # - minimum duration 1s + # - note: setting this very low will use up more RAM as stats are kept in memory. + every: 15s + + # maxAge: maximum age of a performance statistic before it is eligible for garbage collection + # - default: 1h + maxAge: 12h + + # gc: garbage collection frequency + # - how often the garbage collector runs to clear old stats (duration string) + # - default: 5m + gc: 5m + # startPort: sets the starting port number for the automatic ${PORT} macro.
# - optional, default: 5800 # - the ${PORT} macro can be used in model.cmd and model.proxy settings diff --git a/docs/grafana/example-dashboard.json b/docs/grafana/example-dashboard.json new file mode 100644 index 00000000..3351b9dd --- /dev/null +++ b/docs/grafana/example-dashboard.json @@ -0,0 +1,1346 @@ +{ + "annotations": [ + { + "kind": "AnnotationQuery", + "spec": { + "builtIn": true, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "query": { + "datasource": { + "name": "-- Grafana --" + }, + "group": "grafana", + "kind": "DataQuery", + "spec": {}, + "version": "v0" + } + } + } + ], + "cursorSync": "Off", + "editable": true, + "elements": { + "panel-1": { + "kind": "Panel", + "spec": { + "data": { + "kind": "QueryGroup", + "spec": { + "queries": [ + { + "kind": "PanelQuery", + "spec": { + "hidden": false, + "query": { + "datasource": { + "uid": "${DS_PROMETHEUS}" + }, + "group": "prometheus", + "kind": "DataQuery", + "spec": { + "editorMode": "code", + "expr": "llamaswap_cpu_util_percent{instance=\"$instance\"}", + "legendFormat": "core {{core}}", + "range": true + }, + "version": "v0" + }, + "refId": "A" + } + } + ], + "queryOptions": {}, + "transformations": [] + } + }, + "description": "", + "id": 1, + "links": [], + "title": "CPU Utilization", + "vizConfig": { + "group": "timeseries", + "kind": "VizConfig", + "spec": { + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + 
"showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 90 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "options": { + "annotations": { + "clustering": -1, + "multiLane": false + }, + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "none" + } + } + }, + "version": "13.0.1" + } + } + }, + "panel-2": { + "kind": "Panel", + "spec": { + "data": { + "kind": "QueryGroup", + "spec": { + "queries": [ + { + "kind": "PanelQuery", + "spec": { + "hidden": false, + "query": { + "datasource": { + "uid": "${DS_PROMETHEUS}" + }, + "group": "prometheus", + "kind": "DataQuery", + "spec": { + "editorMode": "code", + "expr": "llamaswap_load_average{instance=\"$instance\"}", + "legendFormat": "{{interval}}", + "range": true + }, + "version": "v0" + }, + "refId": "A" + } + } + ], + "queryOptions": {}, + "transformations": [] + } + }, + "description": "", + "id": 2, + "links": [], + "title": "Load Average", + "vizConfig": { + "group": "timeseries", + "kind": "VizConfig", + "spec": { + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + 
"group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "options": { + "annotations": { + "clustering": -1, + "multiLane": false + }, + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "none" + } + } + }, + "version": "13.0.1" + } + } + }, + "panel-3": { + "kind": "Panel", + "spec": { + "data": { + "kind": "QueryGroup", + "spec": { + "queries": [ + { + "kind": "PanelQuery", + "spec": { + "hidden": false, + "query": { + "datasource": { + "uid": "${DS_PROMETHEUS}" + }, + "group": "prometheus", + "kind": "DataQuery", + "spec": { + "editorMode": "code", + "expr": "llamaswap_memory_used_bytes{instance=\"$instance\"}", + "legendFormat": "RAM Used", + "range": true + }, + "version": "v0" + }, + "refId": "A" + } + }, + { + "kind": "PanelQuery", + "spec": { + "hidden": false, + "query": { + "datasource": { + "uid": "${DS_PROMETHEUS}" + }, + "group": "prometheus", + "kind": "DataQuery", + "spec": { + "editorMode": "code", + "expr": "llamaswap_memory_free_bytes{instance=\"$instance\"}", + "legendFormat": "RAM Free", + "range": true + }, + "version": "v0" + }, + "refId": "B" + } + }, + { + "kind": "PanelQuery", + "spec": { + "hidden": false, + "query": { + "datasource": { + "uid": "${DS_PROMETHEUS}" + }, + "group": "prometheus", + "kind": "DataQuery", + "spec": { + "editorMode": "code", + "expr": "llamaswap_swap_used_bytes{instance=\"$instance\"}", + "legendFormat": "Swap Used", + "range": true + }, + "version": "v0" + }, + "refId": "C" + } + }, + { + "kind": "PanelQuery", + "spec": { + "hidden": false, + "query": { + "datasource": { + "uid": "${DS_PROMETHEUS}" + }, + "group": "prometheus", + "kind": "DataQuery", + "spec": { + "editorMode": "code", + "expr": 
"llamaswap_memory_total_bytes{instance=\"$instance\"}", + "legendFormat": "RAM Total", + "range": true + }, + "version": "v0" + }, + "refId": "D" + } + } + ], + "queryOptions": {}, + "transformations": [] + } + }, + "description": "", + "id": 3, + "links": [], + "title": "RAM + Swap", + "vizConfig": { + "group": "timeseries", + "kind": "VizConfig", + "spec": { + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "options": { + "annotations": { + "clustering": -1, + "multiLane": false + }, + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "none" + } + } + }, + "version": "13.0.1" + } + } + }, + "panel-4": { + "kind": "Panel", + "spec": { + "data": { + "kind": "QueryGroup", + "spec": { + "queries": [ + { + "kind": "PanelQuery", + "spec": { + "hidden": false, + "query": { + "datasource": { + "uid": "${DS_PROMETHEUS}" + }, + "group": "prometheus", + "kind": "DataQuery", + "spec": { + "editorMode": "code", + "expr": "rate(llamaswap_network_bytes_total{instance=\"$instance\",direction=\"recv\"}[1m])", + "legendFormat": 
"{{interface}} recv", + "range": true + }, + "version": "v0" + }, + "refId": "A" + } + }, + { + "kind": "PanelQuery", + "spec": { + "hidden": false, + "query": { + "datasource": { + "uid": "${DS_PROMETHEUS}" + }, + "group": "prometheus", + "kind": "DataQuery", + "spec": { + "editorMode": "code", + "expr": "rate(llamaswap_network_bytes_total{instance=\"$instance\",direction=\"sent\"}[1m])", + "legendFormat": "{{interface}} sent", + "range": true + }, + "version": "v0" + }, + "refId": "B" + } + } + ], + "queryOptions": {}, + "transformations": [] + } + }, + "description": "", + "id": 4, + "links": [], + "title": "Network Bandwidth", + "vizConfig": { + "group": "timeseries", + "kind": "VizConfig", + "spec": { + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 5, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + } + ] + }, + "unit": "Bps" + }, + "overrides": [] + }, + "options": { + "annotations": { + "clustering": -1, + "multiLane": false + }, + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "none" + } + } + }, + "version": "13.0.1" + } + } + }, + "panel-5": { + "kind": "Panel", + "spec": { + "data": { + "kind": "QueryGroup", + "spec": 
{ + "queries": [ + { + "kind": "PanelQuery", + "spec": { + "hidden": false, + "query": { + "datasource": { + "uid": "${DS_PROMETHEUS}" + }, + "group": "prometheus", + "kind": "DataQuery", + "spec": { + "editorMode": "code", + "expr": "llamaswap_gpu_util_percent{instance=\"$instance\"}", + "legendFormat": "{{name}}", + "range": true + }, + "version": "v0" + }, + "refId": "A" + } + } + ], + "queryOptions": {}, + "transformations": [] + } + }, + "description": "", + "id": 5, + "links": [], + "title": "GPU Utilization", + "vizConfig": { + "group": "timeseries", + "kind": "VizConfig", + "spec": { + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 90 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "options": { + "annotations": { + "clustering": -1, + "multiLane": false + }, + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "none" + } + } + }, + "version": "13.0.1" + } + } + }, + "panel-6": { + "kind": "Panel", + "spec": { + "data": { + "kind": "QueryGroup", + "spec": { + "queries": [ + { + "kind": "PanelQuery", + "spec": { 
+ "hidden": false, + "query": { + "datasource": { + "uid": "${DS_PROMETHEUS}" + }, + "group": "prometheus", + "kind": "DataQuery", + "spec": { + "editorMode": "code", + "expr": "llamaswap_gpu_memory_util_percent{instance=\"$instance\"}", + "legendFormat": "{{name}}", + "range": true + }, + "version": "v0" + }, + "refId": "A" + } + } + ], + "queryOptions": {}, + "transformations": [] + } + }, + "description": "", + "id": 6, + "links": [], + "title": "GPU Memory Utilization", + "vizConfig": { + "group": "timeseries", + "kind": "VizConfig", + "spec": { + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 90 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "options": { + "annotations": { + "clustering": -1, + "multiLane": false + }, + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "none" + } + } + }, + "version": "13.0.1" + } + } + }, + "panel-7": { + "kind": "Panel", + "spec": { + "data": { + "kind": "QueryGroup", + "spec": { + "queries": [ + { + "kind": "PanelQuery", + "spec": { + "hidden": false, + "query": { + 
"datasource": { + "uid": "${DS_PROMETHEUS}" + }, + "group": "prometheus", + "kind": "DataQuery", + "spec": { + "editorMode": "code", + "expr": "llamaswap_gpu_temperature_celsius{instance=\"$instance\"}", + "legendFormat": "{{name}}", + "range": true + }, + "version": "v0" + }, + "refId": "A" + } + } + ], + "queryOptions": {}, + "transformations": [] + } + }, + "description": "", + "id": 7, + "links": [], + "title": "GPU Temperature", + "vizConfig": { + "group": "timeseries", + "kind": "VizConfig", + "spec": { + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 5, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "line" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "yellow", + "value": 70 + }, + { + "color": "red", + "value": 85 + } + ] + }, + "unit": "celsius" + }, + "overrides": [] + }, + "options": { + "annotations": { + "clustering": -1, + "multiLane": false + }, + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "none" + } + } + }, + "version": "13.0.1" + } + } + }, + "panel-8": { + "kind": "Panel", + "spec": { + "data": { + "kind": "QueryGroup", + "spec": { + "queries": [ + { + "kind": "PanelQuery", + "spec": { + "hidden": false, + "query": { + "datasource": { + 
"uid": "${DS_PROMETHEUS}" + }, + "group": "prometheus", + "kind": "DataQuery", + "spec": { + "editorMode": "code", + "expr": "llamaswap_gpu_power_draw_watts{instance=\"$instance\"}", + "legendFormat": "{{name}}", + "range": true + }, + "version": "v0" + }, + "refId": "A" + } + } + ], + "queryOptions": {}, + "transformations": [] + } + }, + "description": "", + "id": 8, + "links": [], + "title": "GPU Power Draw", + "vizConfig": { + "group": "timeseries", + "kind": "VizConfig", + "spec": { + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + } + ] + }, + "unit": "watt" + }, + "overrides": [] + }, + "options": { + "annotations": { + "clustering": -1, + "multiLane": false + }, + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "none" + } + } + }, + "version": "13.0.1" + } + } + }, + "panel-9": { + "kind": "Panel", + "spec": { + "data": { + "kind": "QueryGroup", + "spec": { + "queries": [ + { + "kind": "PanelQuery", + "spec": { + "hidden": false, + "query": { + "datasource": { + "uid": "${DS_PROMETHEUS}" + }, + "group": "prometheus", + "kind": "DataQuery", + "spec": { + "editorMode": 
"code", + "expr": "llamaswap_gpu_fan_speed_percent{instance=\"$instance\"}", + "legendFormat": "{{name}}", + "range": true + }, + "version": "v0" + }, + "refId": "A" + } + } + ], + "queryOptions": {}, + "transformations": [] + } + }, + "description": "", + "id": 9, + "links": [], + "title": "GPU Fan Speed", + "vizConfig": { + "group": "timeseries", + "kind": "VizConfig", + "spec": { + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 5, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 90 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "options": { + "annotations": { + "clustering": -1, + "multiLane": false + }, + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "none" + } + } + }, + "version": "13.0.1" + } + } + } + }, + "layout": { + "kind": "AutoGridLayout", + "spec": { + "columnWidthMode": "standard", + "items": [ + { + "kind": "AutoGridLayoutItem", + "spec": { + "element": { + "kind": "ElementReference", + "name": "panel-1" + } + } + }, + { + "kind": "AutoGridLayoutItem", + "spec": { + "element": { + "kind": "ElementReference", + "name": "panel-2" + } + } + }, + { + 
"kind": "AutoGridLayoutItem", + "spec": { + "element": { + "kind": "ElementReference", + "name": "panel-3" + } + } + }, + { + "kind": "AutoGridLayoutItem", + "spec": { + "element": { + "kind": "ElementReference", + "name": "panel-4" + } + } + }, + { + "kind": "AutoGridLayoutItem", + "spec": { + "element": { + "kind": "ElementReference", + "name": "panel-5" + } + } + }, + { + "kind": "AutoGridLayoutItem", + "spec": { + "element": { + "kind": "ElementReference", + "name": "panel-6" + } + } + }, + { + "kind": "AutoGridLayoutItem", + "spec": { + "element": { + "kind": "ElementReference", + "name": "panel-7" + } + } + }, + { + "kind": "AutoGridLayoutItem", + "spec": { + "element": { + "kind": "ElementReference", + "name": "panel-8" + } + } + }, + { + "kind": "AutoGridLayoutItem", + "spec": { + "element": { + "kind": "ElementReference", + "name": "panel-9" + } + } + } + ], + "maxColumnCount": 3, + "rowHeightMode": "standard" + } + }, + "links": [], + "liveNow": false, + "preferences": { + "layout": { + "kind": "AutoGridLayout", + "spec": { + "columnWidthMode": "standard", + "items": [], + "maxColumnCount": 3, + "rowHeightMode": "standard" + } + } + }, + "preload": false, + "tags": [], + "timeSettings": { + "autoRefresh": "10s", + "autoRefreshIntervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "fiscalYearStartMonth": 0, + "from": "now-15m", + "hideTimepicker": false, + "timezone": "browser", + "to": "now" + }, + "title": "llama-swap resource monitor", + "variables": [ + { + "kind": "QueryVariable", + "spec": { + "allowCustomValue": true, + "current": { + "text": "", + "value": "" + }, + "definition": "label_values(instance)", + "hide": "dontHide", + "includeAll": false, + "multi": false, + "name": "instance", + "options": [], + "query": { + "datasource": { + "uid": "${DS_PROMETHEUS}" + }, + "group": "prometheus", + "kind": "DataQuery", + "spec": { + "qryType": 1, + "query": "label_values(instance)", + "refId": 
"PrometheusVariableQueryEditor-VariableQuery" + }, + "version": "v0" + }, + "refresh": "onDashboardLoad", + "regex": "", + "regexApplyTo": "value", + "skipUrlSync": false, + "sort": "disabled" + } + } + ] +} diff --git a/go.mod b/go.mod index 314ea4be..083cc5b0 100644 --- a/go.mod +++ b/go.mod @@ -7,7 +7,8 @@ require ( github.com/fxamacker/cbor/v2 v2.9.1 github.com/gin-gonic/gin v1.10.0 github.com/klauspost/compress v1.18.5 - github.com/stretchr/testify v1.9.0 + github.com/shirou/gopsutil/v4 v4.26.4 + github.com/stretchr/testify v1.11.1 github.com/tidwall/gjson v1.18.0 github.com/tidwall/sjson v1.2.5 gopkg.in/yaml.v3 v3.0.1 @@ -19,8 +20,10 @@ require ( github.com/cloudwego/base64x v0.1.4 // indirect github.com/cloudwego/iasm v0.2.0 // indirect github.com/davecgh/go-spew v1.1.1 // indirect + github.com/ebitengine/purego v0.10.0 // indirect github.com/gabriel-vasile/mimetype v1.4.3 // indirect github.com/gin-contrib/sse v0.1.0 // indirect + github.com/go-ole/go-ole v1.2.6 // indirect github.com/go-playground/locales v0.14.1 // indirect github.com/go-playground/universal-translator v0.18.1 // indirect github.com/go-playground/validator/v10 v10.20.0 // indirect @@ -28,20 +31,25 @@ require ( github.com/json-iterator/go v1.1.12 // indirect github.com/klauspost/cpuid/v2 v2.2.7 // indirect github.com/leodido/go-urn v1.4.0 // indirect + github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0 // indirect github.com/mattn/go-isatty v0.0.20 // indirect github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect github.com/modern-go/reflect2 v1.0.2 // indirect github.com/pelletier/go-toml/v2 v2.2.2 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect + github.com/power-devops/perfstat v0.0.0-20240221224432-82ca36839d55 // indirect github.com/tidwall/match v1.1.1 // indirect github.com/tidwall/pretty v1.2.1 // indirect + github.com/tklauser/go-sysconf v0.3.16 // indirect + github.com/tklauser/numcpus v0.11.0 // indirect 
github.com/twitchyliquid64/golang-asm v0.15.1 // indirect github.com/ugorji/go/codec v1.2.12 // indirect github.com/x448/float16 v0.8.4 // indirect + github.com/yusufpapurcu/wmi v1.2.4 // indirect golang.org/x/arch v0.8.0 // indirect golang.org/x/crypto v0.45.0 // indirect golang.org/x/net v0.47.0 // indirect - golang.org/x/sys v0.38.0 // indirect + golang.org/x/sys v0.41.0 // indirect golang.org/x/text v0.31.0 // indirect google.golang.org/protobuf v1.34.1 // indirect ) diff --git a/go.sum b/go.sum index 18d4093e..131f9fb4 100644 --- a/go.sum +++ b/go.sum @@ -11,6 +11,8 @@ github.com/cloudwego/iasm v0.2.0/go.mod h1:8rXZaNYT2n95jn+zTI1sDr+IgcD2GVs0nlbbQ github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/ebitengine/purego v0.10.0 h1:QIw4xfpWT6GWTzaW5XEKy3HXoqrJGx1ijYHzTF0/ISU= +github.com/ebitengine/purego v0.10.0/go.mod h1:iIjxzd6CiRiOG0UyXP+V1+jWqUXVjPKLAI0mRfJZTmQ= github.com/fxamacker/cbor/v2 v2.9.1 h1:2rWm8B193Ll4VdjsJY28jxs70IdDsHRWgQYAI80+rMQ= github.com/fxamacker/cbor/v2 v2.9.1/go.mod h1:vM4b+DJCtHn+zz7h3FFp/hDAI9WNWCsZj23V5ytsSxQ= github.com/gabriel-vasile/mimetype v1.4.3 h1:in2uUcidCuFcDKtdcBxlR0rJ1+fsokWf+uqxgUFjbI0= @@ -19,6 +21,8 @@ github.com/gin-contrib/sse v0.1.0 h1:Y/yl/+YNO8GZSjAhjMsSuLt29uWRFHdHYUb5lYOV9qE github.com/gin-contrib/sse v0.1.0/go.mod h1:RHrZQHXnP2xjPF+u1gW/2HnVO7nvIa9PG3Gm+fLHvGI= github.com/gin-gonic/gin v1.10.0 h1:nTuyha1TYqgedzytsKYqna+DfLos46nTv2ygFy86HFU= github.com/gin-gonic/gin v1.10.0/go.mod h1:4PMNQiOhvDRa013RKVbsiNwoyezlm2rm0uX/T7kzp5Y= +github.com/go-ole/go-ole v1.2.6 h1:/Fpf6oFPoeFik9ty7siob0G6Ke8QvQEuVcuChpwXzpY= +github.com/go-ole/go-ole v1.2.6/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0= github.com/go-playground/assert/v2 v2.2.0 h1:JvknZsQTYeFEAhQwI4qEt9cyV5ONwRHC+lYKSsYSR8s= 
github.com/go-playground/assert/v2 v2.2.0/go.mod h1:VDjEfimB/XKnb+ZQfWdccd7VUvScMdVu0Titje2rxJ4= github.com/go-playground/locales v0.14.1 h1:EWaQ/wswjilfKLTECiXz7Rh+3BjFhfDFKv/oXslEjJA= @@ -29,8 +33,9 @@ github.com/go-playground/validator/v10 v10.20.0 h1:K9ISHbSaI0lyB2eWMPJo+kOS/FBEx github.com/go-playground/validator/v10 v10.20.0/go.mod h1:dbuPbCMFw/DrkbEynArYaCwl3amGuJotoKCe95atGMM= github.com/goccy/go-json v0.10.2 h1:CrxCmQqYDkv1z7lO7Wbh2HN93uovUHgrECaO5ZrCXAU= github.com/goccy/go-json v0.10.2/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I= -github.com/google/go-cmp v0.5.5 h1:Khx7svrCpmxxtHBq5j2mp/xVjsi8hQMfNLvJFAlrGgU= -github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= +github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= @@ -42,6 +47,8 @@ github.com/klauspost/cpuid/v2 v2.2.7/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZY github.com/knz/go-libedit v1.10.1/go.mod h1:MZTVkCWyz0oBc7JOWP3wNAzd002ZbM/5hgShxwh4x8M= github.com/leodido/go-urn v1.4.0 h1:WT9HwE9SGECu3lg4d/dIA+jxlljEa1/ffXKmRjqdmIQ= github.com/leodido/go-urn v1.4.0/go.mod h1:bvxc+MVxLKB4z00jd1z+Dvzr47oO32F/QSNjSBOlFxI= +github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0 h1:6E+4a0GO5zZEnZ81pIr0yLvtUWk2if982qA3F3QD6H4= +github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0/go.mod h1:zJYVVT2jmtg6P3p1VtQj7WsuWi/y4VnjVBn7F8KPB3I= github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY= github.com/mattn/go-isatty v0.0.20/go.mod 
h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= @@ -53,6 +60,10 @@ github.com/pelletier/go-toml/v2 v2.2.2 h1:aYUidT7k73Pcl9nb2gScu7NSrKCSHIDE89b3+6 github.com/pelletier/go-toml/v2 v2.2.2/go.mod h1:1t835xjRzz80PqgE6HHgN2JOsmgYu/h4qDAS4n929Rs= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/power-devops/perfstat v0.0.0-20240221224432-82ca36839d55 h1:o4JXh1EVt9k/+g42oCprj/FisM4qX9L3sZB3upGN2ZU= +github.com/power-devops/perfstat v0.0.0-20240221224432-82ca36839d55/go.mod h1:OmDBASR4679mdNQnz2pUhc2G8CO2JrUAVFDRBDP/hJE= +github.com/shirou/gopsutil/v4 v4.26.4 h1:B4SXVbcwTyrocPHEmWBC4uCYr4Xcu3MK1TXqbprAOWY= +github.com/shirou/gopsutil/v4 v4.26.4/go.mod h1:LZ6ewCSkBqUpvSOf+LsTGnRinC6iaNUNMGBtDkJBaLQ= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= @@ -63,8 +74,9 @@ github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/ github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= -github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U= +github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= github.com/tidwall/gjson v1.14.2/go.mod 
h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk= github.com/tidwall/gjson v1.18.0 h1:FIDeeyB800efLX89e5a8Y0BNH+LOngJyGrIWxG2FKQY= github.com/tidwall/gjson v1.18.0/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk= @@ -75,12 +87,18 @@ github.com/tidwall/pretty v1.2.1 h1:qjsOFOWWQl+N3RsoF5/ssm1pHmJJwhjlSbZ51I6wMl4= github.com/tidwall/pretty v1.2.1/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU= github.com/tidwall/sjson v1.2.5 h1:kLy8mja+1c9jlljvWTlSazM7cKDRfJuR/bOJhcY5NcY= github.com/tidwall/sjson v1.2.5/go.mod h1:Fvgq9kS/6ociJEDnK0Fk1cpYF4FIW6ZF7LAe+6jwd28= +github.com/tklauser/go-sysconf v0.3.16 h1:frioLaCQSsF5Cy1jgRBrzr6t502KIIwQ0MArYICU0nA= +github.com/tklauser/go-sysconf v0.3.16/go.mod h1:/qNL9xxDhc7tx3HSRsLWNnuzbVfh3e7gh/BmM179nYI= +github.com/tklauser/numcpus v0.11.0 h1:nSTwhKH5e1dMNsCdVBukSZrURJRoHbSEQjdEbY+9RXw= +github.com/tklauser/numcpus v0.11.0/go.mod h1:z+LwcLq54uWZTX0u/bGobaV34u6V7KNlTZejzM6/3MQ= github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS4MhqMhdFk5YI= github.com/twitchyliquid64/golang-asm v0.15.1/go.mod h1:a1lVb/DtPvCB8fslRZhAngC2+aY1QWCk3Cedj/Gdt08= github.com/ugorji/go/codec v1.2.12 h1:9LC83zGrHhuUA9l16C9AHXAqEV/2wBQ4nkvumAE65EE= github.com/ugorji/go/codec v1.2.12/go.mod h1:UNopzCgEMSXjBc6AOMqYvWC1ktqTAfzJZUZgYf6w6lg= github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM= github.com/x448/float16 v0.8.4/go.mod h1:14CWIYCyZA/cWjXOioeEpHeN/83MdbZDRQHoFcYsOfg= +github.com/yusufpapurcu/wmi v1.2.4 h1:zFUKzehAFReQwLys1b/iSMl+JQGSCSjtVqQn9bBrPo0= +github.com/yusufpapurcu/wmi v1.2.4/go.mod h1:SBZ9tNy3G9/m5Oi98Zks0QjeHVDvuK0qfxQmPyzfmi0= golang.org/x/arch v0.0.0-20210923205945-b76863e36670/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8= golang.org/x/arch v0.8.0 h1:3wRIsP3pM4yUptoR96otTUOXI367OS0+c9eeRi9doIc= golang.org/x/arch v0.8.0/go.mod h1:FEVrYAQjsQXMVJ1nsMoVVXPZg6p2JE2mx8psSWTDQys= @@ -88,13 +106,14 @@ golang.org/x/crypto v0.45.0 h1:jMBrvKuj23MTlT0bQEOBcAE0mjg8mK9RXFhRH6nyF3Q= 
golang.org/x/crypto v0.45.0/go.mod h1:XTGrrkGJve7CYK7J8PEww4aY7gM3qMCElcJQ8n8JdX4= golang.org/x/net v0.47.0 h1:Mx+4dIFzqraBXUugkia1OOvlD6LemFo1ALMHjrXDOhY= golang.org/x/net v0.47.0/go.mod h1:/jNxtkgq5yWUGYkaZGqo27cfGZ1c5Nen03aYrrKpVRU= +golang.org/x/sys v0.0.0-20190916202348-b4ddaad3f8a3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20201204225414-ed752295db88/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.38.0 h1:3yZWxaJjBmCWXqhN1qh02AkOnCQ1poK6oF+a7xWL6Gc= -golang.org/x/sys v0.38.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= +golang.org/x/sys v0.41.0 h1:Ivj+2Cp/ylzLiEU89QhWblYnOE9zerudt9Ftecq2C6k= +golang.org/x/sys v0.41.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= golang.org/x/text v0.31.0 h1:aC8ghyu4JhP8VojJ2lEHBnochRno1sgL6nEi9WGFGMM= golang.org/x/text v0.31.0/go.mod h1:tKRAlv61yKIjGGHX/4tP1LTbc13YSec1pxVEWXzfoeM= -golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= google.golang.org/protobuf v1.34.1 h1:9ddQBjfCyZPOHPUiPxpYESBLc+T8P3E+Vo4IbKZgFWg= google.golang.org/protobuf v1.34.1/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos= diff --git a/proxy/logMonitor.go b/internal/logmon/logging.go similarity index 55% rename from proxy/logMonitor.go rename to internal/logmon/logging.go index e440a6f1..55331873 100644 --- a/proxy/logMonitor.go +++ b/internal/logmon/logging.go @@ -1,4 +1,4 @@ -package proxy +package logmon import ( "context" @@ -11,12 +11,22 @@ import ( "github.com/mostlygeek/llama-swap/event" ) +const DataEventID = 0x04 + +type DataEvent struct { + Data []byte +} + +func (e DataEvent) Type() uint32 { + return DataEventID +} + // circularBuffer is 
a fixed-size circular byte buffer that overwrites // oldest data when full. It provides O(1) writes and O(n) reads. type circularBuffer struct { - data []byte // pre-allocated capacity - head int // next write position - size int // current number of bytes stored (0 to cap) + data []byte + head int + size int } func newCircularBuffer(capacity int) *circularBuffer { @@ -27,8 +37,6 @@ func newCircularBuffer(capacity int) *circularBuffer { } } -// Write appends bytes to the buffer, overwriting oldest data when full. -// Data is copied into the internal buffer (not stored by reference). func (cb *circularBuffer) Write(p []byte) { if len(p) == 0 { return @@ -36,7 +44,6 @@ func (cb *circularBuffer) Write(p []byte) { cap := len(cb.data) - // If input is larger than capacity, only keep the last cap bytes if len(p) >= cap { copy(cb.data, p[len(p)-cap:]) cb.head = 0 @@ -44,28 +51,22 @@ func (cb *circularBuffer) Write(p []byte) { return } - // Calculate how much space is available from head to end of buffer firstPart := cap - cb.head if firstPart >= len(p) { - // All data fits without wrapping copy(cb.data[cb.head:], p) cb.head = (cb.head + len(p)) % cap } else { - // Data wraps around copy(cb.data[cb.head:], p[:firstPart]) copy(cb.data[:len(p)-firstPart], p[firstPart:]) cb.head = len(p) - firstPart } - // Update size cb.size += len(p) if cb.size > cap { cb.size = cap } } -// GetHistory returns all buffered data in correct order (oldest to newest). -// Returns a new slice (copy), not a view into internal buffer. 
func (cb *circularBuffer) GetHistory() []byte { if cb.size == 0 { return nil @@ -74,14 +75,11 @@ func (cb *circularBuffer) GetHistory() []byte { result := make([]byte, cb.size) cap := len(cb.data) - // Calculate start position (oldest data) start := (cb.head - cb.size + cap) % cap if start+cb.size <= cap { - // Data is contiguous, single copy copy(result, cb.data[start:start+cb.size]) } else { - // Data wraps around, two copies firstPart := cap - start copy(result[:firstPart], cb.data[start:]) copy(result[firstPart:], cb.data[:cb.size-firstPart]) @@ -90,42 +88,38 @@ func (cb *circularBuffer) GetHistory() []byte { return result } -type LogLevel int +type Level int const ( - LevelDebug LogLevel = iota + LevelDebug Level = iota LevelInfo LevelWarn LevelError - LogBufferSize = 100 * 1024 + BufferSize = 100 * 1024 ) -type LogMonitor struct { +type Monitor struct { eventbus *event.Dispatcher mu sync.RWMutex buffer *circularBuffer bufferMu sync.RWMutex - // typically this can be os.Stdout stdout io.Writer - // logging levels - level LogLevel - prefix string - - // timestamps + level Level + prefix string timeFormat string } -func NewLogMonitor() *LogMonitor { - return NewLogMonitorWriter(os.Stdout) +func New() *Monitor { + return NewWriter(os.Stdout) } -func NewLogMonitorWriter(stdout io.Writer) *LogMonitor { - return &LogMonitor{ +func NewWriter(stdout io.Writer) *Monitor { + return &Monitor{ eventbus: event.NewDispatcherConfig(1000), - buffer: nil, // lazy initialized on first Write + buffer: nil, stdout: stdout, level: LevelInfo, prefix: "", @@ -133,7 +127,7 @@ func NewLogMonitorWriter(stdout io.Writer) *LogMonitor { } } -func (w *LogMonitor) Write(p []byte) (n int, err error) { +func (w *Monitor) Write(p []byte) (n int, err error) { if len(p) == 0 { return 0, nil } @@ -145,19 +139,18 @@ func (w *LogMonitor) Write(p []byte) (n int, err error) { w.bufferMu.Lock() if w.buffer == nil { - w.buffer = newCircularBuffer(LogBufferSize) + w.buffer = 
newCircularBuffer(BufferSize) } w.buffer.Write(p) w.bufferMu.Unlock() - // Make a copy for broadcast to preserve immutability bufferCopy := make([]byte, len(p)) copy(bufferCopy, p) w.broadcast(bufferCopy) return n, nil } -func (w *LogMonitor) GetHistory() []byte { +func (w *Monitor) GetHistory() []byte { w.bufferMu.RLock() defer w.bufferMu.RUnlock() if w.buffer == nil { @@ -168,41 +161,41 @@ func (w *LogMonitor) GetHistory() []byte { // Clear releases the buffer memory, making it eligible for GC. // The buffer will be lazily re-allocated on the next Write. -func (w *LogMonitor) Clear() { +func (w *Monitor) Clear() { w.bufferMu.Lock() w.buffer = nil w.bufferMu.Unlock() } -func (w *LogMonitor) OnLogData(callback func(data []byte)) context.CancelFunc { - return event.Subscribe(w.eventbus, func(e LogDataEvent) { +func (w *Monitor) OnLogData(callback func(data []byte)) context.CancelFunc { + return event.Subscribe(w.eventbus, func(e DataEvent) { callback(e.Data) }) } -func (w *LogMonitor) broadcast(msg []byte) { - event.Publish(w.eventbus, LogDataEvent{Data: msg}) +func (w *Monitor) broadcast(msg []byte) { + event.Publish(w.eventbus, DataEvent{Data: msg}) } -func (w *LogMonitor) SetPrefix(prefix string) { +func (w *Monitor) SetPrefix(prefix string) { w.mu.Lock() defer w.mu.Unlock() w.prefix = prefix } -func (w *LogMonitor) SetLogLevel(level LogLevel) { +func (w *Monitor) SetLogLevel(level Level) { w.mu.Lock() defer w.mu.Unlock() w.level = level } -func (w *LogMonitor) SetLogTimeFormat(timeFormat string) { +func (w *Monitor) SetLogTimeFormat(timeFormat string) { w.mu.Lock() defer w.mu.Unlock() w.timeFormat = timeFormat } -func (w *LogMonitor) formatMessage(level string, msg string) []byte { +func (w *Monitor) formatMessage(level string, msg string) []byte { prefix := "" if w.prefix != "" { prefix = fmt.Sprintf("[%s] ", w.prefix) @@ -211,49 +204,38 @@ func (w *LogMonitor) formatMessage(level string, msg string) []byte { if w.timeFormat != "" { timestamp = fmt.Sprintf("%s 
", time.Now().Format(w.timeFormat)) } - return []byte(fmt.Sprintf("%s%s[%s] %s\n", timestamp, prefix, level, msg)) + return fmt.Appendf(nil, "%s%s[%s] %s\n", timestamp, prefix, level, msg) } -func (w *LogMonitor) log(level LogLevel, msg string) { +func (w *Monitor) log(level Level, msg string) { if level < w.level { return } w.Write(w.formatMessage(level.String(), msg)) } -func (w *LogMonitor) Debug(msg string) { - w.log(LevelDebug, msg) -} +func (w *Monitor) Debug(msg string) { w.log(LevelDebug, msg) } +func (w *Monitor) Info(msg string) { w.log(LevelInfo, msg) } +func (w *Monitor) Warn(msg string) { w.log(LevelWarn, msg) } +func (w *Monitor) Error(msg string) { w.log(LevelError, msg) } -func (w *LogMonitor) Info(msg string) { - w.log(LevelInfo, msg) -} - -func (w *LogMonitor) Warn(msg string) { - w.log(LevelWarn, msg) -} - -func (w *LogMonitor) Error(msg string) { - w.log(LevelError, msg) -} - -func (w *LogMonitor) Debugf(format string, args ...interface{}) { +func (w *Monitor) Debugf(format string, args ...any) { w.log(LevelDebug, fmt.Sprintf(format, args...)) } -func (w *LogMonitor) Infof(format string, args ...interface{}) { +func (w *Monitor) Infof(format string, args ...any) { w.log(LevelInfo, fmt.Sprintf(format, args...)) } -func (w *LogMonitor) Warnf(format string, args ...interface{}) { +func (w *Monitor) Warnf(format string, args ...any) { w.log(LevelWarn, fmt.Sprintf(format, args...)) } -func (w *LogMonitor) Errorf(format string, args ...interface{}) { +func (w *Monitor) Errorf(format string, args ...any) { w.log(LevelError, fmt.Sprintf(format, args...)) } -func (l LogLevel) String() string { +func (l Level) String() string { switch l { case LevelDebug: return "DEBUG" diff --git a/proxy/logMonitor_test.go b/internal/logmon/logging_test.go similarity index 58% rename from proxy/logMonitor_test.go rename to internal/logmon/logging_test.go index aff0d3e3..64f1b1ed 100644 --- a/proxy/logMonitor_test.go +++ b/internal/logmon/logging_test.go @@ -1,4 +1,4 @@ 
-package proxy +package logmon import ( "bytes" @@ -10,9 +10,8 @@ import ( ) func TestLogMonitor(t *testing.T) { - logMonitor := NewLogMonitorWriter(io.Discard) + logMonitor := NewWriter(io.Discard) - // A WaitGroup is used to wait for all the expected writes to complete var wg sync.WaitGroup client1Messages := make([]byte, 0) @@ -34,10 +33,8 @@ func TestLogMonitor(t *testing.T) { logMonitor.Write([]byte("2")) logMonitor.Write([]byte("3")) - // wait for all writes to complete wg.Wait() - // Check the buffer expectedHistory := "123" history := string(logMonitor.GetHistory()) @@ -57,14 +54,11 @@ func TestLogMonitor(t *testing.T) { } func TestWrite_ImmutableBuffer(t *testing.T) { - // Create a new LogMonitor instance - lm := NewLogMonitorWriter(io.Discard) + lm := NewWriter(io.Discard) - // Prepare a message to write msg := []byte("Hello, World!") lenmsg := len(msg) - // Write the message to the LogMonitor n, err := lm.Write(msg) if err != nil { t.Fatalf("Write failed: %v", err) @@ -74,13 +68,10 @@ func TestWrite_ImmutableBuffer(t *testing.T) { t.Errorf("Expected %d bytes written but got %d", lenmsg, n) } - // Change the original message - msg[0] = 'B' // This should not affect the buffer + msg[0] = 'B' - // Get the history from the LogMonitor history := lm.GetHistory() - // Check that the history contains the original message, not the modified one expected := []byte("Hello, World!") if !bytes.Equal(history, expected) { t.Errorf("Expected history to be %q, got %q", expected, history) @@ -88,16 +79,12 @@ func TestWrite_ImmutableBuffer(t *testing.T) { } func TestWrite_LogTimeFormat(t *testing.T) { - // Create a new LogMonitor instance - lm := NewLogMonitorWriter(io.Discard) + lm := NewWriter(io.Discard) - // Enable timestamps lm.timeFormat = time.RFC3339 - // Write the message to the LogMonitor lm.Info("Hello, World!") - // Get the history from the LogMonitor history := lm.GetHistory() timestamp := "" @@ -115,48 +102,40 @@ func TestWrite_LogTimeFormat(t *testing.T) { } 
func TestCircularBuffer_WrapAround(t *testing.T) { - // Create a small buffer to test wrap-around cb := newCircularBuffer(10) - // Write "hello" (5 bytes) cb.Write([]byte("hello")) if got := string(cb.GetHistory()); got != "hello" { t.Errorf("Expected 'hello', got %q", got) } - // Write "world" (5 bytes) - buffer now full cb.Write([]byte("world")) if got := string(cb.GetHistory()); got != "helloworld" { t.Errorf("Expected 'helloworld', got %q", got) } - // Write "12345" (5 bytes) - should overwrite "hello" cb.Write([]byte("12345")) if got := string(cb.GetHistory()); got != "world12345" { t.Errorf("Expected 'world12345', got %q", got) } - // Write data larger than buffer capacity - cb.Write([]byte("abcdefghijklmnop")) // 16 bytes, only last 10 kept + cb.Write([]byte("abcdefghijklmnop")) if got := string(cb.GetHistory()); got != "ghijklmnop" { t.Errorf("Expected 'ghijklmnop', got %q", got) } } func TestCircularBuffer_BoundaryConditions(t *testing.T) { - // Test empty buffer cb := newCircularBuffer(10) if got := cb.GetHistory(); got != nil { t.Errorf("Expected nil for empty buffer, got %q", got) } - // Test exact capacity cb.Write([]byte("1234567890")) if got := string(cb.GetHistory()); got != "1234567890" { t.Errorf("Expected '1234567890', got %q", got) } - // Test write exactly at capacity boundary cb = newCircularBuffer(10) cb.Write([]byte("12345")) cb.Write([]byte("67890")) @@ -166,19 +145,16 @@ func TestCircularBuffer_BoundaryConditions(t *testing.T) { } func TestLogMonitor_LazyInit(t *testing.T) { - lm := NewLogMonitorWriter(io.Discard) + lm := NewWriter(io.Discard) - // Buffer should be nil before any writes if lm.buffer != nil { t.Error("Expected buffer to be nil before first write") } - // GetHistory should return nil when buffer is nil if got := lm.GetHistory(); got != nil { t.Errorf("Expected nil history before first write, got %q", got) } - // Write should lazily initialize the buffer lm.Write([]byte("test")) if lm.buffer == nil { @@ -191,15 +167,13 @@ 
func TestLogMonitor_LazyInit(t *testing.T) { } func TestLogMonitor_Clear(t *testing.T) { - lm := NewLogMonitorWriter(io.Discard) + lm := NewWriter(io.Discard) - // Write some data lm.Write([]byte("hello")) if got := string(lm.GetHistory()); got != "hello" { t.Errorf("Expected 'hello', got %q", got) } - // Clear should release the buffer lm.Clear() if lm.buffer != nil { @@ -212,9 +186,8 @@ func TestLogMonitor_Clear(t *testing.T) { } func TestLogMonitor_ClearAndReuse(t *testing.T) { - lm := NewLogMonitorWriter(io.Discard) + lm := NewWriter(io.Discard) - // Write, clear, then write again lm.Write([]byte("first")) lm.Clear() lm.Write([]byte("second")) @@ -225,13 +198,12 @@ func TestLogMonitor_ClearAndReuse(t *testing.T) { } func BenchmarkLogMonitorWrite(b *testing.B) { - // Test data of varying sizes smallMsg := []byte("small message\n") mediumMsg := []byte(strings.Repeat("medium message content ", 10) + "\n") largeMsg := []byte(strings.Repeat("large message content for benchmarking ", 100) + "\n") b.Run("SmallWrite", func(b *testing.B) { - lm := NewLogMonitorWriter(io.Discard) + lm := NewWriter(io.Discard) b.ResetTimer() for i := 0; i < b.N; i++ { lm.Write(smallMsg) @@ -239,7 +211,7 @@ func BenchmarkLogMonitorWrite(b *testing.B) { }) b.Run("MediumWrite", func(b *testing.B) { - lm := NewLogMonitorWriter(io.Discard) + lm := NewWriter(io.Discard) b.ResetTimer() for i := 0; i < b.N; i++ { lm.Write(mediumMsg) @@ -247,7 +219,7 @@ func BenchmarkLogMonitorWrite(b *testing.B) { }) b.Run("LargeWrite", func(b *testing.B) { - lm := NewLogMonitorWriter(io.Discard) + lm := NewWriter(io.Discard) b.ResetTimer() for i := 0; i < b.N; i++ { lm.Write(largeMsg) @@ -255,8 +227,7 @@ func BenchmarkLogMonitorWrite(b *testing.B) { }) b.Run("WithSubscribers", func(b *testing.B) { - lm := NewLogMonitorWriter(io.Discard) - // Add some subscribers + lm := NewWriter(io.Discard) for i := 0; i < 5; i++ { lm.OnLogData(func(data []byte) {}) } @@ -267,8 +238,7 @@ func BenchmarkLogMonitorWrite(b 
*testing.B) { }) b.Run("GetHistory", func(b *testing.B) { - lm := NewLogMonitorWriter(io.Discard) - // Pre-populate with data + lm := NewWriter(io.Discard) for i := 0; i < 1000; i++ { lm.Write(mediumMsg) } @@ -278,39 +248,3 @@ func BenchmarkLogMonitorWrite(b *testing.B) { } }) } - -/* -Benchmark Results - MBP M1 Pro - -Before (ring.Ring): -| Benchmark | ns/op | bytes/op | allocs/op | -|---------------------------------|------------|----------|-----------| -| SmallWrite (14B) | 43 ns | 40 B | 2 | -| MediumWrite (241B) | 76 ns | 264 B | 2 | -| LargeWrite (4KB) | 504 ns | 4,120 B | 2 | -| WithSubscribers (5 subs) | 355 ns | 264 B | 2 | -| GetHistory (after 1000 writes) | 145,000 ns | 1.2 MB | 22 | - -After (circularBuffer 10KB): -| Benchmark | ns/op | bytes/op | allocs/op | -|---------------------------------|------------|----------|-----------| -| SmallWrite (14B) | 26 ns | 16 B | 1 | -| MediumWrite (241B) | 67 ns | 240 B | 1 | -| LargeWrite (4KB) | 774 ns | 4,096 B | 1 | -| WithSubscribers (5 subs) | 325 ns | 240 B | 1 | -| GetHistory (after 1000 writes) | 1,042 ns | 10,240 B | 1 | - -After (circularBuffer 100KB): -| Benchmark | ns/op | bytes/op | allocs/op | -|---------------------------------|------------|-----------|-----------| -| SmallWrite (14B) | 26 ns | 16 B | 1 | -| MediumWrite (241B) | 66 ns | 240 B | 1 | -| LargeWrite (4KB) | 753 ns | 4,096 B | 1 | -| WithSubscribers (5 subs) | 309 ns | 240 B | 1 | -| GetHistory (after 1000 writes) | 7,788 ns | 106,496 B | 1 | - -Summary: -- GetHistory: 139x faster (10KB), 18x faster (100KB) -- Allocations: reduced from 2 to 1 across all operations -- Small/medium writes: ~1.1-1.6x faster -*/ diff --git a/internal/perf/monitor.go b/internal/perf/monitor.go new file mode 100644 index 00000000..1cdc040e --- /dev/null +++ b/internal/perf/monitor.go @@ -0,0 +1,210 @@ +package perf + +import ( + "context" + "errors" + "sync" + "time" + + "github.com/mostlygeek/llama-swap/internal/logmon" + 
"github.com/mostlygeek/llama-swap/internal/ring" + "github.com/mostlygeek/llama-swap/proxy/config" +) + +var ( + ErrNotImplemented = errors.New("Not Implemented") + ErrNoGpuTool = errors.New("no GPU monitoring tool available") +) + +type Monitor struct { + mutex sync.RWMutex + log *logmon.Monitor + conf config.PerformanceConfig + sysRing ring.Buffer[SysStat] + gpuRing ring.Buffer[[]GpuStat] + + stopCtx context.Context + stopCancel context.CancelFunc + + sysListeners map[chan SysStat]struct{} + gpuListeners map[chan []GpuStat]struct{} +} + +func ringCapacity(c config.PerformanceConfig) int { + n := int(c.MaxAge / c.Every) + if n < 1 { + n = 1 + } + return n +} + +func New(c config.PerformanceConfig, logger *logmon.Monitor) (*Monitor, error) { + + if c.Every < 100*time.Millisecond { + c.Every = 100 * time.Millisecond + } + if c.GC < 1*time.Second { + c.GC = 1 * time.Second + } + if c.MaxAge < 1*time.Minute { + c.MaxAge = 1 * time.Minute + } + + if logger == nil { + return nil, errors.New("logger is required") + } + + capacity := ringCapacity(c) + return &Monitor{ + conf: c, + log: logger, + sysRing: ring.NewBuffer[SysStat](capacity), + gpuRing: ring.NewBuffer[[]GpuStat](capacity), + sysListeners: make(map[chan SysStat]struct{}), + gpuListeners: make(map[chan []GpuStat]struct{}), + }, nil +} + +func (m *Monitor) Stop() { + m.mutex.Lock() + defer m.mutex.Unlock() + if m.stopCancel == nil { + return + } + m.stopCancel() + m.stopCancel = nil +} + +// UpdateConfig updates the monitor configuration and restarts if changed. +func (m *Monitor) UpdateConfig(newConf config.PerformanceConfig) { + m.mutex.RLock() + changed := m.conf != newConf + m.mutex.RUnlock() + + if !changed { + return + } + + m.Stop() + m.mutex.Lock() + m.conf = newConf + capacity := ringCapacity(newConf) + m.sysRing = ring.NewBuffer[SysStat](capacity) + m.gpuRing = ring.NewBuffer[[]GpuStat](capacity) + m.mutex.Unlock() + m.Start() +} + +// Subscribe returns channels to listen to system and GPU stats. 
+func (m *Monitor) Subscribe() (chan SysStat, chan []GpuStat, func()) { + m.mutex.Lock() + defer m.mutex.Unlock() + sysChan := make(chan SysStat, 1) + gpuChan := make(chan []GpuStat, 1) + + m.sysListeners[sysChan] = struct{}{} + m.gpuListeners[gpuChan] = struct{}{} + + unsub := func() { + m.mutex.Lock() + defer m.mutex.Unlock() + delete(m.sysListeners, sysChan) + delete(m.gpuListeners, gpuChan) + } + + return sysChan, gpuChan, unsub +} + +func (m *Monitor) Start() { + m.mutex.Lock() + defer m.mutex.Unlock() + if m.stopCancel != nil { + return + } + + m.stopCtx, m.stopCancel = context.WithCancel(context.Background()) + + go func() { + tick := time.NewTicker(m.conf.Every) + defer tick.Stop() + for { + select { + case <-m.stopCtx.Done(): + return + case <-tick.C: + s, err := ReadSysStats() + if err != nil { + if err != ErrNotImplemented { + m.log.Errorf("failed to read sys stats: %s", err.Error()) + } + continue + } + m.mutex.Lock() + m.sysRing.Push(s) + for l := range m.sysListeners { + select { + case l <- s: + default: + } + } + m.mutex.Unlock() + } + } + }() + + go func() { + gpuCh, err := getGpuStats(m.stopCtx, m.conf.Every, m.log) + if err != nil { + if errors.Is(err, ErrNotImplemented) || errors.Is(err, ErrNoGpuTool) { + m.log.Infof("GPU monitoring not available: %s", err.Error()) + } else { + m.log.Errorf("failed to initialize GPU monitoring: %s", err.Error()) + } + return + } + + for { + select { + case <-m.stopCtx.Done(): + return + case g, ok := <-gpuCh: + if !ok { + m.log.Errorf("failed reading from gpuCh - stopping read goroutine") + return + } + m.mutex.Lock() + m.gpuRing.Push(g) + for l := range m.gpuListeners { + select { + case l <- g: + default: + } + } + m.mutex.Unlock() + } + } + }() +} + +// Current returns a copy of the current log of system and GPU stats. 
+func (m *Monitor) Current() ([]SysStat, []GpuStat) { + m.mutex.RLock() + defer m.mutex.RUnlock() + + sysStats := m.sysRing.Slice() + + snapshots := m.gpuRing.Slice() + var gpuStats []GpuStat + for _, snapshot := range snapshots { + gpuStats = append(gpuStats, snapshot...) + } + return sysStats, gpuStats +} + +func ReadSysStats() (SysStat, error) { + return readSysStats() +} + +func GetGpuStats(ctx context.Context, every time.Duration, logger *logmon.Monitor) (chan []GpuStat, error) { + return getGpuStats(ctx, every, logger) +} diff --git a/internal/perf/monitor_darwin.go b/internal/perf/monitor_darwin.go new file mode 100644 index 00000000..ff6dee18 --- /dev/null +++ b/internal/perf/monitor_darwin.go @@ -0,0 +1,55 @@ +package perf + +import ( + "context" + "time" + + "github.com/mostlygeek/llama-swap/internal/logmon" + "github.com/shirou/gopsutil/v4/cpu" + "github.com/shirou/gopsutil/v4/load" + "github.com/shirou/gopsutil/v4/mem" +) + +func getGpuStats(ctx context.Context, every time.Duration, logger *logmon.Monitor) (chan []GpuStat, error) { + return nil, ErrNotImplemented +} + +func readSysStats() (SysStat, error) { + cpuPcts, err := cpu.Percent(0, true) + if err != nil { + return SysStat{}, err + } + + vmStat, err := mem.VirtualMemory() + if err != nil { + return SysStat{}, err + } + + const toMB = 1024 * 1024 + + var swapTotalMB, swapUsedMB int + if swapStat, err := mem.SwapMemory(); err == nil { + swapTotalMB = int(swapStat.Total / toMB) + swapUsedMB = int(swapStat.Used / toMB) + } + + var loadAvg1, loadAvg5, loadAvg15 float64 + if loadStat, err := load.Avg(); err == nil { + loadAvg1 = loadStat.Load1 + loadAvg5 = loadStat.Load5 + loadAvg15 = loadStat.Load15 + } + + return SysStat{ + Timestamp: time.Now(), + CpuUtilPerCore: cpuPcts, + MemTotalMB: int(vmStat.Total / toMB), + MemUsedMB: int(vmStat.Used / toMB), + MemFreeMB: int(vmStat.Free / toMB), + SwapTotalMB: swapTotalMB, + SwapUsedMB: swapUsedMB, + LoadAvg1: loadAvg1, + LoadAvg5: loadAvg5, + LoadAvg15: 
loadAvg15, + }, nil +} diff --git a/internal/perf/monitor_test.go b/internal/perf/monitor_test.go new file mode 100644 index 00000000..a6665eee --- /dev/null +++ b/internal/perf/monitor_test.go @@ -0,0 +1,238 @@ +package perf + +import ( + "io" + "sync" + "testing" + "time" + + "github.com/mostlygeek/llama-swap/internal/logmon" + "github.com/mostlygeek/llama-swap/proxy/config" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func newTestLogger() *logmon.Monitor { + return logmon.NewWriter(io.Discard) +} + +func TestNew_DefaultConfig(t *testing.T) { + logger := newTestLogger() + + m, err := New(config.PerformanceConfig{}, logger) + require.NoError(t, err) + require.NotNil(t, m) + + assert.Equal(t, 100*time.Millisecond, m.conf.Every) + assert.Equal(t, 1*time.Second, m.conf.GC) + assert.Equal(t, 1*time.Minute, m.conf.MaxAge) +} + +func TestNew_CustomConfig(t *testing.T) { + logger := newTestLogger() + + cfg := config.PerformanceConfig{ + Enable: true, + Every: 500 * time.Millisecond, + GC: 5 * time.Second, + MaxAge: 10 * time.Minute, + } + + m, err := New(cfg, logger) + require.NoError(t, err) + + assert.Equal(t, 500*time.Millisecond, m.conf.Every) + assert.Equal(t, 5*time.Second, m.conf.GC) + assert.Equal(t, 10*time.Minute, m.conf.MaxAge) +} + +func TestNew_NilLogger(t *testing.T) { + m, err := New(config.PerformanceConfig{}, nil) + assert.Error(t, err) + assert.Nil(t, m) +} + +func TestNew_BelowMinimumConfig(t *testing.T) { + logger := newTestLogger() + + cfg := config.PerformanceConfig{ + Enable: true, + Every: 1 * time.Millisecond, + GC: 100 * time.Millisecond, + MaxAge: 1 * time.Second, + } + + m, err := New(cfg, logger) + require.NoError(t, err) + + assert.Equal(t, 100*time.Millisecond, m.conf.Every) + assert.Equal(t, 1*time.Second, m.conf.GC) + assert.Equal(t, 1*time.Minute, m.conf.MaxAge) +} + +func TestSubscribe_ReturnsChannels(t *testing.T) { + m, err := New(config.PerformanceConfig{}, newTestLogger()) + require.NoError(t, 
err) + + sysCh, gpuCh, unsub := m.Subscribe() + defer unsub() + + assert.NotNil(t, sysCh) + assert.NotNil(t, gpuCh) + assert.NotNil(t, unsub) +} + +func TestSubscribe_UnsubscribeRemovesListeners(t *testing.T) { + m, err := New(config.PerformanceConfig{}, newTestLogger()) + require.NoError(t, err) + + _, _, unsub := m.Subscribe() + + m.mutex.RLock() + assert.Len(t, m.sysListeners, 1) + assert.Len(t, m.gpuListeners, 1) + m.mutex.RUnlock() + + unsub() + + m.mutex.RLock() + assert.Len(t, m.sysListeners, 0) + assert.Len(t, m.gpuListeners, 0) + m.mutex.RUnlock() +} + +func TestSubscribe_MultipleSubscriptions(t *testing.T) { + m, err := New(config.PerformanceConfig{}, newTestLogger()) + require.NoError(t, err) + + sysCh1, gpuCh1, unsub1 := m.Subscribe() + sysCh2, gpuCh2, unsub2 := m.Subscribe() + defer unsub1() + defer unsub2() + + assert.NotEqual(t, sysCh1, sysCh2) + assert.NotEqual(t, gpuCh1, gpuCh2) + + m.mutex.RLock() + assert.Len(t, m.sysListeners, 2) + assert.Len(t, m.gpuListeners, 2) + m.mutex.RUnlock() +} + +func TestCurrent_EmptyByDefault(t *testing.T) { + m, err := New(config.PerformanceConfig{}, newTestLogger()) + require.NoError(t, err) + + sysStats, gpuStats := m.Current() + assert.Empty(t, sysStats) + assert.Empty(t, gpuStats) +} + +func TestCurrent_ReturnsCopies(t *testing.T) { + m, err := New(config.PerformanceConfig{}, newTestLogger()) + require.NoError(t, err) + + now := time.Now() + m.sysRing.Push(SysStat{Timestamp: now, MemTotalMB: 1024}) + m.gpuRing.Push([]GpuStat{{Timestamp: now, ID: 0, Name: "gpu0"}}) + + sysStats, gpuStats := m.Current() + + assert.Len(t, sysStats, 1) + assert.Len(t, gpuStats, 1) + assert.Equal(t, 1024, sysStats[0].MemTotalMB) + assert.Equal(t, "gpu0", gpuStats[0].Name) + + // modifying the returned slice should not affect the original + sysStats[0].MemTotalMB = 999 + original, _ := m.Current() + assert.Equal(t, 1024, original[0].MemTotalMB) +} + +func TestStart_CollectsSysStats(t *testing.T) { + if testing.Short() { + 
t.Skip("skipping slow test") + } + + m, err := New(config.PerformanceConfig{Every: 100 * time.Millisecond}, newTestLogger()) + require.NoError(t, err) + + m.Start() + + time.Sleep(350 * time.Millisecond) + m.Stop() + + sysStats, _ := m.Current() + assert.NotEmpty(t, sysStats, "expected sys stats to be collected") +} + +func TestStart_StopStopsGoroutines(t *testing.T) { + if testing.Short() { + t.Skip("skipping slow test") + } + + m, err := New(config.PerformanceConfig{Every: 100 * time.Millisecond}, newTestLogger()) + require.NoError(t, err) + + m.Start() + if m.stopCancel == nil { + t.Error("stopCancel should not be nil after Start()") + } + + m.Stop() + if m.stopCancel != nil { + t.Error("stopCancel should be nil after Stop()") + } +} + +func TestStart_SubscriberReceivesStats(t *testing.T) { + if testing.Short() { + t.Skip("skipping slow test") + } + + m, err := New(config.PerformanceConfig{Every: 100 * time.Millisecond}, newTestLogger()) + require.NoError(t, err) + + sysCh, _, unsub := m.Subscribe() + defer unsub() + + m.Start() + defer m.Stop() + + select { + case s := <-sysCh: + assert.False(t, s.Timestamp.IsZero()) + assert.NotEmpty(t, s.CpuUtilPerCore) + case <-time.After(500 * time.Millisecond): + t.Fatal("timed out waiting for sys stats") + } +} + +func TestReadSysStats(t *testing.T) { + s, err := ReadSysStats() + require.NoError(t, err) + + assert.False(t, s.Timestamp.IsZero()) + assert.NotEmpty(t, s.CpuUtilPerCore) + assert.Greater(t, s.MemTotalMB, 0) +} + +func TestCurrent_ConcurrentAccess(t *testing.T) { + m, err := New(config.PerformanceConfig{}, newTestLogger()) + require.NoError(t, err) + + m.sysRing.Push(SysStat{Timestamp: time.Now(), MemTotalMB: 1024}) + m.gpuRing.Push([]GpuStat{{Timestamp: time.Now(), ID: 0}}) + + var wg sync.WaitGroup + for i := 0; i < 10; i++ { + wg.Add(1) + go func() { + defer wg.Done() + sys, gpu := m.Current() + assert.Len(t, sys, 1) + assert.Len(t, gpu, 1) + }() + } + wg.Wait() +} diff --git a/internal/perf/monitor_unix.go 
b/internal/perf/monitor_unix.go new file mode 100644 index 00000000..fe178505 --- /dev/null +++ b/internal/perf/monitor_unix.go @@ -0,0 +1,461 @@ +//go:build unix && !darwin + +package perf + +import ( + "bufio" + "context" + "encoding/json" + "fmt" + "net" + "os" + "os/exec" + "os/user" + "path/filepath" + "strconv" + "strings" + "time" + + "github.com/mostlygeek/llama-swap/internal/logmon" + "github.com/shirou/gopsutil/v4/cpu" + "github.com/shirou/gopsutil/v4/load" + "github.com/shirou/gopsutil/v4/mem" + psnet "github.com/shirou/gopsutil/v4/net" +) + +func getGpuStats(ctx context.Context, every time.Duration, logger *logmon.Monitor) (chan []GpuStat, error) { + if ch, err := tryLACT(ctx, every, logger); err == nil { + logger.Info("using LACT for GPU monitoring") + return ch, nil + } else { + logger.Debugf("LACT: %s", err.Error()) + } + + if ch, err := tryNvidiaSmi(ctx, every, logger); err == nil { + logger.Info("using nvidia-smi for GPU monitoring") + return ch, nil + } else { + logger.Debugf("nvidia-smi: %s", err.Error()) + } + + if ch, err := trySysfs(ctx, every, logger); err == nil { + logger.Info("using sysfs for GPU monitoring") + return ch, nil + } else { + logger.Debugf("sysfs: %s", err.Error()) + } + + return nil, ErrNoGpuTool +} + +func tryLACT(ctx context.Context, every time.Duration, logger *logmon.Monitor) (chan []GpuStat, error) { + socketPath := lactSocketPath() + if socketPath == "" { + return nil, ErrNoGpuTool + } + + conn, err := net.DialTimeout("unix", socketPath, 2*time.Second) + if err != nil { + return nil, fmt.Errorf("cannot connect to LACT socket: %w", err) + } + defer conn.Close() + + conn.SetDeadline(time.Now().Add(5 * time.Second)) + + devices, err := lactListDevices(conn) + if err != nil { + return nil, fmt.Errorf("LACT ListDevices failed: %w", err) + } + + if len(devices) == 0 { + return nil, fmt.Errorf("LACT returned no devices") + } + + ch := make(chan []GpuStat, 1) + + go func() { + defer close(ch) + ticker := time.NewTicker(every) + 
defer ticker.Stop() + + for { + select { + case <-ctx.Done(): + return + case <-ticker.C: + socketPath := lactSocketPath() + if socketPath == "" { + continue + } + + conn, err := net.DialTimeout("unix", socketPath, 2*time.Second) + if err != nil { + continue + } + conn.SetDeadline(time.Now().Add(5 * time.Second)) + + devices, err := lactListDevices(conn) + if err != nil { + conn.Close() + continue + } + + stats := make([]GpuStat, 0, len(devices)) + for i, d := range devices { + stat, err := lactGetDeviceStats(conn, d.ID, d.Name, i) + if err != nil { + continue + } + stats = append(stats, stat) + } + conn.Close() + + if len(stats) > 0 { + select { + case ch <- stats: + default: + } + } + } + } + }() + + return ch, nil +} + +func tryNvidiaSmi(ctx context.Context, every time.Duration, logger *logmon.Monitor) (chan []GpuStat, error) { + if _, err := exec.LookPath("nvidia-smi"); err != nil { + return nil, ErrNoGpuTool + } + + sec := int(every.Seconds()) + if sec < 1 { + sec = 1 + } + + cmd := exec.CommandContext(ctx, "nvidia-smi", + "--query-gpu=index,name,uuid,temperature.gpu,utilization.gpu,memory.used,memory.total,fan.speed,power.draw", + "--format=csv,noheader,nounits", + "-loop", fmt.Sprintf("%d", sec), + ) + + stdout, err := cmd.StdoutPipe() + if err != nil { + return nil, fmt.Errorf("nvidia-smi stdout pipe failed: %w", err) + } + + if err := cmd.Start(); err != nil { + return nil, fmt.Errorf("nvidia-smi start failed: %w", err) + } + + ch := make(chan []GpuStat, 1) + + go func() { + defer close(ch) + + scanner := bufio.NewScanner(stdout) + for scanner.Scan() { + line := strings.TrimSpace(scanner.Text()) + if line == "" { + continue + } + + stat := parseNvidiaSmiLine(line) + if stat != nil { + select { + case ch <- []GpuStat{*stat}: + default: + } + } + } + cmd.Wait() + }() + + return ch, nil +} + +func parseNvidiaSmiLine(line string) *GpuStat { + fields := strings.Split(line, ", ") + if len(fields) < 9 { + return nil + } + + id, _ := 
// lactSocketPath locates the LACT daemon's unix socket.
//
// Candidates are checked in order and the first one that os.Stat accepts is
// returned:
//  1. the path in $LACT_DAEMON_SOCKET_PATH, when set,
//  2. /run/lactd.sock,
//  3. /run/user/<uid>/lactd.sock for the current user.
//
// An empty string means no socket was found (or the current user could not
// be determined).
func lactSocketPath() string {
	exists := func(path string) bool {
		_, err := os.Stat(path)
		return err == nil
	}

	if fromEnv := os.Getenv("LACT_DAEMON_SOCKET_PATH"); fromEnv != "" && exists(fromEnv) {
		return fromEnv
	}

	const systemSocket = "/run/lactd.sock"
	if exists(systemSocket) {
		return systemSocket
	}

	// Only look up the user once the cheaper candidates have failed.
	current, err := user.Current()
	if err != nil {
		return ""
	}
	if perUser := filepath.Join("/run/user", current.Uid, "lactd.sock"); exists(perUser) {
		return perUser
	}

	return ""
}
*float64 `json:"average"` + Current *float64 `json:"current"` + } `json:"power"` + Temps map[string]lactTempEntry `json:"temps"` + BusyPercent *uint8 `json:"busy_percent"` +} + +type lactTempEntry struct { + Current *float64 `json:"current"` +} + +func lactSendRequest(conn net.Conn, req lactRequest) (json.RawMessage, error) { + data, err := json.Marshal(req) + if err != nil { + return nil, err + } + data = append(data, '\n') + + if _, err := conn.Write(data); err != nil { + return nil, err + } + + reader := bufio.NewReader(conn) + line, err := reader.ReadBytes('\n') + if err != nil { + return nil, err + } + + var resp lactResponse + if err := json.Unmarshal(line, &resp); err != nil { + return nil, err + } + + if resp.Status != "ok" { + return nil, fmt.Errorf("LACT error: %s", string(resp.Data)) + } + + return resp.Data, nil +} + +func lactListDevices(conn net.Conn) ([]lactDeviceEntry, error) { + data, err := lactSendRequest(conn, lactRequest{Command: "list_devices"}) + if err != nil { + return nil, err + } + + var devices []lactDeviceEntry + if err := json.Unmarshal(data, &devices); err != nil { + return nil, err + } + + return devices, nil +} + +func lactGetDeviceStats(conn net.Conn, id string, name string, index int) (GpuStat, error) { + data, err := lactSendRequest(conn, lactRequest{ + Command: "device_stats", + Args: struct { + ID string `json:"id"` + }{ID: id}, + }) + if err != nil { + return GpuStat{}, err + } + + var stats lactDeviceStats + if err := json.Unmarshal(data, &stats); err != nil { + return GpuStat{}, err + } + + var memUsedMB, memTotalMB int + if stats.Vram.Used != nil { + memUsedMB = int(*stats.Vram.Used / 1024 / 1024) + } + if stats.Vram.Total != nil { + memTotalMB = int(*stats.Vram.Total / 1024 / 1024) + } + + var memUtil float64 + if memTotalMB > 0 { + memUtil = float64(memUsedMB) / float64(memTotalMB) * 100 + } + + var gpuUtil float64 + if stats.BusyPercent != nil { + gpuUtil = float64(*stats.BusyPercent) + } + + var fanSpeed float64 + if 
stats.Fan.PwmCurrent != nil { + fanSpeed = float64(*stats.Fan.PwmCurrent) / 255.0 * 100.0 + } + + var powerDraw float64 + if stats.Power.Average != nil && *stats.Power.Average > 0 { + powerDraw = *stats.Power.Average + } else if stats.Power.Current != nil { + powerDraw = *stats.Power.Current + } + + var tempC int + if t, ok := stats.Temps["edge"]; ok && t.Current != nil { + tempC = int(*t.Current) + } else if t, ok := stats.Temps["junction"]; ok && t.Current != nil { + tempC = int(*t.Current) + } else { + for _, t := range stats.Temps { + if t.Current != nil { + tempC = int(*t.Current) + break + } + } + } + + var vramTempC int + // nvidia uses "VRAM", amd "mem" + for _, key := range []string{"mem", "VRAM"} { + if t, ok := stats.Temps[key]; ok && t.Current != nil && *t.Current > 0 { + vramTempC = int(*t.Current) + break + } + } + + return GpuStat{ + Timestamp: time.Now(), + ID: index, + Name: name, + UUID: id, + TempC: tempC, + VramTempC: vramTempC, + GpuUtilPct: gpuUtil, + MemUtilPct: memUtil, + MemUsedMB: memUsedMB, + MemTotalMB: memTotalMB, + FanSpeedPct: fanSpeed, + PowerDrawW: powerDraw, + }, nil +} + +func readSysfs() ([]GpuStat, error) { + return nil, ErrNotImplemented +} + +func readSysStats() (SysStat, error) { + cpuPcts, err := cpu.Percent(0, true) + if err != nil { + return SysStat{}, err + } + + vmStat, err := mem.VirtualMemory() + if err != nil { + return SysStat{}, err + } + + const toMB = 1024 * 1024 + + var swapTotalMB, swapUsedMB int + if swapStat, err := mem.SwapMemory(); err == nil { + swapTotalMB = int(swapStat.Total / toMB) + swapUsedMB = int(swapStat.Used / toMB) + } + + var loadAvg1, loadAvg5, loadAvg15 float64 + if loadStat, err := load.Avg(); err == nil { + loadAvg1 = loadStat.Load1 + loadAvg5 = loadStat.Load5 + loadAvg15 = loadStat.Load15 + } + + netIO := make([]NetIOStat, 0) + if ioCounters, err := psnet.IOCounters(true); err == nil { + for _, ioc := range ioCounters { + if ioc.Name == "lo" { + continue + } + netIO = append(netIO, 
NetIOStat{ + Name: ioc.Name, + BytesRecv: ioc.BytesRecv, + BytesSent: ioc.BytesSent, + }) + } + } + + return SysStat{ + Timestamp: time.Now(), + CpuUtilPerCore: cpuPcts, + MemTotalMB: int(vmStat.Total / toMB), + MemUsedMB: int(vmStat.Used / toMB), + MemFreeMB: int(vmStat.Free / toMB), + SwapTotalMB: swapTotalMB, + SwapUsedMB: swapUsedMB, + LoadAvg1: loadAvg1, + LoadAvg5: loadAvg5, + LoadAvg15: loadAvg15, + NetIO: netIO, + }, nil +} diff --git a/internal/perf/monitor_windows.go b/internal/perf/monitor_windows.go new file mode 100644 index 00000000..acf9f986 --- /dev/null +++ b/internal/perf/monitor_windows.go @@ -0,0 +1,49 @@ +package perf + +import ( + "context" + "time" + + "github.com/mostlygeek/llama-swap/internal/logmon" + "github.com/shirou/gopsutil/v4/cpu" + "github.com/shirou/gopsutil/v4/mem" + "github.com/shirou/gopsutil/v4/net" +) + +func getGpuStats(ctx context.Context, every time.Duration, logger *logmon.Monitor) (chan []GpuStat, error) { + return nil, ErrNotImplemented +} + +func readSysStats() (SysStat, error) { + cpuPcts, err := cpu.Percent(0, true) + if err != nil { + return SysStat{}, err + } + + vmStat, err := mem.VirtualMemory() + if err != nil { + return SysStat{}, err + } + + const toMB = 1024 * 1024 + + netIO := make([]NetIOStat, 0) + if ioCounters, err := net.IOCounters(true); err == nil { + for _, ioc := range ioCounters { + netIO = append(netIO, NetIOStat{ + Name: ioc.Name, + BytesRecv: ioc.BytesRecv, + BytesSent: ioc.BytesSent, + }) + } + } + + return SysStat{ + Timestamp: time.Now(), + CpuUtilPerCore: cpuPcts, + MemTotalMB: int(vmStat.Total / toMB), + MemUsedMB: int(vmStat.Used / toMB), + MemFreeMB: int(vmStat.Free / toMB), + NetIO: netIO, + }, nil +} diff --git a/internal/perf/prometheus.go b/internal/perf/prometheus.go new file mode 100644 index 00000000..8d7232ff --- /dev/null +++ b/internal/perf/prometheus.go @@ -0,0 +1,129 @@ +package perf + +import ( + "fmt" + "net/http" + "sort" + "strings" +) + +const mbToBytes = int64(1024 * 1024) 
+ +// MetricsHandler returns an http.HandlerFunc serving Prometheus text format metrics +// with the most recent system and GPU stats. +func (m *Monitor) MetricsHandler() http.HandlerFunc { + return func(w http.ResponseWriter, r *http.Request) { + sysStats, gpuStats := m.Current() + w.Header().Set("Content-Type", "text/plain; version=0.0.4; charset=utf-8") + + if len(sysStats) > 0 { + writeSysMetrics(w, sysStats[len(sysStats)-1]) + } + + if len(gpuStats) > 0 { + writeGpuMetrics(w, latestPerGPU(gpuStats)) + } + } +} + +func writeSysMetrics(w http.ResponseWriter, s SysStat) { + fmt.Fprintf(w, "# HELP llamaswap_cpu_util_percent CPU utilization per core (0-100)\n") + fmt.Fprintf(w, "# TYPE llamaswap_cpu_util_percent gauge\n") + for i, pct := range s.CpuUtilPerCore { + fmt.Fprintf(w, "llamaswap_cpu_util_percent{core=\"%d\"} %g\n", i, pct) + } + + fmt.Fprintf(w, "# HELP llamaswap_memory_total_bytes Total memory in bytes\n") + fmt.Fprintf(w, "# TYPE llamaswap_memory_total_bytes gauge\n") + fmt.Fprintf(w, "llamaswap_memory_total_bytes %d\n", int64(s.MemTotalMB)*mbToBytes) + + fmt.Fprintf(w, "# HELP llamaswap_memory_used_bytes Used memory in bytes\n") + fmt.Fprintf(w, "# TYPE llamaswap_memory_used_bytes gauge\n") + fmt.Fprintf(w, "llamaswap_memory_used_bytes %d\n", int64(s.MemUsedMB)*mbToBytes) + + fmt.Fprintf(w, "# HELP llamaswap_memory_free_bytes Free memory in bytes\n") + fmt.Fprintf(w, "# TYPE llamaswap_memory_free_bytes gauge\n") + fmt.Fprintf(w, "llamaswap_memory_free_bytes %d\n", int64(s.MemFreeMB)*mbToBytes) + + fmt.Fprintf(w, "# HELP llamaswap_swap_total_bytes Total swap in bytes\n") + fmt.Fprintf(w, "# TYPE llamaswap_swap_total_bytes gauge\n") + fmt.Fprintf(w, "llamaswap_swap_total_bytes %d\n", int64(s.SwapTotalMB)*mbToBytes) + + fmt.Fprintf(w, "# HELP llamaswap_swap_used_bytes Used swap in bytes\n") + fmt.Fprintf(w, "# TYPE llamaswap_swap_used_bytes gauge\n") + fmt.Fprintf(w, "llamaswap_swap_used_bytes %d\n", int64(s.SwapUsedMB)*mbToBytes) + + fmt.Fprintf(w, "# 
HELP llamaswap_load_average Load average\n") + fmt.Fprintf(w, "# TYPE llamaswap_load_average gauge\n") + fmt.Fprintf(w, "llamaswap_load_average{interval=\"1m\"} %g\n", s.LoadAvg1) + fmt.Fprintf(w, "llamaswap_load_average{interval=\"5m\"} %g\n", s.LoadAvg5) + fmt.Fprintf(w, "llamaswap_load_average{interval=\"15m\"} %g\n", s.LoadAvg15) + + if len(s.NetIO) > 0 { + fmt.Fprintf(w, "# HELP llamaswap_network_bytes_total Total network bytes transferred\n") + fmt.Fprintf(w, "# TYPE llamaswap_network_bytes_total counter\n") + for _, io := range s.NetIO { + iface := sanitizeLabel(io.Name) + fmt.Fprintf(w, "llamaswap_network_bytes_total{interface=\"%s\",direction=\"recv\"} %d\n", iface, io.BytesRecv) + fmt.Fprintf(w, "llamaswap_network_bytes_total{interface=\"%s\",direction=\"sent\"} %d\n", iface, io.BytesSent) + } + } +} + +func writeGpuMetrics(w http.ResponseWriter, gpus []GpuStat) { + if len(gpus) == 0 { + return + } + + type gpuMetric struct { + help string + name string + value func(GpuStat) float64 + } + + metrics := []gpuMetric{ + {"GPU temperature in Celsius", "llamaswap_gpu_temperature_celsius", func(g GpuStat) float64 { return float64(g.TempC) }}, + {"GPU VRAM temperature in Celsius", "llamaswap_gpu_vram_temperature_celsius", func(g GpuStat) float64 { return float64(g.VramTempC) }}, + {"GPU utilization percent (0-100)", "llamaswap_gpu_util_percent", func(g GpuStat) float64 { return g.GpuUtilPct }}, + {"GPU memory utilization percent (0-100)", "llamaswap_gpu_memory_util_percent", func(g GpuStat) float64 { return g.MemUtilPct }}, + {"GPU memory used in bytes", "llamaswap_gpu_memory_used_bytes", func(g GpuStat) float64 { return float64(g.MemUsedMB) * float64(mbToBytes) }}, + {"GPU memory total in bytes", "llamaswap_gpu_memory_total_bytes", func(g GpuStat) float64 { return float64(g.MemTotalMB) * float64(mbToBytes) }}, + {"GPU fan speed percent (0-100)", "llamaswap_gpu_fan_speed_percent", func(g GpuStat) float64 { return g.FanSpeedPct }}, + {"GPU power draw in watts", 
"llamaswap_gpu_power_draw_watts", func(g GpuStat) float64 { return g.PowerDrawW }}, + } + + for _, m := range metrics { + fmt.Fprintf(w, "# HELP %s %s\n", m.name, m.help) + fmt.Fprintf(w, "# TYPE %s gauge\n", m.name) + for _, g := range gpus { + if g.UUID != "" { + fmt.Fprintf(w, "%s{id=\"%d\",name=\"%s\",uuid=\"%s\"} %g\n", + m.name, g.ID, sanitizeLabel(g.Name), sanitizeLabel(g.UUID), m.value(g)) + } else { + fmt.Fprintf(w, "%s{id=\"%d\",name=\"%s\"} %g\n", + m.name, g.ID, sanitizeLabel(g.Name), m.value(g)) + } + } + } +} + +// latestPerGPU returns the most recent GpuStat for each GPU ID, sorted by ID. +func latestPerGPU(stats []GpuStat) []GpuStat { + latest := make(map[int]GpuStat) + for _, g := range stats { + if prev, ok := latest[g.ID]; !ok || g.Timestamp.After(prev.Timestamp) { + latest[g.ID] = g + } + } + result := make([]GpuStat, 0, len(latest)) + for _, g := range latest { + result = append(result, g) + } + sort.Slice(result, func(i, j int) bool { return result[i].ID < result[j].ID }) + return result +} + +// sanitizeLabel escapes characters that are invalid in Prometheus label values. 
// The Prometheus text format requires a double quote, a backslash and a line
// feed inside a label value to be written as \", \\ and \n. The input is
// scanned once, so backslashes produced by the escaping are never escaped a
// second time.
func sanitizeLabel(s string) string {
	return labelEscaper.Replace(s)
}

// labelEscaper performs the label-value escaping for sanitizeLabel. It is
// built once at package initialisation instead of on every call, because
// sanitizeLabel runs several times per GPU on every scrape; a
// *strings.Replacer is safe for concurrent use by multiple goroutines.
var labelEscaper = strings.NewReplacer(`"`, `\"`, `\`, `\\`, "\n", `\n`)
assert.Equal(t, 1, result[1].ID) + assert.Equal(t, 2, result[2].ID) +} + +func TestWriteSysMetrics(t *testing.T) { + rec := httptest.NewRecorder() + s := SysStat{ + CpuUtilPerCore: []float64{10.5, 20.0}, + MemTotalMB: 8192, + MemUsedMB: 4096, + MemFreeMB: 4096, + SwapTotalMB: 2048, + SwapUsedMB: 512, + LoadAvg1: 1.5, + LoadAvg5: 1.2, + LoadAvg15: 0.9, + NetIO: []NetIOStat{ + {Name: "eth0", BytesRecv: 1000, BytesSent: 2000}, + }, + } + + writeSysMetrics(rec, s) + body := rec.Body.String() + + assert.Contains(t, body, `llamaswap_cpu_util_percent{core="0"} 10.5`) + assert.Contains(t, body, `llamaswap_cpu_util_percent{core="1"} 20`) + assert.Contains(t, body, "llamaswap_memory_total_bytes 8589934592") + assert.Contains(t, body, "llamaswap_memory_used_bytes 4294967296") + assert.Contains(t, body, "llamaswap_memory_free_bytes 4294967296") + assert.Contains(t, body, "llamaswap_swap_total_bytes 2147483648") + assert.Contains(t, body, "llamaswap_swap_used_bytes 536870912") + assert.Contains(t, body, `llamaswap_load_average{interval="1m"} 1.5`) + assert.Contains(t, body, `llamaswap_load_average{interval="5m"} 1.2`) + assert.Contains(t, body, `llamaswap_load_average{interval="15m"} 0.9`) + assert.Contains(t, body, `llamaswap_network_bytes_total{interface="eth0",direction="recv"} 1000`) + assert.Contains(t, body, `llamaswap_network_bytes_total{interface="eth0",direction="sent"} 2000`) +} + +func TestWriteSysMetrics_NoNetIO(t *testing.T) { + rec := httptest.NewRecorder() + writeSysMetrics(rec, SysStat{CpuUtilPerCore: []float64{5.0}}) + body := rec.Body.String() + assert.NotContains(t, body, "llamaswap_network_bytes_total") +} + +func TestWriteGpuMetrics_Empty(t *testing.T) { + rec := httptest.NewRecorder() + writeGpuMetrics(rec, nil) + assert.Empty(t, rec.Body.String()) +} + +func TestWriteGpuMetrics(t *testing.T) { + rec := httptest.NewRecorder() + gpus := []GpuStat{ + { + ID: 0, + Name: "NVIDIA RTX 4090", + UUID: "GPU-1234", + TempC: 75, + GpuUtilPct: 85.5, + MemUtilPct: 
60.0, + MemUsedMB: 8192, + MemTotalMB: 24576, + FanSpeedPct: 55.0, + PowerDrawW: 300.5, + }, + } + + writeGpuMetrics(rec, gpus) + body := rec.Body.String() + + assert.Contains(t, body, `llamaswap_gpu_temperature_celsius{id="0",name="NVIDIA RTX 4090",uuid="GPU-1234"} 75`) + assert.Contains(t, body, `llamaswap_gpu_vram_temperature_celsius{id="0",name="NVIDIA RTX 4090",uuid="GPU-1234"} 0`) + assert.Contains(t, body, `llamaswap_gpu_util_percent{id="0",name="NVIDIA RTX 4090",uuid="GPU-1234"} 85.5`) + assert.Contains(t, body, `llamaswap_gpu_memory_util_percent{id="0",name="NVIDIA RTX 4090",uuid="GPU-1234"} 60`) + assert.Contains(t, body, `llamaswap_gpu_memory_used_bytes{id="0",name="NVIDIA RTX 4090",uuid="GPU-1234"}`) + assert.Contains(t, body, `llamaswap_gpu_memory_total_bytes{id="0",name="NVIDIA RTX 4090",uuid="GPU-1234"}`) + assert.Contains(t, body, `llamaswap_gpu_fan_speed_percent{id="0",name="NVIDIA RTX 4090",uuid="GPU-1234"} 55`) + assert.Contains(t, body, `llamaswap_gpu_power_draw_watts{id="0",name="NVIDIA RTX 4090",uuid="GPU-1234"} 300.5`) +} + +func TestWriteGpuMetrics_VramTemp(t *testing.T) { + rec := httptest.NewRecorder() + gpus := []GpuStat{ + {ID: 0, Name: "AMD RX 7900", UUID: "GPU-5678", TempC: 70, VramTempC: 85}, + } + writeGpuMetrics(rec, gpus) + body := rec.Body.String() + assert.Contains(t, body, `llamaswap_gpu_temperature_celsius{id="0",name="AMD RX 7900",uuid="GPU-5678"} 70`) + assert.Contains(t, body, `llamaswap_gpu_vram_temperature_celsius{id="0",name="AMD RX 7900",uuid="GPU-5678"} 85`) +} + +func TestWriteGpuMetrics_EmptyUUID(t *testing.T) { + rec := httptest.NewRecorder() + gpus := []GpuStat{{ID: 3, Name: "AMD RX 7900", UUID: ""}} + writeGpuMetrics(rec, gpus) + body := rec.Body.String() + assert.NotContains(t, body, "uuid=") + assert.Contains(t, body, `name="AMD RX 7900"`) +} + +func TestWriteGpuMetrics_LabelSanitization(t *testing.T) { + rec := httptest.NewRecorder() + gpus := []GpuStat{ + {ID: 0, Name: `GPU "special"`, UUID: "uuid\nline"}, + } 
+ writeGpuMetrics(rec, gpus) + body := rec.Body.String() + assert.Contains(t, body, `name="GPU \"special\""`) + assert.Contains(t, body, `uuid="uuid\nline"`) +} + +func TestMetricsHandler_ContentType(t *testing.T) { + m, err := New(config.PerformanceConfig{}, newTestLogger()) + require.NoError(t, err) + + req := httptest.NewRequest(http.MethodGet, "/metrics", nil) + rec := httptest.NewRecorder() + m.MetricsHandler()(rec, req) + + assert.Equal(t, "text/plain; version=0.0.4; charset=utf-8", rec.Header().Get("Content-Type")) +} + +func TestMetricsHandler_EmptyStats(t *testing.T) { + m, err := New(config.PerformanceConfig{}, newTestLogger()) + require.NoError(t, err) + + req := httptest.NewRequest(http.MethodGet, "/metrics", nil) + rec := httptest.NewRecorder() + m.MetricsHandler()(rec, req) + + assert.Equal(t, http.StatusOK, rec.Code) + assert.Empty(t, strings.TrimSpace(rec.Body.String())) +} + +func TestMetricsHandler_WithSysStats(t *testing.T) { + m, err := New(config.PerformanceConfig{}, newTestLogger()) + require.NoError(t, err) + + m.sysRing.Push(SysStat{Timestamp: time.Now(), CpuUtilPerCore: []float64{25.0}, MemTotalMB: 4096, MemUsedMB: 2048, MemFreeMB: 2048}) + + req := httptest.NewRequest(http.MethodGet, "/metrics", nil) + rec := httptest.NewRecorder() + m.MetricsHandler()(rec, req) + + body := rec.Body.String() + assert.Contains(t, body, "llamaswap_cpu_util_percent") + assert.Contains(t, body, "llamaswap_memory_total_bytes") +} + +func TestMetricsHandler_UsesLatestSysStat(t *testing.T) { + m, err := New(config.PerformanceConfig{}, newTestLogger()) + require.NoError(t, err) + + now := time.Now() + m.sysRing.Push(SysStat{Timestamp: now.Add(-time.Second), MemTotalMB: 1000}) + m.sysRing.Push(SysStat{Timestamp: now, MemTotalMB: 8192}) + + req := httptest.NewRequest(http.MethodGet, "/metrics", nil) + rec := httptest.NewRecorder() + m.MetricsHandler()(rec, req) + + body := rec.Body.String() + // 8192 MB = 8589934592 bytes + assert.Contains(t, body, 
// GpuStat is a single reading of one GPU as produced by a GPU backend
// (LACT or nvidia-smi) and exported as JSON and Prometheus metrics.
type GpuStat struct {
	// Timestamp is when the reading was parsed (time.Now() in the backend).
	Timestamp time.Time `json:"timestamp"`

	// ID is the nvidia-smi index, or the device's position in the LACT list.
	ID int `json:"id"`
	// Name is the device name reported by the backend.
	Name string `json:"name"`
	// UUID is the nvidia-smi uuid, or the LACT device id.
	UUID string `json:"uuid"`
	// TempC is the GPU temperature in degrees Celsius.
	TempC int `json:"temp_c"`
	// VramTempC is the VRAM temperature in degrees Celsius; only the LACT
	// backend fills it, otherwise it stays 0.
	VramTempC int `json:"vram_temp_c"`
	// GpuUtilPct is GPU utilization in percent (0-100).
	GpuUtilPct float64 `json:"gpu_util_pct"`
	// MemUtilPct is MemUsedMB / MemTotalMB * 100, or 0 when the total is 0.
	MemUtilPct float64 `json:"mem_util_pct"`
	// MemUsedMB is GPU memory in use, in megabytes.
	MemUsedMB int `json:"mem_used_mb"`
	// MemTotalMB is total GPU memory, in megabytes.
	MemTotalMB int `json:"mem_total_mb"`
	// FanSpeedPct is the fan speed in percent (0-100).
	FanSpeedPct float64 `json:"fan_speed_pct"`
	// PowerDrawW is the power draw in watts.
	PowerDrawW float64 `json:"power_draw_w"`
}
// Buffer is a fixed-capacity ring buffer that keeps the most recent values
// pushed into it. It is not safe for concurrent use; callers synchronise
// access themselves.
//
// The zero value is usable and behaves like a buffer of capacity one.
type Buffer[T any] struct {
	buf  []T // backing storage; len(buf) is the capacity
	head int // index of the oldest live entry
	size int // number of live entries, at most len(buf)
}

// NewBuffer returns an empty buffer holding up to capacity entries.
// A capacity below one is raised to one.
func NewBuffer[T any](capacity int) Buffer[T] {
	if capacity < 1 {
		capacity = 1
	}
	return Buffer[T]{buf: make([]T, capacity)}
}

// Push adds v, overwriting the oldest entry when the buffer is full.
func (r *Buffer[T]) Push(v T) {
	if len(r.buf) == 0 {
		// Zero-value Buffer: allocate the minimum NewBuffer would, instead
		// of dividing by a zero capacity below.
		r.buf = make([]T, 1)
	}

	n := len(r.buf)
	if r.size < n {
		r.buf[(r.head+r.size)%n] = v
		r.size++
		return
	}

	// Full: the slot of the oldest entry becomes the newest one.
	r.buf[r.head] = v
	r.head = (r.head + 1) % n
}

// Slice returns all entries in insertion order as a new slice.
// It returns nil when the buffer is empty.
func (r *Buffer[T]) Slice() []T {
	if r.size == 0 {
		return nil
	}

	out := make([]T, r.size)

	// The live entries form at most two contiguous runs: from head to the
	// end of the backing array, then from the start of the array.
	end := r.head + r.size
	if end > len(r.buf) {
		end = len(r.buf)
	}
	n := copy(out, r.buf[r.head:end])
	copy(out[n:], r.buf[:r.size-n])
	return out
}
"github.com/stretchr/testify/assert" +) + +func TestBuffer_EmptySliceIsNil(t *testing.T) { + b := NewBuffer[int](4) + assert.Nil(t, b.Slice()) +} + +func TestBuffer_PushBelowCapacity(t *testing.T) { + b := NewBuffer[int](4) + b.Push(1) + b.Push(2) + assert.Equal(t, []int{1, 2}, b.Slice()) +} + +func TestBuffer_PushAtCapacity(t *testing.T) { + b := NewBuffer[int](3) + b.Push(1) + b.Push(2) + b.Push(3) + assert.Equal(t, []int{1, 2, 3}, b.Slice()) +} + +func TestBuffer_PushOverCapacityEvictsOldest(t *testing.T) { + b := NewBuffer[int](3) + b.Push(1) + b.Push(2) + b.Push(3) + b.Push(4) + assert.Equal(t, []int{2, 3, 4}, b.Slice()) +} + +func TestBuffer_CapacityOne(t *testing.T) { + b := NewBuffer[int](1) + b.Push(1) + b.Push(2) + assert.Equal(t, []int{2}, b.Slice()) +} + +func TestBuffer_ZeroCapacityDefaultsToOne(t *testing.T) { + b := NewBuffer[int](0) + b.Push(42) + assert.Equal(t, []int{42}, b.Slice()) +} + +func TestBuffer_SliceReturnsCopy(t *testing.T) { + b := NewBuffer[int](4) + b.Push(10) + s := b.Slice() + s[0] = 99 + assert.Equal(t, []int{10}, b.Slice()) +} + +func TestBuffer_InsertionOrderPreservedAfterWrap(t *testing.T) { + b := NewBuffer[int](4) + for i := 1; i <= 8; i++ { + b.Push(i) + } + assert.Equal(t, []int{5, 6, 7, 8}, b.Slice()) +} diff --git a/llama-swap.go b/llama-swap.go index 771c52b0..3c3055f2 100644 --- a/llama-swap.go +++ b/llama-swap.go @@ -15,6 +15,8 @@ import ( "github.com/gin-gonic/gin" "github.com/mostlygeek/llama-swap/event" + "github.com/mostlygeek/llama-swap/internal/logmon" + "github.com/mostlygeek/llama-swap/internal/perf" "github.com/mostlygeek/llama-swap/proxy" "github.com/mostlygeek/llama-swap/proxy/config" "github.com/mostlygeek/llama-swap/proxy/configwatcher" @@ -34,7 +36,7 @@ func main() { keyFile := flag.String("tls-key-file", "", "TLS key file") showVersion := flag.Bool("version", false, "show version of build") watchConfig := flag.Bool("watch-config", false, "Automatically reload config file on change") - mainLogger := 
proxy.NewLogMonitor() + mainLogger := logmon.New() flag.Parse() // Parse the command-line flags @@ -45,7 +47,7 @@ func main() { conf, err := config.LoadConfig(*configPath) if err != nil { - mainLogger.Errorf("Error loading config: %", err) + mainLogger.Errorf("Error loading config: %v", err) os.Exit(1) } @@ -55,15 +57,15 @@ func main() { switch strings.ToLower(strings.TrimSpace(conf.LogLevel)) { case "debug": - mainLogger.SetLogLevel(proxy.LevelDebug) + mainLogger.SetLogLevel(logmon.LevelDebug) case "info": - mainLogger.SetLogLevel(proxy.LevelInfo) + mainLogger.SetLogLevel(logmon.LevelInfo) case "warn": - mainLogger.SetLogLevel(proxy.LevelWarn) + mainLogger.SetLogLevel(logmon.LevelWarn) case "error": - mainLogger.SetLogLevel(proxy.LevelError) + mainLogger.SetLogLevel(logmon.LevelError) default: - mainLogger.SetLogLevel(proxy.LevelInfo) + mainLogger.SetLogLevel(logmon.LevelInfo) } mainLogger.Debugf("PID: %d", os.Getpid()) @@ -91,6 +93,13 @@ func main() { listenStr = &defaultPort } + mon, err := perf.New(conf.Performance, mainLogger) + if err != nil { + mainLogger.Errorf("failed to create monitor: %s", err.Error()) + os.Exit(1) + } + mon.Start() + // Setup channels for server management exitChan := make(chan struct{}) sigChan := make(chan os.Signal, 1) @@ -121,8 +130,8 @@ func main() { reloadMutex.Unlock() }() - mainLogger.Info("Reloading Configuration") if currentPM, ok := srv.Handler.(*proxy.ProxyManager); ok { + mainLogger.Info("Reloading Configuration") conf, err = config.LoadConfig(*configPath) if err != nil { mainLogger.Warnf("Unable to reload configuration: %v", err) @@ -131,8 +140,10 @@ func main() { mainLogger.Debug("Configuration Changed") currentPM.Shutdown() + mon.UpdateConfig(conf.Performance) newPM := proxy.New(conf) newPM.SetVersion(date, commit, version) + newPM.SetPerfMonitor(mon) srv.Handler = newPM mainLogger.Debug("Configuration Reloaded") @@ -150,6 +161,7 @@ func main() { } newPM := proxy.New(conf) newPM.SetVersion(date, commit, version) + 
newPM.SetPerfMonitor(mon) srv.Handler = newPM } } @@ -185,6 +197,7 @@ func main() { reloadProxyManager() case syscall.SIGINT, syscall.SIGTERM: mainLogger.Debugf("Received signal %v, shutting down...", sig) + mon.Stop() watcherCancel() ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) defer cancel() diff --git a/proxy/config/config.go b/proxy/config/config.go index 71b5594e..4549bf1a 100644 --- a/proxy/config/config.go +++ b/proxy/config/config.go @@ -9,6 +9,7 @@ import ( "runtime" "sort" "strings" + "time" "github.com/billziss-gh/golib/shlex" "gopkg.in/yaml.v3" @@ -124,6 +125,7 @@ type Config struct { LogToStdout string `yaml:"logToStdout"` MetricsMaxInMemory int `yaml:"metricsMaxInMemory"` CaptureBuffer int `yaml:"captureBuffer"` + Performance PerformanceConfig `yaml:"performance"` GlobalTTL int `yaml:"globalTTL"` Models map[string]ModelConfig `yaml:"models"` /* key is model ID */ Profiles map[string][]string `yaml:"profiles"` @@ -220,6 +222,17 @@ func LoadConfigFromReader(r io.Reader) (Config, error) { config.HealthCheckTimeout = 15 } + // Apply defaults for performance config when section is missing + if !config.Performance.Enable && config.Performance.Every == 0 && config.Performance.MaxAge == 0 && config.Performance.GC == 0 { + config.Performance.Enable = true + config.Performance.Every = 15 * time.Second + config.Performance.MaxAge = 1 * time.Hour + config.Performance.GC = 5 * time.Minute + } + if err = config.Performance.Validate(); err != nil { + return Config{}, fmt.Errorf("performance: %w", err) + } + if config.StartPort < 1 { return Config{}, fmt.Errorf("startPort must be greater than 1") } diff --git a/proxy/config/config_posix_test.go b/proxy/config/config_posix_test.go index 4124092e..5c35c310 100644 --- a/proxy/config/config_posix_test.go +++ b/proxy/config/config_posix_test.go @@ -7,6 +7,7 @@ import ( "path/filepath" "strings" "testing" + "time" "github.com/stretchr/testify/assert" ) @@ -229,6 +230,12 @@ groups: 
HealthCheckTimeout: 15, MetricsMaxInMemory: 1000, CaptureBuffer: 5, + Performance: PerformanceConfig{ + Enable: true, + Every: 15 * time.Second, + MaxAge: 1 * time.Hour, + GC: 5 * time.Minute, + }, Profiles: map[string][]string{ "test": {"model1", "model2"}, }, diff --git a/proxy/config/config_windows_test.go b/proxy/config/config_windows_test.go index bad61b82..f4372be2 100644 --- a/proxy/config/config_windows_test.go +++ b/proxy/config/config_windows_test.go @@ -7,6 +7,7 @@ import ( "path/filepath" "strings" "testing" + "time" "github.com/stretchr/testify/assert" ) @@ -218,6 +219,12 @@ groups: HealthCheckTimeout: 15, MetricsMaxInMemory: 1000, CaptureBuffer: 5, + Performance: PerformanceConfig{ + Enable: true, + Every: 15 * time.Second, + MaxAge: 1 * time.Hour, + GC: 5 * time.Minute, + }, Profiles: map[string][]string{ "test": {"model1", "model2"}, }, diff --git a/proxy/config/performance.go b/proxy/config/performance.go new file mode 100644 index 00000000..8144acaf --- /dev/null +++ b/proxy/config/performance.go @@ -0,0 +1,45 @@ +package config + +import ( + "fmt" + "time" +) + +// PerformanceConfig holds configuration for system performance monitoring +type PerformanceConfig struct { + Enable bool `yaml:"enable"` + Every time.Duration `yaml:"every"` + MaxAge time.Duration `yaml:"maxAge"` + GC time.Duration `yaml:"gc"` +} + +func (p *PerformanceConfig) UnmarshalYAML(unmarshal func(interface{}) error) error { + type rawPerformanceConfig PerformanceConfig + defaults := rawPerformanceConfig{ + Enable: true, + Every: 15 * time.Second, + MaxAge: 1 * time.Hour, + GC: 5 * time.Minute, + } + + if err := unmarshal(&defaults); err != nil { + return err + } + + *p = PerformanceConfig(defaults) + return nil +} + +// Validate checks the PerformanceConfig values and returns an error if invalid +func (p *PerformanceConfig) Validate() error { + if p.Every < time.Second { + return fmt.Errorf("every must be at least 1s, got %v", p.Every) + } + if p.MaxAge <= 0 { + return 
fmt.Errorf("maxAge must be greater than 0, got %v", p.MaxAge) + } + if p.GC <= 0 { + return fmt.Errorf("gc must be greater than 0, got %v", p.GC) + } + return nil +} diff --git a/proxy/config/performance_test.go b/proxy/config/performance_test.go new file mode 100644 index 00000000..1284a85f --- /dev/null +++ b/proxy/config/performance_test.go @@ -0,0 +1,140 @@ +package config + +import ( + "strings" + "testing" + "time" + + "github.com/stretchr/testify/assert" +) + +func TestPerformanceConfig_Defaults(t *testing.T) { + content := ` +models: + model1: + cmd: path/to/cmd --port ${PORT} +` + config, err := LoadConfigFromReader(strings.NewReader(content)) + assert.NoError(t, err) + + // When performance section is missing, defaults should be applied + assert.True(t, config.Performance.Enable) + assert.Equal(t, 15*time.Second, config.Performance.Every) + assert.Equal(t, 1*time.Hour, config.Performance.MaxAge) + assert.Equal(t, 5*time.Minute, config.Performance.GC) +} + +func TestPerformanceConfig_CustomValues(t *testing.T) { + content := ` +performance: + enable: true + every: 30s + maxAge: 12h + gc: 10m +models: + model1: + cmd: path/to/cmd --port ${PORT} +` + config, err := LoadConfigFromReader(strings.NewReader(content)) + assert.NoError(t, err) + + assert.True(t, config.Performance.Enable) + assert.Equal(t, 30*time.Second, config.Performance.Every) + assert.Equal(t, 12*time.Hour, config.Performance.MaxAge) + assert.Equal(t, 10*time.Minute, config.Performance.GC) +} + +func TestPerformanceConfig_Disabled(t *testing.T) { + content := ` +performance: + enable: false +models: + model1: + cmd: path/to/cmd --port ${PORT} +` + config, err := LoadConfigFromReader(strings.NewReader(content)) + assert.NoError(t, err) + + assert.False(t, config.Performance.Enable) + // Duration defaults should still apply + assert.Equal(t, 15*time.Second, config.Performance.Every) + assert.Equal(t, 1*time.Hour, config.Performance.MaxAge) + assert.Equal(t, 5*time.Minute, config.Performance.GC) 
+} + +func TestPerformanceConfig_PartialValues(t *testing.T) { + content := ` +performance: + every: 10s + maxAge: 6h +models: + model1: + cmd: path/to/cmd --port ${PORT} +` + config, err := LoadConfigFromReader(strings.NewReader(content)) + assert.NoError(t, err) + + // enable should default to true + assert.True(t, config.Performance.Enable) + assert.Equal(t, 10*time.Second, config.Performance.Every) + assert.Equal(t, 6*time.Hour, config.Performance.MaxAge) + // gc should use default + assert.Equal(t, 5*time.Minute, config.Performance.GC) +} + +func TestPerformanceConfig_InvalidEvery(t *testing.T) { + content := ` +performance: + every: 500ms +models: + model1: + cmd: path/to/cmd --port ${PORT} +` + _, err := LoadConfigFromReader(strings.NewReader(content)) + assert.Error(t, err) + assert.Contains(t, err.Error(), "every must be at least 1s") +} + +func TestPerformanceConfig_InvalidMaxAge(t *testing.T) { + content := ` +performance: + maxAge: 0s +models: + model1: + cmd: path/to/cmd --port ${PORT} +` + _, err := LoadConfigFromReader(strings.NewReader(content)) + assert.Error(t, err) + assert.Contains(t, err.Error(), "maxAge must be greater than 0") +} + +func TestPerformanceConfig_InvalidGC(t *testing.T) { + content := ` +performance: + gc: 0s +models: + model1: + cmd: path/to/cmd --port ${PORT} +` + _, err := LoadConfigFromReader(strings.NewReader(content)) + assert.Error(t, err) + assert.Contains(t, err.Error(), "gc must be greater than 0") +} + +func TestPerformanceConfig_ComplexDurations(t *testing.T) { + content := ` +performance: + every: 1m30s + maxAge: 2h10m + gc: 1m +models: + model1: + cmd: path/to/cmd --port ${PORT} +` + config, err := LoadConfigFromReader(strings.NewReader(content)) + assert.NoError(t, err) + + assert.Equal(t, 90*time.Second, config.Performance.Every) + assert.Equal(t, (2*time.Hour)+(10*time.Minute), config.Performance.MaxAge) + assert.Equal(t, 1*time.Minute, config.Performance.GC) +} diff --git a/proxy/events.go b/proxy/events.go 
index 9daec0c9..3b0b2a49 100644 --- a/proxy/events.go +++ b/proxy/events.go @@ -5,7 +5,6 @@ package proxy const ProcessStateChangeEventID = 0x01 const ChatCompletionStatsEventID = 0x02 const ConfigFileChangedEventID = 0x03 -const LogDataEventID = 0x04 const ActivityLogEventID = 0x05 const ModelPreloadedEventID = 0x06 const InFlightRequestsEventID = 0x07 @@ -43,14 +42,6 @@ func (e ConfigFileChangedEvent) Type() uint32 { return ConfigFileChangedEventID } -type LogDataEvent struct { - Data []byte -} - -func (e LogDataEvent) Type() uint32 { - return LogDataEventID -} - type ModelPreloadedEvent struct { ModelName string Success bool diff --git a/proxy/helpers_test.go b/proxy/helpers_test.go index eb19c7af..256f8cea 100644 --- a/proxy/helpers_test.go +++ b/proxy/helpers_test.go @@ -15,6 +15,7 @@ import ( "time" "github.com/gin-gonic/gin" + "github.com/mostlygeek/llama-swap/internal/logmon" "github.com/mostlygeek/llama-swap/proxy/config" "github.com/stretchr/testify/require" "github.com/tidwall/gjson" @@ -24,7 +25,7 @@ import ( var ( nextTestPort int = 12000 portMutex sync.Mutex - testLogger = NewLogMonitorWriter(os.Stdout) + testLogger = logmon.NewWriter(os.Stdout) simpleResponderPath = getSimpleResponderPath() ) @@ -40,13 +41,13 @@ func TestMain(m *testing.M) { switch os.Getenv("LOG_LEVEL") { case "debug": - testLogger.SetLogLevel(LevelDebug) + testLogger.SetLogLevel(logmon.LevelDebug) case "warn": - testLogger.SetLogLevel(LevelWarn) + testLogger.SetLogLevel(logmon.LevelWarn) case "info": - testLogger.SetLogLevel(LevelInfo) + testLogger.SetLogLevel(logmon.LevelInfo) default: - testLogger.SetLogLevel(LevelWarn) + testLogger.SetLogLevel(logmon.LevelWarn) } m.Run() diff --git a/proxy/matrix.go b/proxy/matrix.go index 16ed202a..feb0f12c 100644 --- a/proxy/matrix.go +++ b/proxy/matrix.go @@ -7,6 +7,7 @@ import ( "sort" "sync" + "github.com/mostlygeek/llama-swap/internal/logmon" "github.com/mostlygeek/llama-swap/proxy/config" ) @@ -145,8 +146,8 @@ type Matrix struct { solver 
*MatrixSolver processes map[string]*Process // all processes keyed by real model name config config.Config - proxyLogger *LogMonitor - upstreamLogger *LogMonitor + proxyLogger *logmon.Monitor + upstreamLogger *logmon.Monitor // inflight tracks ProxyRequest calls that have released m.Lock but may // not yet have incremented Process.inFlightRequests. A concurrent @@ -165,10 +166,10 @@ type Matrix struct { // NewMatrix creates a Matrix from config. It creates a Process for every // model defined in the config (any model can run alone even if not in a set). -func NewMatrix(cfg config.Config, proxyLogger, upstreamLogger *LogMonitor) *Matrix { +func NewMatrix(cfg config.Config, proxyLogger, upstreamLogger *logmon.Monitor) *Matrix { processes := make(map[string]*Process) for modelID, modelConfig := range cfg.Models { - processLogger := NewLogMonitorWriter(upstreamLogger) + processLogger := logmon.NewWriter(upstreamLogger) process := NewProcess(modelID, cfg.HealthCheckTimeout, modelConfig, processLogger, proxyLogger) processes[modelID] = process } diff --git a/proxy/metrics_monitor.go b/proxy/metrics_monitor.go index 8a671030..62b9bbf2 100644 --- a/proxy/metrics_monitor.go +++ b/proxy/metrics_monitor.go @@ -16,6 +16,8 @@ import ( "github.com/gin-gonic/gin" "github.com/klauspost/compress/zstd" "github.com/mostlygeek/llama-swap/event" + "github.com/mostlygeek/llama-swap/internal/logmon" + "github.com/mostlygeek/llama-swap/internal/ring" "github.com/mostlygeek/llama-swap/proxy/cache" "github.com/tidwall/gjson" ) @@ -113,11 +115,10 @@ func (e ActivityLogEvent) Type() uint32 { // metricsMonitor parses llama-server output for token statistics type metricsMonitor struct { - mu sync.RWMutex - metrics []ActivityLogEntry - maxMetrics int - nextID int - logger *LogMonitor + mu sync.RWMutex + metrics ring.Buffer[ActivityLogEntry] + nextID int + logger *logmon.Monitor // capture fields enableCaptures bool @@ -126,10 +127,10 @@ type metricsMonitor struct { // newMetricsMonitor creates a 
new metricsMonitor. captureBufferMB is the // capture buffer size in megabytes; 0 disables captures. -func newMetricsMonitor(logger *LogMonitor, maxMetrics int, captureBufferMB int) *metricsMonitor { +func newMetricsMonitor(logger *logmon.Monitor, maxMetrics int, captureBufferMB int) *metricsMonitor { mm := &metricsMonitor{ logger: logger, - maxMetrics: maxMetrics, + metrics: ring.NewBuffer[ActivityLogEntry](maxMetrics), enableCaptures: captureBufferMB > 0, } if captureBufferMB > 0 { @@ -146,10 +147,7 @@ func (mp *metricsMonitor) queueMetrics(metric ActivityLogEntry) int { metric.ID = mp.nextID mp.nextID++ - mp.metrics = append(mp.metrics, metric) - if len(mp.metrics) > mp.maxMetrics { - mp.metrics = mp.metrics[len(mp.metrics)-mp.maxMetrics:] - } + mp.metrics.Push(metric) return metric.ID } @@ -213,30 +211,36 @@ func (mp *metricsMonitor) getCaptureByID(id int) *ReqRespCapture { return capture } -// getMetrics returns a copy of the current metrics +// getMetrics returns a copy of the current metrics with HasCapture resolved from cache. func (mp *metricsMonitor) getMetrics() []ActivityLogEntry { mp.mu.RLock() defer mp.mu.RUnlock() - result := make([]ActivityLogEntry, len(mp.metrics)) - copy(result, mp.metrics) + result := mp.metrics.Slice() + if result == nil { + return []ActivityLogEntry{} + } + if mp.captureCache != nil { + for i := range result { + result[i].HasCapture = mp.captureCache.Has(result[i].ID) + } + } return result } -// getMetricsJSON returns metrics as JSON +// getMetricsJSON returns metrics as JSON with HasCapture resolved from cache. 
func (mp *metricsMonitor) getMetricsJSON() ([]byte, error) { mp.mu.RLock() defer mp.mu.RUnlock() - if mp.captureCache == nil { - return json.Marshal(mp.metrics) + result := mp.metrics.Slice() + if result == nil { + return json.Marshal([]ActivityLogEntry{}) } - - // Make a copy with up-to-date has_capture from cache - result := make([]ActivityLogEntry, len(mp.metrics)) - for i, m := range mp.metrics { - m.HasCapture = mp.captureCache.Has(m.ID) - result[i] = m + if mp.captureCache != nil { + for i := range result { + result[i].HasCapture = mp.captureCache.Has(result[i].ID) + } } return json.Marshal(result) } @@ -412,9 +416,6 @@ func (mp *metricsMonitor) wrapHandler( capture.ID = metricID if mp.addCapture(*capture) { tm.HasCapture = true - mp.mu.Lock() - mp.metrics[len(mp.metrics)-1].HasCapture = true - mp.mu.Unlock() } } diff --git a/proxy/peerproxy.go b/proxy/peerproxy.go index cd0d1dc4..5350fa3e 100644 --- a/proxy/peerproxy.go +++ b/proxy/peerproxy.go @@ -10,6 +10,7 @@ import ( "strings" "time" + "github.com/mostlygeek/llama-swap/internal/logmon" "github.com/mostlygeek/llama-swap/proxy/config" ) @@ -24,7 +25,7 @@ type PeerProxy struct { proxyMap map[string]*peerProxyMember } -func NewPeerProxy(peers config.PeerDictionaryConfig, proxyLogger *LogMonitor) (*PeerProxy, error) { +func NewPeerProxy(peers config.PeerDictionaryConfig, proxyLogger *logmon.Monitor) (*PeerProxy, error) { proxyMap := make(map[string]*peerProxyMember) // Sort peer IDs for consistent iteration order diff --git a/proxy/process.go b/proxy/process.go index 06bfb577..b679fc93 100644 --- a/proxy/process.go +++ b/proxy/process.go @@ -18,6 +18,7 @@ import ( "time" "github.com/mostlygeek/llama-swap/event" + "github.com/mostlygeek/llama-swap/internal/logmon" "github.com/mostlygeek/llama-swap/proxy/config" ) @@ -53,8 +54,8 @@ type Process struct { // closed when command exits cmdWaitChan chan struct{} - processLogger *LogMonitor - proxyLogger *LogMonitor + processLogger *logmon.Monitor + proxyLogger 
*logmon.Monitor healthCheckTimeout int healthCheckLoopInterval time.Duration @@ -84,7 +85,7 @@ type Process struct { failedStartCount int } -func NewProcess(ID string, healthCheckTimeout int, config config.ModelConfig, processLogger *LogMonitor, proxyLogger *LogMonitor) *Process { +func NewProcess(ID string, healthCheckTimeout int, config config.ModelConfig, processLogger *logmon.Monitor, proxyLogger *logmon.Monitor) *Process { concurrentLimit := 10 if config.ConcurrencyLimit > 0 { concurrentLimit = config.ConcurrencyLimit @@ -149,7 +150,7 @@ func NewProcess(ID string, healthCheckTimeout int, config config.ModelConfig, pr } // LogMonitor returns the log monitor associated with the process. -func (p *Process) LogMonitor() *LogMonitor { +func (p *Process) LogMonitor() *logmon.Monitor { return p.processLogger } @@ -726,7 +727,7 @@ func (p *Process) cmdStopUpstreamProcess() error { } // Logger returns the logger for this process. -func (p *Process) Logger() *LogMonitor { +func (p *Process) Logger() *logmon.Monitor { return p.processLogger } diff --git a/proxy/process_test.go b/proxy/process_test.go index 47b28389..192f2eca 100644 --- a/proxy/process_test.go +++ b/proxy/process_test.go @@ -11,20 +11,21 @@ import ( "testing" "time" + "github.com/mostlygeek/llama-swap/internal/logmon" "github.com/mostlygeek/llama-swap/proxy/config" "github.com/stretchr/testify/assert" ) var ( - debugLogger = NewLogMonitorWriter(os.Stdout) + debugLogger = logmon.NewWriter(os.Stdout) ) func init() { // flip to help with debugging tests if false { - debugLogger.SetLogLevel(LevelDebug) + debugLogger.SetLogLevel(logmon.LevelDebug) } else { - debugLogger.SetLogLevel(LevelError) + debugLogger.SetLogLevel(logmon.LevelError) } } @@ -585,7 +586,7 @@ func TestProcess_CustomTimeouts(t *testing.T) { }, } - debugLogger := NewLogMonitorWriter(io.Discard) + debugLogger := logmon.NewWriter(io.Discard) process := NewProcess("test-model", 30, modelConfig, debugLogger, debugLogger) // Verify the process was 
created successfully diff --git a/proxy/processgroup.go b/proxy/processgroup.go index c3055e0c..aaa24d97 100644 --- a/proxy/processgroup.go +++ b/proxy/processgroup.go @@ -6,6 +6,7 @@ import ( "slices" "sync" + "github.com/mostlygeek/llama-swap/internal/logmon" "github.com/mostlygeek/llama-swap/proxy/config" ) @@ -18,8 +19,8 @@ type ProcessGroup struct { exclusive bool persistent bool - proxyLogger *LogMonitor - upstreamLogger *LogMonitor + proxyLogger *logmon.Monitor + upstreamLogger *logmon.Monitor // map of current processes processes map[string]*Process @@ -42,7 +43,7 @@ type ProcessGroup struct { testDelayFastPath func() } -func NewProcessGroup(id string, config config.Config, proxyLogger *LogMonitor, upstreamLogger *LogMonitor) *ProcessGroup { +func NewProcessGroup(id string, config config.Config, proxyLogger *logmon.Monitor, upstreamLogger *logmon.Monitor) *ProcessGroup { groupConfig, ok := config.Groups[id] if !ok { panic("Unable to find configuration for group id: " + id) @@ -62,7 +63,7 @@ func NewProcessGroup(id string, config config.Config, proxyLogger *LogMonitor, u // Create a Process for each member in the group for _, modelID := range groupConfig.Members { modelConfig, modelID, _ := pg.config.FindConfig(modelID) - processLogger := NewLogMonitorWriter(upstreamLogger) + processLogger := logmon.NewWriter(upstreamLogger) process := NewProcess(modelID, pg.config.HealthCheckTimeout, modelConfig, processLogger, pg.proxyLogger) pg.processes[modelID] = process } diff --git a/proxy/proxymanager.go b/proxy/proxymanager.go index 4d99bdbc..8293d1a5 100644 --- a/proxy/proxymanager.go +++ b/proxy/proxymanager.go @@ -17,6 +17,8 @@ import ( "github.com/gin-gonic/gin" "github.com/mostlygeek/llama-swap/event" + "github.com/mostlygeek/llama-swap/internal/logmon" + "github.com/mostlygeek/llama-swap/internal/perf" "github.com/mostlygeek/llama-swap/proxy/config" "github.com/tidwall/gjson" "github.com/tidwall/sjson" @@ -69,11 +71,12 @@ type ProxyManager struct { ginEngine 
*gin.Engine // logging - proxyLogger *LogMonitor - upstreamLogger *LogMonitor - muxLogger *LogMonitor + proxyLogger *logmon.Monitor + upstreamLogger *logmon.Monitor + muxLogger *logmon.Monitor metricsMonitor *metricsMonitor + perfMonitor *perf.Monitor processGroups map[string]*ProcessGroup @@ -98,27 +101,27 @@ type ProxyManager struct { func New(proxyConfig config.Config) *ProxyManager { // set up loggers - var muxLogger, upstreamLogger, proxyLogger *LogMonitor + var muxLogger, upstreamLogger, proxyLogger *logmon.Monitor switch proxyConfig.LogToStdout { case config.LogToStdoutNone: - muxLogger = NewLogMonitorWriter(io.Discard) - upstreamLogger = NewLogMonitorWriter(io.Discard) - proxyLogger = NewLogMonitorWriter(io.Discard) + muxLogger = logmon.NewWriter(io.Discard) + upstreamLogger = logmon.NewWriter(io.Discard) + proxyLogger = logmon.NewWriter(io.Discard) case config.LogToStdoutBoth: - muxLogger = NewLogMonitorWriter(os.Stdout) - upstreamLogger = NewLogMonitorWriter(muxLogger) - proxyLogger = NewLogMonitorWriter(muxLogger) + muxLogger = logmon.NewWriter(os.Stdout) + upstreamLogger = logmon.NewWriter(muxLogger) + proxyLogger = logmon.NewWriter(muxLogger) case config.LogToStdoutUpstream: - muxLogger = NewLogMonitorWriter(os.Stdout) - upstreamLogger = NewLogMonitorWriter(muxLogger) - proxyLogger = NewLogMonitorWriter(io.Discard) + muxLogger = logmon.NewWriter(os.Stdout) + upstreamLogger = logmon.NewWriter(muxLogger) + proxyLogger = logmon.NewWriter(io.Discard) default: // same as config.LogToStdoutProxy // helpful because some old tests create a config.Config directly and it // may not have LogToStdout set explicitly - muxLogger = NewLogMonitorWriter(os.Stdout) - upstreamLogger = NewLogMonitorWriter(io.Discard) - proxyLogger = NewLogMonitorWriter(muxLogger) + muxLogger = logmon.NewWriter(os.Stdout) + upstreamLogger = logmon.NewWriter(io.Discard) + proxyLogger = logmon.NewWriter(muxLogger) } if proxyConfig.LogRequests { @@ -127,20 +130,20 @@ func New(proxyConfig 
config.Config) *ProxyManager { switch strings.ToLower(strings.TrimSpace(proxyConfig.LogLevel)) { case "debug": - proxyLogger.SetLogLevel(LevelDebug) - upstreamLogger.SetLogLevel(LevelDebug) + proxyLogger.SetLogLevel(logmon.LevelDebug) + upstreamLogger.SetLogLevel(logmon.LevelDebug) case "info": - proxyLogger.SetLogLevel(LevelInfo) - upstreamLogger.SetLogLevel(LevelInfo) + proxyLogger.SetLogLevel(logmon.LevelInfo) + upstreamLogger.SetLogLevel(logmon.LevelInfo) case "warn": - proxyLogger.SetLogLevel(LevelWarn) - upstreamLogger.SetLogLevel(LevelWarn) + proxyLogger.SetLogLevel(logmon.LevelWarn) + upstreamLogger.SetLogLevel(logmon.LevelWarn) case "error": - proxyLogger.SetLogLevel(LevelError) - upstreamLogger.SetLogLevel(LevelError) + proxyLogger.SetLogLevel(logmon.LevelError) + upstreamLogger.SetLogLevel(logmon.LevelError) default: - proxyLogger.SetLogLevel(LevelInfo) - upstreamLogger.SetLogLevel(LevelInfo) + proxyLogger.SetLogLevel(logmon.LevelInfo) + upstreamLogger.SetLogLevel(logmon.LevelInfo) } // see: https://go.dev/src/time/format.go @@ -271,13 +274,17 @@ func (pm *ProxyManager) setupGinEngine() { pm.ginEngine.Use(func(c *gin.Context) { - // don't log the Wake on Lan proxy health check - if c.Request.URL.Path == "/wol-health" { - c.Next() - return + for _, prefix := range []string{ + "/wol-health", + "/api/performance", + "/metrics", + } { + if strings.HasPrefix(c.Request.URL.Path, prefix) { + c.Next() + return + } } - // Start timer start := time.Now() // capture these because /upstream/:model rewrites them in c.Next() @@ -285,12 +292,9 @@ func (pm *ProxyManager) setupGinEngine() { method := c.Request.Method path := c.Request.URL.Path - // Process request c.Next() - // Stop timer duration := time.Since(start) - statusCode := c.Writer.Status() bodySize := c.Writer.Size() @@ -439,6 +443,8 @@ func (pm *ProxyManager) setupGinEngine() { c.String(http.StatusOK, "OK") }) + pm.ginEngine.GET("/metrics", pm.prometheusMetricsHandler) + // see cmd/wol-proxy/wol-proxy.go, 
not logged pm.ginEngine.GET("/wol-health", func(c *gin.Context) { c.String(http.StatusOK, "OK") @@ -1218,3 +1224,9 @@ func (pm *ProxyManager) SetVersion(buildDate string, commit string, version stri pm.commit = commit pm.version = version } + +func (pm *ProxyManager) SetPerfMonitor(m *perf.Monitor) { + pm.Lock() + defer pm.Unlock() + pm.perfMonitor = m +} diff --git a/proxy/proxymanager_api.go b/proxy/proxymanager_api.go index 7942b8aa..8c348cd5 100644 --- a/proxy/proxymanager_api.go +++ b/proxy/proxymanager_api.go @@ -8,9 +8,11 @@ import ( "sort" "strconv" "strings" + "time" "github.com/gin-gonic/gin" "github.com/mostlygeek/llama-swap/event" + "github.com/mostlygeek/llama-swap/internal/perf" ) type Model struct { @@ -32,6 +34,7 @@ func addApiHandlers(pm *ProxyManager) { apiGroup.POST("/models/unload/*model", pm.apiUnloadSingleModelHandler) apiGroup.GET("/events", pm.apiSendEvents) apiGroup.GET("/metrics", pm.apiGetMetrics) + apiGroup.GET("/performance", pm.apiGetPerformance) apiGroup.GET("/version", pm.apiGetVersion) apiGroup.GET("/captures/:id", pm.apiGetCapture) } @@ -247,6 +250,56 @@ func (pm *ProxyManager) apiGetMetrics(c *gin.Context) { c.Data(http.StatusOK, "application/json", jsonData) } +func (pm *ProxyManager) prometheusMetricsHandler(c *gin.Context) { + if pm.perfMonitor == nil { + c.String(http.StatusServiceUnavailable, "# performance monitor not available\n") + return + } + pm.perfMonitor.MetricsHandler().ServeHTTP(c.Writer, c.Request) +} + +func (pm *ProxyManager) apiGetPerformance(c *gin.Context) { + if pm.perfMonitor == nil { + c.JSON(http.StatusServiceUnavailable, gin.H{"error": "performance monitor not available"}) + return + } + + sysStats, gpuStats := pm.perfMonitor.Current() + + var after time.Time + if afterStr := c.Query("after"); afterStr != "" { + ts, err := time.Parse(time.RFC3339, afterStr) + if err != nil { + c.JSON(http.StatusBadRequest, gin.H{"error": "invalid 'after' timestamp, use RFC3339 format"}) + return + } + after = ts + } + + 
if !after.IsZero() { + filtered := make([]perf.SysStat, 0, len(sysStats)) + for _, s := range sysStats { + if s.Timestamp.After(after) { + filtered = append(filtered, s) + } + } + sysStats = filtered + + filteredGpu := make([]perf.GpuStat, 0, len(gpuStats)) + for _, g := range gpuStats { + if g.Timestamp.After(after) { + filteredGpu = append(filteredGpu, g) + } + } + gpuStats = filteredGpu + } + + c.JSON(http.StatusOK, gin.H{ + "sys_stats": sysStats, + "gpu_stats": gpuStats, + }) +} + func (pm *ProxyManager) apiUnloadSingleModelHandler(c *gin.Context) { requestedModel := strings.TrimPrefix(c.Param("model"), "/") realModelName, found := pm.config.RealModelName(requestedModel) @@ -291,7 +344,7 @@ func (pm *ProxyManager) apiGetCapture(c *gin.Context) { } capture := pm.metricsMonitor.getCaptureByID(id) - if capture == nil { + if capture == nil || (capture.ReqPath == "" && capture.ReqHeaders == nil && capture.ReqBody == nil && capture.RespHeaders == nil && capture.RespBody == nil) { c.JSON(http.StatusNotFound, gin.H{"error": "capture not found"}) return } diff --git a/proxy/proxymanager_loghandlers.go b/proxy/proxymanager_loghandlers.go index b08d5ae2..dc94d6d0 100644 --- a/proxy/proxymanager_loghandlers.go +++ b/proxy/proxymanager_loghandlers.go @@ -7,6 +7,7 @@ import ( "strings" "github.com/gin-gonic/gin" + "github.com/mostlygeek/llama-swap/internal/logmon" ) func (pm *ProxyManager) sendLogsHandlers(c *gin.Context) { @@ -89,7 +90,7 @@ func (pm *ProxyManager) streamLogsHandler(c *gin.Context) { } // getLogger searches for the appropriate logger based on the logMonitorId -func (pm *ProxyManager) getLogger(logMonitorId string) (*LogMonitor, error) { +func (pm *ProxyManager) getLogger(logMonitorId string) (*logmon.Monitor, error) { switch logMonitorId { case "": // maintain the default diff --git a/ui-svelte/package-lock.json b/ui-svelte/package-lock.json index 11674498..7fb60712 100644 --- a/ui-svelte/package-lock.json +++ b/ui-svelte/package-lock.json @@ -8,6 +8,7 @@ 
"name": "ui-svelte", "version": "0.0.0", "dependencies": { + "chart.js": "4.5.1", "highlight.js": "^11.11.1", "katex": "^0.16.28", "lucide-svelte": "^0.563.0", @@ -120,6 +121,12 @@ "@jridgewell/sourcemap-codec": "^1.4.14" } }, + "node_modules/@kurkle/color": { + "version": "0.3.4", + "resolved": "https://registry.npmjs.org/@kurkle/color/-/color-0.3.4.tgz", + "integrity": "sha512-M5UknZPHRu3DEDWoipU6sE8PdkZ6Z/S+v4dD+Ke8IaNlpdSQah50lz1KtcFBa2vsdOnwbbnxJwVM4wty6udA5w==", + "license": "MIT" + }, "node_modules/@napi-rs/wasm-runtime": { "version": "1.1.3", "resolved": "https://registry.npmjs.org/@napi-rs/wasm-runtime/-/wasm-runtime-1.1.3.tgz", @@ -1096,6 +1103,18 @@ "url": "https://github.com/sponsors/wooorm" } }, + "node_modules/chart.js": { + "version": "4.5.1", + "resolved": "https://registry.npmjs.org/chart.js/-/chart.js-4.5.1.tgz", + "integrity": "sha512-GIjfiT9dbmHRiYi6Nl2yFCq7kkwdkp1W/lp2J99rX0yo9tgJGn3lKQATztIjb5tVtevcBtIdICNWqlq5+E8/Pw==", + "license": "MIT", + "dependencies": { + "@kurkle/color": "^0.3.0" + }, + "engines": { + "pnpm": ">=8" + } + }, "node_modules/chokidar": { "version": "4.0.3", "resolved": "https://registry.npmjs.org/chokidar/-/chokidar-4.0.3.tgz", diff --git a/ui-svelte/package.json b/ui-svelte/package.json index e5d154e2..efedc65b 100644 --- a/ui-svelte/package.json +++ b/ui-svelte/package.json @@ -35,6 +35,7 @@ "remark-math": "^6.0.0", "remark-parse": "^11.0.0", "remark-rehype": "^11.1.2", + "chart.js": "4.5.1", "svelte-spa-router": "^4.0.1", "unified": "^11.0.5", "unist-util-visit": "^5.1.0" diff --git a/ui-svelte/src/App.svelte b/ui-svelte/src/App.svelte index f3ca909b..6ea0606c 100644 --- a/ui-svelte/src/App.svelte +++ b/ui-svelte/src/App.svelte @@ -5,6 +5,7 @@ import LogViewer from "./routes/LogViewer.svelte"; import Models from "./routes/Models.svelte"; import Activity from "./routes/Activity.svelte"; + import Performance from "./routes/Performance.svelte"; import Playground from "./routes/Playground.svelte"; import PlaygroundStub from 
"./routes/PlaygroundStub.svelte"; import { enableAPIEvents } from "./stores/api"; @@ -16,6 +17,7 @@ "/models": Models, "/logs": LogViewer, "/activity": Activity, + "/performance": Performance, "*": PlaygroundStub, }; diff --git a/ui-svelte/src/components/CaptureDialog.svelte b/ui-svelte/src/components/CaptureDialog.svelte index 93458f1f..e4795a1e 100644 --- a/ui-svelte/src/components/CaptureDialog.svelte +++ b/ui-svelte/src/components/CaptureDialog.svelte @@ -427,6 +427,14 @@ + {:else} +
+

Capture not found

+

The capture may have expired or was never recorded.

+
+ +
+
{/if} diff --git a/ui-svelte/src/components/Header.svelte b/ui-svelte/src/components/Header.svelte index 1c9529d0..08bbe5f2 100644 --- a/ui-svelte/src/components/Header.svelte +++ b/ui-svelte/src/components/Header.svelte @@ -84,6 +84,16 @@ > Logs + + Performance + + {/each} + +
+ Refresh: + {#each INTERVALS as intv, i} + + {/each} +
+ + + +

+ This is an experimental feature. Please see issue 596 for instructions. +

+ + +
+

GPU

+ {#if !hasGpuData} +

No GPU data available

+ {:else} +
+ + + + {#if hasVramTemp} + + {/if} + +
+ {/if} +
+ + +
+

System

+
+ +
+ + {#if latestMemSwap} +
+ Mem: {latestMemSwap.mem_used_mb.toLocaleString()} / {latestMemSwap.mem_total_mb.toLocaleString()} MB ({latestMemSwap.mem_used_pct}%) + {#if latestMemSwap.swap_used_pct !== null} + Swap: {latestMemSwap.swap_used_mb.toLocaleString()} / {latestMemSwap.swap_total_mb.toLocaleString()} MB ({latestMemSwap.swap_used_pct}%) + {/if} +
+ {/if} +
+ + {#if netBandwidthDatasets.length > 0} + + {/if} +
+
+ diff --git a/ui-svelte/src/stores/api.ts b/ui-svelte/src/stores/api.ts index 90b24b0d..e77d5b76 100644 --- a/ui-svelte/src/stores/api.ts +++ b/ui-svelte/src/stores/api.ts @@ -7,6 +7,7 @@ import type { APIEventEnvelope, ReqRespCapture, InFlightStats, + PerformanceResponse, } from "../lib/types"; import { connectionState } from "./theme"; @@ -204,3 +205,17 @@ export async function getCapture(id: number): Promise { return null; } } + +export async function fetchPerformance(after?: string): Promise { + try { + const url = after ? `/api/performance?after=${encodeURIComponent(after)}` : "/api/performance"; + const response = await fetch(url); + if (!response.ok) { + throw new Error(`HTTP error! status: ${response.status}`); + } + return await response.json(); + } catch (error) { + console.error("Failed to fetch performance data:", error); + return null; + } +} diff --git a/ui-svelte/vite.config.ts b/ui-svelte/vite.config.ts index 18dcc3ee..6206c53f 100644 --- a/ui-svelte/vite.config.ts +++ b/ui-svelte/vite.config.ts @@ -30,13 +30,11 @@ export default defineConfig({ // on the public internet for dev?! haha. host: "0.0.0.0", allowedHosts: true, - proxy: { - "/api": "http://localhost:8080", // Proxy API calls to Go backend during development - "/logs": "http://localhost:8080", - "/upstream": "http://localhost:8080", - "/unload": "http://localhost:8080", - "/v1": "http://localhost:8080", - "/sdapi": "http://localhost:8080", - }, + proxy: Object.fromEntries( + ["/api", "/logs", "/upstream", "/unload", "/v1", "/sdapi"].map((path) => [ + path, + process.env.LLAMA_SWAP_URL ?? "http://localhost:8080", + ]), + ), }, });