Compare commits
1 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 29d3d9ba20 |
@@ -1,6 +1,10 @@
|
|||||||
package perf
|
package perf
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
"math"
|
||||||
|
"regexp"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
@@ -43,3 +47,168 @@ func ParseNvidiaSmiLine(line string) *GpuStat {
|
|||||||
PowerDrawW: powerDraw,
|
PowerDrawW: powerDraw,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// mactopOutput is the decoding target for one sample of mactop's headless JSON
// output. Only the keys that feed a GpuStat are declared; every other key in
// the document is ignored by encoding/json.
//
// The memory object describes whole-system memory, not memory attributed to
// the GPU. The darwin monitor overlays ioreg's GPU-attributed unified memory
// (see overlayIoregMem) so both backends report consistent memory figures.
type mactopOutput struct {
	// SystemInfo identifies the machine; it is used to build the display name.
	SystemInfo struct {
		Name         string `json:"name"`
		GPUCoreCount int    `json:"gpu_core_count"`
	} `json:"system_info"`

	// GPUUsage is the top-level "gpu_usage" value.
	GPUUsage float64 `json:"gpu_usage"`

	// SocMetrics carries the GPU power, frequency and temperature readings.
	SocMetrics struct {
		GPUPower float64 `json:"gpu_power"`
		GPUFreq  int     `json:"gpu_freq_mhz"`
		GPUTemp  float64 `json:"gpu_temp"`
	} `json:"soc_metrics"`

	// Memory is whole-system memory (see the type comment).
	Memory struct {
		Total uint64 `json:"total"`
		Used  uint64 `json:"used"`
	} `json:"memory"`

	// Temperatures lists averaged sensor groups, keyed by group name.
	Temperatures []struct {
		Group string  `json:"group"`
		Avg   float64 `json:"avg_celsius"`
	} `json:"temperatures"`

	// Fans lists each fan's current speed and its RPM range.
	Fans []struct {
		RPM    int `json:"rpm"`
		MinRPM int `json:"min_rpm"`
		MaxRPM int `json:"max_rpm"`
	} `json:"fans"`
}
|
||||||
|
|
||||||
|
// Patterns for pulling GPU properties out of ioreg's text output.
//
// ioreg prints top-level device properties as `"key" = value` (spaces around
// the equals sign) and entries of nested dictionaries such as
// PerformanceStatistics as `"key"=value` (no spaces). Every pattern accepts
// optional whitespace around `=` so that all four behave the same way and a
// change in ioreg's spacing does not silently hide the GPU.
//
// The closing quote is part of each key, so reIoregMemUsed does not match the
// similarly named "In use system memory (driver)" counter.
var (
	reIoregModel     = regexp.MustCompile(`"model"\s*=\s*"([^"]+)"`)
	reIoregCoreCount = regexp.MustCompile(`"gpu-core-count"\s*=\s*(\d+)`)
	reIoregUtil      = regexp.MustCompile(`"Device Utilization %"\s*=\s*(\d+)`)
	reIoregMemUsed   = regexp.MustCompile(`"In use system memory"\s*=\s*(\d+)`)
)
|
||||||
|
|
||||||
|
// ParseIoregOutput parses `ioreg -r -c IOGPU -d 1 -f` output into a GpuStat for
|
||||||
|
// the Apple Silicon integrated GPU. This is a fallback for when mactop is not
|
||||||
|
// installed: utilization and used memory are available, but power, temperature,
|
||||||
|
// and fan speed are not exposed by ioreg. memTotalMB is the unified memory size
|
||||||
|
// supplied by the caller, since Apple Silicon shares memory between CPU and GPU.
|
||||||
|
// Returns nil if no GPU device is found in the output.
|
||||||
|
func ParseIoregOutput(out []byte, memTotalMB int) *GpuStat {
|
||||||
|
utilMatch := reIoregUtil.FindSubmatch(out)
|
||||||
|
memMatch := reIoregMemUsed.FindSubmatch(out)
|
||||||
|
if utilMatch == nil && memMatch == nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
var gpuUtil float64
|
||||||
|
if utilMatch != nil {
|
||||||
|
gpuUtil, _ = strconv.ParseFloat(string(utilMatch[1]), 64)
|
||||||
|
}
|
||||||
|
|
||||||
|
const toMB = 1024 * 1024
|
||||||
|
var memUsedMB int
|
||||||
|
if memMatch != nil {
|
||||||
|
memUsedBytes, _ := strconv.ParseInt(string(memMatch[1]), 10, 64)
|
||||||
|
memUsedMB = int(memUsedBytes / toMB)
|
||||||
|
}
|
||||||
|
|
||||||
|
var memUtil float64
|
||||||
|
if memTotalMB > 0 {
|
||||||
|
memUtil = float64(memUsedMB) / float64(memTotalMB) * 100
|
||||||
|
}
|
||||||
|
|
||||||
|
name := "Apple GPU"
|
||||||
|
if m := reIoregModel.FindSubmatch(out); m != nil {
|
||||||
|
name = string(m[1])
|
||||||
|
}
|
||||||
|
if m := reIoregCoreCount.FindSubmatch(out); m != nil {
|
||||||
|
if cores, err := strconv.Atoi(string(m[1])); err == nil && cores > 0 {
|
||||||
|
name = fmt.Sprintf("%s (%d-core GPU)", name, cores)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return &GpuStat{
|
||||||
|
Timestamp: time.Now(),
|
||||||
|
ID: 0,
|
||||||
|
Name: name,
|
||||||
|
GpuUtilPct: gpuUtil,
|
||||||
|
MemUtilPct: memUtil,
|
||||||
|
MemUsedMB: memUsedMB,
|
||||||
|
MemTotalMB: memTotalMB,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ParseMactopLine parses a single line of mactop headless JSON output into a
|
||||||
|
// GpuStat for the Apple Silicon integrated GPU. Returns nil if the line cannot
|
||||||
|
// be parsed.
|
||||||
|
func ParseMactopLine(line string) *GpuStat {
|
||||||
|
line = strings.TrimSpace(line)
|
||||||
|
if line == "" {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
var out mactopOutput
|
||||||
|
if err := json.Unmarshal([]byte(line), &out); err != nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
const toMB = 1024 * 1024
|
||||||
|
memUsedMB := int(out.Memory.Used / toMB)
|
||||||
|
memTotalMB := int(out.Memory.Total / toMB)
|
||||||
|
|
||||||
|
var memUtil float64
|
||||||
|
if memTotalMB > 0 {
|
||||||
|
memUtil = float64(memUsedMB) / float64(memTotalMB) * 100
|
||||||
|
}
|
||||||
|
|
||||||
|
name := out.SystemInfo.Name
|
||||||
|
if name == "" {
|
||||||
|
name = "Apple GPU"
|
||||||
|
}
|
||||||
|
if out.SystemInfo.GPUCoreCount > 0 {
|
||||||
|
name = fmt.Sprintf("%s (%d-core GPU)", name, out.SystemInfo.GPUCoreCount)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Unified memory has no dedicated VRAM sensor; use the memory temperature
|
||||||
|
// group when mactop exposes it.
|
||||||
|
var vramTempC int
|
||||||
|
for _, t := range out.Temperatures {
|
||||||
|
if strings.EqualFold(t.Group, "Memory") {
|
||||||
|
vramTempC = int(math.Round(t.Avg))
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Average fan load across all fans as a percentage of their RPM range.
|
||||||
|
var fanSpeed float64
|
||||||
|
var fanCount int
|
||||||
|
for _, f := range out.Fans {
|
||||||
|
if f.MaxRPM > f.MinRPM {
|
||||||
|
pct := float64(f.RPM-f.MinRPM) / float64(f.MaxRPM-f.MinRPM) * 100
|
||||||
|
if pct < 0 {
|
||||||
|
pct = 0
|
||||||
|
}
|
||||||
|
fanSpeed += pct
|
||||||
|
fanCount++
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if fanCount > 0 {
|
||||||
|
fanSpeed /= float64(fanCount)
|
||||||
|
}
|
||||||
|
|
||||||
|
return &GpuStat{
|
||||||
|
Timestamp: time.Now(),
|
||||||
|
ID: 0,
|
||||||
|
Name: name,
|
||||||
|
TempC: int(math.Round(out.SocMetrics.GPUTemp)),
|
||||||
|
VramTempC: vramTempC,
|
||||||
|
GpuUtilPct: out.GPUUsage,
|
||||||
|
MemUtilPct: memUtil,
|
||||||
|
MemUsedMB: memUsedMB,
|
||||||
|
MemTotalMB: memTotalMB,
|
||||||
|
FanSpeedPct: fanSpeed,
|
||||||
|
PowerDrawW: out.SocMetrics.GPUPower,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
@@ -1,7 +1,11 @@
|
|||||||
package perf
|
package perf
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"bufio"
|
||||||
"context"
|
"context"
|
||||||
|
"fmt"
|
||||||
|
"os/exec"
|
||||||
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/mostlygeek/llama-swap/internal/logmon"
|
"github.com/mostlygeek/llama-swap/internal/logmon"
|
||||||
@@ -11,7 +15,156 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
func getGpuStats(ctx context.Context, every time.Duration, logger *logmon.Monitor) (chan []GpuStat, error) {
|
func getGpuStats(ctx context.Context, every time.Duration, logger *logmon.Monitor) (chan []GpuStat, error) {
|
||||||
return nil, ErrNotImplemented
|
if ch, err := tryMactop(ctx, every, logger); err == nil {
|
||||||
|
logger.Info("using mactop for GPU monitoring")
|
||||||
|
return ch, nil
|
||||||
|
} else {
|
||||||
|
logger.Debugf("mactop: %s", err.Error())
|
||||||
|
}
|
||||||
|
|
||||||
|
if ch, err := tryIoreg(ctx, every, logger); err == nil {
|
||||||
|
logger.Info("using ioreg for GPU monitoring")
|
||||||
|
return ch, nil
|
||||||
|
} else {
|
||||||
|
logger.Debugf("ioreg: %s", err.Error())
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil, ErrNoGpuTool
|
||||||
|
}
|
||||||
|
|
||||||
|
// tryIoreg polls `ioreg -r -c IOGPU -d 1 -f` for Apple Silicon GPU stats. It is
|
||||||
|
// a fallback for when mactop is not installed. ioreg exposes GPU utilization and
|
||||||
|
// used memory but not power, temperature, or fan speed.
|
||||||
|
func tryIoreg(ctx context.Context, every time.Duration, logger *logmon.Monitor) (chan []GpuStat, error) {
|
||||||
|
if _, err := exec.LookPath("ioreg"); err != nil {
|
||||||
|
return nil, ErrNoGpuTool
|
||||||
|
}
|
||||||
|
|
||||||
|
// Verify ioreg actually reports a GPU device before committing to it, so we
|
||||||
|
// can fall through to ErrNoGpuTool otherwise.
|
||||||
|
if stat := sampleIoreg(ctx); stat == nil {
|
||||||
|
return nil, fmt.Errorf("ioreg reported no GPU device")
|
||||||
|
}
|
||||||
|
|
||||||
|
if every < time.Second {
|
||||||
|
every = time.Second
|
||||||
|
}
|
||||||
|
|
||||||
|
ch := make(chan []GpuStat, 1)
|
||||||
|
|
||||||
|
go func() {
|
||||||
|
defer close(ch)
|
||||||
|
ticker := time.NewTicker(every)
|
||||||
|
defer ticker.Stop()
|
||||||
|
|
||||||
|
for {
|
||||||
|
select {
|
||||||
|
case <-ctx.Done():
|
||||||
|
return
|
||||||
|
case <-ticker.C:
|
||||||
|
stat := sampleIoreg(ctx)
|
||||||
|
if stat == nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
select {
|
||||||
|
case ch <- []GpuStat{*stat}:
|
||||||
|
default:
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
|
||||||
|
return ch, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// sampleIoreg runs ioreg once and parses a single GpuStat, or returns nil.
|
||||||
|
func sampleIoreg(ctx context.Context) *GpuStat {
|
||||||
|
out, err := exec.CommandContext(ctx, "ioreg", "-r", "-c", "IOGPU", "-d", "1", "-f").Output()
|
||||||
|
if err != nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
var memTotalMB int
|
||||||
|
if vmStat, err := mem.VirtualMemory(); err == nil {
|
||||||
|
memTotalMB = int(vmStat.Total / (1024 * 1024))
|
||||||
|
}
|
||||||
|
|
||||||
|
return ParseIoregOutput(out, memTotalMB)
|
||||||
|
}
|
||||||
|
|
||||||
|
// overlayIoregMem replaces a GpuStat's memory fields with the GPU-attributed
|
||||||
|
// unified memory reported by ioreg. mactop only exposes whole-system memory, so
|
||||||
|
// without this the mactop and ioreg backends would report different memory
|
||||||
|
// semantics. It is a no-op when ioreg is unavailable or reports no GPU memory,
|
||||||
|
// leaving the mactop-supplied values in place.
|
||||||
|
func overlayIoregMem(ctx context.Context, stat *GpuStat) {
|
||||||
|
ioStat := sampleIoreg(ctx)
|
||||||
|
if ioStat == nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
stat.MemUsedMB = ioStat.MemUsedMB
|
||||||
|
stat.MemTotalMB = ioStat.MemTotalMB
|
||||||
|
stat.MemUtilPct = ioStat.MemUtilPct
|
||||||
|
}
|
||||||
|
|
||||||
|
// tryMactop streams Apple Silicon GPU stats from mactop's headless mode.
|
||||||
|
// See https://github.com/metaspartan/mactop. mactop emits one JSON object per
|
||||||
|
// sample to stdout, which we parse into GpuStat.
|
||||||
|
func tryMactop(ctx context.Context, every time.Duration, logger *logmon.Monitor) (chan []GpuStat, error) {
|
||||||
|
if _, err := exec.LookPath("mactop"); err != nil {
|
||||||
|
return nil, ErrNoGpuTool
|
||||||
|
}
|
||||||
|
|
||||||
|
// mactop samples power over the interval, so give it at least a second.
|
||||||
|
intervalMs := int(every.Milliseconds())
|
||||||
|
if intervalMs < 1000 {
|
||||||
|
intervalMs = 1000
|
||||||
|
}
|
||||||
|
|
||||||
|
cmd := exec.CommandContext(ctx, "mactop",
|
||||||
|
"--headless",
|
||||||
|
"--format", "json",
|
||||||
|
"--interval", fmt.Sprintf("%d", intervalMs),
|
||||||
|
)
|
||||||
|
|
||||||
|
stdout, err := cmd.StdoutPipe()
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("mactop stdout pipe failed: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := cmd.Start(); err != nil {
|
||||||
|
return nil, fmt.Errorf("mactop start failed: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
ch := make(chan []GpuStat, 1)
|
||||||
|
|
||||||
|
go func() {
|
||||||
|
defer close(ch)
|
||||||
|
|
||||||
|
scanner := bufio.NewScanner(stdout)
|
||||||
|
// mactop's JSON objects can be large; allow generous line lengths.
|
||||||
|
scanner.Buffer(make([]byte, 0, 64*1024), 1024*1024)
|
||||||
|
for scanner.Scan() {
|
||||||
|
line := strings.TrimSpace(scanner.Text())
|
||||||
|
if line == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
stat := ParseMactopLine(line)
|
||||||
|
if stat != nil {
|
||||||
|
// mactop only reports whole-system memory; overlay ioreg's
|
||||||
|
// GPU-attributed unified memory so both backends are consistent.
|
||||||
|
overlayIoregMem(ctx, stat)
|
||||||
|
select {
|
||||||
|
case ch <- []GpuStat{*stat}:
|
||||||
|
default:
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
cmd.Wait()
|
||||||
|
}()
|
||||||
|
|
||||||
|
return ch, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func readSysStats() (SysStat, error) {
|
func readSysStats() (SysStat, error) {
|
||||||
|
|||||||
@@ -264,3 +264,50 @@ func TestParseNvidiaSmiLine_ZeroMemoryTotal(t *testing.T) {
|
|||||||
require.NotNil(t, stat)
|
require.NotNil(t, stat)
|
||||||
assert.Equal(t, 0.0, stat.MemUtilPct)
|
assert.Equal(t, 0.0, stat.MemUtilPct)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ioregSample is a trimmed ioreg record for an Apple M1 Pro GPU, used as the
// fixture for the ParseIoregOutput tests. It contains the model, the GPU core
// count, and a PerformanceStatistics dictionary holding both the
// "In use system memory" counter and its "(driver)" variant.
const ioregSample = `+-o AGXAcceleratorG13X <class AGXAcceleratorG13X, id 0x1000009a1, registered, matched, active, busy 0 (39191 ms), retain 108>
{
"model" = "Apple M1 Pro"
"gpu-core-count" = 16
"PerformanceStatistics" = {"In use system memory (driver)"=0,"Alloc system memory"=14511046656,"Tiler Utilization %"=34,"recoveryCount"=0,"Renderer Utilization %"=34,"Device Utilization %"=34,"In use system memory"=7688503296}
"IOClass" = "AGXAcceleratorG13X"
}`
|
||||||
|
|
||||||
|
func TestParseIoregOutput_ValidOutput(t *testing.T) {
|
||||||
|
const memTotalMB = 32768
|
||||||
|
|
||||||
|
stat := ParseIoregOutput([]byte(ioregSample), memTotalMB)
|
||||||
|
require.NotNil(t, stat)
|
||||||
|
|
||||||
|
assert.Equal(t, 0, stat.ID)
|
||||||
|
assert.Equal(t, "Apple M1 Pro (16-core GPU)", stat.Name)
|
||||||
|
assert.Equal(t, 34.0, stat.GpuUtilPct)
|
||||||
|
assert.Equal(t, 7688503296/(1024*1024), stat.MemUsedMB)
|
||||||
|
assert.Equal(t, memTotalMB, stat.MemTotalMB)
|
||||||
|
assert.InDelta(t, float64(stat.MemUsedMB)/memTotalMB*100, stat.MemUtilPct, 0.01)
|
||||||
|
// Not exposed by ioreg.
|
||||||
|
assert.Equal(t, 0, stat.TempC)
|
||||||
|
assert.Equal(t, 0.0, stat.PowerDrawW)
|
||||||
|
assert.Equal(t, 0.0, stat.FanSpeedPct)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestParseIoregOutput_NoGpuDevice(t *testing.T) {
|
||||||
|
stat := ParseIoregOutput([]byte("no gpu here"), 32768)
|
||||||
|
assert.Nil(t, stat)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestParseIoregOutput_ZeroMemTotal(t *testing.T) {
|
||||||
|
stat := ParseIoregOutput([]byte(ioregSample), 0)
|
||||||
|
require.NotNil(t, stat)
|
||||||
|
assert.Equal(t, 0.0, stat.MemUtilPct)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestParseIoregOutput_MissingModel(t *testing.T) {
|
||||||
|
const out = `"Device Utilization %"=50,"In use system memory"=1048576`
|
||||||
|
|
||||||
|
stat := ParseIoregOutput([]byte(out), 1024)
|
||||||
|
require.NotNil(t, stat)
|
||||||
|
assert.Equal(t, "Apple GPU", stat.Name)
|
||||||
|
assert.Equal(t, 50.0, stat.GpuUtilPct)
|
||||||
|
assert.Equal(t, 1, stat.MemUsedMB)
|
||||||
|
}
|
||||||
|
|||||||
@@ -402,7 +402,7 @@
|
|||||||
<p class="text-sm text-txtsecondary">
|
<p class="text-sm text-txtsecondary">
|
||||||
This is an experimental feature. Please use <a
|
This is an experimental feature. Please use <a
|
||||||
class="underline hover:text-txtmain"
|
class="underline hover:text-txtmain"
|
||||||
href="https://github.com/mostlygeek/llama-swap/discussions/771">discussion #711</a
|
href="https://github.com/mostlygeek/llama-swap/discussions/771">discussion #771</a
|
||||||
> for instructions and to share feedback.
|
> for instructions and to share feedback.
|
||||||
</p>
|
</p>
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user