0ab214d1c8
Add GPU monitoring support for AMD and Intel GPUs on Windows using D3DKMT (DirectX) and PDH performance counters. - Add PDH-based GPU utilization via \GPU Engine(*)\Utilization Percentage counter, summing all engine types per adapter (3D, Compute, Copy, Video). - Add D3DKMT bindings for adapter enumeration, memory segments, and adapter perf data. - Use PDH as primary utilization source (works on all vendors), with D3DKMT RunningTime as fallback for systems without PDH counters. - Prefer nvidia-smi when available, fall back to D3DKMT + PDH for AMD/Intel. - Backend priority: nvidia-smi -> D3DKMT + PDH -> ErrNoGpuTool. Verified on AMD 7900XTX GPU with llama.cpp Vulkan & ROCm backends: GPU utilization correctly shows ~99% during inference, ~0-2% when idle. --- LLM disclosure: GLM 5.1 & Kimi K2.6 have been used extensively during exploration and coding to the point that the LLMs wrote over 3/4 of the code, and I have done additional verification myself. As such, it should be considered experimental. Additional verification is needed. I have tested it on my 7900XTX system with Windows 11, and it works correctly, but as I only have this one rig, I cannot verify it everywhere.
160 lines
3.9 KiB
Go
160 lines
3.9 KiB
Go
//go:build windows
|
|
|
|
package perf
|
|
|
|
import (
|
|
"fmt"
|
|
"strconv"
|
|
"strings"
|
|
"unsafe"
|
|
|
|
"golang.org/x/sys/windows"
|
|
)
|
|
|
|
var (
|
|
pdhDLL = windows.NewLazySystemDLL("pdh.dll")
|
|
procPdhOpenQuery = pdhDLL.NewProc("PdhOpenQueryW")
|
|
procPdhAddEnglishCounter = pdhDLL.NewProc("PdhAddEnglishCounterW")
|
|
procPdhCollectQueryData = pdhDLL.NewProc("PdhCollectQueryData")
|
|
procPdhGetFormattedCounterArray = pdhDLL.NewProc("PdhGetFormattedCounterArrayW")
|
|
procPdhCloseQuery = pdhDLL.NewProc("PdhCloseQuery")
|
|
)
|
|
|
|
// PDH format flag and status codes used by this file. They are left untyped
// so they can be passed as, and compared against, uintptr syscall values.
const (
	// pdhFmtDouble asks PDH to format counter values as float64.
	pdhFmtDouble = 0x200

	// pdhMoreData is the status returned when the supplied buffer is too
	// small; it is the expected result of the size-probing call.
	pdhMoreData = 0x800007D2

	// pdhNoData is the status tolerated from PdhCollectQueryData by collect.
	pdhNoData = 0x800007D5
)
|
|
|
|
// pdhCounterValue holds one formatted counter value as written by PDH when
// the value is requested with pdhFmtDouble: a per-value status code followed
// by the value itself as a float64.
type pdhCounterValue struct {
	// CStatus is the status of this individual value; zero means valid.
	CStatus uint32
	// DblVal is the counter value formatted as a double.
	DblVal float64
}
|
|
|
|
type pdhCounterValueItem struct {
|
|
SzName *uint16
|
|
FmtValue pdhCounterValue
|
|
}
|
|
|
|
func init() {
|
|
var item pdhCounterValueItem
|
|
if unsafe.Sizeof(item) != 24 {
|
|
panic(fmt.Sprintf("pdhCounterValueItem size %d != expected 24 on x64", unsafe.Sizeof(item)))
|
|
}
|
|
}
|
|
|
|
// pdhGpuUtil bundles the two PDH handles needed to sample GPU utilization:
// query is the open PDH query (zero once closed) and counter is the wildcard
// GPU Engine counter that was added to that query.
type pdhGpuUtil struct {
	query, counter uintptr
}
|
|
|
|
// initPdhGpuUtil creates a PDH query for the GPU Engine utilization counter.
|
|
// Returns nil with an error if PDH or the counter is unavailable.
|
|
func initPdhGpuUtil() (*pdhGpuUtil, error) {
|
|
var query uintptr
|
|
if ret, _, _ := procPdhOpenQuery.Call(0, 0, uintptr(unsafe.Pointer(&query))); ret != 0 {
|
|
return nil, fmt.Errorf("PdhOpenQuery: 0x%x", ret)
|
|
}
|
|
|
|
path, _ := windows.UTF16PtrFromString(`\GPU Engine(*)\Utilization Percentage`)
|
|
var counter uintptr
|
|
if ret, _, _ := procPdhAddEnglishCounter.Call(
|
|
query, uintptr(unsafe.Pointer(path)), 0, uintptr(unsafe.Pointer(&counter)),
|
|
); ret != 0 {
|
|
procPdhCloseQuery.Call(query)
|
|
return nil, fmt.Errorf("PdhAddEnglishCounter(GPU Engine): 0x%x", ret)
|
|
}
|
|
|
|
procPdhCollectQueryData.Call(query)
|
|
|
|
return &pdhGpuUtil{query: query, counter: counter}, nil
|
|
}
|
|
|
|
// close releases the PDH query handle.
|
|
func (p *pdhGpuUtil) close() {
|
|
if p.query != 0 {
|
|
procPdhCloseQuery.Call(p.query)
|
|
p.query = 0
|
|
}
|
|
}
|
|
|
|
// collect reads the PDH counter and returns a map of adapter LUID to
|
|
// aggregated GPU utilization percentage, summed across all engine instances
|
|
// per adapter and clamped to 100%.
|
|
func (p *pdhGpuUtil) collect() map[LUID]float64 {
|
|
ret, _, _ := procPdhCollectQueryData.Call(p.query)
|
|
if ret != 0 && ret != pdhNoData {
|
|
return nil
|
|
}
|
|
|
|
var bufSize uint32
|
|
var itemCount uint32
|
|
ret, _, _ = procPdhGetFormattedCounterArray.Call(
|
|
p.counter, pdhFmtDouble,
|
|
uintptr(unsafe.Pointer(&bufSize)),
|
|
uintptr(unsafe.Pointer(&itemCount)),
|
|
0,
|
|
)
|
|
if ret != pdhMoreData || itemCount == 0 {
|
|
return nil
|
|
}
|
|
|
|
buf := make([]byte, bufSize)
|
|
ret, _, _ = procPdhGetFormattedCounterArray.Call(
|
|
p.counter, pdhFmtDouble,
|
|
uintptr(unsafe.Pointer(&bufSize)),
|
|
uintptr(unsafe.Pointer(&itemCount)),
|
|
uintptr(unsafe.Pointer(&buf[0])),
|
|
)
|
|
if ret != 0 {
|
|
return nil
|
|
}
|
|
|
|
itemSize := uint32(unsafe.Sizeof(pdhCounterValueItem{}))
|
|
result := make(map[LUID]float64)
|
|
|
|
for i := uint32(0); i < itemCount; i++ {
|
|
item := (*pdhCounterValueItem)(unsafe.Pointer(&buf[i*itemSize]))
|
|
if item.FmtValue.CStatus != 0 {
|
|
continue
|
|
}
|
|
luid, ok := parsePdhLuid(windows.UTF16PtrToString(item.SzName))
|
|
if !ok {
|
|
continue
|
|
}
|
|
result[luid] += item.FmtValue.DblVal
|
|
}
|
|
|
|
for luid := range result {
|
|
if result[luid] > 100.0 {
|
|
result[luid] = 100.0
|
|
}
|
|
}
|
|
|
|
return result
|
|
}
|
|
|
|
// parsePdhLuid extracts the adapter LUID (high and low parts) from a PDH
|
|
// GPU Engine instance name (e.g. "pid_1234_luid_0x00000000_0x000148BF_phys_0_eng_2_engtype_Compute").
|
|
func parsePdhLuid(name string) (LUID, bool) {
|
|
idx := strings.Index(name, "luid_0x")
|
|
if idx < 0 {
|
|
return LUID{}, false
|
|
}
|
|
rest := name[idx+7:]
|
|
parts := strings.SplitN(rest, "_", 4)
|
|
if len(parts) < 3 {
|
|
return LUID{}, false
|
|
}
|
|
hp, err := strconv.ParseUint(parts[0], 16, 32)
|
|
if err != nil {
|
|
return LUID{}, false
|
|
}
|
|
lpStr := strings.TrimPrefix(parts[1], "0x")
|
|
lp, err := strconv.ParseUint(lpStr, 16, 32)
|
|
if err != nil {
|
|
return LUID{}, false
|
|
}
|
|
return LUID{LowPart: uint32(lp), HighPart: int32(hp)}, true
|
|
}
|