Compare commits
6 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 636b53e70f | |||
| 59cd3b690d | |||
| 5d1e62d224 | |||
| dbb869d019 | |||
| 26bb17e57e | |||
| 2982dd3d40 |
+1
-1
@@ -13,7 +13,7 @@ reviews:
|
||||
docstrings:
|
||||
enabled: false
|
||||
auto_review:
|
||||
enabled: true
|
||||
enabled: false
|
||||
drafts: false
|
||||
chat:
|
||||
auto_reply: true
|
||||
|
||||
@@ -13,11 +13,11 @@ jobs:
|
||||
steps:
|
||||
- uses: actions/stale@b5d41d4e1d5dceea10e7104786b73624c18a190f #v10.2.0
|
||||
with:
|
||||
days-before-issue-stale: 14
|
||||
days-before-issue-close: 14
|
||||
days-before-issue-stale: 30
|
||||
days-before-issue-close: 30
|
||||
stale-issue-label: "stale"
|
||||
stale-issue-message: "This issue is stale because it has been open for 2 weeks with no activity."
|
||||
close-issue-message: "This issue was closed because it has been inactive for 2 weeks since being marked as stale."
|
||||
stale-issue-message: "This issue is stale because it has been open without activity for 30 days. Please remove the stale label if this was an error."
|
||||
close-issue-message: "This issue was closed because it has been inactive for 30 days since being marked as stale."
|
||||
days-before-pr-stale: -1
|
||||
days-before-pr-close: -1
|
||||
repo-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
+13
-4
@@ -281,7 +281,7 @@ models:
|
||||
b: 2
|
||||
# objects can contain complex types with macro substitution
|
||||
# becomes: c: [0.7, false, "model: llama"]
|
||||
c: [ "${temp}", false, "model: ${MODEL_ID}" ]
|
||||
c: ["${temp}", false, "model: ${MODEL_ID}"]
|
||||
|
||||
# concurrencyLimit: overrides the allowed number of active parallel requests to a model
|
||||
# - optional, default: 0
|
||||
@@ -347,11 +347,20 @@ models:
|
||||
# matrix: run concurrent models with a solver-based swap DSL
|
||||
# =============================================================================
|
||||
#
|
||||
# Note:
|
||||
# A config must use either a matrix or legacy groups, not both. A configuration error
|
||||
# will occur if both are defined. Configuration examples for legacy Groups can be found:
|
||||
# Matrix or Groups?
|
||||
#
|
||||
# Groups are available and fully supported. The syntax may be easier to use
|
||||
# for simple use cases.
|
||||
#
|
||||
# Documentation can be found here:
|
||||
# https://github.com/mostlygeek/llama-swap/blob/40e39f7/config.example.yaml#L334-L396
|
||||
#
|
||||
# A config can use either a matrix (recommended) or groups, but not both. A configuration error
|
||||
# will occur if both are defined. Groups is legacy but is fully supported with
|
||||
# no plans to deprecate it.
|
||||
#
|
||||
# ~~~~~
|
||||
#
|
||||
# The matrix declares valid combinations of models that can run concurrently.
|
||||
# When a model is requested, the solver finds the cheapest way to make it
|
||||
# available by evicting as few (and least costly) running models as possible.
|
||||
|
||||
@@ -0,0 +1,45 @@
|
||||
package perf
|
||||
|
||||
import (
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
// ParseNvidiaSmiLine parses a single line from nvidia-smi CSV output.
|
||||
// Format: index,name,uuid,temperature.gpu,utilization.gpu,memory.used,memory.total,fan.speed,power.draw
|
||||
func ParseNvidiaSmiLine(line string) *GpuStat {
|
||||
fields := strings.Split(line, ",")
|
||||
if len(fields) < 9 {
|
||||
return nil
|
||||
}
|
||||
|
||||
id, _ := strconv.Atoi(strings.TrimSpace(fields[0]))
|
||||
name := strings.TrimSpace(fields[1])
|
||||
uuid := strings.TrimSpace(fields[2])
|
||||
tempC, _ := strconv.Atoi(strings.TrimSpace(fields[3]))
|
||||
gpuUtil, _ := strconv.ParseFloat(strings.TrimSpace(fields[4]), 64)
|
||||
memUsed, _ := strconv.Atoi(strings.TrimSpace(fields[5]))
|
||||
memTotal, _ := strconv.Atoi(strings.TrimSpace(fields[6]))
|
||||
fanSpeed, _ := strconv.ParseFloat(strings.TrimSpace(fields[7]), 64)
|
||||
powerDraw, _ := strconv.ParseFloat(strings.TrimSpace(fields[8]), 64)
|
||||
|
||||
var memUtil float64
|
||||
if memTotal > 0 {
|
||||
memUtil = float64(memUsed) / float64(memTotal) * 100
|
||||
}
|
||||
|
||||
return &GpuStat{
|
||||
Timestamp: time.Now(),
|
||||
ID: id,
|
||||
Name: name,
|
||||
UUID: uuid,
|
||||
TempC: tempC,
|
||||
GpuUtilPct: gpuUtil,
|
||||
MemUtilPct: memUtil,
|
||||
MemUsedMB: memUsed,
|
||||
MemTotalMB: memTotal,
|
||||
FanSpeedPct: fanSpeed,
|
||||
PowerDrawW: powerDraw,
|
||||
}
|
||||
}
|
||||
@@ -224,3 +224,43 @@ func TestCurrent_ConcurrentAccess(t *testing.T) {
|
||||
}
|
||||
wg.Wait()
|
||||
}
|
||||
|
||||
func TestParseNvidiaSmiLine_ValidLine(t *testing.T) {
|
||||
line := "0, NVIDIA GeForce RTX 3080, GPU-12345678-1234-1234-1234-123456789abc, 65, 80, 8192, 10240, 75, 250"
|
||||
|
||||
stat := ParseNvidiaSmiLine(line)
|
||||
require.NotNil(t, stat)
|
||||
|
||||
assert.Equal(t, 0, stat.ID)
|
||||
assert.Equal(t, "NVIDIA GeForce RTX 3080", stat.Name)
|
||||
assert.Equal(t, "GPU-12345678-1234-1234-1234-123456789abc", stat.UUID)
|
||||
assert.Equal(t, 65, stat.TempC)
|
||||
assert.Equal(t, 80.0, stat.GpuUtilPct)
|
||||
assert.Equal(t, 8192, stat.MemUsedMB)
|
||||
assert.Equal(t, 10240, stat.MemTotalMB)
|
||||
assert.Equal(t, 75.0, stat.FanSpeedPct)
|
||||
assert.Equal(t, 250.0, stat.PowerDrawW)
|
||||
assert.InDelta(t, 80.0, stat.MemUtilPct, 0.01)
|
||||
}
|
||||
|
||||
func TestParseNvidiaSmiLine_ShortLine(t *testing.T) {
|
||||
line := "0, NVIDIA GPU, GPU-123"
|
||||
|
||||
stat := ParseNvidiaSmiLine(line)
|
||||
assert.Nil(t, stat)
|
||||
}
|
||||
|
||||
func TestParseNvidiaSmiLine_MissingFields(t *testing.T) {
|
||||
line := "0, NVIDIA GPU, GPU-123, 65, 80, 8192, 10240, 75"
|
||||
|
||||
stat := ParseNvidiaSmiLine(line)
|
||||
assert.Nil(t, stat)
|
||||
}
|
||||
|
||||
func TestParseNvidiaSmiLine_ZeroMemoryTotal(t *testing.T) {
|
||||
line := "0, NVIDIA GPU, GPU-123, 65, 80, 0, 0, 75, 250"
|
||||
|
||||
stat := ParseNvidiaSmiLine(line)
|
||||
require.NotNil(t, stat)
|
||||
assert.Equal(t, 0.0, stat.MemUtilPct)
|
||||
}
|
||||
|
||||
@@ -170,7 +170,7 @@ func tryNvidiaSmi(ctx context.Context, every time.Duration, logger *logmon.Monit
|
||||
continue
|
||||
}
|
||||
|
||||
stat := parseNvidiaSmiLine(line)
|
||||
stat := ParseNvidiaSmiLine(line)
|
||||
if stat != nil {
|
||||
select {
|
||||
case ch <- []GpuStat{*stat}:
|
||||
@@ -184,42 +184,6 @@ func tryNvidiaSmi(ctx context.Context, every time.Duration, logger *logmon.Monit
|
||||
return ch, nil
|
||||
}
|
||||
|
||||
func parseNvidiaSmiLine(line string) *GpuStat {
|
||||
fields := strings.Split(line, ", ")
|
||||
if len(fields) < 9 {
|
||||
return nil
|
||||
}
|
||||
|
||||
id, _ := strconv.Atoi(strings.TrimSpace(fields[0]))
|
||||
name := strings.TrimSpace(fields[1])
|
||||
uuid := strings.TrimSpace(fields[2])
|
||||
tempC, _ := strconv.Atoi(strings.TrimSpace(fields[3]))
|
||||
gpuUtil, _ := strconv.ParseFloat(strings.TrimSpace(fields[4]), 64)
|
||||
memUsed, _ := strconv.Atoi(strings.TrimSpace(fields[5]))
|
||||
memTotal, _ := strconv.Atoi(strings.TrimSpace(fields[6]))
|
||||
fanSpeed, _ := strconv.ParseFloat(strings.TrimSpace(fields[7]), 64)
|
||||
powerDraw, _ := strconv.ParseFloat(strings.TrimSpace(fields[8]), 64)
|
||||
|
||||
var memUtil float64
|
||||
if memTotal > 0 {
|
||||
memUtil = float64(memUsed) / float64(memTotal) * 100
|
||||
}
|
||||
|
||||
return &GpuStat{
|
||||
Timestamp: time.Now(),
|
||||
ID: id,
|
||||
Name: name,
|
||||
UUID: uuid,
|
||||
TempC: tempC,
|
||||
GpuUtilPct: gpuUtil,
|
||||
MemUtilPct: memUtil,
|
||||
MemUsedMB: memUsed,
|
||||
MemTotalMB: memTotal,
|
||||
FanSpeedPct: fanSpeed,
|
||||
PowerDrawW: powerDraw,
|
||||
}
|
||||
}
|
||||
|
||||
func tryRocmSmi(ctx context.Context, every time.Duration, logger *logmon.Monitor) (chan []GpuStat, error) {
|
||||
if _, err := exec.LookPath("rocm-smi"); err != nil {
|
||||
return nil, ErrNoGpuTool
|
||||
@@ -255,13 +219,18 @@ func tryRocmSmi(ctx context.Context, every time.Duration, logger *logmon.Monitor
|
||||
|
||||
stats := make([]GpuStat, 0)
|
||||
scanner := bufio.NewScanner(strings.NewReader(string(out)))
|
||||
var header string
|
||||
for scanner.Scan() {
|
||||
line := strings.TrimSpace(scanner.Text())
|
||||
if line == "" || strings.HasPrefix(line, "device,") {
|
||||
if line == "" {
|
||||
continue
|
||||
}
|
||||
if strings.HasPrefix(line, "device,") {
|
||||
header = line
|
||||
continue
|
||||
}
|
||||
|
||||
stat := parseRocmSmiLine(line)
|
||||
stat := parseRocmSmiLine(header, line)
|
||||
if stat != nil {
|
||||
stats = append(stats, *stat)
|
||||
}
|
||||
@@ -280,51 +249,99 @@ func tryRocmSmi(ctx context.Context, every time.Duration, logger *logmon.Monitor
|
||||
return ch, nil
|
||||
}
|
||||
|
||||
func parseRocmSmiLine(line string) *GpuStat {
|
||||
func parseRocmSmiLine(header string, line string) *GpuStat {
|
||||
if header == "" || line == "" {
|
||||
return nil
|
||||
}
|
||||
labels := strings.Split(header, ",")
|
||||
fields := strings.Split(line, ",")
|
||||
if len(fields) < 20 {
|
||||
if len(labels) != len(fields) {
|
||||
return nil
|
||||
}
|
||||
|
||||
device := strings.TrimSpace(fields[0])
|
||||
id, err := strconv.Atoi(strings.TrimPrefix(device, "card"))
|
||||
if err != nil {
|
||||
return nil
|
||||
}
|
||||
deviceName := strings.TrimSpace(fields[1])
|
||||
uuid := strings.TrimSpace(fields[5])
|
||||
tempC, _ := strconv.ParseFloat(strings.TrimSpace(fields[6]), 64)
|
||||
vramTempC, _ := strconv.ParseFloat(strings.TrimSpace(fields[8]), 64)
|
||||
fanSpeed, _ := strconv.ParseFloat(strings.TrimSpace(fields[10]), 64)
|
||||
powerDraw, _ := strconv.ParseFloat(strings.TrimSpace(fields[12]), 64)
|
||||
gpuUtil, _ := strconv.ParseFloat(strings.TrimSpace(fields[13]), 64)
|
||||
memUtil, _ := strconv.ParseFloat(strings.TrimSpace(fields[14]), 64)
|
||||
memTotal, _ := strconv.ParseUint(strings.TrimSpace(fields[17]), 10, 64)
|
||||
memUsed, _ := strconv.ParseUint(strings.TrimSpace(fields[18]), 10, 64)
|
||||
cardSeries := strings.TrimSpace(fields[19])
|
||||
name := device
|
||||
if cardSeries != "" && cardSeries != "N/A" {
|
||||
name = cardSeries + " " + device
|
||||
} else if deviceName != "" && deviceName != "N/A" {
|
||||
name = deviceName + " " + device
|
||||
result := &GpuStat{
|
||||
Timestamp: time.Now(),
|
||||
ID: -1,
|
||||
}
|
||||
|
||||
var device string
|
||||
var deviceName string
|
||||
var cardSeries string
|
||||
var gfxVersion string
|
||||
|
||||
const toMB = 1024 * 1024
|
||||
|
||||
return &GpuStat{
|
||||
Timestamp: time.Now(),
|
||||
ID: id,
|
||||
Name: name,
|
||||
UUID: uuid,
|
||||
TempC: int(tempC),
|
||||
VramTempC: int(vramTempC),
|
||||
GpuUtilPct: gpuUtil,
|
||||
MemUtilPct: memUtil,
|
||||
MemUsedMB: int(memUsed / toMB),
|
||||
MemTotalMB: int(memTotal / toMB),
|
||||
FanSpeedPct: fanSpeed,
|
||||
PowerDrawW: powerDraw,
|
||||
for i, col := range labels {
|
||||
val := strings.TrimSpace(fields[i])
|
||||
switch col {
|
||||
case "device":
|
||||
device = val
|
||||
id, err := strconv.Atoi(strings.TrimPrefix(val, "card"))
|
||||
if err != nil {
|
||||
return nil
|
||||
}
|
||||
result.ID = id
|
||||
break
|
||||
case "Device Name":
|
||||
deviceName = val
|
||||
break
|
||||
case "GUID":
|
||||
result.UUID = val
|
||||
break
|
||||
case "Temperature (Sensor edge) (C)":
|
||||
tempC, _ := strconv.ParseFloat(val, 64)
|
||||
result.TempC = int(tempC)
|
||||
break
|
||||
case "Temperature (Sensor memory) (C)":
|
||||
vramTempC, _ := strconv.ParseFloat(val, 64)
|
||||
result.VramTempC = int(vramTempC)
|
||||
break
|
||||
case "Fan speed (%)":
|
||||
fanSpeed, _ := strconv.ParseFloat(val, 64)
|
||||
result.FanSpeedPct = fanSpeed
|
||||
break
|
||||
case "Current Socket Graphics Package Power (W)":
|
||||
powerDraw, _ := strconv.ParseFloat(val, 64)
|
||||
result.PowerDrawW = powerDraw
|
||||
break
|
||||
case "GPU use (%)":
|
||||
gpuUtil, _ := strconv.ParseFloat(val, 64)
|
||||
result.GpuUtilPct = gpuUtil
|
||||
break
|
||||
case "GPU Memory Allocated (VRAM%)":
|
||||
memUtil, _ := strconv.ParseFloat(val, 64)
|
||||
result.MemUtilPct = memUtil
|
||||
break
|
||||
case "VRAM Total Memory (B)":
|
||||
memTotal, _ := strconv.ParseUint(val, 10, 64)
|
||||
result.MemTotalMB = int(memTotal / toMB)
|
||||
break
|
||||
case "VRAM Total Used Memory (B)":
|
||||
memUsed, _ := strconv.ParseUint(val, 10, 64)
|
||||
result.MemUsedMB = int(memUsed / toMB)
|
||||
break
|
||||
case "Card Series":
|
||||
cardSeries = val
|
||||
break
|
||||
case "GFX Version":
|
||||
gfxVersion = val
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if result.ID == -1 {
|
||||
return nil
|
||||
}
|
||||
|
||||
name := device
|
||||
if cardSeries != "" && cardSeries != "N/A" {
|
||||
name = cardSeries + " " + device + " (" + gfxVersion + ")"
|
||||
} else if deviceName != "" && deviceName != "N/A" {
|
||||
name = deviceName + " " + device + " (" + gfxVersion + ")"
|
||||
}
|
||||
result.Name = name
|
||||
|
||||
return result
|
||||
}
|
||||
|
||||
func trySysfs(ctx context.Context, every time.Duration, logger *logmon.Monitor) (chan []GpuStat, error) {
|
||||
|
||||
@@ -1,7 +1,11 @@
|
||||
package perf
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"context"
|
||||
"fmt"
|
||||
"os/exec"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/mostlygeek/llama-swap/internal/logmon"
|
||||
@@ -11,7 +15,68 @@ import (
|
||||
)
|
||||
|
||||
func getGpuStats(ctx context.Context, every time.Duration, logger *logmon.Monitor) (chan []GpuStat, error) {
|
||||
return nil, ErrNotImplemented
|
||||
if ch, err := tryNvidiaSmiWindows(ctx, every, logger); err == nil {
|
||||
logger.Info("using nvidia-smi for GPU monitoring")
|
||||
return ch, nil
|
||||
} else {
|
||||
logger.Debugf("nvidia-smi: %s", err.Error())
|
||||
}
|
||||
|
||||
return nil, ErrNoGpuTool
|
||||
}
|
||||
|
||||
// tryNvidiaSmiWindows starts nvidia-smi in loop mode on Windows and returns
|
||||
// a channel receiving GPU stat snapshots. Returns ErrNoGpuTool if nvidia-smi
|
||||
// is not available.
|
||||
func tryNvidiaSmiWindows(ctx context.Context, every time.Duration, logger *logmon.Monitor) (chan []GpuStat, error) {
|
||||
if _, err := exec.LookPath("nvidia-smi"); err != nil {
|
||||
return nil, ErrNoGpuTool
|
||||
}
|
||||
|
||||
sec := int(every.Seconds())
|
||||
if sec < 1 {
|
||||
sec = 1
|
||||
}
|
||||
|
||||
cmd := exec.CommandContext(ctx, "nvidia-smi",
|
||||
"--query-gpu=index,name,uuid,temperature.gpu,utilization.gpu,memory.used,memory.total,fan.speed,power.draw",
|
||||
"--format=csv,noheader,nounits",
|
||||
"--loop", fmt.Sprintf("%d", sec),
|
||||
)
|
||||
|
||||
stdout, err := cmd.StdoutPipe()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("nvidia-smi stdout pipe failed: %w", err)
|
||||
}
|
||||
|
||||
if err := cmd.Start(); err != nil {
|
||||
return nil, fmt.Errorf("nvidia-smi start failed: %w", err)
|
||||
}
|
||||
|
||||
ch := make(chan []GpuStat, 1)
|
||||
|
||||
go func() {
|
||||
defer close(ch)
|
||||
|
||||
scanner := bufio.NewScanner(stdout)
|
||||
for scanner.Scan() {
|
||||
line := strings.TrimSpace(scanner.Text())
|
||||
if line == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
stat := ParseNvidiaSmiLine(line)
|
||||
if stat != nil {
|
||||
select {
|
||||
case ch <- []GpuStat{*stat}:
|
||||
default:
|
||||
}
|
||||
}
|
||||
}
|
||||
cmd.Wait()
|
||||
}()
|
||||
|
||||
return ch, nil
|
||||
}
|
||||
|
||||
func readSysStats() (SysStat, error) {
|
||||
|
||||
@@ -400,7 +400,10 @@
|
||||
</div>
|
||||
</div>
|
||||
<p class="text-sm text-txtsecondary">
|
||||
This is an experimental feature. Please see <a class="underline hover:text-txtmain" href="https://github.com/mostlygeek/llama-swap/issues/596">issue 596</a> for instructions.
|
||||
This is an experimental feature. Please use <a
|
||||
class="underline hover:text-txtmain"
|
||||
href="https://github.com/mostlygeek/llama-swap/discussions/771">discussion #771</a
|
||||
> for instructions and to share feedback.
|
||||
</p>
|
||||
|
||||
<!-- GPU Section -->
|
||||
|
||||
Reference in New Issue
Block a user