Compare commits
7 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 29d3d9ba20 | |||
| 9be9a87fa0 | |||
| 6ea551362e | |||
| 03d58e53fa | |||
| c790d0ee03 | |||
| 4ca9c478a2 | |||
| 146a9eab24 |
@@ -32,11 +32,9 @@ jobs:
|
||||
uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # 6.4.0
|
||||
with:
|
||||
node-version: "24"
|
||||
- name: Install dependencies and build UI
|
||||
- name: Build UI
|
||||
run: |
|
||||
cd ui-svelte
|
||||
npm ci
|
||||
npm run build
|
||||
make ui
|
||||
|
||||
- name: Run GoReleaser
|
||||
uses: goreleaser/goreleaser-action@1a80836c5c9d9e5755a25cb59ec6f45a3b5f41a8 #7.2.1
|
||||
|
||||
@@ -8,4 +8,3 @@ dist/
|
||||
|
||||
# UI build output; placeholder.txt is kept so the go:embed succeeds.
|
||||
internal/server/ui_dist/*
|
||||
!internal/server/ui_dist/placeholder.txt
|
||||
|
||||
@@ -41,8 +41,7 @@ ui/node_modules:
|
||||
# build react UI
|
||||
ui: ui/node_modules
|
||||
cd ui-svelte && npm run build
|
||||
mkdir -p internal/server/ui_dist
|
||||
cp -R proxy/ui_dist/. internal/server/ui_dist/
|
||||
touch internal/server/ui_dist/placeholder.txt
|
||||
|
||||
# Build OSX binary
|
||||
mac: ui
|
||||
|
||||
@@ -15,6 +15,7 @@ require (
|
||||
github.com/tidwall/gjson v1.18.0
|
||||
github.com/tidwall/sjson v1.2.5
|
||||
golang.org/x/sync v0.20.0
|
||||
golang.org/x/sys v0.41.0
|
||||
gopkg.in/yaml.v3 v3.0.1
|
||||
)
|
||||
|
||||
@@ -70,7 +71,6 @@ require (
|
||||
golang.org/x/arch v0.8.0 // indirect
|
||||
golang.org/x/crypto v0.45.0 // indirect
|
||||
golang.org/x/net v0.47.0 // indirect
|
||||
golang.org/x/sys v0.41.0 // indirect
|
||||
golang.org/x/text v0.31.0 // indirect
|
||||
google.golang.org/protobuf v1.34.1 // indirect
|
||||
)
|
||||
|
||||
@@ -1,6 +1,10 @@
|
||||
package perf
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"math"
|
||||
"regexp"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
@@ -43,3 +47,168 @@ func ParseNvidiaSmiLine(line string) *GpuStat {
|
||||
PowerDrawW: powerDraw,
|
||||
}
|
||||
}
|
||||
|
||||
// mactopOutput maps the subset of mactop's headless JSON output that is
|
||||
// relevant to GpuStat. Note that mactop's memory object is whole-system memory,
|
||||
// not GPU-attributed; the darwin monitor overlays ioreg's GPU-attributed
|
||||
// unified memory (see overlayIoregMem) so both backends report consistent
|
||||
// memory figures.
|
||||
type mactopOutput struct {
|
||||
SocMetrics struct {
|
||||
GPUPower float64 `json:"gpu_power"`
|
||||
GPUFreq int `json:"gpu_freq_mhz"`
|
||||
GPUTemp float64 `json:"gpu_temp"`
|
||||
} `json:"soc_metrics"`
|
||||
Memory struct {
|
||||
Total uint64 `json:"total"`
|
||||
Used uint64 `json:"used"`
|
||||
} `json:"memory"`
|
||||
GPUUsage float64 `json:"gpu_usage"`
|
||||
SystemInfo struct {
|
||||
Name string `json:"name"`
|
||||
GPUCoreCount int `json:"gpu_core_count"`
|
||||
} `json:"system_info"`
|
||||
Fans []struct {
|
||||
RPM int `json:"rpm"`
|
||||
MinRPM int `json:"min_rpm"`
|
||||
MaxRPM int `json:"max_rpm"`
|
||||
} `json:"fans"`
|
||||
Temperatures []struct {
|
||||
Group string `json:"group"`
|
||||
Avg float64 `json:"avg_celsius"`
|
||||
} `json:"temperatures"`
|
||||
}
|
||||
|
||||
// ioreg output uses ` = ` (with spaces) for top-level device properties and
|
||||
// `=` (no spaces) for values inside nested dictionaries such as
|
||||
// PerformanceStatistics.
|
||||
var (
|
||||
reIoregModel = regexp.MustCompile(`"model"\s*=\s*"([^"]+)"`)
|
||||
reIoregCoreCount = regexp.MustCompile(`"gpu-core-count"\s*=\s*(\d+)`)
|
||||
reIoregUtil = regexp.MustCompile(`"Device Utilization %"=(\d+)`)
|
||||
reIoregMemUsed = regexp.MustCompile(`"In use system memory"=(\d+)`)
|
||||
)
|
||||
|
||||
// ParseIoregOutput parses `ioreg -r -c IOGPU -d 1 -f` output into a GpuStat for
|
||||
// the Apple Silicon integrated GPU. This is a fallback for when mactop is not
|
||||
// installed: utilization and used memory are available, but power, temperature,
|
||||
// and fan speed are not exposed by ioreg. memTotalMB is the unified memory size
|
||||
// supplied by the caller, since Apple Silicon shares memory between CPU and GPU.
|
||||
// Returns nil if no GPU device is found in the output.
|
||||
func ParseIoregOutput(out []byte, memTotalMB int) *GpuStat {
|
||||
utilMatch := reIoregUtil.FindSubmatch(out)
|
||||
memMatch := reIoregMemUsed.FindSubmatch(out)
|
||||
if utilMatch == nil && memMatch == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
var gpuUtil float64
|
||||
if utilMatch != nil {
|
||||
gpuUtil, _ = strconv.ParseFloat(string(utilMatch[1]), 64)
|
||||
}
|
||||
|
||||
const toMB = 1024 * 1024
|
||||
var memUsedMB int
|
||||
if memMatch != nil {
|
||||
memUsedBytes, _ := strconv.ParseInt(string(memMatch[1]), 10, 64)
|
||||
memUsedMB = int(memUsedBytes / toMB)
|
||||
}
|
||||
|
||||
var memUtil float64
|
||||
if memTotalMB > 0 {
|
||||
memUtil = float64(memUsedMB) / float64(memTotalMB) * 100
|
||||
}
|
||||
|
||||
name := "Apple GPU"
|
||||
if m := reIoregModel.FindSubmatch(out); m != nil {
|
||||
name = string(m[1])
|
||||
}
|
||||
if m := reIoregCoreCount.FindSubmatch(out); m != nil {
|
||||
if cores, err := strconv.Atoi(string(m[1])); err == nil && cores > 0 {
|
||||
name = fmt.Sprintf("%s (%d-core GPU)", name, cores)
|
||||
}
|
||||
}
|
||||
|
||||
return &GpuStat{
|
||||
Timestamp: time.Now(),
|
||||
ID: 0,
|
||||
Name: name,
|
||||
GpuUtilPct: gpuUtil,
|
||||
MemUtilPct: memUtil,
|
||||
MemUsedMB: memUsedMB,
|
||||
MemTotalMB: memTotalMB,
|
||||
}
|
||||
}
|
||||
|
||||
// ParseMactopLine parses a single line of mactop headless JSON output into a
|
||||
// GpuStat for the Apple Silicon integrated GPU. Returns nil if the line cannot
|
||||
// be parsed.
|
||||
func ParseMactopLine(line string) *GpuStat {
|
||||
line = strings.TrimSpace(line)
|
||||
if line == "" {
|
||||
return nil
|
||||
}
|
||||
|
||||
var out mactopOutput
|
||||
if err := json.Unmarshal([]byte(line), &out); err != nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
const toMB = 1024 * 1024
|
||||
memUsedMB := int(out.Memory.Used / toMB)
|
||||
memTotalMB := int(out.Memory.Total / toMB)
|
||||
|
||||
var memUtil float64
|
||||
if memTotalMB > 0 {
|
||||
memUtil = float64(memUsedMB) / float64(memTotalMB) * 100
|
||||
}
|
||||
|
||||
name := out.SystemInfo.Name
|
||||
if name == "" {
|
||||
name = "Apple GPU"
|
||||
}
|
||||
if out.SystemInfo.GPUCoreCount > 0 {
|
||||
name = fmt.Sprintf("%s (%d-core GPU)", name, out.SystemInfo.GPUCoreCount)
|
||||
}
|
||||
|
||||
// Unified memory has no dedicated VRAM sensor; use the memory temperature
|
||||
// group when mactop exposes it.
|
||||
var vramTempC int
|
||||
for _, t := range out.Temperatures {
|
||||
if strings.EqualFold(t.Group, "Memory") {
|
||||
vramTempC = int(math.Round(t.Avg))
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
// Average fan load across all fans as a percentage of their RPM range.
|
||||
var fanSpeed float64
|
||||
var fanCount int
|
||||
for _, f := range out.Fans {
|
||||
if f.MaxRPM > f.MinRPM {
|
||||
pct := float64(f.RPM-f.MinRPM) / float64(f.MaxRPM-f.MinRPM) * 100
|
||||
if pct < 0 {
|
||||
pct = 0
|
||||
}
|
||||
fanSpeed += pct
|
||||
fanCount++
|
||||
}
|
||||
}
|
||||
if fanCount > 0 {
|
||||
fanSpeed /= float64(fanCount)
|
||||
}
|
||||
|
||||
return &GpuStat{
|
||||
Timestamp: time.Now(),
|
||||
ID: 0,
|
||||
Name: name,
|
||||
TempC: int(math.Round(out.SocMetrics.GPUTemp)),
|
||||
VramTempC: vramTempC,
|
||||
GpuUtilPct: out.GPUUsage,
|
||||
MemUtilPct: memUtil,
|
||||
MemUsedMB: memUsedMB,
|
||||
MemTotalMB: memTotalMB,
|
||||
FanSpeedPct: fanSpeed,
|
||||
PowerDrawW: out.SocMetrics.GPUPower,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,7 +1,11 @@
|
||||
package perf
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"context"
|
||||
"fmt"
|
||||
"os/exec"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/mostlygeek/llama-swap/internal/logmon"
|
||||
@@ -11,7 +15,156 @@ import (
|
||||
)
|
||||
|
||||
func getGpuStats(ctx context.Context, every time.Duration, logger *logmon.Monitor) (chan []GpuStat, error) {
|
||||
return nil, ErrNotImplemented
|
||||
if ch, err := tryMactop(ctx, every, logger); err == nil {
|
||||
logger.Info("using mactop for GPU monitoring")
|
||||
return ch, nil
|
||||
} else {
|
||||
logger.Debugf("mactop: %s", err.Error())
|
||||
}
|
||||
|
||||
if ch, err := tryIoreg(ctx, every, logger); err == nil {
|
||||
logger.Info("using ioreg for GPU monitoring")
|
||||
return ch, nil
|
||||
} else {
|
||||
logger.Debugf("ioreg: %s", err.Error())
|
||||
}
|
||||
|
||||
return nil, ErrNoGpuTool
|
||||
}
|
||||
|
||||
// tryIoreg polls `ioreg -r -c IOGPU -d 1 -f` for Apple Silicon GPU stats. It is
|
||||
// a fallback for when mactop is not installed. ioreg exposes GPU utilization and
|
||||
// used memory but not power, temperature, or fan speed.
|
||||
func tryIoreg(ctx context.Context, every time.Duration, logger *logmon.Monitor) (chan []GpuStat, error) {
|
||||
if _, err := exec.LookPath("ioreg"); err != nil {
|
||||
return nil, ErrNoGpuTool
|
||||
}
|
||||
|
||||
// Verify ioreg actually reports a GPU device before committing to it, so we
|
||||
// can fall through to ErrNoGpuTool otherwise.
|
||||
if stat := sampleIoreg(ctx); stat == nil {
|
||||
return nil, fmt.Errorf("ioreg reported no GPU device")
|
||||
}
|
||||
|
||||
if every < time.Second {
|
||||
every = time.Second
|
||||
}
|
||||
|
||||
ch := make(chan []GpuStat, 1)
|
||||
|
||||
go func() {
|
||||
defer close(ch)
|
||||
ticker := time.NewTicker(every)
|
||||
defer ticker.Stop()
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return
|
||||
case <-ticker.C:
|
||||
stat := sampleIoreg(ctx)
|
||||
if stat == nil {
|
||||
continue
|
||||
}
|
||||
select {
|
||||
case ch <- []GpuStat{*stat}:
|
||||
default:
|
||||
}
|
||||
}
|
||||
}
|
||||
}()
|
||||
|
||||
return ch, nil
|
||||
}
|
||||
|
||||
// sampleIoreg runs ioreg once and parses a single GpuStat, or returns nil.
|
||||
func sampleIoreg(ctx context.Context) *GpuStat {
|
||||
out, err := exec.CommandContext(ctx, "ioreg", "-r", "-c", "IOGPU", "-d", "1", "-f").Output()
|
||||
if err != nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
var memTotalMB int
|
||||
if vmStat, err := mem.VirtualMemory(); err == nil {
|
||||
memTotalMB = int(vmStat.Total / (1024 * 1024))
|
||||
}
|
||||
|
||||
return ParseIoregOutput(out, memTotalMB)
|
||||
}
|
||||
|
||||
// overlayIoregMem replaces a GpuStat's memory fields with the GPU-attributed
|
||||
// unified memory reported by ioreg. mactop only exposes whole-system memory, so
|
||||
// without this the mactop and ioreg backends would report different memory
|
||||
// semantics. It is a no-op when ioreg is unavailable or reports no GPU memory,
|
||||
// leaving the mactop-supplied values in place.
|
||||
func overlayIoregMem(ctx context.Context, stat *GpuStat) {
|
||||
ioStat := sampleIoreg(ctx)
|
||||
if ioStat == nil {
|
||||
return
|
||||
}
|
||||
stat.MemUsedMB = ioStat.MemUsedMB
|
||||
stat.MemTotalMB = ioStat.MemTotalMB
|
||||
stat.MemUtilPct = ioStat.MemUtilPct
|
||||
}
|
||||
|
||||
// tryMactop streams Apple Silicon GPU stats from mactop's headless mode.
|
||||
// See https://github.com/metaspartan/mactop. mactop emits one JSON object per
|
||||
// sample to stdout, which we parse into GpuStat.
|
||||
func tryMactop(ctx context.Context, every time.Duration, logger *logmon.Monitor) (chan []GpuStat, error) {
|
||||
if _, err := exec.LookPath("mactop"); err != nil {
|
||||
return nil, ErrNoGpuTool
|
||||
}
|
||||
|
||||
// mactop samples power over the interval, so give it at least a second.
|
||||
intervalMs := int(every.Milliseconds())
|
||||
if intervalMs < 1000 {
|
||||
intervalMs = 1000
|
||||
}
|
||||
|
||||
cmd := exec.CommandContext(ctx, "mactop",
|
||||
"--headless",
|
||||
"--format", "json",
|
||||
"--interval", fmt.Sprintf("%d", intervalMs),
|
||||
)
|
||||
|
||||
stdout, err := cmd.StdoutPipe()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("mactop stdout pipe failed: %w", err)
|
||||
}
|
||||
|
||||
if err := cmd.Start(); err != nil {
|
||||
return nil, fmt.Errorf("mactop start failed: %w", err)
|
||||
}
|
||||
|
||||
ch := make(chan []GpuStat, 1)
|
||||
|
||||
go func() {
|
||||
defer close(ch)
|
||||
|
||||
scanner := bufio.NewScanner(stdout)
|
||||
// mactop's JSON objects can be large; allow generous line lengths.
|
||||
scanner.Buffer(make([]byte, 0, 64*1024), 1024*1024)
|
||||
for scanner.Scan() {
|
||||
line := strings.TrimSpace(scanner.Text())
|
||||
if line == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
stat := ParseMactopLine(line)
|
||||
if stat != nil {
|
||||
// mactop only reports whole-system memory; overlay ioreg's
|
||||
// GPU-attributed unified memory so both backends are consistent.
|
||||
overlayIoregMem(ctx, stat)
|
||||
select {
|
||||
case ch <- []GpuStat{*stat}:
|
||||
default:
|
||||
}
|
||||
}
|
||||
}
|
||||
cmd.Wait()
|
||||
}()
|
||||
|
||||
return ch, nil
|
||||
}
|
||||
|
||||
func readSysStats() (SysStat, error) {
|
||||
|
||||
@@ -264,3 +264,50 @@ func TestParseNvidiaSmiLine_ZeroMemoryTotal(t *testing.T) {
|
||||
require.NotNil(t, stat)
|
||||
assert.Equal(t, 0.0, stat.MemUtilPct)
|
||||
}
|
||||
|
||||
const ioregSample = `+-o AGXAcceleratorG13X <class AGXAcceleratorG13X, id 0x1000009a1, registered, matched, active, busy 0 (39191 ms), retain 108>
|
||||
{
|
||||
"model" = "Apple M1 Pro"
|
||||
"gpu-core-count" = 16
|
||||
"PerformanceStatistics" = {"In use system memory (driver)"=0,"Alloc system memory"=14511046656,"Tiler Utilization %"=34,"recoveryCount"=0,"Renderer Utilization %"=34,"Device Utilization %"=34,"In use system memory"=7688503296}
|
||||
"IOClass" = "AGXAcceleratorG13X"
|
||||
}`
|
||||
|
||||
func TestParseIoregOutput_ValidOutput(t *testing.T) {
|
||||
const memTotalMB = 32768
|
||||
|
||||
stat := ParseIoregOutput([]byte(ioregSample), memTotalMB)
|
||||
require.NotNil(t, stat)
|
||||
|
||||
assert.Equal(t, 0, stat.ID)
|
||||
assert.Equal(t, "Apple M1 Pro (16-core GPU)", stat.Name)
|
||||
assert.Equal(t, 34.0, stat.GpuUtilPct)
|
||||
assert.Equal(t, 7688503296/(1024*1024), stat.MemUsedMB)
|
||||
assert.Equal(t, memTotalMB, stat.MemTotalMB)
|
||||
assert.InDelta(t, float64(stat.MemUsedMB)/memTotalMB*100, stat.MemUtilPct, 0.01)
|
||||
// Not exposed by ioreg.
|
||||
assert.Equal(t, 0, stat.TempC)
|
||||
assert.Equal(t, 0.0, stat.PowerDrawW)
|
||||
assert.Equal(t, 0.0, stat.FanSpeedPct)
|
||||
}
|
||||
|
||||
func TestParseIoregOutput_NoGpuDevice(t *testing.T) {
|
||||
stat := ParseIoregOutput([]byte("no gpu here"), 32768)
|
||||
assert.Nil(t, stat)
|
||||
}
|
||||
|
||||
func TestParseIoregOutput_ZeroMemTotal(t *testing.T) {
|
||||
stat := ParseIoregOutput([]byte(ioregSample), 0)
|
||||
require.NotNil(t, stat)
|
||||
assert.Equal(t, 0.0, stat.MemUtilPct)
|
||||
}
|
||||
|
||||
func TestParseIoregOutput_MissingModel(t *testing.T) {
|
||||
const out = `"Device Utilization %"=50,"In use system memory"=1048576`
|
||||
|
||||
stat := ParseIoregOutput([]byte(out), 1024)
|
||||
require.NotNil(t, stat)
|
||||
assert.Equal(t, "Apple GPU", stat.Name)
|
||||
assert.Equal(t, 50.0, stat.GpuUtilPct)
|
||||
assert.Equal(t, 1, stat.MemUsedMB)
|
||||
}
|
||||
|
||||
@@ -11,7 +11,6 @@ import (
|
||||
"os/exec"
|
||||
"strings"
|
||||
"sync/atomic"
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
"github.com/mostlygeek/llama-swap/internal/config"
|
||||
@@ -22,6 +21,22 @@ import (
|
||||
|
||||
var ErrStartAborted = fmt.Errorf("aborted")
|
||||
|
||||
// cmdWaitDelay is the upper bound the runtime will wait for child I/O to
|
||||
// drain after the process exits before force-closing the stdout/stderr
|
||||
// pipes. Required so that cmd.Wait() returns even when a forked grandchild
|
||||
// inherits and holds the pipes open (e.g. a shell wrapper that backgrounds
|
||||
// the real binary). killProcess sends the stop signal directly (not via the
|
||||
// cmd context), so this delay is measured from process exit rather than from
|
||||
// the stop request, and stays independent of the caller's graceful timeout.
|
||||
const cmdWaitDelay = 10 * time.Second
|
||||
|
||||
// parentCancelGraceTimeout is the graceful timeout used when the process is
|
||||
// torn down because parentCtx was cancelled (final router teardown or app
|
||||
// shutdown). In the normal flow the process has already been stopped via
|
||||
// Stop() by this point, so killProcess is a no-op kill; the short grace just
|
||||
// bounds the rare case where a process is still alive when its context is cut.
|
||||
const parentCancelGraceTimeout = time.Second
|
||||
|
||||
type runReq struct {
|
||||
timeout time.Duration
|
||||
respond chan error
|
||||
@@ -39,6 +54,7 @@ type waitReadyReq struct {
|
||||
type startResult struct {
|
||||
cmd *exec.Cmd
|
||||
cmdDone chan struct{}
|
||||
cancel context.CancelFunc
|
||||
handlerFn http.HandlerFunc
|
||||
err error
|
||||
}
|
||||
@@ -51,6 +67,11 @@ type ProcessCommand struct {
|
||||
processLogger *logmon.Monitor
|
||||
proxyLogger *logmon.Monitor
|
||||
|
||||
// waitDelay is assigned to cmd.WaitDelay when starting the upstream
|
||||
// process. Defaults to cmdWaitDelay; tests override it to keep the
|
||||
// pipe-close backstop from dominating their runtime.
|
||||
waitDelay time.Duration
|
||||
|
||||
runCh chan runReq
|
||||
stopCh chan stopReq
|
||||
waitReadyCh chan waitReadyReq
|
||||
@@ -85,6 +106,7 @@ func New(
|
||||
runCh: make(chan runReq),
|
||||
stopCh: make(chan stopReq),
|
||||
waitReadyCh: make(chan waitReadyReq),
|
||||
waitDelay: cmdWaitDelay,
|
||||
}
|
||||
p.state.Store(StateStopped)
|
||||
|
||||
@@ -122,6 +144,7 @@ func (p *ProcessCommand) run() {
|
||||
var (
|
||||
cmd *exec.Cmd
|
||||
cmdDone <-chan struct{}
|
||||
cmdCancel context.CancelFunc
|
||||
readyWaiters []waitReadyReq
|
||||
// runResp parks the in-flight Run caller's response channel. The
|
||||
// interface contract is that Run blocks until the process is
|
||||
@@ -164,9 +187,10 @@ func (p *ProcessCommand) run() {
|
||||
setState(StateShutdown)
|
||||
if cmd != nil {
|
||||
p.handler.Store(nil)
|
||||
p.killProcess(cmd, cmdDone, 100*time.Millisecond)
|
||||
p.killProcess(cmd, cmdCancel, cmdDone, parentCancelGraceTimeout)
|
||||
cmd = nil
|
||||
cmdDone = nil
|
||||
cmdCancel = nil
|
||||
}
|
||||
notifyWaiters(fmt.Errorf("[%s] shutdown", p.id))
|
||||
respondRun(fmt.Errorf("[%s] shutdown", p.id))
|
||||
@@ -177,8 +201,12 @@ func (p *ProcessCommand) run() {
|
||||
// cmdDone is nil while no process is running, so this case is
|
||||
// dormant outside of StateReady.
|
||||
case <-cmdDone:
|
||||
if cmdCancel != nil {
|
||||
cmdCancel()
|
||||
}
|
||||
cmd = nil
|
||||
cmdDone = nil
|
||||
cmdCancel = nil
|
||||
p.handler.Store(nil)
|
||||
setState(StateStopped)
|
||||
respondRun(fmt.Errorf("[%s] upstream exited unexpectedly", p.id))
|
||||
@@ -226,6 +254,7 @@ func (p *ProcessCommand) run() {
|
||||
if res.err == nil {
|
||||
cmd = res.cmd
|
||||
cmdDone = res.cmdDone
|
||||
cmdCancel = res.cancel
|
||||
fn := res.handlerFn
|
||||
p.handler.Store(&fn)
|
||||
setState(StateReady)
|
||||
@@ -273,7 +302,7 @@ func (p *ProcessCommand) run() {
|
||||
cancelStart()
|
||||
res := <-resultCh
|
||||
if res.cmd != nil {
|
||||
p.killProcess(res.cmd, res.cmdDone, stop.timeout)
|
||||
p.killProcess(res.cmd, res.cancel, res.cmdDone, stop.timeout)
|
||||
}
|
||||
setState(StateStopped)
|
||||
notifyWaiters(ErrStartAborted)
|
||||
@@ -293,7 +322,7 @@ func (p *ProcessCommand) run() {
|
||||
setState(StateShutdown)
|
||||
res := <-resultCh
|
||||
if res.cmd != nil {
|
||||
p.killProcess(res.cmd, res.cmdDone, 100*time.Millisecond)
|
||||
p.killProcess(res.cmd, res.cancel, res.cmdDone, parentCancelGraceTimeout)
|
||||
}
|
||||
notifyWaiters(fmt.Errorf("[%s] shutdown", p.id))
|
||||
respondRun(fmt.Errorf("[%s] shutdown", p.id))
|
||||
@@ -310,9 +339,10 @@ func (p *ProcessCommand) run() {
|
||||
case stop := <-p.stopCh:
|
||||
if cmd != nil {
|
||||
setState(StateStopping)
|
||||
p.killProcess(cmd, cmdDone, stop.timeout)
|
||||
p.killProcess(cmd, cmdCancel, cmdDone, stop.timeout)
|
||||
cmd = nil
|
||||
cmdDone = nil
|
||||
cmdCancel = nil
|
||||
p.handler.Store(nil)
|
||||
}
|
||||
// Stop is a no-op (and not an error) when already Stopped — this
|
||||
@@ -377,46 +407,71 @@ func (p *ProcessCommand) doStart(startCtx context.Context, healthCheckTimeout ti
|
||||
reverseProxy.ServeHTTP(w, r)
|
||||
})
|
||||
|
||||
cmd := exec.Command(args[0], args[1:]...)
|
||||
// cmdCtx + cmd.Cancel are wired as a safety net: if the context is ever
|
||||
// cancelled while the process is alive, cmd.Cancel sends SIGTERM / CmdStop
|
||||
// and the runtime escalates to SIGKILL after cmd.WaitDelay. In the normal
|
||||
// teardown path killProcess sends the stop signal directly instead, so
|
||||
// cmd.WaitDelay only acts as the inherited-pipe backstop measured from
|
||||
// process exit (see killProcess).
|
||||
cmdCtx, cmdCancel := context.WithCancel(context.Background())
|
||||
cmd := exec.CommandContext(cmdCtx, args[0], args[1:]...)
|
||||
cmd.Stderr = p.processLogger
|
||||
cmd.Stdout = p.processLogger
|
||||
cmd.Env = append(cmd.Environ(), p.config.Env...)
|
||||
cmd.Cancel = func() error { return p.sendStopSignal(cmd) }
|
||||
cmd.WaitDelay = p.waitDelay
|
||||
setProcAttributes(cmd)
|
||||
|
||||
p.proxyLogger.Debugf("<%s> Executing start command: %s, env: %s", p.id, strings.Join(args, " "), strings.Join(p.config.Env, ", "))
|
||||
|
||||
cmdDone := make(chan struct{})
|
||||
if err := cmd.Start(); err != nil {
|
||||
cmdCancel()
|
||||
return startResult{err: fmt.Errorf("failed to start command '%s': %w", strings.Join(args, " "), err)}
|
||||
}
|
||||
|
||||
go func() {
|
||||
waitErr := cmd.Wait()
|
||||
if exitErr, ok := waitErr.(*exec.ExitError); ok {
|
||||
p.proxyLogger.Debugf("<%s> process exited: code=%d, err=%v", p.id, exitErr.ExitCode(), waitErr)
|
||||
} else if waitErr != nil {
|
||||
p.proxyLogger.Debugf("<%s> process exited with error: %v", p.id, waitErr)
|
||||
} else {
|
||||
switch st := p.State(); {
|
||||
case waitErr == nil:
|
||||
p.proxyLogger.Debugf("<%s> process exited cleanly", p.id)
|
||||
case st == StateStopping || st == StateShutdown:
|
||||
// Expected: we force-terminated the process. A forced kill exits
|
||||
// the child with a non-zero code (e.g. taskkill /f on Windows
|
||||
// yields exit status 1), so this is not an error.
|
||||
p.proxyLogger.Debugf("<%s> process stopped by llama-swap: %v", p.id, waitErr)
|
||||
default:
|
||||
if exitErr, ok := waitErr.(*exec.ExitError); ok {
|
||||
p.proxyLogger.Debugf("<%s> process exited: code=%d, err=%v", p.id, exitErr.ExitCode(), waitErr)
|
||||
} else {
|
||||
p.proxyLogger.Debugf("<%s> process exited with error: %v", p.id, waitErr)
|
||||
}
|
||||
}
|
||||
close(cmdDone)
|
||||
}()
|
||||
|
||||
abort := func(err error) startResult {
|
||||
p.killProcess(cmd, cmdCancel, cmdDone, 5*time.Second)
|
||||
return startResult{err: err}
|
||||
}
|
||||
prematureExit := func() startResult {
|
||||
cmdCancel()
|
||||
return startResult{err: fmt.Errorf("upstream command exited prematurely")}
|
||||
}
|
||||
|
||||
if startCtx.Err() != nil {
|
||||
p.killProcess(cmd, cmdDone, 5*time.Second)
|
||||
return startResult{err: ErrStartAborted}
|
||||
return abort(ErrStartAborted)
|
||||
}
|
||||
|
||||
checkEndpoint := strings.TrimSpace(p.config.CheckEndpoint)
|
||||
if checkEndpoint == "none" {
|
||||
return startResult{cmd: cmd, cmdDone: cmdDone, handlerFn: handlerFn}
|
||||
return startResult{cmd: cmd, cmdDone: cmdDone, cancel: cmdCancel, handlerFn: handlerFn}
|
||||
}
|
||||
|
||||
// Wait 250ms for the command to start up before health checking
|
||||
select {
|
||||
case <-startCtx.Done():
|
||||
p.killProcess(cmd, cmdDone, 5*time.Second)
|
||||
return startResult{err: ErrStartAborted}
|
||||
return abort(ErrStartAborted)
|
||||
case <-time.After(250 * time.Millisecond):
|
||||
}
|
||||
|
||||
@@ -424,16 +479,14 @@ func (p *ProcessCommand) doStart(startCtx context.Context, healthCheckTimeout ti
|
||||
for {
|
||||
select {
|
||||
case <-startCtx.Done():
|
||||
p.killProcess(cmd, cmdDone, 5*time.Second)
|
||||
return startResult{err: ErrStartAborted}
|
||||
return abort(ErrStartAborted)
|
||||
case <-cmdDone:
|
||||
return startResult{err: fmt.Errorf("upstream command exited prematurely")}
|
||||
return prematureExit()
|
||||
default:
|
||||
}
|
||||
|
||||
if time.Now().After(deadline) {
|
||||
p.killProcess(cmd, cmdDone, 5*time.Second)
|
||||
return startResult{err: fmt.Errorf("health check timed out after %v", healthCheckTimeout)}
|
||||
return abort(fmt.Errorf("health check timed out after %v", healthCheckTimeout))
|
||||
}
|
||||
|
||||
req, _ := http.NewRequestWithContext(startCtx, "GET", p.config.CheckEndpoint, nil)
|
||||
@@ -445,42 +498,94 @@ func (p *ProcessCommand) doStart(startCtx context.Context, healthCheckTimeout ti
|
||||
p.proxyLogger.Infof("<%s> Health check passed on %s%s", p.id, p.config.Proxy, p.config.CheckEndpoint)
|
||||
break
|
||||
} else if startCtx.Err() != nil {
|
||||
p.killProcess(cmd, cmdDone, 5*time.Second)
|
||||
return startResult{err: ErrStartAborted}
|
||||
return abort(ErrStartAborted)
|
||||
}
|
||||
|
||||
select {
|
||||
case <-startCtx.Done():
|
||||
p.killProcess(cmd, cmdDone, 5*time.Second)
|
||||
return startResult{err: ErrStartAborted}
|
||||
return abort(ErrStartAborted)
|
||||
case <-cmdDone:
|
||||
return startResult{err: fmt.Errorf("upstream command exited prematurely")}
|
||||
return prematureExit()
|
||||
case <-time.After(time.Second):
|
||||
}
|
||||
}
|
||||
|
||||
return startResult{cmd: cmd, cmdDone: cmdDone, handlerFn: handlerFn}
|
||||
return startResult{cmd: cmd, cmdDone: cmdDone, cancel: cmdCancel, handlerFn: handlerFn}
|
||||
}
|
||||
|
||||
func (p *ProcessCommand) killProcess(cmd *exec.Cmd, cmdDone <-chan struct{}, gracefulTimeout time.Duration) {
|
||||
// sendStopSignal runs the configured CmdStop (if any) or sends SIGTERM to
|
||||
// the upstream process. Wired up as cmd.Cancel so it fires whenever the
|
||||
// cmd's context is cancelled.
|
||||
func (p *ProcessCommand) sendStopSignal(cmd *exec.Cmd) error {
|
||||
if cmd == nil || cmd.Process == nil {
|
||||
return
|
||||
p.processLogger.Debugf("<%s> sendStopSignal() called with nil cmd or process, nothing to stop", p.id)
|
||||
return nil
|
||||
}
|
||||
|
||||
pid := cmd.Process.Pid
|
||||
if p.config.CmdStop != "" {
|
||||
p.processLogger.Debugf("<%s> sendStopSignal() using CmdStop %q for pid %d", p.id, p.config.CmdStop, pid)
|
||||
stopArgs, err := config.SanitizeCommand(
|
||||
strings.ReplaceAll(p.config.CmdStop, "${PID}", fmt.Sprintf("%d", cmd.Process.Pid)),
|
||||
strings.ReplaceAll(p.config.CmdStop, "${PID}", fmt.Sprintf("%d", pid)),
|
||||
)
|
||||
if err == nil {
|
||||
p.processLogger.Debugf("<%s> sendStopSignal() running stop command: %s", p.id, strings.Join(stopArgs, " "))
|
||||
stopCmd := exec.Command(stopArgs[0], stopArgs[1:]...)
|
||||
stopCmd.Env = cmd.Env
|
||||
setProcAttributes(stopCmd)
|
||||
stopCmd.Run()
|
||||
} else {
|
||||
cmd.Process.Signal(syscall.SIGTERM)
|
||||
runErr := stopCmd.Run()
|
||||
if runErr != nil {
|
||||
p.processLogger.Errorf("<%s> sendStopSignal() stop command failed: %v", p.id, runErr)
|
||||
} else {
|
||||
p.processLogger.Debugf("<%s> sendStopSignal() stop command completed for pid %d", p.id, pid)
|
||||
}
|
||||
return runErr
|
||||
}
|
||||
} else {
|
||||
cmd.Process.Signal(syscall.SIGTERM)
|
||||
// fall through to SIGTERM if sanitize failed
|
||||
p.processLogger.Errorf("<%s> sendStopSignal() failed to sanitize CmdStop %q: %v, falling back to terminateProcessTree", p.id, p.config.CmdStop, err)
|
||||
}
|
||||
// On Unix this SIGTERMs the whole process group so a forked grandchild
|
||||
// (e.g. a shell wrapper that backgrounds the real binary) is taken down
|
||||
// with the parent rather than orphaned.
|
||||
p.processLogger.Debugf("<%s> sendStopSignal() no CmdStop configured, calling terminateProcessTree for pid %d", p.id, pid)
|
||||
termErr := terminateProcessTree(cmd)
|
||||
if termErr != nil {
|
||||
p.processLogger.Errorf("<%s> sendStopSignal() terminateProcessTree failed for pid %d: %v", p.id, pid, termErr)
|
||||
}
|
||||
return termErr
|
||||
}
|
||||
|
||||
// killProcess terminates the upstream process. The flow:
|
||||
//
|
||||
// 1. Send the graceful stop signal (CmdStop / SIGTERM) directly — NOT by
|
||||
// cancelling cmdCtx. Cancelling the context would start cmd.WaitDelay
|
||||
// immediately, which force-kills the process WaitDelay after the signal
|
||||
// and would silently cap gracefulTimeout at WaitDelay whenever
|
||||
// gracefulTimeout is the longer of the two.
|
||||
// 2. We wait up to gracefulTimeout for the process to exit on its own.
|
||||
// 3. If still alive, we SIGKILL the process group directly (Unix) so any
|
||||
// forked descendant is force-terminated alongside the parent.
|
||||
// 4. We wait on cmdDone. cmd.WaitDelay (set when the cmd was built) is the
|
||||
// critical backstop here: once the process exits, if a forked grandchild
|
||||
// inherited the stdout/stderr pipes and is still holding them, the runtime
|
||||
// force-closes the pipes WaitDelay after the exit and cmd.Wait() unblocks.
|
||||
// Because we never cancelled the context, that WaitDelay timer measures
|
||||
// from process exit (see os/exec awaitGoroutines), not from this call.
|
||||
// Without WaitDelay this select would hang forever (the v219 bug).
|
||||
//
|
||||
// cancel() is still invoked (deferred) to release the context, but only after
|
||||
// the process has exited and os/exec's ctx watcher has already torn down, so it
|
||||
// never re-fires cmd.Cancel.
|
||||
func (p *ProcessCommand) killProcess(cmd *exec.Cmd, cancel context.CancelFunc, cmdDone <-chan struct{}, gracefulTimeout time.Duration) {
|
||||
if cancel == nil {
|
||||
return
|
||||
}
|
||||
defer cancel()
|
||||
|
||||
// Deliver CmdStop / SIGTERM in a goroutine so a slow or hanging CmdStop
|
||||
// cannot block the run() goroutine; the gracefulTimeout + Process.Kill
|
||||
// path below still guarantees teardown.
|
||||
if cmd != nil {
|
||||
go func() { _ = p.sendStopSignal(cmd) }()
|
||||
}
|
||||
|
||||
timer := time.NewTimer(gracefulTimeout)
|
||||
@@ -488,10 +593,16 @@ func (p *ProcessCommand) killProcess(cmd *exec.Cmd, cmdDone <-chan struct{}, gra
|
||||
|
||||
select {
|
||||
case <-cmdDone:
|
||||
return
|
||||
case <-timer.C:
|
||||
cmd.Process.Kill()
|
||||
<-cmdDone
|
||||
}
|
||||
|
||||
if cmd != nil {
|
||||
// SIGKILL the whole process group on Unix so any descendant that
|
||||
// ignored or outlived the graceful signal is force-terminated too.
|
||||
_ = killProcessTree(cmd)
|
||||
}
|
||||
<-cmdDone
|
||||
}
|
||||
|
||||
func (p *ProcessCommand) ID() string {
|
||||
|
||||
@@ -0,0 +1,262 @@
|
||||
//go:build !windows
|
||||
|
||||
package process
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
"syscall"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/mostlygeek/llama-swap/internal/config"
|
||||
)
|
||||
|
||||
// TestProcessCommand_StopForkingWrapper is a regression for the bug reported
|
||||
// against v219 where Stop would hang indefinitely when the upstream command
|
||||
// is a shell wrapper that forks the real binary (e.g. `#!/bin/bash` then
|
||||
// `"$@"`). After SIGTERM the wrapper dies but the grandchild inherits the
|
||||
// stdout/stderr pipes; cmd.Wait() blocks waiting for the pipe-copy goroutine
|
||||
// to drain EOF, which never happens while the grandchild holds the fds.
|
||||
//
|
||||
// The fix is cmd.WaitDelay (combined with exec.CommandContext + cmd.Cancel),
|
||||
// which causes the runtime to force-close the pipes after the delay so
|
||||
// cmd.Wait() — and therefore Stop — returns.
|
||||
func TestProcessCommand_StopForkingWrapper(t *testing.T) {
|
||||
skipIfNoSimpleResponder(t)
|
||||
|
||||
port := getFreePort(t)
|
||||
dir := t.TempDir()
|
||||
pidFile := filepath.Join(dir, "child.pid")
|
||||
|
||||
// Wrapper script: backgrounds the child (which inherits stdout/stderr),
|
||||
// records its PID for cleanup, then waits. When SIGTERM hits bash it
|
||||
// dies without forwarding the signal; the grandchild keeps running and
|
||||
// keeps the inherited pipe fds open. This is the scenario reported in
|
||||
// the v219 regression.
|
||||
wrapper := filepath.Join(dir, "wrapper.sh")
|
||||
script := fmt.Sprintf("#!/bin/bash\n%q -port %d -silent &\necho $! > %q\nwait\n",
|
||||
simpleResponderPath, port, pidFile)
|
||||
if err := os.WriteFile(wrapper, []byte(script), 0o755); err != nil {
|
||||
t.Fatalf("WriteFile: %v", err)
|
||||
}
|
||||
t.Cleanup(func() { killChildFromPidFile(pidFile) })
|
||||
|
||||
p := newProcessCommand(t, config.ModelConfig{
|
||||
Cmd: wrapper,
|
||||
Proxy: fmt.Sprintf("http://127.0.0.1:%d", port),
|
||||
CheckEndpoint: "/health",
|
||||
HealthCheckTimeout: 10,
|
||||
})
|
||||
// Shrink the pipe-close backstop so the test doesn't sit at the
|
||||
// production default (10s). Must be set before Run() so doStart picks
|
||||
// it up when building the cmd.
|
||||
const testWaitDelay = 250 * time.Millisecond
|
||||
p.waitDelay = testWaitDelay
|
||||
|
||||
runErr := runAsync(t, p)
|
||||
|
||||
// Stop must return within a bounded time even though the grandchild
|
||||
// is still holding the pipe open. Budget is generous on top of
|
||||
// testWaitDelay to absorb scheduling jitter on slow CI runners; the
|
||||
// pre-fix behaviour was an unbounded hang, so any reasonable cap
|
||||
// distinguishes pass from fail.
|
||||
stopReturned := make(chan error, 1)
|
||||
stopStart := time.Now()
|
||||
go func() { stopReturned <- p.Stop(testStopTimeout) }()
|
||||
|
||||
const stopBudget = testWaitDelay + 2*time.Second
|
||||
select {
|
||||
case err := <-stopReturned:
|
||||
if err != nil {
|
||||
t.Fatalf("Stop: %v", err)
|
||||
}
|
||||
t.Logf("Stop returned in %v", time.Since(stopStart))
|
||||
case <-time.After(stopBudget):
|
||||
t.Fatalf("Stop did not return within %v — cmd.Wait() likely hung on inherited pipe", stopBudget)
|
||||
}
|
||||
|
||||
if got := p.State(); got != StateStopped {
|
||||
t.Errorf("after Stop: expected state %s, got %s", StateStopped, got)
|
||||
}
|
||||
|
||||
select {
|
||||
case <-runErr:
|
||||
case <-time.After(testReturnTimeout):
|
||||
t.Errorf("Run did not return after Stop")
|
||||
}
|
||||
}
|
||||
|
||||
// TestProcessCommand_StopHonorsGracefulTimeout is a regression for the bug
|
||||
// where cmd.WaitDelay capped the graceful shutdown window. killProcess used to
|
||||
// cancel the cmd context to deliver SIGTERM, which starts cmd.WaitDelay
|
||||
// immediately; a process whose SIGTERM handler needs longer than WaitDelay to
|
||||
// finish was force-killed early even though Stop was given a much longer
|
||||
// timeout. The fix sends the signal directly so WaitDelay measures from process
|
||||
// exit (its inherited-pipe backstop role), leaving the graceful window to the
|
||||
// caller's Stop timeout.
|
||||
func TestProcessCommand_StopHonorsGracefulTimeout(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
marker := filepath.Join(dir, "graceful.done")
|
||||
ready := filepath.Join(dir, "trap.ready")
|
||||
|
||||
// On SIGTERM, sleep past the (short) WaitDelay, then write the marker and
|
||||
// exit cleanly. If WaitDelay still drove the kill, bash would be SIGKILLed
|
||||
// mid-handler and the marker would never be written. The ready file is
|
||||
// written only after the trap is installed so the test does not race
|
||||
// SIGTERM ahead of it (CheckEndpoint:none marks ready before bash runs).
|
||||
script := filepath.Join(dir, "graceful.sh")
|
||||
body := fmt.Sprintf(
|
||||
"#!/bin/bash\ncleanup() { sleep 0.6; echo done > %q; exit 0; }\ntrap cleanup SIGTERM\necho ready > %q\nwhile true; do sleep 0.1; done\n",
|
||||
marker, ready,
|
||||
)
|
||||
if err := os.WriteFile(script, []byte(body), 0o755); err != nil {
|
||||
t.Fatalf("WriteFile: %v", err)
|
||||
}
|
||||
|
||||
p := newProcessCommand(t, config.ModelConfig{
|
||||
Cmd: script,
|
||||
Proxy: "http://127.0.0.1:1", // unused: health check disabled
|
||||
CheckEndpoint: "none",
|
||||
})
|
||||
// WaitDelay shorter than the handler's 0.6s sleep, and far shorter than the
|
||||
// Stop timeout below — this is the window the old code mis-killed in.
|
||||
p.waitDelay = 200 * time.Millisecond
|
||||
|
||||
runErr := runAsync(t, p)
|
||||
|
||||
// Wait until the trap is installed before stopping.
|
||||
trapDeadline := time.Now().Add(2 * time.Second)
|
||||
for {
|
||||
if _, err := os.Stat(ready); err == nil {
|
||||
break
|
||||
}
|
||||
if time.Now().After(trapDeadline) {
|
||||
t.Fatalf("script did not install SIGTERM trap in time")
|
||||
}
|
||||
time.Sleep(10 * time.Millisecond)
|
||||
}
|
||||
|
||||
stopStart := time.Now()
|
||||
if err := p.Stop(5 * time.Second); err != nil {
|
||||
t.Fatalf("Stop: %v", err)
|
||||
}
|
||||
elapsed := time.Since(stopStart)
|
||||
|
||||
// The handler must have run to completion (marker written) rather than
|
||||
// being force-killed at waitDelay.
|
||||
if _, err := os.Stat(marker); err != nil {
|
||||
t.Fatalf("graceful handler did not complete (marker missing): %v", err)
|
||||
}
|
||||
// And Stop must have waited for the handler (>~0.6s), not returned at the
|
||||
// 200ms waitDelay.
|
||||
if elapsed < 500*time.Millisecond {
|
||||
t.Fatalf("Stop returned in %v — process was killed before its graceful handler finished", elapsed)
|
||||
}
|
||||
|
||||
if got := p.State(); got != StateStopped {
|
||||
t.Errorf("after Stop: expected state %s, got %s", StateStopped, got)
|
||||
}
|
||||
select {
|
||||
case <-runErr:
|
||||
case <-time.After(testReturnTimeout):
|
||||
t.Errorf("Run did not return after Stop")
|
||||
}
|
||||
}
|
||||
|
||||
// TestProcessCommand_StopReapsForkedGrandchild verifies that stopping a forking
|
||||
// wrapper takes down the backgrounded grandchild too, rather than leaving it as
|
||||
// an orphan. The fix is Setpgid (runtime_unix.go): the wrapper leads its own
|
||||
// process group, so the stop signal is delivered to the whole group via the
|
||||
// negative PID and reaches the grandchild the wrapper never reaped.
|
||||
func TestProcessCommand_StopReapsForkedGrandchild(t *testing.T) {
|
||||
skipIfNoSimpleResponder(t)
|
||||
|
||||
port := getFreePort(t)
|
||||
dir := t.TempDir()
|
||||
pidFile := filepath.Join(dir, "child.pid")
|
||||
|
||||
wrapper := filepath.Join(dir, "wrapper.sh")
|
||||
script := fmt.Sprintf("#!/bin/bash\n%q -port %d -silent &\necho $! > %q\nwait\n",
|
||||
simpleResponderPath, port, pidFile)
|
||||
if err := os.WriteFile(wrapper, []byte(script), 0o755); err != nil {
|
||||
t.Fatalf("WriteFile: %v", err)
|
||||
}
|
||||
t.Cleanup(func() { killChildFromPidFile(pidFile) })
|
||||
|
||||
p := newProcessCommand(t, config.ModelConfig{
|
||||
Cmd: wrapper,
|
||||
Proxy: fmt.Sprintf("http://127.0.0.1:%d", port),
|
||||
CheckEndpoint: "/health",
|
||||
HealthCheckTimeout: 10,
|
||||
})
|
||||
|
||||
runErr := runAsync(t, p)
|
||||
|
||||
// Read the grandchild PID the wrapper recorded.
|
||||
var childPID int
|
||||
deadline := time.Now().Add(2 * time.Second)
|
||||
for {
|
||||
data, err := os.ReadFile(pidFile)
|
||||
if err == nil {
|
||||
if pid, perr := strconv.Atoi(strings.TrimSpace(string(data))); perr == nil && pid > 0 {
|
||||
childPID = pid
|
||||
break
|
||||
}
|
||||
}
|
||||
if time.Now().After(deadline) {
|
||||
t.Fatalf("wrapper did not record grandchild PID")
|
||||
}
|
||||
time.Sleep(10 * time.Millisecond)
|
||||
}
|
||||
|
||||
if err := p.Stop(testStopTimeout); err != nil {
|
||||
t.Fatalf("Stop: %v", err)
|
||||
}
|
||||
|
||||
// After Stop the grandchild must be gone. Signal 0 probes liveness without
|
||||
// actually sending a signal; give it a brief window to exit after the
|
||||
// group SIGTERM.
|
||||
proc, err := os.FindProcess(childPID)
|
||||
if err != nil {
|
||||
t.Fatalf("FindProcess: %v", err)
|
||||
}
|
||||
gone := false
|
||||
for i := 0; i < 100; i++ {
|
||||
if err := proc.Signal(syscall.Signal(0)); err != nil {
|
||||
gone = true
|
||||
break
|
||||
}
|
||||
time.Sleep(10 * time.Millisecond)
|
||||
}
|
||||
if !gone {
|
||||
t.Errorf("grandchild PID %d still alive after Stop — process group was not reaped", childPID)
|
||||
}
|
||||
|
||||
select {
|
||||
case <-runErr:
|
||||
case <-time.After(testReturnTimeout):
|
||||
t.Errorf("Run did not return after Stop")
|
||||
}
|
||||
}
|
||||
|
||||
// killChildFromPidFile reads a PID written by the wrapper script and SIGKILLs
|
||||
// it so leaked orphans don't accumulate between test runs. Best-effort.
|
||||
func killChildFromPidFile(pidFile string) {
|
||||
data, err := os.ReadFile(pidFile)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
pid, err := strconv.Atoi(strings.TrimSpace(string(data)))
|
||||
if err != nil || pid <= 0 {
|
||||
return
|
||||
}
|
||||
proc, err := os.FindProcess(pid)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
_ = proc.Kill()
|
||||
}
|
||||
@@ -4,9 +4,41 @@ package process
|
||||
|
||||
import (
|
||||
"os/exec"
|
||||
"syscall"
|
||||
)
|
||||
|
||||
// setProcAttributes sets platform-specific process attributes
|
||||
// setProcAttributes starts the upstream in its own process group (Setpgid) so
|
||||
// the entire process tree can be signalled at once via its negative PID. This
|
||||
// is what lets us reap a forked grandchild — e.g. a shell wrapper that
|
||||
// backgrounds the real binary and exits — instead of leaking it as an orphan
|
||||
// that holds the inherited stdout/stderr pipes open.
|
||||
func setProcAttributes(cmd *exec.Cmd) {
|
||||
// No-op on Unix systems
|
||||
cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true}
|
||||
}
|
||||
|
||||
// terminateProcessTree sends SIGTERM to the whole process group led by the
|
||||
// command, giving every process in the tree a chance to shut down gracefully.
|
||||
func terminateProcessTree(cmd *exec.Cmd) error {
|
||||
return signalProcessTree(cmd, syscall.SIGTERM)
|
||||
}
|
||||
|
||||
// killProcessTree sends SIGKILL to the whole process group, force-terminating
|
||||
// every process in the tree.
|
||||
func killProcessTree(cmd *exec.Cmd) error {
|
||||
return signalProcessTree(cmd, syscall.SIGKILL)
|
||||
}
|
||||
|
||||
// signalProcessTree signals the process group led by cmd.Process. Because the
|
||||
// child was started with Setpgid it is its own group leader (pgid == pid), so
|
||||
// targeting -pid reaches the child and every descendant still in the group.
|
||||
// Falls back to signalling just the child if the group send fails (e.g. the
|
||||
// group has already drained), so we never silently skip the signal.
|
||||
func signalProcessTree(cmd *exec.Cmd, sig syscall.Signal) error {
|
||||
if cmd == nil || cmd.Process == nil {
|
||||
return nil
|
||||
}
|
||||
if err := syscall.Kill(-cmd.Process.Pid, sig); err != nil {
|
||||
return cmd.Process.Signal(sig)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -3,14 +3,51 @@
|
||||
package process
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os/exec"
|
||||
"syscall"
|
||||
)
|
||||
|
||||
// setProcAttributes sets platform-specific process attributes
|
||||
// setProcAttributes sets platform-specific process attributes. CREATE_NO_WINDOW
|
||||
// keeps the upstream from spawning its own console window.
|
||||
func setProcAttributes(cmd *exec.Cmd) {
|
||||
cmd.SysProcAttr = &syscall.SysProcAttr{
|
||||
HideWindow: true,
|
||||
CreationFlags: 0x08000000, // CREATE_NO_WINDOW
|
||||
}
|
||||
}
|
||||
|
||||
// terminateProcessTree requests a graceful shutdown of the whole process tree
|
||||
// rooted at cmd.Process. Windows has no SIGTERM or process-group signalling, so
|
||||
// we shell out to `taskkill /t`, which walks the child tree by PID — the
|
||||
// equivalent of signalling a Unix process group. Without /f, taskkill asks the
|
||||
// processes to close rather than force-killing them.
|
||||
func terminateProcessTree(cmd *exec.Cmd) error {
|
||||
return taskkillProcessTree(cmd, false)
|
||||
}
|
||||
|
||||
// killProcessTree force-terminates the whole process tree rooted at cmd.Process
|
||||
// via `taskkill /f /t`, so any descendant that ignored or outlived the graceful
|
||||
// request is killed alongside the parent rather than leaked as an orphan.
|
||||
func killProcessTree(cmd *exec.Cmd) error {
|
||||
return taskkillProcessTree(cmd, true)
|
||||
}
|
||||
|
||||
// taskkillProcessTree runs taskkill against cmd.Process.Pid. The /t flag
|
||||
// terminates the process together with any child processes it started, which is
|
||||
// the Windows analogue of signalling a Unix process group via its negative PID.
|
||||
// When force is true the /f flag force-kills; otherwise taskkill requests a
|
||||
// graceful close.
|
||||
func taskkillProcessTree(cmd *exec.Cmd, force bool) error {
|
||||
if cmd == nil || cmd.Process == nil {
|
||||
return nil
|
||||
}
|
||||
args := make([]string, 0, 4)
|
||||
if force {
|
||||
args = append(args, "/f")
|
||||
}
|
||||
args = append(args, "/t", "/pid", fmt.Sprintf("%d", cmd.Process.Pid))
|
||||
kill := exec.Command("taskkill", args...)
|
||||
setProcAttributes(kill)
|
||||
return kill.Run()
|
||||
}
|
||||
|
||||
@@ -0,0 +1,7 @@
|
||||
//go:build !windows
|
||||
|
||||
package process
|
||||
|
||||
// SetupTreeCleanup is a no-op on non-Windows platforms, where upstream process
|
||||
// teardown is handled via process-group signalling (see runtime_unix.go).
|
||||
func SetupTreeCleanup() error { return nil }
|
||||
@@ -0,0 +1,50 @@
|
||||
//go:build windows
|
||||
|
||||
package process
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"unsafe"
|
||||
|
||||
"golang.org/x/sys/windows"
|
||||
)
|
||||
|
||||
// SetupTreeCleanup assigns the current process to a Windows Job Object
|
||||
// configured with JOB_OBJECT_LIMIT_KILL_ON_JOB_CLOSE. Upstream processes
|
||||
// spawned afterwards are associated with the same job, so when llama-swap exits
|
||||
// for any reason — graceful shutdown, a forced second Ctrl+C, or a crash — the
|
||||
// OS terminates the whole job and reaps every child instead of leaving orphans
|
||||
// behind. It is the parent-side complement to the per-process teardown in
|
||||
// runtime_windows.go.
|
||||
//
|
||||
// The job handle is intentionally leaked for the lifetime of the process: the
|
||||
// kill-on-close behaviour fires when the last handle is released, which the OS
|
||||
// does when the process exits.
|
||||
func SetupTreeCleanup() error {
|
||||
job, err := windows.CreateJobObject(nil, nil)
|
||||
if err != nil {
|
||||
return fmt.Errorf("CreateJobObject: %w", err)
|
||||
}
|
||||
|
||||
info := windows.JOBOBJECT_EXTENDED_LIMIT_INFORMATION{
|
||||
BasicLimitInformation: windows.JOBOBJECT_BASIC_LIMIT_INFORMATION{
|
||||
LimitFlags: windows.JOB_OBJECT_LIMIT_KILL_ON_JOB_CLOSE,
|
||||
},
|
||||
}
|
||||
if _, err := windows.SetInformationJobObject(
|
||||
job,
|
||||
windows.JobObjectExtendedLimitInformation,
|
||||
uintptr(unsafe.Pointer(&info)),
|
||||
uint32(unsafe.Sizeof(info)),
|
||||
); err != nil {
|
||||
windows.CloseHandle(job)
|
||||
return fmt.Errorf("SetInformationJobObject: %w", err)
|
||||
}
|
||||
|
||||
if err := windows.AssignProcessToJobObject(job, windows.CurrentProcess()); err != nil {
|
||||
windows.CloseHandle(job)
|
||||
return fmt.Errorf("AssignProcessToJobObject: %w", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
+37
-12
@@ -75,10 +75,21 @@ type baseRouter struct {
|
||||
logger *logmon.Monitor
|
||||
planner swapPlanner
|
||||
|
||||
// shutdownCtx governs the request machinery: cancelling it tells grant()
|
||||
// and ServeHTTP to stop granting and reject callers. It is deliberately
|
||||
// separate from procCtx — see procCtx below.
|
||||
shutdownCtx context.Context
|
||||
shutdownFn context.CancelFunc
|
||||
shuttingDown atomic.Bool
|
||||
|
||||
// procCtx is the parent context for every managed process and governs
|
||||
// process lifetime only. handleShutdown stops processes gracefully via
|
||||
// Stop() and cancels procCtx afterwards, so teardown is never a context
|
||||
// cancel racing the graceful path (which collapsed the grace to 100ms and
|
||||
// let the caller return before children were reaped — see process run loop).
|
||||
procCtx context.Context
|
||||
procCancel context.CancelFunc
|
||||
|
||||
handlerCh chan handlerReq
|
||||
shutdownCh chan shutdownReq
|
||||
unloadCh chan unloadReq
|
||||
@@ -97,6 +108,7 @@ type baseRouter struct {
|
||||
|
||||
func newBaseRouter(name string, conf config.Config, processes map[string]process.Process, planner swapPlanner, logger *logmon.Monitor) *baseRouter {
|
||||
shutdownCtx, shutdownFn := context.WithCancel(context.Background())
|
||||
procCtx, procCancel := context.WithCancel(context.Background())
|
||||
return &baseRouter{
|
||||
name: name,
|
||||
config: conf,
|
||||
@@ -105,6 +117,8 @@ func newBaseRouter(name string, conf config.Config, processes map[string]process
|
||||
planner: planner,
|
||||
shutdownCtx: shutdownCtx,
|
||||
shutdownFn: shutdownFn,
|
||||
procCtx: procCtx,
|
||||
procCancel: procCancel,
|
||||
handlerCh: make(chan handlerReq),
|
||||
shutdownCh: make(chan shutdownReq),
|
||||
unloadCh: make(chan unloadReq),
|
||||
@@ -492,6 +506,8 @@ func (b *baseRouter) handleShutdown(req shutdownReq, active map[string]*activeSw
|
||||
// The grant calls below then either land (waiter happened to receive
|
||||
// before noticing shutdown) or fall through immediately via grant's
|
||||
// shutdownCtx case — either way the waiter sees a non-OK response.
|
||||
// This does NOT touch processes: their lifetime is procCtx, cancelled
|
||||
// only after the graceful Stop() calls below have reaped them.
|
||||
b.shutdownFn()
|
||||
|
||||
for _, s := range active {
|
||||
@@ -535,6 +551,11 @@ func (b *baseRouter) handleShutdown(req shutdownReq, active map[string]*activeSw
|
||||
<-done
|
||||
}
|
||||
|
||||
// Every process is stopped (children reaped via Stop()). Cancel procCtx so
|
||||
// the process run-loop goroutines exit; they are already StateStopped, so
|
||||
// this is a clean no-op kill rather than a forced teardown.
|
||||
b.procCancel()
|
||||
|
||||
req.respond <- nil
|
||||
}
|
||||
|
||||
@@ -745,24 +766,28 @@ func (b *baseRouter) ServeHTTP(w http.ResponseWriter, req *http.Request) {
|
||||
}()
|
||||
}
|
||||
|
||||
// finishLoading stops the loading stream and fences its goroutine off from
|
||||
// the ResponseWriter before the real handler (or ServeHTTP's return)
|
||||
// reclaims it. release() must run even when waitForCompletion times out:
|
||||
// otherwise a still-streaming goroutine flushes a finalized response and
|
||||
// panics on the recycled *bufio.Writer.
|
||||
finishLoading := func() {
|
||||
cancelLoad()
|
||||
if lw != nil {
|
||||
lw.waitForCompletion(1 * time.Second)
|
||||
lw.release()
|
||||
}
|
||||
}
|
||||
|
||||
var resp handlerResp
|
||||
select {
|
||||
case resp = <-hr.respond:
|
||||
cancelLoad()
|
||||
if lw != nil {
|
||||
lw.waitForCompletion(1 * time.Second)
|
||||
}
|
||||
finishLoading()
|
||||
case <-req.Context().Done():
|
||||
cancelLoad()
|
||||
if lw != nil {
|
||||
lw.waitForCompletion(1 * time.Second)
|
||||
}
|
||||
finishLoading()
|
||||
return
|
||||
case <-b.shutdownCtx.Done():
|
||||
cancelLoad()
|
||||
if lw != nil {
|
||||
lw.waitForCompletion(1 * time.Second)
|
||||
}
|
||||
finishLoading()
|
||||
SendError(w, req, fmt.Errorf("%s is shutting down", b.name))
|
||||
return
|
||||
}
|
||||
|
||||
@@ -36,12 +36,14 @@ func NewGroup(conf config.Config, proxylog, upstreamlog *logmon.Monitor) (*Group
|
||||
modelCfg, _, ok := conf.FindConfig(mid)
|
||||
if !ok {
|
||||
base.shutdownFn()
|
||||
base.procCancel()
|
||||
return nil, fmt.Errorf("no model config for %q", mid)
|
||||
}
|
||||
procLog := logmon.NewWriter(upstreamlog)
|
||||
p, err := process.New(base.shutdownCtx, mid, modelCfg, procLog, proxylog)
|
||||
p, err := process.New(base.procCtx, mid, modelCfg, procLog, proxylog)
|
||||
if err != nil {
|
||||
base.shutdownFn()
|
||||
base.procCancel()
|
||||
return nil, fmt.Errorf("creating process for %q: %w", mid, err)
|
||||
}
|
||||
processes[mid] = p
|
||||
|
||||
@@ -38,6 +38,13 @@ type loadingWriter struct {
|
||||
pendingMu sync.Mutex
|
||||
pendingUpdate string
|
||||
|
||||
// writeMu serializes writes to the underlying writer and guards released.
|
||||
// Once released is set, the streaming goroutine must not touch the writer
|
||||
// again — ServeHTTP has reclaimed it (to run the real handler or to return)
|
||||
// and writing/flushing a finalized response panics.
|
||||
writeMu sync.Mutex
|
||||
released bool
|
||||
|
||||
// closed by start when the goroutine finishes (after cleanup messages)
|
||||
done chan struct{}
|
||||
|
||||
@@ -217,12 +224,33 @@ func (s *loadingWriter) sendData(data string) {
|
||||
return
|
||||
}
|
||||
|
||||
_, err = fmt.Fprintf(s.writer, "data: %s\n\n", jsonData)
|
||||
if err != nil {
|
||||
s.writeMu.Lock()
|
||||
defer s.writeMu.Unlock()
|
||||
// Once ServeHTTP has reclaimed the writer (release), writing/flushing it
|
||||
// races the real handler or panics on a finalized response. Stop here.
|
||||
if s.released {
|
||||
return
|
||||
}
|
||||
|
||||
if _, err = fmt.Fprintf(s.writer, "data: %s\n\n", jsonData); err != nil {
|
||||
s.logger.Debugf("<%s> Failed to write SSE data (client likely disconnected): %v", s.modelName, err)
|
||||
return
|
||||
}
|
||||
s.Flush()
|
||||
if flusher, ok := s.writer.(http.Flusher); ok {
|
||||
flusher.Flush()
|
||||
}
|
||||
}
|
||||
|
||||
// release fences the loadingWriter off from the underlying ResponseWriter.
|
||||
// After it returns, the streaming goroutine will not write to or flush the
|
||||
// writer again: any in-flight write completes under writeMu first, and later
|
||||
// writes short-circuit on released. The caller can then safely hand the writer
|
||||
// to the real handler or let ServeHTTP return without racing a finalized
|
||||
// response (a use-after-return Flush panics on the recycled *bufio.Writer).
|
||||
func (s *loadingWriter) release() {
|
||||
s.writeMu.Lock()
|
||||
s.released = true
|
||||
s.writeMu.Unlock()
|
||||
}
|
||||
|
||||
func (s *loadingWriter) Header() http.Header {
|
||||
|
||||
@@ -31,9 +31,10 @@ func NewMatrix(conf config.Config, proxylog, upstreamlog *logmon.Monitor) (*Matr
|
||||
|
||||
for mid, modelCfg := range conf.Models {
|
||||
procLog := logmon.NewWriter(upstreamlog)
|
||||
p, err := process.New(base.shutdownCtx, mid, modelCfg, procLog, proxylog)
|
||||
p, err := process.New(base.procCtx, mid, modelCfg, procLog, proxylog)
|
||||
if err != nil {
|
||||
base.shutdownFn()
|
||||
base.procCancel()
|
||||
return nil, fmt.Errorf("creating process for %q: %w", mid, err)
|
||||
}
|
||||
processes[mid] = p
|
||||
|
||||
@@ -45,7 +45,9 @@ func CreateConcurrencyMiddleware(cfg config.Config) chain.Middleware {
|
||||
return
|
||||
}
|
||||
if !sem.TryAcquire(1) {
|
||||
http.Error(w, "Too many requests", http.StatusTooManyRequests)
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
w.WriteHeader(http.StatusTooManyRequests)
|
||||
w.Write([]byte(`{"error":"Too many requests"}`))
|
||||
return
|
||||
}
|
||||
defer sem.Release(1)
|
||||
|
||||
@@ -1 +0,0 @@
|
||||
placeholder so //go:embed ui_dist succeeds before the UI is built
|
||||
|
||||
+30
-2
@@ -19,6 +19,7 @@ import (
|
||||
"github.com/mostlygeek/llama-swap/internal/event"
|
||||
"github.com/mostlygeek/llama-swap/internal/logmon"
|
||||
"github.com/mostlygeek/llama-swap/internal/perf"
|
||||
"github.com/mostlygeek/llama-swap/internal/process"
|
||||
"github.com/mostlygeek/llama-swap/internal/server"
|
||||
"github.com/mostlygeek/llama-swap/internal/shared"
|
||||
"github.com/mostlygeek/llama-swap/internal/watcher"
|
||||
@@ -122,6 +123,13 @@ func main() {
|
||||
applyLogSettings(cfg)
|
||||
proxyLog.Debugf("PID: %d", os.Getpid())
|
||||
|
||||
// On Windows, bind the process tree to a Job Object so every upstream
|
||||
// process is reaped when llama-swap exits — even on a forced kill. No-op
|
||||
// elsewhere. Non-fatal: a failure just falls back to per-process teardown.
|
||||
if err := process.SetupTreeCleanup(); err != nil {
|
||||
proxyLog.Warnf("failed to set up process tree cleanup: %v", err)
|
||||
}
|
||||
|
||||
// perfMon outlives config reloads; its config is updated in place.
|
||||
var perfMon *perf.Monitor
|
||||
if !cfg.Performance.Disabled {
|
||||
@@ -267,6 +275,16 @@ func main() {
|
||||
proxyLog.Infof("received signal %v, shutting down", sig)
|
||||
watcherCancel()
|
||||
|
||||
// Backstop against a stalled shutdown: force the process to
|
||||
// exit once the whole graceful sequence has had its full budget.
|
||||
// On Windows the Job Object reaps upstream processes on exit, so
|
||||
// a forced exit still cleans up rather than orphaning children.
|
||||
go func() {
|
||||
time.Sleep(shutdownTimeout + 5*time.Second)
|
||||
proxyLog.Warnf("graceful shutdown exceeded %v, forcing exit", shutdownTimeout)
|
||||
os.Exit(1)
|
||||
}()
|
||||
|
||||
activeMu.RLock()
|
||||
srv := activeSrv
|
||||
activeMu.RUnlock()
|
||||
@@ -275,13 +293,23 @@ func main() {
|
||||
// drain without blocking on them for the full timeout.
|
||||
srv.CloseStreams()
|
||||
|
||||
shutdownCtx, cancel := context.WithTimeout(context.Background(), shutdownTimeout)
|
||||
// Both phases share a single deadline so total shutdown is
|
||||
// bounded by shutdownTimeout rather than 2x it.
|
||||
deadline := time.Now().Add(shutdownTimeout)
|
||||
shutdownCtx, cancel := context.WithDeadline(context.Background(), deadline)
|
||||
defer cancel()
|
||||
if err := httpServer.Shutdown(shutdownCtx); err != nil {
|
||||
proxyLog.Warnf("http server shutdown error: %v", err)
|
||||
}
|
||||
|
||||
if err := srv.Shutdown(shutdownTimeout); err != nil {
|
||||
// Clamp the remaining budget to a small positive value: a
|
||||
// non-positive timeout makes the router fall back to its own
|
||||
// healthCheckTimeout, which would defeat the shared deadline.
|
||||
remaining := time.Until(deadline)
|
||||
if remaining <= 0 {
|
||||
remaining = time.Millisecond
|
||||
}
|
||||
if err := srv.Shutdown(remaining); err != nil {
|
||||
proxyLog.Warnf("router shutdown error: %v", err)
|
||||
}
|
||||
|
||||
|
||||
@@ -0,0 +1,632 @@
|
||||
<script lang="ts">
|
||||
import { models } from "../../stores/api";
|
||||
import { persistentStore } from "../../stores/persistent";
|
||||
import { streamChatCompletion } from "../../lib/chatApi";
|
||||
|
||||
type Status = "waiting" | "streaming" | "done" | "error";
|
||||
type Phase = "waiting" | "loading" | "reasoning" | "content";
|
||||
type RunState = {
|
||||
status: Status;
|
||||
loadingText: string;
|
||||
reasoningContent: string;
|
||||
content: string;
|
||||
loadingDone: boolean;
|
||||
waitingMs: number;
|
||||
loadingMs: number;
|
||||
reasoningMs: number;
|
||||
contentMs: number;
|
||||
phase: Phase;
|
||||
elapsedMs: number;
|
||||
error?: string;
|
||||
};
|
||||
type TestEntry = { id: string; model: string };
|
||||
|
||||
const LOAD_MARKER = "━━━━━";
|
||||
|
||||
const DEFAULT_PROMPT = "Write a few sentences about the history of computing.";
|
||||
const DEFAULT_MAX_TOKENS = 256;
|
||||
|
||||
const promptStore = persistentStore<string>("concurrency-prompt", DEFAULT_PROMPT);
|
||||
const maxTokensStore = persistentStore<number>("concurrency-max-tokens", DEFAULT_MAX_TOKENS);
|
||||
const testListStore = persistentStore<TestEntry[]>("concurrency-test-list", []);
|
||||
|
||||
let runs = $state<Record<string, RunState>>({});
|
||||
let isRunning = $state(false);
|
||||
let abortController: AbortController | null = null;
|
||||
let dragIndex = $state<number | null>(null);
|
||||
let dragOverIndex = $state<number | null>(null);
|
||||
|
||||
const timelineCollapsedStore = persistentStore<boolean>("concurrency-timeline-collapsed", false);
|
||||
|
||||
let timelineMaxMs = $derived(Math.max(100, ...Object.values(runs).map((r) => r.elapsedMs)));
|
||||
|
||||
let availableModels = $derived($models.filter((m) => !m.unlisted));
|
||||
let hasModels = $derived(availableModels.length > 0);
|
||||
let canRun = $derived(!isRunning && $testListStore.length > 0 && $promptStore.trim() !== "");
|
||||
|
||||
function newId(): string {
|
||||
if (typeof crypto !== "undefined" && "randomUUID" in crypto) {
|
||||
return crypto.randomUUID();
|
||||
}
|
||||
return `${Date.now()}-${Math.random().toString(36).slice(2)}`;
|
||||
}
|
||||
|
||||
function addModel(modelId: string) {
|
||||
if (isRunning) return;
|
||||
testListStore.update((list) => [...list, { id: newId(), model: modelId }]);
|
||||
}
|
||||
|
||||
function removeEntry(id: string) {
|
||||
if (isRunning) return;
|
||||
testListStore.update((list) => list.filter((e) => e.id !== id));
|
||||
const next = { ...runs };
|
||||
delete next[id];
|
||||
runs = next;
|
||||
}
|
||||
|
||||
function clearAll() {
|
||||
if (isRunning) return;
|
||||
testListStore.set([]);
|
||||
runs = {};
|
||||
}
|
||||
|
||||
function onDragStart(i: number, e: DragEvent) {
|
||||
if (isRunning) return;
|
||||
dragIndex = i;
|
||||
if (e.dataTransfer) {
|
||||
e.dataTransfer.effectAllowed = "move";
|
||||
e.dataTransfer.setData("text/plain", String(i));
|
||||
}
|
||||
}
|
||||
|
||||
function onDragOver(i: number, e: DragEvent) {
|
||||
if (isRunning || dragIndex === null) return;
|
||||
e.preventDefault();
|
||||
if (e.dataTransfer) e.dataTransfer.dropEffect = "move";
|
||||
dragOverIndex = i;
|
||||
}
|
||||
|
||||
function onDrop(i: number, e: DragEvent) {
|
||||
if (isRunning || dragIndex === null) return;
|
||||
e.preventDefault();
|
||||
const from = dragIndex;
|
||||
const to = i;
|
||||
dragIndex = null;
|
||||
dragOverIndex = null;
|
||||
if (from === to) return;
|
||||
testListStore.update((list) => {
|
||||
const next = [...list];
|
||||
const [moved] = next.splice(from, 1);
|
||||
next.splice(to, 0, moved);
|
||||
return next;
|
||||
});
|
||||
}
|
||||
|
||||
function onDragEnd() {
|
||||
dragIndex = null;
|
||||
dragOverIndex = null;
|
||||
}
|
||||
|
||||
function emptyRun(): RunState {
|
||||
return {
|
||||
status: "waiting",
|
||||
loadingText: "",
|
||||
reasoningContent: "",
|
||||
content: "",
|
||||
loadingDone: false,
|
||||
waitingMs: 0,
|
||||
loadingMs: 0,
|
||||
reasoningMs: 0,
|
||||
contentMs: 0,
|
||||
phase: "waiting",
|
||||
elapsedMs: 0,
|
||||
};
|
||||
}
|
||||
|
||||
// Detect and split the llama-swap loading block (wrapped in ━━━━━ markers,
|
||||
// delivered as reasoning_content) from the model's own reasoning tokens.
|
||||
function ingestReasoning(
|
||||
prev: RunState,
|
||||
chunk: string
|
||||
): { loadingText: string; reasoningContent: string; loadingDone: boolean; nowPhase: Phase } {
|
||||
if (prev.loadingDone) {
|
||||
return {
|
||||
loadingText: prev.loadingText,
|
||||
reasoningContent: prev.reasoningContent + chunk,
|
||||
loadingDone: true,
|
||||
nowPhase: "reasoning",
|
||||
};
|
||||
}
|
||||
|
||||
const combined = prev.loadingText + chunk;
|
||||
// Not enough to decide whether this is a loading marker
|
||||
if (combined.length < LOAD_MARKER.length) {
|
||||
if (LOAD_MARKER.startsWith(combined)) {
|
||||
return { loadingText: combined, reasoningContent: prev.reasoningContent, loadingDone: false, nowPhase: "loading" };
|
||||
}
|
||||
return {
|
||||
loadingText: "",
|
||||
reasoningContent: prev.reasoningContent + combined,
|
||||
loadingDone: true,
|
||||
nowPhase: "reasoning",
|
||||
};
|
||||
}
|
||||
|
||||
if (!combined.startsWith(LOAD_MARKER)) {
|
||||
return {
|
||||
loadingText: "",
|
||||
reasoningContent: prev.reasoningContent + combined,
|
||||
loadingDone: true,
|
||||
nowPhase: "reasoning",
|
||||
};
|
||||
}
|
||||
|
||||
// We're inside a loading block — look for the closing marker
|
||||
const closingIdx = combined.indexOf(LOAD_MARKER, LOAD_MARKER.length);
|
||||
if (closingIdx < 0) {
|
||||
return { loadingText: combined, reasoningContent: prev.reasoningContent, loadingDone: false, nowPhase: "loading" };
|
||||
}
|
||||
const newlineIdx = combined.indexOf("\n", closingIdx);
|
||||
const sliceEnd = newlineIdx >= 0 ? newlineIdx + 1 : combined.length;
|
||||
const loadingPart = combined.substring(0, sliceEnd);
|
||||
// Strip the trailing " \n" the loader sends after the closing marker
|
||||
const remainder = combined.substring(sliceEnd).replace(/^[ \t]*\n?/, "");
|
||||
return {
|
||||
loadingText: loadingPart,
|
||||
reasoningContent: prev.reasoningContent + remainder,
|
||||
loadingDone: true,
|
||||
nowPhase: remainder ? "reasoning" : "waiting",
|
||||
};
|
||||
}
|
||||
|
||||
async function runOne(entry: TestEntry, signal: AbortSignal) {
|
||||
const start = performance.now();
|
||||
let phaseStart = start;
|
||||
runs[entry.id] = { ...emptyRun(), status: "streaming" };
|
||||
|
||||
const accrue = (
|
||||
prev: RunState,
|
||||
now: number
|
||||
): { waitingMs: number; loadingMs: number; reasoningMs: number; contentMs: number } => {
|
||||
const delta = now - phaseStart;
|
||||
const base = {
|
||||
waitingMs: prev.waitingMs,
|
||||
loadingMs: prev.loadingMs,
|
||||
reasoningMs: prev.reasoningMs,
|
||||
contentMs: prev.contentMs,
|
||||
};
|
||||
if (prev.phase === "waiting") return { ...base, waitingMs: base.waitingMs + delta };
|
||||
if (prev.phase === "loading") return { ...base, loadingMs: base.loadingMs + delta };
|
||||
if (prev.phase === "reasoning") return { ...base, reasoningMs: base.reasoningMs + delta };
|
||||
if (prev.phase === "content") return { ...base, contentMs: base.contentMs + delta };
|
||||
return base;
|
||||
};
|
||||
|
||||
const ticker = window.setInterval(() => {
|
||||
const prev = runs[entry.id];
|
||||
if (!prev || prev.status !== "streaming") return;
|
||||
const now = performance.now();
|
||||
const accrued = accrue(prev, now);
|
||||
phaseStart = now;
|
||||
runs[entry.id] = { ...prev, ...accrued, elapsedMs: now - start };
|
||||
}, 50);
|
||||
|
||||
try {
|
||||
const stream = streamChatCompletion(entry.model, [{ role: "user", content: $promptStore }], signal, {
|
||||
endpoint: "v1/chat/completions",
|
||||
max_tokens: $maxTokensStore,
|
||||
});
|
||||
for await (const chunk of stream) {
|
||||
if (chunk.done) break;
|
||||
const prev = runs[entry.id];
|
||||
if (!prev) break;
|
||||
const now = performance.now();
|
||||
const accrued = accrue(prev, now);
|
||||
phaseStart = now;
|
||||
|
||||
let nextPhase: Phase = prev.phase;
|
||||
let loadingText = prev.loadingText;
|
||||
let reasoningContent = prev.reasoningContent;
|
||||
let loadingDone = prev.loadingDone;
|
||||
|
||||
if (chunk.reasoning_content) {
|
||||
const parsed = ingestReasoning(prev, chunk.reasoning_content);
|
||||
loadingText = parsed.loadingText;
|
||||
reasoningContent = parsed.reasoningContent;
|
||||
loadingDone = parsed.loadingDone;
|
||||
nextPhase = parsed.nowPhase;
|
||||
}
|
||||
if (chunk.content) nextPhase = "content";
|
||||
|
||||
runs[entry.id] = {
|
||||
...prev,
|
||||
...accrued,
|
||||
loadingText,
|
||||
reasoningContent,
|
||||
content: prev.content + (chunk.content ?? ""),
|
||||
loadingDone,
|
||||
phase: nextPhase,
|
||||
elapsedMs: now - start,
|
||||
};
|
||||
}
|
||||
const prev = runs[entry.id];
|
||||
if (prev) {
|
||||
const now = performance.now();
|
||||
const accrued = accrue(prev, now);
|
||||
runs[entry.id] = { ...prev, ...accrued, status: "done", elapsedMs: now - start };
|
||||
}
|
||||
} catch (err) {
|
||||
const prev = runs[entry.id] ?? emptyRun();
|
||||
const now = performance.now();
|
||||
const accrued = accrue(prev, now);
|
||||
const aborted = err instanceof Error && err.name === "AbortError";
|
||||
runs[entry.id] = {
|
||||
...prev,
|
||||
...accrued,
|
||||
status: "error",
|
||||
elapsedMs: now - start,
|
||||
error: aborted ? "aborted" : err instanceof Error ? err.message : String(err),
|
||||
};
|
||||
} finally {
|
||||
window.clearInterval(ticker);
|
||||
}
|
||||
}
|
||||
|
||||
async function run() {
|
||||
if (!canRun) return;
|
||||
const entries = $testListStore;
|
||||
const initial: Record<string, RunState> = {};
|
||||
for (const e of entries) {
|
||||
initial[e.id] = emptyRun();
|
||||
}
|
||||
runs = initial;
|
||||
isRunning = true;
|
||||
abortController = new AbortController();
|
||||
try {
|
||||
await Promise.allSettled(entries.map((e) => runOne(e, abortController!.signal)));
|
||||
} finally {
|
||||
isRunning = false;
|
||||
abortController = null;
|
||||
}
|
||||
}
|
||||
|
||||
function stop() {
|
||||
abortController?.abort();
|
||||
}
|
||||
|
||||
function waitingBarClass(run: RunState): string {
|
||||
if (run.status === "error" && run.phase === "waiting") return "bg-red-500";
|
||||
return "bg-slate-200 dark:bg-white/10";
|
||||
}
|
||||
|
||||
function loadingBarClass(run: RunState): string {
|
||||
if (run.status === "error" && run.phase === "loading") return "bg-red-500";
|
||||
return "bg-slate-400 dark:bg-slate-500";
|
||||
}
|
||||
|
||||
function reasoningBarClass(run: RunState): string {
|
||||
if (run.status === "error" && run.phase === "reasoning") return "bg-red-500";
|
||||
return "bg-purple-500";
|
||||
}
|
||||
|
||||
function contentBarClass(run: RunState): string {
|
||||
if (run.status === "error" && run.phase === "content") return "bg-red-500";
|
||||
if (run.status === "done") return "bg-green-500";
|
||||
return "bg-amber-400 dark:bg-amber-500";
|
||||
}
|
||||
|
||||
function niceStepMs(maxMs: number): number {
|
||||
if (maxMs <= 500) return 100;
|
||||
if (maxMs <= 2000) return 500;
|
||||
if (maxMs <= 5000) return 1000;
|
||||
if (maxMs <= 20000) return 5000;
|
||||
if (maxMs <= 60000) return 10000;
|
||||
return 30000;
|
||||
}
|
||||
|
||||
function formatTickMs(ms: number): string {
|
||||
if (ms < 1000) return `${ms}`;
|
||||
return `${(ms / 1000).toFixed(ms % 1000 === 0 ? 0 : 1)}s`;
|
||||
}
|
||||
|
||||
let timelineTicks = $derived.by(() => {
|
||||
const step = niceStepMs(timelineMaxMs);
|
||||
const ticks: number[] = [];
|
||||
for (let t = 0; t <= timelineMaxMs; t += step) ticks.push(t);
|
||||
return ticks;
|
||||
});
|
||||
|
||||
function statusBadgeClass(status: Status): string {
|
||||
switch (status) {
|
||||
case "waiting":
|
||||
return "bg-gray-200 text-gray-700 dark:bg-gray-700 dark:text-gray-200";
|
||||
case "streaming":
|
||||
return "bg-amber-200 text-amber-900 dark:bg-amber-500/30 dark:text-amber-200";
|
||||
case "done":
|
||||
return "bg-green-200 text-green-900 dark:bg-green-500/30 dark:text-green-200";
|
||||
case "error":
|
||||
return "bg-red-200 text-red-900 dark:bg-red-500/30 dark:text-red-200";
|
||||
}
|
||||
}
|
||||
|
||||
function formatElapsed(ms: number): string {
|
||||
if (ms < 1000) return `${Math.round(ms)}ms`;
|
||||
return `${(ms / 1000).toFixed(2)}s`;
|
||||
}
|
||||
|
||||
function resetDefaults() {
|
||||
promptStore.set(DEFAULT_PROMPT);
|
||||
maxTokensStore.set(DEFAULT_MAX_TOKENS);
|
||||
}
|
||||
</script>
|
||||
|
||||
<div class="flex flex-col md:flex-row gap-4 h-full min-h-0">
|
||||
<!-- Left column: run controls, model picker, settings -->
|
||||
<div class="md:w-72 shrink-0 flex flex-col gap-3 min-h-0">
|
||||
<!-- Run controls -->
|
||||
<div class="flex items-center gap-2">
|
||||
{#if isRunning}
|
||||
<button class="btn bg-red-500 hover:bg-red-600 text-white border-red-500" onclick={stop}>
|
||||
<span class="inline-block w-3 h-3 bg-white align-middle mr-2"></span>Stop
|
||||
</button>
|
||||
{:else}
|
||||
<button
|
||||
class="btn bg-primary text-btn-primary-text hover:opacity-90"
|
||||
onclick={run}
|
||||
disabled={!canRun}
|
||||
title={$testListStore.length === 0 ? "Add models from the list below" : "Run concurrent requests"}
|
||||
>
|
||||
<span class="inline-block align-middle mr-2" aria-hidden="true">▶</span>Go
|
||||
</button>
|
||||
{/if}
|
||||
<button class="btn btn--sm" onclick={clearAll} disabled={isRunning || $testListStore.length === 0}>
|
||||
Clear ({$testListStore.length})
|
||||
</button>
|
||||
</div>
|
||||
|
||||
<!-- Available models -->
|
||||
<div class="flex flex-col min-h-0 flex-1">
|
||||
<div class="text-xs font-medium text-txtsecondary mb-1">
|
||||
Models <span class="text-[10px] font-normal">— click to queue (add the same model more than once to test parallel requests)</span>
|
||||
</div>
|
||||
<div class="flex-1 border border-gray-200 dark:border-white/10 rounded overflow-y-auto min-h-0">
|
||||
{#if !hasModels}
|
||||
<div class="p-3 text-sm text-txtsecondary text-center">No models configured.</div>
|
||||
{:else}
|
||||
<ul class="divide-y divide-gray-100 dark:divide-white/5">
|
||||
{#each availableModels as m (m.id)}
|
||||
<li>
|
||||
<button
|
||||
class="w-full text-left px-2 py-1.5 text-sm hover:bg-secondary-hover transition-colors disabled:opacity-50 disabled:cursor-not-allowed flex items-center gap-2"
|
||||
onclick={() => addModel(m.id)}
|
||||
disabled={isRunning}
|
||||
title="Add {m.id}"
|
||||
>
|
||||
<span class="text-primary" aria-hidden="true">+</span>
|
||||
<span class="truncate flex-1">{m.id}</span>
|
||||
</button>
|
||||
</li>
|
||||
{/each}
|
||||
</ul>
|
||||
{/if}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Settings -->
|
||||
<div class="flex flex-col gap-2 border-t border-gray-200 dark:border-white/10 pt-3">
|
||||
<div class="flex items-center justify-between">
|
||||
<label for="concurrency-prompt" class="text-xs font-medium text-txtsecondary">Prompt</label>
|
||||
<button
|
||||
class="text-[10px] text-txtsecondary hover:text-txtmain underline"
|
||||
onclick={resetDefaults}
|
||||
disabled={isRunning}
|
||||
>
|
||||
reset defaults
|
||||
</button>
|
||||
</div>
|
||||
<textarea
|
||||
id="concurrency-prompt"
|
||||
class="w-full px-2 py-1.5 text-sm rounded border border-gray-200 dark:border-white/10 bg-surface focus:outline-none focus:ring-2 focus:ring-primary resize-none"
|
||||
rows="3"
|
||||
bind:value={$promptStore}
|
||||
disabled={isRunning}
|
||||
></textarea>
|
||||
<label for="concurrency-max-tokens" class="text-xs font-medium text-txtsecondary">max_tokens</label>
|
||||
<input
|
||||
id="concurrency-max-tokens"
|
||||
type="number"
|
||||
min="1"
|
||||
class="w-full px-2 py-1.5 text-sm rounded border border-gray-200 dark:border-white/10 bg-surface focus:outline-none focus:ring-2 focus:ring-primary"
|
||||
bind:value={$maxTokensStore}
|
||||
disabled={isRunning}
|
||||
/>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Right column: result panels (draggable to reorder) -->
|
||||
<div class="flex-1 min-w-0 min-h-0 overflow-y-auto">
|
||||
{#if $testListStore.length === 0}
|
||||
<div class="h-full flex items-center justify-center px-6">
|
||||
<div class="max-w-md text-sm text-txtsecondary space-y-4">
|
||||
<h4 class="text-base font-semibold text-txtmain pb-0">Load Test</h4>
|
||||
<p>
|
||||
Fire several streaming chat completions at llama-swap at the same time to see how it handles parallel
|
||||
loading and concurrent inference. Each request streams into its own panel with a live timer and status.
|
||||
</p>
|
||||
<ol class="list-decimal list-inside space-y-1">
|
||||
<li>Click models on the left to queue them — repeat a model to hit it with parallel requests.</li>
|
||||
<li>Tweak the prompt and <code>max_tokens</code> if you want.</li>
|
||||
<li>Press <span class="font-semibold text-txtmain">Go</span> to launch them concurrently.</li>
|
||||
</ol>
|
||||
<p class="text-xs">Tip: drag a result card's header to reorder, or hit × to drop it.</p>
|
||||
</div>
|
||||
</div>
|
||||
{:else}
|
||||
<!-- Gantt-style timeline -->
|
||||
<div class="mb-3 border border-gray-200 dark:border-white/10 rounded">
|
||||
<button
|
||||
class="w-full flex items-center gap-2 px-2 py-1.5 text-xs font-medium text-txtsecondary hover:bg-secondary-hover transition-colors {$timelineCollapsedStore ? 'rounded' : 'rounded-t border-b border-gray-200 dark:border-white/10'}"
|
||||
onclick={() => timelineCollapsedStore.update((v) => !v)}
|
||||
aria-expanded={!$timelineCollapsedStore}
|
||||
>
|
||||
<svg
|
||||
class="w-4 h-4 transition-transform {$timelineCollapsedStore ? '-rotate-90' : ''}"
|
||||
fill="none"
|
||||
stroke="currentColor"
|
||||
viewBox="0 0 24 24"
|
||||
aria-hidden="true"
|
||||
>
|
||||
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M19 9l-7 7-7-7"></path>
|
||||
</svg>
|
||||
<span>Timeline</span>
|
||||
{#if !$timelineCollapsedStore}
|
||||
<span class="flex items-center gap-3 text-[10px] text-txtsecondary font-normal ml-3" aria-hidden="true">
|
||||
<span class="flex items-center gap-1"><span class="inline-block w-2.5 h-2.5 rounded-sm bg-slate-200 dark:bg-white/10 border border-gray-300 dark:border-white/10"></span>waiting</span>
|
||||
<span class="flex items-center gap-1"><span class="inline-block w-2.5 h-2.5 rounded-sm bg-slate-400 dark:bg-slate-500"></span>loading</span>
|
||||
<span class="flex items-center gap-1"><span class="inline-block w-2.5 h-2.5 rounded-sm bg-purple-500"></span>reasoning</span>
|
||||
<span class="flex items-center gap-1"><span class="inline-block w-2.5 h-2.5 rounded-sm bg-amber-400 dark:bg-amber-500"></span>streaming</span>
|
||||
<span class="flex items-center gap-1"><span class="inline-block w-2.5 h-2.5 rounded-sm bg-green-500"></span>done</span>
|
||||
<span class="flex items-center gap-1"><span class="inline-block w-2.5 h-2.5 rounded-sm bg-red-500"></span>error</span>
|
||||
</span>
|
||||
{/if}
|
||||
<span class="ml-auto tabular-nums text-txtsecondary">
|
||||
max {formatElapsed(timelineMaxMs)} · {$testListStore.length} request{$testListStore.length === 1 ? "" : "s"}
|
||||
</span>
|
||||
</button>
|
||||
{#if !$timelineCollapsedStore}
|
||||
<div class="px-2 py-2">
|
||||
<!-- X axis ticks -->
|
||||
<div class="flex" aria-hidden="true">
|
||||
<div class="w-40 shrink-0"></div>
|
||||
<div class="relative flex-1 h-4 border-b border-gray-200 dark:border-white/10">
|
||||
{#each timelineTicks as t (t)}
|
||||
<div
|
||||
class="absolute top-0 bottom-0 border-l border-gray-200 dark:border-white/10"
|
||||
style="left: {(t / timelineMaxMs) * 100}%;"
|
||||
>
|
||||
<span class="absolute -top-0.5 left-1 text-[10px] text-txtsecondary tabular-nums">{formatTickMs(t)}</span>
|
||||
</div>
|
||||
{/each}
|
||||
</div>
|
||||
<div class="w-16 shrink-0"></div>
|
||||
</div>
|
||||
<!-- Bars -->
|
||||
<div class="flex flex-col gap-1 mt-1">
|
||||
{#each $testListStore as entry, i (entry.id)}
|
||||
{@const run = runs[entry.id]}
|
||||
{@const waitingPct = run ? (run.waitingMs / timelineMaxMs) * 100 : 0}
|
||||
{@const loadingPct = run ? (run.loadingMs / timelineMaxMs) * 100 : 0}
|
||||
{@const reasoningPct = run ? (run.reasoningMs / timelineMaxMs) * 100 : 0}
|
||||
{@const contentPct = run ? (run.contentMs / timelineMaxMs) * 100 : 0}
|
||||
<div class="flex items-center text-xs">
|
||||
<div class="w-40 shrink-0 flex items-center gap-1 pr-2 text-txtsecondary">
|
||||
<span class="tabular-nums w-5 text-right">{i + 1}.</span>
|
||||
<span class="truncate" title={entry.model}>{entry.model}</span>
|
||||
</div>
|
||||
<div class="relative flex-1 h-4">
|
||||
{#each timelineTicks as t (t)}
|
||||
<div
|
||||
class="absolute top-0 bottom-0 border-l border-gray-100 dark:border-white/5"
|
||||
style="left: {(t / timelineMaxMs) * 100}%;"
|
||||
aria-hidden="true"
|
||||
></div>
|
||||
{/each}
|
||||
{#if run && run.waitingMs > 0}
|
||||
<div
|
||||
class="absolute top-0.5 bottom-0.5 rounded-l-sm transition-all {waitingBarClass(run)}"
|
||||
style="left: 0; width: {waitingPct}%;"
|
||||
title="waiting {formatElapsed(run.waitingMs)}"
|
||||
></div>
|
||||
{/if}
|
||||
{#if run && run.loadingMs > 0}
|
||||
<div
|
||||
class="absolute top-0.5 bottom-0.5 transition-all {loadingBarClass(run)} {run.waitingMs === 0 ? 'rounded-l-sm' : ''}"
|
||||
style="left: {waitingPct}%; width: {loadingPct}%;"
|
||||
title="loading {formatElapsed(run.loadingMs)}"
|
||||
></div>
|
||||
{/if}
|
||||
{#if run && run.reasoningMs > 0}
|
||||
<div
|
||||
class="absolute top-0.5 bottom-0.5 transition-all {reasoningBarClass(run)} {run.waitingMs === 0 && run.loadingMs === 0 ? 'rounded-l-sm' : ''}"
|
||||
style="left: {waitingPct + loadingPct}%; width: {reasoningPct}%;"
|
||||
title="reasoning {formatElapsed(run.reasoningMs)}"
|
||||
></div>
|
||||
{/if}
|
||||
{#if run && run.contentMs > 0}
|
||||
<div
|
||||
class="absolute top-0.5 bottom-0.5 transition-all {contentBarClass(run)} {run.waitingMs === 0 && run.loadingMs === 0 && run.reasoningMs === 0 ? 'rounded-l-sm' : ''} {run.status === 'done' || run.status === 'error' ? 'rounded-r-sm' : ''}"
|
||||
style="left: {waitingPct + loadingPct + reasoningPct}%; width: {contentPct}%;"
|
||||
title="content {formatElapsed(run.contentMs)}"
|
||||
></div>
|
||||
{/if}
|
||||
</div>
|
||||
<div class="w-16 shrink-0 pl-2 tabular-nums text-txtsecondary text-right">
|
||||
{run ? formatElapsed(run.elapsedMs) : "—"}
|
||||
</div>
|
||||
</div>
|
||||
{/each}
|
||||
</div>
|
||||
</div>
|
||||
{/if}
|
||||
</div>
|
||||
<div class="grid grid-cols-1 lg:grid-cols-2 xl:grid-cols-3 gap-3" role="list">
|
||||
{#each $testListStore as entry, i (entry.id)}
|
||||
{@const run = runs[entry.id]}
|
||||
{@const status = run?.status ?? "waiting"}
|
||||
<div
|
||||
class="border rounded flex flex-col min-h-0 transition-colors {dragOverIndex === i && dragIndex !== i
|
||||
? 'border-primary ring-2 ring-primary/40'
|
||||
: 'border-gray-200 dark:border-white/10'} {dragIndex === i ? 'opacity-40' : ''}"
|
||||
style="height: 280px;"
|
||||
role="listitem"
|
||||
ondragover={(e) => onDragOver(i, e)}
|
||||
ondrop={(e) => onDrop(i, e)}
|
||||
>
|
||||
<div
|
||||
class="shrink-0 flex items-center gap-2 px-2 py-1.5 border-b border-gray-200 dark:border-white/10 bg-secondary/40 rounded-t"
|
||||
draggable={!isRunning}
|
||||
role="button"
|
||||
tabindex="-1"
|
||||
aria-label="Drag to reorder {entry.model}"
|
||||
ondragstart={(e) => onDragStart(i, e)}
|
||||
ondragend={onDragEnd}
|
||||
class:cursor-grab={!isRunning}
|
||||
title={isRunning ? "" : "Drag to reorder"}
|
||||
>
|
||||
<span class="text-txtsecondary select-none" aria-hidden="true">⋮⋮</span>
|
||||
<span class="text-txtsecondary tabular-nums text-xs w-5 text-right">{i + 1}.</span>
|
||||
<span class="flex-1 truncate text-sm font-medium" title={entry.model}>{entry.model}</span>
|
||||
<span class="text-xs tabular-nums text-txtsecondary">
|
||||
{run ? formatElapsed(run.elapsedMs) : "—"}
|
||||
</span>
|
||||
<span class="status text-[10px] {statusBadgeClass(status)}">{status}</span>
|
||||
<button
|
||||
class="w-5 h-5 flex items-center justify-center text-txtsecondary hover:text-red-500 transition-colors rounded disabled:opacity-30 disabled:cursor-not-allowed"
|
||||
onclick={() => removeEntry(entry.id)}
|
||||
disabled={isRunning}
|
||||
aria-label="Remove"
|
||||
tabindex="-1"
|
||||
>
|
||||
×
|
||||
</button>
|
||||
</div>
|
||||
<div class="flex-1 min-h-0 overflow-y-auto font-mono text-xs px-2 py-1.5">
|
||||
{#if run?.loadingText}
|
||||
<div class="bg-secondary/40 dark:bg-white/5 text-txtsecondary rounded px-2 py-1 mb-2 whitespace-pre-wrap">{run.loadingText.trim()}</div>
|
||||
{/if}
|
||||
{#if run?.reasoningContent}
|
||||
<div class="text-purple-700 dark:text-purple-300 whitespace-pre-wrap">{run.reasoningContent}</div>
|
||||
{/if}
|
||||
{#if run?.content}
|
||||
<div class="whitespace-pre-wrap {run.reasoningContent ? 'mt-2' : ''}">{run.content}</div>
|
||||
{/if}
|
||||
{#if run?.status === "error" && run?.error}
|
||||
<div class="text-red-500 mt-2">[error] {run.error}</div>
|
||||
{/if}
|
||||
</div>
|
||||
</div>
|
||||
{/each}
|
||||
</div>
|
||||
{/if}
|
||||
</div>
|
||||
</div>
|
||||
@@ -402,7 +402,7 @@
|
||||
<p class="text-sm text-txtsecondary">
|
||||
This is an experimental feature. Please use <a
|
||||
class="underline hover:text-txtmain"
|
||||
href="https://github.com/mostlygeek/llama-swap/discussions/771">discussion #711</a
|
||||
href="https://github.com/mostlygeek/llama-swap/discussions/771">discussion #771</a
|
||||
> for instructions and to share feedback.
|
||||
</p>
|
||||
|
||||
|
||||
@@ -5,8 +5,9 @@
|
||||
import AudioInterface from "../components/playground/AudioInterface.svelte";
|
||||
import SpeechInterface from "../components/playground/SpeechInterface.svelte";
|
||||
import RerankInterface from "../components/playground/RerankInterface.svelte";
|
||||
import ConcurrencyInterface from "../components/playground/ConcurrencyInterface.svelte";
|
||||
|
||||
type Tab = "chat" | "images" | "speech" | "audio" | "rerank";
|
||||
type Tab = "chat" | "images" | "speech" | "audio" | "rerank" | "concurrency";
|
||||
|
||||
const selectedTabStore = persistentStore<Tab>("playground-selected-tab", "chat");
|
||||
let mobileMenuOpen = $state(false);
|
||||
@@ -17,6 +18,7 @@
|
||||
{ id: "speech", label: "Speech" },
|
||||
{ id: "audio", label: "Transcription" },
|
||||
{ id: "rerank", label: "Rerank" },
|
||||
{ id: "concurrency", label: "Load Test" },
|
||||
];
|
||||
|
||||
function selectTab(tab: Tab) {
|
||||
@@ -25,7 +27,7 @@
|
||||
}
|
||||
|
||||
function getTabLabel(tabId: Tab): string {
|
||||
return tabs.find(t => t.id === tabId)?.label || "";
|
||||
return tabs.find((t) => t.id === tabId)?.label || "";
|
||||
}
|
||||
</script>
|
||||
|
||||
@@ -49,10 +51,15 @@
|
||||
</svg>
|
||||
</button>
|
||||
{#if mobileMenuOpen}
|
||||
<div class="absolute top-full left-0 right-0 mt-1 bg-surface border border-gray-200 dark:border-white/10 rounded shadow-lg z-10">
|
||||
<div
|
||||
class="absolute top-full left-0 right-0 mt-1 bg-surface border border-gray-200 dark:border-white/10 rounded shadow-lg z-10"
|
||||
>
|
||||
{#each tabs as tab (tab.id)}
|
||||
<button
|
||||
class="w-full px-4 py-2 text-left hover:bg-secondary-hover transition-colors first:rounded-t last:rounded-b {$selectedTabStore === tab.id ? 'bg-primary/10 font-medium' : ''}"
|
||||
class="w-full px-4 py-2 text-left hover:bg-secondary-hover transition-colors first:rounded-t last:rounded-b {$selectedTabStore ===
|
||||
tab.id
|
||||
? 'bg-primary/10 font-medium'
|
||||
: ''}"
|
||||
onclick={() => selectTab(tab.id)}
|
||||
>
|
||||
{tab.label}
|
||||
@@ -94,6 +101,9 @@
|
||||
<div class="h-full" class:tab-hidden={$selectedTabStore !== "rerank"}>
|
||||
<RerankInterface />
|
||||
</div>
|
||||
<div class="h-full" class:tab-hidden={$selectedTabStore !== "concurrency"}>
|
||||
<ConcurrencyInterface />
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
|
||||
@@ -22,7 +22,7 @@ export default defineConfig({
|
||||
],
|
||||
base: "/ui/",
|
||||
build: {
|
||||
outDir: "../proxy/ui_dist",
|
||||
outDir: "../internal/server/ui_dist",
|
||||
assetsDir: "assets",
|
||||
},
|
||||
server: {
|
||||
|
||||
Reference in New Issue
Block a user