CodeRabbit Generated Unit Tests: Generate unit tests for PR changes

2026-06-16 12:47:43 +00:00
34 changed files with 614 additions and 2161 deletions
@@ -15,8 +15,6 @@ reviews:
  auto_review:
    enabled: false
    drafts: false
-  unit_tests:
-    enabled: false
 chat:
  auto_reply: true
 issue_enrichment:
@@ -572,24 +572,6 @@
            "default": {},
            "description": "A dictionary of remote peers and models they provide. Peers can be another llama-swap or any server that provides the /v1/ generative API endpoints supported by llama-swap."
        },
-        "upstream": {
-            "type": "object",
-            "description": "Controls behaviour of the /upstream passthrough endpoint. Recommended to only use in special use cases; leaving it as the default will typically be the best experience.",
-            "properties": {
-                "ignorePaths": {
-                    "type": "array",
-                    "items": {
-                        "type": "string"
-                    },
-                    "default": [
-                        ".*\\.(js|json|css|png|gif|jpg|jpeg|ico|txt)$"
-                    ],
-                    "description": "List of RE2 compatible regular expressions. Any request to a path matching any of the regular expressions will be ignored and not trigger a swap. When not specified, defaults to a pattern matching common static-asset suffixes (.js, .json, .css, .png, .gif, .jpg, .jpeg, .ico, .txt)."
-                }
-            },
-            "additionalProperties": false,
-            "default": {}
-        },
        "routing": {
            "type": "object",
            "description": "Canonical routing/scheduling configuration. Alternative to the legacy top-level 'groups'/'matrix' keys; a config must not use both styles.",
@@ -134,18 +134,6 @@ apiKeys:
  - "${env.API_KEY_1}"
  - "${env.API_KEY_2}"

-# upstream: controls behaviour of the /upstream passthrough endpoint
-# - optional, default: empty dictionary
-# - recommended to only use in special use cases. Leaving it as the
-#   default will typically be the best experience
-upstream:
-  # ignorePaths: list of RE2 compatible regular expressions
-  # - default: (see below)
-  # - any request to a path matching any of the regular expressions
-  #   will be ignored and not trigger a swap
-  ignorePaths:
-    - '.*\.(js|json|css|png|gif|jpg|jpeg|ico|txt)$'
-
 # models: a dictionary of model configurations
 # - required
 # - each key is the model's ID, used in API requests
@@ -163,9 +163,6 @@ type Config struct {

 	// support remote peers, see issue #433, #296
 	Peers PeerDictionaryConfig `yaml:"peers"`
-
-	// upstream controls behaviour of the /upstream passthrough endpoint
-	Upstream UpstreamConfig `yaml:"upstream"`
 }

 // RoutingConfig is the canonical, normalized routing/scheduling configuration.
@@ -273,12 +270,6 @@ func LoadConfigFromReader(r io.Reader) (Config, error) {
 		return Config{}, fmt.Errorf("globalTTL must be >= 0")
 	}

-	// Apply default for upstream.ignorePaths when not specified. The default
-	// matches common static-asset suffixes so they do not trigger a swap.
-	if len(config.Upstream.IgnorePaths) == 0 {
-		config.Upstream.IgnorePaths = DefaultUpstreamIgnorePaths()
-	}
-
 	switch config.LogToStdout {
 	case LogToStdoutProxy, LogToStdoutUpstream, LogToStdoutBoth, LogToStdoutNone:
 	default:
@@ -266,9 +266,6 @@ groups:
 			"mthree":    "model3",
 		},
 		Groups: expectedGroups,
-		Upstream: UpstreamConfig{
-			IgnorePaths: DefaultUpstreamIgnorePaths(),
-		},
 		Routing: RoutingConfig{
 			Router: RouterConfig{
 				Use: "group",
@@ -255,9 +255,6 @@ groups:
 			"mthree":    "model3",
 		},
 		Groups: expectedGroups,
-		Upstream: UpstreamConfig{
-			IgnorePaths: DefaultUpstreamIgnorePaths(),
-		},
 		Routing: RoutingConfig{
 			Router: RouterConfig{
 				Use: "group",
@@ -1,55 +0,0 @@
-package config
-
-import (
-	"fmt"
-	"regexp"
-
-	"gopkg.in/yaml.v3"
-)
-
-// DefaultUpstreamIgnorePathsPattern is the default regular expression applied
-// to upstream.ignorePaths when the section is empty or absent from the config.
-// It matches common static-asset suffixes so requests for .js/.css/.png/etc.
-// files do not trigger a model swap.
-const DefaultUpstreamIgnorePathsPattern = `.*\.(js|json|css|png|gif|jpg|jpeg|ico|txt)$`
-
-// DefaultUpstreamIgnorePaths returns the default compiled ignore paths used
-// when upstream.ignorePaths is not specified in the config. The returned slice
-// is fresh so callers may mutate it without affecting other configs.
-func DefaultUpstreamIgnorePaths() []*regexp.Regexp {
-	return []*regexp.Regexp{regexp.MustCompile(DefaultUpstreamIgnorePathsPattern)}
-}
-
-// UpstreamConfig controls behaviour of the /upstream passthrough endpoint.
-type UpstreamConfig struct {
-	// IgnorePaths is a slice of compiled regular expressions. Any request to
-	// /upstream/<model>/<path> whose remaining path matches any of these
-	// expressions will be ignored and not trigger a swap. When the config
-	// does not specify any patterns, DefaultUpstreamIgnorePaths is applied.
-	IgnorePaths []*regexp.Regexp `yaml:"-"`
-}
-
-// rawUpstreamConfig is the intermediate form used to unmarshal the YAML into
-// plain strings, which are then compiled into *regexp.Regexp.
-type rawUpstreamConfig struct {
-	IgnorePaths []string `yaml:"ignorePaths"`
-}
-
-// UnmarshalYAML compiles each ignorePaths entry into a *regexp.Regexp. If any
-// entry fails to compile, an error is returned.
-func (u *UpstreamConfig) UnmarshalYAML(value *yaml.Node) error {
-	var raw rawUpstreamConfig
-	if err := value.Decode(&raw); err != nil {
-		return err
-	}
-	patterns := make([]*regexp.Regexp, 0, len(raw.IgnorePaths))
-	for _, p := range raw.IgnorePaths {
-		re, err := regexp.Compile(p)
-		if err != nil {
-			return fmt.Errorf("upstream.ignorePaths: invalid regular expression %q: %w", p, err)
-		}
-		patterns = append(patterns, re)
-	}
-	u.IgnorePaths = patterns
-	return nil
-}
@@ -1,88 +0,0 @@
-package config
-
-import (
-	"regexp"
-	"strings"
-	"testing"
-
-	"github.com/stretchr/testify/assert"
-	"github.com/stretchr/testify/require"
-)
-
-const upstreamConfigHeader = `
-models:
-  model1:
-    cmd: path/to/cmd --arg1 one
-    proxy: "http://localhost:8080"
-`
-
-func TestConfig_UpstreamIgnorePaths_DefaultWhenAbsent(t *testing.T) {
-	// When upstream is not specified at all, the default pattern is applied.
-	content := upstreamConfigHeader
-	cfg, err := LoadConfigFromReader(strings.NewReader(content))
-	require.NoError(t, err)
-	require.Len(t, cfg.Upstream.IgnorePaths, 1)
-
-	def := cfg.Upstream.IgnorePaths[0]
-	assert.IsType(t, &regexp.Regexp{}, def)
-	assert.Equal(t, DefaultUpstreamIgnorePathsPattern, def.String())
-
-	// The default matches common static-asset suffixes.
-	assert.True(t, def.MatchString("/foo.js"))
-	assert.True(t, def.MatchString("/bar/baz.json"))
-	assert.True(t, def.MatchString("/static/img.png"))
-	assert.True(t, def.MatchString("/notes.txt"))
-	assert.True(t, def.MatchString("/favicon.ico"))
-	// And does not match inference API paths.
-	assert.False(t, def.MatchString("/v1/chat/completions"))
-	assert.False(t, def.MatchString("/v1/models"))
-	assert.False(t, def.MatchString("/health"))
-}
-
-func TestConfig_UpstreamIgnorePaths_DefaultWhenSectionEmpty(t *testing.T) {
-	// When upstream is present but ignorePaths is omitted, the default is still
-	// applied.
-	content := `upstream: {}` + "\n" + upstreamConfigHeader
-	cfg, err := LoadConfigFromReader(strings.NewReader(content))
-	require.NoError(t, err)
-	require.Len(t, cfg.Upstream.IgnorePaths, 1)
-	assert.Equal(t, DefaultUpstreamIgnorePathsPattern, cfg.Upstream.IgnorePaths[0].String())
-}
-
-func TestConfig_UpstreamIgnorePaths_Compiles(t *testing.T) {
-	content := `
-upstream:
-  ignorePaths:
-    - ".*\\.(js|json|css|png|gif|jpg|jpeg|txt)$"
-    - "^/static/.*"
-` + upstreamConfigHeader
-
-	cfg, err := LoadConfigFromReader(strings.NewReader(content))
-	require.NoError(t, err)
-	require.Len(t, cfg.Upstream.IgnorePaths, 2)
-
-	// Verify the patterns are compiled into *regexp.Regexp and match as expected.
-	assert.True(t, cfg.Upstream.IgnorePaths[0].MatchString("/foo.js"))
-	assert.True(t, cfg.Upstream.IgnorePaths[0].MatchString("/bar/baz.json"))
-	assert.False(t, cfg.Upstream.IgnorePaths[0].MatchString("/v1/chat/completions"))
-	assert.True(t, cfg.Upstream.IgnorePaths[1].MatchString("/static/foo.png"))
-	assert.False(t, cfg.Upstream.IgnorePaths[1].MatchString("/v1/chat/completions"))
-
-	// Confirm the type is *regexp.Regexp to satisfy the API contract.
-	for _, re := range cfg.Upstream.IgnorePaths {
-		assert.IsType(t, &regexp.Regexp{}, re)
-	}
-}
-
-func TestConfig_UpstreamIgnorePaths_InvalidRegexReturnsError(t *testing.T) {
-	content := `
-upstream:
-  ignorePaths:
-    - "[invalid("
-` + upstreamConfigHeader
-
-	_, err := LoadConfigFromReader(strings.NewReader(content))
-	require.Error(t, err)
-	assert.Contains(t, err.Error(), "upstream.ignorePaths")
-	assert.Contains(t, err.Error(), "invalid regular expression")
-}
@@ -1,92 +0,0 @@
-package perf
-
-type LUID struct {
-	LowPart  uint32
-	HighPart int32
-}
-
-const maxEnumAdapters = 16
-
-type D3DKMT_ENUMADAPTERS2 struct {
-	NumAdapters uint32
-	pAdapters   uintptr
-}
-
-type D3DKMT_ADAPTERINFO struct {
-	hAdapter                     uint32
-	AdapterLuid                  LUID
-	NumOfSources                 uint32
-	bPresentMoveRegionsPreferred int32
-}
-
-type D3DKMT_OPENADAPTERFROMLUID struct {
-	AdapterLuid LUID
-	hAdapter    uint32
-}
-
-type D3DKMT_CLOSEADAPTER struct {
-	hAdapter uint32
-}
-
-type KMTQUERYADAPTERINFOTYPE int32
-
-const (
-	KMTQAITYPE_UMDRIVERPRIVATE          KMTQUERYADAPTERINFOTYPE = 0
-	KMTQAITYPE_ADAPTERREGISTRYINFO      KMTQUERYADAPTERINFOTYPE = 8
-	KMTQAITYPE_DRIVERVERSION            KMTQUERYADAPTERINFOTYPE = 13
-	KMTQAITYPE_PHYSICALADAPTERDEVICEIDS KMTQUERYADAPTERINFOTYPE = 31
-	KMTQAITYPE_NODEPERFDATA             KMTQUERYADAPTERINFOTYPE = 61
-	KMTQAITYPE_ADAPTERPERFDATA          KMTQUERYADAPTERINFOTYPE = 62
-	KMTQAITYPE_ADAPTERPERFDATA_CAPS     KMTQUERYADAPTERINFOTYPE = 63
-)
-
-type D3DKMT_QUERYADAPTERINFO struct {
-	hAdapter              uint32
-	Type                  KMTQUERYADAPTERINFOTYPE
-	pPrivateDriverData    uintptr
-	PrivateDriverDataSize uint32
-}
-
-type D3DKMT_ADAPTER_PERFDATA struct {
-	PhysicalAdapterIndex uint32
-	MemoryFrequency      uint64
-	MaxMemoryFrequency   uint64
-	MaxMemoryFrequencyOC uint64
-	MemoryBandwidth      uint64
-	PCIEBandwidth        uint64
-	FanRPM               uint32
-	Power                uint32
-	Temperature          uint32
-	PowerStateOverride   byte
-}
-
-type D3DKMT_QUERYSTATISTICS_TYPE int32
-
-const (
-	D3DKMT_QUERYSTATISTICS_ADAPTER             D3DKMT_QUERYSTATISTICS_TYPE = 0
-	D3DKMT_QUERYSTATISTICS_PROCESS             D3DKMT_QUERYSTATISTICS_TYPE = 1
-	D3DKMT_QUERYSTATISTICS_PROCESS_ADAPTER     D3DKMT_QUERYSTATISTICS_TYPE = 2
-	D3DKMT_QUERYSTATISTICS_SEGMENT             D3DKMT_QUERYSTATISTICS_TYPE = 3
-	D3DKMT_QUERYSTATISTICS_PROCESS_SEGMENT     D3DKMT_QUERYSTATISTICS_TYPE = 4
-	D3DKMT_QUERYSTATISTICS_NODE                D3DKMT_QUERYSTATISTICS_TYPE = 5
-	D3DKMT_QUERYSTATISTICS_PROCESS_NODE        D3DKMT_QUERYSTATISTICS_TYPE = 6
-	D3DKMT_QUERYSTATISTICS_VIDPNSOURCE         D3DKMT_QUERYSTATISTICS_TYPE = 7
-	D3DKMT_QUERYSTATISTICS_PROCESS_VIDPNSOURCE D3DKMT_QUERYSTATISTICS_TYPE = 8
-)
-
-type D3DKMT_ADAPTER_PERFDATACAPS struct {
-	PhysicalAdapterIndex uint32
-	MaxMemoryBandwidth   uint64
-	MaxPCIEBandwidth     uint64
-	MaxFanRPM            uint32
-	TemperatureMax       uint32
-	TemperatureWarning   uint32
-}
-
-type D3DKMT_QUERYSTATISTICS_QUERY_SEGMENT struct {
-	SegmentId uint32
-}
-
-type D3DKMT_QUERYSTATISTICS_QUERY_NODE struct {
-	NodeId uint32
-}
@@ -1,529 +0,0 @@
-//go:build windows
-
-package perf
-
-import (
-	"context"
-	"encoding/binary"
-	"fmt"
-	"sync"
-	"time"
-	"unsafe"
-
-	"github.com/mostlygeek/llama-swap/internal/logmon"
-	"golang.org/x/sys/windows"
-)
-
-var (
-	d3dkmDLL                *windows.LazyDLL
-	procEnumAdapters2       *windows.LazyProc
-	procOpenAdapterFromLuid *windows.LazyProc
-	procCloseAdapter        *windows.LazyProc
-	procQueryAdapterInfo    *windows.LazyProc
-	procQueryStatistics     *windows.LazyProc
-	d3dkmtInitOnce          sync.Once
-	d3dkmtInitErr           error
-)
-
-// initD3DKMT lazily loads gdi32.dll and resolves D3DKMT function pointers.
-// Safe for concurrent use via sync.Once.
-func initD3DKMT() error {
-	d3dkmtInitOnce.Do(func() {
-		d3dkmDLL = windows.NewLazySystemDLL("gdi32.dll")
-
-		procEnumAdapters2 = d3dkmDLL.NewProc("D3DKMTEnumAdapters2")
-		procOpenAdapterFromLuid = d3dkmDLL.NewProc("D3DKMTOpenAdapterFromLuid")
-		procCloseAdapter = d3dkmDLL.NewProc("D3DKMTCloseAdapter")
-		procQueryAdapterInfo = d3dkmDLL.NewProc("D3DKMTQueryAdapterInfo")
-		procQueryStatistics = d3dkmDLL.NewProc("D3DKMTQueryStatistics")
-
-		for name, p := range map[string]*windows.LazyProc{
-			"D3DKMTEnumAdapters2":       procEnumAdapters2,
-			"D3DKMTOpenAdapterFromLuid": procOpenAdapterFromLuid,
-			"D3DKMTCloseAdapter":        procCloseAdapter,
-			"D3DKMTQueryAdapterInfo":    procQueryAdapterInfo,
-			"D3DKMTQueryStatistics":     procQueryStatistics,
-		} {
-			if err := p.Find(); err != nil {
-				d3dkmtInitErr = fmt.Errorf("D3DKMT %s not found: %w", name, err)
-				return
-			}
-		}
-	})
-	return d3dkmtInitErr
-}
-
-// ntstatusCall invokes a D3DKMT function and returns a non-nil error if the
-// NTSTATUS result is not STATUS_SUCCESS (0).
-func ntstatusCall(proc *windows.LazyProc, arg unsafe.Pointer) error {
-	ret, _, _ := proc.Call(uintptr(arg))
-	if ret != 0 {
-		return fmt.Errorf("NTSTATUS 0x%08x", uint32(ret))
-	}
-	return nil
-}
-
-// d3dkmEnumerateAdapters enumerates all available graphics adapters via
-// D3DKMTEnumAdapters2.
-func d3dkmEnumerateAdapters() ([]D3DKMT_ADAPTERINFO, error) {
-	var adapters [maxEnumAdapters]D3DKMT_ADAPTERINFO
-	enum := D3DKMT_ENUMADAPTERS2{
-		NumAdapters: maxEnumAdapters,
-		pAdapters:   uintptr(unsafe.Pointer(&adapters[0])),
-	}
-	if err := ntstatusCall(procEnumAdapters2, unsafe.Pointer(&enum)); err != nil {
-		return nil, fmt.Errorf("EnumAdapters2: %w", err)
-	}
-	if enum.NumAdapters == 0 {
-		return nil, fmt.Errorf("no adapters found")
-	}
-	result := make([]D3DKMT_ADAPTERINFO, enum.NumAdapters)
-	for i := uint32(0); i < enum.NumAdapters; i++ {
-		result[i] = adapters[i]
-	}
-	return result, nil
-}
-
-// d3dkmOpenAdapter opens a D3DKMT adapter handle for the given LUID.
-func d3dkmOpenAdapter(luid LUID) (uint32, error) {
-	req := D3DKMT_OPENADAPTERFROMLUID{
-		AdapterLuid: luid,
-	}
-	if err := ntstatusCall(procOpenAdapterFromLuid, unsafe.Pointer(&req)); err != nil {
-		return 0, fmt.Errorf("OpenAdapterFromLuid: %w", err)
-	}
-	return req.hAdapter, nil
-}
-
-// d3dkmCloseAdapter closes a previously opened D3DKMT adapter handle.
-func d3dkmCloseAdapter(hAdapter uint32) error {
-	req := D3DKMT_CLOSEADAPTER{hAdapter: hAdapter}
-	return ntstatusCall(procCloseAdapter, unsafe.Pointer(&req))
-}
-
-// d3dkmGetAdapterPerfData queries per-adapter performance data (temperature,
-// fan RPM, power, bandwidth) via KMTQAITYPE_ADAPTERPERFDATA.
-func d3dkmGetAdapterPerfData(hAdapter uint32) (*D3DKMT_ADAPTER_PERFDATA, error) {
-	var data D3DKMT_ADAPTER_PERFDATA
-	req := D3DKMT_QUERYADAPTERINFO{
-		hAdapter:              hAdapter,
-		Type:                  KMTQAITYPE_ADAPTERPERFDATA,
-		pPrivateDriverData:    uintptr(unsafe.Pointer(&data)),
-		PrivateDriverDataSize: uint32(unsafe.Sizeof(data)),
-	}
-	if err := ntstatusCall(procQueryAdapterInfo, unsafe.Pointer(&req)); err != nil {
-		return nil, fmt.Errorf("QueryAdapterInfo(ADAPTERPERFDATA): %w", err)
-	}
-	return &data, nil
-}
-
-// d3dkmGetAdapterPerfDataCaps queries static adapter performance capabilities
-// (max fan RPM, temperature limits, max bandwidth) via KMTQAITYPE_ADAPTERPERFDATA_CAPS.
-func d3dkmGetAdapterPerfDataCaps(hAdapter uint32) (*D3DKMT_ADAPTER_PERFDATACAPS, error) {
-	var data D3DKMT_ADAPTER_PERFDATACAPS
-	req := D3DKMT_QUERYADAPTERINFO{
-		hAdapter:              hAdapter,
-		Type:                  KMTQAITYPE_ADAPTERPERFDATA_CAPS,
-		pPrivateDriverData:    uintptr(unsafe.Pointer(&data)),
-		PrivateDriverDataSize: uint32(unsafe.Sizeof(data)),
-	}
-	if err := ntstatusCall(procQueryAdapterInfo, unsafe.Pointer(&req)); err != nil {
-		return nil, fmt.Errorf("QueryAdapterInfo(ADAPTERPERFDATACAPS): %w", err)
-	}
-	return &data, nil
-}
-
-type queryStatsBuffer struct {
-	Type        int32   // offset 0
-	AdapterLuid LUID    // offset 4
-	hProcess    uintptr // offset 16
-	// _result mirrors the D3DKMT_QUERYSTATISTICS_RESULT union.
-	// sizeof(D3DKMT_QUERYSTATISTICS) == 0x328 (808 bytes) on x64.
-	//
-	// The C struct layout (x64):
-	//   offset  0: Type (int32, 4 bytes)
-	//   offset  4: AdapterLuid (LUID, 8 bytes)
-	//   offset 12: 4 bytes padding (for 8-byte alignment of hProcess)
-	//   offset 16: hProcess (HANDLE, 8 bytes)
-	//   offset 24: QueryResult (union, 780 bytes — largest member is AdapterInformation)
-	//   offset 804: anonymous input union (QueryNode.NodeId / QuerySegment.SegmentId, 4 bytes)
-	//
-	// Previous bug: _result was [776]byte, placing QueryId at offset 800 instead of 804.
-	// The kernel read NodeId/SegmentId from offset 804 (always zero from _pad),
-	// causing all NODE and SEGMENT queries to use index 0 regardless of the value
-	// passed in QueryId. This produced alternating behavior where only GPU util OR
-	// memory util appeared to work, depending on which test variant happened to put
-	// non-zero data near offset 804 in the result buffer.
-	_result [780]byte // offset 24, size 780 — places QueryId at offset 804
-	QueryId int32     // offset 804 — matches C anonymous union for NodeId/SegmentId
-}
-
-func init() {
-	var buf queryStatsBuffer
-	if unsafe.Sizeof(buf) != 808 {
-		panic(fmt.Sprintf("queryStatsBuffer size %d != expected 808 (sizeof D3DKMT_QUERYSTATISTICS on x64)", unsafe.Sizeof(buf)))
-	}
-	if unsafe.Offsetof(buf.QueryId) != 804 {
-		panic(fmt.Sprintf("queryStatsBuffer.QueryId offset %d != expected 804 (C anonymous union offset)", unsafe.Offsetof(buf.QueryId)))
-	}
-
-	var perfData D3DKMT_ADAPTER_PERFDATA
-	if unsafe.Sizeof(perfData) != 64 {
-		panic(fmt.Sprintf("D3DKMT_ADAPTER_PERFDATA size %d != expected 64 on x64", unsafe.Sizeof(perfData)))
-	}
-
-	var caps D3DKMT_ADAPTER_PERFDATACAPS
-	if unsafe.Sizeof(caps) != 40 {
-		panic(fmt.Sprintf("D3DKMT_ADAPTER_PERFDATACAPS size %d != expected 40 on x64", unsafe.Sizeof(caps)))
-	}
-}
-
-const (
-	qsoffsetNbSegments        = 0
-	qsoffsetNodeCount         = 4
-	qsoffsetCommitLimit       = 0
-	qsoffsetBytesCommitted    = 8
-	qsoffsetBytesResident     = 16
-	qsoffsetRunningTime       = 0
-	qsoffsetSystemRunningTime = 272
-)
-
-// d3dkmQueryAdapterStats returns the number of memory segments and compute
-// nodes for the adapter identified by luid.
-func d3dkmQueryAdapterStats(luid LUID) (nbSegments uint32, nodeCount uint32, err error) {
-	buf := queryStatsBuffer{
-		Type:        int32(D3DKMT_QUERYSTATISTICS_ADAPTER),
-		AdapterLuid: luid,
-	}
-	if err := ntstatusCall(procQueryStatistics, unsafe.Pointer(&buf)); err != nil {
-		return 0, 0, fmt.Errorf("QueryStatistics(ADAPTER): %w", err)
-	}
-	nbSegments = binary.LittleEndian.Uint32(buf._result[qsoffsetNbSegments : qsoffsetNbSegments+4])
-	nodeCount = binary.LittleEndian.Uint32(buf._result[qsoffsetNodeCount : qsoffsetNodeCount+4])
-	return nbSegments, nodeCount, nil
-}
-
-// d3dkmQuerySegmentStats returns the commit limit (total) and resident
-// (used) bytes for the given memory segment of an adapter.
-func d3dkmQuerySegmentStats(luid LUID, segmentID uint32) (commitLimit uint64, bytesResident uint64, err error) {
-	buf := queryStatsBuffer{
-		Type:        int32(D3DKMT_QUERYSTATISTICS_SEGMENT),
-		AdapterLuid: luid,
-		QueryId:     int32(segmentID),
-	}
-	if err := ntstatusCall(procQueryStatistics, unsafe.Pointer(&buf)); err != nil {
-		return 0, 0, fmt.Errorf("QueryStatistics(SEGMENT %d): %w", segmentID, err)
-	}
-	commitLimit = binary.LittleEndian.Uint64(buf._result[qsoffsetCommitLimit : qsoffsetCommitLimit+8])
-	bytesResident = binary.LittleEndian.Uint64(buf._result[qsoffsetBytesResident : qsoffsetBytesResident+8])
-	if bytesResident == 0 {
-		bytesResident = binary.LittleEndian.Uint64(buf._result[qsoffsetBytesCommitted : qsoffsetBytesCommitted+8])
-	}
-	return commitLimit, bytesResident, nil
-}
-
-// d3dkmQueryNodeStats returns the global and system running time counters
-// (in 100ns units) for the given compute node of an adapter.
-func d3dkmQueryNodeStats(luid LUID, nodeID uint32) (runningTime uint64, systemRunningTime uint64, err error) {
-	buf := queryStatsBuffer{
-		Type:        int32(D3DKMT_QUERYSTATISTICS_NODE),
-		AdapterLuid: luid,
-		QueryId:     int32(nodeID),
-	}
-	if err := ntstatusCall(procQueryStatistics, unsafe.Pointer(&buf)); err != nil {
-		return 0, 0, fmt.Errorf("QueryStatistics(NODE %d): %w", nodeID, err)
-	}
-	runningTime = binary.LittleEndian.Uint64(buf._result[qsoffsetRunningTime : qsoffsetRunningTime+8])
-	systemRunningTime = binary.LittleEndian.Uint64(buf._result[qsoffsetSystemRunningTime : qsoffsetSystemRunningTime+8])
-	return runningTime, systemRunningTime, nil
-}
-
-type nodeRunningTimes struct {
-	Global uint64
-	System uint64
-}
-
-// d3dkmtNodeUtil computes GPU node utilization as a percentage from running
-// time deltas. Returns -1 if counters went backwards (wrap/reset), 0 if idle.
-func d3dkmtNodeUtil(prevRT, curRT nodeRunningTimes, elapsed100ns int64) float64 {
-	if curRT.Global < prevRT.Global || curRT.System < prevRT.System {
-		return -1
-	}
-	gd := curRT.Global - prevRT.Global
-	sd := curRT.System - prevRT.System
-
-	if gd > 0 && sd > 0 {
-		util := float64(gd) / float64(sd)
-		if util > 1.0 {
-			util = 1.0
-		}
-		return util * 100.0
-	} else if gd > 0 && elapsed100ns > 0 {
-		util := float64(gd) / float64(elapsed100ns) * 100.0
-		if util > 100.0 {
-			util = 100.0
-		}
-		return util
-	}
-	return 0
-}
-
-// d3dkmtFanPct returns fan speed as a percentage of maxFanRPM, clamped to
-// 100%. Returns 0 if maxFanRPM is unavailable or fan is not spinning.
-func d3dkmtFanPct(fanRPM, maxFanRPM uint32) float64 {
-	if maxFanRPM > 0 && fanRPM > 0 {
-		pct := float64(fanRPM) / float64(maxFanRPM) * 100.0
-		if pct > 100.0 {
-			pct = 100.0
-		}
-		return pct
-	}
-	return 0
-}
-
-// d3dkmtPowerW converts power from deci-watts (as reported by D3DKMT) to
-// watts. Returns 0 if the power value is zero.
-func d3dkmtPowerW(power uint32) float64 {
-	if power > 0 {
-		return float64(power) / 10.0
-	}
-	return 0
-}
-
-// d3dkmtTempC converts temperature from deci-Celsius (as reported by D3DKMT)
-// to degrees Celsius.
-func d3dkmtTempC(tempDeciC uint32) int {
-	return int(tempDeciC / 10)
-}
-
-type d3dkmtAdapterState struct {
-	luid       LUID
-	hAdapter   uint32
-	nbSegments uint32
-	nodeCount  uint32
-	maxFanRPM  uint32
-	prevNodeRT map[uint32]nodeRunningTimes
-	prevTime   time.Time
-}
-
-// tryD3DKMT attempts to start GPU monitoring using D3DKMT and optional PDH
-// counters. It returns a channel of GpuStat snapshots or an error if no
-// usable adapters are found.
-func tryD3DKMT(ctx context.Context, every time.Duration, logger *logmon.Monitor) (chan []GpuStat, error) {
-	if err := initD3DKMT(); err != nil {
-		return nil, err
-	}
-
-	adapterInfos, err := d3dkmEnumerateAdapters()
-	if err != nil {
-		return nil, err
-	}
-
-	type adapterMeta struct {
-		luid       LUID
-		nbSegments uint32
-		nodeCount  uint32
-		maxFanRPM  uint32
-	}
-
-	var metaList []adapterMeta
-
-	for i, ai := range adapterInfos {
-		hAdapter, err := d3dkmOpenAdapter(ai.AdapterLuid)
-		if err != nil {
-			logger.Debugf("adapter %d: open failed: %s", i, err.Error())
-			continue
-		}
-
-		nbSegments, nodeCount, err := d3dkmQueryAdapterStats(ai.AdapterLuid)
-		if err != nil {
-			logger.Debugf("adapter %d: query stats failed: %s", i, err.Error())
-			d3dkmCloseAdapter(hAdapter)
-			continue
-		}
-
-		caps, err := d3dkmGetAdapterPerfDataCaps(hAdapter)
-		if err != nil {
-			logger.Debugf("adapter %d: perf caps failed: %s", i, err.Error())
-		}
-
-		d3dkmCloseAdapter(hAdapter)
-
-		var maxFanRPM uint32
-		if caps != nil {
-			maxFanRPM = caps.MaxFanRPM
-		}
-
-		metaList = append(metaList, adapterMeta{
-			luid:       ai.AdapterLuid,
-			nbSegments: nbSegments,
-			nodeCount:  nodeCount,
-			maxFanRPM:  maxFanRPM,
-		})
-		logger.Debugf("adapter %d: segments=%d nodes=%d fan_max=%d luid=%d:%d", i, nbSegments, nodeCount, maxFanRPM, ai.AdapterLuid.HighPart, ai.AdapterLuid.LowPart)
-	}
-
-	if len(metaList) == 0 {
-		return nil, fmt.Errorf("no usable D3DKMT adapters found")
-	}
-
-	pdhUtil, pdhErr := initPdhGpuUtil()
-	if pdhErr != nil {
-		logger.Debugf("PDH GPU utilization not available: %s", pdhErr.Error())
-	} else {
-		logger.Info("using PDH performance counters for GPU utilization")
-	}
-
-	ch := make(chan []GpuStat, 1)
-
-	go func() {
-		defer close(ch)
-		if pdhUtil != nil {
-			defer pdhUtil.close()
-		}
-
-		var adapters []d3dkmtAdapterState
-		for _, m := range metaList {
-			hAdapter, err := d3dkmOpenAdapter(m.luid)
-			if err != nil {
-				logger.Debugf("reopen adapter failed: %s", err.Error())
-				continue
-			}
-			adapters = append(adapters, d3dkmtAdapterState{
-				luid:       m.luid,
-				hAdapter:   hAdapter,
-				nbSegments: m.nbSegments,
-				nodeCount:  m.nodeCount,
-				maxFanRPM:  m.maxFanRPM,
-				prevNodeRT: make(map[uint32]nodeRunningTimes),
-			})
-		}
-
-		if len(adapters) == 0 {
-			return
-		}
-
-		defer func() {
-			for _, a := range adapters {
-				d3dkmCloseAdapter(a.hAdapter)
-			}
-		}()
-
-		for i := range adapters {
-			a := &adapters[i]
-			for node := uint32(0); node < a.nodeCount; node++ {
-				globalRT, systemRT, err := d3dkmQueryNodeStats(a.luid, node)
-				if err != nil {
-					continue
-				}
-				a.prevNodeRT[node] = nodeRunningTimes{Global: globalRT, System: systemRT}
-			}
-			a.prevTime = time.Now()
-		}
-
-		ticker := time.NewTicker(every)
-		defer ticker.Stop()
-
-		for {
-			select {
-			case <-ctx.Done():
-				return
-			case <-ticker.C:
-				stats := make([]GpuStat, 0, len(adapters))
-				now := time.Now()
-
-				var pdhUtilMap map[LUID]float64
-				if pdhUtil != nil {
-					pdhUtilMap = pdhUtil.collect()
-				}
-
-				for i := range adapters {
-					a := &adapters[i]
-
-					perfData, err := d3dkmGetAdapterPerfData(a.hAdapter)
-					if err != nil {
-						logger.Debugf("adapter %d perfdata: %s", i, err.Error())
-						continue
-					}
-
-					var memUsedMB, memTotalMB int
-					for seg := uint32(0); seg < a.nbSegments; seg++ {
-						limit, resident, err := d3dkmQuerySegmentStats(a.luid, seg)
-						if err != nil {
-							continue
-						}
-						memUsedMB += int(resident / (1024 * 1024))
-						memTotalMB += int(limit / (1024 * 1024))
-					}
-
-					var gpuUtil float64
-					pdhGaveValue := false
-					if pdhUtilMap != nil {
-						if util, ok := pdhUtilMap[a.luid]; ok {
-							gpuUtil = util
-							pdhGaveValue = true
-						}
-					}
-
-					if !pdhGaveValue && a.nodeCount > 0 {
-						elapsedNs := now.Sub(a.prevTime).Nanoseconds()
-						elapsed100ns := elapsedNs / 100
-
-						for node := uint32(0); node < a.nodeCount; node++ {
-							globalRT, systemRT, err := d3dkmQueryNodeStats(a.luid, node)
-							if err != nil {
-								continue
-							}
-
-							if prevRT, ok := a.prevNodeRT[node]; ok {
-								if globalRT < prevRT.Global || systemRT < prevRT.System {
-									a.prevNodeRT[node] = nodeRunningTimes{Global: globalRT, System: systemRT}
-									continue
-								}
-								nodeUtil := d3dkmtNodeUtil(prevRT, nodeRunningTimes{Global: globalRT, System: systemRT}, elapsed100ns)
-								if nodeUtil > gpuUtil {
-									gpuUtil = nodeUtil
-								}
-							}
-							a.prevNodeRT[node] = nodeRunningTimes{Global: globalRT, System: systemRT}
-						}
-
-						a.prevTime = now
-					}
-
-					tempC := d3dkmtTempC(perfData.Temperature)
-
-					fanSpeedPct := d3dkmtFanPct(perfData.FanRPM, a.maxFanRPM)
-					powerDrawW := d3dkmtPowerW(perfData.Power)
-
-					var memUtilPct float64
-					if memTotalMB > 0 {
-						memUtilPct = float64(memUsedMB) / float64(memTotalMB) * 100.0
-					}
-
-					stats = append(stats, GpuStat{
-						Timestamp:   now,
-						ID:          i,
-						Name:        fmt.Sprintf("GPU %d", i),
-						TempC:       tempC,
-						GpuUtilPct:  gpuUtil,
-						MemUtilPct:  memUtilPct,
-						MemUsedMB:   memUsedMB,
-						MemTotalMB:  memTotalMB,
-						FanSpeedPct: fanSpeedPct,
-						PowerDrawW:  powerDrawW,
-					})
-				}
-
-				if len(stats) > 0 {
-					select {
-					case ch <- stats:
-					default:
-					}
-				}
-			}
-		}
-	}()
-
-	return ch, nil
-}
@@ -1,98 +0,0 @@
-//go:build windows
-
-package perf
-
-import (
-	"testing"
-
-	"github.com/stretchr/testify/assert"
-)
-
-func TestD3dkmtNodeUtil_FullLoad(t *testing.T) {
-	prev := nodeRunningTimes{Global: 1000, System: 10000}
-	cur := nodeRunningTimes{Global: 5000, System: 14000}
-	got := d3dkmtNodeUtil(prev, cur, 100000)
-	assert.Equal(t, 100.0, got)
-}
-
-func TestD3dkmtNodeUtil_PartialUtil(t *testing.T) {
-	prev := nodeRunningTimes{Global: 1000, System: 10000}
-	cur := nodeRunningTimes{Global: 3000, System: 14000}
-	got := d3dkmtNodeUtil(prev, cur, 100000)
-	assert.Equal(t, 50.0, got)
-}
-
-func TestD3dkmtNodeUtil_Identical(t *testing.T) {
-	prev := nodeRunningTimes{Global: 10000, System: 10000}
-	cur := nodeRunningTimes{Global: 20000, System: 20000}
-	got := d3dkmtNodeUtil(prev, cur, 100000)
-	assert.Equal(t, 100.0, got)
-}
-
-func TestD3dkmtNodeUtil_CounterWrap(t *testing.T) {
-	prev := nodeRunningTimes{Global: 9000, System: 10000}
-	cur := nodeRunningTimes{Global: 1000, System: 10000}
-	got := d3dkmtNodeUtil(prev, cur, 100000)
-	assert.Equal(t, -1.0, got)
-}
-
-func TestD3dkmtNodeUtil_SystemWrap(t *testing.T) {
-	prev := nodeRunningTimes{Global: 1000, System: 9000}
-	cur := nodeRunningTimes{Global: 5000, System: 1000}
-	got := d3dkmtNodeUtil(prev, cur, 100000)
-	assert.Equal(t, -1.0, got)
-}
-
-func TestD3dkmtNodeUtil_ZeroDelta(t *testing.T) {
-	prev := nodeRunningTimes{Global: 1000, System: 10000}
-	cur := nodeRunningTimes{Global: 1000, System: 10000}
-	got := d3dkmtNodeUtil(prev, cur, 100000)
-	assert.Equal(t, 0.0, got)
-}
-
-func TestD3dkmtNodeUtil_ElapsedFallback(t *testing.T) {
-	prev := nodeRunningTimes{Global: 1000, System: 10000}
-	cur := nodeRunningTimes{Global: 6000, System: 10000}
-	got := d3dkmtNodeUtil(prev, cur, 50000)
-	assert.InDelta(t, 10.0, got, 0.01)
-}
-
-func TestD3dkmtFanPct_Normal(t *testing.T) {
-	assert.Equal(t, 50.0, d3dkmtFanPct(1500, 3000))
-}
-
-func TestD3dkmtFanPct_MaxFan(t *testing.T) {
-	assert.Equal(t, 100.0, d3dkmtFanPct(3000, 3000))
-}
-
-func TestD3dkmtFanPct_OverMaxClamped(t *testing.T) {
-	assert.Equal(t, 100.0, d3dkmtFanPct(4000, 3000))
-}
-
-func TestD3dkmtFanPct_ZeroMaxFan(t *testing.T) {
-	assert.Equal(t, 0.0, d3dkmtFanPct(1500, 0))
-}
-
-func TestD3dkmtFanPct_ZeroFanRPM(t *testing.T) {
-	assert.Equal(t, 0.0, d3dkmtFanPct(0, 3000))
-}
-
-func TestD3dkmtFanPct_BothZero(t *testing.T) {
-	assert.Equal(t, 0.0, d3dkmtFanPct(0, 0))
-}
-
-func TestD3dkmtPowerW(t *testing.T) {
-	assert.Equal(t, 250.0, d3dkmtPowerW(2500))
-}
-
-func TestD3dkmtPowerW_Zero(t *testing.T) {
-	assert.Equal(t, 0.0, d3dkmtPowerW(0))
-}
-
-func TestD3dkmtTempC(t *testing.T) {
-	assert.Equal(t, 65, d3dkmtTempC(650))
-}
-
-func TestD3dkmtTempC_Zero(t *testing.T) {
-	assert.Equal(t, 0, d3dkmtTempC(0))
-}
@@ -22,13 +22,6 @@ func getGpuStats(ctx context.Context, every time.Duration, logger *logmon.Monito
 		logger.Debugf("nvidia-smi: %s", err.Error())
 	}

-	if ch, err := tryD3DKMT(ctx, every, logger); err == nil {
-		logger.Info("using D3DKMT for GPU monitoring")
-		return ch, nil
-	} else {
-		logger.Debugf("D3DKMT: %s", err.Error())
-	}
-
 	return nil, ErrNoGpuTool
 }

@@ -1,159 +0,0 @@
-//go:build windows
-
-package perf
-
-import (
-	"fmt"
-	"strconv"
-	"strings"
-	"unsafe"
-
-	"golang.org/x/sys/windows"
-)
-
-var (
-	pdhDLL                          = windows.NewLazySystemDLL("pdh.dll")
-	procPdhOpenQuery                = pdhDLL.NewProc("PdhOpenQueryW")
-	procPdhAddEnglishCounter        = pdhDLL.NewProc("PdhAddEnglishCounterW")
-	procPdhCollectQueryData         = pdhDLL.NewProc("PdhCollectQueryData")
-	procPdhGetFormattedCounterArray = pdhDLL.NewProc("PdhGetFormattedCounterArrayW")
-	procPdhCloseQuery               = pdhDLL.NewProc("PdhCloseQuery")
-)
-
-const (
-	pdhFmtDouble = 0x00000200
-	pdhMoreData  = 0x800007D2
-	pdhNoData    = 0x800007D5
-)
-
-type pdhCounterValue struct {
-	CStatus uint32
-	DblVal  float64
-}
-
-type pdhCounterValueItem struct {
-	SzName   *uint16
-	FmtValue pdhCounterValue
-}
-
-func init() {
-	var item pdhCounterValueItem
-	if unsafe.Sizeof(item) != 24 {
-		panic(fmt.Sprintf("pdhCounterValueItem size %d != expected 24 on x64", unsafe.Sizeof(item)))
-	}
-}
-
-type pdhGpuUtil struct {
-	query   uintptr
-	counter uintptr
-}
-
-// initPdhGpuUtil creates a PDH query for the GPU Engine utilization counter.
-// Returns nil with an error if PDH or the counter is unavailable.
-func initPdhGpuUtil() (*pdhGpuUtil, error) {
-	var query uintptr
-	if ret, _, _ := procPdhOpenQuery.Call(0, 0, uintptr(unsafe.Pointer(&query))); ret != 0 {
-		return nil, fmt.Errorf("PdhOpenQuery: 0x%x", ret)
-	}
-
-	path, _ := windows.UTF16PtrFromString(`\GPU Engine(*)\Utilization Percentage`)
-	var counter uintptr
-	if ret, _, _ := procPdhAddEnglishCounter.Call(
-		query, uintptr(unsafe.Pointer(path)), 0, uintptr(unsafe.Pointer(&counter)),
-	); ret != 0 {
-		procPdhCloseQuery.Call(query)
-		return nil, fmt.Errorf("PdhAddEnglishCounter(GPU Engine): 0x%x", ret)
-	}
-
-	procPdhCollectQueryData.Call(query)
-
-	return &pdhGpuUtil{query: query, counter: counter}, nil
-}
-
-// close releases the PDH query handle.
-func (p *pdhGpuUtil) close() {
-	if p.query != 0 {
-		procPdhCloseQuery.Call(p.query)
-		p.query = 0
-	}
-}
-
-// collect reads the PDH counter and returns a map of adapter LUID to
-// aggregated GPU utilization percentage, summed across all engine instances
-// per adapter and clamped to 100%.
-func (p *pdhGpuUtil) collect() map[LUID]float64 {
-	ret, _, _ := procPdhCollectQueryData.Call(p.query)
-	if ret != 0 && ret != pdhNoData {
-		return nil
-	}
-
-	var bufSize uint32
-	var itemCount uint32
-	ret, _, _ = procPdhGetFormattedCounterArray.Call(
-		p.counter, pdhFmtDouble,
-		uintptr(unsafe.Pointer(&bufSize)),
-		uintptr(unsafe.Pointer(&itemCount)),
-		0,
-	)
-	if ret != pdhMoreData || itemCount == 0 {
-		return nil
-	}
-
-	buf := make([]byte, bufSize)
-	ret, _, _ = procPdhGetFormattedCounterArray.Call(
-		p.counter, pdhFmtDouble,
-		uintptr(unsafe.Pointer(&bufSize)),
-		uintptr(unsafe.Pointer(&itemCount)),
-		uintptr(unsafe.Pointer(&buf[0])),
-	)
-	if ret != 0 {
-		return nil
-	}
-
-	itemSize := uint32(unsafe.Sizeof(pdhCounterValueItem{}))
-	result := make(map[LUID]float64)
-
-	for i := uint32(0); i < itemCount; i++ {
-		item := (*pdhCounterValueItem)(unsafe.Pointer(&buf[i*itemSize]))
-		if item.FmtValue.CStatus != 0 {
-			continue
-		}
-		luid, ok := parsePdhLuid(windows.UTF16PtrToString(item.SzName))
-		if !ok {
-			continue
-		}
-		result[luid] += item.FmtValue.DblVal
-	}
-
-	for luid := range result {
-		if result[luid] > 100.0 {
-			result[luid] = 100.0
-		}
-	}
-
-	return result
-}
-
-// parsePdhLuid extracts the adapter LUID (high and low parts) from a PDH
-// GPU Engine instance name (e.g. "pid_1234_luid_0x00000000_0x000148BF_phys_0_eng_2_engtype_Compute").
-func parsePdhLuid(name string) (LUID, bool) {
-	idx := strings.Index(name, "luid_0x")
-	if idx < 0 {
-		return LUID{}, false
-	}
-	rest := name[idx+7:]
-	parts := strings.SplitN(rest, "_", 4)
-	if len(parts) < 3 {
-		return LUID{}, false
-	}
-	hp, err := strconv.ParseUint(parts[0], 16, 32)
-	if err != nil {
-		return LUID{}, false
-	}
-	lpStr := strings.TrimPrefix(parts[1], "0x")
-	lp, err := strconv.ParseUint(lpStr, 16, 32)
-	if err != nil {
-		return LUID{}, false
-	}
-	return LUID{LowPart: uint32(lp), HighPart: int32(hp)}, true
-}
@@ -1,53 +0,0 @@
-//go:build windows
-
-package perf
-
-import (
-	"testing"
-
-	"github.com/stretchr/testify/assert"
-)
-
-func TestParsePdhLuid_Valid(t *testing.T) {
-	name := `pid_25312_luid_0x00000000_0x000148BF_phys_0_eng_2_engtype_Compute`
-	got, ok := parsePdhLuid(name)
-	assert.True(t, ok)
-	assert.Equal(t, uint32(0x000148BF), got.LowPart)
-	assert.Equal(t, int32(0x00000000), got.HighPart)
-}
-
-func TestParsePdhLuid_ValidNvidia(t *testing.T) {
-	name := `pid_1388_luid_0x00000000_0x00011372_phys_0_eng_8_engtype_Compute_1`
-	got, ok := parsePdhLuid(name)
-	assert.True(t, ok)
-	assert.Equal(t, uint32(0x00011372), got.LowPart)
-	assert.Equal(t, int32(0x00000000), got.HighPart)
-}
-
-func TestParsePdhLuid_NonZeroHighPart(t *testing.T) {
-	name := `pid_1234_luid_0x00000001_0x0000C85A_phys_0_eng_5_engtype_Copy`
-	got, ok := parsePdhLuid(name)
-	assert.True(t, ok)
-	assert.Equal(t, uint32(0x0000C85A), got.LowPart)
-	assert.Equal(t, int32(0x00000001), got.HighPart)
-}
-
-func TestParsePdhLuid_InvalidNoLuid(t *testing.T) {
-	_, ok := parsePdhLuid("invalid_string_without_luid")
-	assert.False(t, ok)
-}
-
-func TestParsePdhLuid_InvalidEmpty(t *testing.T) {
-	_, ok := parsePdhLuid("")
-	assert.False(t, ok)
-}
-
-func TestParsePdhLuid_InvalidHex(t *testing.T) {
-	_, ok := parsePdhLuid("pid_1234_luid_0xZZZZ_0xGGGG_phys_0")
-	assert.False(t, ok)
-}
-
-func TestParsePdhLuid_ShortAfterLuid(t *testing.T) {
-	_, ok := parsePdhLuid("pid_1234_luid_0x00000000")
-	assert.False(t, ok)
-}
@@ -3,7 +3,6 @@ package scheduler
 import (
 	"fmt"
 	"sort"
-	"strconv"
 	"time"

 	"github.com/mostlygeek/llama-swap/internal/config"
@@ -279,11 +278,6 @@ func (s *FIFO) grantHandler(req HandlerReq, modelID string) {
 		s.effects.GrantError(req, shared.ConcurrencyLimitError{})
 		return
 	}
-
-	if err := shared.SetReqData(req.Ctx, "fifo_priority", strconv.Itoa(s.cfg.Priority[req.Model])); err != nil {
-		s.logger.Debugf("failed to set fifo_priority metadata: %v", err)
-	}
-
 	if s.effects.GrantServe(req, modelID) {
 		s.inFlight[modelID]++
 	}
@@ -193,6 +193,78 @@ func TestFIFO_GrantSetsPriorityMetadata(t *testing.T) {
 	}
 }

+func TestFIFO_GrantSetsPriorityMetadata_DefaultZero(t *testing.T) {
+	// A model that is not listed in the Priority map should get fifo_priority="0".
+	eff := newFakeEffects()
+	eff.states["unlisted"] = process.StateReady
+	cfg := config.FifoConfig{Priority: map[string]int{"other": 5}} // "unlisted" absent
+	s := NewFIFO("test", logmon.NewWriter(io.Discard), &stubPlanner{}, cfg, nil, eff)
+
+	ctx := shared.SetContext(context.Background(), shared.ReqContextData{ModelID: "unlisted", Metadata: make(map[string]string)})
+	s.OnRequest(HandlerReq{Model: "unlisted", Ctx: ctx})
+
+	if got := eff.served("unlisted"); got != 1 {
+		t.Fatalf("served(unlisted)=%d want 1", got)
+	}
+	data, ok := shared.ReadContext(eff.lastServeReq.Ctx)
+	if !ok {
+		t.Fatal("context data missing from granted request")
+	}
+	if data.Metadata["fifo_priority"] != "0" {
+		t.Errorf("fifo_priority = %q, want %q", data.Metadata["fifo_priority"], "0")
+	}
+}
+
+func TestFIFO_GrantSetsPriorityMetadata_NoMetadataMap(t *testing.T) {
+	// When the request context has no Metadata map, grantHandler must not crash.
+	// It should log a debug message and still grant the request.
+	eff := newFakeEffects()
+	eff.states["a"] = process.StateReady
+	cfg := config.FifoConfig{Priority: map[string]int{"a": 3}}
+	s := NewFIFO("test", logmon.NewWriter(io.Discard), &stubPlanner{}, cfg, nil, eff)
+
+	// No Metadata map in the context data — SetReqData will return an error.
+	ctx := shared.SetContext(context.Background(), shared.ReqContextData{ModelID: "a"})
+	s.OnRequest(HandlerReq{Model: "a", Ctx: ctx})
+
+	// The grant must still succeed despite the missing metadata map.
+	if got := eff.served("a"); got != 1 {
+		t.Fatalf("served(a)=%d want 1 (metadata error must not prevent grant)", got)
+	}
+}
+
+func TestFIFO_GrantSetsPriorityMetadata_AfterSwapCompletion(t *testing.T) {
+	// Priority metadata must be set for waiters granted via OnSwapDone, not just
+	// requests that hit the fast path.
+	eff := newFakeEffects()
+	eff.states["a"] = process.StateStopped // forces a swap
+	cfg := config.FifoConfig{Priority: map[string]int{"a": 9}}
+	s := NewFIFO("test", logmon.NewWriter(io.Discard), &stubPlanner{}, cfg, nil, eff)
+
+	ctx := shared.SetContext(context.Background(), shared.ReqContextData{ModelID: "a", Metadata: make(map[string]string)})
+	s.OnRequest(HandlerReq{Model: "a", Ctx: ctx})
+
+	// Swap is in flight; no grant yet.
+	if got := eff.served("a"); got != 0 {
+		t.Fatalf("served(a)=%d want 0 before swap done", got)
+	}
+
+	// Complete the swap.
+	eff.states["a"] = process.StateReady
+	s.OnSwapDone(SwapDone{ModelID: "a"})
+
+	if got := eff.served("a"); got != 1 {
+		t.Fatalf("served(a)=%d want 1 after swap done", got)
+	}
+	data, ok := shared.ReadContext(eff.lastServeReq.Ctx)
+	if !ok {
+		t.Fatal("context data missing from granted request after swap")
+	}
+	if data.Metadata["fifo_priority"] != "9" {
+		t.Errorf("fifo_priority = %q, want %q", data.Metadata["fifo_priority"], "9")
+	}
+}
+
 func TestFIFO_ModelNotFound(t *testing.T) {
 	eff := newFakeEffects() // no states => model unknown
 	s := newFIFO(&stubPlanner{}, eff)
@@ -2,7 +2,6 @@ package server

 import (
 	"encoding/json"
-	"fmt"
 	"net/http"
 	"sort"
 	"strings"
@@ -10,7 +9,6 @@ import (

 	"github.com/mostlygeek/llama-swap/internal/config"
 	"github.com/mostlygeek/llama-swap/internal/event"
-	"github.com/mostlygeek/llama-swap/internal/process"
 	"github.com/mostlygeek/llama-swap/internal/shared"
 )

@@ -273,7 +271,7 @@ func (s *Server) startPreload() {
 			if err != nil {
 				continue
 			}
-			req = req.WithContext(shared.SetContext(req.Context(), shared.ReqContextData{Model: modelID, ModelID: modelID, Metadata: make(map[string]string)}))
+			req = req.WithContext(shared.SetContext(req.Context(), shared.ReqContextData{Model: modelID, ModelID: modelID}))

 			dw := &discardResponseWriter{status: http.StatusOK}
 			s.local.ServeHTTP(dw, req)
@@ -316,7 +314,7 @@ func handleUpstreamRedirect(w http.ResponseWriter, r *http.Request) {
 func (s *Server) handleUpstream(w http.ResponseWriter, r *http.Request) {
 	upstreamPath := r.PathValue("upstreamPath")

-	searchName, modelID, remainingPath, found := shared.FindModelInPath(s.cfg, "/"+upstreamPath)
+	searchName, modelID, remainingPath, found := findModelInPath(s.cfg, "/"+upstreamPath)
 	if !found {
 		shared.SendResponse(w, r, http.StatusNotFound, "model not found")
 		return
@@ -340,29 +338,7 @@ func (s *Server) handleUpstream(w http.ResponseWriter, r *http.Request) {
 	// Strip the /upstream/<model> prefix before forwarding.
 	r.URL.Path = remainingPath
 	// Pin the resolved model so the router skips body/query extraction.
-	*r = *r.WithContext(shared.SetContext(r.Context(), shared.ReqContextData{Model: searchName, ModelID: modelID, Metadata: make(map[string]string)}))
-
-	// If the path matches an upstream.ignorePaths entry and the model is
-	// not already loaded, refuse the request without triggering a swap. The
-	// server was not able to process the response because the model was not
-	// already loaded.
-	for _, re := range s.cfg.Upstream.IgnorePaths {
-		if !re.MatchString(remainingPath) {
-			continue
-		}
-		if s.local.Handles(modelID) {
-			state, ok := s.local.RunningModels()[modelID]
-			if !ok || state != process.StateReady {
-				shared.SendResponse(w, r, http.StatusConflict,
-					fmt.Sprintf("model %s is not loaded; path matches upstream.ignorePaths", modelID))
-				return
-			}
-		}
-		// Either the model is already loaded (no swap would be triggered)
-		// or this is a peer model (peer proxying never swaps). Fall through
-		// to normal dispatch.
-		break
-	}
+	*r = *r.WithContext(shared.SetContext(r.Context(), shared.ReqContextData{Model: searchName, ModelID: modelID}))

 	switch {
 	case s.local.Handles(modelID):
@@ -373,3 +349,29 @@ func (s *Server) handleUpstream(w http.ResponseWriter, r *http.Request) {
 		shared.SendResponse(w, r, http.StatusNotFound, "no router for model "+modelID)
 	}
 }
+
+// findModelInPath walks a slash-separated path, building up segments until one
+// matches a configured model. This resolves model names that contain slashes
+// (e.g. "author/model"). Returns the matched name, its real model ID, the
+// remaining path, and whether a match was found.
+func findModelInPath(cfg config.Config, path string) (searchName, realName, remainingPath string, found bool) {
+	parts := strings.Split(strings.TrimSpace(path), "/")
+	name := ""
+
+	for i, part := range parts {
+		if part == "" {
+			continue
+		}
+		if name == "" {
+			name = part
+		} else {
+			name = name + "/" + part
+		}
+
+		if modelID, ok := cfg.RealModelName(name); ok {
+			return name, modelID, "/" + strings.Join(parts[i+1:], "/"), true
+		}
+	}
+
+	return "", "", "", false
+}
@@ -2,17 +2,11 @@ package server

 import (
 	"encoding/json"
-	"io"
 	"net/http"
 	"net/http/httptest"
-	"regexp"
-	"strings"
 	"testing"

 	"github.com/mostlygeek/llama-swap/internal/config"
-	"github.com/mostlygeek/llama-swap/internal/logmon"
-	"github.com/mostlygeek/llama-swap/internal/process"
-	"github.com/mostlygeek/llama-swap/internal/shared"
 )

 func TestServer_HandleListModels(t *testing.T) {
@@ -84,7 +78,6 @@ func TestServer_HandleListModels_Aliases(t *testing.T) {

 func TestServer_FindModelInPath(t *testing.T) {
 	cfg := config.Config{Models: map[string]config.ModelConfig{
-		"author":       {},
 		"author/model": {},
 		"simple":       {},
 	}}
@@ -98,14 +91,13 @@ func TestServer_FindModelInPath(t *testing.T) {
 		{"/simple/v1/chat", "simple", "/v1/chat", true},
 		{"/author/model/v1/chat", "author/model", "/v1/chat", true},
 		{"/author/model", "author/model", "/", true},
-		{"/author/v1/chat", "author", "/v1/chat", true},
 		{"/missing/v1", "", "", false},
 		{"/", "", "", false},
 	}
 	for _, c := range cases {
-		name, _, rem, found := shared.FindModelInPath(cfg, c.path)
+		name, _, rem, found := findModelInPath(cfg, c.path)
 		if found != c.wantFound || name != c.wantName || (found && rem != c.wantRem) {
-			t.Errorf("FindModelInPath(%q) = (%q,%q,%v), want (%q,%q,%v)",
+			t.Errorf("findModelInPath(%q) = (%q,%q,%v), want (%q,%q,%v)",
 				c.path, name, rem, found, c.wantName, c.wantRem, c.wantFound)
 		}
 	}
@@ -141,165 +133,6 @@ func TestServer_HandleUpstream(t *testing.T) {
 	})
 }

-func upstreamMetricsServer(response string) *Server {
-	cfg := config.Config{Models: map[string]config.ModelConfig{"m1": {}}}
-	proxylog := logmon.NewWriter(io.Discard)
-	s := &Server{
-		cfg:         cfg,
-		muxlog:      logmon.NewWriter(io.Discard),
-		proxylog:    proxylog,
-		upstreamlog: logmon.NewWriter(io.Discard),
-		inflight:    &inflightCounter{},
-		metrics:     newMetricsMonitor(proxylog, 10, 0),
-		local:       newStubRouter([]string{"m1"}, response),
-		peer:        newStubRouter(nil, ""),
-	}
-	s.routes()
-	return s
-}
-
-func TestServer_HandleUpstream_IgnorePaths(t *testing.T) {
-	// Compile a pattern that matches static asset suffixes.
-	pattern := regexp.MustCompile(`.*\.(js|json|css|png|gif|jpg|jpeg|txt)$`)
-
-	t.Run("matched path, model not loaded, returns 409", func(t *testing.T) {
-		local := newStubRouter([]string{"m1"}, "upstream-body")
-		// running is nil/empty: model is not in RunningModels() => not loaded.
-		s := newTestServer(local, newStubRouter(nil, ""))
-		s.cfg = config.Config{
-			Models: map[string]config.ModelConfig{"m1": {}},
-			Upstream: config.UpstreamConfig{
-				IgnorePaths: []*regexp.Regexp{pattern},
-			},
-		}
-
-		w := httptest.NewRecorder()
-		s.ServeHTTP(w, httptest.NewRequest(http.MethodGet, "/upstream/m1/foo.js", nil))
-
-		if w.Code != http.StatusConflict {
-			t.Fatalf("status = %d, want %d (body=%q)", w.Code, http.StatusConflict, w.Body.String())
-		}
-		if !strings.Contains(w.Body.String(), "not loaded") {
-			t.Errorf("body = %q, want it to contain 'not loaded'", w.Body.String())
-		}
-	})
-
-	t.Run("matched path, model already loaded, serves normally", func(t *testing.T) {
-		local := newStubRouter([]string{"m1"}, "upstream-body")
-		local.running = map[string]process.ProcessState{"m1": process.StateReady}
-		s := newTestServer(local, newStubRouter(nil, ""))
-		s.cfg = config.Config{
-			Models: map[string]config.ModelConfig{"m1": {}},
-			Upstream: config.UpstreamConfig{
-				IgnorePaths: []*regexp.Regexp{pattern},
-			},
-		}
-
-		w := httptest.NewRecorder()
-		s.ServeHTTP(w, httptest.NewRequest(http.MethodGet, "/upstream/m1/foo.js", nil))
-
-		if w.Code != http.StatusOK || w.Body.String() != "upstream-body" {
-			t.Fatalf("status=%d body=%q, want 200 'upstream-body'", w.Code, w.Body.String())
-		}
-	})
-
-	t.Run("non-matched path, model not loaded, serves normally", func(t *testing.T) {
-		local := newStubRouter([]string{"m1"}, "upstream-body")
-		s := newTestServer(local, newStubRouter(nil, ""))
-		s.cfg = config.Config{
-			Models: map[string]config.ModelConfig{"m1": {}},
-			Upstream: config.UpstreamConfig{
-				IgnorePaths: []*regexp.Regexp{pattern},
-			},
-		}
-
-		w := httptest.NewRecorder()
-		s.ServeHTTP(w, httptest.NewRequest(http.MethodGet, "/upstream/m1/v1/chat/completions", nil))
-
-		if w.Code != http.StatusOK || w.Body.String() != "upstream-body" {
-			t.Fatalf("status=%d body=%q, want 200 'upstream-body'", w.Code, w.Body.String())
-		}
-	})
-
-	t.Run("matched path, peer model, serves normally", func(t *testing.T) {
-		// Peer routers do not appear via RunningModels on the local router;
-		// they should fall through to normal dispatch without 409.
-		local := newStubRouter(nil, "")
-		peer := newStubRouter([]string{"m1"}, "peer-body")
-		s := newTestServer(local, peer)
-		s.cfg = config.Config{
-			Models: map[string]config.ModelConfig{"m1": {}},
-			Upstream: config.UpstreamConfig{
-				IgnorePaths: []*regexp.Regexp{pattern},
-			},
-		}
-
-		w := httptest.NewRecorder()
-		s.ServeHTTP(w, httptest.NewRequest(http.MethodGet, "/upstream/m1/foo.js", nil))
-
-		if w.Code != http.StatusOK || w.Body.String() != "peer-body" {
-			t.Fatalf("status=%d body=%q, want 200 'peer-body'", w.Code, w.Body.String())
-		}
-	})
-}
-
-func TestServer_HandleUpstream_MetricsRecordsSupportedPath(t *testing.T) {
-	resp := `{"usage":{"prompt_tokens":3,"completion_tokens":5}}`
-	s := upstreamMetricsServer(resp)
-
-	w := httptest.NewRecorder()
-	req := httptest.NewRequest(http.MethodPost, "/upstream/m1/v1/chat/completions", strings.NewReader(`{}`))
-	req.Header.Set("Content-Type", "application/json")
-	s.ServeHTTP(w, req)
-
-	if w.Code != http.StatusOK || w.Body.String() != resp {
-		t.Fatalf("status=%d body=%q", w.Code, w.Body.String())
-	}
-	entries := s.metrics.getMetrics()
-	if len(entries) != 1 {
-		t.Fatalf("want 1 metrics entry, got %d", len(entries))
-	}
-	if entries[0].Model != "m1" {
-		t.Errorf("model = %q, want m1", entries[0].Model)
-	}
-	if entries[0].ReqPath != "/v1/chat/completions" {
-		t.Errorf("req_path = %q, want /v1/chat/completions", entries[0].ReqPath)
-	}
-	if entries[0].Tokens.InputTokens != 3 || entries[0].Tokens.OutputTokens != 5 {
-		t.Errorf("tokens = %+v, want input=3 output=5", entries[0].Tokens)
-	}
-}
-
-func TestServer_HandleUpstream_MetricsSkipsUnsupportedPath(t *testing.T) {
-	s := upstreamMetricsServer("ok")
-
-	w := httptest.NewRecorder()
-	req := httptest.NewRequest(http.MethodPost, "/upstream/m1/probe", strings.NewReader(`{}`))
-	req.Header.Set("Content-Type", "application/json")
-	s.ServeHTTP(w, req)
-
-	if w.Code != http.StatusOK || w.Body.String() != "ok" {
-		t.Fatalf("status=%d body=%q", w.Code, w.Body.String())
-	}
-	if len(s.metrics.getMetrics()) != 0 {
-		t.Errorf("want no metrics entries for unsupported path, got %d", len(s.metrics.getMetrics()))
-	}
-}
-
-func TestServer_HandleUpstream_MetricsSkipsGET(t *testing.T) {
-	s := upstreamMetricsServer(`{"usage":{}}`)
-
-	w := httptest.NewRecorder()
-	s.ServeHTTP(w, httptest.NewRequest(http.MethodGet, "/upstream/m1/v1/chat/completions", nil))
-
-	if w.Code != http.StatusOK {
-		t.Fatalf("status=%d", w.Code)
-	}
-	if len(s.metrics.getMetrics()) != 0 {
-		t.Errorf("want no metrics entries for GET upstream, got %d", len(s.metrics.getMetrics()))
-	}
-}
-
 func TestServer_HandleMetrics_Unavailable(t *testing.T) {
 	s := newTestServer(newStubRouter(nil, ""), newStubRouter(nil, ""))

@@ -105,9 +105,7 @@ func (s *Server) handleAPIMetrics(w http.ResponseWriter, r *http.Request) {
 // filtered to samples after the ?after=<RFC3339> timestamp.
 func (s *Server) handleAPIPerformance(w http.ResponseWriter, r *http.Request) {
 	if s.perf == nil {
-		w.Header().Set("Content-Type", "application/json")
-		w.WriteHeader(http.StatusServiceUnavailable)
-		json.NewEncoder(w).Encode(map[string]bool{"enabled": false})
+		shared.SendResponse(w, r, http.StatusServiceUnavailable, "performance monitor not available")
 		return
 	}

@@ -138,7 +136,6 @@ func (s *Server) handleAPIPerformance(w http.ResponseWriter, r *http.Request) {

 	w.Header().Set("Content-Type", "application/json")
 	json.NewEncoder(w).Encode(map[string]any{
-		"enabled":   true,
 		"sys_stats": sysStats,
 		"gpu_stats": gpuStats,
 	})
@@ -76,7 +76,7 @@ func (s *Server) getLogger(logMonitorID string) (*logmon.Monitor, error) {
 	case "upstream":
 		return s.upstreamlog, nil
 	default:
-		if _, modelID, _, found := shared.FindModelInPath(s.cfg, "/"+logMonitorID); found {
+		if _, modelID, _, found := findModelInPath(s.cfg, "/"+logMonitorID); found {
 			if log, ok := s.local.ProcessLogger(modelID); ok {
 				return log, nil
 			}
@@ -25,8 +25,6 @@ import (
 // TokenMetrics holds token usage and performance metrics.
 type TokenMetrics struct {
 	CachedTokens    int     `json:"cache_tokens"`
-	DraftTokens     int     `json:"draft_tokens"`
-	DraftAccTokens  int     `json:"draft_acc_tokens"`
 	InputTokens     int     `json:"input_tokens"`
 	OutputTokens    int     `json:"output_tokens"`
 	PromptPerSecond float64 `json:"prompt_per_second"`
@@ -35,17 +33,15 @@ type TokenMetrics struct {

 // ActivityLogEntry represents parsed token statistics from llama-server logs.
 type ActivityLogEntry struct {
-	ID              int               `json:"id"`
-	Timestamp       time.Time         `json:"timestamp"`
-	Model           string            `json:"model"`
-	ReqPath         string            `json:"req_path"`
-	RespContentType string            `json:"resp_content_type"`
-	RespStatusCode  int               `json:"resp_status_code"`
-	Tokens          TokenMetrics      `json:"tokens"`
-	DurationMs      int               `json:"duration_ms"`
-	HasCapture      bool              `json:"has_capture"`
-	ErrorMsg        string            `json:"error_msg,omitempty"`
-	Metadata        map[string]string `json:"metadata,omitempty"`
+	ID              int          `json:"id"`
+	Timestamp       time.Time    `json:"timestamp"`
+	Model           string       `json:"model"`
+	ReqPath         string       `json:"req_path"`
+	RespContentType string       `json:"resp_content_type"`
+	RespStatusCode  int          `json:"resp_status_code"`
+	Tokens          TokenMetrics `json:"tokens"`
+	DurationMs      int          `json:"duration_ms"`
+	HasCapture      bool         `json:"has_capture"`
 }

 // ActivityLogEvent carries a single activity log entry to event subscribers.
@@ -126,11 +122,9 @@ func (mp *metricsMonitor) getMetricsJSON() ([]byte, error) {
 }

 // record parses a completed response body and stores/emits an activity entry.
-// Successful requests store a zstd+CBOR capture (when enabled) with cf
-// controlling which parts are retained. Failed (non-200) requests capture the
-// request only and set ErrorMsg to a description of the failure, so the error
-// can be inspected without storing unreadable raw response bytes. reqBody and
-// reqHeaders are the request data buffered before dispatch.
+// When captures are enabled, a zstd+CBOR capture is stored for successful
+// requests, with cf controlling which request/response parts are retained.
+// reqBody and reqHeaders are the request data buffered before dispatch.
 func (mp *metricsMonitor) record(modelID string, r *http.Request, recorder *responseBodyCopier, cf captureFields, reqBody []byte, reqHeaders map[string]string) {
 	tm := ActivityLogEntry{
 		Timestamp:       time.Now(),
@@ -141,13 +135,6 @@ func (mp *metricsMonitor) record(modelID string, r *http.Request, recorder *resp
 		DurationMs:      int(time.Since(recorder.StartTime()).Milliseconds()),
 	}

-	if ctxData, ok := shared.ReadContext(r.Context()); ok && len(ctxData.Metadata) > 0 {
-		tm.Metadata = make(map[string]string, len(ctxData.Metadata))
-		for k, v := range ctxData.Metadata {
-			tm.Metadata[k] = v
-		}
-	}
-
 	queueAndEmit := func() {
 		tm.ID = mp.queueMetrics(tm)
 		mp.emitMetric(tm)
@@ -155,13 +142,7 @@ func (mp *metricsMonitor) record(modelID string, r *http.Request, recorder *resp

 	if recorder.Status() != http.StatusOK {
 		mp.logger.Warnf("non-200 response, recording partial metrics: status=%d, path=%s", recorder.Status(), r.URL.Path)
-		decoded, decErr := mp.decodeResponseBody(recorder, r.URL.Path)
-		tm.ErrorMsg = failedErrorMessage(recorder.Status(), decoded, decErr)
-		tm.ID = mp.queueMetrics(tm)
-		// Capture the request only; the failure is surfaced via ErrorMsg
-		// rather than storing the (possibly undisplayable) response body.
-		tm.HasCapture = mp.storeCapture(tm.ID, r, recorder, cf&^captureRespBody, reqBody, reqHeaders, nil)
-		mp.emitMetric(tm)
+		queueAndEmit()
 		return
 	}

@@ -176,7 +157,6 @@ func (mp *metricsMonitor) record(modelID string, r *http.Request, recorder *resp
 		decoded, err := decompressBody(body, encoding)
 		if err != nil {
 			mp.logger.Warnf("metrics: decompression failed: %v, path=%s, recording minimal metrics", err, r.URL.Path)
-			tm.ErrorMsg = fmt.Sprintf("response decompression failed: %v", err)
 			queueAndEmit()
 			return
 		}
@@ -215,101 +195,30 @@ func (mp *metricsMonitor) record(modelID string, r *http.Request, recorder *resp
 	}

 	tm.ID = mp.queueMetrics(tm)
-	tm.HasCapture = mp.storeCapture(tm.ID, r, recorder, cf, reqBody, reqHeaders, body)
+	if mp.enableCaptures {
+		capture := ReqRespCapture{
+			ID:         tm.ID,
+			ReqPath:    r.URL.Path,
+			ReqHeaders: reqHeaders,
+		}
+		if cf&captureReqBody != 0 {
+			capture.ReqBody = reqBody
+		}
+		if cf&captureRespHeaders != 0 {
+			capture.RespHeaders = headerMap(recorder.Header())
+			redactHeaders(capture.RespHeaders)
+			delete(capture.RespHeaders, "Content-Encoding")
+		}
+		if cf&captureRespBody != 0 {
+			capture.RespBody = body
+		}
+		if mp.addCapture(capture) {
+			tm.HasCapture = true
+		}
+	}
 	mp.emitMetric(tm)
 }

-// storeCapture assembles a ReqRespCapture for id, honoring the captureFields
-// mask, and stores it when captures are enabled. body is the response body to
-// capture (already decompressed by the caller); pass nil to omit it. Returns
-// true if a capture was stored.
-func (mp *metricsMonitor) storeCapture(id int, r *http.Request, recorder *responseBodyCopier, cf captureFields, reqBody []byte, reqHeaders map[string]string, body []byte) bool {
-	if !mp.enableCaptures {
-		return false
-	}
-	capture := ReqRespCapture{
-		ID:         id,
-		ReqPath:    r.URL.Path,
-		ReqHeaders: reqHeaders,
-	}
-	if cf&captureReqBody != 0 {
-		capture.ReqBody = reqBody
-	}
-	if cf&captureRespHeaders != 0 {
-		capture.RespHeaders = headerMap(recorder.Header())
-		redactHeaders(capture.RespHeaders)
-		delete(capture.RespHeaders, "Content-Encoding")
-	}
-	if cf&captureRespBody != 0 {
-		capture.RespBody = body
-	}
-	return mp.addCapture(capture)
-}
-
-// decodeResponseBody returns the buffered response body, decompressing it when
-// the upstream set a Content-Encoding we recognize. On decompression failure it
-// logs a warning and returns an error so the caller can record a description
-// (via ErrorMsg) instead of storing unreadable raw bytes.
-func (mp *metricsMonitor) decodeResponseBody(recorder *responseBodyCopier, path string) ([]byte, error) {
-	body := recorder.body.Bytes()
-	if len(body) == 0 {
-		return nil, nil
-	}
-	encoding := recorder.Header().Get("Content-Encoding")
-	if encoding == "" {
-		return body, nil
-	}
-	decoded, err := decompressBody(body, encoding)
-	if err != nil {
-		mp.logger.Warnf("metrics: response decompression failed: %v, path=%s", err, path)
-		return nil, err
-	}
-	return decoded, nil
-}
-
-// errorMessagePaths lists JSON paths where a human-readable error message can
-// live across OpenAI- and llama.cpp-style error responses.
-var errorMessagePaths = []string{"error.message", "error", "message", "detail"}
-
-// extractErrorMessage pulls a human-readable error string from a JSON error
-// response. Returns "" if no message is found or the body is not valid JSON.
-func extractErrorMessage(body []byte) string {
-	if !gjson.ValidBytes(body) {
-		return ""
-	}
-	parsed := gjson.ParseBytes(body)
-	for _, path := range errorMessagePaths {
-		v := parsed.Get(path)
-		if v.Exists() && v.Type == gjson.String {
-			if s := strings.TrimSpace(v.String()); s != "" {
-				return s
-			}
-		}
-	}
-	return ""
-}
-
-// failedErrorMessage builds a human-readable description for a non-200 response.
-// It prefers an error message parsed from the (decompressed) body and falls back
-// to the HTTP status text. A non-nil decErr indicates the body could not be
-// decoded, in which case the decode error is described instead.
-func failedErrorMessage(status int, body []byte, decErr error) string {
-	const maxLen = 500
-	if decErr != nil {
-		return fmt.Sprintf("response decode failed: %v", decErr)
-	}
-	if msg := extractErrorMessage(body); msg != "" {
-		if len(msg) > maxLen {
-			msg = msg[:maxLen] + "..."
-		}
-		return msg
-	}
-	if text := http.StatusText(status); text != "" {
-		return fmt.Sprintf("%d %s", status, text)
-	}
-	return fmt.Sprintf("HTTP %d", status)
-}
-
 // usagePaths lists the JSON paths where a per-event usage object can live.
 var usagePaths = []string{"usage", "response.usage", "message.usage"}

@@ -428,8 +337,6 @@ func buildMetrics(modelID string, start time.Time, inputTokens, outputTokens, ca
 	durationMs := wallDurationMs
 	tokensPerSecond := -1.0
 	promptPerSecond := -1.0
-	draftTokens := -1
-	draftAccTokens := -1

 	if timings.Exists() {
 		inputTokens = timings.Get("prompt_n").Int()
@@ -443,10 +350,6 @@ func buildMetrics(modelID string, start time.Time, inputTokens, outputTokens, ca
 		if cachedValue := timings.Get("cache_n"); cachedValue.Exists() {
 			cachedTokens = cachedValue.Int()
 		}
-		if timings.Get("draft_n").Exists() && timings.Get("draft_n_accepted").Exists() {
-			draftTokens = int(timings.Get("draft_n").Int())
-			draftAccTokens = int(timings.Get("draft_n_accepted").Int())
-		}
 	}

 	return ActivityLogEntry{
@@ -454,8 +357,6 @@ func buildMetrics(modelID string, start time.Time, inputTokens, outputTokens, ca
 		Model:     modelID,
 		Tokens: TokenMetrics{
 			CachedTokens:    int(cachedTokens),
-			DraftTokens:     draftTokens,
-			DraftAccTokens:  draftAccTokens,
 			InputTokens:     int(inputTokens),
 			OutputTokens:    int(outputTokens),
 			PromptPerSecond: promptPerSecond,
@@ -4,7 +4,6 @@ import (
 	"bytes"
 	"io"
 	"net/http"
-	"strings"

 	"github.com/mostlygeek/llama-swap/internal/chain"
 	"github.com/mostlygeek/llama-swap/internal/config"
@@ -22,27 +21,8 @@ func CreateMetricsMiddleware(mm *metricsMonitor, cfg config.Config) chain.Middle
 				return
 			}

-			// Determine the model-routed endpoint path. Regular routes are
-			// already meterable; /upstream/<model>/<path> is metered only when
-			// the remaining path matches a model-dispatched endpoint.
-			checkPath := r.URL.Path
-			if strings.HasPrefix(r.URL.Path, "/upstream/") {
-				var found bool
-				_, _, checkPath, found = shared.FindModelInPath(cfg, strings.TrimPrefix(r.URL.Path, "/upstream"))
-				if !found {
-					next.ServeHTTP(w, r)
-					return
-				}
-			}
-
-			if !isMetricsRecordPath(checkPath) {
-				next.ServeHTTP(w, r)
-				return
-			}
-
 			// Resolve the model now so downstream dispatch hits the context
-			// fast path; FetchContext restores the request body for regular
-			// routes and extracts the model from the URL for /upstream routes.
+			// fast path; FetchContext restores the request body.
 			data, err := shared.FetchContext(r, cfg)
 			if err != nil {
 				shared.SendError(w, r, shared.ErrNoModelInContext)
@@ -51,7 +31,7 @@ func CreateMetricsMiddleware(mm *metricsMonitor, cfg config.Config) chain.Middle

 			// Buffer the request body/headers for capture before dispatch
 			// consumes them.
-			cf := captureFieldsFor(checkPath)
+			cf := captureFieldsFor(r.URL.Path)
 			var reqBody []byte
 			var reqHeaders map[string]string
 			if mm.enableCaptures {
@@ -1,15 +1,12 @@
 package server

 import (
-	"io"
 	"net/http"
 	"net/http/httptest"
 	"strings"
 	"testing"
 	"time"

-	"github.com/mostlygeek/llama-swap/internal/config"
-	"github.com/mostlygeek/llama-swap/internal/logmon"
 	"github.com/mostlygeek/llama-swap/internal/shared"
 	"github.com/tidwall/gjson"
 )
@@ -90,169 +87,79 @@ func TestMetricsMonitor_RecordMetadata(t *testing.T) {
 	}
 }

-func TestMetricsMonitor_RecordFailedRequestCapture(t *testing.T) {
-	mm := newMetricsMonitor(logmon.NewWriter(io.Discard), 10, 5)
-	r := httptest.NewRequest(http.MethodPost, "/v1/chat/completions", nil)
-	reqHeaders := map[string]string{"content-type": "application/json"}
+func TestMetricsMonitor_RecordMetadata_EmptyMap(t *testing.T) {
+	// An empty Metadata map in context must NOT set tm.Metadata (omitempty semantics).
+	mm := newMetricsMonitor(nil, 10, 0)
+	r := httptest.NewRequest(http.MethodPost, "/v1/chat/completions", strings.NewReader(`{}`))
+	r = r.WithContext(shared.SetContext(r.Context(), shared.ReqContextData{
+		ModelID:  "m",
+		Metadata: map[string]string{}, // empty, not nil
+	}))

 	w := httptest.NewRecorder()
 	copier := newBodyCopier(w)
-	copier.Header().Set("Content-Type", "application/json")
-	copier.WriteHeader(http.StatusBadGateway)
-	copier.Write([]byte(`{"error":{"message":"model unavailable"}}`))
-
-	reqBody := []byte(`{"model":"m","messages":[]}`)
-	mm.record("m", r, copier, captureAll, reqBody, reqHeaders)
-
-	entries := mm.getMetrics()
-	if len(entries) != 1 {
-		t.Fatalf("want 1 entry, got %d", len(entries))
-	}
-	entry := entries[0]
-	if entry.RespStatusCode != http.StatusBadGateway {
-		t.Errorf("status = %d, want %d", entry.RespStatusCode, http.StatusBadGateway)
-	}
-	if entry.ErrorMsg != "model unavailable" {
-		t.Errorf("error_msg = %q, want extracted message", entry.ErrorMsg)
-	}
-	if !entry.HasCapture {
-		t.Fatal("failed request should capture the request so it can be inspected")
-	}
-
-	got := mm.getCaptureByID(entry.ID)
-	if got == nil {
-		t.Fatal("capture not found")
-	}
-	if string(got.ReqBody) != `{"model":"m","messages":[]}` {
-		t.Errorf("req body = %q", got.ReqBody)
-	}
-	if len(got.RespBody) != 0 {
-		t.Errorf("resp body stored for failed request (len=%d); want none", len(got.RespBody))
-	}
-	if got.RespHeaders["Content-Type"] != "application/json" {
-		t.Errorf("resp Content-Type = %q", got.RespHeaders["Content-Type"])
-	}
-}
-
-func TestMetricsMonitor_RecordFailedRequestStatusFallback(t *testing.T) {
-	// Non-JSON error body: ErrorMsg falls back to the HTTP status text.
-	mm := newMetricsMonitor(logmon.NewWriter(io.Discard), 10, 5)
-	r := httptest.NewRequest(http.MethodPost, "/v1/chat/completions", nil)
-
-	w := httptest.NewRecorder()
-	copier := newBodyCopier(w)
-	copier.WriteHeader(http.StatusBadGateway)
-	copier.Write([]byte("<html>upstream down</html>"))
-
-	mm.record("m", r, copier, captureAll, nil, nil)
-
-	entries := mm.getMetrics()
-	if len(entries) != 1 {
-		t.Fatalf("want 1 entry, got %d", len(entries))
-	}
-	if entries[0].ErrorMsg != "502 Bad Gateway" {
-		t.Errorf("error_msg = %q, want status text", entries[0].ErrorMsg)
-	}
-}
-
-func TestMetricsMonitor_RecordFailedRequestCaptureDisabled(t *testing.T) {
-	mm := newMetricsMonitor(logmon.NewWriter(io.Discard), 10, 0) // captures disabled
-	r := httptest.NewRequest(http.MethodPost, "/v1/chat/completions", nil)
-
-	w := httptest.NewRecorder()
-	copier := newBodyCopier(w)
-	copier.WriteHeader(http.StatusInternalServerError)
-	copier.Write([]byte(`{"error":"boom"}`))
-
-	mm.record("m", r, copier, captureAll, []byte("req"), nil)
-
-	entries := mm.getMetrics()
-	if len(entries) != 1 {
-		t.Fatalf("want 1 entry, got %d", len(entries))
-	}
-	if entries[0].HasCapture {
-		t.Fatal("captures disabled, HasCapture should be false")
-	}
-	// ErrorMsg is independent of whether captures are enabled.
-	if entries[0].ErrorMsg != "boom" {
-		t.Errorf("error_msg = %q, want boom", entries[0].ErrorMsg)
-	}
-	if mm.getCaptureByID(entries[0].ID) != nil {
-		t.Fatal("no capture should be stored when disabled")
-	}
-}
-
-func TestMetricsMonitor_RecordDecompressionFailureSetsErrorMsg(t *testing.T) {
-	mm := newMetricsMonitor(logmon.NewWriter(io.Discard), 10, 5)
-	r := httptest.NewRequest(http.MethodPost, "/v1/chat/completions", nil)
-
-	w := httptest.NewRecorder()
-	copier := newBodyCopier(w)
-	copier.Header().Set("Content-Encoding", "gzip")
 	copier.WriteHeader(http.StatusOK)
-	copier.Write([]byte("not-really-gzip"))
+	copier.Write([]byte(`{"usage":{"prompt_tokens":1,"completion_tokens":2}}`))

-	mm.record("m", r, copier, captureAll, []byte("req"), nil)
+	mm.record("m", r, copier, 0, nil, nil)

 	entries := mm.getMetrics()
 	if len(entries) != 1 {
 		t.Fatalf("want 1 entry, got %d", len(entries))
 	}
-	if entries[0].ErrorMsg == "" {
-		t.Fatal("expected ErrorMsg for decompression failure")
-	}
-	// Raw bytes must not be stored when the body could not be decoded.
-	if entries[0].HasCapture {
-		t.Fatal("decompression failure should not store a capture")
+	if entries[0].Metadata != nil {
+		t.Errorf("Metadata should be nil for empty context metadata, got %v", entries[0].Metadata)
 	}
 }

-func TestMetricsMonitor_DecodeResponseBody(t *testing.T) {
-	mm := newMetricsMonitor(logmon.NewWriter(io.Discard), 10, 5)
+func TestMetricsMonitor_RecordMetadata_NoContextData(t *testing.T) {
+	// A request with no ReqContextData in context should produce nil Metadata.
+	mm := newMetricsMonitor(nil, 10, 0)
+	r := httptest.NewRequest(http.MethodPost, "/v1/chat/completions", strings.NewReader(`{}`))
+	// No shared.SetContext call — no ReqContextData in context.

-	// No Content-Encoding: body returned unchanged.
 	w := httptest.NewRecorder()
 	copier := newBodyCopier(w)
-	copier.Write([]byte("plain"))
-	got, err := mm.decodeResponseBody(copier, "/p")
-	if err != nil || string(got) != "plain" {
-		t.Fatalf("plain body = %q, err = %v", got, err)
-	}
+	copier.WriteHeader(http.StatusOK)
+	copier.Write([]byte(`{"usage":{"prompt_tokens":3,"completion_tokens":4}}`))

-	// Bogus gzip payload: returns an error and no body (no raw bytes kept).
-	w2 := httptest.NewRecorder()
-	copier2 := newBodyCopier(w2)
-	copier2.Header().Set("Content-Encoding", "gzip")
-	copier2.Write([]byte("not-really-gzip"))
-	got, err = mm.decodeResponseBody(copier2, "/p")
-	if err == nil {
-		t.Fatal("expected decompression error")
+	mm.record("m", r, copier, 0, nil, nil)
+
+	entries := mm.getMetrics()
+	if len(entries) != 1 {
+		t.Fatalf("want 1 entry, got %d", len(entries))
 	}
-	if got != nil {
-		t.Errorf("expected nil body on failure, got %q", got)
+	if entries[0].Metadata != nil {
+		t.Errorf("Metadata should be nil when no context data, got %v", entries[0].Metadata)
 	}
 }

-func TestServer_ExtractErrorMessage(t *testing.T) {
-	cases := []struct {
-		name string
-		body string
-		want string
-	}{
-		{"openai object", `{"error":{"message":"rate limited"}}`, "rate limited"},
-		{"string error", `{"error":"bad request"}`, "bad request"},
-		{"message field", `{"message":"nope"}`, "nope"},
-		{"detail field", `{"detail":"oops"}`, "oops"},
-		{"object error ignored", `{"error":{"code":42}}`, ""},
-		{"no error", `{"usage":{}}`, ""},
-		{"invalid json", `not-json`, ""},
+func TestMetricsMonitor_RecordMetadata_DeepCopy(t *testing.T) {
+	// Mutating the original context metadata after record() must not affect the stored entry.
+	mm := newMetricsMonitor(nil, 10, 0)
+	original := map[string]string{"key": "before"}
+	r := httptest.NewRequest(http.MethodPost, "/v1/chat/completions", strings.NewReader(`{}`))
+	r = r.WithContext(shared.SetContext(r.Context(), shared.ReqContextData{
+		ModelID:  "m",
+		Metadata: original,
+	}))
+
+	w := httptest.NewRecorder()
+	copier := newBodyCopier(w)
+	copier.WriteHeader(http.StatusOK)
+	copier.Write([]byte(`{"usage":{"prompt_tokens":1,"completion_tokens":2}}`))
+
+	mm.record("m", r, copier, 0, nil, nil)
+
+	// Mutate the original map after record.
+	original["key"] = "after"
+
+	entries := mm.getMetrics()
+	if len(entries) != 1 {
+		t.Fatalf("want 1 entry, got %d", len(entries))
 	}
-	for _, tc := range cases {
-		t.Run(tc.name, func(t *testing.T) {
-			if got := extractErrorMessage([]byte(tc.body)); got != tc.want {
-				t.Errorf("extractErrorMessage = %q, want %q", got, tc.want)
-			}
-		})
+	if entries[0].Metadata["key"] != "before" {
+		t.Errorf("Metadata[key] = %q, want %q (deep copy expected)", entries[0].Metadata["key"], "before")
 	}
 }

@@ -272,40 +179,3 @@ func TestServer_ParseMetrics_Infill(t *testing.T) {
 		t.Fatalf("tokens = %+v", entry.Tokens)
 	}
 }
-
-// TestServer_MetricsMiddleware_UpstreamAudioCaptureSkipsRespBody verifies that
-// an /upstream/<model>/v1/audio/speech request uses the path-specific capture
-// mask (headers only) rather than falling back to captureAll.
-func TestServer_MetricsMiddleware_UpstreamAudioCaptureSkipsRespBody(t *testing.T) {
-	mm := newMetricsMonitor(logmon.NewWriter(io.Discard), 100, 5)
-	cfg := config.Config{Models: map[string]config.ModelConfig{"m1": {}}}
-
-	inner := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
-		w.Header().Set("Content-Type", "audio/mpeg")
-		w.WriteHeader(http.StatusOK)
-		w.Write([]byte("BINARY-AUDIO-DATA"))
-	})
-	handler := CreateMetricsMiddleware(mm, cfg)(inner)
-
-	req := httptest.NewRequest(http.MethodPost, "/upstream/m1/v1/audio/speech", strings.NewReader(`{"model":"m1"}`))
-	handler.ServeHTTP(httptest.NewRecorder(), req)
-
-	entries := mm.getMetrics()
-	if len(entries) == 0 {
-		t.Fatal("no metrics recorded")
-	}
-	last := entries[len(entries)-1]
-	if !last.HasCapture {
-		t.Fatal("expected capture to be stored")
-	}
-	cap := mm.getCaptureByID(last.ID)
-	if cap == nil {
-		t.Fatal("capture not found")
-	}
-	if len(cap.RespBody) != 0 {
-		t.Errorf("RespBody stored for /upstream audio route (len=%d); want path-specific mask to skip body", len(cap.RespBody))
-	}
-	if len(cap.RespHeaders) == 0 {
-		t.Error("RespHeaders not stored; want captureRespHeaders mask")
-	}
-}
@@ -89,27 +89,6 @@ var modelGetRoutes = []string{
 	"/sdapi/v1/loras",
 }

-// isMetricsRecordPath reports whether path is one of the model-dispatched
-// endpoints that the metrics middleware records in the activity log.
-func isMetricsRecordPath(path string) bool {
-	for _, p := range modelPostJSONRoutes {
-		if p == path {
-			return true
-		}
-	}
-	for _, p := range modelPostFormRoutes {
-		if p == path {
-			return true
-		}
-	}
-	for _, p := range modelGetRoutes {
-		if p == path {
-			return true
-		}
-	}
-	return false
-}
-
 // BuildInfo carries version metadata surfaced by GET /api/version.
 type BuildInfo struct {
 	Version string
@@ -240,11 +219,9 @@ func (s *Server) routes() {
 	mux.Handle("GET /unload", apiChain.ThenFunc(s.handleUnload))
 	mux.Handle("GET /running", apiChain.ThenFunc(s.handleRunning))

-	// Upstream passthrough. Meter only the model-dispatched endpoints that can
-	// produce token usage/timings.
-	upstreamChain := apiChain.Append(CreateMetricsMiddleware(s.metrics, s.cfg))
+	// Upstream passthrough.
 	mux.HandleFunc("GET /upstream", handleUpstreamRedirect)
-	mux.Handle("/upstream/{upstreamPath...}", upstreamChain.ThenFunc(s.handleUpstream))
+	mux.Handle("/upstream/{upstreamPath...}", apiChain.ThenFunc(s.handleUpstream))

 	// API group (API-key protected) consumed by the UI.
 	mux.Handle("POST /api/models/unload", apiChain.ThenFunc(s.handleAPIUnloadAll))
@@ -26,9 +26,6 @@ type ReqContextData struct {
 	ModelID          string
 	Streaming        bool
 	SendLoadingState bool
-	// Metadata is a request-scoped key/value bag that handlers may mutate
-	// while processing. The metrics middleware copies it into ActivityLogEntry.
-	Metadata map[string]string
 }

 var (
@@ -91,24 +88,16 @@ func SendResponse(w http.ResponseWriter, r *http.Request, status int, message st
 	w.Write(resp)
 }

-// FetchContext will attempt to get the model id from the context, then
-// from an /upstream/<model> path prefix, then from the request body/query.
-// If it extracts the model it will store it in the context for downstream
-// handlers. An error will be returned when a model cannot be identified.
+// FetchContext will attempt to get the model id from the context then
+// from the model body. If it extracts the model from the body it will
+// store the model in the context for downstream handlers. An error
+// will be returned when model can not be fetch from either location.
 func FetchContext(r *http.Request, cfg config.Config) (ReqContextData, error) {
 	data, ok := ReadContext(r.Context())
 	if ok {
 		return data, nil
 	}

-	if strings.HasPrefix(r.URL.Path, "/upstream/") {
-		if data, ok := extractUpstreamContext(r, cfg); ok {
-			*r = *r.WithContext(SetContext(r.Context(), data))
-			return data, nil
-		}
-		return ReqContextData{}, ErrNoModelInContext
-	}
-
 	if data, err := extractContext(r); err == nil && data.Model != "" {
 		realName, _ := cfg.RealModelName(data.Model)
 		if realName == "" {
@@ -125,59 +114,6 @@ func FetchContext(r *http.Request, cfg config.Config) (ReqContextData, error) {
 	return ReqContextData{}, ErrNoModelInContext
 }

-// extractUpstreamContext resolves the model from an /upstream/<model>/... path.
-func extractUpstreamContext(r *http.Request, cfg config.Config) (ReqContextData, bool) {
-	searchName, realName, _, found := FindModelInPath(cfg, strings.TrimPrefix(r.URL.Path, "/upstream"))
-	if !found {
-		return ReqContextData{}, false
-	}
-	return ReqContextData{
-		Model:            searchName,
-		ModelID:          realName,
-		ApiKey:           ExtractAPIKey(r),
-		Streaming:        r.URL.Query().Get("stream") == "true",
-		SendLoadingState: sendLoadingState(cfg, realName),
-		Metadata:         make(map[string]string),
-	}, true
-}
-
-// sendLoadingState reports whether the configured model wants loading-state SSEs.
-func sendLoadingState(cfg config.Config, modelID string) bool {
-	if mc, ok := cfg.Models[modelID]; ok {
-		return mc.SendLoadingState != nil && *mc.SendLoadingState
-	}
-	return false
-}
-
-// FindModelInPath walks a slash-separated path, building up segments until one
-// matches a configured model. This resolves model names that contain slashes
-// (e.g. "author/model"). Returns the matched name, its real model ID, the
-// remaining path, and whether a match was found.
-func FindModelInPath(cfg config.Config, path string) (searchName, realName, remainingPath string, found bool) {
-	parts := strings.Split(strings.TrimSpace(path), "/")
-	name := ""
-
-	for i, part := range parts {
-		if part == "" {
-			continue
-		}
-		if name == "" {
-			name = part
-		} else {
-			name = name + "/" + part
-		}
-
-		if modelID, ok := cfg.RealModelName(name); ok {
-			searchName = name
-			realName = modelID
-			remainingPath = "/" + strings.Join(parts[i+1:], "/")
-			found = true
-		}
-	}
-
-	return
-}
-
 func SetContext(ctx context.Context, data ReqContextData) context.Context {
 	return context.WithValue(ctx, ReqContextKey, data)
 }
@@ -187,25 +123,6 @@ func ReadContext(ctx context.Context) (ReqContextData, bool) {
 	return data, ok
 }

-// SetReqData attaches a key/value pair to the request context's metadata map.
-// The metadata map must already exist in the context's ReqContextData; callers
-// should ensure FetchContext has run or initialize the map themselves.
-// It returns an error for nil contexts or contexts without request data.
-func SetReqData(ctx context.Context, key, value string) error {
-	if ctx == nil {
-		return fmt.Errorf("cannot set request metadata on nil context")
-	}
-	data, ok := ReadContext(ctx)
-	if !ok {
-		return fmt.Errorf("no request context data found")
-	}
-	if data.Metadata == nil {
-		return fmt.Errorf("no metadata map in request context")
-	}
-	data.Metadata[key] = value
-	return nil
-}
-
 // extractContext pulls fields from an HTTP request into a ReqContextData,
 // returning whatever is available. For GET requests it reads query parameters.
 // For POST requests it inspects Content-Type and parses JSON,
@@ -222,7 +139,6 @@ func extractContext(r *http.Request) (ReqContextData, error) {
 			Model:     q.Get("model"),
 			Streaming: q.Get("stream") == "true",
 			ApiKey:    apiKey,
-			Metadata:  make(map[string]string),
 		}, nil
 	}

@@ -241,7 +157,6 @@ func extractContext(r *http.Request) (ReqContextData, error) {
 			Model:     gjson.GetBytes(bodyBytes, "model").String(),
 			Streaming: gjson.GetBytes(bodyBytes, "stream").Bool(),
 			ApiKey:    apiKey,
-			Metadata:  make(map[string]string),
 		}, nil
 	}

@@ -263,7 +178,6 @@ func extractContext(r *http.Request) (ReqContextData, error) {
 		Model:     r.FormValue("model"),
 		Streaming: r.FormValue("stream") == "true",
 		ApiKey:    apiKey,
-		Metadata:  make(map[string]string),
 	}, nil
 }

@@ -11,8 +11,6 @@ import (
 	"net/url"
 	"strings"
 	"testing"
-
-	"github.com/mostlygeek/llama-swap/internal/config"
 )

 func TestExtractContext_GET(t *testing.T) {
@@ -421,6 +419,73 @@ func TestSetReqData_Errors(t *testing.T) {
 	}
 }

+func TestSetReqData_NilContext(t *testing.T) {
+	// nil context must return an error without panicking.
+	err := SetReqData(nil, "k", "v")
+	if err == nil {
+		t.Error("expected error for nil context, got nil")
+	}
+}
+
+func TestSetReqData_OverwritesExistingKey(t *testing.T) {
+	ctx := SetContext(context.Background(), ReqContextData{
+		Model:    "m",
+		Metadata: map[string]string{"key": "old"},
+	})
+	if err := SetReqData(ctx, "key", "new"); err != nil {
+		t.Fatalf("SetReqData: %v", err)
+	}
+	data, _ := ReadContext(ctx)
+	if data.Metadata["key"] != "new" {
+		t.Errorf("key = %q, want %q", data.Metadata["key"], "new")
+	}
+}
+
+func TestExtractContext_MetadataInitialized_GET(t *testing.T) {
+	r, _ := http.NewRequest(http.MethodGet, "/?model=llama3", nil)
+	got, err := extractContext(r)
+	if err != nil {
+		t.Fatalf("extractContext: %v", err)
+	}
+	if got.Metadata == nil {
+		t.Error("Metadata should be initialized (not nil) for GET requests")
+	}
+}
+
+func TestExtractContext_MetadataInitialized_JSON(t *testing.T) {
+	r, _ := http.NewRequest(http.MethodPost, "/v1/chat/completions", strings.NewReader(`{"model":"llama3"}`))
+	r.Header.Set("Content-Type", "application/json")
+	got, err := extractContext(r)
+	if err != nil {
+		t.Fatalf("extractContext: %v", err)
+	}
+	if got.Metadata == nil {
+		t.Error("Metadata should be initialized (not nil) for JSON POST requests")
+	}
+}
+
+func TestExtractContext_MetadataInitialized_Form(t *testing.T) {
+	r, _ := http.NewRequest(http.MethodPost, "/v1/audio/transcriptions", strings.NewReader("model=whisper-1"))
+	r.Header.Set("Content-Type", "application/x-www-form-urlencoded")
+	got, err := extractContext(r)
+	if err != nil {
+		t.Fatalf("extractContext: %v", err)
+	}
+	if got.Metadata == nil {
+		t.Error("Metadata should be initialized (not nil) for form POST requests")
+	}
+}
+
+func TestExtractContext_MetadataIsWritable(t *testing.T) {
+	// Verify the initialized map is writable — i.e. SetReqData can use it.
+	r, _ := http.NewRequest(http.MethodGet, "/?model=llama3", nil)
+	got, _ := extractContext(r)
+	ctx := SetContext(context.Background(), got)
+	if err := SetReqData(ctx, "x", "y"); err != nil {
+		t.Fatalf("SetReqData on extractContext Metadata: %v", err)
+	}
+}
+
 func TestServer_ExtractAPIKey(t *testing.T) {
 	basicHeader := func(user, pass string) string {
 		return "Basic " + base64.StdEncoding.EncodeToString([]byte(user+":"+pass))
@@ -458,68 +523,3 @@ func TestServer_ExtractAPIKey(t *testing.T) {
 		})
 	}
 }
-
-func TestFetchContext_UpstreamPath(t *testing.T) {
-	cfg := config.Config{
-		Models: map[string]config.ModelConfig{
-			"m1":           {},
-			"author/model": {},
-			"real":         {Aliases: []string{"nick"}},
-		},
-	}
-
-	cases := []struct {
-		name        string
-		path        string
-		wantModel   string
-		wantModelID string
-		wantErr     bool
-	}{
-		{"known model", "/upstream/m1/v1/chat/completions", "m1", "m1", false},
-		{"model with slash", "/upstream/author/model/v1/chat", "author/model", "author/model", false},
-		{"unknown model", "/upstream/nope/v1/chat/completions", "", "", true},
-		{"bare model path", "/upstream/m1/", "m1", "m1", false},
-	}
-
-	for _, c := range cases {
-		t.Run(c.name, func(t *testing.T) {
-			r := httptest.NewRequest(http.MethodPost, c.path, strings.NewReader(`{}`))
-			data, err := FetchContext(r, cfg)
-			if (err != nil) != c.wantErr {
-				t.Fatalf("wantErr=%v got err=%v", c.wantErr, err)
-			}
-			if c.wantErr {
-				return
-			}
-			if data.Model != c.wantModel {
-				t.Errorf("model = %q, want %q", data.Model, c.wantModel)
-			}
-			if data.ModelID != c.wantModelID {
-				t.Errorf("modelID = %q, want %q", data.ModelID, c.wantModelID)
-			}
-			if data.Metadata == nil {
-				t.Error("metadata map not initialized")
-			}
-		})
-	}
-}
-
-func TestFetchContext_UpstreamPath_DoesNotReadBody(t *testing.T) {
-	cfg := config.Config{Models: map[string]config.ModelConfig{"m1": {}}}
-	body := `{"model":"should-not-matter"}`
-	r := httptest.NewRequest(http.MethodPost, "/upstream/m1/v1/chat/completions", strings.NewReader(body))
-
-	_, err := FetchContext(r, cfg)
-	if err != nil {
-		t.Fatalf("FetchContext: %v", err)
-	}
-
-	// The body should be untouched so the upstream handler can still read it.
-	got, err := io.ReadAll(r.Body)
-	if err != nil {
-		t.Fatalf("read body: %v", err)
-	}
-	if string(got) != body {
-		t.Errorf("body was consumed: %q", string(got))
-	}
-}
@@ -8,7 +8,7 @@
  import Performance from "./routes/Performance.svelte";
  import Playground from "./routes/Playground.svelte";
  import PlaygroundStub from "./routes/PlaygroundStub.svelte";
-  import { enableAPIEvents, checkPerformanceEnabled } from "./stores/api";
+  import { enableAPIEvents } from "./stores/api";
  import { initScreenWidth, initSystemThemeListener, isDarkMode, appTitle, connectionState } from "./stores/theme";
  import { currentRoute } from "./stores/route";

@@ -39,7 +39,6 @@
    const cleanupScreenWidth = initScreenWidth();
    const cleanupSystemTheme = initSystemThemeListener();
    enableAPIEvents(true);
-    checkPerformanceEnabled();

    return () => {
      cleanupScreenWidth();
@@ -193,7 +193,7 @@
 <dialog
  bind:this={dialogEl}
  onclose={handleDialogClose}
-  class="bg-surface text-txtmain rounded-lg shadow-xl max-w-[80%] w-full max-h-[90vh] p-0 backdrop:bg-black/50 m-auto"
+  class="bg-surface text-txtmain rounded-lg shadow-xl max-w-4xl w-full max-h-[90vh] p-0 backdrop:bg-black/50 m-auto"
 >
  {#if capture}
    <div class="flex flex-col max-h-[90vh]">
@@ -3,7 +3,6 @@
  import { screenWidth, toggleTheme, themeMode, appTitle, isNarrow } from "../stores/theme";
  import { currentRoute } from "../stores/route";
  import { playgroundActivity } from "../stores/playgroundActivity";
-  import { performanceEnabled } from "../stores/api";
  import ConnectionStatus from "./ConnectionStatus.svelte";

  function handleTitleChange(newTitle: string): void {
@@ -85,18 +84,16 @@
    >
      Logs
    </a>
-    {#if $performanceEnabled}
-      <a
-        href="/performance"
-        use:link
-        class="text-gray-600 hover:text-black dark:text-gray-300 dark:hover:text-gray-100 p-1 whitespace-nowrap"
-        class:font-semibold={isActive("/performance", $currentRoute)}
-        class:underline={isActive("/performance", $currentRoute)}
-        class:underline-offset-4={isActive("/performance", $currentRoute)}
-      >
-        Performance
-      </a>
-    {/if}
+    <a
+      href="/performance"
+      use:link
+      class="text-gray-600 hover:text-black dark:text-gray-300 dark:hover:text-gray-100 p-1 whitespace-nowrap"
+      class:font-semibold={isActive("/performance", $currentRoute)}
+      class:underline={isActive("/performance", $currentRoute)}
+      class:underline-offset-4={isActive("/performance", $currentRoute)}
+    >
+      Performance
+    </a>
    <button onclick={toggleTheme} title="Toggle theme (current: {$themeMode})">
      {#if $themeMode === "system"}
        <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="currentColor" class="w-5 h-5">
@@ -1,85 +0,0 @@
-<script lang="ts">
-  import type { Snippet } from "svelte";
-
-  interface Props {
-    metadata: Record<string, string> | undefined;
-    children: Snippet;
-  }
-
-  let { metadata, children }: Props = $props();
-
-  let entries = $derived(Object.entries(metadata || {}));
-  let triggerEl: HTMLElement | undefined = $state();
-  let tooltipEl: HTMLDivElement | undefined = $state();
-  let show = $state(false);
-  let tooltipStyle = $state("");
-
-  function positionTooltip() {
-    if (!triggerEl || !tooltipEl) return;
-    const triggerRect = triggerEl.getBoundingClientRect();
-    const tooltipRect = tooltipEl.getBoundingClientRect();
-    const margin = 8;
-    const viewportWidth = window.innerWidth;
-    const viewportHeight = window.innerHeight;
-
-    let left = triggerRect.left;
-    let top = triggerRect.bottom + margin;
-
-    // Keep tooltip within horizontal viewport bounds
-    if (left + tooltipRect.width > viewportWidth - margin) {
-      left = triggerRect.right - tooltipRect.width;
-    }
-    if (left < margin) {
-      left = margin;
-    }
-
-    // Flip above trigger if it would overflow the bottom
-    if (top + tooltipRect.height > viewportHeight - margin) {
-      top = triggerRect.top - tooltipRect.height - margin;
-    }
-
-    tooltipStyle = `left: ${left}px; top: ${top}px; max-width: calc(100vw - ${margin * 2}px);`;
-  }
-
-  function onEnter() {
-    show = true;
-    requestAnimationFrame(positionTooltip);
-  }
-
-  function onLeave() {
-    show = false;
-  }
-</script>
-
-<span
-  bind:this={triggerEl}
-  onmouseenter={onEnter}
-  onmouseleave={onLeave}
-  onfocus={onEnter}
-  onblur={onLeave}
-  class="inline-flex"
-  role="button"
-  tabindex="0"
-  aria-label="Show metadata"
->
-  {@render children()}
-</span>
-
-{#if show && entries.length > 0}
-  <div
-    bind:this={tooltipEl}
-    style={tooltipStyle}
-    class="fixed px-3 py-2 bg-gray-900 text-white text-sm rounded-md z-50 normal-case min-w-[12rem] max-w-[24rem] shadow-lg whitespace-normal"
-  >
-    <table class="w-full text-left">
-      <tbody>
-        {#each entries as [key, value]}
-          <tr class="border-b border-white/10 last:border-0">
-            <td class="py-1 pr-3 font-medium whitespace-nowrap text-primary">{key}</td>
-            <td class="py-1 break-all">{value}</td>
-          </tr>
-        {/each}
-      </tbody>
-    </table>
-  </div>
-{/if}
@@ -0,0 +1,217 @@
+import { describe, it, expect } from "vitest";
+import type { ActivityLogEntry, TokenMetrics } from "./types";
+
+// Baseline token metrics used across tests.
+const baseTokens: TokenMetrics = {
+  cache_tokens: 0,
+  input_tokens: 10,
+  output_tokens: 5,
+  prompt_per_second: 100,
+  tokens_per_second: 50,
+};
+
+function makeEntry(overrides: Partial<ActivityLogEntry> = {}): ActivityLogEntry {
+  return {
+    id: 0,
+    timestamp: "2024-01-01T00:00:00Z",
+    model: "llama3",
+    req_path: "/v1/chat/completions",
+    resp_content_type: "application/json",
+    resp_status_code: 200,
+    tokens: baseTokens,
+    duration_ms: 100,
+    has_capture: false,
+    ...overrides,
+  };
+}
+
+describe("ActivityLogEntry", () => {
+  describe("metadata field", () => {
+    it("accepts an entry without metadata (undefined)", () => {
+      const entry = makeEntry();
+      expect(entry.metadata).toBeUndefined();
+    });
+
+    it("accepts an entry with metadata populated", () => {
+      const entry = makeEntry({ metadata: { client: "web", trace: "abc123" } });
+      expect(entry.metadata).toEqual({ client: "web", trace: "abc123" });
+    });
+
+    it("accepts an empty metadata object", () => {
+      const entry = makeEntry({ metadata: {} });
+      expect(entry.metadata).toEqual({});
+    });
+
+    it("allows reading a key from metadata when present", () => {
+      const entry = makeEntry({ metadata: { fifo_priority: "7" } });
+      expect(entry.metadata?.["fifo_priority"]).toBe("7");
+    });
+
+    it("returns undefined when accessing a missing key via optional chaining", () => {
+      const entry = makeEntry();
+      expect(entry.metadata?.["fifo_priority"]).toBeUndefined();
+    });
+
+    it("returns undefined for a missing key when metadata is an empty object", () => {
+      const entry = makeEntry({ metadata: {} });
+      expect(entry.metadata?.["missing"]).toBeUndefined();
+    });
+
+    it("supports metadata with multiple entries", () => {
+      const meta: Record<string, string> = { a: "1", b: "2", c: "3" };
+      const entry = makeEntry({ metadata: meta });
+      expect(Object.keys(entry.metadata!)).toHaveLength(3);
+      expect(entry.metadata!["b"]).toBe("2");
+    });
+  });
+
+  describe("ActivityLogEntry structure", () => {
+    it("round-trips through JSON with metadata", () => {
+      const entry = makeEntry({ metadata: { client: "web" } });
+      const json = JSON.stringify(entry);
+      const parsed: ActivityLogEntry = JSON.parse(json);
+      expect(parsed.metadata).toEqual({ client: "web" });
+    });
+
+    it("round-trips through JSON without metadata (field omitted)", () => {
+      const entry = makeEntry();
+      const json = JSON.stringify(entry);
+      const parsed: ActivityLogEntry = JSON.parse(json);
+      // When metadata is undefined it is dropped by JSON.stringify.
+      expect(parsed.metadata).toBeUndefined();
+    });
+
+    it("round-trips through JSON with null metadata preserved as-is", () => {
+      // Explicit null from a server that omits the field still satisfies
+      // the optional type when accessed with optional chaining.
+      const raw = { ...makeEntry(), metadata: null };
+      const json = JSON.stringify(raw);
+      const parsed = JSON.parse(json) as ActivityLogEntry;
+      // null is falsy — optional chaining returns undefined for null too.
+      expect(parsed.metadata ?? undefined).toBeUndefined();
+    });
+  });
+});
+
+// META_PREFIX helpers — mirror the logic in Activity.svelte so we can test it
+// without importing the Svelte component.
+const META_PREFIX = "meta:";
+
+function isMetaKey(key: string): boolean {
+  return key.startsWith(META_PREFIX);
+}
+
+function metaKey(name: string): string {
+  return META_PREFIX + name;
+}
+
+function metaLabel(key: string): string {
+  return key.slice(META_PREFIX.length);
+}
+
+describe("Activity.svelte META_PREFIX helpers", () => {
+  describe("isMetaKey", () => {
+    it("returns true for keys with meta: prefix", () => {
+      expect(isMetaKey("meta:fifo_priority")).toBe(true);
+      expect(isMetaKey("meta:client")).toBe(true);
+      expect(isMetaKey("meta:")).toBe(true); // empty suffix is still prefixed
+    });
+
+    it("returns false for standard column keys", () => {
+      expect(isMetaKey("id")).toBe(false);
+      expect(isMetaKey("model")).toBe(false);
+      expect(isMetaKey("capture")).toBe(false);
+      expect(isMetaKey("duration")).toBe(false);
+    });
+
+    it("returns false for partial or incorrect prefixes", () => {
+      expect(isMetaKey("meta")).toBe(false);
+      expect(isMetaKey("Meta:key")).toBe(false); // case-sensitive
+      expect(isMetaKey("")).toBe(false);
+    });
+  });
+
+  describe("metaKey", () => {
+    it("prepends META_PREFIX to the name", () => {
+      expect(metaKey("fifo_priority")).toBe("meta:fifo_priority");
+      expect(metaKey("client")).toBe("meta:client");
+    });
+
+    it("handles empty name", () => {
+      expect(metaKey("")).toBe("meta:");
+    });
+
+    it("is the inverse of metaLabel", () => {
+      const name = "some_key";
+      expect(metaLabel(metaKey(name))).toBe(name);
+    });
+  });
+
+  describe("metaLabel", () => {
+    it("strips META_PREFIX and returns the bare name", () => {
+      expect(metaLabel("meta:fifo_priority")).toBe("fifo_priority");
+      expect(metaLabel("meta:client")).toBe("client");
+    });
+
+    it("handles empty suffix", () => {
+      expect(metaLabel("meta:")).toBe("");
+    });
+
+    it("is the inverse of metaKey", () => {
+      const key = "meta:trace_id";
+      expect(metaKey(metaLabel(key))).toBe(key);
+    });
+  });
+
+  describe("metadata column derivation", () => {
+    it("derives unique metadata keys from a list of entries", () => {
+      const entries: ActivityLogEntry[] = [
+        makeEntry({ metadata: { client: "web", trace: "a" } }),
+        makeEntry({ metadata: { client: "mobile" } }),
+        makeEntry({ metadata: { fifo_priority: "3" } }),
+        makeEntry({}), // no metadata
+      ];
+
+      const keys = Array.from(
+        new Set(entries.flatMap((m) => Object.keys(m.metadata || {})))
+      ).sort();
+
+      expect(keys).toEqual(["client", "fifo_priority", "trace"]);
+    });
+
+    it("returns empty array when no entries have metadata", () => {
+      const entries: ActivityLogEntry[] = [makeEntry(), makeEntry()];
+      const keys = Array.from(
+        new Set(entries.flatMap((m) => Object.keys(m.metadata || {})))
+      ).sort();
+      expect(keys).toHaveLength(0);
+    });
+
+    it("maps metadata keys to meta:-prefixed column keys", () => {
+      const metaKeys = ["client", "fifo_priority"];
+      const columnKeys = metaKeys.map(metaKey);
+      expect(columnKeys).toEqual(["meta:client", "meta:fifo_priority"]);
+    });
+
+    it("resolves metadata value for a column key", () => {
+      const entry = makeEntry({ metadata: { fifo_priority: "7", client: "web" } });
+      const key = "meta:fifo_priority";
+      const value = entry.metadata?.[metaLabel(key)] ?? "-";
+      expect(value).toBe("7");
+    });
+
+    it("falls back to '-' for a column key not present in entry metadata", () => {
+      const entry = makeEntry({ metadata: { client: "web" } });
+      const key = "meta:fifo_priority";
+      const value = entry.metadata?.[metaLabel(key)] ?? "-";
+      expect(value).toBe("-");
+    });
+
+    it("falls back to '-' when entry has no metadata at all", () => {
+      const entry = makeEntry(); // metadata is undefined
+      const key = "meta:anything";
+      const value = entry.metadata?.[metaLabel(key)] ?? "-";
+      expect(value).toBe("-");
+    });
+  });
+});
@@ -25,8 +25,6 @@ export interface Model {

 export interface TokenMetrics {
  cache_tokens: number;
-  draft_tokens: number;
-  draft_acc_tokens: number;
  input_tokens: number;
  output_tokens: number;
  prompt_per_second: number;
@@ -43,8 +41,6 @@ export interface ActivityLogEntry {
  tokens: TokenMetrics;
  duration_ms: number;
  has_capture: boolean;
-  error_msg?: string;
-  metadata?: Record<string, string>;
 }

 export interface ReqRespCapture {
@@ -2,13 +2,25 @@
  import { metrics, getCapture } from "../stores/api";
  import ActivityStats from "../components/ActivityStats.svelte";
  import Tooltip from "../components/Tooltip.svelte";
-  import MetadataTooltip from "../components/MetadataTooltip.svelte";
  import CaptureDialog from "../components/CaptureDialog.svelte";
  import { persistentStore } from "../stores/persistent";
  import { onMount } from "svelte";
  import type { ReqRespCapture } from "../lib/types";

-  type ColumnKey = string;
+  type ColumnKey =
+    | "id"
+    | "time"
+    | "model"
+    | "req_path"
+    | "resp_status_code"
+    | "resp_content_type"
+    | "cached"
+    | "prompt"
+    | "generated"
+    | "prompt_speed"
+    | "gen_speed"
+    | "duration"
+    | "capture";

  interface ColumnDef {
    key: ColumnKey;
@@ -21,31 +33,26 @@
    { key: "time", label: "Time", defaultVisible: true },
    { key: "model", label: "Model", defaultVisible: true },
    { key: "req_path", label: "Path", defaultVisible: false },
-    { key: "resp_status_code", label: "Status", defaultVisible: true },
+    { key: "resp_status_code", label: "Status", defaultVisible: false },
    { key: "resp_content_type", label: "Content-Type", defaultVisible: false },
    { key: "cached", label: "Cached", defaultVisible: true },
    { key: "prompt", label: "Prompt", defaultVisible: true },
    { key: "generated", label: "Generated", defaultVisible: true },
-    { key: "drafted", label: "Drafted", defaultVisible: false },
    { key: "prompt_speed", label: "Prompt Speed", defaultVisible: true },
    { key: "gen_speed", label: "Gen Speed", defaultVisible: true },
    { key: "duration", label: "Duration", defaultVisible: true },
    { key: "capture", label: "Capture", defaultVisible: true },
-    { key: "meta", label: "Meta", defaultVisible: false },
  ];

  const defaultVisibleKeys = columns.filter((c) => c.defaultVisible).map((c) => c.key);

-  const visibleColumns = persistentStore<ColumnKey[]>("activity-columns", defaultVisibleKeys);
-  const columnOrder = persistentStore<ColumnKey[]>(
-    "activity-column-order",
-    columns.map((c) => c.key)
+  const visibleColumns = persistentStore<ColumnKey[]>(
+    "activity-columns",
+    defaultVisibleKeys
  );

  let columnsMenuOpen = $state(false);
  let dropdownContainer: HTMLDivElement | null = null;
-  let dragKey: ColumnKey | null = $state(null);
-  let dragOverKey: ColumnKey | null = $state(null);

  onMount(() => {
    function handleKeydown(e: KeyboardEvent) {
@@ -77,92 +84,10 @@
    }
  }

-  function isColumnVisible(key: ColumnKey): boolean {
-    return $visibleColumns.includes(key);
-  }
-
-  function handleDragStart(e: DragEvent, key: ColumnKey) {
-    dragKey = key;
-    e.dataTransfer?.setData("text/plain", key);
-    if (e.dataTransfer) {
-      e.dataTransfer.effectAllowed = "move";
-    }
-  }
-
-  function handleDragOver(e: DragEvent, key: ColumnKey) {
-    e.preventDefault();
-    if (e.dataTransfer) {
-      e.dataTransfer.dropEffect = "move";
-    }
-    dragOverKey = key;
-  }
-
-  function handleDrop(e: DragEvent, targetKey: ColumnKey) {
-    e.preventDefault();
-    if (!dragKey || dragKey === targetKey) return;
-    const order = [...$columnOrder];
-    const fromIndex = order.indexOf(dragKey);
-    let toIndex = order.indexOf(targetKey);
-    if (fromIndex === -1 || toIndex === -1) return;
-    order.splice(fromIndex, 1);
-    if (fromIndex < toIndex) {
-      toIndex -= 1;
-    }
-    order.splice(toIndex, 0, dragKey);
-    columnOrder.set(order);
-  }
-
-  function handleDragEnd() {
-    dragKey = null;
-    dragOverKey = null;
-  }
-
-  let orderedColumns = $derived(
-    columns.slice().sort((a, b) => {
-      const aIndex = $columnOrder.indexOf(a.key);
-      const bIndex = $columnOrder.indexOf(b.key);
-      if (aIndex === -1 && bIndex === -1) return 0;
-      if (aIndex === -1) return 1;
-      if (bIndex === -1) return -1;
-      return aIndex - bIndex;
-    })
-  );
-
-  let activeVisibleColumns = $derived(
-    columns
-      .filter((c) => isColumnVisible(c.key))
-      .sort((a, b) => {
-        const aIndex = $columnOrder.indexOf(a.key);
-        const bIndex = $columnOrder.indexOf(b.key);
-        if (aIndex === -1 && bIndex === -1) return 0;
-        if (aIndex === -1) return 1;
-        if (bIndex === -1) return -1;
-        return aIndex - bIndex;
-      })
-      .map((c) => c.key)
-  );
-
-  let columnLabelMap = $derived(Object.fromEntries(columns.map((c) => [c.key, c.label])));
-
-  $effect(() => {
-    const staticKeys = new Set(columns.map((c) => c.key));
-    const order = $columnOrder;
-    const hasStale = order.some((k) => !staticKeys.has(k));
-    const missing = columns.filter((c) => !order.includes(c.key)).map((c) => c.key);
-    if (hasStale || missing.length > 0) {
-      const cleaned = order.filter((k) => staticKeys.has(k));
-      columnOrder.set([...cleaned, ...missing]);
-    }
-  });
-
  function formatSpeed(speed: number): string {
    return speed < 0 ? "unknown" : speed.toFixed(2) + " t/s";
  }

-  function formatDrafted(drafted: number, accepted: number): string {
-    return drafted > 0 ? (accepted * 100 / drafted).toFixed(1) + "% (" + accepted + "/" + drafted + ")" : "-";
-  }
-
  function formatDuration(ms: number): string {
    return (ms / 1000).toFixed(2) + "s";
  }
@@ -232,37 +157,22 @@
          </svg>
        </button>
        {#if columnsMenuOpen}
-          <div class="absolute right-0 top-full mt-1 bg-surface border border-gray-200 dark:border-white/10 rounded shadow-lg z-10 py-1 min-w-[16rem]" role="list">
-            <div class="px-3 py-2 text-xs font-medium uppercase tracking-wider text-gray-500 dark:text-gray-400 border-b border-gray-200 dark:border-white/10" role="presentation">
+          <div class="absolute right-0 top-full mt-1 bg-surface border border-gray-200 dark:border-white/10 rounded shadow-lg z-10 py-1 min-w-[16rem]">
+            <div class="px-3 py-2 text-xs font-medium uppercase tracking-wider text-gray-500 dark:text-gray-400 border-b border-gray-200 dark:border-white/10">
              Columns
            </div>
-            {#each orderedColumns as col (col.key)}
-              {@const key = col.key}
-              <div
-                class="flex items-center gap-2 px-3 py-1.5 text-sm hover:bg-secondary-hover transition-colors {dragOverKey === key && dragKey !== key ? 'bg-primary/10 ring-1 ring-primary/40' : ''} {dragKey === key ? 'opacity-40' : ''}"
-                role="listitem"
-                ondragover={(e) => handleDragOver(e, key)}
-                ondrop={(e) => handleDrop(e, key)}
+            {#each columns as col (col.key)}
+              <label
+                class="flex items-center gap-2 px-3 py-1.5 text-sm cursor-pointer hover:bg-secondary-hover transition-colors"
              >
-                <span
-                  class="text-txtsecondary select-none cursor-grab"
-                  draggable={true}
-                  role="button"
-                  tabindex="-1"
-                  aria-label="Drag to reorder {col.label}"
-                  ondragstart={(e) => handleDragStart(e, key)}
-                  ondragend={handleDragEnd}
-                >⋮⋮</span>
-                <label class="flex items-center gap-2 flex-1 cursor-pointer">
-                  <input
-                    type="checkbox"
-                    checked={isColumnVisible(key)}
-                    onchange={() => toggleColumn(key)}
-                    class="rounded"
-                  />
-                  {col.label}
-                </label>
-              </div>
+                <input
+                  type="checkbox"
+                  checked={$visibleColumns.includes(col.key)}
+                  onchange={() => toggleColumn(col.key)}
+                  class="rounded"
+                />
+                {col.label}
+              </label>
            {/each}
          </div>
        {/if}
@@ -272,90 +182,112 @@
    <table class="min-w-full divide-y">
      <thead class="border-gray-200 dark:border-white/10">
        <tr class="text-left text-xs uppercase tracking-wider">
-          {#each activeVisibleColumns as key (key)}
+          {#if $visibleColumns.includes("id")}
+            <th class="px-6 py-3">ID</th>
+          {/if}
+          {#if $visibleColumns.includes("time")}
+            <th class="px-6 py-3">Time</th>
+          {/if}
+          {#if $visibleColumns.includes("model")}
+            <th class="px-6 py-3">Model</th>
+          {/if}
+          {#if $visibleColumns.includes("req_path")}
+            <th class="px-6 py-3">Path</th>
+          {/if}
+          {#if $visibleColumns.includes("resp_status_code")}
+            <th class="px-6 py-3">Status</th>
+          {/if}
+          {#if $visibleColumns.includes("resp_content_type")}
+            <th class="px-6 py-3">Content-Type</th>
+          {/if}
+          {#if $visibleColumns.includes("cached")}
            <th class="px-6 py-3">
-              {#if key === "cached"}
-                Cached <Tooltip content="prompt tokens from cache" />
-              {:else if key === "prompt"}
-                Prompt <Tooltip content="new prompt tokens processed" />
-              {:else if key === "drafted"}
-                Drafted <Tooltip content="acceptance rate (accepted/drafted)" />
-              {:else}
-                {columnLabelMap[key] ?? key}
-              {/if}
+              Cached <Tooltip content="prompt tokens from cache" />
            </th>
-          {/each}
+          {/if}
+          {#if $visibleColumns.includes("prompt")}
+            <th class="px-6 py-3">
+              Prompt <Tooltip content="new prompt tokens processed" />
+            </th>
+          {/if}
+          {#if $visibleColumns.includes("generated")}
+            <th class="px-6 py-3">Generated</th>
+          {/if}
+          {#if $visibleColumns.includes("prompt_speed")}
+            <th class="px-6 py-3">Prompt Speed</th>
+          {/if}
+          {#if $visibleColumns.includes("gen_speed")}
+            <th class="px-6 py-3">Gen Speed</th>
+          {/if}
+          {#if $visibleColumns.includes("duration")}
+            <th class="px-6 py-3">Duration</th>
+          {/if}
+          {#if $visibleColumns.includes("capture")}
+            <th class="px-6 py-3">Capture</th>
+          {/if}
        </tr>
      </thead>
      <tbody class="divide-y">
        {#if sortedMetrics.length === 0}
          <tr>
-            <td colspan={activeVisibleColumns.length} class="px-6 py-8 text-center text-sm text-gray-500 dark:text-gray-400">
+            <td colspan={$visibleColumns.length} class="px-6 py-8 text-center text-sm text-gray-500 dark:text-gray-400">
              No activity recorded
            </td>
          </tr>
        {:else}
          {#each sortedMetrics as metric (metric.id)}
            <tr class="whitespace-nowrap text-sm border-gray-200 dark:border-white/10">
-              {#each activeVisibleColumns as key (key)}
+              {#if $visibleColumns.includes("id")}
+                <td class="px-4 py-4">{metric.id + 1}</td>
+              {/if}
+              {#if $visibleColumns.includes("time")}
+                <td class="px-6 py-4">{formatRelativeTime(metric.timestamp)}</td>
+              {/if}
+              {#if $visibleColumns.includes("model")}
+                <td class="px-6 py-4">{metric.model}</td>
+              {/if}
+              {#if $visibleColumns.includes("req_path")}
+                <td class="px-6 py-4">{metric.req_path || "-"}</td>
+              {/if}
+              {#if $visibleColumns.includes("resp_status_code")}
+                <td class="px-6 py-4">{metric.resp_status_code || "-"}</td>
+              {/if}
+              {#if $visibleColumns.includes("resp_content_type")}
+                <td class="px-6 py-4">{metric.resp_content_type || "-"}</td>
+              {/if}
+              {#if $visibleColumns.includes("cached")}
+                <td class="px-6 py-4">{metric.tokens.cache_tokens > 0 ? metric.tokens.cache_tokens.toLocaleString() : "-"}</td>
+              {/if}
+              {#if $visibleColumns.includes("prompt")}
+                <td class="px-6 py-4">{metric.tokens.input_tokens.toLocaleString()}</td>
+              {/if}
+              {#if $visibleColumns.includes("generated")}
+                <td class="px-6 py-4">{metric.tokens.output_tokens.toLocaleString()}</td>
+              {/if}
+              {#if $visibleColumns.includes("prompt_speed")}
+                <td class="px-6 py-4">{formatSpeed(metric.tokens.prompt_per_second)}</td>
+              {/if}
+              {#if $visibleColumns.includes("gen_speed")}
+                <td class="px-6 py-4">{formatSpeed(metric.tokens.tokens_per_second)}</td>
+              {/if}
+              {#if $visibleColumns.includes("duration")}
+                <td class="px-6 py-4">{formatDuration(metric.duration_ms)}</td>
+              {/if}
+              {#if $visibleColumns.includes("capture")}
                <td class="px-6 py-4">
-                  {#if key === "id"}
-                    {metric.id + 1}
-                  {:else if key === "time"}
-                    {formatRelativeTime(metric.timestamp)}
-                  {:else if key === "model"}
-                    {metric.model}
-                  {:else if key === "req_path"}
-                    {metric.req_path || "-"}
-                  {:else if key === "resp_status_code"}
-                    {#if metric.error_msg}
-                      <span class="text-red-500 dark:text-red-400 cursor-help" title={metric.error_msg}>
-                        {metric.resp_status_code || "-"}
-                      </span>
-                    {:else}
-                      {metric.resp_status_code || "-"}
-                    {/if}
-                  {:else if key === "resp_content_type"}
-                    {metric.resp_content_type || "-"}
-                  {:else if key === "cached"}
-                    {metric.tokens.cache_tokens > 0 ? metric.tokens.cache_tokens.toLocaleString() : "-"}
-                  {:else if key === "prompt"}
-                    {metric.tokens.input_tokens.toLocaleString()}
-                  {:else if key === "generated"}
-                    {metric.tokens.output_tokens.toLocaleString()}
-                  {:else if key === "drafted"}
-                    {formatDrafted(metric.tokens.draft_tokens, metric.tokens.draft_acc_tokens)}
-                  {:else if key === "prompt_speed"}
-                    {formatSpeed(metric.tokens.prompt_per_second)}
-                  {:else if key === "gen_speed"}
-                    {formatSpeed(metric.tokens.tokens_per_second)}
-                  {:else if key === "duration"}
-                    {formatDuration(metric.duration_ms)}
-                  {:else if key === "capture"}
-                    {#if metric.has_capture}
-                      <button
-                        onclick={() => viewCapture(metric.id)}
-                        disabled={loadingCaptureId === metric.id}
-                        class="btn btn--sm"
-                      >
-                        {loadingCaptureId === metric.id ? "..." : "View"}
-                      </button>
-                    {:else}
-                      <span class="text-txtsecondary">-</span>
-                    {/if}
-                  {:else if key === "meta"}
-                    {#if Object.keys(metric.metadata || {}).length > 0}
-                      <MetadataTooltip metadata={metric.metadata}>
-                        <span class="cursor-help text-txtsecondary hover:text-txtmain">...</span>
-                      </MetadataTooltip>
-                    {:else}
-                      <span class="text-txtsecondary">-</span>
-                    {/if}
+                  {#if metric.has_capture}
+                    <button
+                      onclick={() => viewCapture(metric.id)}
+                      disabled={loadingCaptureId === metric.id}
+                      class="btn btn--sm"
+                    >
+                      {loadingCaptureId === metric.id ? "..." : "View"}
+                    </button>
                  {:else}
-                    -
+                    <span class="text-txtsecondary">-</span>
                  {/if}
                </td>
-              {/each}
+              {/if}
            </tr>
          {/each}
        {/if}
@@ -19,7 +19,6 @@ export const proxyLogs = writable<string>("");
 export const upstreamLogs = writable<string>("");
 export const metrics = writable<ActivityLogEntry[]>([]);
 export const inFlightRequests = writable<number>(0);
-export const performanceEnabled = writable<boolean>(false);
 export const versionInfo = writable<VersionInfo>({
  build_date: "unknown",
  commit: "unknown",
@@ -211,20 +210,6 @@ export async function getCapture(id: number): Promise<ReqRespCapture | null> {
  }
 }

-export async function checkPerformanceEnabled(): Promise<void> {
-  try {
-    const response = await fetch("/api/performance");
-    if (!response.ok) {
-      performanceEnabled.set(false);
-      return;
-    }
-    const data = await response.json();
-    performanceEnabled.set(data.enabled);
-  } catch {
-    performanceEnabled.set(false);
-  }
-}
-
 export async function fetchPerformance(after?: string): Promise<PerformanceResponse | null> {
  try {
    const url = after ? `/api/performance?after=${encodeURIComponent(after)}` : "/api/performance";