Compare commits
4 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 5dc6b3e6d9 | |||
| 74c69f39ef | |||
| a186318892 | |||
| c4e4d5e1e9 |
@@ -4,3 +4,4 @@ build/
|
|||||||
dist/
|
dist/
|
||||||
.vscode
|
.vscode
|
||||||
.DS_Store
|
.DS_Store
|
||||||
|
.dev/
|
||||||
|
|||||||
@@ -31,8 +31,9 @@ Written in golang, it is very easy to install (single binary with no dependencie
|
|||||||
- ✅ Run multiple models at once with `Groups` ([#107](https://github.com/mostlygeek/llama-swap/issues/107))
|
- ✅ Run multiple models at once with `Groups` ([#107](https://github.com/mostlygeek/llama-swap/issues/107))
|
||||||
- ✅ Automatic unloading of models after timeout by setting a `ttl`
|
- ✅ Automatic unloading of models after timeout by setting a `ttl`
|
||||||
- ✅ Use any local OpenAI compatible server (llama.cpp, vllm, tabbyAPI, etc)
|
- ✅ Use any local OpenAI compatible server (llama.cpp, vllm, tabbyAPI, etc)
|
||||||
- ✅ Docker and Podman support
|
- ✅ Reliable Docker and Podman support with `cmdStart` and `cmdStop`
|
||||||
- ✅ Full control over server settings per model
|
- ✅ Full control over server settings per model
|
||||||
|
- ✅ Preload models on startup with `hooks` ([#235](https://github.com/mostlygeek/llama-swap/pull/235))
|
||||||
|
|
||||||
## How does llama-swap work?
|
## How does llama-swap work?
|
||||||
|
|
||||||
@@ -71,9 +72,13 @@ See the [configuration documentation](https://github.com/mostlygeek/llama-swap/w
|
|||||||
|
|
||||||
## Web UI
|
## Web UI
|
||||||
|
|
||||||
llama-swap ships with a real time web interface to monitor logs and status of models:
|
llama-swap includes a real time web interface for monitoring logs and models:
|
||||||
|
|
||||||
<img width="1786" height="1334" alt="image" src="https://github.com/user-attachments/assets/d6258cb9-1dad-40db-828f-2be860aec8fe" />
|
<img width="1360" height="963" alt="image" src="https://github.com/user-attachments/assets/adef4a8e-de0b-49db-885a-8f6dedae6799" />
|
||||||
|
|
||||||
|
The Activity Page shows recent requests:
|
||||||
|
|
||||||
|
<img width="1360" height="963" alt="image" src="https://github.com/user-attachments/assets/5f3edee6-d03a-4ae5-ae06-b20ac1f135bd" />
|
||||||
|
|
||||||
## Installation
|
## Installation
|
||||||
|
|
||||||
|
|||||||
@@ -1,6 +1,13 @@
|
|||||||
# llama-swap YAML configuration example
|
# llama-swap YAML configuration example
|
||||||
# -------------------------------------
|
# -------------------------------------
|
||||||
#
|
#
|
||||||
|
# 💡 Tip - Use an LLM with this file!
|
||||||
|
# ====================================
|
||||||
|
# This example configuration is written to be LLM friendly! Try
|
||||||
|
# copying this file into an LLM and asking it to explain or generate
|
||||||
|
# sections for you.
|
||||||
|
# ====================================
|
||||||
|
#
|
||||||
# - Below are all the available configuration options for llama-swap.
|
# - Below are all the available configuration options for llama-swap.
|
||||||
# - Settings with a default value, or noted as optional can be omitted.
|
# - Settings with a default value, or noted as optional can be omitted.
|
||||||
# - Settings that are marked required must be in your configuration file
|
# - Settings that are marked required must be in your configuration file
|
||||||
@@ -207,3 +214,19 @@ groups:
|
|||||||
- "forever-modelA"
|
- "forever-modelA"
|
||||||
- "forever-modelB"
|
- "forever-modelB"
|
||||||
- "forever-modelc"
|
- "forever-modelc"
|
||||||
|
|
||||||
|
# hooks: a dictionary of event triggers and actions
|
||||||
|
# - optional, default: empty dictionary
|
||||||
|
# - the only supported hook is on_startup
|
||||||
|
hooks:
|
||||||
|
# on_startup: a dictionary of actions to perform on startup
|
||||||
|
# - optional, default: empty dictionar
|
||||||
|
# - the only supported action is preload
|
||||||
|
on_startup:
|
||||||
|
# preload: a list of model ids to load on startup
|
||||||
|
# - optional, default: empty list
|
||||||
|
# - model names must match keys in the models sections
|
||||||
|
# - when preloading multiple models at once, define a group
|
||||||
|
# otherwise models will be loaded and swapped out
|
||||||
|
preload:
|
||||||
|
- "llama"
|
||||||
|
|||||||
@@ -138,6 +138,14 @@ func (c *GroupConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type HooksConfig struct {
|
||||||
|
OnStartup HookOnStartup `yaml:"on_startup"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type HookOnStartup struct {
|
||||||
|
Preload []string `yaml:"preload"`
|
||||||
|
}
|
||||||
|
|
||||||
type Config struct {
|
type Config struct {
|
||||||
HealthCheckTimeout int `yaml:"healthCheckTimeout"`
|
HealthCheckTimeout int `yaml:"healthCheckTimeout"`
|
||||||
LogRequests bool `yaml:"logRequests"`
|
LogRequests bool `yaml:"logRequests"`
|
||||||
@@ -155,6 +163,9 @@ type Config struct {
|
|||||||
|
|
||||||
// automatic port assignments
|
// automatic port assignments
|
||||||
StartPort int `yaml:"startPort"`
|
StartPort int `yaml:"startPort"`
|
||||||
|
|
||||||
|
// hooks, see: #209
|
||||||
|
Hooks HooksConfig `yaml:"hooks"`
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c *Config) RealModelName(search string) (string, bool) {
|
func (c *Config) RealModelName(search string) (string, bool) {
|
||||||
@@ -330,6 +341,22 @@ func LoadConfigFromReader(r io.Reader) (Config, error) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// clean up hooks preload
|
||||||
|
if len(config.Hooks.OnStartup.Preload) > 0 {
|
||||||
|
var toPreload []string
|
||||||
|
for _, modelID := range config.Hooks.OnStartup.Preload {
|
||||||
|
modelID = strings.TrimSpace(modelID)
|
||||||
|
if modelID == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if real, found := config.RealModelName(modelID); found {
|
||||||
|
toPreload = append(toPreload, real)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
config.Hooks.OnStartup.Preload = toPreload
|
||||||
|
}
|
||||||
|
|
||||||
return config, nil
|
return config, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -100,6 +100,9 @@ func TestConfig_LoadPosix(t *testing.T) {
|
|||||||
content := `
|
content := `
|
||||||
macros:
|
macros:
|
||||||
svr-path: "path/to/server"
|
svr-path: "path/to/server"
|
||||||
|
hooks:
|
||||||
|
on_startup:
|
||||||
|
preload: ["model1", "model2"]
|
||||||
models:
|
models:
|
||||||
model1:
|
model1:
|
||||||
cmd: path/to/cmd --arg1 one
|
cmd: path/to/cmd --arg1 one
|
||||||
@@ -163,6 +166,11 @@ groups:
|
|||||||
Macros: map[string]string{
|
Macros: map[string]string{
|
||||||
"svr-path": "path/to/server",
|
"svr-path": "path/to/server",
|
||||||
},
|
},
|
||||||
|
Hooks: HooksConfig{
|
||||||
|
OnStartup: HookOnStartup{
|
||||||
|
Preload: []string{"model1", "model2"},
|
||||||
|
},
|
||||||
|
},
|
||||||
Models: map[string]ModelConfig{
|
Models: map[string]ModelConfig{
|
||||||
"model1": {
|
"model1": {
|
||||||
Cmd: "path/to/cmd --arg1 one",
|
Cmd: "path/to/cmd --arg1 one",
|
||||||
|
|||||||
@@ -0,0 +1,27 @@
|
|||||||
|
package proxy
|
||||||
|
|
||||||
|
import "net/http"
|
||||||
|
|
||||||
|
// Custom discard writer that implements http.ResponseWriter but just discards everything
|
||||||
|
type DiscardWriter struct {
|
||||||
|
header http.Header
|
||||||
|
status int
|
||||||
|
}
|
||||||
|
|
||||||
|
func (w *DiscardWriter) Header() http.Header {
|
||||||
|
if w.header == nil {
|
||||||
|
w.header = make(http.Header)
|
||||||
|
}
|
||||||
|
return w.header
|
||||||
|
}
|
||||||
|
|
||||||
|
func (w *DiscardWriter) Write(data []byte) (int, error) {
|
||||||
|
return len(data), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (w *DiscardWriter) WriteHeader(code int) {
|
||||||
|
w.status = code
|
||||||
|
}
|
||||||
|
|
||||||
|
// Satisfy the http.Flusher interface for streaming responses
|
||||||
|
func (w *DiscardWriter) Flush() {}
|
||||||
@@ -7,6 +7,7 @@ const ChatCompletionStatsEventID = 0x02
|
|||||||
const ConfigFileChangedEventID = 0x03
|
const ConfigFileChangedEventID = 0x03
|
||||||
const LogDataEventID = 0x04
|
const LogDataEventID = 0x04
|
||||||
const TokenMetricsEventID = 0x05
|
const TokenMetricsEventID = 0x05
|
||||||
|
const ModelPreloadedEventID = 0x06
|
||||||
|
|
||||||
type ProcessStateChangeEvent struct {
|
type ProcessStateChangeEvent struct {
|
||||||
ProcessName string
|
ProcessName string
|
||||||
@@ -48,3 +49,12 @@ type LogDataEvent struct {
|
|||||||
func (e LogDataEvent) Type() uint32 {
|
func (e LogDataEvent) Type() uint32 {
|
||||||
return LogDataEventID
|
return LogDataEventID
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type ModelPreloadedEvent struct {
|
||||||
|
ModelName string
|
||||||
|
Success bool
|
||||||
|
}
|
||||||
|
|
||||||
|
func (e ModelPreloadedEvent) Type() uint32 {
|
||||||
|
return ModelPreloadedEventID
|
||||||
|
}
|
||||||
|
|||||||
@@ -16,6 +16,7 @@ var (
|
|||||||
nextTestPort int = 12000
|
nextTestPort int = 12000
|
||||||
portMutex sync.Mutex
|
portMutex sync.Mutex
|
||||||
testLogger = NewLogMonitorWriter(os.Stdout)
|
testLogger = NewLogMonitorWriter(os.Stdout)
|
||||||
|
simpleResponderPath = getSimpleResponderPath()
|
||||||
)
|
)
|
||||||
|
|
||||||
// Check if the binary exists
|
// Check if the binary exists
|
||||||
@@ -69,13 +70,11 @@ func getTestSimpleResponderConfig(expectedMessage string) ModelConfig {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func getTestSimpleResponderConfigPort(expectedMessage string, port int) ModelConfig {
|
func getTestSimpleResponderConfigPort(expectedMessage string, port int) ModelConfig {
|
||||||
binaryPath := getSimpleResponderPath()
|
|
||||||
|
|
||||||
// Create a YAML string with just the values we want to set
|
// Create a YAML string with just the values we want to set
|
||||||
yamlStr := fmt.Sprintf(`
|
yamlStr := fmt.Sprintf(`
|
||||||
cmd: '%s --port %d --silent --respond %s'
|
cmd: '%s --port %d --silent --respond %s'
|
||||||
proxy: "http://127.0.0.1:%d"
|
proxy: "http://127.0.0.1:%d"
|
||||||
`, binaryPath, port, expectedMessage, port)
|
`, simpleResponderPath, port, expectedMessage, port)
|
||||||
|
|
||||||
var cfg ModelConfig
|
var cfg ModelConfig
|
||||||
if err := yaml.Unmarshal([]byte(yamlStr), &cfg); err != nil {
|
if err := yaml.Unmarshal([]byte(yamlStr), &cfg); err != nil {
|
||||||
|
|||||||
@@ -79,10 +79,12 @@ func (rec *MetricsRecorder) parseAndRecordMetrics(jsonData gjson.Result) bool {
|
|||||||
outputTokens := int(jsonData.Get("usage.completion_tokens").Int())
|
outputTokens := int(jsonData.Get("usage.completion_tokens").Int())
|
||||||
inputTokens := int(jsonData.Get("usage.prompt_tokens").Int())
|
inputTokens := int(jsonData.Get("usage.prompt_tokens").Int())
|
||||||
tokensPerSecond := -1.0
|
tokensPerSecond := -1.0
|
||||||
|
promptPerSecond := -1.0
|
||||||
durationMs := int(time.Since(rec.startTime).Milliseconds())
|
durationMs := int(time.Since(rec.startTime).Milliseconds())
|
||||||
|
|
||||||
// use llama-server's timing data for tok/sec and duration as it is more accurate
|
// use llama-server's timing data for tok/sec and duration as it is more accurate
|
||||||
if timings := jsonData.Get("timings"); timings.Exists() {
|
if timings := jsonData.Get("timings"); timings.Exists() {
|
||||||
|
promptPerSecond = jsonData.Get("timings.prompt_per_second").Float()
|
||||||
tokensPerSecond = jsonData.Get("timings.predicted_per_second").Float()
|
tokensPerSecond = jsonData.Get("timings.predicted_per_second").Float()
|
||||||
durationMs = int(jsonData.Get("timings.prompt_ms").Float() + jsonData.Get("timings.predicted_ms").Float())
|
durationMs = int(jsonData.Get("timings.prompt_ms").Float() + jsonData.Get("timings.predicted_ms").Float())
|
||||||
}
|
}
|
||||||
@@ -92,6 +94,7 @@ func (rec *MetricsRecorder) parseAndRecordMetrics(jsonData gjson.Result) bool {
|
|||||||
Model: rec.realModelName,
|
Model: rec.realModelName,
|
||||||
InputTokens: inputTokens,
|
InputTokens: inputTokens,
|
||||||
OutputTokens: outputTokens,
|
OutputTokens: outputTokens,
|
||||||
|
PromptPerSecond: promptPerSecond,
|
||||||
TokensPerSecond: tokensPerSecond,
|
TokensPerSecond: tokensPerSecond,
|
||||||
DurationMs: durationMs,
|
DurationMs: durationMs,
|
||||||
})
|
})
|
||||||
|
|||||||
@@ -15,6 +15,7 @@ type TokenMetrics struct {
|
|||||||
Model string `json:"model"`
|
Model string `json:"model"`
|
||||||
InputTokens int `json:"input_tokens"`
|
InputTokens int `json:"input_tokens"`
|
||||||
OutputTokens int `json:"output_tokens"`
|
OutputTokens int `json:"output_tokens"`
|
||||||
|
PromptPerSecond float64 `json:"prompt_per_second"`
|
||||||
TokensPerSecond float64 `json:"tokens_per_second"`
|
TokensPerSecond float64 `json:"tokens_per_second"`
|
||||||
DurationMs int `json:"duration_ms"`
|
DurationMs int `json:"duration_ms"`
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -15,6 +15,7 @@ import (
|
|||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/gin-gonic/gin"
|
"github.com/gin-gonic/gin"
|
||||||
|
"github.com/mostlygeek/llama-swap/event"
|
||||||
"github.com/tidwall/gjson"
|
"github.com/tidwall/gjson"
|
||||||
"github.com/tidwall/sjson"
|
"github.com/tidwall/sjson"
|
||||||
)
|
)
|
||||||
@@ -96,6 +97,35 @@ func New(config Config) *ProxyManager {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pm.setupGinEngine()
|
pm.setupGinEngine()
|
||||||
|
|
||||||
|
// run any startup hooks
|
||||||
|
if len(config.Hooks.OnStartup.Preload) > 0 {
|
||||||
|
// do it in the background, don't block startup -- not sure if good idea yet
|
||||||
|
go func() {
|
||||||
|
discardWriter := &DiscardWriter{}
|
||||||
|
for _, realModelName := range config.Hooks.OnStartup.Preload {
|
||||||
|
proxyLogger.Infof("Preloading model: %s", realModelName)
|
||||||
|
processGroup, _, err := pm.swapProcessGroup(realModelName)
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
event.Emit(ModelPreloadedEvent{
|
||||||
|
ModelName: realModelName,
|
||||||
|
Success: false,
|
||||||
|
})
|
||||||
|
proxyLogger.Errorf("Failed to preload model %s: %v", realModelName, err)
|
||||||
|
continue
|
||||||
|
} else {
|
||||||
|
req, _ := http.NewRequest("GET", "/", nil)
|
||||||
|
processGroup.ProxyRequest(realModelName, discardWriter, req)
|
||||||
|
event.Emit(ModelPreloadedEvent{
|
||||||
|
ModelName: realModelName,
|
||||||
|
Success: true,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
}
|
||||||
|
|
||||||
return pm
|
return pm
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -14,6 +14,7 @@ import (
|
|||||||
"testing"
|
"testing"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"github.com/mostlygeek/llama-swap/event"
|
||||||
"github.com/stretchr/testify/assert"
|
"github.com/stretchr/testify/assert"
|
||||||
"github.com/tidwall/gjson"
|
"github.com/tidwall/gjson"
|
||||||
)
|
)
|
||||||
@@ -832,3 +833,62 @@ func TestProxyManager_HealthEndpoint(t *testing.T) {
|
|||||||
assert.Equal(t, http.StatusOK, rec.Code)
|
assert.Equal(t, http.StatusOK, rec.Code)
|
||||||
assert.Equal(t, "OK", rec.Body.String())
|
assert.Equal(t, "OK", rec.Body.String())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestProxyManager_StartupHooks(t *testing.T) {
|
||||||
|
|
||||||
|
// using real YAML as the configuration has gotten more complex
|
||||||
|
// is the right approach as LoadConfigFromReader() does a lot more
|
||||||
|
// than parse YAML now. Eventually migrate all tests to use this approach
|
||||||
|
configStr := strings.Replace(`
|
||||||
|
logLevel: error
|
||||||
|
hooks:
|
||||||
|
on_startup:
|
||||||
|
preload:
|
||||||
|
- model1
|
||||||
|
- model2
|
||||||
|
groups:
|
||||||
|
preloadTestGroup:
|
||||||
|
swap: false
|
||||||
|
members:
|
||||||
|
- model1
|
||||||
|
- model2
|
||||||
|
models:
|
||||||
|
model1:
|
||||||
|
cmd: ${simpleresponderpath} --port ${PORT} --silent --respond model1
|
||||||
|
model2:
|
||||||
|
cmd: ${simpleresponderpath} --port ${PORT} --silent --respond model2
|
||||||
|
`, "${simpleresponderpath}", simpleResponderPath, -1)
|
||||||
|
|
||||||
|
// Create a test model configuration
|
||||||
|
config, err := LoadConfigFromReader(strings.NewReader(configStr))
|
||||||
|
if !assert.NoError(t, err, "Invalid configuration") {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
preloadChan := make(chan ModelPreloadedEvent, 2) // buffer for 2 expected events
|
||||||
|
|
||||||
|
unsub := event.On(func(e ModelPreloadedEvent) {
|
||||||
|
preloadChan <- e
|
||||||
|
})
|
||||||
|
|
||||||
|
defer unsub()
|
||||||
|
|
||||||
|
// Create the proxy which should trigger preloading
|
||||||
|
proxy := New(config)
|
||||||
|
defer proxy.StopProcesses(StopWaitForInflightRequest)
|
||||||
|
|
||||||
|
for i := 0; i < 2; i++ {
|
||||||
|
select {
|
||||||
|
case <-preloadChan:
|
||||||
|
case <-time.After(5 * time.Second):
|
||||||
|
t.Fatal("timed out waiting for models to preload")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// make sure they are both loaded
|
||||||
|
_, foundGroup := proxy.processGroups["preloadTestGroup"]
|
||||||
|
if !assert.True(t, foundGroup, "preloadTestGroup should exist") {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
assert.Equal(t, StateReady, proxy.processGroups["preloadTestGroup"].processes["model1"].CurrentState())
|
||||||
|
assert.Equal(t, StateReady, proxy.processGroups["preloadTestGroup"].processes["model2"].CurrentState())
|
||||||
|
}
|
||||||
|
|||||||
@@ -28,6 +28,7 @@ interface Metrics {
|
|||||||
model: string;
|
model: string;
|
||||||
input_tokens: number;
|
input_tokens: number;
|
||||||
output_tokens: number;
|
output_tokens: number;
|
||||||
|
prompt_per_second: number;
|
||||||
tokens_per_second: number;
|
tokens_per_second: number;
|
||||||
duration_ms: number;
|
duration_ms: number;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -51,6 +51,7 @@ const ActivityPage = () => {
|
|||||||
<th className="px-6 py-3 text-left text-xs font-medium uppercase tracking-wider">Model</th>
|
<th className="px-6 py-3 text-left text-xs font-medium uppercase tracking-wider">Model</th>
|
||||||
<th className="px-6 py-3 text-left text-xs font-medium uppercase tracking-wider">Input Tokens</th>
|
<th className="px-6 py-3 text-left text-xs font-medium uppercase tracking-wider">Input Tokens</th>
|
||||||
<th className="px-6 py-3 text-left text-xs font-medium uppercase tracking-wider">Output Tokens</th>
|
<th className="px-6 py-3 text-left text-xs font-medium uppercase tracking-wider">Output Tokens</th>
|
||||||
|
<th className="px-6 py-3 text-left text-xs font-medium uppercase tracking-wider">Prompt Processing</th>
|
||||||
<th className="px-6 py-3 text-left text-xs font-medium uppercase tracking-wider">Generation Speed</th>
|
<th className="px-6 py-3 text-left text-xs font-medium uppercase tracking-wider">Generation Speed</th>
|
||||||
<th className="px-6 py-3 text-left text-xs font-medium uppercase tracking-wider">Duration</th>
|
<th className="px-6 py-3 text-left text-xs font-medium uppercase tracking-wider">Duration</th>
|
||||||
</tr>
|
</tr>
|
||||||
@@ -62,6 +63,7 @@ const ActivityPage = () => {
|
|||||||
<td className="px-6 py-4 whitespace-nowrap text-sm">{metric.model}</td>
|
<td className="px-6 py-4 whitespace-nowrap text-sm">{metric.model}</td>
|
||||||
<td className="px-6 py-4 whitespace-nowrap text-sm">{metric.input_tokens.toLocaleString()}</td>
|
<td className="px-6 py-4 whitespace-nowrap text-sm">{metric.input_tokens.toLocaleString()}</td>
|
||||||
<td className="px-6 py-4 whitespace-nowrap text-sm">{metric.output_tokens.toLocaleString()}</td>
|
<td className="px-6 py-4 whitespace-nowrap text-sm">{metric.output_tokens.toLocaleString()}</td>
|
||||||
|
<td className="px-6 py-4 whitespace-nowrap text-sm">{formatSpeed(metric.prompt_per_second)}</td>
|
||||||
<td className="px-6 py-4 whitespace-nowrap text-sm">{formatSpeed(metric.tokens_per_second)}</td>
|
<td className="px-6 py-4 whitespace-nowrap text-sm">{formatSpeed(metric.tokens_per_second)}</td>
|
||||||
<td className="px-6 py-4 whitespace-nowrap text-sm">{formatDuration(metric.duration_ms)}</td>
|
<td className="px-6 py-4 whitespace-nowrap text-sm">{formatDuration(metric.duration_ms)}</td>
|
||||||
</tr>
|
</tr>
|
||||||
|
|||||||
Reference in New Issue
Block a user