Compare commits
7 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| c3c258a55d | |||
| 29a38fde0d | |||
| d569681daa | |||
| 24efdb76b1 | |||
| cc77139ff8 | |||
| 390a35bf93 | |||
| 181f71ca11 |
@@ -29,7 +29,7 @@ jobs:
|
|||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
strategy:
|
strategy:
|
||||||
matrix:
|
matrix:
|
||||||
platform: [intel, cuda, vulkan, cpu, musa, rocm]
|
platform: [intel, cuda, cuda13, vulkan, cpu, musa, rocm]
|
||||||
fail-fast: false
|
fail-fast: false
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout code
|
- name: Checkout code
|
||||||
|
|||||||
+9
-3
@@ -48,6 +48,12 @@
|
|||||||
"default": 120,
|
"default": 120,
|
||||||
"description": "Number of seconds to wait for a model to be ready to serve requests."
|
"description": "Number of seconds to wait for a model to be ready to serve requests."
|
||||||
},
|
},
|
||||||
|
"globalTTL": {
|
||||||
|
"type": "integer",
|
||||||
|
"minimum": 0,
|
||||||
|
"default": 0,
|
||||||
|
"description": "Default TTL for all models in seconds, 0 means no TTL and models will never be automatically unloaded"
|
||||||
|
},
|
||||||
"logLevel": {
|
"logLevel": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"enum": [
|
"enum": [
|
||||||
@@ -177,9 +183,9 @@
|
|||||||
},
|
},
|
||||||
"ttl": {
|
"ttl": {
|
||||||
"type": "integer",
|
"type": "integer",
|
||||||
"minimum": 0,
|
"minimum": -1,
|
||||||
"default": 0,
|
"default": -1,
|
||||||
"description": "Automatically unload the model after ttl seconds. 0 disables unloading. Must be >0 to enable."
|
"description": "Automatically unload the model after ttl seconds. -1 uses the global TTL value, 0 disables unloading. Must be >0 to enable."
|
||||||
},
|
},
|
||||||
"useModelName": {
|
"useModelName": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
|
|||||||
+9
-2
@@ -75,6 +75,11 @@ sendLoadingState: true
|
|||||||
# all fields except for Id so chat UIs can use the alias equivalent to the original.
|
# all fields except for Id so chat UIs can use the alias equivalent to the original.
|
||||||
includeAliasesInList: false
|
includeAliasesInList: false
|
||||||
|
|
||||||
|
# globalTTL: the default TTL in seconds before unloading a model
|
||||||
|
# - optional, default: 0 (never automatically unload)
|
||||||
|
# - must be >= 0
|
||||||
|
globalTTL: 0
|
||||||
|
|
||||||
# macros: a dictionary of string substitutions
|
# macros: a dictionary of string substitutions
|
||||||
# - optional, default: empty dictionary
|
# - optional, default: empty dictionary
|
||||||
# - macros are reusable snippets
|
# - macros are reusable snippets
|
||||||
@@ -180,8 +185,10 @@ models:
|
|||||||
checkEndpoint: /custom-endpoint
|
checkEndpoint: /custom-endpoint
|
||||||
|
|
||||||
# ttl: automatically unload the model after ttl seconds
|
# ttl: automatically unload the model after ttl seconds
|
||||||
# - optional, default: 0
|
# - optional, default: -1 (use global default)
|
||||||
# - ttl values must be a value greater than 0
|
# - ttl values must be greater than or equal to 0, or -1 to use the global TTL
|
||||||
|
# - a ttl of -1 will use the global TTL value as the default
|
||||||
|
# - a ttl of 0 will mean never unload
|
||||||
# - a value of 0 disables automatic unloading of the model
|
# - a value of 0 disables automatic unloading of the model
|
||||||
ttl: 60
|
ttl: 60
|
||||||
|
|
||||||
|
|||||||
@@ -27,7 +27,7 @@ ARCH=$1
|
|||||||
PUSH_IMAGES=${2:-false}
|
PUSH_IMAGES=${2:-false}
|
||||||
|
|
||||||
# List of allowed architectures
|
# List of allowed architectures
|
||||||
ALLOWED_ARCHS=("intel" "vulkan" "musa" "cuda" "cpu" "rocm")
|
ALLOWED_ARCHS=("intel" "vulkan" "musa" "cuda" "cuda13" "cpu" "rocm")
|
||||||
|
|
||||||
# Check if ARCH is in the allowed list
|
# Check if ARCH is in the allowed list
|
||||||
if [[ ! " ${ALLOWED_ARCHS[@]} " =~ " ${ARCH} " ]]; then
|
if [[ ! " ${ALLOWED_ARCHS[@]} " =~ " ${ARCH} " ]]; then
|
||||||
|
|||||||
@@ -124,6 +124,7 @@ type Config struct {
|
|||||||
LogToStdout string `yaml:"logToStdout"`
|
LogToStdout string `yaml:"logToStdout"`
|
||||||
MetricsMaxInMemory int `yaml:"metricsMaxInMemory"`
|
MetricsMaxInMemory int `yaml:"metricsMaxInMemory"`
|
||||||
CaptureBuffer int `yaml:"captureBuffer"`
|
CaptureBuffer int `yaml:"captureBuffer"`
|
||||||
|
GlobalTTL int `yaml:"globalTTL"`
|
||||||
Models map[string]ModelConfig `yaml:"models"` /* key is model ID */
|
Models map[string]ModelConfig `yaml:"models"` /* key is model ID */
|
||||||
Profiles map[string][]string `yaml:"profiles"`
|
Profiles map[string][]string `yaml:"profiles"`
|
||||||
Groups map[string]GroupConfig `yaml:"groups"` /* key is group ID */
|
Groups map[string]GroupConfig `yaml:"groups"` /* key is group ID */
|
||||||
@@ -203,6 +204,7 @@ func LoadConfigFromReader(r io.Reader) (Config, error) {
|
|||||||
LogToStdout: LogToStdoutProxy,
|
LogToStdout: LogToStdoutProxy,
|
||||||
MetricsMaxInMemory: 1000,
|
MetricsMaxInMemory: 1000,
|
||||||
CaptureBuffer: 5,
|
CaptureBuffer: 5,
|
||||||
|
GlobalTTL: 0,
|
||||||
}
|
}
|
||||||
if err = yaml.Unmarshal([]byte(yamlStr), &config); err != nil {
|
if err = yaml.Unmarshal([]byte(yamlStr), &config); err != nil {
|
||||||
return Config{}, err
|
return Config{}, err
|
||||||
@@ -216,6 +218,10 @@ func LoadConfigFromReader(r io.Reader) (Config, error) {
|
|||||||
return Config{}, fmt.Errorf("startPort must be greater than 1")
|
return Config{}, fmt.Errorf("startPort must be greater than 1")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if config.GlobalTTL < 0 {
|
||||||
|
return Config{}, fmt.Errorf("globalTTL must be >= 0")
|
||||||
|
}
|
||||||
|
|
||||||
switch config.LogToStdout {
|
switch config.LogToStdout {
|
||||||
case LogToStdoutProxy, LogToStdoutUpstream, LogToStdoutBoth, LogToStdoutNone:
|
case LogToStdoutProxy, LogToStdoutUpstream, LogToStdoutBoth, LogToStdoutNone:
|
||||||
default:
|
default:
|
||||||
@@ -255,6 +261,15 @@ func LoadConfigFromReader(r io.Reader) (Config, error) {
|
|||||||
modelConfig.Cmd = StripComments(modelConfig.Cmd)
|
modelConfig.Cmd = StripComments(modelConfig.Cmd)
|
||||||
modelConfig.CmdStop = StripComments(modelConfig.CmdStop)
|
modelConfig.CmdStop = StripComments(modelConfig.CmdStop)
|
||||||
|
|
||||||
|
// set model TTL to globalTTL if it is still the default value (MODEL_CONFIG_DEFAULT_TTL)
|
||||||
|
if modelConfig.UnloadAfter == MODEL_CONFIG_DEFAULT_TTL {
|
||||||
|
modelConfig.UnloadAfter = config.GlobalTTL
|
||||||
|
}
|
||||||
|
|
||||||
|
if modelConfig.UnloadAfter < 0 {
|
||||||
|
return Config{}, fmt.Errorf("model %s: invalid TTL value %d", modelId, modelConfig.UnloadAfter)
|
||||||
|
}
|
||||||
|
|
||||||
// Validate model macros
|
// Validate model macros
|
||||||
for _, macro := range modelConfig.Macros {
|
for _, macro := range modelConfig.Macros {
|
||||||
if err = validateMacro(macro.Name, macro.Value); err != nil {
|
if err = validateMacro(macro.Name, macro.Value); err != nil {
|
||||||
@@ -293,6 +308,8 @@ func LoadConfigFromReader(r io.Reader) (Config, error) {
|
|||||||
modelConfig.Proxy = strings.ReplaceAll(modelConfig.Proxy, macroSlug, macroStr)
|
modelConfig.Proxy = strings.ReplaceAll(modelConfig.Proxy, macroSlug, macroStr)
|
||||||
modelConfig.CheckEndpoint = strings.ReplaceAll(modelConfig.CheckEndpoint, macroSlug, macroStr)
|
modelConfig.CheckEndpoint = strings.ReplaceAll(modelConfig.CheckEndpoint, macroSlug, macroStr)
|
||||||
modelConfig.Filters.StripParams = strings.ReplaceAll(modelConfig.Filters.StripParams, macroSlug, macroStr)
|
modelConfig.Filters.StripParams = strings.ReplaceAll(modelConfig.Filters.StripParams, macroSlug, macroStr)
|
||||||
|
modelConfig.Name = strings.ReplaceAll(modelConfig.Name, macroSlug, macroStr)
|
||||||
|
modelConfig.Description = strings.ReplaceAll(modelConfig.Description, macroSlug, macroStr)
|
||||||
|
|
||||||
// Substitute macros in SetParamsByID keys and values
|
// Substitute macros in SetParamsByID keys and values
|
||||||
if len(modelConfig.Filters.SetParamsByID) > 0 {
|
if len(modelConfig.Filters.SetParamsByID) > 0 {
|
||||||
@@ -336,6 +353,8 @@ func LoadConfigFromReader(r io.Reader) (Config, error) {
|
|||||||
modelConfig.Cmd = strings.ReplaceAll(modelConfig.Cmd, macroSlug, macroStr)
|
modelConfig.Cmd = strings.ReplaceAll(modelConfig.Cmd, macroSlug, macroStr)
|
||||||
modelConfig.CmdStop = strings.ReplaceAll(modelConfig.CmdStop, macroSlug, macroStr)
|
modelConfig.CmdStop = strings.ReplaceAll(modelConfig.CmdStop, macroSlug, macroStr)
|
||||||
modelConfig.Proxy = strings.ReplaceAll(modelConfig.Proxy, macroSlug, macroStr)
|
modelConfig.Proxy = strings.ReplaceAll(modelConfig.Proxy, macroSlug, macroStr)
|
||||||
|
modelConfig.Name = strings.ReplaceAll(modelConfig.Name, macroSlug, macroStr)
|
||||||
|
modelConfig.Description = strings.ReplaceAll(modelConfig.Description, macroSlug, macroStr)
|
||||||
|
|
||||||
if len(modelConfig.Metadata) > 0 {
|
if len(modelConfig.Metadata) > 0 {
|
||||||
result, err := substituteMacroInValue(modelConfig.Metadata, "PORT", nextPort)
|
result, err := substituteMacroInValue(modelConfig.Metadata, "PORT", nextPort)
|
||||||
@@ -355,6 +374,8 @@ func LoadConfigFromReader(r io.Reader) (Config, error) {
|
|||||||
"proxy": modelConfig.Proxy,
|
"proxy": modelConfig.Proxy,
|
||||||
"checkEndpoint": modelConfig.CheckEndpoint,
|
"checkEndpoint": modelConfig.CheckEndpoint,
|
||||||
"filters.stripParams": modelConfig.Filters.StripParams,
|
"filters.stripParams": modelConfig.Filters.StripParams,
|
||||||
|
"name": modelConfig.Name,
|
||||||
|
"description": modelConfig.Description,
|
||||||
}
|
}
|
||||||
|
|
||||||
for fieldName, fieldValue := range fieldMap {
|
for fieldName, fieldValue := range fieldMap {
|
||||||
|
|||||||
@@ -848,6 +848,71 @@ func TestConfig_APIKeys_EnvMacros(t *testing.T) {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestConfig_GlobalTTL(t *testing.T) {
|
||||||
|
t.Run("globalTTL sets default for models", func(t *testing.T) {
|
||||||
|
content := `
|
||||||
|
globalTTL: 300
|
||||||
|
models:
|
||||||
|
model1:
|
||||||
|
cmd: server --port ${PORT}
|
||||||
|
`
|
||||||
|
config, err := LoadConfigFromReader(strings.NewReader(content))
|
||||||
|
assert.NoError(t, err)
|
||||||
|
assert.Equal(t, 300, config.GlobalTTL)
|
||||||
|
assert.Equal(t, 300, config.Models["model1"].UnloadAfter)
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("model ttl=0 overrides globalTTL", func(t *testing.T) {
|
||||||
|
content := `
|
||||||
|
globalTTL: 300
|
||||||
|
models:
|
||||||
|
model1:
|
||||||
|
cmd: server --port ${PORT}
|
||||||
|
ttl: 0
|
||||||
|
`
|
||||||
|
config, err := LoadConfigFromReader(strings.NewReader(content))
|
||||||
|
assert.NoError(t, err)
|
||||||
|
assert.Equal(t, 0, config.Models["model1"].UnloadAfter)
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("model explicit ttl overrides globalTTL", func(t *testing.T) {
|
||||||
|
content := `
|
||||||
|
globalTTL: 300
|
||||||
|
models:
|
||||||
|
model1:
|
||||||
|
cmd: server --port ${PORT}
|
||||||
|
ttl: 600
|
||||||
|
`
|
||||||
|
config, err := LoadConfigFromReader(strings.NewReader(content))
|
||||||
|
assert.NoError(t, err)
|
||||||
|
assert.Equal(t, 600, config.Models["model1"].UnloadAfter)
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("globalTTL defaults to 0", func(t *testing.T) {
|
||||||
|
content := `
|
||||||
|
models:
|
||||||
|
model1:
|
||||||
|
cmd: server --port ${PORT}
|
||||||
|
`
|
||||||
|
config, err := LoadConfigFromReader(strings.NewReader(content))
|
||||||
|
assert.NoError(t, err)
|
||||||
|
assert.Equal(t, 0, config.GlobalTTL)
|
||||||
|
assert.Equal(t, 0, config.Models["model1"].UnloadAfter)
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("negative globalTTL rejected", func(t *testing.T) {
|
||||||
|
content := `
|
||||||
|
globalTTL: -1
|
||||||
|
models:
|
||||||
|
model1:
|
||||||
|
cmd: server --port ${PORT}
|
||||||
|
`
|
||||||
|
_, err := LoadConfigFromReader(strings.NewReader(content))
|
||||||
|
assert.Error(t, err)
|
||||||
|
assert.Contains(t, err.Error(), "globalTTL must be >= 0")
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
func TestConfig_EnvMacros(t *testing.T) {
|
func TestConfig_EnvMacros(t *testing.T) {
|
||||||
t.Run("basic env substitution in cmd", func(t *testing.T) {
|
t.Run("basic env substitution in cmd", func(t *testing.T) {
|
||||||
t.Setenv("TEST_MODEL_PATH", "/opt/models")
|
t.Setenv("TEST_MODEL_PATH", "/opt/models")
|
||||||
|
|||||||
@@ -104,6 +104,62 @@ models:
|
|||||||
assert.Contains(t, err.Error(), "self-reference")
|
assert.Contains(t, err.Error(), "self-reference")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Test macro substitution in name and description fields
|
||||||
|
func TestConfig_MacroInNameAndDescription(t *testing.T) {
|
||||||
|
content := `
|
||||||
|
startPort: 10000
|
||||||
|
macros:
|
||||||
|
"VARIANT": "Q4_K_M"
|
||||||
|
"FAMILY": "llama"
|
||||||
|
|
||||||
|
models:
|
||||||
|
my-model:
|
||||||
|
cmd: echo ok
|
||||||
|
proxy: http://localhost:8080
|
||||||
|
name: "${FAMILY} ${VARIANT}"
|
||||||
|
description: "A ${FAMILY} model in ${VARIANT} format"
|
||||||
|
`
|
||||||
|
|
||||||
|
config, err := LoadConfigFromReader(strings.NewReader(content))
|
||||||
|
assert.NoError(t, err)
|
||||||
|
assert.Equal(t, "llama Q4_K_M", config.Models["my-model"].Name)
|
||||||
|
assert.Equal(t, "A llama model in Q4_K_M format", config.Models["my-model"].Description)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Test MODEL_ID macro in name and description fields
|
||||||
|
func TestConfig_ModelIDInNameAndDescription(t *testing.T) {
|
||||||
|
content := `
|
||||||
|
startPort: 10000
|
||||||
|
models:
|
||||||
|
llama-3b:
|
||||||
|
cmd: echo ok
|
||||||
|
proxy: http://localhost:8080
|
||||||
|
name: "Model: ${MODEL_ID}"
|
||||||
|
description: "Running ${MODEL_ID}"
|
||||||
|
`
|
||||||
|
|
||||||
|
config, err := LoadConfigFromReader(strings.NewReader(content))
|
||||||
|
assert.NoError(t, err)
|
||||||
|
assert.Equal(t, "Model: llama-3b", config.Models["llama-3b"].Name)
|
||||||
|
assert.Equal(t, "Running llama-3b", config.Models["llama-3b"].Description)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Test unknown macro in name or description returns an error
|
||||||
|
func TestConfig_UnknownMacroInNameDescription(t *testing.T) {
|
||||||
|
content := `
|
||||||
|
startPort: 10000
|
||||||
|
models:
|
||||||
|
test:
|
||||||
|
cmd: echo ok
|
||||||
|
proxy: http://localhost:8080
|
||||||
|
name: "Model ${UNDEFINED}"
|
||||||
|
`
|
||||||
|
|
||||||
|
_, err := LoadConfigFromReader(strings.NewReader(content))
|
||||||
|
assert.Error(t, err)
|
||||||
|
assert.Contains(t, err.Error(), "UNDEFINED")
|
||||||
|
}
|
||||||
|
|
||||||
// Test undefined macro reference error
|
// Test undefined macro reference error
|
||||||
func TestConfig_UndefinedMacroReference(t *testing.T) {
|
func TestConfig_UndefinedMacroReference(t *testing.T) {
|
||||||
content := `
|
content := `
|
||||||
|
|||||||
@@ -5,6 +5,10 @@ import (
|
|||||||
"runtime"
|
"runtime"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
const (
|
||||||
|
MODEL_CONFIG_DEFAULT_TTL = -1
|
||||||
|
)
|
||||||
|
|
||||||
type ModelConfig struct {
|
type ModelConfig struct {
|
||||||
Cmd string `yaml:"cmd"`
|
Cmd string `yaml:"cmd"`
|
||||||
CmdStop string `yaml:"cmdStop"`
|
CmdStop string `yaml:"cmdStop"`
|
||||||
@@ -47,7 +51,7 @@ func (m *ModelConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
|
|||||||
Aliases: []string{},
|
Aliases: []string{},
|
||||||
Env: []string{},
|
Env: []string{},
|
||||||
CheckEndpoint: "/health",
|
CheckEndpoint: "/health",
|
||||||
UnloadAfter: 0,
|
UnloadAfter: MODEL_CONFIG_DEFAULT_TTL, // use GlobalTTL
|
||||||
Unlisted: false,
|
Unlisted: false,
|
||||||
UseModelName: "",
|
UseModelName: "",
|
||||||
ConcurrencyLimit: 0,
|
ConcurrencyLimit: 0,
|
||||||
|
|||||||
@@ -350,6 +350,11 @@ func processStreamingResponse(modelID string, start time.Time, body []byte) (Tok
|
|||||||
usage := parsed.Get("usage")
|
usage := parsed.Get("usage")
|
||||||
timings := parsed.Get("timings")
|
timings := parsed.Get("timings")
|
||||||
|
|
||||||
|
// v1/responses format nests usage under response.usage
|
||||||
|
if !usage.Exists() {
|
||||||
|
usage = parsed.Get("response.usage")
|
||||||
|
}
|
||||||
|
|
||||||
if usage.Exists() || timings.Exists() {
|
if usage.Exists() || timings.Exists() {
|
||||||
return parseMetrics(modelID, start, usage, timings)
|
return parseMetrics(modelID, start, usage, timings)
|
||||||
}
|
}
|
||||||
@@ -503,9 +508,9 @@ func filterAcceptEncoding(acceptEncoding string) string {
|
|||||||
supported := map[string]bool{"gzip": true, "deflate": true}
|
supported := map[string]bool{"gzip": true, "deflate": true}
|
||||||
var filtered []string
|
var filtered []string
|
||||||
|
|
||||||
for _, part := range strings.Split(acceptEncoding, ",") {
|
for part := range strings.SplitSeq(acceptEncoding, ",") {
|
||||||
// Parse encoding and optional quality value (e.g., "gzip;q=1.0")
|
// Parse encoding and optional quality value (e.g., "gzip;q=1.0")
|
||||||
encoding := strings.TrimSpace(strings.Split(part, ";")[0])
|
encoding, _, _ := strings.Cut(strings.TrimSpace(part), ";")
|
||||||
if supported[strings.ToLower(encoding)] {
|
if supported[strings.ToLower(encoding)] {
|
||||||
filtered = append(filtered, strings.TrimSpace(part))
|
filtered = append(filtered, strings.TrimSpace(part))
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -709,6 +709,35 @@ data: [DONE]
|
|||||||
assert.Equal(t, 0, metrics[0].OutputTokens)
|
assert.Equal(t, 0, metrics[0].OutputTokens)
|
||||||
})
|
})
|
||||||
|
|
||||||
|
t.Run("v1/responses format with nested response.usage", func(t *testing.T) {
|
||||||
|
mm := newMetricsMonitor(testLogger, 10, 0)
|
||||||
|
|
||||||
|
// v1/responses SSE format: usage is nested under response.usage
|
||||||
|
responseBody := "event: response.completed\n" +
|
||||||
|
`data: {"type":"response.completed","response":{"id":"resp_abc","object":"response","created_at":1773416985,"status":"completed","model":"test-model","output":[],"usage":{"input_tokens":17,"output_tokens":23,"total_tokens":40}}}` +
|
||||||
|
"\n\n"
|
||||||
|
|
||||||
|
nextHandler := func(modelID string, w http.ResponseWriter, r *http.Request) error {
|
||||||
|
w.Header().Set("Content-Type", "text/event-stream")
|
||||||
|
w.WriteHeader(http.StatusOK)
|
||||||
|
w.Write([]byte(responseBody))
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
req := httptest.NewRequest("POST", "/v1/responses", nil)
|
||||||
|
rec := httptest.NewRecorder()
|
||||||
|
ginCtx, _ := gin.CreateTestContext(rec)
|
||||||
|
|
||||||
|
err := mm.wrapHandler("test-model", ginCtx.Writer, req, nextHandler)
|
||||||
|
assert.NoError(t, err)
|
||||||
|
|
||||||
|
metrics := mm.getMetrics()
|
||||||
|
assert.Equal(t, 1, len(metrics))
|
||||||
|
assert.Equal(t, "test-model", metrics[0].Model)
|
||||||
|
assert.Equal(t, 17, metrics[0].InputTokens)
|
||||||
|
assert.Equal(t, 23, metrics[0].OutputTokens)
|
||||||
|
})
|
||||||
|
|
||||||
t.Run("handles empty streaming response records minimal metrics", func(t *testing.T) {
|
t.Run("handles empty streaming response records minimal metrics", func(t *testing.T) {
|
||||||
mm := newMetricsMonitor(testLogger, 10, 0)
|
mm := newMetricsMonitor(testLogger, 10, 0)
|
||||||
|
|
||||||
|
|||||||
+10
-10
@@ -117,12 +117,12 @@ func TestProcess_UnloadAfterTTL(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
expectedMessage := "I_sense_imminent_danger"
|
expectedMessage := "I_sense_imminent_danger"
|
||||||
config := getTestSimpleResponderConfig(expectedMessage)
|
conf := getTestSimpleResponderConfig(expectedMessage)
|
||||||
assert.Equal(t, 0, config.UnloadAfter)
|
assert.Equal(t, config.MODEL_CONFIG_DEFAULT_TTL, conf.UnloadAfter)
|
||||||
config.UnloadAfter = 3 // seconds
|
conf.UnloadAfter = 3 // seconds
|
||||||
assert.Equal(t, 3, config.UnloadAfter)
|
assert.Equal(t, 3, conf.UnloadAfter)
|
||||||
|
|
||||||
process := NewProcess("ttl_test", 2, config, debugLogger, debugLogger)
|
process := NewProcess("ttl_test", 2, conf, debugLogger, debugLogger)
|
||||||
defer process.Stop()
|
defer process.Stop()
|
||||||
|
|
||||||
// this should take 4 seconds
|
// this should take 4 seconds
|
||||||
@@ -159,12 +159,12 @@ func TestProcess_LowTTLValue(t *testing.T) {
|
|||||||
t.Skip("skipping test, edit process_test.go to run it ")
|
t.Skip("skipping test, edit process_test.go to run it ")
|
||||||
}
|
}
|
||||||
|
|
||||||
config := getTestSimpleResponderConfig("fast_ttl")
|
conf := getTestSimpleResponderConfig("fast_ttl")
|
||||||
assert.Equal(t, 0, config.UnloadAfter)
|
assert.Equal(t, config.MODEL_CONFIG_DEFAULT_TTL, conf.UnloadAfter)
|
||||||
config.UnloadAfter = 1 // second
|
conf.UnloadAfter = 1 // second
|
||||||
assert.Equal(t, 1, config.UnloadAfter)
|
assert.Equal(t, 1, conf.UnloadAfter)
|
||||||
|
|
||||||
process := NewProcess("ttl", 2, config, debugLogger, debugLogger)
|
process := NewProcess("ttl", 2, conf, debugLogger, debugLogger)
|
||||||
defer process.Stop()
|
defer process.Stop()
|
||||||
|
|
||||||
for i := 0; i < 100; i++ {
|
for i := 0; i < 100; i++ {
|
||||||
|
|||||||
@@ -730,7 +730,7 @@ func TestProxyManager_RunningEndpoint(t *testing.T) {
|
|||||||
// Verify extended fields are present
|
// Verify extended fields are present
|
||||||
assert.NotEmpty(t, response.Running[0].Cmd, "cmd should be populated")
|
assert.NotEmpty(t, response.Running[0].Cmd, "cmd should be populated")
|
||||||
assert.NotEmpty(t, response.Running[0].Proxy, "proxy should be populated")
|
assert.NotEmpty(t, response.Running[0].Proxy, "proxy should be populated")
|
||||||
assert.Equal(t, 0, response.Running[0].TTL, "ttl should default to 0")
|
assert.Equal(t, -1, response.Running[0].TTL, "ttl should default to -1 (use globalTTL)")
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -0,0 +1 @@
|
|||||||
|
legacy-peer-deps=true
|
||||||
Generated
+976
-1186
File diff suppressed because it is too large
Load Diff
@@ -12,18 +12,18 @@
|
|||||||
"test:watch": "vitest"
|
"test:watch": "vitest"
|
||||||
},
|
},
|
||||||
"devDependencies": {
|
"devDependencies": {
|
||||||
"@sveltejs/vite-plugin-svelte": "^5.0.3",
|
"@sveltejs/vite-plugin-svelte": "^7.0.0",
|
||||||
"@tailwindcss/vite": "^4.1.8",
|
"@tailwindcss/vite": "^4.1.8",
|
||||||
"@tsconfig/svelte": "^5.0.4",
|
"@tsconfig/svelte": "^5.0.4",
|
||||||
"@types/hast": "^3.0.4",
|
"@types/hast": "^3.0.4",
|
||||||
"@types/node": "^25.1.0",
|
"@types/node": "^25.1.0",
|
||||||
"svelte": "^5.19.0",
|
"svelte": "^5.46.4",
|
||||||
"svelte-check": "^4.1.4",
|
"svelte-check": "^4.1.4",
|
||||||
"tailwindcss": "^4.1.8",
|
"tailwindcss": "^4.1.8",
|
||||||
"typescript": "~5.8.3",
|
"typescript": "~5.8.3",
|
||||||
"vite": "^6.3.5",
|
"vite": "^8.0.0",
|
||||||
"vite-plugin-compression2": "^2.4.0",
|
"vite-plugin-compression2": "^2.5.1",
|
||||||
"vitest": "^4.0.18"
|
"vitest": "^4.1.0"
|
||||||
},
|
},
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"highlight.js": "^11.11.1",
|
"highlight.js": "^11.11.1",
|
||||||
|
|||||||
@@ -116,6 +116,47 @@
|
|||||||
cancelEdit();
|
cancelEdit();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const COPY_SVG = `<svg xmlns="http://www.w3.org/2000/svg" width="14" height="14" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><rect width="14" height="14" x="8" y="8" rx="2" ry="2"/><path d="M4 16c-1.1 0-2-.9-2-2V4c0-1.1.9-2 2-2h10c1.1 0 2 .9 2 2"/></svg>`;
|
||||||
|
const CHECK_SVG = `<svg xmlns="http://www.w3.org/2000/svg" width="14" height="14" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M20 6 9 17l-5-5"/></svg>`;
|
||||||
|
|
||||||
|
function codeBlockCopy(node: HTMLElement) {
|
||||||
|
function attachButtons() {
|
||||||
|
node.querySelectorAll<HTMLPreElement>('pre:not([data-copy-btn])').forEach(pre => {
|
||||||
|
pre.setAttribute('data-copy-btn', 'true');
|
||||||
|
const btn = document.createElement('button');
|
||||||
|
btn.className = 'code-copy-btn';
|
||||||
|
btn.title = 'Copy code';
|
||||||
|
btn.innerHTML = COPY_SVG;
|
||||||
|
btn.addEventListener('click', async () => {
|
||||||
|
const text = pre.querySelector('code')?.textContent ?? pre.textContent ?? '';
|
||||||
|
try {
|
||||||
|
if (navigator.clipboard && window.isSecureContext) {
|
||||||
|
await navigator.clipboard.writeText(text);
|
||||||
|
} else {
|
||||||
|
const ta = document.createElement('textarea');
|
||||||
|
ta.value = text;
|
||||||
|
ta.style.cssText = 'position:fixed;left:-9999px';
|
||||||
|
document.body.appendChild(ta);
|
||||||
|
ta.select();
|
||||||
|
document.execCommand('copy');
|
||||||
|
document.body.removeChild(ta);
|
||||||
|
}
|
||||||
|
btn.innerHTML = CHECK_SVG;
|
||||||
|
btn.classList.add('copied');
|
||||||
|
setTimeout(() => { btn.innerHTML = COPY_SVG; btn.classList.remove('copied'); }, 2000);
|
||||||
|
} catch (e) {
|
||||||
|
console.error('copy failed', e);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
pre.appendChild(btn);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
attachButtons();
|
||||||
|
const mo = new MutationObserver(attachButtons);
|
||||||
|
mo.observe(node, { childList: true, subtree: true });
|
||||||
|
return { destroy: () => mo.disconnect() };
|
||||||
|
}
|
||||||
</script>
|
</script>
|
||||||
|
|
||||||
<div class="flex {role === 'user' ? 'justify-end' : 'justify-start'} mb-4">
|
<div class="flex {role === 'user' ? 'justify-end' : 'justify-start'} mb-4">
|
||||||
@@ -174,7 +215,7 @@
|
|||||||
{#if showRaw}
|
{#if showRaw}
|
||||||
<div class="whitespace-pre-wrap font-mono text-sm">{textContent}</div>
|
<div class="whitespace-pre-wrap font-mono text-sm">{textContent}</div>
|
||||||
{:else}
|
{:else}
|
||||||
<div class="prose prose-sm dark:prose-invert max-w-none">
|
<div class="prose prose-sm dark:prose-invert max-w-none" use:codeBlockCopy>
|
||||||
{#each renderedParts.blocks as block (block.id)}
|
{#each renderedParts.blocks as block (block.id)}
|
||||||
{@html block.html}
|
{@html block.html}
|
||||||
{/each}
|
{/each}
|
||||||
@@ -299,14 +340,42 @@
|
|||||||
|
|
||||||
<style>
|
<style>
|
||||||
.prose :global(pre) {
|
.prose :global(pre) {
|
||||||
|
position: relative;
|
||||||
background-color: var(--color-surface);
|
background-color: var(--color-surface);
|
||||||
border: 1px solid var(--color-border, rgba(128, 128, 128, 0.2));
|
border: 1px solid var(--color-border, rgba(128, 128, 128, 0.2));
|
||||||
border-radius: 0.375rem;
|
border-radius: 0.375rem;
|
||||||
padding: 0.75rem;
|
padding: 0.75rem;
|
||||||
|
padding-right: 2.5rem;
|
||||||
overflow-x: auto;
|
overflow-x: auto;
|
||||||
margin: 0.5rem 0;
|
margin: 0.5rem 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
.prose :global(.code-copy-btn) {
|
||||||
|
position: absolute;
|
||||||
|
top: 0.375rem;
|
||||||
|
right: 0.375rem;
|
||||||
|
display: flex;
|
||||||
|
align-items: center;
|
||||||
|
justify-content: center;
|
||||||
|
padding: 0.25rem;
|
||||||
|
border-radius: 0.25rem;
|
||||||
|
border: 1px solid var(--color-border);
|
||||||
|
background: var(--color-surface);
|
||||||
|
color: var(--color-txtsecondary);
|
||||||
|
cursor: pointer;
|
||||||
|
transition: background-color 0.15s;
|
||||||
|
line-height: 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
.prose :global(.code-copy-btn:hover) {
|
||||||
|
background: var(--color-secondary);
|
||||||
|
}
|
||||||
|
|
||||||
|
.prose :global(.code-copy-btn.copied) {
|
||||||
|
color: var(--color-success);
|
||||||
|
opacity: 1;
|
||||||
|
}
|
||||||
|
|
||||||
.prose :global(code) {
|
.prose :global(code) {
|
||||||
font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, monospace;
|
font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, monospace;
|
||||||
font-size: 0.875em;
|
font-size: 0.875em;
|
||||||
|
|||||||
@@ -62,7 +62,7 @@ export function enableAPIEvents(enabled: boolean): void {
|
|||||||
const newModels = JSON.parse(message.data) as Model[];
|
const newModels = JSON.parse(message.data) as Model[];
|
||||||
// Sort models by name and id
|
// Sort models by name and id
|
||||||
newModels.sort((a, b) => {
|
newModels.sort((a, b) => {
|
||||||
return (a.name + a.id).localeCompare(b.name + b.id);
|
return (a.name + a.id).localeCompare(b.name + b.id, undefined, { numeric : true} );
|
||||||
});
|
});
|
||||||
models.set(newModels);
|
models.set(newModels);
|
||||||
break;
|
break;
|
||||||
|
|||||||
Reference in New Issue
Block a user