Compare commits
39 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| d567fa78cb | |||
| 187f1ae27a | |||
| 0ae56b1eb9 | |||
| e46cbeb2bf | |||
| a0578f0007 | |||
| d207a059a4 | |||
| 040ee1e284 | |||
| 82cad1b84e | |||
| 55c3678906 | |||
| 8b5a62d92a | |||
| d1e4c8ee77 | |||
| 11f8afead8 | |||
| 749819ef47 | |||
| 0ab9e74333 | |||
| b20be6dcd1 | |||
| fc24722258 | |||
| 2b087dffb1 | |||
| 746c083a87 | |||
| 8dd91e99e8 | |||
| 136dcdc25f | |||
| 767b8015fa | |||
| f0144a2361 | |||
| 32bc781326 | |||
| 316ad63f76 | |||
| e37077a963 | |||
| eff9b60434 | |||
| 9bcddad91b | |||
| a15e47922c | |||
| 0ab214d1c8 | |||
| d07b063ab6 | |||
| 826210dac9 | |||
| 6cf1317341 | |||
| 8e84b2ec4f | |||
| ed77385d08 | |||
| 92b90447e8 | |||
| 62aea0e83d | |||
| 8c660dcb90 | |||
| f6877b8175 | |||
| 9b3a33d7b9 |
@@ -15,6 +15,8 @@ reviews:
|
||||
auto_review:
|
||||
enabled: false
|
||||
drafts: false
|
||||
unit_tests:
|
||||
enabled: false
|
||||
chat:
|
||||
auto_reply: true
|
||||
issue_enrichment:
|
||||
|
||||
@@ -44,13 +44,10 @@ jobs:
|
||||
|
||||
echo "✓ config-schema.json is valid"
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 #v6.2.0
|
||||
- name: Set up Go
|
||||
uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c #v6.4.0
|
||||
with:
|
||||
python-version: "3.x"
|
||||
|
||||
- name: Install check-jsonschema
|
||||
run: pip install check-jsonschema
|
||||
go-version-file: go.mod
|
||||
|
||||
- name: Validate config.example.yaml against schema
|
||||
run: check-jsonschema --schemafile config-schema.json config.example.yaml
|
||||
run: go test ./internal/config/ -run TestConfig_ExampleMatchesSchema -v
|
||||
|
||||
@@ -88,10 +88,11 @@ Real time log streaming:
|
||||
llama-swap can be installed in multiple ways
|
||||
|
||||
1. Docker
|
||||
2. Homebrew (OSX and Linux)
|
||||
3. WinGet
|
||||
4. From release binaries
|
||||
5. From source
|
||||
2. Homebrew (macOS and Linux)
|
||||
3. MacPorts (macOS)
|
||||
4. WinGet
|
||||
5. From release binaries
|
||||
6. From source
|
||||
|
||||
### Docker Install ([download images](https://github.com/mostlygeek/llama-swap/pkgs/container/llama-swap))
|
||||
|
||||
@@ -155,6 +156,16 @@ brew install llama-swap
|
||||
llama-swap --config path/to/config.yaml --listen localhost:8080
|
||||
```
|
||||
|
||||
### MacPorts (macOS)
|
||||
|
||||
> [!NOTE]
|
||||
> Maintained by the MacPorts community - [llama-swap port](https://ports.macports.org/port/llama-swap). It is not an official part of llama-swap.
|
||||
|
||||
```shell
|
||||
sudo port install llama-swap
|
||||
llama-swap --config path/to/config.yaml --listen localhost:8080
|
||||
```
|
||||
|
||||
### WinGet Install (Windows)
|
||||
|
||||
> [!NOTE]
|
||||
|
||||
+227
-73
@@ -82,6 +82,78 @@
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"description": "Timeout settings for proxy connections."
|
||||
},
|
||||
"groupsConfig": {
|
||||
"type": "object",
|
||||
"additionalProperties": {
|
||||
"type": "object",
|
||||
"required": [
|
||||
"members"
|
||||
],
|
||||
"properties": {
|
||||
"swap": {
|
||||
"type": "boolean",
|
||||
"default": true,
|
||||
"description": "Controls model swapping behaviour within the group. True: only one model runs at a time. False: all models can run together."
|
||||
},
|
||||
"exclusive": {
|
||||
"type": "boolean",
|
||||
"default": true,
|
||||
"description": "Controls how the group affects other groups. True: causes all other groups to unload when this group runs a model. False: does not affect other groups."
|
||||
},
|
||||
"persistent": {
|
||||
"type": "boolean",
|
||||
"default": false,
|
||||
"description": "Prevents other groups from unloading the models in this group. Does not affect individual model behaviour."
|
||||
},
|
||||
"members": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
},
|
||||
"description": "Array of model IDs that are members of this group. Model IDs must be defined in models."
|
||||
}
|
||||
}
|
||||
},
|
||||
"description": "A dictionary of group settings. Provides advanced controls over model swapping behaviour. Model IDs must be defined in models. A model can only be a member of one group. Behaviour controlled via swap, exclusive, persistent."
|
||||
},
|
||||
"matrixConfig": {
|
||||
"type": "object",
|
||||
"description": "Solver-based alternative to groups. Declares valid combinations of concurrent models. The solver minimizes eviction cost when swapping. A config must use either groups or matrix, not both.",
|
||||
"required": [
|
||||
"vars",
|
||||
"sets"
|
||||
],
|
||||
"properties": {
|
||||
"vars": {
|
||||
"type": "object",
|
||||
"description": "Short names for models. Keys must be alphanumeric, 1-8 characters. All sets and evict_costs must use these IDs.",
|
||||
"minProperties": 1,
|
||||
"additionalProperties": {
|
||||
"type": "string"
|
||||
},
|
||||
"propertyNames": {
|
||||
"pattern": "^[a-zA-Z0-9]{1,8}$"
|
||||
}
|
||||
},
|
||||
"evict_costs": {
|
||||
"type": "object",
|
||||
"description": "Relative cost of evicting a running model. Models not listed default to 1. Values must be positive integers.",
|
||||
"additionalProperties": {
|
||||
"type": "integer",
|
||||
"minimum": 1
|
||||
}
|
||||
},
|
||||
"sets": {
|
||||
"type": "object",
|
||||
"description": "Named sets of concurrent model combinations. Values are DSL strings using & (AND), | (OR), () (grouping), and +ref (inline another set). Definition order is used for tie-breaking.",
|
||||
"minProperties": 1,
|
||||
"additionalProperties": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
},
|
||||
"additionalProperties": false
|
||||
}
|
||||
},
|
||||
"properties": {
|
||||
@@ -306,81 +378,68 @@
|
||||
},
|
||||
"timeouts": {
|
||||
"$ref": "#/definitions/timeouts"
|
||||
},
|
||||
"capabilities": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"in": {
|
||||
"type": "array",
|
||||
"minItems": 1,
|
||||
"uniqueItems": true,
|
||||
"default": [],
|
||||
"items": {
|
||||
"type": "string",
|
||||
"enum": [
|
||||
"text",
|
||||
"audio",
|
||||
"image"
|
||||
]
|
||||
},
|
||||
"description": "List of input modalities understood by the model."
|
||||
},
|
||||
"out": {
|
||||
"type": "array",
|
||||
"minItems": 1,
|
||||
"uniqueItems": true,
|
||||
"default": [],
|
||||
"items": {
|
||||
"type": "string",
|
||||
"enum": [
|
||||
"text",
|
||||
"audio",
|
||||
"image"
|
||||
]
|
||||
},
|
||||
"description": "List of output modalities generated by the model."
|
||||
},
|
||||
"tools": {
|
||||
"type": "boolean",
|
||||
"default": false,
|
||||
"description": "Whether the model supports function calling."
|
||||
},
|
||||
"reranker": {
|
||||
"type": "boolean",
|
||||
"default": false,
|
||||
"description": "Whether the model supports the /v1/rerank endpoint."
|
||||
},
|
||||
"context": {
|
||||
"type": "integer",
|
||||
"minimum": 0,
|
||||
"default": 0,
|
||||
"description": "Maximum token context length supported by the model."
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"description": "Defines what the model accepts for input, output and other metadata. Used in v1/models to inform clients what the model can do. An empty capabilities block (all zero values) is treated as not configured."
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"groups": {
|
||||
"type": "object",
|
||||
"additionalProperties": {
|
||||
"type": "object",
|
||||
"required": [
|
||||
"members"
|
||||
],
|
||||
"properties": {
|
||||
"swap": {
|
||||
"type": "boolean",
|
||||
"default": true,
|
||||
"description": "Controls model swapping behaviour within the group. True: only one model runs at a time. False: all models can run together."
|
||||
},
|
||||
"exclusive": {
|
||||
"type": "boolean",
|
||||
"default": true,
|
||||
"description": "Controls how the group affects other groups. True: causes all other groups to unload when this group runs a model. False: does not affect other groups."
|
||||
},
|
||||
"persistent": {
|
||||
"type": "boolean",
|
||||
"default": false,
|
||||
"description": "Prevents other groups from unloading the models in this group. Does not affect individual model behaviour."
|
||||
},
|
||||
"members": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
},
|
||||
"description": "Array of model IDs that are members of this group. Model IDs must be defined in models."
|
||||
}
|
||||
}
|
||||
},
|
||||
"description": "A dictionary of group settings. Provides advanced controls over model swapping behaviour. Model IDs must be defined in models. A model can only be a member of one group. Behaviour controlled via swap, exclusive, persistent."
|
||||
"$ref": "#/definitions/groupsConfig"
|
||||
},
|
||||
"matrix": {
|
||||
"type": "object",
|
||||
"description": "Solver-based alternative to groups. Declares valid combinations of concurrent models. The solver minimizes eviction cost when swapping. A config must use either groups or matrix, not both.",
|
||||
"required": [
|
||||
"vars",
|
||||
"sets"
|
||||
],
|
||||
"properties": {
|
||||
"vars": {
|
||||
"type": "object",
|
||||
"description": "Short names for models. Keys must be alphanumeric, 1-8 characters. All sets and evict_costs must use these IDs.",
|
||||
"minProperties": 1,
|
||||
"additionalProperties": {
|
||||
"type": "string"
|
||||
},
|
||||
"propertyNames": {
|
||||
"pattern": "^[a-zA-Z0-9]{1,8}$"
|
||||
}
|
||||
},
|
||||
"evict_costs": {
|
||||
"type": "object",
|
||||
"description": "Relative cost of evicting a running model. Models not listed default to 1. Values must be positive integers.",
|
||||
"additionalProperties": {
|
||||
"type": "integer",
|
||||
"minimum": 1
|
||||
}
|
||||
},
|
||||
"sets": {
|
||||
"type": "object",
|
||||
"description": "Named sets of concurrent model combinations. Values are DSL strings using & (AND), | (OR), () (grouping), and +ref (inline another set). Definition order is used for tie-breaking.",
|
||||
"minProperties": 1,
|
||||
"additionalProperties": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
},
|
||||
"additionalProperties": false
|
||||
"$ref": "#/definitions/matrixConfig"
|
||||
},
|
||||
"hooks": {
|
||||
"type": "object",
|
||||
@@ -512,28 +571,123 @@
|
||||
},
|
||||
"default": {},
|
||||
"description": "A dictionary of remote peers and models they provide. Peers can be another llama-swap or any server that provides the /v1/ generative API endpoints supported by llama-swap."
|
||||
},
|
||||
"upstream": {
|
||||
"type": "object",
|
||||
"description": "Controls behaviour of the /upstream passthrough endpoint. Recommended to only use in special use cases; leaving it as the default will typically be the best experience.",
|
||||
"properties": {
|
||||
"ignorePaths": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
},
|
||||
"default": [
|
||||
".*\\.(js|json|css|png|gif|jpg|jpeg|ico|txt)$"
|
||||
],
|
||||
"description": "List of RE2 compatible regular expressions. Any request to a path matching any of the regular expressions will be ignored and not trigger a swap. When not specified, defaults to a pattern matching common static-asset suffixes (.js, .json, .css, .png, .gif, .jpg, .jpeg, .ico, .txt)."
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"default": {}
|
||||
},
|
||||
"routing": {
|
||||
"type": "object",
|
||||
"description": "Canonical routing/scheduling configuration. Alternative to the legacy top-level 'groups'/'matrix' keys; a config must not use both styles.",
|
||||
"properties": {
|
||||
"scheduler": {
|
||||
"type": "object",
|
||||
"description": "Scheduler configuration. Decides the order in which queued requests are serviced.",
|
||||
"properties": {
|
||||
"use": {
|
||||
"type": "string",
|
||||
"enum": [
|
||||
"fifo"
|
||||
],
|
||||
"default": "fifo",
|
||||
"description": "Scheduler to use. Only 'fifo' is currently supported."
|
||||
},
|
||||
"settings": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"fifo": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"priority": {
|
||||
"type": "object",
|
||||
"description": "Per-model priority. Keys are model IDs, values are integers (default 0). Higher values are serviced first.",
|
||||
"additionalProperties": {
|
||||
"type": "integer"
|
||||
}
|
||||
}
|
||||
},
|
||||
"additionalProperties": false
|
||||
}
|
||||
},
|
||||
"additionalProperties": false
|
||||
}
|
||||
},
|
||||
"additionalProperties": false
|
||||
},
|
||||
"router": {
|
||||
"type": "object",
|
||||
"description": "Router configuration. Selects between the group and matrix swapping strategies.",
|
||||
"properties": {
|
||||
"use": {
|
||||
"type": "string",
|
||||
"enum": [
|
||||
"group",
|
||||
"matrix"
|
||||
],
|
||||
"default": "group",
|
||||
"description": "Router to use. 'group' uses static groups, 'matrix' uses the solver-based swap matrix."
|
||||
},
|
||||
"settings": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"groups": {
|
||||
"$ref": "#/definitions/groupsConfig"
|
||||
},
|
||||
"matrix": {
|
||||
"$ref": "#/definitions/matrixConfig"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false
|
||||
}
|
||||
},
|
||||
"additionalProperties": false
|
||||
}
|
||||
},
|
||||
"additionalProperties": false
|
||||
}
|
||||
},
|
||||
"allOf": [
|
||||
{
|
||||
"if": {
|
||||
"required": ["groups"]
|
||||
"required": [
|
||||
"groups"
|
||||
]
|
||||
},
|
||||
"then": {
|
||||
"not": {
|
||||
"required": ["matrix"]
|
||||
"required": [
|
||||
"matrix"
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"if": {
|
||||
"required": ["matrix"]
|
||||
"required": [
|
||||
"matrix"
|
||||
]
|
||||
},
|
||||
"then": {
|
||||
"not": {
|
||||
"required": ["groups"]
|
||||
"required": [
|
||||
"groups"
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
+213
-87
@@ -134,6 +134,18 @@ apiKeys:
|
||||
- "${env.API_KEY_1}"
|
||||
- "${env.API_KEY_2}"
|
||||
|
||||
# upstream: controls behaviour of the /upstream passthrough endpoint
|
||||
# - optional, default: empty dictionary
|
||||
# - recommended to only use in special use cases. Leaving it as the
|
||||
# default will typically be the best experience
|
||||
upstream:
|
||||
# ignorePaths: list of RE2 compatible regular expressions
|
||||
# - default: (see below)
|
||||
# - any request to a path matching any of the regular expressions
|
||||
# will be ignored and not trigger a swap
|
||||
ignorePaths:
|
||||
- '.*\.(js|json|css|png|gif|jpg|jpeg|ico|txt)$'
|
||||
|
||||
# models: a dictionary of model configurations
|
||||
# - required
|
||||
# - each key is the model's ID, used in API requests
|
||||
@@ -312,6 +324,37 @@ models:
|
||||
tlsHandshake: 10
|
||||
idleConn: 90
|
||||
|
||||
# capabilities: defines what the model accepts for input, output and other metadata
|
||||
# - optional; omitted or all-zero means no capabilities
|
||||
# - used in v1/models to inform clients what the model can do
|
||||
capabilities:
|
||||
# in: list of modalities understood by the model
|
||||
# - default: []
|
||||
# - valid: text, audio, image
|
||||
in:
|
||||
- text
|
||||
- audio
|
||||
- image
|
||||
# out: list of modalities generated by the model
|
||||
# - default: []
|
||||
# - valid: text, audio, image
|
||||
out:
|
||||
- text
|
||||
- audio
|
||||
- image
|
||||
# tools: the model supports function calling
|
||||
# - default: false
|
||||
tools: true
|
||||
|
||||
# reranker: the model supports the /v1/rerank endpoint
|
||||
# - default: false
|
||||
reranker: false
|
||||
|
||||
# context: the maximum token context length supported
|
||||
# - default: 0
|
||||
# - must be an integer >= 0
|
||||
context: 32000
|
||||
|
||||
# Unlisted model example:
|
||||
"qwen-unlisted":
|
||||
# unlisted: boolean, true or false
|
||||
@@ -343,93 +386,6 @@ models:
|
||||
# - processes have 5 seconds to shutdown until forceful termination is attempted
|
||||
cmdStop: docker stop ${MODEL_ID}
|
||||
|
||||
# =============================================================================
|
||||
# matrix: run concurrent models with a solver-based swap DSL
|
||||
# =============================================================================
|
||||
#
|
||||
# Matrix or Groups?
|
||||
#
|
||||
# Groups are available and fully supported. The syntax may be easier to use
|
||||
# for simple use cases.
|
||||
#
|
||||
# Documentation can be found here:
|
||||
# https://github.com/mostlygeek/llama-swap/blob/40e39f7/config.example.yaml#L334-L396
|
||||
#
|
||||
# A config can only use a matrix (recommended) or groups. A configuration error
|
||||
# will occur if both are defined. Groups is legacy but is fully supported with
|
||||
# no plans to deprecate it.
|
||||
#
|
||||
# ~~~~~
|
||||
#
|
||||
# The matrix declares valid combinations of models that can run concurrently.
|
||||
# When a model is requested, the solver finds the cheapest way to make it
|
||||
# available by evicting as few (and least costly) running models as possible.
|
||||
#
|
||||
# Solver behavior:
|
||||
# 1. Request arrives for model X
|
||||
# 2. If X is already running, forward immediately. Done.
|
||||
# 3. Find all sets containing X
|
||||
# 4. For each candidate set, compute cost: sum of evict_costs for
|
||||
# every running model NOT in that set
|
||||
# 5. Pick lowest cost candidate. Ties broken by definition order.
|
||||
# 6. Evict what needs to stop. Start X. Forward request.
|
||||
#
|
||||
# Subset semantics: a set [a, b, c] means any subset is valid.
|
||||
# Only the requested model is started — others are not preloaded.
|
||||
#
|
||||
# A model not appearing in any set can only run alone.
|
||||
#
|
||||
matrix:
|
||||
# vars: short names for models (alphanumeric, 1-8 chars)
|
||||
# - required for sets and evict_costs settings
|
||||
# - each entry is a short name to a real model ID. Do not use an alias
|
||||
# - used to keep set DSL logic short and easier to read
|
||||
# - sets and evict_costs only use identifiers defined in vars
|
||||
vars:
|
||||
g: gemma-model
|
||||
q: qwen-model
|
||||
m: mistral-model
|
||||
v: voxtral-model
|
||||
e: reranker-model
|
||||
L: llama-70B
|
||||
sd: stable-diffusion
|
||||
|
||||
# evict_costs: relative cost of losing a running model (default: 1)
|
||||
evict_costs:
|
||||
v: 50 # vllm backend, slow cold start
|
||||
L: 30 # 70B weights, slow to load
|
||||
|
||||
# sets: named sets of concurrent model combinations
|
||||
# Values are DSL strings with operators:
|
||||
# & AND (models run together)
|
||||
# | OR (alternatives)
|
||||
# () grouping
|
||||
# +ref inline another set's expression
|
||||
#
|
||||
# Expansion examples:
|
||||
# "L" → [L]
|
||||
# "a & b" → [a, b]
|
||||
# "a | b" → [a], [b]
|
||||
# "(a | b) & c" → [a, c], [b, c]
|
||||
# "(a | b) & (c | d)" → [a,c], [a,d], [b,c], [b,d]
|
||||
# "+llms & v" → expands llms inline, then applies & v
|
||||
sets:
|
||||
# LLM + TTS: switching between g/q/m won't evict v
|
||||
# expands to: [g,v], [q,v], [m,v]
|
||||
standard: "(g | q | m) & v"
|
||||
|
||||
# LLM + TTS + reranker
|
||||
# expands to: [g,v,e], [q,v,e]
|
||||
with_rerank: "(g | q) & v & e"
|
||||
|
||||
# LLM + image generation, no TTS
|
||||
# expands to: [g,sd], [q,sd]
|
||||
creative: "(g | q) & sd"
|
||||
|
||||
# 70B model uses all GPUs, can only run alone
|
||||
# expands to: [L]
|
||||
full: "L"
|
||||
|
||||
# hooks: a dictionary of event triggers and actions
|
||||
# - optional, default: empty dictionary
|
||||
# - the only supported hook is on_startup
|
||||
@@ -446,6 +402,176 @@ hooks:
|
||||
preload:
|
||||
- "llama"
|
||||
|
||||
# routing:
|
||||
# Controls how llama-swap decides which models can run at the same time and
|
||||
# which get swapped out. Choose one of two swap engines:
|
||||
#
|
||||
# - group: the default engine. Simpler to configure. You define groups of
|
||||
# models that run together, and loading one group typically unloads
|
||||
# the others.
|
||||
#
|
||||
# - matrix: the newer engine. More involved to configure, but far more
|
||||
# flexible. It uses a small expression language to describe which
|
||||
# model combinations are allowed to run concurrently, enabling
|
||||
# setups that groups cannot express.
|
||||
#
|
||||
# The routing section is optional.
|
||||
routing:
|
||||
router:
|
||||
# use: a string defining which engine to use
|
||||
# - optional, default: "group"
|
||||
# - valid values: group, matrix
|
||||
use: group
|
||||
|
||||
# settings: a dictionary of settings for the specific engines
|
||||
settings:
|
||||
# groups: a dictionary of named groups
|
||||
# - optional, default: empty dictionary
|
||||
# - lets you keep some models loaded while others swap out
|
||||
# - every member must be a model ID defined in the models section
|
||||
# - a model can belong to only one group
|
||||
# - behaviour is set per group with the `swap`, `exclusive` and
|
||||
# `persistent` fields
|
||||
# - see issue #109 for details
|
||||
#
|
||||
# NOTE: the model names below are illustrative and are not defined above.
|
||||
groups:
|
||||
# group1 reproduces llama-swap's default behaviour: only one model
|
||||
# runs at a time across the entire instance.
|
||||
"group1":
|
||||
# swap: how members of this group swap among themselves
|
||||
# - optional, default: true
|
||||
# - true: only one member runs at a time
|
||||
# - false: all members can run together, no swapping
|
||||
swap: true
|
||||
|
||||
# exclusive: how this group affects other groups
|
||||
# - optional, default: true
|
||||
# - true: running a member unloads every other group
|
||||
# - false: running a member leaves other groups untouched
|
||||
exclusive: true
|
||||
|
||||
# members: the model IDs in this group
|
||||
# - required
|
||||
members:
|
||||
- "llama"
|
||||
- "qwen-unlisted"
|
||||
|
||||
# group2: members all run together, but loading any other group
|
||||
# unloads them.
|
||||
"group2":
|
||||
# swap: false lets all members stay loaded at once
|
||||
swap: false
|
||||
|
||||
# exclusive: false means requesting a member loads it without
|
||||
# unloading any other group
|
||||
exclusive: false
|
||||
members:
|
||||
- "docker-llama"
|
||||
- "modelA"
|
||||
- "modelB"
|
||||
|
||||
# forever: a persistent group that other groups can never unload.
|
||||
"forever":
|
||||
# persistent: other groups cannot unload this group's members
|
||||
# - optional, default: false
|
||||
# - has no effect on swapping within the group
|
||||
persistent: true
|
||||
|
||||
# swap/exclusive: false keeps all members loaded and avoids
|
||||
# unloading other groups
|
||||
swap: false
|
||||
exclusive: false
|
||||
members:
|
||||
- "forever-modelA"
|
||||
- "forever-modelB"
|
||||
- "forever-modelc"
|
||||
|
||||
# The matrix lists the model combinations that are allowed to run
|
||||
# concurrently. When a model is requested, the solver makes room for it
|
||||
# by evicting as few running models as possible, preferring to keep the
|
||||
# costliest ones loaded.
|
||||
#
|
||||
# Solver behaviour:
|
||||
# 1. A request arrives for model X.
|
||||
# 2. If X is already running, forward the request. Done.
|
||||
# 3. Collect every set that contains X.
|
||||
# 4. For each set, add up the evict_costs of the running models that
|
||||
# are NOT in that set — that is the set's cost.
|
||||
# 5. Choose the lowest-cost set. Break ties by definition order.
|
||||
# 6. Evict the models outside that set, start X, forward the request.
|
||||
#
|
||||
# Subset semantics: a set [a, b, c] also permits any subset of itself.
|
||||
# Only the requested model is started; the others are not preloaded.
|
||||
#
|
||||
# A model that appears in no set can only run on its own.
|
||||
#
|
||||
matrix:
|
||||
# vars: short aliases for model IDs (alphanumeric, 1-8 chars)
|
||||
# - required: sets and evict_costs reference these names, not model IDs
|
||||
# - map each short name to a real model ID (not a model alias)
|
||||
# - keeps the set expressions short and readable
|
||||
vars:
|
||||
g: gemma-model
|
||||
q: qwen-model
|
||||
m: mistral-model
|
||||
v: voxtral-model
|
||||
e: reranker-model
|
||||
L: llama-70B
|
||||
sd: stable-diffusion
|
||||
|
||||
# evict_costs: relative cost of losing a running model (default: 1)
|
||||
evict_costs:
|
||||
v: 50 # vllm backend, slow cold start
|
||||
L: 30 # 70B weights, slow to load
|
||||
|
||||
# sets: named combinations of models that may run together.
|
||||
# Each value is an expression built from these operators:
|
||||
# & AND (models run together)
|
||||
# | OR (alternatives)
|
||||
# () grouping
|
||||
# +ref inline the expression of another set
|
||||
#
|
||||
# Each expression expands into one or more concrete sets:
|
||||
# "L" → [L]
|
||||
# "a & b" → [a, b]
|
||||
# "a | b" → [a], [b]
|
||||
# "(a | b) & c" → [a, c], [b, c]
|
||||
# "(a | b) & (c | d)" → [a,c], [a,d], [b,c], [b,d]
|
||||
# "+llms & v" → inline the llms set, then AND with v
|
||||
sets:
|
||||
# An LLM plus TTS. Switching between g/q/m keeps v loaded.
|
||||
# expands to: [g,v], [q,v], [m,v]
|
||||
standard: "(g | q | m) & v"
|
||||
|
||||
# An LLM plus TTS plus reranker.
|
||||
# expands to: [g,v,e], [q,v,e]
|
||||
with_rerank: "(g | q) & v & e"
|
||||
|
||||
# An LLM plus image generation, no TTS.
|
||||
# expands to: [g,sd], [q,sd]
|
||||
creative: "(g | q) & sd"
|
||||
|
||||
# The 70B model uses every GPU, so it can only run alone.
|
||||
# expands to: [L]
|
||||
full: "L"
|
||||
|
||||
# scheduler: how queued requests are ordered.
|
||||
# The default and only valid scheduler is "fifo"
|
||||
scheduler:
|
||||
use: fifo
|
||||
settings:
|
||||
fifo:
|
||||
# priority: a dictionary of model ID -> priority
|
||||
# - optional, default: empty dictionary
|
||||
# - models default to priority 0
|
||||
# - higher priority requests are serviced first in the queue
|
||||
priority:
|
||||
A: 10
|
||||
B: 5
|
||||
C: 5
|
||||
D: 1
|
||||
|
||||
# peers: a dictionary of remote peers and models they provide
|
||||
# - optional, default: empty dictionary
|
||||
# - peers can be another llama-swap
|
||||
|
||||
@@ -9,6 +9,7 @@ require (
|
||||
github.com/charmbracelet/lipgloss v1.1.0
|
||||
github.com/fxamacker/cbor/v2 v2.9.1
|
||||
github.com/gin-gonic/gin v1.10.0
|
||||
github.com/google/jsonschema-go v0.4.3
|
||||
github.com/klauspost/compress v1.18.5
|
||||
github.com/shirou/gopsutil/v4 v4.26.4
|
||||
github.com/stretchr/testify v1.11.1
|
||||
|
||||
@@ -61,6 +61,8 @@ github.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/
|
||||
github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
|
||||
github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU=
|
||||
github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
|
||||
github.com/google/jsonschema-go v0.4.3 h1:/DBOLZTfDow7pe2GmaJNhltueGTtDKICi8V8p+DQPd0=
|
||||
github.com/google/jsonschema-go v0.4.3/go.mod h1:r5quNTdLOYEz95Ru18zA0ydNbBuYoo9tgaYcxEYhJVE=
|
||||
github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM=
|
||||
github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=
|
||||
github.com/klauspost/compress v1.18.5 h1:/h1gH5Ce+VWNLSWqPzOVn6XBO+vJbCNGvjoaGBFW2IE=
|
||||
|
||||
@@ -0,0 +1,57 @@
|
||||
package config
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"runtime"
|
||||
"strings"
|
||||
|
||||
"github.com/billziss-gh/golib/shlex"
|
||||
)
|
||||
|
||||
func SanitizeCommand(cmdStr string) ([]string, error) {
|
||||
var cleanedLines []string
|
||||
for _, line := range strings.Split(cmdStr, "\n") {
|
||||
trimmed := strings.TrimSpace(line)
|
||||
// Skip comment lines
|
||||
if strings.HasPrefix(trimmed, "#") {
|
||||
continue
|
||||
}
|
||||
// Handle trailing backslashes by replacing with space
|
||||
if strings.HasSuffix(trimmed, "\\") {
|
||||
cleanedLines = append(cleanedLines, strings.TrimSuffix(trimmed, "\\")+" ")
|
||||
} else {
|
||||
cleanedLines = append(cleanedLines, line)
|
||||
}
|
||||
}
|
||||
|
||||
// put it back together
|
||||
cmdStr = strings.Join(cleanedLines, "\n")
|
||||
|
||||
// Split the command into arguments
|
||||
var args []string
|
||||
if runtime.GOOS == "windows" {
|
||||
args = shlex.Windows.Split(cmdStr)
|
||||
} else {
|
||||
args = shlex.Posix.Split(cmdStr)
|
||||
}
|
||||
|
||||
// Ensure the command is not empty
|
||||
if len(args) == 0 {
|
||||
return nil, fmt.Errorf("empty command")
|
||||
}
|
||||
|
||||
return args, nil
|
||||
}
|
||||
|
||||
// StripComments returns cmdStr with every comment line removed. A comment
// line is one whose first non-whitespace character is '#'. All other lines are
// kept exactly as written (including indentation) and rejoined with "\n".
func StripComments(cmdStr string) string {
	var kept []string
	for _, raw := range strings.Split(cmdStr, "\n") {
		if isComment := strings.HasPrefix(strings.TrimSpace(raw), "#"); !isComment {
			kept = append(kept, raw)
		}
	}
	return strings.Join(kept, "\n")
}
|
||||
+40
-611
@@ -2,16 +2,9 @@ package config
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"net/url"
|
||||
"os"
|
||||
"regexp"
|
||||
"runtime"
|
||||
"sort"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/billziss-gh/golib/shlex"
|
||||
"gopkg.in/yaml.v3"
|
||||
)
|
||||
|
||||
@@ -85,12 +78,6 @@ type GroupConfig struct {
|
||||
Members []string `yaml:"members"`
|
||||
}
|
||||
|
||||
var (
|
||||
macroNameRegex = regexp.MustCompile(`^[a-zA-Z0-9_-]+$`)
|
||||
macroPatternRegex = regexp.MustCompile(`\$\{([a-zA-Z0-9_-]+)\}`)
|
||||
envMacroRegex = regexp.MustCompile(`\$\{env\.([a-zA-Z_][a-zA-Z0-9_]*)\}`)
|
||||
)
|
||||
|
||||
// set default values for GroupConfig
|
||||
func (c *GroupConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
|
||||
type rawGroupConfig GroupConfig
|
||||
@@ -129,13 +116,16 @@ type Config struct {
|
||||
GlobalTTL int `yaml:"globalTTL"`
|
||||
Models map[string]ModelConfig `yaml:"models"` /* key is model ID */
|
||||
Profiles map[string][]string `yaml:"profiles"`
|
||||
Groups map[string]GroupConfig `yaml:"groups"` /* key is group ID */
|
||||
|
||||
// swap matrix: solver-based alternative to groups
|
||||
Matrix *MatrixConfig `yaml:"matrix"`
|
||||
// routing is the canonical source for swap/scheduling configuration.
|
||||
// New code must read Routing, never the backwards-compat fields below.
|
||||
Routing RoutingConfig `yaml:"routing"`
|
||||
|
||||
// populated during validation when matrix is configured
|
||||
ExpandedSets []ExpandedSet `yaml:"-"`
|
||||
// Groups and Matrix are permanent backwards-compat input fields for the
|
||||
// legacy top-level `groups:`/`matrix:` keys. They are normalized into
|
||||
// Routing by LoadConfigFromReader. New code must not read them directly.
|
||||
Groups map[string]GroupConfig `yaml:"groups"` /* key is group ID */
|
||||
Matrix *MatrixConfig `yaml:"matrix"`
|
||||
|
||||
// for key/value replacements in model's cmd, cmdStop, proxy, checkEndPoint
|
||||
Macros MacroList `yaml:"macros"`
|
||||
@@ -160,6 +150,38 @@ type Config struct {
|
||||
|
||||
// support remote peers, see issue #433, #296
|
||||
Peers PeerDictionaryConfig `yaml:"peers"`
|
||||
|
||||
// upstream controls behaviour of the /upstream passthrough endpoint
|
||||
Upstream UpstreamConfig `yaml:"upstream"`
|
||||
}
|
||||
|
||||
// RoutingConfig is the canonical, normalized routing/scheduling configuration.
// It is read from the top-level `routing:` key and groups the two routing
// concerns: request ordering (Scheduler) and model swapping (Router).
type RoutingConfig struct {
	// Scheduler configures the order in which queued requests are serviced.
	Scheduler SchedulerConfig `yaml:"scheduler"`
	// Router selects and configures the swapping strategy (group or matrix).
	Router RouterConfig `yaml:"router"`
}
|
||||
|
||||
// SchedulerConfig selects the request scheduler and carries its settings.
// The scheduler decides the order in which queued requests are serviced.
type SchedulerConfig struct {
	// Use names the scheduler to run.
	Use string `yaml:"use"` // default "fifo"
	// Settings holds the per-scheduler configuration blocks.
	Settings SchedulerSettings `yaml:"settings"`
}
|
||||
|
||||
// SchedulerSettings holds one settings block per scheduler implementation,
// keyed in YAML by the scheduler's name.
type SchedulerSettings struct {
	// Fifo holds the settings for the "fifo" scheduler.
	Fifo FifoConfig `yaml:"fifo"`
}
|
||||
|
||||
// FifoConfig holds the settings of the "fifo" scheduler.
type FifoConfig struct {
	// Priority assigns a queue priority to individual models.
	Priority map[string]int `yaml:"priority"` // model ID -> priority, default 0
}
|
||||
|
||||
// RouterConfig names the router to use and carries the settings for the
// available routers.
type RouterConfig struct {
|
||||
// Use is the router name read from the `use` key.
Use string `yaml:"use"` // "group" (default) | "matrix"
|
||||
// Settings holds the router-specific options read from `settings`.
Settings RouterSettings `yaml:"settings"`
|
||||
}
|
||||
|
||||
// RouterSettings carries the configuration for each router kind. Groups is
// keyed by a string (presumably the group ID, as in the legacy top-level
// `groups` field); Matrix is a pointer, so it is nil when the `matrix` key is
// absent.
type RouterSettings struct {
|
||||
// Groups is read from the `groups` key.
Groups map[string]GroupConfig `yaml:"groups"`
|
||||
// Matrix is read from the `matrix` key.
Matrix *MatrixConfig `yaml:"matrix"`
|
||||
}
|
||||
|
||||
func (c *Config) RealModelName(search string) (string, bool) {
|
||||
@@ -189,369 +211,6 @@ func LoadConfig(path string) (Config, error) {
|
||||
return LoadConfigFromReader(file)
|
||||
}
|
||||
|
||||
func LoadConfigFromReader(r io.Reader) (Config, error) {
|
||||
data, err := io.ReadAll(r)
|
||||
if err != nil {
|
||||
return Config{}, err
|
||||
}
|
||||
yamlStr := string(data)
|
||||
|
||||
// Phase 1: Substitute all ${env.VAR} macros at string level
|
||||
// This is safe because env values are simple strings without YAML formatting
|
||||
yamlStr, err = substituteEnvMacros(yamlStr)
|
||||
if err != nil {
|
||||
return Config{}, err
|
||||
}
|
||||
|
||||
// Unmarshal into full Config with defaults
|
||||
config := Config{
|
||||
HealthCheckTimeout: 120,
|
||||
StartPort: 5800,
|
||||
LogLevel: "info",
|
||||
LogTimeFormat: "",
|
||||
LogToStdout: LogToStdoutProxy,
|
||||
MetricsMaxInMemory: 1000,
|
||||
CaptureBuffer: 5,
|
||||
GlobalTTL: 0,
|
||||
}
|
||||
if err = yaml.Unmarshal([]byte(yamlStr), &config); err != nil {
|
||||
return Config{}, err
|
||||
}
|
||||
|
||||
if config.HealthCheckTimeout < 15 {
|
||||
config.HealthCheckTimeout = 15
|
||||
}
|
||||
|
||||
// Apply defaults for performance config when section is missing
|
||||
if config.Performance.Every == 0 {
|
||||
config.Performance.Every = 5 * time.Second
|
||||
}
|
||||
if err = config.Performance.Validate(); err != nil {
|
||||
return Config{}, fmt.Errorf("performance: %w", err)
|
||||
}
|
||||
|
||||
if config.StartPort < 1 {
|
||||
return Config{}, fmt.Errorf("startPort must be greater than 1")
|
||||
}
|
||||
|
||||
if config.GlobalTTL < 0 {
|
||||
return Config{}, fmt.Errorf("globalTTL must be >= 0")
|
||||
}
|
||||
|
||||
switch config.LogToStdout {
|
||||
case LogToStdoutProxy, LogToStdoutUpstream, LogToStdoutBoth, LogToStdoutNone:
|
||||
default:
|
||||
return Config{}, fmt.Errorf("logToStdout must be one of: proxy, upstream, both, none")
|
||||
}
|
||||
|
||||
// Populate the aliases map
|
||||
config.aliases = make(map[string]string)
|
||||
for modelName, modelConfig := range config.Models {
|
||||
for _, alias := range modelConfig.Aliases {
|
||||
if _, found := config.aliases[alias]; found {
|
||||
return Config{}, fmt.Errorf("duplicate alias %s found in model: %s", alias, modelName)
|
||||
}
|
||||
config.aliases[alias] = modelName
|
||||
}
|
||||
}
|
||||
|
||||
// Validate global macros
|
||||
for _, macro := range config.Macros {
|
||||
if err = validateMacro(macro.Name, macro.Value); err != nil {
|
||||
return Config{}, err
|
||||
}
|
||||
}
|
||||
|
||||
// Get and sort all model IDs for consistent port assignment
|
||||
modelIds := make([]string, 0, len(config.Models))
|
||||
for modelId := range config.Models {
|
||||
modelIds = append(modelIds, modelId)
|
||||
}
|
||||
sort.Strings(modelIds)
|
||||
|
||||
nextPort := config.StartPort
|
||||
for _, modelId := range modelIds {
|
||||
modelConfig := config.Models[modelId]
|
||||
modelConfig.HealthCheckTimeout = config.HealthCheckTimeout
|
||||
|
||||
// Strip comments from command fields
|
||||
modelConfig.Cmd = StripComments(modelConfig.Cmd)
|
||||
modelConfig.CmdStop = StripComments(modelConfig.CmdStop)
|
||||
|
||||
// set model TTL to globalTTL it is the default value
|
||||
if modelConfig.UnloadAfter == MODEL_CONFIG_DEFAULT_TTL {
|
||||
modelConfig.UnloadAfter = config.GlobalTTL
|
||||
}
|
||||
|
||||
if modelConfig.UnloadAfter < 0 {
|
||||
return Config{}, fmt.Errorf("model %s: invalid TTL value %d", modelId, modelConfig.UnloadAfter)
|
||||
}
|
||||
|
||||
// Validate model macros
|
||||
for _, macro := range modelConfig.Macros {
|
||||
if err = validateMacro(macro.Name, macro.Value); err != nil {
|
||||
return Config{}, fmt.Errorf("model %s: %s", modelId, err.Error())
|
||||
}
|
||||
}
|
||||
|
||||
// Build merged macro list: MODEL_ID + global macros + model macros (model overrides global)
|
||||
mergedMacros := make(MacroList, 0, len(config.Macros)+len(modelConfig.Macros)+1)
|
||||
mergedMacros = append(mergedMacros, MacroEntry{Name: "MODEL_ID", Value: modelId})
|
||||
mergedMacros = append(mergedMacros, config.Macros...)
|
||||
|
||||
// Add model macros (override globals with same name)
|
||||
for _, entry := range modelConfig.Macros {
|
||||
found := false
|
||||
for i, existing := range mergedMacros {
|
||||
if existing.Name == entry.Name {
|
||||
mergedMacros[i] = entry
|
||||
found = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if !found {
|
||||
mergedMacros = append(mergedMacros, entry)
|
||||
}
|
||||
}
|
||||
|
||||
// Substitute remaining macros in model fields (LIFO order)
|
||||
for i := len(mergedMacros) - 1; i >= 0; i-- {
|
||||
entry := mergedMacros[i]
|
||||
macroSlug := fmt.Sprintf("${%s}", entry.Name)
|
||||
macroStr := fmt.Sprintf("%v", entry.Value)
|
||||
|
||||
modelConfig.Cmd = strings.ReplaceAll(modelConfig.Cmd, macroSlug, macroStr)
|
||||
modelConfig.CmdStop = strings.ReplaceAll(modelConfig.CmdStop, macroSlug, macroStr)
|
||||
modelConfig.Proxy = strings.ReplaceAll(modelConfig.Proxy, macroSlug, macroStr)
|
||||
modelConfig.CheckEndpoint = strings.ReplaceAll(modelConfig.CheckEndpoint, macroSlug, macroStr)
|
||||
modelConfig.Filters.StripParams = strings.ReplaceAll(modelConfig.Filters.StripParams, macroSlug, macroStr)
|
||||
modelConfig.Name = strings.ReplaceAll(modelConfig.Name, macroSlug, macroStr)
|
||||
modelConfig.Description = strings.ReplaceAll(modelConfig.Description, macroSlug, macroStr)
|
||||
|
||||
// Substitute macros in SetParamsByID keys and values
|
||||
if len(modelConfig.Filters.SetParamsByID) > 0 {
|
||||
newSetParamsByID := make(map[string]map[string]any, len(modelConfig.Filters.SetParamsByID))
|
||||
for key, paramMap := range modelConfig.Filters.SetParamsByID {
|
||||
newKey := strings.ReplaceAll(key, macroSlug, macroStr)
|
||||
newValAny, err := substituteMacroInValue(any(paramMap), entry.Name, entry.Value)
|
||||
if err != nil {
|
||||
return Config{}, fmt.Errorf("model %s filters.setParamsByID: %s", modelId, err.Error())
|
||||
}
|
||||
newParamMap, ok := newValAny.(map[string]any)
|
||||
if !ok {
|
||||
return Config{}, fmt.Errorf("model %s filters.setParamsByID: unexpected type after macro substitution", modelId)
|
||||
}
|
||||
newSetParamsByID[newKey] = newParamMap
|
||||
}
|
||||
modelConfig.Filters.SetParamsByID = newSetParamsByID
|
||||
}
|
||||
|
||||
// Substitute in metadata (type-preserving)
|
||||
if len(modelConfig.Metadata) > 0 {
|
||||
result, err := substituteMacroInValue(modelConfig.Metadata, entry.Name, entry.Value)
|
||||
if err != nil {
|
||||
return Config{}, fmt.Errorf("model %s metadata: %s", modelId, err.Error())
|
||||
}
|
||||
modelConfig.Metadata = result.(map[string]any)
|
||||
}
|
||||
}
|
||||
|
||||
// Handle PORT macro - only allocate if cmd uses it
|
||||
cmdHasPort := strings.Contains(modelConfig.Cmd, "${PORT}")
|
||||
proxyHasPort := strings.Contains(modelConfig.Proxy, "${PORT}")
|
||||
if cmdHasPort || proxyHasPort {
|
||||
if !cmdHasPort && proxyHasPort {
|
||||
return Config{}, fmt.Errorf("model %s: proxy uses ${PORT} but cmd does not - ${PORT} is only available when used in cmd", modelId)
|
||||
}
|
||||
|
||||
macroSlug := "${PORT}"
|
||||
macroStr := fmt.Sprintf("%v", nextPort)
|
||||
|
||||
modelConfig.Cmd = strings.ReplaceAll(modelConfig.Cmd, macroSlug, macroStr)
|
||||
modelConfig.CmdStop = strings.ReplaceAll(modelConfig.CmdStop, macroSlug, macroStr)
|
||||
modelConfig.Proxy = strings.ReplaceAll(modelConfig.Proxy, macroSlug, macroStr)
|
||||
modelConfig.Name = strings.ReplaceAll(modelConfig.Name, macroSlug, macroStr)
|
||||
modelConfig.Description = strings.ReplaceAll(modelConfig.Description, macroSlug, macroStr)
|
||||
|
||||
if len(modelConfig.Metadata) > 0 {
|
||||
result, err := substituteMacroInValue(modelConfig.Metadata, "PORT", nextPort)
|
||||
if err != nil {
|
||||
return Config{}, fmt.Errorf("model %s metadata: %s", modelId, err.Error())
|
||||
}
|
||||
modelConfig.Metadata = result.(map[string]any)
|
||||
}
|
||||
|
||||
nextPort++
|
||||
}
|
||||
|
||||
// Validate no unknown macros remain
|
||||
fieldMap := map[string]string{
|
||||
"cmd": modelConfig.Cmd,
|
||||
"cmdStop": modelConfig.CmdStop,
|
||||
"proxy": modelConfig.Proxy,
|
||||
"checkEndpoint": modelConfig.CheckEndpoint,
|
||||
"filters.stripParams": modelConfig.Filters.StripParams,
|
||||
"name": modelConfig.Name,
|
||||
"description": modelConfig.Description,
|
||||
}
|
||||
|
||||
for fieldName, fieldValue := range fieldMap {
|
||||
matches := macroPatternRegex.FindAllStringSubmatch(fieldValue, -1)
|
||||
for _, match := range matches {
|
||||
macroName := match[1]
|
||||
if macroName == "PID" && fieldName == "cmdStop" {
|
||||
continue // replaced at runtime
|
||||
}
|
||||
if macroName == "PORT" || macroName == "MODEL_ID" {
|
||||
return Config{}, fmt.Errorf("macro '${%s}' should have been substituted in %s.%s", macroName, modelId, fieldName)
|
||||
}
|
||||
return Config{}, fmt.Errorf("unknown macro '${%s}' found in %s.%s", macroName, modelId, fieldName)
|
||||
}
|
||||
}
|
||||
|
||||
if len(modelConfig.Metadata) > 0 {
|
||||
if err := validateNestedForUnknownMacros(modelConfig.Metadata, fmt.Sprintf("model %s metadata", modelId)); err != nil {
|
||||
return Config{}, err
|
||||
}
|
||||
}
|
||||
|
||||
// Validate SetParamsByID keys and values
|
||||
for key, paramMap := range modelConfig.Filters.SetParamsByID {
|
||||
if matches := macroPatternRegex.FindAllStringSubmatch(key, -1); len(matches) > 0 {
|
||||
return Config{}, fmt.Errorf("unknown macro '${%s}' found in model %s filters.setParamsByID key", matches[0][1], modelId)
|
||||
}
|
||||
if err := validateNestedForUnknownMacros(any(paramMap), fmt.Sprintf("model %s filters.setParamsByID[%s]", modelId, key)); err != nil {
|
||||
return Config{}, err
|
||||
}
|
||||
}
|
||||
|
||||
// Auto-register setParamsByID keys as aliases (skip the model's own ID)
|
||||
for key := range modelConfig.Filters.SetParamsByID {
|
||||
if key == modelId {
|
||||
continue
|
||||
}
|
||||
if _, exists := config.Models[key]; exists {
|
||||
return Config{}, fmt.Errorf("model %s filters.setParamsByID: key '%s' conflicts with an existing model ID", modelId, key)
|
||||
}
|
||||
if existingModel, exists := config.aliases[key]; exists {
|
||||
if existingModel != modelId {
|
||||
return Config{}, fmt.Errorf("duplicate alias '%s' in model %s filters.setParamsByID, already used by model %s", key, modelId, existingModel)
|
||||
}
|
||||
continue // already registered as explicit alias for this model
|
||||
}
|
||||
config.aliases[key] = modelId
|
||||
modelConfig.Aliases = append(modelConfig.Aliases, key)
|
||||
}
|
||||
|
||||
if _, err := url.Parse(modelConfig.Proxy); err != nil {
|
||||
return Config{}, fmt.Errorf("model %s: invalid proxy URL: %w", modelId, err)
|
||||
}
|
||||
|
||||
if modelConfig.SendLoadingState == nil {
|
||||
v := config.SendLoadingState
|
||||
modelConfig.SendLoadingState = &v
|
||||
}
|
||||
|
||||
config.Models[modelId] = modelConfig
|
||||
}
|
||||
|
||||
// groups XOR matrix
|
||||
if config.Matrix != nil && len(config.Groups) > 0 {
|
||||
return Config{}, fmt.Errorf("config cannot use both 'groups' and 'matrix'")
|
||||
}
|
||||
|
||||
if config.Matrix != nil {
|
||||
expandedSets, err := ValidateMatrix(*config.Matrix, config.Models)
|
||||
if err != nil {
|
||||
return Config{}, fmt.Errorf("matrix: %w", err)
|
||||
}
|
||||
config.ExpandedSets = expandedSets
|
||||
} else {
|
||||
config = AddDefaultGroupToConfig(config)
|
||||
|
||||
// Validate group members
|
||||
memberUsage := make(map[string]string)
|
||||
for groupID, groupConfig := range config.Groups {
|
||||
prevSet := make(map[string]bool)
|
||||
for _, member := range groupConfig.Members {
|
||||
if _, found := prevSet[member]; found {
|
||||
return Config{}, fmt.Errorf("duplicate model member %s found in group: %s", member, groupID)
|
||||
}
|
||||
prevSet[member] = true
|
||||
|
||||
if existingGroup, exists := memberUsage[member]; exists {
|
||||
return Config{}, fmt.Errorf("model member %s is used in multiple groups: %s and %s", member, existingGroup, groupID)
|
||||
}
|
||||
memberUsage[member] = groupID
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Clean up hooks preload
|
||||
if len(config.Hooks.OnStartup.Preload) > 0 {
|
||||
var toPreload []string
|
||||
for _, modelID := range config.Hooks.OnStartup.Preload {
|
||||
modelID = strings.TrimSpace(modelID)
|
||||
if modelID == "" {
|
||||
continue
|
||||
}
|
||||
if real, found := config.RealModelName(modelID); found {
|
||||
toPreload = append(toPreload, real)
|
||||
}
|
||||
}
|
||||
config.Hooks.OnStartup.Preload = toPreload
|
||||
}
|
||||
|
||||
// Validate API keys (env macros already substituted at string level)
|
||||
for i, apikey := range config.RequiredAPIKeys {
|
||||
if apikey == "" {
|
||||
return Config{}, fmt.Errorf("empty api key found in apiKeys")
|
||||
}
|
||||
if strings.Contains(apikey, " ") {
|
||||
return Config{}, fmt.Errorf("api key cannot contain spaces: `%s`", apikey)
|
||||
}
|
||||
config.RequiredAPIKeys[i] = apikey
|
||||
}
|
||||
|
||||
// Process peers with global macro substitution
|
||||
for peerName, peerConfig := range config.Peers {
|
||||
// Substitute global macros (LIFO order)
|
||||
for i := len(config.Macros) - 1; i >= 0; i-- {
|
||||
entry := config.Macros[i]
|
||||
macroSlug := fmt.Sprintf("${%s}", entry.Name)
|
||||
macroStr := fmt.Sprintf("%v", entry.Value)
|
||||
|
||||
peerConfig.ApiKey = strings.ReplaceAll(peerConfig.ApiKey, macroSlug, macroStr)
|
||||
peerConfig.Filters.StripParams = strings.ReplaceAll(peerConfig.Filters.StripParams, macroSlug, macroStr)
|
||||
|
||||
// Substitute in setParams (type-preserving)
|
||||
if len(peerConfig.Filters.SetParams) > 0 {
|
||||
result, err := substituteMacroInValue(peerConfig.Filters.SetParams, entry.Name, entry.Value)
|
||||
if err != nil {
|
||||
return Config{}, fmt.Errorf("peers.%s.filters.setParams: %w", peerName, err)
|
||||
}
|
||||
peerConfig.Filters.SetParams = result.(map[string]any)
|
||||
}
|
||||
}
|
||||
|
||||
// Validate no unknown macros remain
|
||||
if matches := macroPatternRegex.FindAllStringSubmatch(peerConfig.ApiKey, -1); len(matches) > 0 {
|
||||
return Config{}, fmt.Errorf("peers.%s.apiKey: unknown macro '${%s}'", peerName, matches[0][1])
|
||||
}
|
||||
if matches := macroPatternRegex.FindAllStringSubmatch(peerConfig.Filters.StripParams, -1); len(matches) > 0 {
|
||||
return Config{}, fmt.Errorf("peers.%s.filters.stripParams: unknown macro '${%s}'", peerName, matches[0][1])
|
||||
}
|
||||
if len(peerConfig.Filters.SetParams) > 0 {
|
||||
if err := validateNestedForUnknownMacros(peerConfig.Filters.SetParams, fmt.Sprintf("peers.%s.filters.setParams", peerName)); err != nil {
|
||||
return Config{}, err
|
||||
}
|
||||
}
|
||||
config.Peers[peerName] = peerConfig
|
||||
}
|
||||
|
||||
return config, nil
|
||||
}
|
||||
|
||||
// rewrites the yaml to include a default group with any orphaned models
|
||||
func AddDefaultGroupToConfig(config Config) Config {
|
||||
|
||||
@@ -596,233 +255,3 @@ func AddDefaultGroupToConfig(config Config) Config {
|
||||
|
||||
return config
|
||||
}
|
||||
|
||||
func SanitizeCommand(cmdStr string) ([]string, error) {
|
||||
var cleanedLines []string
|
||||
for _, line := range strings.Split(cmdStr, "\n") {
|
||||
trimmed := strings.TrimSpace(line)
|
||||
// Skip comment lines
|
||||
if strings.HasPrefix(trimmed, "#") {
|
||||
continue
|
||||
}
|
||||
// Handle trailing backslashes by replacing with space
|
||||
if strings.HasSuffix(trimmed, "\\") {
|
||||
cleanedLines = append(cleanedLines, strings.TrimSuffix(trimmed, "\\")+" ")
|
||||
} else {
|
||||
cleanedLines = append(cleanedLines, line)
|
||||
}
|
||||
}
|
||||
|
||||
// put it back together
|
||||
cmdStr = strings.Join(cleanedLines, "\n")
|
||||
|
||||
// Split the command into arguments
|
||||
var args []string
|
||||
if runtime.GOOS == "windows" {
|
||||
args = shlex.Windows.Split(cmdStr)
|
||||
} else {
|
||||
args = shlex.Posix.Split(cmdStr)
|
||||
}
|
||||
|
||||
// Ensure the command is not empty
|
||||
if len(args) == 0 {
|
||||
return nil, fmt.Errorf("empty command")
|
||||
}
|
||||
|
||||
return args, nil
|
||||
}
|
||||
|
||||
// StripComments removes every line of cmdStr whose first non-whitespace
// character is '#'. All other lines are kept untouched (including their
// surrounding whitespace) and re-joined with newlines.
func StripComments(cmdStr string) string {
	lines := strings.Split(cmdStr, "\n")

	// Filter in place: kept shares the backing array of lines.
	kept := lines[:0]
	for _, current := range lines {
		if isComment := strings.HasPrefix(strings.TrimSpace(current), "#"); !isComment {
			kept = append(kept, current)
		}
	}
	return strings.Join(kept, "\n")
}
|
||||
|
||||
// validateMacro validates macro name and value constraints
|
||||
func validateMacro(name string, value any) error {
|
||||
if len(name) >= 64 {
|
||||
return fmt.Errorf("macro name '%s' exceeds maximum length of 63 characters", name)
|
||||
}
|
||||
if !macroNameRegex.MatchString(name) {
|
||||
return fmt.Errorf("macro name '%s' contains invalid characters, must match pattern ^[a-zA-Z0-9_-]+$", name)
|
||||
}
|
||||
|
||||
// Validate that value is a scalar type
|
||||
switch v := value.(type) {
|
||||
case string:
|
||||
// Check for self-reference
|
||||
macroSlug := fmt.Sprintf("${%s}", name)
|
||||
if strings.Contains(v, macroSlug) {
|
||||
return fmt.Errorf("macro '%s' contains self-reference", name)
|
||||
}
|
||||
case int, int8, int16, int32, int64, uint, uint8, uint16, uint32, uint64, float32, float64, bool:
|
||||
// These types are allowed
|
||||
default:
|
||||
return fmt.Errorf("macro '%s' has invalid type %T, must be a scalar type (string, int, float, or bool)", name, value)
|
||||
}
|
||||
|
||||
switch name {
|
||||
case "PORT", "MODEL_ID":
|
||||
return fmt.Errorf("macro name '%s' is reserved", name)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// validateNestedForUnknownMacros recursively checks for any remaining macro references in nested structures
|
||||
func validateNestedForUnknownMacros(value any, context string) error {
|
||||
switch v := value.(type) {
|
||||
case string:
|
||||
matches := macroPatternRegex.FindAllStringSubmatch(v, -1)
|
||||
for _, match := range matches {
|
||||
macroName := match[1]
|
||||
return fmt.Errorf("%s: unknown macro '${%s}'", context, macroName)
|
||||
}
|
||||
// Check for unsubstituted env macros
|
||||
envMatches := envMacroRegex.FindAllStringSubmatch(v, -1)
|
||||
for _, match := range envMatches {
|
||||
varName := match[1]
|
||||
return fmt.Errorf("%s: environment variable '%s' not set", context, varName)
|
||||
}
|
||||
return nil
|
||||
|
||||
case map[string]any:
|
||||
for _, val := range v {
|
||||
if err := validateNestedForUnknownMacros(val, context); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
|
||||
case []any:
|
||||
for _, val := range v {
|
||||
if err := validateNestedForUnknownMacros(val, context); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
|
||||
default:
|
||||
// Scalar types don't contain macros
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
// substituteMacroInValue returns a copy of value in which the macro
// ${macroName} has been replaced by macroValue. It handles one macro per
// call so that callers control the substitution order.
//
//   - A string that is exactly "${macroName}" is replaced by macroValue
//     itself, preserving its type.
//   - A string that merely contains the macro gets the macro replaced by the
//     %v formatting of macroValue.
//   - map[string]any and []any are rebuilt recursively.
//   - Every other value is returned unchanged.
//
// The returned error is always nil in the current implementation.
func substituteMacroInValue(value any, macroName string, macroValue any) (any, error) {
	placeholder := fmt.Sprintf("${%s}", macroName)

	switch typed := value.(type) {
	case string:
		if typed == placeholder {
			// whole-value macro: keep the macro's original type
			return macroValue, nil
		}
		// ReplaceAll returns the input unchanged when the placeholder is absent.
		return strings.ReplaceAll(typed, placeholder, fmt.Sprintf("%v", macroValue)), nil

	case map[string]any:
		rebuilt := make(map[string]any, len(typed))
		for k, child := range typed {
			replaced, err := substituteMacroInValue(child, macroName, macroValue)
			if err != nil {
				return nil, err
			}
			rebuilt[k] = replaced
		}
		return rebuilt, nil

	case []any:
		rebuilt := make([]any, len(typed))
		for idx := range typed {
			replaced, err := substituteMacroInValue(typed[idx], macroName, macroValue)
			if err != nil {
				return nil, err
			}
			rebuilt[idx] = replaced
		}
		return rebuilt, nil
	}

	// scalars pass through untouched
	return value, nil
}
|
||||
|
||||
// substituteEnvMacros replaces ${env.VAR_NAME} with environment variable values.
|
||||
// Returns error if any referenced env var is not set or contains invalid characters.
|
||||
// Env macros inside YAML comments are ignored by unmarshalling the YAML first
|
||||
// (which strips comments) and only checking the comment-free version for macros.
|
||||
func substituteEnvMacros(s string) (string, error) {
|
||||
// Unmarshal and remarshal to strip YAML comments
|
||||
var raw any
|
||||
if err := yaml.Unmarshal([]byte(s), &raw); err != nil {
|
||||
// If YAML is invalid, fall back to scanning the original string
|
||||
// so the user gets the env var error rather than a confusing YAML parse error
|
||||
return substituteEnvMacrosInString(s, s)
|
||||
}
|
||||
clean, err := yaml.Marshal(raw)
|
||||
if err != nil {
|
||||
return substituteEnvMacrosInString(s, s)
|
||||
}
|
||||
|
||||
return substituteEnvMacrosInString(s, string(clean))
|
||||
}
|
||||
|
||||
// substituteEnvMacrosInString finds ${env.VAR} macros in scanStr and substitutes
|
||||
// them in target. This separation allows scanning comment-free YAML while
|
||||
// substituting in the original string.
|
||||
func substituteEnvMacrosInString(target, scanStr string) (string, error) {
|
||||
result := target
|
||||
matches := envMacroRegex.FindAllStringSubmatch(scanStr, -1)
|
||||
for _, match := range matches {
|
||||
fullMatch := match[0] // ${env.VAR_NAME}
|
||||
varName := match[1] // VAR_NAME
|
||||
|
||||
value, exists := os.LookupEnv(varName)
|
||||
if !exists {
|
||||
return "", fmt.Errorf("environment variable '%s' is not set", varName)
|
||||
}
|
||||
|
||||
// Sanitize the value for safe YAML substitution
|
||||
value, err := sanitizeEnvValueForYAML(value, varName)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
result = strings.ReplaceAll(result, fullMatch, value)
|
||||
}
|
||||
return result, nil
|
||||
}
|
||||
|
||||
// sanitizeEnvValueForYAML prepares the value of environment variable varName
// for textual substitution into YAML.
//
// Values containing a newline, carriage return or NUL byte are rejected with
// an error. Otherwise every backslash is doubled and every double quote is
// prefixed with a backslash, which is the escaping expected inside a
// double-quoted YAML string; in unquoted positions the escapes show up
// literally.
func sanitizeEnvValueForYAML(value, varName string) (string, error) {
	if strings.IndexAny(value, "\n\r\x00") >= 0 {
		return "", fmt.Errorf("environment variable '%s' contains newlines or null bytes which are not allowed in YAML substitution", varName)
	}

	// Single pass; equivalent to escaping backslashes first and quotes second.
	escaper := strings.NewReplacer(`\`, `\\`, `"`, `\"`)
	return escaper.Replace(value), nil
}
|
||||
|
||||
@@ -173,6 +173,25 @@ groups:
|
||||
IdleConn: 90,
|
||||
}
|
||||
|
||||
expectedGroups := map[string]GroupConfig{
|
||||
DEFAULT_GROUP_ID: {
|
||||
Swap: true,
|
||||
Exclusive: true,
|
||||
Members: []string{"model1", "model3"},
|
||||
},
|
||||
"group1": {
|
||||
Swap: true,
|
||||
Exclusive: false,
|
||||
Members: []string{"model2"},
|
||||
},
|
||||
"forever": {
|
||||
Swap: true,
|
||||
Exclusive: false,
|
||||
Persistent: true,
|
||||
Members: []string{"model4"},
|
||||
},
|
||||
}
|
||||
|
||||
expected := Config{
|
||||
LogLevel: "info",
|
||||
LogTimeFormat: "",
|
||||
@@ -246,22 +265,19 @@ groups:
|
||||
"m2": "model2",
|
||||
"mthree": "model3",
|
||||
},
|
||||
Groups: map[string]GroupConfig{
|
||||
DEFAULT_GROUP_ID: {
|
||||
Swap: true,
|
||||
Exclusive: true,
|
||||
Members: []string{"model1", "model3"},
|
||||
Groups: expectedGroups,
|
||||
Upstream: UpstreamConfig{
|
||||
IgnorePaths: DefaultUpstreamIgnorePaths(),
|
||||
},
|
||||
Routing: RoutingConfig{
|
||||
Router: RouterConfig{
|
||||
Use: "group",
|
||||
Settings: RouterSettings{
|
||||
Groups: expectedGroups,
|
||||
},
|
||||
},
|
||||
"group1": {
|
||||
Swap: true,
|
||||
Exclusive: false,
|
||||
Members: []string{"model2"},
|
||||
},
|
||||
"forever": {
|
||||
Swap: true,
|
||||
Exclusive: false,
|
||||
Persistent: true,
|
||||
Members: []string{"model4"},
|
||||
Scheduler: SchedulerConfig{
|
||||
Use: "fifo",
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
@@ -0,0 +1,60 @@
|
||||
package config
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"os"
|
||||
"testing"
|
||||
|
||||
"github.com/google/jsonschema-go/jsonschema"
|
||||
"gopkg.in/yaml.v3"
|
||||
)
|
||||
|
||||
// TestConfig_ExampleMatchesSchema validates that config.example.yaml conforms to
|
||||
// config-schema.json. Both files live at the repository root.
|
||||
func TestConfig_ExampleMatchesSchema(t *testing.T) {
|
||||
const (
|
||||
schemaPath = "../../config-schema.json"
|
||||
examplePath = "../../config.example.yaml"
|
||||
)
|
||||
|
||||
schemaBytes, err := os.ReadFile(schemaPath)
|
||||
if err != nil {
|
||||
t.Fatalf("reading %s: %v", schemaPath, err)
|
||||
}
|
||||
|
||||
var schema jsonschema.Schema
|
||||
if err := json.Unmarshal(schemaBytes, &schema); err != nil {
|
||||
t.Fatalf("unmarshalling schema: %v", err)
|
||||
}
|
||||
|
||||
resolved, err := schema.Resolve(&jsonschema.ResolveOptions{
|
||||
BaseURI: "https://github.com/mostlygeek/llama-swap/",
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("resolving schema: %v", err)
|
||||
}
|
||||
|
||||
exampleBytes, err := os.ReadFile(examplePath)
|
||||
if err != nil {
|
||||
t.Fatalf("reading %s: %v", examplePath, err)
|
||||
}
|
||||
|
||||
// Convert YAML to a JSON-like value so numbers and keys match what the
|
||||
// validator expects.
|
||||
var yamlValue any
|
||||
if err := yaml.Unmarshal(exampleBytes, &yamlValue); err != nil {
|
||||
t.Fatalf("unmarshalling example yaml: %v", err)
|
||||
}
|
||||
jsonBytes, err := json.Marshal(yamlValue)
|
||||
if err != nil {
|
||||
t.Fatalf("converting example to json: %v", err)
|
||||
}
|
||||
var instance any
|
||||
if err := json.Unmarshal(jsonBytes, &instance); err != nil {
|
||||
t.Fatalf("unmarshalling example json: %v", err)
|
||||
}
|
||||
|
||||
if err := resolved.Validate(instance); err != nil {
|
||||
t.Fatalf("config.example.yaml does not match config-schema.json:\n%v", err)
|
||||
}
|
||||
}
|
||||
@@ -777,22 +777,27 @@ func TestConfig_APIKeys_Invalid(t *testing.T) {
|
||||
{
|
||||
name: "blank spaces only",
|
||||
content: `apiKeys: [" "]`,
|
||||
expectedErr: "api key cannot contain spaces: ` `",
|
||||
expectedErr: "apiKeys[0]: api key cannot contain spaces",
|
||||
},
|
||||
{
|
||||
name: "contains leading space",
|
||||
content: `apiKeys: [" key123"]`,
|
||||
expectedErr: "api key cannot contain spaces: ` key123`",
|
||||
expectedErr: "apiKeys[0]: api key cannot contain spaces",
|
||||
},
|
||||
{
|
||||
name: "contains trailing space",
|
||||
content: `apiKeys: ["key123 "]`,
|
||||
expectedErr: "api key cannot contain spaces: `key123 `",
|
||||
expectedErr: "apiKeys[0]: api key cannot contain spaces",
|
||||
},
|
||||
{
|
||||
name: "contains middle space",
|
||||
content: `apiKeys: ["key 123"]`,
|
||||
expectedErr: "api key cannot contain spaces: `key 123`",
|
||||
expectedErr: "apiKeys[0]: api key cannot contain spaces",
|
||||
},
|
||||
{
|
||||
name: "space in second key reports correct index",
|
||||
content: `apiKeys: ["valid-key", "bad key"]`,
|
||||
expectedErr: "apiKeys[1]: api key cannot contain spaces",
|
||||
},
|
||||
{
|
||||
name: "empty in list with valid keys",
|
||||
@@ -1544,3 +1549,174 @@ peers:
|
||||
assert.Equal(t, 1, peerConfig.Timeouts.ExpectContinue)
|
||||
assert.Equal(t, 90, peerConfig.Timeouts.IdleConn)
|
||||
}
|
||||
|
||||
// twoModels is a minimal models block reused by the routing tests below.
|
||||
const twoModels = `
|
||||
models:
|
||||
gemma:
|
||||
cmd: echo gemma
|
||||
proxy: http://localhost:8080
|
||||
qwen:
|
||||
cmd: echo qwen
|
||||
proxy: http://localhost:8081
|
||||
`
|
||||
|
||||
func TestConfig_Routing_LegacyTopLevelGroups(t *testing.T) {
|
||||
yaml := twoModels + `
|
||||
groups:
|
||||
g1:
|
||||
members: [gemma, qwen]
|
||||
`
|
||||
cfg, err := LoadConfigFromReader(strings.NewReader(yaml))
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, "group", cfg.Routing.Router.Use)
|
||||
// default group injected for orphaned models (none here) still leaves g1
|
||||
assert.Contains(t, cfg.Routing.Router.Settings.Groups, "g1")
|
||||
assert.Equal(t, "fifo", cfg.Routing.Scheduler.Use)
|
||||
}
|
||||
|
||||
func TestConfig_Routing_LegacyTopLevelMatrix(t *testing.T) {
|
||||
yaml := twoModels + `
|
||||
matrix:
|
||||
vars:
|
||||
g: gemma
|
||||
q: qwen
|
||||
sets:
|
||||
combo: "g | q"
|
||||
`
|
||||
cfg, err := LoadConfigFromReader(strings.NewReader(yaml))
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, "matrix", cfg.Routing.Router.Use)
|
||||
require.NotNil(t, cfg.Routing.Router.Settings.Matrix)
|
||||
assert.Len(t, cfg.Routing.Router.Settings.Matrix.ExpandedSets, 2)
|
||||
}
|
||||
|
||||
func TestConfig_Routing_RouterUseMatrix(t *testing.T) {
|
||||
yaml := twoModels + `
|
||||
routing:
|
||||
router:
|
||||
use: matrix
|
||||
settings:
|
||||
matrix:
|
||||
vars:
|
||||
g: gemma
|
||||
q: qwen
|
||||
sets:
|
||||
combo: "g | q"
|
||||
`
|
||||
cfg, err := LoadConfigFromReader(strings.NewReader(yaml))
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, "matrix", cfg.Routing.Router.Use)
|
||||
require.NotNil(t, cfg.Routing.Router.Settings.Matrix)
|
||||
assert.Len(t, cfg.Routing.Router.Settings.Matrix.ExpandedSets, 2)
|
||||
}
|
||||
|
||||
func TestConfig_Routing_RouterUseGroup(t *testing.T) {
|
||||
yaml := twoModels + `
|
||||
routing:
|
||||
router:
|
||||
use: group
|
||||
settings:
|
||||
groups:
|
||||
g1:
|
||||
members: [gemma, qwen]
|
||||
`
|
||||
cfg, err := LoadConfigFromReader(strings.NewReader(yaml))
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, "group", cfg.Routing.Router.Use)
|
||||
assert.Contains(t, cfg.Routing.Router.Settings.Groups, "g1")
|
||||
}
|
||||
|
||||
func TestConfig_Routing_DefaultsToGroup(t *testing.T) {
|
||||
cfg, err := LoadConfigFromReader(strings.NewReader(twoModels))
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, "group", cfg.Routing.Router.Use)
|
||||
assert.Equal(t, "fifo", cfg.Routing.Scheduler.Use)
|
||||
}
|
||||
|
||||
func TestConfig_Routing_LegacyAndRoutingConflict(t *testing.T) {
|
||||
yaml := twoModels + `
|
||||
groups:
|
||||
g1:
|
||||
members: [gemma, qwen]
|
||||
routing:
|
||||
router:
|
||||
use: group
|
||||
`
|
||||
_, err := LoadConfigFromReader(strings.NewReader(yaml))
|
||||
require.Error(t, err)
|
||||
assert.Contains(t, err.Error(), "migrate")
|
||||
}
|
||||
|
||||
func TestConfig_Routing_RouterUseMatrixWithoutSettings(t *testing.T) {
|
||||
yaml := twoModels + `
|
||||
routing:
|
||||
router:
|
||||
use: matrix
|
||||
`
|
||||
_, err := LoadConfigFromReader(strings.NewReader(yaml))
|
||||
require.Error(t, err)
|
||||
assert.Contains(t, err.Error(), "routing.router.settings.matrix is not set")
|
||||
}
|
||||
|
||||
// Both groups and matrix may be defined under routing.router.settings;
|
||||
// routing.router.use selects which one is active.
|
||||
func TestConfig_Routing_RouterSettingsBothGroupsAndMatrix(t *testing.T) {
|
||||
yaml := twoModels + `
|
||||
routing:
|
||||
router:
|
||||
use: group
|
||||
settings:
|
||||
groups:
|
||||
g1:
|
||||
members: [gemma, qwen]
|
||||
matrix:
|
||||
sets:
|
||||
s: "gemma"
|
||||
`
|
||||
config, err := LoadConfigFromReader(strings.NewReader(yaml))
|
||||
require.NoError(t, err)
|
||||
// use: group means groups are active and matrix is ignored
|
||||
assert.Equal(t, "group", config.Routing.Router.Use)
|
||||
assert.Nil(t, config.Matrix)
|
||||
assert.Contains(t, config.Groups, "g1")
|
||||
}
|
||||
|
||||
func TestConfig_Routing_UnknownRouter(t *testing.T) {
|
||||
yaml := twoModels + `
|
||||
routing:
|
||||
router:
|
||||
use: bogus
|
||||
`
|
||||
_, err := LoadConfigFromReader(strings.NewReader(yaml))
|
||||
require.Error(t, err)
|
||||
assert.Contains(t, err.Error(), "unknown router")
|
||||
}
|
||||
|
||||
func TestConfig_Routing_FifoPriorityUnknownModel(t *testing.T) {
|
||||
yaml := twoModels + `
|
||||
routing:
|
||||
scheduler:
|
||||
settings:
|
||||
fifo:
|
||||
priority:
|
||||
nope: 5
|
||||
`
|
||||
_, err := LoadConfigFromReader(strings.NewReader(yaml))
|
||||
require.Error(t, err)
|
||||
assert.Contains(t, err.Error(), "unknown model")
|
||||
}
|
||||
|
||||
func TestConfig_Routing_FifoPriorityKnownModel(t *testing.T) {
|
||||
yaml := twoModels + `
|
||||
routing:
|
||||
scheduler:
|
||||
settings:
|
||||
fifo:
|
||||
priority:
|
||||
gemma: 5
|
||||
`
|
||||
cfg, err := LoadConfigFromReader(strings.NewReader(yaml))
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, 5, cfg.Routing.Scheduler.Settings.Fifo.Priority["gemma"])
|
||||
}
|
||||
|
||||
@@ -165,6 +165,25 @@ groups:
|
||||
IdleConn: 90,
|
||||
}
|
||||
|
||||
expectedGroups := map[string]GroupConfig{
|
||||
DEFAULT_GROUP_ID: {
|
||||
Swap: true,
|
||||
Exclusive: true,
|
||||
Members: []string{"model1", "model3"},
|
||||
},
|
||||
"group1": {
|
||||
Swap: true,
|
||||
Exclusive: false,
|
||||
Members: []string{"model2"},
|
||||
},
|
||||
"forever": {
|
||||
Swap: true,
|
||||
Exclusive: false,
|
||||
Persistent: true,
|
||||
Members: []string{"model4"},
|
||||
},
|
||||
}
|
||||
|
||||
expected := Config{
|
||||
LogLevel: "info",
|
||||
LogTimeFormat: "",
|
||||
@@ -235,22 +254,19 @@ groups:
|
||||
"m2": "model2",
|
||||
"mthree": "model3",
|
||||
},
|
||||
Groups: map[string]GroupConfig{
|
||||
DEFAULT_GROUP_ID: {
|
||||
Swap: true,
|
||||
Exclusive: true,
|
||||
Members: []string{"model1", "model3"},
|
||||
Groups: expectedGroups,
|
||||
Upstream: UpstreamConfig{
|
||||
IgnorePaths: DefaultUpstreamIgnorePaths(),
|
||||
},
|
||||
Routing: RoutingConfig{
|
||||
Router: RouterConfig{
|
||||
Use: "group",
|
||||
Settings: RouterSettings{
|
||||
Groups: expectedGroups,
|
||||
},
|
||||
},
|
||||
"group1": {
|
||||
Swap: true,
|
||||
Exclusive: false,
|
||||
Members: []string{"model2"},
|
||||
},
|
||||
"forever": {
|
||||
Swap: true,
|
||||
Exclusive: false,
|
||||
Persistent: true,
|
||||
Members: []string{"model4"},
|
||||
Scheduler: SchedulerConfig{
|
||||
Use: "fifo",
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
@@ -0,0 +1,436 @@
|
||||
package config
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"net/url"
|
||||
"sort"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"gopkg.in/yaml.v3"
|
||||
)
|
||||
|
||||
func LoadConfigFromReader(r io.Reader) (Config, error) {
|
||||
data, err := io.ReadAll(r)
|
||||
if err != nil {
|
||||
return Config{}, err
|
||||
}
|
||||
yamlStr := string(data)
|
||||
|
||||
// Phase 1: Substitute all ${env.VAR} macros at string level
|
||||
// This is safe because env values are simple strings without YAML formatting
|
||||
yamlStr, err = substituteEnvMacros(yamlStr)
|
||||
if err != nil {
|
||||
return Config{}, err
|
||||
}
|
||||
|
||||
// Unmarshal into full Config with defaults
|
||||
config := Config{
|
||||
HealthCheckTimeout: 120,
|
||||
StartPort: 5800,
|
||||
LogLevel: "info",
|
||||
LogTimeFormat: "",
|
||||
LogToStdout: LogToStdoutProxy,
|
||||
MetricsMaxInMemory: 1000,
|
||||
CaptureBuffer: 5,
|
||||
GlobalTTL: 0,
|
||||
}
|
||||
if err = yaml.Unmarshal([]byte(yamlStr), &config); err != nil {
|
||||
return Config{}, err
|
||||
}
|
||||
|
||||
if config.HealthCheckTimeout < 15 {
|
||||
config.HealthCheckTimeout = 15
|
||||
}
|
||||
|
||||
// Apply defaults for performance config when section is missing
|
||||
if config.Performance.Every == 0 {
|
||||
config.Performance.Every = 5 * time.Second
|
||||
}
|
||||
if err = config.Performance.Validate(); err != nil {
|
||||
return Config{}, fmt.Errorf("performance: %w", err)
|
||||
}
|
||||
|
||||
if config.StartPort < 1 {
|
||||
return Config{}, fmt.Errorf("startPort must be greater than 1")
|
||||
}
|
||||
|
||||
if config.GlobalTTL < 0 {
|
||||
return Config{}, fmt.Errorf("globalTTL must be >= 0")
|
||||
}
|
||||
|
||||
// Apply default for upstream.ignorePaths when not specified. The default
|
||||
// matches common static-asset suffixes so they do not trigger a swap.
|
||||
if len(config.Upstream.IgnorePaths) == 0 {
|
||||
config.Upstream.IgnorePaths = DefaultUpstreamIgnorePaths()
|
||||
}
|
||||
|
||||
switch config.LogToStdout {
|
||||
case LogToStdoutProxy, LogToStdoutUpstream, LogToStdoutBoth, LogToStdoutNone:
|
||||
default:
|
||||
return Config{}, fmt.Errorf("logToStdout must be one of: proxy, upstream, both, none")
|
||||
}
|
||||
|
||||
// Populate the aliases map
|
||||
config.aliases = make(map[string]string)
|
||||
for modelName, modelConfig := range config.Models {
|
||||
for _, alias := range modelConfig.Aliases {
|
||||
if _, found := config.aliases[alias]; found {
|
||||
return Config{}, fmt.Errorf("duplicate alias %s found in model: %s", alias, modelName)
|
||||
}
|
||||
config.aliases[alias] = modelName
|
||||
}
|
||||
}
|
||||
|
||||
// Validate global macros
|
||||
for _, macro := range config.Macros {
|
||||
if err = validateMacro(macro.Name, macro.Value); err != nil {
|
||||
return Config{}, err
|
||||
}
|
||||
}
|
||||
|
||||
// Get and sort all model IDs for consistent port assignment
|
||||
modelIds := make([]string, 0, len(config.Models))
|
||||
for modelId := range config.Models {
|
||||
modelIds = append(modelIds, modelId)
|
||||
}
|
||||
sort.Strings(modelIds)
|
||||
|
||||
nextPort := config.StartPort
|
||||
for _, modelId := range modelIds {
|
||||
modelConfig := config.Models[modelId]
|
||||
modelConfig.HealthCheckTimeout = config.HealthCheckTimeout
|
||||
|
||||
// Strip comments from command fields
|
||||
modelConfig.Cmd = StripComments(modelConfig.Cmd)
|
||||
modelConfig.CmdStop = StripComments(modelConfig.CmdStop)
|
||||
|
||||
// set model TTL to globalTTL it is the default value
|
||||
if modelConfig.UnloadAfter == MODEL_CONFIG_DEFAULT_TTL {
|
||||
modelConfig.UnloadAfter = config.GlobalTTL
|
||||
}
|
||||
|
||||
if modelConfig.UnloadAfter < 0 {
|
||||
return Config{}, fmt.Errorf("model %s: invalid TTL value %d", modelId, modelConfig.UnloadAfter)
|
||||
}
|
||||
|
||||
// Validate model macros
|
||||
for _, macro := range modelConfig.Macros {
|
||||
if err = validateMacro(macro.Name, macro.Value); err != nil {
|
||||
return Config{}, fmt.Errorf("model %s: %s", modelId, err.Error())
|
||||
}
|
||||
}
|
||||
|
||||
// Build merged macro list: MODEL_ID + global macros + model macros (model overrides global)
|
||||
mergedMacros := make(MacroList, 0, len(config.Macros)+len(modelConfig.Macros)+1)
|
||||
mergedMacros = append(mergedMacros, MacroEntry{Name: "MODEL_ID", Value: modelId})
|
||||
mergedMacros = append(mergedMacros, config.Macros...)
|
||||
|
||||
// Add model macros (override globals with same name)
|
||||
for _, entry := range modelConfig.Macros {
|
||||
found := false
|
||||
for i, existing := range mergedMacros {
|
||||
if existing.Name == entry.Name {
|
||||
mergedMacros[i] = entry
|
||||
found = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if !found {
|
||||
mergedMacros = append(mergedMacros, entry)
|
||||
}
|
||||
}
|
||||
|
||||
// Substitute remaining macros in model fields (LIFO order)
|
||||
for i := len(mergedMacros) - 1; i >= 0; i-- {
|
||||
entry := mergedMacros[i]
|
||||
macroSlug := fmt.Sprintf("${%s}", entry.Name)
|
||||
macroStr := fmt.Sprintf("%v", entry.Value)
|
||||
|
||||
modelConfig.Cmd = strings.ReplaceAll(modelConfig.Cmd, macroSlug, macroStr)
|
||||
modelConfig.CmdStop = strings.ReplaceAll(modelConfig.CmdStop, macroSlug, macroStr)
|
||||
modelConfig.Proxy = strings.ReplaceAll(modelConfig.Proxy, macroSlug, macroStr)
|
||||
modelConfig.CheckEndpoint = strings.ReplaceAll(modelConfig.CheckEndpoint, macroSlug, macroStr)
|
||||
modelConfig.Filters.StripParams = strings.ReplaceAll(modelConfig.Filters.StripParams, macroSlug, macroStr)
|
||||
modelConfig.Name = strings.ReplaceAll(modelConfig.Name, macroSlug, macroStr)
|
||||
modelConfig.Description = strings.ReplaceAll(modelConfig.Description, macroSlug, macroStr)
|
||||
|
||||
// Substitute macros in SetParamsByID keys and values
|
||||
if len(modelConfig.Filters.SetParamsByID) > 0 {
|
||||
newSetParamsByID := make(map[string]map[string]any, len(modelConfig.Filters.SetParamsByID))
|
||||
for key, paramMap := range modelConfig.Filters.SetParamsByID {
|
||||
newKey := strings.ReplaceAll(key, macroSlug, macroStr)
|
||||
newValAny, err := substituteMacroInValue(any(paramMap), entry.Name, entry.Value)
|
||||
if err != nil {
|
||||
return Config{}, fmt.Errorf("model %s filters.setParamsByID: %s", modelId, err.Error())
|
||||
}
|
||||
newParamMap, ok := newValAny.(map[string]any)
|
||||
if !ok {
|
||||
return Config{}, fmt.Errorf("model %s filters.setParamsByID: unexpected type after macro substitution", modelId)
|
||||
}
|
||||
newSetParamsByID[newKey] = newParamMap
|
||||
}
|
||||
modelConfig.Filters.SetParamsByID = newSetParamsByID
|
||||
}
|
||||
|
||||
// Substitute in metadata (type-preserving)
|
||||
if len(modelConfig.Metadata) > 0 {
|
||||
result, err := substituteMacroInValue(modelConfig.Metadata, entry.Name, entry.Value)
|
||||
if err != nil {
|
||||
return Config{}, fmt.Errorf("model %s metadata: %s", modelId, err.Error())
|
||||
}
|
||||
modelConfig.Metadata = result.(map[string]any)
|
||||
}
|
||||
}
|
||||
|
||||
// Handle PORT macro - only allocate if cmd uses it
|
||||
cmdHasPort := strings.Contains(modelConfig.Cmd, "${PORT}")
|
||||
proxyHasPort := strings.Contains(modelConfig.Proxy, "${PORT}")
|
||||
if cmdHasPort || proxyHasPort {
|
||||
if !cmdHasPort && proxyHasPort {
|
||||
return Config{}, fmt.Errorf("model %s: proxy uses ${PORT} but cmd does not - ${PORT} is only available when used in cmd", modelId)
|
||||
}
|
||||
|
||||
macroSlug := "${PORT}"
|
||||
macroStr := fmt.Sprintf("%v", nextPort)
|
||||
|
||||
modelConfig.Cmd = strings.ReplaceAll(modelConfig.Cmd, macroSlug, macroStr)
|
||||
modelConfig.CmdStop = strings.ReplaceAll(modelConfig.CmdStop, macroSlug, macroStr)
|
||||
modelConfig.Proxy = strings.ReplaceAll(modelConfig.Proxy, macroSlug, macroStr)
|
||||
modelConfig.Name = strings.ReplaceAll(modelConfig.Name, macroSlug, macroStr)
|
||||
modelConfig.Description = strings.ReplaceAll(modelConfig.Description, macroSlug, macroStr)
|
||||
|
||||
if len(modelConfig.Metadata) > 0 {
|
||||
result, err := substituteMacroInValue(modelConfig.Metadata, "PORT", nextPort)
|
||||
if err != nil {
|
||||
return Config{}, fmt.Errorf("model %s metadata: %s", modelId, err.Error())
|
||||
}
|
||||
modelConfig.Metadata = result.(map[string]any)
|
||||
}
|
||||
|
||||
nextPort++
|
||||
}
|
||||
|
||||
// Validate no unknown macros remain
|
||||
fieldMap := map[string]string{
|
||||
"cmd": modelConfig.Cmd,
|
||||
"cmdStop": modelConfig.CmdStop,
|
||||
"proxy": modelConfig.Proxy,
|
||||
"checkEndpoint": modelConfig.CheckEndpoint,
|
||||
"filters.stripParams": modelConfig.Filters.StripParams,
|
||||
"name": modelConfig.Name,
|
||||
"description": modelConfig.Description,
|
||||
}
|
||||
|
||||
for fieldName, fieldValue := range fieldMap {
|
||||
matches := macroPatternRegex.FindAllStringSubmatch(fieldValue, -1)
|
||||
for _, match := range matches {
|
||||
macroName := match[1]
|
||||
if macroName == "PID" && fieldName == "cmdStop" {
|
||||
continue // replaced at runtime
|
||||
}
|
||||
if macroName == "PORT" || macroName == "MODEL_ID" {
|
||||
return Config{}, fmt.Errorf("macro '${%s}' should have been substituted in %s.%s", macroName, modelId, fieldName)
|
||||
}
|
||||
return Config{}, fmt.Errorf("unknown macro '${%s}' found in %s.%s", macroName, modelId, fieldName)
|
||||
}
|
||||
}
|
||||
|
||||
if len(modelConfig.Metadata) > 0 {
|
||||
if err := validateNestedForUnknownMacros(modelConfig.Metadata, fmt.Sprintf("model %s metadata", modelId)); err != nil {
|
||||
return Config{}, err
|
||||
}
|
||||
}
|
||||
|
||||
if err = modelConfig.Capabilities.Validate(); err != nil {
|
||||
return Config{}, fmt.Errorf("model %s: %w", modelId, err)
|
||||
}
|
||||
|
||||
// Validate SetParamsByID keys and values
|
||||
for key, paramMap := range modelConfig.Filters.SetParamsByID {
|
||||
if matches := macroPatternRegex.FindAllStringSubmatch(key, -1); len(matches) > 0 {
|
||||
return Config{}, fmt.Errorf("unknown macro '${%s}' found in model %s filters.setParamsByID key", matches[0][1], modelId)
|
||||
}
|
||||
if err := validateNestedForUnknownMacros(any(paramMap), fmt.Sprintf("model %s filters.setParamsByID[%s]", modelId, key)); err != nil {
|
||||
return Config{}, err
|
||||
}
|
||||
}
|
||||
|
||||
// Auto-register setParamsByID keys as aliases (skip the model's own ID)
|
||||
for key := range modelConfig.Filters.SetParamsByID {
|
||||
if key == modelId {
|
||||
continue
|
||||
}
|
||||
if _, exists := config.Models[key]; exists {
|
||||
return Config{}, fmt.Errorf("model %s filters.setParamsByID: key '%s' conflicts with an existing model ID", modelId, key)
|
||||
}
|
||||
if existingModel, exists := config.aliases[key]; exists {
|
||||
if existingModel != modelId {
|
||||
return Config{}, fmt.Errorf("duplicate alias '%s' in model %s filters.setParamsByID, already used by model %s", key, modelId, existingModel)
|
||||
}
|
||||
continue // already registered as explicit alias for this model
|
||||
}
|
||||
config.aliases[key] = modelId
|
||||
modelConfig.Aliases = append(modelConfig.Aliases, key)
|
||||
}
|
||||
|
||||
if _, err := url.Parse(modelConfig.Proxy); err != nil {
|
||||
return Config{}, fmt.Errorf("model %s: invalid proxy URL: %w", modelId, err)
|
||||
}
|
||||
|
||||
if modelConfig.SendLoadingState == nil {
|
||||
v := config.SendLoadingState
|
||||
modelConfig.SendLoadingState = &v
|
||||
}
|
||||
|
||||
config.Models[modelId] = modelConfig
|
||||
}
|
||||
|
||||
// Normalize routing config. The legacy top-level `matrix`/`groups` keys and
|
||||
// the new `routing.router` block are mutually exclusive: a config may use
|
||||
// either style, never both.
|
||||
hasTopLevel := config.Matrix != nil || len(config.Groups) > 0
|
||||
rtr := config.Routing.Router
|
||||
hasRouting := rtr.Use != "" || rtr.Settings.Matrix != nil || len(rtr.Settings.Groups) > 0
|
||||
|
||||
if hasTopLevel && hasRouting {
|
||||
return Config{}, fmt.Errorf("config uses both the legacy top-level 'matrix'/'groups' keys and the new 'routing.router' block; please migrate the top-level keys into 'routing.router' and remove them")
|
||||
}
|
||||
|
||||
if !hasTopLevel {
|
||||
// Both groups and matrix may be defined under routing.router.settings;
|
||||
// routing.router.use selects which one is active, so there is no conflict.
|
||||
rs := config.Routing.Router.Settings
|
||||
switch config.Routing.Router.Use {
|
||||
case "matrix":
|
||||
if rs.Matrix == nil {
|
||||
return Config{}, fmt.Errorf("routing.router.use is 'matrix' but routing.router.settings.matrix is not set")
|
||||
}
|
||||
config.Matrix = rs.Matrix
|
||||
case "group", "":
|
||||
config.Groups = rs.Groups
|
||||
default:
|
||||
return Config{}, fmt.Errorf("routing.router.use: unknown router %q (valid: group, matrix)", config.Routing.Router.Use)
|
||||
}
|
||||
}
|
||||
|
||||
// groups XOR matrix
|
||||
if config.Matrix != nil && len(config.Groups) > 0 {
|
||||
return Config{}, fmt.Errorf("config cannot use both 'groups' and 'matrix'")
|
||||
}
|
||||
|
||||
if config.Matrix != nil {
|
||||
expandedSets, err := ValidateMatrix(*config.Matrix, config.Models)
|
||||
if err != nil {
|
||||
return Config{}, fmt.Errorf("matrix: %w", err)
|
||||
}
|
||||
config.Matrix.ExpandedSets = expandedSets
|
||||
} else {
|
||||
config = AddDefaultGroupToConfig(config)
|
||||
|
||||
// Validate group members
|
||||
memberUsage := make(map[string]string)
|
||||
for groupID, groupConfig := range config.Groups {
|
||||
prevSet := make(map[string]bool)
|
||||
for _, member := range groupConfig.Members {
|
||||
if _, found := prevSet[member]; found {
|
||||
return Config{}, fmt.Errorf("duplicate model member %s found in group: %s", member, groupID)
|
||||
}
|
||||
prevSet[member] = true
|
||||
|
||||
if existingGroup, exists := memberUsage[member]; exists {
|
||||
return Config{}, fmt.Errorf("model member %s is used in multiple groups: %s and %s", member, existingGroup, groupID)
|
||||
}
|
||||
memberUsage[member] = groupID
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Build the canonical Config.Routing from the effective result. Both legacy
|
||||
// and new-style configs converge here. The Matrix pointer is shared so
|
||||
// ExpandedSets stays in one place.
|
||||
if config.Matrix != nil {
|
||||
config.Routing.Router.Use = "matrix"
|
||||
} else {
|
||||
config.Routing.Router.Use = "group"
|
||||
}
|
||||
config.Routing.Router.Settings.Matrix = config.Matrix
|
||||
config.Routing.Router.Settings.Groups = config.Groups
|
||||
|
||||
if config.Routing.Scheduler.Use == "" {
|
||||
config.Routing.Scheduler.Use = "fifo"
|
||||
}
|
||||
if config.Routing.Scheduler.Use != "fifo" {
|
||||
return Config{}, fmt.Errorf("routing.scheduler.use: unknown scheduler %q (valid: fifo)", config.Routing.Scheduler.Use)
|
||||
}
|
||||
for modelID := range config.Routing.Scheduler.Settings.Fifo.Priority {
|
||||
if _, found := config.RealModelName(modelID); !found {
|
||||
return Config{}, fmt.Errorf("routing.scheduler.settings.fifo.priority references unknown model %q", modelID)
|
||||
}
|
||||
}
|
||||
|
||||
// Clean up hooks preload
|
||||
if len(config.Hooks.OnStartup.Preload) > 0 {
|
||||
var toPreload []string
|
||||
for _, modelID := range config.Hooks.OnStartup.Preload {
|
||||
modelID = strings.TrimSpace(modelID)
|
||||
if modelID == "" {
|
||||
continue
|
||||
}
|
||||
if real, found := config.RealModelName(modelID); found {
|
||||
toPreload = append(toPreload, real)
|
||||
}
|
||||
}
|
||||
config.Hooks.OnStartup.Preload = toPreload
|
||||
}
|
||||
|
||||
// Validate API keys (env macros already substituted at string level)
|
||||
for i, apikey := range config.RequiredAPIKeys {
|
||||
if apikey == "" {
|
||||
return Config{}, fmt.Errorf("empty api key found in apiKeys")
|
||||
}
|
||||
if strings.Contains(apikey, " ") {
|
||||
return Config{}, fmt.Errorf("apiKeys[%d]: api key cannot contain spaces", i)
|
||||
}
|
||||
config.RequiredAPIKeys[i] = apikey
|
||||
}
|
||||
|
||||
// Process peers with global macro substitution
|
||||
for peerName, peerConfig := range config.Peers {
|
||||
// Substitute global macros (LIFO order)
|
||||
for i := len(config.Macros) - 1; i >= 0; i-- {
|
||||
entry := config.Macros[i]
|
||||
macroSlug := fmt.Sprintf("${%s}", entry.Name)
|
||||
macroStr := fmt.Sprintf("%v", entry.Value)
|
||||
|
||||
peerConfig.ApiKey = strings.ReplaceAll(peerConfig.ApiKey, macroSlug, macroStr)
|
||||
peerConfig.Filters.StripParams = strings.ReplaceAll(peerConfig.Filters.StripParams, macroSlug, macroStr)
|
||||
|
||||
// Substitute in setParams (type-preserving)
|
||||
if len(peerConfig.Filters.SetParams) > 0 {
|
||||
result, err := substituteMacroInValue(peerConfig.Filters.SetParams, entry.Name, entry.Value)
|
||||
if err != nil {
|
||||
return Config{}, fmt.Errorf("peers.%s.filters.setParams: %w", peerName, err)
|
||||
}
|
||||
peerConfig.Filters.SetParams = result.(map[string]any)
|
||||
}
|
||||
}
|
||||
|
||||
// Validate no unknown macros remain
|
||||
if matches := macroPatternRegex.FindAllStringSubmatch(peerConfig.ApiKey, -1); len(matches) > 0 {
|
||||
return Config{}, fmt.Errorf("peers.%s.apiKey: unknown macro '${%s}'", peerName, matches[0][1])
|
||||
}
|
||||
if matches := macroPatternRegex.FindAllStringSubmatch(peerConfig.Filters.StripParams, -1); len(matches) > 0 {
|
||||
return Config{}, fmt.Errorf("peers.%s.filters.stripParams: unknown macro '${%s}'", peerName, matches[0][1])
|
||||
}
|
||||
if len(peerConfig.Filters.SetParams) > 0 {
|
||||
if err := validateNestedForUnknownMacros(peerConfig.Filters.SetParams, fmt.Sprintf("peers.%s.filters.setParams", peerName)); err != nil {
|
||||
return Config{}, err
|
||||
}
|
||||
}
|
||||
config.Peers[peerName] = peerConfig
|
||||
}
|
||||
|
||||
return config, nil
|
||||
}
|
||||
@@ -0,0 +1,198 @@
|
||||
package config
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"regexp"
|
||||
"strings"
|
||||
|
||||
"gopkg.in/yaml.v3"
|
||||
)
|
||||
|
||||
// macroNameRegex matches a complete, valid macro name.
var macroNameRegex = regexp.MustCompile(`^[a-zA-Z0-9_-]+$`)

// macroPatternRegex finds ${NAME} macro references; group 1 is the name.
var macroPatternRegex = regexp.MustCompile(`\$\{([a-zA-Z0-9_-]+)\}`)

// envMacroRegex finds ${env.VAR} references; group 1 is the variable name.
var envMacroRegex = regexp.MustCompile(`\$\{env\.([a-zA-Z_][a-zA-Z0-9_]*)\}`)
|
||||
|
||||
// validateMacro validates macro name and value constraints
|
||||
func validateMacro(name string, value any) error {
|
||||
if len(name) >= 64 {
|
||||
return fmt.Errorf("macro name '%s' exceeds maximum length of 63 characters", name)
|
||||
}
|
||||
if !macroNameRegex.MatchString(name) {
|
||||
return fmt.Errorf("macro name '%s' contains invalid characters, must match pattern ^[a-zA-Z0-9_-]+$", name)
|
||||
}
|
||||
|
||||
// Validate that value is a scalar type
|
||||
switch v := value.(type) {
|
||||
case string:
|
||||
// Check for self-reference
|
||||
macroSlug := fmt.Sprintf("${%s}", name)
|
||||
if strings.Contains(v, macroSlug) {
|
||||
return fmt.Errorf("macro '%s' contains self-reference", name)
|
||||
}
|
||||
case int, int8, int16, int32, int64, uint, uint8, uint16, uint32, uint64, float32, float64, bool:
|
||||
// These types are allowed
|
||||
default:
|
||||
return fmt.Errorf("macro '%s' has invalid type %T, must be a scalar type (string, int, float, or bool)", name, value)
|
||||
}
|
||||
|
||||
switch name {
|
||||
case "PORT", "MODEL_ID":
|
||||
return fmt.Errorf("macro name '%s' is reserved", name)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// validateNestedForUnknownMacros recursively checks for any remaining macro references in nested structures
|
||||
func validateNestedForUnknownMacros(value any, context string) error {
|
||||
switch v := value.(type) {
|
||||
case string:
|
||||
matches := macroPatternRegex.FindAllStringSubmatch(v, -1)
|
||||
for _, match := range matches {
|
||||
macroName := match[1]
|
||||
return fmt.Errorf("%s: unknown macro '${%s}'", context, macroName)
|
||||
}
|
||||
// Check for unsubstituted env macros
|
||||
envMatches := envMacroRegex.FindAllStringSubmatch(v, -1)
|
||||
for _, match := range envMatches {
|
||||
varName := match[1]
|
||||
return fmt.Errorf("%s: environment variable '%s' not set", context, varName)
|
||||
}
|
||||
return nil
|
||||
|
||||
case map[string]any:
|
||||
for _, val := range v {
|
||||
if err := validateNestedForUnknownMacros(val, context); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
|
||||
case []any:
|
||||
for _, val := range v {
|
||||
if err := validateNestedForUnknownMacros(val, context); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
|
||||
default:
|
||||
// Scalar types don't contain macros
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
// substituteMacroInValue returns a copy of value in which the macro
// ${macroName} has been replaced by macroValue.
//
// A string that is exactly "${macroName}" is replaced by macroValue itself,
// keeping its original type. A string that merely contains the macro gets the
// "%v" rendering of macroValue spliced in. Maps (map[string]any) and slices
// ([]any) are rebuilt recursively; every other value is returned unchanged.
//
// It handles a single macro per call so the caller controls substitution
// order (LIFO).
func substituteMacroInValue(value any, macroName string, macroValue any) (any, error) {
	slug := fmt.Sprintf("${%s}", macroName)
	rendered := fmt.Sprintf("%v", macroValue)

	if text, isString := value.(string); isString {
		if text == slug {
			// Whole-value reference: preserve the macro's native type.
			return macroValue, nil
		}
		return strings.ReplaceAll(text, slug, rendered), nil
	}

	if mapping, isMap := value.(map[string]any); isMap {
		replaced := make(map[string]any)
		for k, child := range mapping {
			sub, err := substituteMacroInValue(child, macroName, macroValue)
			if err != nil {
				return nil, err
			}
			replaced[k] = sub
		}
		return replaced, nil
	}

	if list, isList := value.([]any); isList {
		replaced := make([]any, len(list))
		for idx := range list {
			sub, err := substituteMacroInValue(list[idx], macroName, macroValue)
			if err != nil {
				return nil, err
			}
			replaced[idx] = sub
		}
		return replaced, nil
	}

	// Any other type cannot contain a macro reference.
	return value, nil
}
|
||||
|
||||
// substituteEnvMacros replaces ${env.VAR_NAME} with environment variable values.
|
||||
// Returns error if any referenced env var is not set or contains invalid characters.
|
||||
// Env macros inside YAML comments are ignored by unmarshalling the YAML first
|
||||
// (which strips comments) and only checking the comment-free version for macros.
|
||||
func substituteEnvMacros(s string) (string, error) {
|
||||
// Unmarshal and remarshal to strip YAML comments
|
||||
var raw any
|
||||
if err := yaml.Unmarshal([]byte(s), &raw); err != nil {
|
||||
// If YAML is invalid, fall back to scanning the original string
|
||||
// so the user gets the env var error rather than a confusing YAML parse error
|
||||
return substituteEnvMacrosInString(s, s)
|
||||
}
|
||||
clean, err := yaml.Marshal(raw)
|
||||
if err != nil {
|
||||
return substituteEnvMacrosInString(s, s)
|
||||
}
|
||||
|
||||
return substituteEnvMacrosInString(s, string(clean))
|
||||
}
|
||||
|
||||
// substituteEnvMacrosInString finds ${env.VAR} macros in scanStr and substitutes
|
||||
// them in target. This separation allows scanning comment-free YAML while
|
||||
// substituting in the original string.
|
||||
func substituteEnvMacrosInString(target, scanStr string) (string, error) {
|
||||
result := target
|
||||
matches := envMacroRegex.FindAllStringSubmatch(scanStr, -1)
|
||||
for _, match := range matches {
|
||||
fullMatch := match[0] // ${env.VAR_NAME}
|
||||
varName := match[1] // VAR_NAME
|
||||
|
||||
value, exists := os.LookupEnv(varName)
|
||||
if !exists {
|
||||
return "", fmt.Errorf("environment variable '%s' is not set", varName)
|
||||
}
|
||||
|
||||
// Sanitize the value for safe YAML substitution
|
||||
value, err := sanitizeEnvValueForYAML(value, varName)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
result = strings.ReplaceAll(result, fullMatch, value)
|
||||
}
|
||||
return result, nil
|
||||
}
|
||||
|
||||
// sanitizeEnvValueForYAML prepares the value of environment variable varName
// for textual insertion into a YAML document.
//
// Values containing a newline, carriage return or NUL byte are rejected with
// an error, since they would break the YAML structure in any quoting context.
// Otherwise every backslash is doubled and every double quote is
// backslash-escaped, so the value is interpreted correctly inside a
// double-quoted YAML string. In an unquoted context those escapes appear
// literally.
func sanitizeEnvValueForYAML(value, varName string) (string, error) {
	if strings.IndexAny(value, "\n\r\x00") >= 0 {
		return "", fmt.Errorf("environment variable '%s' contains newlines or null bytes which are not allowed in YAML substitution", varName)
	}

	// Single pass: each input character is escaped at most once.
	escaper := strings.NewReplacer(`\`, `\\`, `"`, `\"`)
	return escaper.Replace(value), nil
}
|
||||
@@ -15,6 +15,9 @@ type MatrixConfig struct {
|
||||
Var map[string]string `yaml:"vars"`
|
||||
EvictCosts map[string]int `yaml:"evict_costs"`
|
||||
Sets OrderedSets `yaml:"sets"`
|
||||
|
||||
// populated by ValidateMatrix; not settable from yaml
|
||||
ExpandedSets []ExpandedSet `yaml:"-"`
|
||||
}
|
||||
|
||||
// SetEntry is a single named set with its DSL expression.
|
||||
|
||||
@@ -289,7 +289,9 @@ matrix:
|
||||
cfg, err := LoadConfigFromReader(strings.NewReader(yaml))
|
||||
require.NoError(t, err)
|
||||
assert.NotNil(t, cfg.Matrix)
|
||||
assert.Len(t, cfg.ExpandedSets, 2)
|
||||
assert.Len(t, cfg.Matrix.ExpandedSets, 2)
|
||||
assert.Equal(t, "matrix", cfg.Routing.Router.Use)
|
||||
assert.Len(t, cfg.Routing.Router.Settings.Matrix.ExpandedSets, 2)
|
||||
// Groups should be empty when matrix is used
|
||||
assert.Empty(t, cfg.Groups)
|
||||
}
|
||||
|
||||
@@ -0,0 +1,300 @@
|
||||
package config
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"sort"
|
||||
"strings"
|
||||
|
||||
"gopkg.in/yaml.v3"
|
||||
)
|
||||
|
||||
// identityMapPaths lists the dotted config paths whose direct children are
// identity-keyed maps: each child key names a discrete entity (a model, a
// group, a peer, etc.). If the same child key shows up in two sources it is
// treated as a hard error, because it means one entity was split across
// files by mistake.
var identityMapPaths = map[string]bool{
	// top-level sections
	"models":   true,
	"groups":   true,
	"profiles": true,
	"peers":    true,
	"matrix":   true,

	// the same router settings nested under the routing block
	"routing.router.settings.groups": true,
	"routing.router.settings.matrix": true,
}
|
||||
|
||||
// LoadConfigSources loads and merges configuration from -config (optional)
|
||||
// and -config-dir (optional). At least one must be provided. The -config file
|
||||
// is loaded first; *.yml/*.yaml files directly under -config-dir are then
|
||||
// merged in sorted filename order. The merged document is passed through the
|
||||
// existing LoadConfigFromReader pipeline unchanged.
|
||||
func LoadConfigSources(configPath, configDir string) (Config, error) {
|
||||
if configPath == "" && configDir == "" {
|
||||
return Config{}, fmt.Errorf("at least one of -config or -config-dir must be provided")
|
||||
}
|
||||
|
||||
var sourcePaths []string
|
||||
|
||||
if configPath != "" {
|
||||
sourcePaths = append(sourcePaths, configPath)
|
||||
}
|
||||
|
||||
if configDir != "" {
|
||||
dirFiles, err := listYAMLFiles(configDir)
|
||||
if err != nil {
|
||||
return Config{}, fmt.Errorf("-config-dir %s: %w", configDir, err)
|
||||
}
|
||||
|
||||
if configPath != "" {
|
||||
absConfig, err := filepath.Abs(configPath)
|
||||
if err != nil {
|
||||
return Config{}, fmt.Errorf("failed to resolve -config path: %w", err)
|
||||
}
|
||||
for _, f := range dirFiles {
|
||||
absF, err := filepath.Abs(f)
|
||||
if err != nil {
|
||||
return Config{}, fmt.Errorf("failed to resolve config dir file %s: %w", f, err)
|
||||
}
|
||||
if absConfig == absF {
|
||||
return Config{}, fmt.Errorf("-config path %s is also present in -config-dir %s; remove it from one", configPath, configDir)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
sourcePaths = append(sourcePaths, dirFiles...)
|
||||
}
|
||||
|
||||
if len(sourcePaths) == 0 {
|
||||
return Config{}, fmt.Errorf("no configuration sources found")
|
||||
}
|
||||
|
||||
var merged *yaml.Node
|
||||
for _, p := range sourcePaths {
|
||||
node, err := parseSource(p)
|
||||
if err != nil {
|
||||
return Config{}, err
|
||||
}
|
||||
if node == nil {
|
||||
continue // empty file
|
||||
}
|
||||
if merged == nil {
|
||||
merged = node
|
||||
continue
|
||||
}
|
||||
if err := mergeNodes(merged, node, "", p); err != nil {
|
||||
return Config{}, err
|
||||
}
|
||||
}
|
||||
|
||||
if merged == nil {
|
||||
// All sources were empty; run the pipeline on empty input so defaults
|
||||
// and validation still apply (e.g. startPort, performance defaults).
|
||||
return LoadConfigFromReader(strings.NewReader(""))
|
||||
}
|
||||
|
||||
out, err := yaml.Marshal(merged)
|
||||
if err != nil {
|
||||
return Config{}, fmt.Errorf("failed to marshal merged config: %w", err)
|
||||
}
|
||||
return LoadConfigFromReader(strings.NewReader(string(out)))
|
||||
}
|
||||
|
||||
// listYAMLFiles returns the top-level *.yml and *.yaml files in dir, sorted by
|
||||
// filename for deterministic merge order. Subdirectories are not traversed.
|
||||
func listYAMLFiles(dir string) ([]string, error) {
|
||||
entries, err := os.ReadDir(dir)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
var files []string
|
||||
for _, e := range entries {
|
||||
if e.IsDir() {
|
||||
continue
|
||||
}
|
||||
name := e.Name()
|
||||
if !strings.HasSuffix(name, ".yml") && !strings.HasSuffix(name, ".yaml") {
|
||||
continue
|
||||
}
|
||||
files = append(files, filepath.Join(dir, name))
|
||||
}
|
||||
sort.Strings(files)
|
||||
return files, nil
|
||||
}
|
||||
|
||||
// parseSource reads and parses one YAML config file into a root mapping node.
|
||||
// Returns a nil node (no error) when the file is empty or contains only
|
||||
// comments.
|
||||
//
|
||||
// Env macros (${env.VAR}) are substituted at the string level before YAML
|
||||
// parsing so that flow-style constructs like [${env.API_KEY}] parse
|
||||
// correctly — the brace would otherwise be interpreted as a flow mapping.
|
||||
func parseSource(path string) (*yaml.Node, error) {
|
||||
data, err := os.ReadFile(path)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to read config %s: %w", path, err)
|
||||
}
|
||||
yamlStr, err := substituteEnvMacros(string(data))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("config %s: %w", path, err)
|
||||
}
|
||||
var doc yaml.Node
|
||||
if err := yaml.Unmarshal([]byte(yamlStr), &doc); err != nil {
|
||||
return nil, fmt.Errorf("failed to parse config %s: %w", path, err)
|
||||
}
|
||||
// yaml.Unmarshal into a yaml.Node yields a DocumentNode whose Content[0]
|
||||
// is the actual root. Unwrap it so callers see the real top-level node.
|
||||
root := &doc
|
||||
if root.Kind == yaml.DocumentNode && len(root.Content) > 0 {
|
||||
root = root.Content[0]
|
||||
}
|
||||
if root.Kind == 0 || root.Content == nil {
|
||||
return nil, nil
|
||||
}
|
||||
if root.Kind != yaml.MappingNode {
|
||||
return nil, fmt.Errorf("config %s: top-level YAML must be a mapping", path)
|
||||
}
|
||||
return root, nil
|
||||
}
|
||||
|
||||
// mergeNodes merges src into dst (both MappingNodes) in place. Keys present in
|
||||
// only one side are kept; shared keys are merged recursively under the rules
|
||||
// in mergeValue. srcPath is included in error messages to identify the file
|
||||
// that introduced the conflict.
|
||||
func mergeNodes(dst, src *yaml.Node, path, srcPath string) error {
|
||||
srcIdx := indexMapping(src)
|
||||
|
||||
// First pass: merge shared keys in place.
|
||||
for i := 0; i+1 < len(dst.Content); i += 2 {
|
||||
keyNode := dst.Content[i]
|
||||
dstVal := dst.Content[i+1]
|
||||
key := keyNode.Value
|
||||
|
||||
srcVal, ok := srcIdx[key]
|
||||
if !ok {
|
||||
continue // dst-only key, keep as-is
|
||||
}
|
||||
|
||||
childPath := joinPath(path, key)
|
||||
|
||||
if identityMapPaths[childPath] {
|
||||
// Identity-keyed map: each child key names a discrete entity
|
||||
// (a model, group, peer, ...). A shared child key is a hard
|
||||
// error; src-only children are appended in the second pass.
|
||||
if err := mergeIdentityMap(dstVal, srcVal, childPath, key, srcPath); err != nil {
|
||||
return err
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
if err := mergeValue(dstVal, srcVal, childPath, srcPath); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
// Second pass: append src-only keys.
|
||||
dstIdx := indexMapping(dst)
|
||||
for i := 0; i+1 < len(src.Content); i += 2 {
|
||||
keyNode := src.Content[i]
|
||||
srcVal := src.Content[i+1]
|
||||
key := keyNode.Value
|
||||
|
||||
if _, ok := dstIdx[key]; ok {
|
||||
continue // already merged above
|
||||
}
|
||||
keyCopy := *keyNode
|
||||
valCopy := *srcVal
|
||||
dst.Content = append(dst.Content, &keyCopy, &valCopy)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// mergeIdentityMap merges two identity-keyed mapping nodes (e.g. `models`,
|
||||
// `groups`, `peers`). Any child key present in both sides is a duplicate
|
||||
// entity and produces an error naming the conflicting key and source file.
|
||||
// src-only keys are appended to dst.
|
||||
func mergeIdentityMap(dst, src *yaml.Node, path, mapName, srcPath string) error {
|
||||
if dst.Kind != yaml.MappingNode || src.Kind != yaml.MappingNode {
|
||||
return fmt.Errorf("conflict at %q: expected a mapping, introduced by %s", path, srcPath)
|
||||
}
|
||||
dstIdx := indexMapping(dst)
|
||||
for i := 0; i+1 < len(src.Content); i += 2 {
|
||||
keyNode := src.Content[i]
|
||||
srcVal := src.Content[i+1]
|
||||
key := keyNode.Value
|
||||
if _, dup := dstIdx[key]; dup {
|
||||
return fmt.Errorf("duplicate %s %q found in %s (already defined in another config source)", mapName, key, srcPath)
|
||||
}
|
||||
keyCopy := *keyNode
|
||||
valCopy := *srcVal
|
||||
dst.Content = append(dst.Content, &keyCopy, &valCopy)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// mergeValue merges srcVal into dstVal (both pointing into the parent's
|
||||
// Content slice). Mapping+Mapping recurses; Sequence+Sequence concatenates;
|
||||
// Scalar+Scalar errors on value mismatch; null on either side yields to the
|
||||
// non-null side.
|
||||
func mergeValue(dstVal, srcVal *yaml.Node, path, srcPath string) error {
|
||||
switch {
|
||||
case dstVal.Kind == yaml.MappingNode && srcVal.Kind == yaml.MappingNode:
|
||||
return mergeNodes(dstVal, srcVal, path, srcPath)
|
||||
|
||||
case dstVal.Kind == yaml.SequenceNode && srcVal.Kind == yaml.SequenceNode:
|
||||
dstVal.Content = append(dstVal.Content, srcVal.Content...)
|
||||
return nil
|
||||
|
||||
case dstVal.Kind == yaml.ScalarNode && srcVal.Kind == yaml.ScalarNode:
|
||||
if isNullScalar(dstVal) {
|
||||
*dstVal = *srcVal
|
||||
return nil
|
||||
}
|
||||
if isNullScalar(srcVal) {
|
||||
return nil
|
||||
}
|
||||
if dstVal.Value == srcVal.Value && dstVal.Tag == srcVal.Tag {
|
||||
return nil
|
||||
}
|
||||
return fmt.Errorf("conflict at %q: %s sets a different value than a previous source", path, srcPath)
|
||||
|
||||
case isNull(dstVal):
|
||||
*dstVal = *srcVal
|
||||
return nil
|
||||
|
||||
case isNull(srcVal):
|
||||
return nil
|
||||
|
||||
default:
|
||||
return fmt.Errorf("conflict at %q: incompatible YAML node kinds (kind %d vs %d) introduced by %s", path, dstVal.Kind, srcVal.Kind, srcPath)
|
||||
}
|
||||
}
|
||||
|
||||
// isNull reports whether n represents a YAML null (empty or !!null).
|
||||
func isNull(n *yaml.Node) bool {
|
||||
if n == nil || n.Kind == 0 {
|
||||
return true
|
||||
}
|
||||
return isNullScalar(n)
|
||||
}
|
||||
|
||||
func isNullScalar(n *yaml.Node) bool {
|
||||
return n.Kind == yaml.ScalarNode && (n.Tag == "!!null" || n.Tag == "") && n.Value == ""
|
||||
}
|
||||
|
||||
// indexMapping builds a key -> value-node index for a mapping node.
|
||||
func indexMapping(n *yaml.Node) map[string]*yaml.Node {
|
||||
idx := make(map[string]*yaml.Node, len(n.Content)/2)
|
||||
for i := 0; i+1 < len(n.Content); i += 2 {
|
||||
idx[n.Content[i].Value] = n.Content[i+1]
|
||||
}
|
||||
return idx
|
||||
}
|
||||
|
||||
// joinPath appends key to the dotted path parent. An empty parent denotes the
// document root, in which case key is returned on its own.
func joinPath(parent, key string) string {
	if parent != "" {
		return parent + "." + key
	}
	return key
}
|
||||
@@ -0,0 +1,304 @@
|
||||
package config
|
||||
|
||||
import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
// writeYAML writes content to a file named name inside dir. Returns the full
|
||||
// path of the written file.
|
||||
func writeYAML(t *testing.T, dir, name, content string) string {
|
||||
t.Helper()
|
||||
p := filepath.Join(dir, name)
|
||||
require.NoError(t, os.MkdirAll(filepath.Dir(p), 0o755))
|
||||
require.NoError(t, os.WriteFile(p, []byte(content), 0o644))
|
||||
return p
|
||||
}
|
||||
|
||||
// modelCfg builds a single-model YAML snippet indented for nesting under a
|
||||
// `models:` key. The proxy uses a fixed port so tests don't depend on
|
||||
// ${PORT} allocation.
|
||||
func modelCfg(id, cmd string) string {
|
||||
return " " + id + ":\n cmd: " + cmd + "\n proxy: \"http://localhost:9999\"\n"
|
||||
}
|
||||
|
||||
func TestLoadConfigSources_NeitherProvided(t *testing.T) {
|
||||
_, err := LoadConfigSources("", "")
|
||||
require.Error(t, err)
|
||||
assert.Contains(t, err.Error(), "at least one of -config or -config-dir")
|
||||
}
|
||||
|
||||
func TestLoadConfigSources_ConfigOnly(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
cfgPath := writeYAML(t, dir, "config.yaml", `
|
||||
models:
|
||||
`+modelCfg("model1", "echo hi")+`
|
||||
groups:
|
||||
group1:
|
||||
members: ["model1"]
|
||||
`)
|
||||
cfg, err := LoadConfigSources(cfgPath, "")
|
||||
require.NoError(t, err)
|
||||
_, id, ok := cfg.FindConfig("model1")
|
||||
require.True(t, ok)
|
||||
assert.Equal(t, "model1", id)
|
||||
}
|
||||
|
||||
func TestLoadConfigSources_DirOnly(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
writeYAML(t, dir, "a.yaml", "models:\n"+modelCfg("alpha", "echo a"))
|
||||
writeYAML(t, dir, "b.yaml", "models:\n"+modelCfg("beta", "echo b"))
|
||||
|
||||
cfg, err := LoadConfigSources("", dir)
|
||||
require.NoError(t, err)
|
||||
for _, want := range []string{"alpha", "beta"} {
|
||||
_, _, ok := cfg.FindConfig(want)
|
||||
assert.True(t, ok, "model %s should be present", want)
|
||||
}
|
||||
}
|
||||
|
||||
func TestLoadConfigSources_ConfigPlusDirAdditive(t *testing.T) {
|
||||
// -config lives outside -config-dir; both contribute models additively.
|
||||
dir := t.TempDir()
|
||||
cfgPath := writeYAML(t, dir, "config.yaml", "models:\n"+modelCfg("base", "echo base"))
|
||||
cfgDir := t.TempDir()
|
||||
writeYAML(t, cfgDir, "extra.yaml", "models:\n"+modelCfg("ext", "echo ext"))
|
||||
|
||||
cfg, err := LoadConfigSources(cfgPath, cfgDir)
|
||||
require.NoError(t, err)
|
||||
for _, want := range []string{"base", "ext"} {
|
||||
_, _, ok := cfg.FindConfig(want)
|
||||
assert.True(t, ok, "model %s should be present after merge", want)
|
||||
}
|
||||
}
|
||||
|
||||
// TestLoadConfigSources_ConfigInDirOverlap verifies that a -config file that
|
||||
// is also a member of -config-dir is rejected.
|
||||
func TestLoadConfigSources_ConfigInDirOverlap(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
cfgPath := writeYAML(t, dir, "main.yaml", "models:\n"+modelCfg("base", "echo base"))
|
||||
|
||||
_, err := LoadConfigSources(cfgPath, dir)
|
||||
require.Error(t, err)
|
||||
assert.Contains(t, err.Error(), "is also present in -config-dir")
|
||||
}
|
||||
|
||||
func TestLoadConfigSources_DuplicateModelID(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
writeYAML(t, dir, "a.yaml", "models:\n"+modelCfg("dup", "echo a"))
|
||||
writeYAML(t, dir, "b.yaml", "models:\n"+modelCfg("dup", "echo b"))
|
||||
|
||||
_, err := LoadConfigSources("", dir)
|
||||
require.Error(t, err)
|
||||
assert.Contains(t, err.Error(), `duplicate models "dup"`)
|
||||
}
|
||||
|
||||
func TestLoadConfigSources_DuplicateGroupID(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
writeYAML(t, dir, "a.yaml", `
|
||||
models:
|
||||
`+modelCfg("m1", "echo m1")+"groups:\n g1:\n members: [m1]\n")
|
||||
writeYAML(t, dir, "b.yaml", `
|
||||
models:
|
||||
`+modelCfg("m2", "echo m2")+"groups:\n g1:\n members: [m2]\n")
|
||||
|
||||
_, err := LoadConfigSources("", dir)
|
||||
require.Error(t, err)
|
||||
assert.Contains(t, err.Error(), `duplicate groups "g1"`)
|
||||
}
|
||||
|
||||
func TestLoadConfigSources_DuplicatePeer(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
peerA := "peers:\n remote:\n proxy: http://x:1\n models: [m1]\n"
|
||||
peerB := "peers:\n remote:\n proxy: http://x:2\n models: [m2]\n"
|
||||
writeYAML(t, dir, "a.yaml", "models:\n"+modelCfg("m1", "echo m1")+"\n"+peerA)
|
||||
writeYAML(t, dir, "b.yaml", "models:\n"+modelCfg("m2", "echo m2")+"\n"+peerB)
|
||||
|
||||
_, err := LoadConfigSources("", dir)
|
||||
require.Error(t, err)
|
||||
assert.Contains(t, err.Error(), `duplicate peers "remote"`)
|
||||
}
|
||||
|
||||
func TestLoadConfigSources_ScalarConflict(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
writeYAML(t, dir, "a.yaml", "models:\n"+modelCfg("m1", "echo m1")+"\nglobalTTL: 100\n")
|
||||
writeYAML(t, dir, "b.yaml", "models:\n"+modelCfg("m2", "echo m2")+"\nglobalTTL: 200\n")
|
||||
|
||||
_, err := LoadConfigSources("", dir)
|
||||
require.Error(t, err)
|
||||
assert.Contains(t, err.Error(), `conflict at "globalTTL"`)
|
||||
}
|
||||
|
||||
func TestLoadConfigSources_ScalarSameValueNoConflict(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
writeYAML(t, dir, "a.yaml", "models:\n"+modelCfg("m1", "echo m1")+"\nglobalTTL: 100\n")
|
||||
writeYAML(t, dir, "b.yaml", "models:\n"+modelCfg("m2", "echo m2")+"\nglobalTTL: 100\n")
|
||||
|
||||
cfg, err := LoadConfigSources("", dir)
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, 100, cfg.GlobalTTL)
|
||||
}
|
||||
|
||||
func TestLoadConfigSources_MacrosConcatenate(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
writeYAML(t, dir, "a.yaml", "macros:\n LOW: 1\nmodels:\n"+modelCfg("m1", "echo ${LOW}"))
|
||||
writeYAML(t, dir, "b.yaml", "macros:\n HIGH: 2\nmodels:\n"+modelCfg("m2", "echo ${HIGH}"))
|
||||
|
||||
cfg, err := LoadConfigSources("", dir)
|
||||
require.NoError(t, err)
|
||||
// Both macros are available globally after merge.
|
||||
low, ok := cfg.Macros.Get("LOW")
|
||||
require.True(t, ok)
|
||||
assert.Equal(t, 1, low)
|
||||
high, ok := cfg.Macros.Get("HIGH")
|
||||
require.True(t, ok)
|
||||
assert.Equal(t, 2, high)
|
||||
}
|
||||
|
||||
func TestLoadConfigSources_APIKeysConcatenate(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
writeYAML(t, dir, "a.yaml", "models:\n"+modelCfg("m1", "echo m1")+"\napiKeys: [key-a]\n")
|
||||
writeYAML(t, dir, "b.yaml", "models:\n"+modelCfg("m2", "echo m2")+"\napiKeys: [key-b]\n")
|
||||
|
||||
cfg, err := LoadConfigSources("", dir)
|
||||
require.NoError(t, err)
|
||||
assert.ElementsMatch(t, []string{"key-a", "key-b"}, cfg.RequiredAPIKeys)
|
||||
}
|
||||
|
||||
func TestLoadConfigSources_RoutingGroupsMerge(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
writeYAML(t, dir, "a.yaml", `
|
||||
models:
|
||||
`+modelCfg("m1", "echo m1")+`
|
||||
routing:
|
||||
router:
|
||||
settings:
|
||||
groups:
|
||||
groupA:
|
||||
members: [m1]
|
||||
`)
|
||||
writeYAML(t, dir, "b.yaml", `
|
||||
models:
|
||||
`+modelCfg("m2", "echo m2")+`
|
||||
routing:
|
||||
router:
|
||||
settings:
|
||||
groups:
|
||||
groupB:
|
||||
members: [m2]
|
||||
`)
|
||||
|
||||
cfg, err := LoadConfigSources("", dir)
|
||||
require.NoError(t, err)
|
||||
groups := cfg.Routing.Router.Settings.Groups
|
||||
assert.Contains(t, groups, "groupA")
|
||||
assert.Contains(t, groups, "groupB")
|
||||
// default group added by pipeline for orphaned/leftover routing groups...
|
||||
// here both groups reference distinct models
|
||||
}
|
||||
|
||||
func TestLoadConfigSources_EnvMacrosSubstituted(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
// Use ${PORT} in cmd so the pipeline allocates a port and substitutes it;
|
||||
// verifies env/macro substitution runs on the merged document.
|
||||
writeYAML(t, dir, "a.yaml", "models:\n m1:\n cmd: serve --port ${PORT}\n proxy: \"http://localhost:${PORT}\"\n")
|
||||
cfg, err := LoadConfigSources("", dir)
|
||||
require.NoError(t, err)
|
||||
m := cfg.Models["m1"]
|
||||
assert.NotContains(t, m.Cmd, "${PORT}", "PORT macro should have been substituted")
|
||||
assert.NotContains(t, m.Proxy, "${PORT}", "PORT macro should have been substituted in proxy")
|
||||
}
|
||||
|
||||
func TestLoadConfigSources_EnvMacroInFlowStyleList(t *testing.T) {
|
||||
// Regression: flow-style lists with ${env.*} must parse. Previously
|
||||
// parseSource unmarshalled before env substitution, so the brace in
|
||||
// [${env.API_KEY}] was misread as a flow mapping and parsing failed.
|
||||
dir := t.TempDir()
|
||||
writeYAML(t, dir, "a.yaml", "models:\n m1:\n cmd: echo hi\n proxy: \"http://localhost:9999\"\n")
|
||||
writeYAML(t, dir, "keys.yaml", "apiKeys: [${env.TEST_API_KEY}]\nmodels:\n m2:\n cmd: echo hi\n proxy: \"http://localhost:9998\"\n")
|
||||
|
||||
t.Setenv("TEST_API_KEY", "secret123")
|
||||
cfg, err := LoadConfigSources("", dir)
|
||||
require.NoError(t, err)
|
||||
assert.Contains(t, cfg.RequiredAPIKeys, "secret123")
|
||||
}
|
||||
|
||||
func TestLoadConfigSources_SortedOrderDeterministic(t *testing.T) {
|
||||
// Two files defining distinct models, scanned in z..a order by filename.
|
||||
// Determine merged result is the same regardless of how the FS returns them.
|
||||
dir := t.TempDir()
|
||||
writeYAML(t, dir, "z.yaml", "models:\n"+modelCfg("zmodel", "echo z"))
|
||||
writeYAML(t, dir, "a.yaml", "models:\n"+modelCfg("amodel", "echo a"))
|
||||
|
||||
const runs = 3
|
||||
for i := 0; i < runs; i++ {
|
||||
cfg, err := LoadConfigSources("", dir)
|
||||
require.NoError(t, err)
|
||||
// startPort-based allocation: first allocated model gets 5800.
|
||||
// Sorted order means amodel gets 5800, zmodel gets 5801.
|
||||
_, _, ok := cfg.FindConfig("amodel")
|
||||
assert.True(t, ok)
|
||||
_, _, ok = cfg.FindConfig("zmodel")
|
||||
assert.True(t, ok)
|
||||
}
|
||||
}
|
||||
|
||||
func TestLoadConfigSources_EmptyDirWithConfig(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
cfgDir := t.TempDir()
|
||||
cfgPath := writeYAML(t, dir, "main.yaml", "models:\n"+modelCfg("m1", "echo m1"))
|
||||
|
||||
cfg, err := LoadConfigSources(cfgPath, cfgDir)
|
||||
require.NoError(t, err)
|
||||
assert.Contains(t, cfg.Models, "m1")
|
||||
}
|
||||
|
||||
func TestLoadConfigSources_EmptyDirOnly(t *testing.T) {
|
||||
// An empty -config-dir with no -config is an error: there is nothing to
|
||||
// load and silently producing an empty config would mask the misconfig.
|
||||
cfgDir := t.TempDir()
|
||||
_, err := LoadConfigSources("", cfgDir)
|
||||
require.Error(t, err)
|
||||
assert.Contains(t, err.Error(), "no configuration sources found")
|
||||
}
|
||||
|
||||
func TestLoadConfigSources_AssertNoUnknownMacrosAfterMerge(t *testing.T) {
|
||||
// Macros defined in one file should not satisfy unknown-macro validation in
|
||||
// another — they do, because merge concats global macros before validation
|
||||
// runs. This test documents that a macro from file A is usable in file B.
|
||||
dir := t.TempDir()
|
||||
writeYAML(t, dir, "macros.yaml", "macros:\n SHARED: hello\nmodels:\n"+modelCfg("dummy", "echo dummy"))
|
||||
writeYAML(t, dir, "use.yaml", "models:\n"+modelCfg("user", "echo ${SHARED}"))
|
||||
|
||||
cfg, err := LoadConfigSources("", dir)
|
||||
require.NoError(t, err)
|
||||
m := cfg.Models["user"]
|
||||
assert.Contains(t, m.Cmd, "hello")
|
||||
assert.NotContains(t, m.Cmd, "${SHARED}")
|
||||
}
|
||||
|
||||
func TestLoadConfigSources_KindMismatchErrors(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
writeYAML(t, dir, "a.yaml", "startPort: 5800\nmodels:\n"+modelCfg("m1", "echo m1"))
|
||||
writeYAML(t, dir, "b.yaml", "startPort: [5800, 5801]\nmodels:\n"+modelCfg("m2", "echo m2"))
|
||||
|
||||
_, err := LoadConfigSources("", dir)
|
||||
require.Error(t, err)
|
||||
assert.Contains(t, err.Error(), "incompatible YAML node kinds")
|
||||
}
|
||||
|
||||
func TestLoadConfigSources_NullYieldsToValue(t *testing.T) {
|
||||
// File A: routing.router block absent (null on root for routing);
|
||||
// file B: defines routing.router.settings.groups. Merge should keep B's.
|
||||
dir := t.TempDir()
|
||||
writeYAML(t, dir, "a.yaml", "models:\n"+modelCfg("m1", "echo m1"))
|
||||
writeYAML(t, dir, "b.yaml", "routing:\n router:\n settings:\n groups:\n g1:\n members: [m1]\nmodels:\n"+modelCfg("m2", "echo m2"))
|
||||
|
||||
cfg, err := LoadConfigSources("", dir)
|
||||
require.NoError(t, err)
|
||||
assert.Contains(t, cfg.Routing.Router.Settings.Groups, "g1")
|
||||
}
|
||||
@@ -2,6 +2,7 @@ package config
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"runtime"
|
||||
)
|
||||
|
||||
@@ -9,6 +10,47 @@ const (
|
||||
MODEL_CONFIG_DEFAULT_TTL = -1
|
||||
)
|
||||
|
||||
// validModalities is the set of modality names ModelCapConfig.Validate
// accepts in capabilities.in and capabilities.out.
var validModalities = map[string]struct{}{
	"audio": {},
	"image": {},
	"text":  {},
}
|
||||
|
||||
// ModelCapConfig describes the modalities and features a model supports. It
// is surfaced through /v1/models so clients can discover them. A block whose
// fields are all zero values counts as not configured (see Empty).
type ModelCapConfig struct {
	// In and Out hold the modality names given under capabilities.in and
	// capabilities.out; Validate restricts them to text, audio and image.
	In  []string `yaml:"in"`
	Out []string `yaml:"out"`

	// Feature flags read from capabilities.tools and capabilities.reranker.
	Tools    bool `yaml:"tools"`
	Reranker bool `yaml:"reranker"`

	// Context is read from capabilities.context; Validate rejects negative
	// values.
	Context int `yaml:"context"`
}
|
||||
|
||||
// Empty returns true when all fields are at their zero values.
|
||||
func (c ModelCapConfig) Empty() bool {
|
||||
return len(c.In) == 0 && len(c.Out) == 0 && !c.Tools && !c.Reranker && c.Context == 0
|
||||
}
|
||||
|
||||
// Validate checks that all modality values are recognized and context is
|
||||
// non-negative. Returns an error if any value is invalid.
|
||||
func (c ModelCapConfig) Validate() error {
|
||||
for _, m := range c.In {
|
||||
if _, ok := validModalities[m]; !ok {
|
||||
return fmt.Errorf("capabilities.in: invalid modality %q, must be one of: text, audio, image", m)
|
||||
}
|
||||
}
|
||||
for _, m := range c.Out {
|
||||
if _, ok := validModalities[m]; !ok {
|
||||
return fmt.Errorf("capabilities.out: invalid modality %q, must be one of: text, audio, image", m)
|
||||
}
|
||||
}
|
||||
if c.Context < 0 {
|
||||
return errors.New("capabilities.context: must be >= 0")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// TimeoutsConfig holds timeout settings for proxy connections
|
||||
// 0 = no timeout
|
||||
type TimeoutsConfig struct {
|
||||
@@ -55,6 +97,9 @@ type ModelConfig struct {
|
||||
// Timeout settings for proxy connections
|
||||
Timeouts TimeoutsConfig `yaml:"timeouts"`
|
||||
|
||||
// Capabilities defines what modalities and features the model supports.
|
||||
Capabilities ModelCapConfig `yaml:"capabilities"`
|
||||
|
||||
// Copy of HealthCheckTimeout from global config
|
||||
HealthCheckTimeout int `yaml:"healthCheckTimeout"`
|
||||
}
|
||||
|
||||
@@ -152,7 +152,7 @@ models:
|
||||
stop:
|
||||
- "<|end|>"
|
||||
- "<|stop|>"
|
||||
`
|
||||
`
|
||||
config, err := LoadConfigFromReader(strings.NewReader(content))
|
||||
assert.NoError(t, err)
|
||||
|
||||
@@ -170,3 +170,167 @@ models:
|
||||
assert.Equal(t, 0.7, setParams["temperature"])
|
||||
assert.Equal(t, 0.9, setParams["top_p"])
|
||||
}
|
||||
|
||||
func TestConfig_ModelCapabilities(t *testing.T) {
|
||||
t.Run("all fields", func(t *testing.T) {
|
||||
content := `
|
||||
models:
|
||||
model1:
|
||||
cmd: path/to/cmd --port ${PORT}
|
||||
capabilities:
|
||||
in:
|
||||
- text
|
||||
- audio
|
||||
- image
|
||||
out:
|
||||
- text
|
||||
- audio
|
||||
- image
|
||||
tools: true
|
||||
context: 32000
|
||||
`
|
||||
config, err := LoadConfigFromReader(strings.NewReader(content))
|
||||
assert.NoError(t, err)
|
||||
|
||||
mc := config.Models["model1"]
|
||||
assert.False(t, mc.Capabilities.Empty())
|
||||
assert.Equal(t, []string{"text", "audio", "image"}, mc.Capabilities.In)
|
||||
assert.Equal(t, []string{"text", "audio", "image"}, mc.Capabilities.Out)
|
||||
assert.True(t, mc.Capabilities.Tools)
|
||||
assert.Equal(t, 32000, mc.Capabilities.Context)
|
||||
})
|
||||
|
||||
t.Run("partial fields", func(t *testing.T) {
|
||||
content := `
|
||||
models:
|
||||
model1:
|
||||
cmd: path/to/cmd --port ${PORT}
|
||||
capabilities:
|
||||
tools: true
|
||||
context: 8192
|
||||
`
|
||||
config, err := LoadConfigFromReader(strings.NewReader(content))
|
||||
assert.NoError(t, err)
|
||||
|
||||
mc := config.Models["model1"]
|
||||
assert.False(t, mc.Capabilities.Empty())
|
||||
assert.Nil(t, mc.Capabilities.In)
|
||||
assert.Nil(t, mc.Capabilities.Out)
|
||||
assert.True(t, mc.Capabilities.Tools)
|
||||
assert.Equal(t, 8192, mc.Capabilities.Context)
|
||||
})
|
||||
|
||||
t.Run("not set", func(t *testing.T) {
|
||||
content := `
|
||||
models:
|
||||
model1:
|
||||
cmd: path/to/cmd --port ${PORT}
|
||||
`
|
||||
config, err := LoadConfigFromReader(strings.NewReader(content))
|
||||
assert.NoError(t, err)
|
||||
|
||||
mc := config.Models["model1"]
|
||||
assert.True(t, mc.Capabilities.Empty())
|
||||
})
|
||||
|
||||
t.Run("tools false is empty", func(t *testing.T) {
|
||||
content := `
|
||||
models:
|
||||
model1:
|
||||
cmd: path/to/cmd --port ${PORT}
|
||||
capabilities:
|
||||
tools: false
|
||||
`
|
||||
config, err := LoadConfigFromReader(strings.NewReader(content))
|
||||
assert.NoError(t, err)
|
||||
|
||||
mc := config.Models["model1"]
|
||||
assert.True(t, mc.Capabilities.Empty())
|
||||
})
|
||||
|
||||
t.Run("reranker true is not empty", func(t *testing.T) {
|
||||
content := `
|
||||
models:
|
||||
model1:
|
||||
cmd: path/to/cmd --port ${PORT}
|
||||
capabilities:
|
||||
reranker: true
|
||||
`
|
||||
config, err := LoadConfigFromReader(strings.NewReader(content))
|
||||
assert.NoError(t, err)
|
||||
|
||||
mc := config.Models["model1"]
|
||||
assert.False(t, mc.Capabilities.Empty())
|
||||
assert.True(t, mc.Capabilities.Reranker)
|
||||
})
|
||||
|
||||
t.Run("reranker false is empty", func(t *testing.T) {
|
||||
content := `
|
||||
models:
|
||||
model1:
|
||||
cmd: path/to/cmd --port ${PORT}
|
||||
capabilities:
|
||||
reranker: false
|
||||
`
|
||||
config, err := LoadConfigFromReader(strings.NewReader(content))
|
||||
assert.NoError(t, err)
|
||||
|
||||
mc := config.Models["model1"]
|
||||
assert.True(t, mc.Capabilities.Empty())
|
||||
})
|
||||
}
|
||||
|
||||
func TestConfig_ModelCapabilities_Validate(t *testing.T) {
|
||||
t.Run("valid_modalities", func(t *testing.T) {
|
||||
caps := ModelCapConfig{
|
||||
In: []string{"text", "image"},
|
||||
Out: []string{"text", "audio"},
|
||||
Tools: true,
|
||||
Context: 100000,
|
||||
}
|
||||
assert.NoError(t, caps.Validate())
|
||||
})
|
||||
|
||||
t.Run("empty_is_valid", func(t *testing.T) {
|
||||
caps := ModelCapConfig{}
|
||||
assert.NoError(t, caps.Validate())
|
||||
})
|
||||
|
||||
t.Run("invalid_in_modality", func(t *testing.T) {
|
||||
caps := ModelCapConfig{In: []string{"video"}}
|
||||
err := caps.Validate()
|
||||
assert.Error(t, err)
|
||||
assert.Contains(t, err.Error(), "capabilities.in")
|
||||
assert.Contains(t, err.Error(), "video")
|
||||
})
|
||||
|
||||
t.Run("invalid_out_modality", func(t *testing.T) {
|
||||
caps := ModelCapConfig{Out: []string{"video"}}
|
||||
err := caps.Validate()
|
||||
assert.Error(t, err)
|
||||
assert.Contains(t, err.Error(), "capabilities.out")
|
||||
assert.Contains(t, err.Error(), "video")
|
||||
})
|
||||
|
||||
t.Run("negative_context", func(t *testing.T) {
|
||||
caps := ModelCapConfig{Context: -1}
|
||||
err := caps.Validate()
|
||||
assert.Error(t, err)
|
||||
assert.Contains(t, err.Error(), "capabilities.context")
|
||||
})
|
||||
|
||||
t.Run("rejects_invalid_at_load", func(t *testing.T) {
|
||||
content := `
|
||||
models:
|
||||
model1:
|
||||
cmd: path/to/cmd --port ${PORT}
|
||||
capabilities:
|
||||
in:
|
||||
- text
|
||||
- video
|
||||
`
|
||||
_, err := LoadConfigFromReader(strings.NewReader(content))
|
||||
assert.Error(t, err)
|
||||
assert.Contains(t, err.Error(), "video")
|
||||
})
|
||||
}
|
||||
|
||||
@@ -0,0 +1,55 @@
|
||||
package config
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"regexp"
|
||||
|
||||
"gopkg.in/yaml.v3"
|
||||
)
|
||||
|
||||
// DefaultUpstreamIgnorePathsPattern is the default regular expression applied
|
||||
// to upstream.ignorePaths when the section is empty or absent from the config.
|
||||
// It matches common static-asset suffixes so requests for .js/.css/.png/etc.
|
||||
// files do not trigger a model swap.
|
||||
const DefaultUpstreamIgnorePathsPattern = `.*\.(js|json|css|png|gif|jpg|jpeg|ico|txt)$`
|
||||
|
||||
// DefaultUpstreamIgnorePaths returns the default compiled ignore paths used
|
||||
// when upstream.ignorePaths is not specified in the config. The returned slice
|
||||
// is fresh so callers may mutate it without affecting other configs.
|
||||
func DefaultUpstreamIgnorePaths() []*regexp.Regexp {
|
||||
return []*regexp.Regexp{regexp.MustCompile(DefaultUpstreamIgnorePathsPattern)}
|
||||
}
|
||||
|
||||
// UpstreamConfig holds the settings for the /upstream passthrough endpoint.
type UpstreamConfig struct {
	// IgnorePaths holds compiled regular expressions. A request to
	// /upstream/<model>/<path> whose remaining path matches any of them is
	// ignored and does not trigger a swap. DefaultUpstreamIgnorePaths is
	// applied when the config specifies no patterns.
	//
	// The yaml:"-" tag keeps the field out of direct decoding; it is filled
	// in by UnmarshalYAML instead.
	IgnorePaths []*regexp.Regexp `yaml:"-"`
}
|
||||
|
||||
// rawUpstreamConfig mirrors the YAML shape of the upstream section with the
// patterns still as plain strings. UnmarshalYAML decodes into it first and
// then compiles each entry into a *regexp.Regexp.
type rawUpstreamConfig struct {
	// IgnorePaths holds the uncompiled entries of upstream.ignorePaths.
	IgnorePaths []string `yaml:"ignorePaths"`
}
|
||||
|
||||
// UnmarshalYAML compiles each ignorePaths entry into a *regexp.Regexp. If any
|
||||
// entry fails to compile, an error is returned.
|
||||
func (u *UpstreamConfig) UnmarshalYAML(value *yaml.Node) error {
|
||||
var raw rawUpstreamConfig
|
||||
if err := value.Decode(&raw); err != nil {
|
||||
return err
|
||||
}
|
||||
patterns := make([]*regexp.Regexp, 0, len(raw.IgnorePaths))
|
||||
for _, p := range raw.IgnorePaths {
|
||||
re, err := regexp.Compile(p)
|
||||
if err != nil {
|
||||
return fmt.Errorf("upstream.ignorePaths: invalid regular expression %q: %w", p, err)
|
||||
}
|
||||
patterns = append(patterns, re)
|
||||
}
|
||||
u.IgnorePaths = patterns
|
||||
return nil
|
||||
}
|
||||
@@ -0,0 +1,88 @@
|
||||
package config
|
||||
|
||||
import (
|
||||
"regexp"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
const upstreamConfigHeader = `
|
||||
models:
|
||||
model1:
|
||||
cmd: path/to/cmd --arg1 one
|
||||
proxy: "http://localhost:8080"
|
||||
`
|
||||
|
||||
func TestConfig_UpstreamIgnorePaths_DefaultWhenAbsent(t *testing.T) {
|
||||
// When upstream is not specified at all, the default pattern is applied.
|
||||
content := upstreamConfigHeader
|
||||
cfg, err := LoadConfigFromReader(strings.NewReader(content))
|
||||
require.NoError(t, err)
|
||||
require.Len(t, cfg.Upstream.IgnorePaths, 1)
|
||||
|
||||
def := cfg.Upstream.IgnorePaths[0]
|
||||
assert.IsType(t, ®exp.Regexp{}, def)
|
||||
assert.Equal(t, DefaultUpstreamIgnorePathsPattern, def.String())
|
||||
|
||||
// The default matches common static-asset suffixes.
|
||||
assert.True(t, def.MatchString("/foo.js"))
|
||||
assert.True(t, def.MatchString("/bar/baz.json"))
|
||||
assert.True(t, def.MatchString("/static/img.png"))
|
||||
assert.True(t, def.MatchString("/notes.txt"))
|
||||
assert.True(t, def.MatchString("/favicon.ico"))
|
||||
// And does not match inference API paths.
|
||||
assert.False(t, def.MatchString("/v1/chat/completions"))
|
||||
assert.False(t, def.MatchString("/v1/models"))
|
||||
assert.False(t, def.MatchString("/health"))
|
||||
}
|
||||
|
||||
func TestConfig_UpstreamIgnorePaths_DefaultWhenSectionEmpty(t *testing.T) {
|
||||
// When upstream is present but ignorePaths is omitted, the default is still
|
||||
// applied.
|
||||
content := `upstream: {}` + "\n" + upstreamConfigHeader
|
||||
cfg, err := LoadConfigFromReader(strings.NewReader(content))
|
||||
require.NoError(t, err)
|
||||
require.Len(t, cfg.Upstream.IgnorePaths, 1)
|
||||
assert.Equal(t, DefaultUpstreamIgnorePathsPattern, cfg.Upstream.IgnorePaths[0].String())
|
||||
}
|
||||
|
||||
func TestConfig_UpstreamIgnorePaths_Compiles(t *testing.T) {
|
||||
content := `
|
||||
upstream:
|
||||
ignorePaths:
|
||||
- ".*\\.(js|json|css|png|gif|jpg|jpeg|txt)$"
|
||||
- "^/static/.*"
|
||||
` + upstreamConfigHeader
|
||||
|
||||
cfg, err := LoadConfigFromReader(strings.NewReader(content))
|
||||
require.NoError(t, err)
|
||||
require.Len(t, cfg.Upstream.IgnorePaths, 2)
|
||||
|
||||
// Verify the patterns are compiled into *regexp.Regexp and match as expected.
|
||||
assert.True(t, cfg.Upstream.IgnorePaths[0].MatchString("/foo.js"))
|
||||
assert.True(t, cfg.Upstream.IgnorePaths[0].MatchString("/bar/baz.json"))
|
||||
assert.False(t, cfg.Upstream.IgnorePaths[0].MatchString("/v1/chat/completions"))
|
||||
assert.True(t, cfg.Upstream.IgnorePaths[1].MatchString("/static/foo.png"))
|
||||
assert.False(t, cfg.Upstream.IgnorePaths[1].MatchString("/v1/chat/completions"))
|
||||
|
||||
// Confirm the type is *regexp.Regexp to satisfy the API contract.
|
||||
for _, re := range cfg.Upstream.IgnorePaths {
|
||||
assert.IsType(t, ®exp.Regexp{}, re)
|
||||
}
|
||||
}
|
||||
|
||||
func TestConfig_UpstreamIgnorePaths_InvalidRegexReturnsError(t *testing.T) {
|
||||
content := `
|
||||
upstream:
|
||||
ignorePaths:
|
||||
- "[invalid("
|
||||
` + upstreamConfigHeader
|
||||
|
||||
_, err := LoadConfigFromReader(strings.NewReader(content))
|
||||
require.Error(t, err)
|
||||
assert.Contains(t, err.Error(), "upstream.ignorePaths")
|
||||
assert.Contains(t, err.Error(), "invalid regular expression")
|
||||
}
|
||||
@@ -0,0 +1,92 @@
|
||||
package perf
|
||||
|
||||
// LUID identifies a graphics adapter. It is split into an unsigned low half
// and a signed high half, and is comparable, so it can be used as a map key.
type LUID struct {
	LowPart  uint32
	HighPart int32
}
|
||||
|
||||
// maxEnumAdapters is the capacity of the fixed-size buffer handed to
// D3DKMTEnumAdapters2.
const maxEnumAdapters = 16

// D3DKMT_ENUMADAPTERS2 is the argument block for D3DKMTEnumAdapters2.
// The caller sets NumAdapters to the buffer capacity and reads the number of
// returned entries from it afterwards; pAdapters holds the address of the
// first D3DKMT_ADAPTERINFO element.
type D3DKMT_ENUMADAPTERS2 struct {
	NumAdapters uint32
	pAdapters   uintptr
}
|
||||
|
||||
type D3DKMT_ADAPTERINFO struct {
|
||||
hAdapter uint32
|
||||
AdapterLuid LUID
|
||||
NumOfSources uint32
|
||||
bPresentMoveRegionsPreferred int32
|
||||
}
|
||||
|
||||
type D3DKMT_OPENADAPTERFROMLUID struct {
|
||||
AdapterLuid LUID
|
||||
hAdapter uint32
|
||||
}
|
||||
|
||||
type D3DKMT_CLOSEADAPTER struct {
|
||||
hAdapter uint32
|
||||
}
|
||||
|
||||
// KMTQUERYADAPTERINFOTYPE selects which kind of information
// D3DKMTQueryAdapterInfo returns.
type KMTQUERYADAPTERINFOTYPE int32

// Query selectors. Of these, only KMTQAITYPE_ADAPTERPERFDATA and
// KMTQAITYPE_ADAPTERPERFDATA_CAPS are used by the sampling code.
const (
	KMTQAITYPE_UMDRIVERPRIVATE          KMTQUERYADAPTERINFOTYPE = 0
	KMTQAITYPE_ADAPTERREGISTRYINFO      KMTQUERYADAPTERINFOTYPE = 8
	KMTQAITYPE_DRIVERVERSION            KMTQUERYADAPTERINFOTYPE = 13
	KMTQAITYPE_PHYSICALADAPTERDEVICEIDS KMTQUERYADAPTERINFOTYPE = 31
	KMTQAITYPE_NODEPERFDATA             KMTQUERYADAPTERINFOTYPE = 61
	KMTQAITYPE_ADAPTERPERFDATA          KMTQUERYADAPTERINFOTYPE = 62
	KMTQAITYPE_ADAPTERPERFDATA_CAPS     KMTQUERYADAPTERINFOTYPE = 63
)
|
||||
|
||||
type D3DKMT_QUERYADAPTERINFO struct {
|
||||
hAdapter uint32
|
||||
Type KMTQUERYADAPTERINFOTYPE
|
||||
pPrivateDriverData uintptr
|
||||
PrivateDriverDataSize uint32
|
||||
}
|
||||
|
||||
// D3DKMT_ADAPTER_PERFDATA receives the result of a
// KMTQAITYPE_ADAPTERPERFDATA query. The sampling code reads FanRPM, Power
// and Temperature from it; the windows build asserts the struct is 64 bytes.
//
// NOTE(review): units are not established by this file alone. Callers treat
// Temperature as deci-Celsius and Power as deci-watts; confirm against the
// Windows header.
type D3DKMT_ADAPTER_PERFDATA struct {
	PhysicalAdapterIndex uint32
	MemoryFrequency      uint64
	MaxMemoryFrequency   uint64
	MaxMemoryFrequencyOC uint64
	MemoryBandwidth      uint64
	PCIEBandwidth        uint64
	FanRPM               uint32
	Power                uint32
	Temperature          uint32
	PowerStateOverride   byte
}
|
||||
|
||||
// D3DKMT_QUERYSTATISTICS_TYPE selects the scope of a D3DKMTQueryStatistics
// request.
type D3DKMT_QUERYSTATISTICS_TYPE int32

// Statistics scopes. The sampling code issues ADAPTER, SEGMENT and NODE
// queries; the others are listed for completeness.
const (
	D3DKMT_QUERYSTATISTICS_ADAPTER             D3DKMT_QUERYSTATISTICS_TYPE = 0
	D3DKMT_QUERYSTATISTICS_PROCESS             D3DKMT_QUERYSTATISTICS_TYPE = 1
	D3DKMT_QUERYSTATISTICS_PROCESS_ADAPTER     D3DKMT_QUERYSTATISTICS_TYPE = 2
	D3DKMT_QUERYSTATISTICS_SEGMENT             D3DKMT_QUERYSTATISTICS_TYPE = 3
	D3DKMT_QUERYSTATISTICS_PROCESS_SEGMENT     D3DKMT_QUERYSTATISTICS_TYPE = 4
	D3DKMT_QUERYSTATISTICS_NODE                D3DKMT_QUERYSTATISTICS_TYPE = 5
	D3DKMT_QUERYSTATISTICS_PROCESS_NODE        D3DKMT_QUERYSTATISTICS_TYPE = 6
	D3DKMT_QUERYSTATISTICS_VIDPNSOURCE         D3DKMT_QUERYSTATISTICS_TYPE = 7
	D3DKMT_QUERYSTATISTICS_PROCESS_VIDPNSOURCE D3DKMT_QUERYSTATISTICS_TYPE = 8
)
|
||||
|
||||
// D3DKMT_ADAPTER_PERFDATACAPS receives the result of a
// KMTQAITYPE_ADAPTERPERFDATA_CAPS query. The sampling code uses MaxFanRPM as
// the denominator for the fan-speed percentage; the windows build asserts
// the struct is 40 bytes.
type D3DKMT_ADAPTER_PERFDATACAPS struct {
	PhysicalAdapterIndex uint32
	MaxMemoryBandwidth   uint64
	MaxPCIEBandwidth     uint64
	MaxFanRPM            uint32
	TemperatureMax       uint32
	TemperatureWarning   uint32
}
|
||||
|
||||
// D3DKMT_QUERYSTATISTICS_QUERY_SEGMENT names the memory segment a SEGMENT
// statistics query refers to.
type D3DKMT_QUERYSTATISTICS_QUERY_SEGMENT struct {
	SegmentId uint32
}

// D3DKMT_QUERYSTATISTICS_QUERY_NODE names the node a NODE statistics query
// refers to.
type D3DKMT_QUERYSTATISTICS_QUERY_NODE struct {
	NodeId uint32
}
|
||||
@@ -0,0 +1,529 @@
|
||||
//go:build windows
|
||||
|
||||
package perf
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/binary"
|
||||
"fmt"
|
||||
"sync"
|
||||
"time"
|
||||
"unsafe"
|
||||
|
||||
"github.com/mostlygeek/llama-swap/internal/logmon"
|
||||
"golang.org/x/sys/windows"
|
||||
)
|
||||
|
||||
var (
|
||||
d3dkmDLL *windows.LazyDLL
|
||||
procEnumAdapters2 *windows.LazyProc
|
||||
procOpenAdapterFromLuid *windows.LazyProc
|
||||
procCloseAdapter *windows.LazyProc
|
||||
procQueryAdapterInfo *windows.LazyProc
|
||||
procQueryStatistics *windows.LazyProc
|
||||
d3dkmtInitOnce sync.Once
|
||||
d3dkmtInitErr error
|
||||
)
|
||||
|
||||
// initD3DKMT lazily loads gdi32.dll and resolves D3DKMT function pointers.
|
||||
// Safe for concurrent use via sync.Once.
|
||||
func initD3DKMT() error {
|
||||
d3dkmtInitOnce.Do(func() {
|
||||
d3dkmDLL = windows.NewLazySystemDLL("gdi32.dll")
|
||||
|
||||
procEnumAdapters2 = d3dkmDLL.NewProc("D3DKMTEnumAdapters2")
|
||||
procOpenAdapterFromLuid = d3dkmDLL.NewProc("D3DKMTOpenAdapterFromLuid")
|
||||
procCloseAdapter = d3dkmDLL.NewProc("D3DKMTCloseAdapter")
|
||||
procQueryAdapterInfo = d3dkmDLL.NewProc("D3DKMTQueryAdapterInfo")
|
||||
procQueryStatistics = d3dkmDLL.NewProc("D3DKMTQueryStatistics")
|
||||
|
||||
for name, p := range map[string]*windows.LazyProc{
|
||||
"D3DKMTEnumAdapters2": procEnumAdapters2,
|
||||
"D3DKMTOpenAdapterFromLuid": procOpenAdapterFromLuid,
|
||||
"D3DKMTCloseAdapter": procCloseAdapter,
|
||||
"D3DKMTQueryAdapterInfo": procQueryAdapterInfo,
|
||||
"D3DKMTQueryStatistics": procQueryStatistics,
|
||||
} {
|
||||
if err := p.Find(); err != nil {
|
||||
d3dkmtInitErr = fmt.Errorf("D3DKMT %s not found: %w", name, err)
|
||||
return
|
||||
}
|
||||
}
|
||||
})
|
||||
return d3dkmtInitErr
|
||||
}
|
||||
|
||||
// ntstatusCall invokes a D3DKMT function and returns a non-nil error if the
|
||||
// NTSTATUS result is not STATUS_SUCCESS (0).
|
||||
func ntstatusCall(proc *windows.LazyProc, arg unsafe.Pointer) error {
|
||||
ret, _, _ := proc.Call(uintptr(arg))
|
||||
if ret != 0 {
|
||||
return fmt.Errorf("NTSTATUS 0x%08x", uint32(ret))
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// d3dkmEnumerateAdapters enumerates all available graphics adapters via
|
||||
// D3DKMTEnumAdapters2.
|
||||
func d3dkmEnumerateAdapters() ([]D3DKMT_ADAPTERINFO, error) {
|
||||
var adapters [maxEnumAdapters]D3DKMT_ADAPTERINFO
|
||||
enum := D3DKMT_ENUMADAPTERS2{
|
||||
NumAdapters: maxEnumAdapters,
|
||||
pAdapters: uintptr(unsafe.Pointer(&adapters[0])),
|
||||
}
|
||||
if err := ntstatusCall(procEnumAdapters2, unsafe.Pointer(&enum)); err != nil {
|
||||
return nil, fmt.Errorf("EnumAdapters2: %w", err)
|
||||
}
|
||||
if enum.NumAdapters == 0 {
|
||||
return nil, fmt.Errorf("no adapters found")
|
||||
}
|
||||
result := make([]D3DKMT_ADAPTERINFO, enum.NumAdapters)
|
||||
for i := uint32(0); i < enum.NumAdapters; i++ {
|
||||
result[i] = adapters[i]
|
||||
}
|
||||
return result, nil
|
||||
}
|
||||
|
||||
// d3dkmOpenAdapter opens a D3DKMT adapter handle for the given LUID.
|
||||
func d3dkmOpenAdapter(luid LUID) (uint32, error) {
|
||||
req := D3DKMT_OPENADAPTERFROMLUID{
|
||||
AdapterLuid: luid,
|
||||
}
|
||||
if err := ntstatusCall(procOpenAdapterFromLuid, unsafe.Pointer(&req)); err != nil {
|
||||
return 0, fmt.Errorf("OpenAdapterFromLuid: %w", err)
|
||||
}
|
||||
return req.hAdapter, nil
|
||||
}
|
||||
|
||||
// d3dkmCloseAdapter closes a previously opened D3DKMT adapter handle.
|
||||
func d3dkmCloseAdapter(hAdapter uint32) error {
|
||||
req := D3DKMT_CLOSEADAPTER{hAdapter: hAdapter}
|
||||
return ntstatusCall(procCloseAdapter, unsafe.Pointer(&req))
|
||||
}
|
||||
|
||||
// d3dkmGetAdapterPerfData queries per-adapter performance data (temperature,
|
||||
// fan RPM, power, bandwidth) via KMTQAITYPE_ADAPTERPERFDATA.
|
||||
func d3dkmGetAdapterPerfData(hAdapter uint32) (*D3DKMT_ADAPTER_PERFDATA, error) {
|
||||
var data D3DKMT_ADAPTER_PERFDATA
|
||||
req := D3DKMT_QUERYADAPTERINFO{
|
||||
hAdapter: hAdapter,
|
||||
Type: KMTQAITYPE_ADAPTERPERFDATA,
|
||||
pPrivateDriverData: uintptr(unsafe.Pointer(&data)),
|
||||
PrivateDriverDataSize: uint32(unsafe.Sizeof(data)),
|
||||
}
|
||||
if err := ntstatusCall(procQueryAdapterInfo, unsafe.Pointer(&req)); err != nil {
|
||||
return nil, fmt.Errorf("QueryAdapterInfo(ADAPTERPERFDATA): %w", err)
|
||||
}
|
||||
return &data, nil
|
||||
}
|
||||
|
||||
// d3dkmGetAdapterPerfDataCaps queries static adapter performance capabilities
|
||||
// (max fan RPM, temperature limits, max bandwidth) via KMTQAITYPE_ADAPTERPERFDATA_CAPS.
|
||||
func d3dkmGetAdapterPerfDataCaps(hAdapter uint32) (*D3DKMT_ADAPTER_PERFDATACAPS, error) {
|
||||
var data D3DKMT_ADAPTER_PERFDATACAPS
|
||||
req := D3DKMT_QUERYADAPTERINFO{
|
||||
hAdapter: hAdapter,
|
||||
Type: KMTQAITYPE_ADAPTERPERFDATA_CAPS,
|
||||
pPrivateDriverData: uintptr(unsafe.Pointer(&data)),
|
||||
PrivateDriverDataSize: uint32(unsafe.Sizeof(data)),
|
||||
}
|
||||
if err := ntstatusCall(procQueryAdapterInfo, unsafe.Pointer(&req)); err != nil {
|
||||
return nil, fmt.Errorf("QueryAdapterInfo(ADAPTERPERFDATACAPS): %w", err)
|
||||
}
|
||||
return &data, nil
|
||||
}
|
||||
|
||||
type queryStatsBuffer struct {
|
||||
Type int32 // offset 0
|
||||
AdapterLuid LUID // offset 4
|
||||
hProcess uintptr // offset 16
|
||||
// _result mirrors the D3DKMT_QUERYSTATISTICS_RESULT union.
|
||||
// sizeof(D3DKMT_QUERYSTATISTICS) == 0x328 (808 bytes) on x64.
|
||||
//
|
||||
// The C struct layout (x64):
|
||||
// offset 0: Type (int32, 4 bytes)
|
||||
// offset 4: AdapterLuid (LUID, 8 bytes)
|
||||
// offset 12: 4 bytes padding (for 8-byte alignment of hProcess)
|
||||
// offset 16: hProcess (HANDLE, 8 bytes)
|
||||
// offset 24: QueryResult (union, 780 bytes — largest member is AdapterInformation)
|
||||
// offset 804: anonymous input union (QueryNode.NodeId / QuerySegment.SegmentId, 4 bytes)
|
||||
//
|
||||
// Previous bug: _result was [776]byte, placing QueryId at offset 800 instead of 804.
|
||||
// The kernel read NodeId/SegmentId from offset 804 (always zero from _pad),
|
||||
// causing all NODE and SEGMENT queries to use index 0 regardless of the value
|
||||
// passed in QueryId. This produced alternating behavior where only GPU util OR
|
||||
// memory util appeared to work, depending on which test variant happened to put
|
||||
// non-zero data near offset 804 in the result buffer.
|
||||
_result [780]byte // offset 24, size 780 — places QueryId at offset 804
|
||||
QueryId int32 // offset 804 — matches C anonymous union for NodeId/SegmentId
|
||||
}
|
||||
|
||||
func init() {
|
||||
var buf queryStatsBuffer
|
||||
if unsafe.Sizeof(buf) != 808 {
|
||||
panic(fmt.Sprintf("queryStatsBuffer size %d != expected 808 (sizeof D3DKMT_QUERYSTATISTICS on x64)", unsafe.Sizeof(buf)))
|
||||
}
|
||||
if unsafe.Offsetof(buf.QueryId) != 804 {
|
||||
panic(fmt.Sprintf("queryStatsBuffer.QueryId offset %d != expected 804 (C anonymous union offset)", unsafe.Offsetof(buf.QueryId)))
|
||||
}
|
||||
|
||||
var perfData D3DKMT_ADAPTER_PERFDATA
|
||||
if unsafe.Sizeof(perfData) != 64 {
|
||||
panic(fmt.Sprintf("D3DKMT_ADAPTER_PERFDATA size %d != expected 64 on x64", unsafe.Sizeof(perfData)))
|
||||
}
|
||||
|
||||
var caps D3DKMT_ADAPTER_PERFDATACAPS
|
||||
if unsafe.Sizeof(caps) != 40 {
|
||||
panic(fmt.Sprintf("D3DKMT_ADAPTER_PERFDATACAPS size %d != expected 40 on x64", unsafe.Sizeof(caps)))
|
||||
}
|
||||
}
|
||||
|
||||
// Byte offsets into queryStatsBuffer._result. The same bytes are interpreted
// differently depending on the query type, which is why several offsets
// coincide.
const (
	// ADAPTER queries.
	qsoffsetNbSegments = 0
	qsoffsetNodeCount  = 4

	// SEGMENT queries.
	qsoffsetCommitLimit    = 0
	qsoffsetBytesCommitted = 8
	qsoffsetBytesResident  = 16

	// NODE queries.
	qsoffsetRunningTime       = 0
	qsoffsetSystemRunningTime = 272
)
|
||||
|
||||
// d3dkmQueryAdapterStats returns the number of memory segments and compute
|
||||
// nodes for the adapter identified by luid.
|
||||
func d3dkmQueryAdapterStats(luid LUID) (nbSegments uint32, nodeCount uint32, err error) {
|
||||
buf := queryStatsBuffer{
|
||||
Type: int32(D3DKMT_QUERYSTATISTICS_ADAPTER),
|
||||
AdapterLuid: luid,
|
||||
}
|
||||
if err := ntstatusCall(procQueryStatistics, unsafe.Pointer(&buf)); err != nil {
|
||||
return 0, 0, fmt.Errorf("QueryStatistics(ADAPTER): %w", err)
|
||||
}
|
||||
nbSegments = binary.LittleEndian.Uint32(buf._result[qsoffsetNbSegments : qsoffsetNbSegments+4])
|
||||
nodeCount = binary.LittleEndian.Uint32(buf._result[qsoffsetNodeCount : qsoffsetNodeCount+4])
|
||||
return nbSegments, nodeCount, nil
|
||||
}
|
||||
|
||||
// d3dkmQuerySegmentStats returns the commit limit (total) and resident
|
||||
// (used) bytes for the given memory segment of an adapter.
|
||||
func d3dkmQuerySegmentStats(luid LUID, segmentID uint32) (commitLimit uint64, bytesResident uint64, err error) {
|
||||
buf := queryStatsBuffer{
|
||||
Type: int32(D3DKMT_QUERYSTATISTICS_SEGMENT),
|
||||
AdapterLuid: luid,
|
||||
QueryId: int32(segmentID),
|
||||
}
|
||||
if err := ntstatusCall(procQueryStatistics, unsafe.Pointer(&buf)); err != nil {
|
||||
return 0, 0, fmt.Errorf("QueryStatistics(SEGMENT %d): %w", segmentID, err)
|
||||
}
|
||||
commitLimit = binary.LittleEndian.Uint64(buf._result[qsoffsetCommitLimit : qsoffsetCommitLimit+8])
|
||||
bytesResident = binary.LittleEndian.Uint64(buf._result[qsoffsetBytesResident : qsoffsetBytesResident+8])
|
||||
if bytesResident == 0 {
|
||||
bytesResident = binary.LittleEndian.Uint64(buf._result[qsoffsetBytesCommitted : qsoffsetBytesCommitted+8])
|
||||
}
|
||||
return commitLimit, bytesResident, nil
|
||||
}
|
||||
|
||||
// d3dkmQueryNodeStats returns the global and system running time counters
|
||||
// (in 100ns units) for the given compute node of an adapter.
|
||||
func d3dkmQueryNodeStats(luid LUID, nodeID uint32) (runningTime uint64, systemRunningTime uint64, err error) {
|
||||
buf := queryStatsBuffer{
|
||||
Type: int32(D3DKMT_QUERYSTATISTICS_NODE),
|
||||
AdapterLuid: luid,
|
||||
QueryId: int32(nodeID),
|
||||
}
|
||||
if err := ntstatusCall(procQueryStatistics, unsafe.Pointer(&buf)); err != nil {
|
||||
return 0, 0, fmt.Errorf("QueryStatistics(NODE %d): %w", nodeID, err)
|
||||
}
|
||||
runningTime = binary.LittleEndian.Uint64(buf._result[qsoffsetRunningTime : qsoffsetRunningTime+8])
|
||||
systemRunningTime = binary.LittleEndian.Uint64(buf._result[qsoffsetSystemRunningTime : qsoffsetSystemRunningTime+8])
|
||||
return runningTime, systemRunningTime, nil
|
||||
}
|
||||
|
||||
// nodeRunningTimes is one reading of a node's two running-time counters as
// returned by d3dkmQueryNodeStats.
type nodeRunningTimes struct {
	Global uint64 // the node's own running time
	System uint64 // the system running time reported alongside it
}
|
||||
|
||||
// d3dkmtNodeUtil computes GPU node utilization as a percentage from running
|
||||
// time deltas. Returns -1 if counters went backwards (wrap/reset), 0 if idle.
|
||||
func d3dkmtNodeUtil(prevRT, curRT nodeRunningTimes, elapsed100ns int64) float64 {
|
||||
if curRT.Global < prevRT.Global || curRT.System < prevRT.System {
|
||||
return -1
|
||||
}
|
||||
gd := curRT.Global - prevRT.Global
|
||||
sd := curRT.System - prevRT.System
|
||||
|
||||
if gd > 0 && sd > 0 {
|
||||
util := float64(gd) / float64(sd)
|
||||
if util > 1.0 {
|
||||
util = 1.0
|
||||
}
|
||||
return util * 100.0
|
||||
} else if gd > 0 && elapsed100ns > 0 {
|
||||
util := float64(gd) / float64(elapsed100ns) * 100.0
|
||||
if util > 100.0 {
|
||||
util = 100.0
|
||||
}
|
||||
return util
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
// d3dkmtFanPct expresses fanRPM as a percentage of maxFanRPM, capped at 100.
// Returns 0 when either value is zero.
func d3dkmtFanPct(fanRPM, maxFanRPM uint32) float64 {
	if maxFanRPM == 0 || fanRPM == 0 {
		return 0
	}
	share := float64(fanRPM) / float64(maxFanRPM) * 100.0
	if share > 100.0 {
		return 100.0
	}
	return share
}
|
||||
|
||||
// d3dkmtPowerW divides the raw D3DKMT power reading by ten and returns it as
// watts, treating the input as deci-watts. A zero reading yields 0.
//
// NOTE(review): the Windows header may define this field as tenths of a
// percent rather than deci-watts — confirm the unit before relying on the
// result as an absolute wattage.
func d3dkmtPowerW(power uint32) float64 {
	if power == 0 {
		return 0
	}
	return float64(power) / 10.0
}
|
||||
|
||||
// d3dkmtTempC converts a reading in tenths of a degree Celsius to whole
// degrees, discarding the fractional part.
func d3dkmtTempC(tempDeciC uint32) int {
	wholeDegrees := tempDeciC / 10
	return int(wholeDegrees)
}
|
||||
|
||||
type d3dkmtAdapterState struct {
|
||||
luid LUID
|
||||
hAdapter uint32
|
||||
nbSegments uint32
|
||||
nodeCount uint32
|
||||
maxFanRPM uint32
|
||||
prevNodeRT map[uint32]nodeRunningTimes
|
||||
prevTime time.Time
|
||||
}
|
||||
|
||||
// tryD3DKMT attempts to start GPU monitoring using D3DKMT and optional PDH
|
||||
// counters. It returns a channel of GpuStat snapshots or an error if no
|
||||
// usable adapters are found.
|
||||
func tryD3DKMT(ctx context.Context, every time.Duration, logger *logmon.Monitor) (chan []GpuStat, error) {
|
||||
if err := initD3DKMT(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
adapterInfos, err := d3dkmEnumerateAdapters()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
type adapterMeta struct {
|
||||
luid LUID
|
||||
nbSegments uint32
|
||||
nodeCount uint32
|
||||
maxFanRPM uint32
|
||||
}
|
||||
|
||||
var metaList []adapterMeta
|
||||
|
||||
for i, ai := range adapterInfos {
|
||||
hAdapter, err := d3dkmOpenAdapter(ai.AdapterLuid)
|
||||
if err != nil {
|
||||
logger.Debugf("adapter %d: open failed: %s", i, err.Error())
|
||||
continue
|
||||
}
|
||||
|
||||
nbSegments, nodeCount, err := d3dkmQueryAdapterStats(ai.AdapterLuid)
|
||||
if err != nil {
|
||||
logger.Debugf("adapter %d: query stats failed: %s", i, err.Error())
|
||||
d3dkmCloseAdapter(hAdapter)
|
||||
continue
|
||||
}
|
||||
|
||||
caps, err := d3dkmGetAdapterPerfDataCaps(hAdapter)
|
||||
if err != nil {
|
||||
logger.Debugf("adapter %d: perf caps failed: %s", i, err.Error())
|
||||
}
|
||||
|
||||
d3dkmCloseAdapter(hAdapter)
|
||||
|
||||
var maxFanRPM uint32
|
||||
if caps != nil {
|
||||
maxFanRPM = caps.MaxFanRPM
|
||||
}
|
||||
|
||||
metaList = append(metaList, adapterMeta{
|
||||
luid: ai.AdapterLuid,
|
||||
nbSegments: nbSegments,
|
||||
nodeCount: nodeCount,
|
||||
maxFanRPM: maxFanRPM,
|
||||
})
|
||||
logger.Debugf("adapter %d: segments=%d nodes=%d fan_max=%d luid=%d:%d", i, nbSegments, nodeCount, maxFanRPM, ai.AdapterLuid.HighPart, ai.AdapterLuid.LowPart)
|
||||
}
|
||||
|
||||
if len(metaList) == 0 {
|
||||
return nil, fmt.Errorf("no usable D3DKMT adapters found")
|
||||
}
|
||||
|
||||
pdhUtil, pdhErr := initPdhGpuUtil()
|
||||
if pdhErr != nil {
|
||||
logger.Debugf("PDH GPU utilization not available: %s", pdhErr.Error())
|
||||
} else {
|
||||
logger.Info("using PDH performance counters for GPU utilization")
|
||||
}
|
||||
|
||||
ch := make(chan []GpuStat, 1)
|
||||
|
||||
go func() {
|
||||
defer close(ch)
|
||||
if pdhUtil != nil {
|
||||
defer pdhUtil.close()
|
||||
}
|
||||
|
||||
var adapters []d3dkmtAdapterState
|
||||
for _, m := range metaList {
|
||||
hAdapter, err := d3dkmOpenAdapter(m.luid)
|
||||
if err != nil {
|
||||
logger.Debugf("reopen adapter failed: %s", err.Error())
|
||||
continue
|
||||
}
|
||||
adapters = append(adapters, d3dkmtAdapterState{
|
||||
luid: m.luid,
|
||||
hAdapter: hAdapter,
|
||||
nbSegments: m.nbSegments,
|
||||
nodeCount: m.nodeCount,
|
||||
maxFanRPM: m.maxFanRPM,
|
||||
prevNodeRT: make(map[uint32]nodeRunningTimes),
|
||||
})
|
||||
}
|
||||
|
||||
if len(adapters) == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
defer func() {
|
||||
for _, a := range adapters {
|
||||
d3dkmCloseAdapter(a.hAdapter)
|
||||
}
|
||||
}()
|
||||
|
||||
for i := range adapters {
|
||||
a := &adapters[i]
|
||||
for node := uint32(0); node < a.nodeCount; node++ {
|
||||
globalRT, systemRT, err := d3dkmQueryNodeStats(a.luid, node)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
a.prevNodeRT[node] = nodeRunningTimes{Global: globalRT, System: systemRT}
|
||||
}
|
||||
a.prevTime = time.Now()
|
||||
}
|
||||
|
||||
ticker := time.NewTicker(every)
|
||||
defer ticker.Stop()
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return
|
||||
case <-ticker.C:
|
||||
stats := make([]GpuStat, 0, len(adapters))
|
||||
now := time.Now()
|
||||
|
||||
var pdhUtilMap map[LUID]float64
|
||||
if pdhUtil != nil {
|
||||
pdhUtilMap = pdhUtil.collect()
|
||||
}
|
||||
|
||||
for i := range adapters {
|
||||
a := &adapters[i]
|
||||
|
||||
perfData, err := d3dkmGetAdapterPerfData(a.hAdapter)
|
||||
if err != nil {
|
||||
logger.Debugf("adapter %d perfdata: %s", i, err.Error())
|
||||
continue
|
||||
}
|
||||
|
||||
var memUsedMB, memTotalMB int
|
||||
for seg := uint32(0); seg < a.nbSegments; seg++ {
|
||||
limit, resident, err := d3dkmQuerySegmentStats(a.luid, seg)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
memUsedMB += int(resident / (1024 * 1024))
|
||||
memTotalMB += int(limit / (1024 * 1024))
|
||||
}
|
||||
|
||||
var gpuUtil float64
|
||||
pdhGaveValue := false
|
||||
if pdhUtilMap != nil {
|
||||
if util, ok := pdhUtilMap[a.luid]; ok {
|
||||
gpuUtil = util
|
||||
pdhGaveValue = true
|
||||
}
|
||||
}
|
||||
|
||||
if !pdhGaveValue && a.nodeCount > 0 {
|
||||
elapsedNs := now.Sub(a.prevTime).Nanoseconds()
|
||||
elapsed100ns := elapsedNs / 100
|
||||
|
||||
for node := uint32(0); node < a.nodeCount; node++ {
|
||||
globalRT, systemRT, err := d3dkmQueryNodeStats(a.luid, node)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
|
||||
if prevRT, ok := a.prevNodeRT[node]; ok {
|
||||
if globalRT < prevRT.Global || systemRT < prevRT.System {
|
||||
a.prevNodeRT[node] = nodeRunningTimes{Global: globalRT, System: systemRT}
|
||||
continue
|
||||
}
|
||||
nodeUtil := d3dkmtNodeUtil(prevRT, nodeRunningTimes{Global: globalRT, System: systemRT}, elapsed100ns)
|
||||
if nodeUtil > gpuUtil {
|
||||
gpuUtil = nodeUtil
|
||||
}
|
||||
}
|
||||
a.prevNodeRT[node] = nodeRunningTimes{Global: globalRT, System: systemRT}
|
||||
}
|
||||
|
||||
a.prevTime = now
|
||||
}
|
||||
|
||||
tempC := d3dkmtTempC(perfData.Temperature)
|
||||
|
||||
fanSpeedPct := d3dkmtFanPct(perfData.FanRPM, a.maxFanRPM)
|
||||
powerDrawW := d3dkmtPowerW(perfData.Power)
|
||||
|
||||
var memUtilPct float64
|
||||
if memTotalMB > 0 {
|
||||
memUtilPct = float64(memUsedMB) / float64(memTotalMB) * 100.0
|
||||
}
|
||||
|
||||
stats = append(stats, GpuStat{
|
||||
Timestamp: now,
|
||||
ID: i,
|
||||
Name: fmt.Sprintf("GPU %d", i),
|
||||
TempC: tempC,
|
||||
GpuUtilPct: gpuUtil,
|
||||
MemUtilPct: memUtilPct,
|
||||
MemUsedMB: memUsedMB,
|
||||
MemTotalMB: memTotalMB,
|
||||
FanSpeedPct: fanSpeedPct,
|
||||
PowerDrawW: powerDrawW,
|
||||
})
|
||||
}
|
||||
|
||||
if len(stats) > 0 {
|
||||
select {
|
||||
case ch <- stats:
|
||||
default:
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}()
|
||||
|
||||
return ch, nil
|
||||
}
|
||||
@@ -0,0 +1,98 @@
|
||||
//go:build windows
|
||||
|
||||
package perf
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func TestD3dkmtNodeUtil_FullLoad(t *testing.T) {
|
||||
prev := nodeRunningTimes{Global: 1000, System: 10000}
|
||||
cur := nodeRunningTimes{Global: 5000, System: 14000}
|
||||
got := d3dkmtNodeUtil(prev, cur, 100000)
|
||||
assert.Equal(t, 100.0, got)
|
||||
}
|
||||
|
||||
func TestD3dkmtNodeUtil_PartialUtil(t *testing.T) {
|
||||
prev := nodeRunningTimes{Global: 1000, System: 10000}
|
||||
cur := nodeRunningTimes{Global: 3000, System: 14000}
|
||||
got := d3dkmtNodeUtil(prev, cur, 100000)
|
||||
assert.Equal(t, 50.0, got)
|
||||
}
|
||||
|
||||
func TestD3dkmtNodeUtil_Identical(t *testing.T) {
|
||||
prev := nodeRunningTimes{Global: 10000, System: 10000}
|
||||
cur := nodeRunningTimes{Global: 20000, System: 20000}
|
||||
got := d3dkmtNodeUtil(prev, cur, 100000)
|
||||
assert.Equal(t, 100.0, got)
|
||||
}
|
||||
|
||||
func TestD3dkmtNodeUtil_CounterWrap(t *testing.T) {
|
||||
prev := nodeRunningTimes{Global: 9000, System: 10000}
|
||||
cur := nodeRunningTimes{Global: 1000, System: 10000}
|
||||
got := d3dkmtNodeUtil(prev, cur, 100000)
|
||||
assert.Equal(t, -1.0, got)
|
||||
}
|
||||
|
||||
func TestD3dkmtNodeUtil_SystemWrap(t *testing.T) {
|
||||
prev := nodeRunningTimes{Global: 1000, System: 9000}
|
||||
cur := nodeRunningTimes{Global: 5000, System: 1000}
|
||||
got := d3dkmtNodeUtil(prev, cur, 100000)
|
||||
assert.Equal(t, -1.0, got)
|
||||
}
|
||||
|
||||
func TestD3dkmtNodeUtil_ZeroDelta(t *testing.T) {
|
||||
prev := nodeRunningTimes{Global: 1000, System: 10000}
|
||||
cur := nodeRunningTimes{Global: 1000, System: 10000}
|
||||
got := d3dkmtNodeUtil(prev, cur, 100000)
|
||||
assert.Equal(t, 0.0, got)
|
||||
}
|
||||
|
||||
func TestD3dkmtNodeUtil_ElapsedFallback(t *testing.T) {
|
||||
prev := nodeRunningTimes{Global: 1000, System: 10000}
|
||||
cur := nodeRunningTimes{Global: 6000, System: 10000}
|
||||
got := d3dkmtNodeUtil(prev, cur, 50000)
|
||||
assert.InDelta(t, 10.0, got, 0.01)
|
||||
}
|
||||
|
||||
func TestD3dkmtFanPct_Normal(t *testing.T) {
|
||||
assert.Equal(t, 50.0, d3dkmtFanPct(1500, 3000))
|
||||
}
|
||||
|
||||
func TestD3dkmtFanPct_MaxFan(t *testing.T) {
|
||||
assert.Equal(t, 100.0, d3dkmtFanPct(3000, 3000))
|
||||
}
|
||||
|
||||
func TestD3dkmtFanPct_OverMaxClamped(t *testing.T) {
|
||||
assert.Equal(t, 100.0, d3dkmtFanPct(4000, 3000))
|
||||
}
|
||||
|
||||
func TestD3dkmtFanPct_ZeroMaxFan(t *testing.T) {
|
||||
assert.Equal(t, 0.0, d3dkmtFanPct(1500, 0))
|
||||
}
|
||||
|
||||
func TestD3dkmtFanPct_ZeroFanRPM(t *testing.T) {
|
||||
assert.Equal(t, 0.0, d3dkmtFanPct(0, 3000))
|
||||
}
|
||||
|
||||
func TestD3dkmtFanPct_BothZero(t *testing.T) {
|
||||
assert.Equal(t, 0.0, d3dkmtFanPct(0, 0))
|
||||
}
|
||||
|
||||
func TestD3dkmtPowerW(t *testing.T) {
|
||||
assert.Equal(t, 250.0, d3dkmtPowerW(2500))
|
||||
}
|
||||
|
||||
func TestD3dkmtPowerW_Zero(t *testing.T) {
|
||||
assert.Equal(t, 0.0, d3dkmtPowerW(0))
|
||||
}
|
||||
|
||||
func TestD3dkmtTempC(t *testing.T) {
|
||||
assert.Equal(t, 65, d3dkmtTempC(650))
|
||||
}
|
||||
|
||||
func TestD3dkmtTempC_Zero(t *testing.T) {
|
||||
assert.Equal(t, 0, d3dkmtTempC(0))
|
||||
}
|
||||
@@ -22,6 +22,13 @@ func getGpuStats(ctx context.Context, every time.Duration, logger *logmon.Monito
|
||||
logger.Debugf("nvidia-smi: %s", err.Error())
|
||||
}
|
||||
|
||||
if ch, err := tryD3DKMT(ctx, every, logger); err == nil {
|
||||
logger.Info("using D3DKMT for GPU monitoring")
|
||||
return ch, nil
|
||||
} else {
|
||||
logger.Debugf("D3DKMT: %s", err.Error())
|
||||
}
|
||||
|
||||
return nil, ErrNoGpuTool
|
||||
}
|
||||
|
||||
|
||||
@@ -0,0 +1,159 @@
|
||||
//go:build windows
|
||||
|
||||
package perf
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strconv"
|
||||
"strings"
|
||||
"unsafe"
|
||||
|
||||
"golang.org/x/sys/windows"
|
||||
)
|
||||
|
||||
var (
|
||||
pdhDLL = windows.NewLazySystemDLL("pdh.dll")
|
||||
procPdhOpenQuery = pdhDLL.NewProc("PdhOpenQueryW")
|
||||
procPdhAddEnglishCounter = pdhDLL.NewProc("PdhAddEnglishCounterW")
|
||||
procPdhCollectQueryData = pdhDLL.NewProc("PdhCollectQueryData")
|
||||
procPdhGetFormattedCounterArray = pdhDLL.NewProc("PdhGetFormattedCounterArrayW")
|
||||
procPdhCloseQuery = pdhDLL.NewProc("PdhCloseQuery")
|
||||
)
|
||||
|
||||
// PDH format flag and status codes used by pdhGpuUtil.
const (
	// pdhFmtDouble requests counter values formatted as float64.
	pdhFmtDouble = 0x00000200

	// pdhMoreData is returned by the sizing call when the supplied buffer
	// is too small; collect expects it from its first, buffer-less call.
	pdhMoreData = 0x800007D2

	// pdhNoData is tolerated by collect when refreshing the query.
	pdhNoData = 0x800007D5
)
|
||||
|
||||
// pdhCounterValue is a formatted counter value: a per-value status (zero
// means valid) followed by the value as a float64.
type pdhCounterValue struct {
	CStatus uint32
	DblVal  float64
}

// pdhCounterValueItem is one element of the array filled by
// PdhGetFormattedCounterArrayW: a pointer to the UTF-16 instance name
// followed by its formatted value. The init function in this file asserts
// the element is 24 bytes.
type pdhCounterValueItem struct {
	SzName   *uint16
	FmtValue pdhCounterValue
}
|
||||
|
||||
func init() {
|
||||
var item pdhCounterValueItem
|
||||
if unsafe.Sizeof(item) != 24 {
|
||||
panic(fmt.Sprintf("pdhCounterValueItem size %d != expected 24 on x64", unsafe.Sizeof(item)))
|
||||
}
|
||||
}
|
||||
|
||||
// pdhGpuUtil holds the PDH handles for the GPU Engine utilization counter.
type pdhGpuUtil struct {
	query   uintptr // query handle; zero once close has run
	counter uintptr // counter handle added to query
}
|
||||
|
||||
// initPdhGpuUtil creates a PDH query for the GPU Engine utilization counter.
|
||||
// Returns nil with an error if PDH or the counter is unavailable.
|
||||
func initPdhGpuUtil() (*pdhGpuUtil, error) {
|
||||
var query uintptr
|
||||
if ret, _, _ := procPdhOpenQuery.Call(0, 0, uintptr(unsafe.Pointer(&query))); ret != 0 {
|
||||
return nil, fmt.Errorf("PdhOpenQuery: 0x%x", ret)
|
||||
}
|
||||
|
||||
path, _ := windows.UTF16PtrFromString(`\GPU Engine(*)\Utilization Percentage`)
|
||||
var counter uintptr
|
||||
if ret, _, _ := procPdhAddEnglishCounter.Call(
|
||||
query, uintptr(unsafe.Pointer(path)), 0, uintptr(unsafe.Pointer(&counter)),
|
||||
); ret != 0 {
|
||||
procPdhCloseQuery.Call(query)
|
||||
return nil, fmt.Errorf("PdhAddEnglishCounter(GPU Engine): 0x%x", ret)
|
||||
}
|
||||
|
||||
procPdhCollectQueryData.Call(query)
|
||||
|
||||
return &pdhGpuUtil{query: query, counter: counter}, nil
|
||||
}
|
||||
|
||||
// close releases the PDH query handle.
|
||||
func (p *pdhGpuUtil) close() {
|
||||
if p.query != 0 {
|
||||
procPdhCloseQuery.Call(p.query)
|
||||
p.query = 0
|
||||
}
|
||||
}
|
||||
|
||||
// collect reads the PDH counter and returns a map of adapter LUID to
|
||||
// aggregated GPU utilization percentage, summed across all engine instances
|
||||
// per adapter and clamped to 100%.
|
||||
func (p *pdhGpuUtil) collect() map[LUID]float64 {
|
||||
ret, _, _ := procPdhCollectQueryData.Call(p.query)
|
||||
if ret != 0 && ret != pdhNoData {
|
||||
return nil
|
||||
}
|
||||
|
||||
var bufSize uint32
|
||||
var itemCount uint32
|
||||
ret, _, _ = procPdhGetFormattedCounterArray.Call(
|
||||
p.counter, pdhFmtDouble,
|
||||
uintptr(unsafe.Pointer(&bufSize)),
|
||||
uintptr(unsafe.Pointer(&itemCount)),
|
||||
0,
|
||||
)
|
||||
if ret != pdhMoreData || itemCount == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
buf := make([]byte, bufSize)
|
||||
ret, _, _ = procPdhGetFormattedCounterArray.Call(
|
||||
p.counter, pdhFmtDouble,
|
||||
uintptr(unsafe.Pointer(&bufSize)),
|
||||
uintptr(unsafe.Pointer(&itemCount)),
|
||||
uintptr(unsafe.Pointer(&buf[0])),
|
||||
)
|
||||
if ret != 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
itemSize := uint32(unsafe.Sizeof(pdhCounterValueItem{}))
|
||||
result := make(map[LUID]float64)
|
||||
|
||||
for i := uint32(0); i < itemCount; i++ {
|
||||
item := (*pdhCounterValueItem)(unsafe.Pointer(&buf[i*itemSize]))
|
||||
if item.FmtValue.CStatus != 0 {
|
||||
continue
|
||||
}
|
||||
luid, ok := parsePdhLuid(windows.UTF16PtrToString(item.SzName))
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
result[luid] += item.FmtValue.DblVal
|
||||
}
|
||||
|
||||
for luid := range result {
|
||||
if result[luid] > 100.0 {
|
||||
result[luid] = 100.0
|
||||
}
|
||||
}
|
||||
|
||||
return result
|
||||
}
|
||||
|
||||
// parsePdhLuid extracts the adapter LUID (high and low parts) from a PDH
|
||||
// GPU Engine instance name (e.g. "pid_1234_luid_0x00000000_0x000148BF_phys_0_eng_2_engtype_Compute").
|
||||
func parsePdhLuid(name string) (LUID, bool) {
|
||||
idx := strings.Index(name, "luid_0x")
|
||||
if idx < 0 {
|
||||
return LUID{}, false
|
||||
}
|
||||
rest := name[idx+7:]
|
||||
parts := strings.SplitN(rest, "_", 4)
|
||||
if len(parts) < 3 {
|
||||
return LUID{}, false
|
||||
}
|
||||
hp, err := strconv.ParseUint(parts[0], 16, 32)
|
||||
if err != nil {
|
||||
return LUID{}, false
|
||||
}
|
||||
lpStr := strings.TrimPrefix(parts[1], "0x")
|
||||
lp, err := strconv.ParseUint(lpStr, 16, 32)
|
||||
if err != nil {
|
||||
return LUID{}, false
|
||||
}
|
||||
return LUID{LowPart: uint32(lp), HighPart: int32(hp)}, true
|
||||
}
|
||||
@@ -0,0 +1,53 @@
|
||||
//go:build windows
|
||||
|
||||
package perf
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func TestParsePdhLuid_Valid(t *testing.T) {
|
||||
name := `pid_25312_luid_0x00000000_0x000148BF_phys_0_eng_2_engtype_Compute`
|
||||
got, ok := parsePdhLuid(name)
|
||||
assert.True(t, ok)
|
||||
assert.Equal(t, uint32(0x000148BF), got.LowPart)
|
||||
assert.Equal(t, int32(0x00000000), got.HighPart)
|
||||
}
|
||||
|
||||
func TestParsePdhLuid_ValidNvidia(t *testing.T) {
|
||||
name := `pid_1388_luid_0x00000000_0x00011372_phys_0_eng_8_engtype_Compute_1`
|
||||
got, ok := parsePdhLuid(name)
|
||||
assert.True(t, ok)
|
||||
assert.Equal(t, uint32(0x00011372), got.LowPart)
|
||||
assert.Equal(t, int32(0x00000000), got.HighPart)
|
||||
}
|
||||
|
||||
func TestParsePdhLuid_NonZeroHighPart(t *testing.T) {
|
||||
name := `pid_1234_luid_0x00000001_0x0000C85A_phys_0_eng_5_engtype_Copy`
|
||||
got, ok := parsePdhLuid(name)
|
||||
assert.True(t, ok)
|
||||
assert.Equal(t, uint32(0x0000C85A), got.LowPart)
|
||||
assert.Equal(t, int32(0x00000001), got.HighPart)
|
||||
}
|
||||
|
||||
func TestParsePdhLuid_InvalidNoLuid(t *testing.T) {
|
||||
_, ok := parsePdhLuid("invalid_string_without_luid")
|
||||
assert.False(t, ok)
|
||||
}
|
||||
|
||||
func TestParsePdhLuid_InvalidEmpty(t *testing.T) {
|
||||
_, ok := parsePdhLuid("")
|
||||
assert.False(t, ok)
|
||||
}
|
||||
|
||||
func TestParsePdhLuid_InvalidHex(t *testing.T) {
|
||||
_, ok := parsePdhLuid("pid_1234_luid_0xZZZZ_0xGGGG_phys_0")
|
||||
assert.False(t, ok)
|
||||
}
|
||||
|
||||
func TestParsePdhLuid_ShortAfterLuid(t *testing.T) {
|
||||
_, ok := parsePdhLuid("pid_1234_luid_0x00000000")
|
||||
assert.False(t, ok)
|
||||
}
|
||||
+122
-417
@@ -11,6 +11,8 @@ import (
|
||||
"github.com/mostlygeek/llama-swap/internal/config"
|
||||
"github.com/mostlygeek/llama-swap/internal/logmon"
|
||||
"github.com/mostlygeek/llama-swap/internal/process"
|
||||
"github.com/mostlygeek/llama-swap/internal/router/scheduler"
|
||||
"github.com/mostlygeek/llama-swap/internal/shared"
|
||||
)
|
||||
|
||||
type shutdownReq struct {
|
||||
@@ -24,56 +26,16 @@ type unloadReq struct {
|
||||
respond chan struct{}
|
||||
}
|
||||
|
||||
// handlerReq is the message ServeHTTP sends to the run loop to ask for a
// handler for one model.
type handlerReq struct {
	// model is the ID of the requested model.
	model string
	// ctx is the HTTP request's context; grant() gives up when it is done.
	ctx context.Context
	// respond receives the outcome. ServeHTTP creates it unbuffered.
	respond chan handlerResp
	// positionCh receives the request's 1-indexed queue position while it is
	// parked. ServeHTTP creates it with a buffer of one.
	positionCh chan int
}
|
||||
|
||||
// handlerResp is the run loop's answer to a handlerReq. ServeHTTP reports err
// when it is non-nil and otherwise invokes handleFunc.
type handlerResp struct {
	// handleFunc serves the request.
	handleFunc http.HandlerFunc
	// err is the reason the request could not be granted.
	err error
}
|
||||
|
||||
// swapDone is sent on swapDoneCh by a swap goroutine when it finishes.
type swapDone struct {
	// modelID is the swap's target model.
	modelID string
	// err is the swap's result; nil means the target can serve.
	err error
}
|
||||
|
||||
// serveDoneEvent is sent on serveDoneCh when a tracked handler returns, so the
// run loop can decrement the in-flight count for modelID.
type serveDoneEvent struct {
	modelID string
}
|
||||
|
||||
// activeSwap is the run loop's record of one swap that has been started and
// has not yet reported back on swapDoneCh.
type activeSwap struct {
	// modelID is the model being loaded.
	modelID string
	// evict lists the models this swap stops before loading modelID.
	evict []string
	// waiters are the requests that receive this swap's result.
	waiters []handlerReq
}
|
||||
|
||||
// swapPlanner is the only piece of behaviour that differs between concrete
// routers. baseRouter only calls the two methods below and never inspects a
// planner's internals.
type swapPlanner interface {
	// EvictionFor returns the running model IDs that must be stopped before
	// target can serve. alsoRunning lists models the baseRouter has already
	// committed to loading (in-flight swaps) which the planner cannot see
	// via process.State() yet. It is a pure decision and must not log.
	EvictionFor(target string, alsoRunning []string) []string

	// OnSwapStart runs once at the start of every swap. Planners may log
	// their decision here at whatever verbosity they choose.
	OnSwapStart(target string)
}
|
||||
|
||||
// baseRouter owns the channels, run-loop, and orchestration code shared by
|
||||
// every concrete router. Concrete routers embed *baseRouter and supply a
|
||||
// swapPlanner that captures how their eviction set is decided.
|
||||
// baseRouter owns the channels, run-loop, and process machinery shared by every
|
||||
// concrete router. Concrete routers embed *baseRouter and supply a
|
||||
// scheduler.Swapper describing how eviction sets are decided. baseRouter
|
||||
// implements scheduler.Effects so the scheduler can call back for side-effects.
|
||||
type baseRouter struct {
|
||||
name string
|
||||
config config.Config
|
||||
processes map[string]process.Process
|
||||
logger *logmon.Monitor
|
||||
planner swapPlanner
|
||||
schedule scheduler.Scheduler
|
||||
|
||||
// shutdownCtx governs the request machinery: cancelling it tells grant()
|
||||
// and ServeHTTP to stop granting and reject callers. It is deliberately
|
||||
@@ -90,11 +52,12 @@ type baseRouter struct {
|
||||
procCtx context.Context
|
||||
procCancel context.CancelFunc
|
||||
|
||||
handlerCh chan handlerReq
|
||||
handlerCh chan scheduler.HandlerReq
|
||||
cancelCh chan scheduler.HandlerReq
|
||||
shutdownCh chan shutdownReq
|
||||
unloadCh chan unloadReq
|
||||
swapDoneCh chan swapDone
|
||||
serveDoneCh chan serveDoneEvent
|
||||
swapDoneCh chan scheduler.SwapDone
|
||||
serveDoneCh chan scheduler.ServeDoneEvent
|
||||
|
||||
runDone chan struct{}
|
||||
|
||||
@@ -106,26 +69,38 @@ type baseRouter struct {
|
||||
testProcessed chan struct{}
|
||||
}
|
||||
|
||||
func newBaseRouter(name string, conf config.Config, processes map[string]process.Process, planner swapPlanner, logger *logmon.Monitor) *baseRouter {
|
||||
func newBaseRouter(
|
||||
name string,
|
||||
conf config.Config,
|
||||
processes map[string]process.Process,
|
||||
logger *logmon.Monitor,
|
||||
planner scheduler.Swapper,
|
||||
) (*baseRouter, error) {
|
||||
shutdownCtx, shutdownFn := context.WithCancel(context.Background())
|
||||
procCtx, procCancel := context.WithCancel(context.Background())
|
||||
return &baseRouter{
|
||||
b := &baseRouter{
|
||||
name: name,
|
||||
config: conf,
|
||||
processes: processes,
|
||||
logger: logger,
|
||||
planner: planner,
|
||||
shutdownCtx: shutdownCtx,
|
||||
shutdownFn: shutdownFn,
|
||||
procCtx: procCtx,
|
||||
procCancel: procCancel,
|
||||
handlerCh: make(chan handlerReq),
|
||||
handlerCh: make(chan scheduler.HandlerReq),
|
||||
cancelCh: make(chan scheduler.HandlerReq),
|
||||
shutdownCh: make(chan shutdownReq),
|
||||
unloadCh: make(chan unloadReq),
|
||||
swapDoneCh: make(chan swapDone),
|
||||
serveDoneCh: make(chan serveDoneEvent),
|
||||
swapDoneCh: make(chan scheduler.SwapDone),
|
||||
serveDoneCh: make(chan scheduler.ServeDoneEvent),
|
||||
runDone: make(chan struct{}),
|
||||
}
|
||||
sched, err := scheduler.New(conf, name, logger, planner, b)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
b.schedule = sched
|
||||
return b, nil
|
||||
}
|
||||
|
||||
func (b *baseRouter) notifyProcessed() {
|
||||
@@ -137,30 +112,31 @@ func (b *baseRouter) notifyProcessed() {
|
||||
func (b *baseRouter) run() {
|
||||
defer close(b.runDone)
|
||||
|
||||
active := make(map[string]*activeSwap)
|
||||
inFlight := make(map[string]int)
|
||||
var queued []handlerReq
|
||||
|
||||
for {
|
||||
select {
|
||||
case req := <-b.shutdownCh:
|
||||
b.handleShutdown(req, active, queued)
|
||||
b.handleShutdown(req)
|
||||
return
|
||||
|
||||
case req := <-b.handlerCh:
|
||||
b.handleRequest(req, active, inFlight, &queued)
|
||||
b.schedule.OnRequest(req)
|
||||
b.notifyProcessed()
|
||||
|
||||
case req := <-b.cancelCh:
|
||||
b.schedule.OnCancel(req)
|
||||
b.notifyProcessed()
|
||||
|
||||
case req := <-b.unloadCh:
|
||||
b.handleUnload(req, active, inFlight, &queued)
|
||||
b.schedule.OnUnload(req.targets, req.timeout)
|
||||
close(req.respond)
|
||||
b.notifyProcessed()
|
||||
|
||||
case ev := <-b.swapDoneCh:
|
||||
b.handleSwapDone(ev, active, inFlight, &queued)
|
||||
b.schedule.OnSwapDone(ev)
|
||||
b.notifyProcessed()
|
||||
|
||||
case ev := <-b.serveDoneCh:
|
||||
b.handleServeDone(ev, active, inFlight, &queued)
|
||||
b.schedule.OnServeDone(ev)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -177,37 +153,68 @@ func (b *baseRouter) run() {
|
||||
// down, the send never lands, one of the other select cases fires, and we
|
||||
// report back that the grant did NOT happen.
|
||||
//
|
||||
// That distinction matters for in-flight bookkeeping — see grantHandler.
|
||||
func (b *baseRouter) grant(req handlerReq, resp handlerResp) bool {
|
||||
// That distinction matters for in-flight bookkeeping — see GrantServe.
|
||||
func (b *baseRouter) grant(req scheduler.HandlerReq, resp scheduler.HandlerResp) bool {
|
||||
select {
|
||||
case req.respond <- resp:
|
||||
case req.Respond <- resp:
|
||||
return true
|
||||
case <-req.ctx.Done():
|
||||
case <-req.Ctx.Done():
|
||||
return false
|
||||
case <-b.shutdownCtx.Done():
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
// grantHandler is the "this caller can now use process p" path. It does
|
||||
// two things that must stay locked together:
|
||||
//
|
||||
// 1. Hand the caller a wrapped p.ServeHTTP (via trackedServe) so when the
|
||||
// HTTP request finishes, the run loop hears about it.
|
||||
// 2. Bump inFlight[modelID] so the router knows this process is busy and
|
||||
// refuses to evict it until the count comes back down.
|
||||
//
|
||||
// The increment is gated on grant() returning true. If grant() returns
|
||||
// false, the caller already walked away and trackedServe will never run —
|
||||
// which means no matching decrement will ever arrive on serveDoneCh.
|
||||
// Incrementing in that case would strand the counter at >0 forever and
|
||||
// the router would never again be willing to swap this model out.
|
||||
//
|
||||
// In short: increment if and only if we know a decrement is coming.
|
||||
func (b *baseRouter) grantHandler(req handlerReq, modelID string, p process.Process, inFlight map[string]int) {
|
||||
if b.grant(req, handlerResp{handleFunc: b.trackedServe(modelID, p)}) {
|
||||
inFlight[modelID]++
|
||||
// ModelState implements scheduler.Effects.
|
||||
func (b *baseRouter) ModelState(modelID string) (process.ProcessState, bool) {
|
||||
p, ok := b.processes[modelID]
|
||||
if !ok {
|
||||
var zero process.ProcessState
|
||||
return zero, false
|
||||
}
|
||||
return p.State(), true
|
||||
}
|
||||
|
||||
// StartSwap implements scheduler.Effects. It returns immediately: the swap
// itself runs in a new goroutine via doSwap, which reports its result on
// swapDoneCh.
func (b *baseRouter) StartSwap(modelID string, evict []string) {
	go b.doSwap(modelID, evict)
}
|
||||
|
||||
// GrantError implements scheduler.Effects.
|
||||
func (b *baseRouter) GrantError(req scheduler.HandlerReq, err error) {
|
||||
b.grant(req, scheduler.HandlerResp{Err: err})
|
||||
}
|
||||
|
||||
// GrantServe implements scheduler.Effects. It hands the caller a wrapped
|
||||
// p.ServeHTTP (via trackedServe) so the run loop hears about the request
|
||||
// finishing, and reports whether the caller received it. The scheduler bumps
|
||||
// its in-flight count only on a true return: if grant() returns false the
|
||||
// caller already walked away and trackedServe will never run, so no matching
|
||||
// decrement will ever arrive — incrementing would strand the counter at >0 and
|
||||
// the router would never again be willing to evict this model.
|
||||
func (b *baseRouter) GrantServe(req scheduler.HandlerReq, modelID string) bool {
|
||||
p := b.processes[modelID]
|
||||
return b.grant(req, scheduler.HandlerResp{HandleFunc: b.trackedServe(modelID, p)})
|
||||
}
|
||||
|
||||
// StopProcesses implements scheduler.Effects, stopping the named processes in
|
||||
// parallel and blocking until all have stopped.
|
||||
func (b *baseRouter) StopProcesses(timeout time.Duration, ids []string) {
|
||||
var wg sync.WaitGroup
|
||||
for _, id := range ids {
|
||||
p, ok := b.processes[id]
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
wg.Add(1)
|
||||
go func(id string, p process.Process) {
|
||||
defer wg.Done()
|
||||
if err := p.Stop(timeout); err != nil {
|
||||
b.logger.Warnf("%s: stopping %s failed: %v", b.name, id, err)
|
||||
}
|
||||
}(id, p)
|
||||
}
|
||||
wg.Wait()
|
||||
}
|
||||
|
||||
// trackedServe is the wrapper that closes the loop on in-flight tracking.
|
||||
@@ -224,7 +231,7 @@ func (b *baseRouter) trackedServe(modelID string, p process.Process) http.Handle
|
||||
return func(w http.ResponseWriter, r *http.Request) {
|
||||
defer func() {
|
||||
select {
|
||||
case b.serveDoneCh <- serveDoneEvent{modelID: modelID}:
|
||||
case b.serveDoneCh <- scheduler.ServeDoneEvent{ModelID: modelID}:
|
||||
case <-b.shutdownCtx.Done():
|
||||
}
|
||||
}()
|
||||
@@ -232,240 +239,6 @@ func (b *baseRouter) trackedServe(modelID string, p process.Process) http.Handle
|
||||
}
|
||||
}
|
||||
|
||||
// handleRequest decides what to do with one incoming ServeHTTP request. It is
|
||||
// called from run() and never blocks indefinitely: any work that has to wait
|
||||
// (starting a process, stopping siblings, waiting for ready) is deferred to
|
||||
// a swap goroutine and reported back via swapDoneCh.
|
||||
//
|
||||
// The decision tree, in order:
|
||||
//
|
||||
// 1. Unknown model — respond with ErrNoLocalModelFound and move on.
|
||||
// 2. A swap to the same model is already in flight — attach this waiter so
|
||||
// one swap serves all callers that asked for the same model.
|
||||
// 3. Fast path — the target process is already ready, the planner sees
|
||||
// nothing to evict, and no in-flight swap is evicting it. Hand back its
|
||||
// ServeHTTP immediately (wrapped so the run loop knows when it ends).
|
||||
// 4. Would collide with an in-flight swap (we'd stop their target, or
|
||||
// they're stopping us) — park in the queue for handleSwapDone to drain.
|
||||
// 5. Would evict a process that is still handling requests — park in the
|
||||
// queue. handleServeDone will retry when the busy process drains.
|
||||
// 6. Otherwise — start a new swap. This may run in parallel with other
|
||||
// active swaps when their evict sets don't intersect.
|
||||
func (b *baseRouter) handleRequest(req handlerReq, active map[string]*activeSwap, inFlight map[string]int, queued *[]handlerReq) {
|
||||
// (1) Unknown model.
|
||||
p, ok := b.processes[req.model]
|
||||
if !ok {
|
||||
b.logger.Debugf("%s: model %s not handled by this router", b.name, req.model)
|
||||
b.grant(req, handlerResp{err: ErrNoLocalModelFound})
|
||||
return
|
||||
}
|
||||
|
||||
// (2) Join an in-flight swap for the same model.
|
||||
if s, ok := active[req.model]; ok {
|
||||
b.logger.Debugf("%s: joining in-flight swap for model %s (%d waiters)", b.name, req.model, len(s.waiters)+1)
|
||||
s.waiters = append(s.waiters, req)
|
||||
return
|
||||
}
|
||||
|
||||
evict := b.planner.EvictionFor(req.model, activeTargets(active, req.model))
|
||||
|
||||
// (3) Fast path: ready, nothing to evict, and nobody is evicting us.
|
||||
if p.State() == process.StateReady && len(evict) == 0 && !collidesWith(req.model, evict, active) {
|
||||
b.logger.Debugf("%s: fast-path serving model %s (already ready)", b.name, req.model)
|
||||
b.grantHandler(req, req.model, p, inFlight)
|
||||
return
|
||||
}
|
||||
|
||||
// (4) Collision with an in-flight swap — queue.
|
||||
if collidesWith(req.model, evict, active) {
|
||||
b.logger.Debugf("%s: queuing request for model %s (collides with in-flight swap)", b.name, req.model)
|
||||
*queued = append(*queued, req)
|
||||
b.broadcastQueuePositions(*queued)
|
||||
return
|
||||
}
|
||||
|
||||
// (5) Would evict a busy process — queue until it drains.
|
||||
if conflictsWithInFlight(evict, inFlight) {
|
||||
b.logger.Debugf("%s: queuing request for model %s (would evict in-flight process)", b.name, req.model)
|
||||
*queued = append(*queued, req)
|
||||
b.broadcastQueuePositions(*queued)
|
||||
return
|
||||
}
|
||||
|
||||
// (6) Start a new (possibly parallel) swap.
|
||||
b.logger.Debugf("%s: starting swap for model %s, evicting %v", b.name, req.model, evict)
|
||||
s := b.startSwap(req, evict)
|
||||
active[s.modelID] = s
|
||||
}
|
||||
|
||||
// handleSwapDone is called from run() when a swap goroutine reports that it
|
||||
// has finished. It fans out the result to every waiter that joined this swap,
|
||||
// removes the swap from the active map, and then walks the queue once,
|
||||
// promoting any items that no longer collide with the remaining active set.
|
||||
// FIFO order is preserved: items still blocked stay in place.
|
||||
func (b *baseRouter) handleSwapDone(ev swapDone, active map[string]*activeSwap, inFlight map[string]int, queued *[]handlerReq) {
|
||||
s, ok := active[ev.modelID]
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
delete(active, ev.modelID)
|
||||
|
||||
for _, w := range s.waiters {
|
||||
if ev.err != nil {
|
||||
b.grant(w, handlerResp{err: ev.err})
|
||||
} else {
|
||||
p := b.processes[ev.modelID]
|
||||
b.grantHandler(w, ev.modelID, p, inFlight)
|
||||
}
|
||||
}
|
||||
|
||||
b.drainQueue(active, inFlight, queued)
|
||||
}
|
||||
|
||||
// handleServeDone is called from run() each time a tracked ServeHTTP
|
||||
// finishes. It decrements the per-model in-flight count and, when that
|
||||
// drops to zero, retries the queue: requests whose swap was deferred
|
||||
// because they would have evicted this (now-idle) process can now proceed.
|
||||
func (b *baseRouter) handleServeDone(ev serveDoneEvent, active map[string]*activeSwap, inFlight map[string]int, queued *[]handlerReq) {
|
||||
inFlight[ev.modelID]--
|
||||
if inFlight[ev.modelID] <= 0 {
|
||||
delete(inFlight, ev.modelID)
|
||||
b.drainQueue(active, inFlight, queued)
|
||||
}
|
||||
}
|
||||
|
||||
// drainQueue walks the queued requests in order, re-running the handleRequest
|
||||
// decision tree against the (now smaller) active set. Items that can now start
|
||||
// or join become satisfied; items still blocked remain queued in original
|
||||
// order so they get another chance on the next swap completion.
|
||||
func (b *baseRouter) drainQueue(active map[string]*activeSwap, inFlight map[string]int, queued *[]handlerReq) {
|
||||
if len(*queued) == 0 {
|
||||
return
|
||||
}
|
||||
pending := *queued
|
||||
var remaining []handlerReq
|
||||
for _, req := range pending {
|
||||
p, ok := b.processes[req.model]
|
||||
if !ok {
|
||||
b.grant(req, handlerResp{err: ErrNoLocalModelFound})
|
||||
continue
|
||||
}
|
||||
if s, ok := active[req.model]; ok {
|
||||
b.logger.Debugf("%s: queued request for model %s now joining in-flight swap", b.name, req.model)
|
||||
s.waiters = append(s.waiters, req)
|
||||
continue
|
||||
}
|
||||
evict := b.planner.EvictionFor(req.model, activeTargets(active, req.model))
|
||||
if p.State() == process.StateReady && len(evict) == 0 && !collidesWith(req.model, evict, active) {
|
||||
b.logger.Debugf("%s: queued request for model %s now served fast-path", b.name, req.model)
|
||||
b.grantHandler(req, req.model, p, inFlight)
|
||||
continue
|
||||
}
|
||||
if collidesWith(req.model, evict, active) {
|
||||
remaining = append(remaining, req)
|
||||
continue
|
||||
}
|
||||
if conflictsWithInFlight(evict, inFlight) {
|
||||
remaining = append(remaining, req)
|
||||
continue
|
||||
}
|
||||
b.logger.Debugf("%s: queued request for model %s now starting swap, evicting %v", b.name, req.model, evict)
|
||||
s := b.startSwap(req, evict)
|
||||
active[s.modelID] = s
|
||||
}
|
||||
*queued = remaining
|
||||
b.broadcastQueuePositions(*queued)
|
||||
}
|
||||
|
||||
// broadcastQueuePositions sends each queued request its current 1-indexed
|
||||
// position. Sends are non-blocking: if the channel is full, the old value is
|
||||
// drained first so the consumer always sees the latest position.
|
||||
func (b *baseRouter) broadcastQueuePositions(queued []handlerReq) {
|
||||
for i, req := range queued {
|
||||
pos := i + 1
|
||||
select {
|
||||
case req.positionCh <- pos:
|
||||
default:
|
||||
select {
|
||||
case <-req.positionCh:
|
||||
default:
|
||||
}
|
||||
select {
|
||||
case req.positionCh <- pos:
|
||||
default:
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (b *baseRouter) startSwap(initial handlerReq, evict []string) *activeSwap {
|
||||
swap := &activeSwap{
|
||||
modelID: initial.model,
|
||||
evict: evict,
|
||||
waiters: []handlerReq{initial},
|
||||
}
|
||||
b.planner.OnSwapStart(initial.model)
|
||||
go b.doSwap(initial.model, evict)
|
||||
return swap
|
||||
}
|
||||
|
||||
// activeTargets returns the IDs of every in-flight swap target except exclude.
|
||||
// baseRouter passes this to the planner so eviction decisions account for
|
||||
// models that have been committed to but have not yet transitioned to
|
||||
// StateStarting in their process state machine.
|
||||
func activeTargets(active map[string]*activeSwap, exclude string) []string {
|
||||
if len(active) == 0 {
|
||||
return nil
|
||||
}
|
||||
out := make([]string, 0, len(active))
|
||||
for id := range active {
|
||||
if id == exclude {
|
||||
continue
|
||||
}
|
||||
out = append(out, id)
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
// collidesWith reports whether a new swap with this target and evict set can
|
||||
// safely run alongside the currently active swaps. Same-target callers should
|
||||
// JOIN (handled before this) — they do not collide with themselves.
|
||||
func collidesWith(target string, evict []string, active map[string]*activeSwap) bool {
|
||||
for id, s := range active {
|
||||
if id == target {
|
||||
continue
|
||||
}
|
||||
if containsString(evict, id) {
|
||||
return true
|
||||
}
|
||||
if containsString(s.evict, target) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// conflictsWithInFlight reports whether any model named in evict has a
// positive in-flight request count. The router uses this to defer a swap
// rather than stop a process that is still serving callers.
func conflictsWithInFlight(evict []string, inFlight map[string]int) bool {
	for i := 0; i < len(evict); i++ {
		if busy := inFlight[evict[i]]; busy > 0 {
			return true
		}
	}
	return false
}
|
||||
|
||||
// containsString reports whether s is an element of xs.
func containsString(xs []string, s string) bool {
	for i := 0; i < len(xs); i++ {
		if xs[i] == s {
			return true
		}
	}
	return false
}
|
||||
|
||||
func (b *baseRouter) doSwap(modelID string, toStop []string) {
|
||||
timeout := b.healthCheckTimeout()
|
||||
|
||||
@@ -493,31 +266,24 @@ func (b *baseRouter) doSwap(modelID string, toStop []string) {
|
||||
err := target.WaitReady(b.shutdownCtx)
|
||||
|
||||
select {
|
||||
case b.swapDoneCh <- swapDone{modelID: modelID, err: err}:
|
||||
case b.swapDoneCh <- scheduler.SwapDone{ModelID: modelID, Err: err}:
|
||||
case <-b.shutdownCtx.Done():
|
||||
}
|
||||
}
|
||||
|
||||
func (b *baseRouter) handleShutdown(req shutdownReq, active map[string]*activeSwap, queued []handlerReq) {
|
||||
func (b *baseRouter) handleShutdown(req shutdownReq) {
|
||||
shutdownErr := fmt.Errorf("%s is shutting down", b.name)
|
||||
|
||||
// Cancel shutdownCtx first so any waiter that is currently parked on
|
||||
// its respond channel can exit via its own shutdownCtx.Done() branch.
|
||||
// The grant calls below then either land (waiter happened to receive
|
||||
// The OnShutdown grants below then either land (waiter happened to receive
|
||||
// before noticing shutdown) or fall through immediately via grant's
|
||||
// shutdownCtx case — either way the waiter sees a non-OK response.
|
||||
// This does NOT touch processes: their lifetime is procCtx, cancelled
|
||||
// only after the graceful Stop() calls below have reaped them.
|
||||
b.shutdownFn()
|
||||
|
||||
for _, s := range active {
|
||||
for _, w := range s.waiters {
|
||||
b.grant(w, handlerResp{err: shutdownErr})
|
||||
}
|
||||
}
|
||||
for _, w := range queued {
|
||||
b.grant(w, handlerResp{err: shutdownErr})
|
||||
}
|
||||
b.schedule.OnShutdown(shutdownErr)
|
||||
|
||||
stopTimeout := req.timeout
|
||||
if stopTimeout <= 0 {
|
||||
@@ -628,75 +394,6 @@ func (b *baseRouter) Unload(timeout time.Duration, models ...string) {
|
||||
<-req.respond
|
||||
}
|
||||
|
||||
// handleUnload runs on the run loop in response to an Unload call. It
|
||||
// reconciles router-owned state with the impending Stop, then performs
|
||||
// the Stop synchronously so callers of Unload remain blocked until each
|
||||
// targeted process has actually exited.
|
||||
func (b *baseRouter) handleUnload(req unloadReq, active map[string]*activeSwap, inFlight map[string]int, queued *[]handlerReq) {
|
||||
unloadErr := fmt.Errorf("%s: model unloaded", b.name)
|
||||
|
||||
targetSet := make(map[string]bool, len(req.targets))
|
||||
for _, id := range req.targets {
|
||||
targetSet[id] = true
|
||||
}
|
||||
|
||||
// Release waiters of any in-flight swap whose target is being
|
||||
// unloaded. The swap goroutine itself is left to finish on its own;
|
||||
// when its swapDone arrives, handleSwapDone will find no entry in
|
||||
// active and silently drop it.
|
||||
for id := range targetSet {
|
||||
s, ok := active[id]
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
for _, w := range s.waiters {
|
||||
b.grant(w, handlerResp{err: unloadErr})
|
||||
}
|
||||
delete(active, id)
|
||||
}
|
||||
|
||||
// Drop queued requests addressed to unloaded models. Requests for
|
||||
// other models stay queued and may benefit from drainQueue at the end.
|
||||
if len(*queued) > 0 {
|
||||
kept := (*queued)[:0]
|
||||
for _, w := range *queued {
|
||||
if targetSet[w.model] {
|
||||
b.grant(w, handlerResp{err: unloadErr})
|
||||
continue
|
||||
}
|
||||
kept = append(kept, w)
|
||||
}
|
||||
*queued = kept
|
||||
}
|
||||
|
||||
// Stop the targeted processes. Done synchronously so Unload's caller
|
||||
// can rely on "after Unload returns, the process is stopped". inFlight
|
||||
// is intentionally NOT cleared here: each dying handler will fire its
|
||||
// trackedServe defer and reach handleServeDone in the normal way once
|
||||
// the run loop is free again.
|
||||
var wg sync.WaitGroup
|
||||
for id := range targetSet {
|
||||
p, ok := b.processes[id]
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
wg.Add(1)
|
||||
go func(id string, p process.Process) {
|
||||
defer wg.Done()
|
||||
if err := p.Stop(req.timeout); err != nil {
|
||||
b.logger.Warnf("%s: unloading %s failed: %v", b.name, id, err)
|
||||
}
|
||||
}(id, p)
|
||||
}
|
||||
wg.Wait()
|
||||
|
||||
// Removing entries from active above may have unblocked queued
|
||||
// requests that previously collided with the now-cancelled swaps.
|
||||
b.drainQueue(active, inFlight, queued)
|
||||
|
||||
close(req.respond)
|
||||
}
|
||||
|
||||
func (b *baseRouter) Shutdown(timeout time.Duration) error {
|
||||
if !b.shuttingDown.CompareAndSwap(false, true) {
|
||||
return fmt.Errorf("%s shutdown already in progress", b.name)
|
||||
@@ -712,24 +409,24 @@ func (b *baseRouter) Shutdown(timeout time.Duration) error {
|
||||
|
||||
func (b *baseRouter) ServeHTTP(w http.ResponseWriter, req *http.Request) {
|
||||
if b.shuttingDown.Load() {
|
||||
SendError(w, req, fmt.Errorf("%s is shutting down", b.name))
|
||||
shared.SendError(w, req, fmt.Errorf("%s is shutting down", b.name))
|
||||
return
|
||||
}
|
||||
|
||||
data, err := FetchContext(req, b.config)
|
||||
data, err := shared.FetchContext(req, b.config)
|
||||
if err != nil {
|
||||
SendError(w, req, err)
|
||||
shared.SendError(w, req, err)
|
||||
return
|
||||
}
|
||||
|
||||
hr := handlerReq{
|
||||
model: data.ModelID,
|
||||
ctx: req.Context(),
|
||||
// Unbuffered: a successful send on respond proves the waiter is
|
||||
hr := scheduler.HandlerReq{
|
||||
Model: data.ModelID,
|
||||
Ctx: req.Context(),
|
||||
// Unbuffered: a successful send on Respond proves the waiter is
|
||||
// alive and consuming. grant() relies on this to avoid handing a
|
||||
// handleFunc to a cancelled waiter and leaking the inFlight count.
|
||||
respond: make(chan handlerResp),
|
||||
positionCh: make(chan int, 1),
|
||||
Respond: make(chan scheduler.HandlerResp),
|
||||
PositionCh: make(chan int, 1),
|
||||
}
|
||||
|
||||
select {
|
||||
@@ -737,7 +434,7 @@ func (b *baseRouter) ServeHTTP(w http.ResponseWriter, req *http.Request) {
|
||||
case <-req.Context().Done():
|
||||
return
|
||||
case <-b.shutdownCtx.Done():
|
||||
SendError(w, req, fmt.Errorf("%s is shutting down", b.name))
|
||||
shared.SendError(w, req, fmt.Errorf("%s is shutting down", b.name))
|
||||
return
|
||||
}
|
||||
|
||||
@@ -757,7 +454,7 @@ func (b *baseRouter) ServeHTTP(w http.ResponseWriter, req *http.Request) {
|
||||
go func() {
|
||||
for {
|
||||
select {
|
||||
case pos := <-hr.positionCh:
|
||||
case pos := <-hr.PositionCh:
|
||||
lw.setUpdate(fmt.Sprintf("Queue position: #%d", pos))
|
||||
case <-swapCtx.Done():
|
||||
return
|
||||
@@ -779,22 +476,30 @@ func (b *baseRouter) ServeHTTP(w http.ResponseWriter, req *http.Request) {
|
||||
}
|
||||
}
|
||||
|
||||
var resp handlerResp
|
||||
var resp scheduler.HandlerResp
|
||||
select {
|
||||
case resp = <-hr.respond:
|
||||
case resp = <-hr.Respond:
|
||||
finishLoading()
|
||||
case <-req.Context().Done():
|
||||
finishLoading()
|
||||
// Notify the scheduler so it can prune this request from its queue
|
||||
// and swap waiters. Without this, a queued request whose client left
|
||||
// would sit in the scheduler until drainQueue eventually starts a
|
||||
// wasted model load for it.
|
||||
select {
|
||||
case b.cancelCh <- hr:
|
||||
case <-b.shutdownCtx.Done():
|
||||
}
|
||||
return
|
||||
case <-b.shutdownCtx.Done():
|
||||
finishLoading()
|
||||
SendError(w, req, fmt.Errorf("%s is shutting down", b.name))
|
||||
shared.SendError(w, req, fmt.Errorf("%s is shutting down", b.name))
|
||||
return
|
||||
}
|
||||
|
||||
if resp.err != nil {
|
||||
SendError(w, req, resp.err)
|
||||
if resp.Err != nil {
|
||||
shared.SendError(w, req, resp.Err)
|
||||
return
|
||||
}
|
||||
resp.handleFunc(w, req)
|
||||
resp.HandleFunc(w, req)
|
||||
}
|
||||
|
||||
+15
-614
@@ -5,35 +5,34 @@ import (
|
||||
"io"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"sync"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/mostlygeek/llama-swap/internal/config"
|
||||
"github.com/mostlygeek/llama-swap/internal/logmon"
|
||||
"github.com/mostlygeek/llama-swap/internal/process"
|
||||
"github.com/mostlygeek/llama-swap/internal/router/scheduler"
|
||||
)
|
||||
|
||||
// stubPlanner is a swapPlanner that returns a fixed eviction list per target
|
||||
// and never logs. It lets the base-router tests cover shared run-loop
|
||||
// behaviour without dragging in either real router's eviction rules.
|
||||
type stubPlanner struct {
|
||||
evict map[string][]string
|
||||
}
|
||||
// These tests cover baseRouter's own machinery — the run loop, process
|
||||
// lifecycle (doSwap), grant/ServeHTTP plumbing, Unload, and Shutdown. The
|
||||
// scheduling decision logic (queueing, collation, eviction collisions) lives in
|
||||
// the scheduler package and is tested directly there; see fifo_test.go.
|
||||
|
||||
func (s *stubPlanner) EvictionFor(target string, _ []string) []string {
|
||||
if s.evict == nil {
|
||||
return nil
|
||||
}
|
||||
return s.evict[target]
|
||||
}
|
||||
// stubPlanner evicts nothing. baseRouter tests drive the run loop through the
|
||||
// default FIFO scheduler without exercising any particular eviction policy.
|
||||
type stubPlanner struct{}
|
||||
|
||||
func (s *stubPlanner) OnSwapStart(string) {}
|
||||
func (s *stubPlanner) EvictionFor(string, []string) []string { return nil }
|
||||
func (s *stubPlanner) OnSwapStart(string, []string) {}
|
||||
|
||||
func newTestBase(t *testing.T, processes map[string]process.Process, planner swapPlanner) *baseRouter {
|
||||
func newTestBase(t *testing.T, processes map[string]process.Process, planner scheduler.Swapper) *baseRouter {
|
||||
t.Helper()
|
||||
conf := config.Config{HealthCheckTimeout: 5}
|
||||
b := newBaseRouter("test", conf, processes, planner, logmon.NewWriter(io.Discard))
|
||||
b, err := newBaseRouter("test", conf, processes, logmon.NewWriter(io.Discard), planner)
|
||||
if err != nil {
|
||||
t.Fatalf("newBaseRouter: %v", err)
|
||||
}
|
||||
b.testProcessed = make(chan struct{}, 64)
|
||||
go b.run()
|
||||
t.Cleanup(func() {
|
||||
@@ -157,114 +156,6 @@ func TestBaseRouter_Unload_StopsInParallel(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestBaseRouter_Unload_ReleasesActiveSwapWaiters verifies that Unload
|
||||
// rejoins router state: a request whose swap to the unloaded model is
|
||||
// still in progress receives an error, instead of being abandoned
|
||||
// against a process that's about to vanish.
|
||||
func TestBaseRouter_Unload_ReleasesActiveSwapWaiters(t *testing.T) {
|
||||
a := newFakeProcess("a")
|
||||
// autoReady=false: the swap parks on WaitReady so we can interrupt
|
||||
// it with Unload before it completes.
|
||||
|
||||
b := newTestBase(t, map[string]process.Process{"a": a}, &stubPlanner{})
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
done := make(chan struct{})
|
||||
go func() {
|
||||
b.ServeHTTP(w, newRequest("a"))
|
||||
close(done)
|
||||
}()
|
||||
waitProcessed(t, b.testProcessed, 1) // handlerReq absorbed; swap started
|
||||
<-a.runStarted
|
||||
|
||||
b.Unload(time.Second, "a")
|
||||
|
||||
select {
|
||||
case <-done:
|
||||
case <-time.After(2 * time.Second):
|
||||
t.Fatal("ServeHTTP did not return after Unload")
|
||||
}
|
||||
if w.Code == http.StatusOK {
|
||||
t.Errorf("expected non-OK status after Unload, got %d body=%q", w.Code, w.Body.String())
|
||||
}
|
||||
if a.State() != process.StateStopped {
|
||||
t.Errorf("a state=%q want stopped", a.State())
|
||||
}
|
||||
}
|
||||
|
||||
// TestBaseRouter_Unload_DropsQueuedRequests verifies that queued requests
|
||||
// for an unloaded model receive an error rather than sitting forever in
|
||||
// the queue against state the router no longer maintains.
|
||||
func TestBaseRouter_Unload_DropsQueuedRequests(t *testing.T) {
|
||||
a := newFakeProcess("a")
|
||||
pb := newFakeProcess("b")
|
||||
// Loading B evicts A — so a request for B while A is loading queues.
|
||||
planner := &stubPlanner{evict: map[string][]string{"b": {"a"}}}
|
||||
b := newTestBase(t, map[string]process.Process{"a": a, "b": pb}, planner)
|
||||
|
||||
// r1 starts the swap to A and parks on WaitReady (autoReady=false).
|
||||
w1 := httptest.NewRecorder()
|
||||
done1 := make(chan struct{})
|
||||
go func() {
|
||||
b.ServeHTTP(w1, newRequest("a"))
|
||||
close(done1)
|
||||
}()
|
||||
waitProcessed(t, b.testProcessed, 1)
|
||||
<-a.runStarted
|
||||
|
||||
// r2 for B collides with A's in-flight swap and queues.
|
||||
w2 := httptest.NewRecorder()
|
||||
done2 := make(chan struct{})
|
||||
go func() {
|
||||
b.ServeHTTP(w2, newRequest("b"))
|
||||
close(done2)
|
||||
}()
|
||||
waitProcessed(t, b.testProcessed, 1)
|
||||
|
||||
// Unload B — r2 (queued, targeting B) must be released with an error.
|
||||
b.Unload(time.Second, "b")
|
||||
|
||||
select {
|
||||
case <-done2:
|
||||
case <-time.After(2 * time.Second):
|
||||
t.Fatal("queued B request did not return after Unload(b)")
|
||||
}
|
||||
if w2.Code == http.StatusOK {
|
||||
t.Errorf("queued B request: expected non-OK status, got %d", w2.Code)
|
||||
}
|
||||
if got := pb.runCalls.Load(); got != 0 {
|
||||
t.Errorf("b.runCalls=%d want 0 (B should never have been started)", got)
|
||||
}
|
||||
|
||||
// Release r1 so the test cleans up cleanly.
|
||||
a.markReady()
|
||||
select {
|
||||
case <-done1:
|
||||
case <-time.After(2 * time.Second):
|
||||
t.Fatal("r1 did not complete after a.markReady")
|
||||
}
|
||||
}
|
||||
|
||||
func TestBaseRouter_FastPath(t *testing.T) {
|
||||
a := newFakeProcess("a")
|
||||
a.markReady()
|
||||
|
||||
b := newTestBase(t, map[string]process.Process{"a": a}, &stubPlanner{})
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
b.ServeHTTP(w, newRequest("a"))
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Fatalf("status=%d body=%q", w.Code, w.Body.String())
|
||||
}
|
||||
if got := a.serveCalls.Load(); got != 1 {
|
||||
t.Errorf("serveCalls=%d want 1", got)
|
||||
}
|
||||
if got := a.runCalls.Load(); got != 0 {
|
||||
t.Errorf("runCalls=%d want 0 (fast path should not start)", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestBaseRouter_OnDemandStart(t *testing.T) {
|
||||
a := newFakeProcess("a")
|
||||
a.autoReady = true
|
||||
@@ -285,43 +176,6 @@ func TestBaseRouter_OnDemandStart(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestBaseRouter_ConcurrentSameModel(t *testing.T) {
|
||||
a := newFakeProcess("a")
|
||||
// autoReady=false so the swap parks on WaitReady until we release it.
|
||||
|
||||
b := newTestBase(t, map[string]process.Process{"a": a}, &stubPlanner{})
|
||||
|
||||
const N = 5
|
||||
var wg sync.WaitGroup
|
||||
codes := make([]int, N)
|
||||
for i := 0; i < N; i++ {
|
||||
wg.Add(1)
|
||||
go func(i int) {
|
||||
defer wg.Done()
|
||||
w := httptest.NewRecorder()
|
||||
b.ServeHTTP(w, newRequest("a"))
|
||||
codes[i] = w.Code
|
||||
}(i)
|
||||
}
|
||||
|
||||
waitProcessed(t, b.testProcessed, N) // all N handlerReqs absorbed by run()
|
||||
<-a.runStarted // swap goroutine reached Run()
|
||||
a.markReady()
|
||||
wg.Wait()
|
||||
|
||||
for i, c := range codes {
|
||||
if c != http.StatusOK {
|
||||
t.Errorf("request %d: status=%d", i, c)
|
||||
}
|
||||
}
|
||||
if got := a.runCalls.Load(); got != 1 {
|
||||
t.Errorf("runCalls=%d want 1 (single swap should issue one Run)", got)
|
||||
}
|
||||
if got := a.serveCalls.Load(); got != N {
|
||||
t.Errorf("serveCalls=%d want %d", got, N)
|
||||
}
|
||||
}
|
||||
|
||||
func TestBaseRouter_ContextCancel(t *testing.T) {
|
||||
a := newFakeProcess("a")
|
||||
// autoReady=false so swap parks forever until we mark ready.
|
||||
@@ -364,459 +218,6 @@ func TestBaseRouter_ContextCancel(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestBaseRouter_QueuedDifferentModel(t *testing.T) {
|
||||
a := newFakeProcess("a")
|
||||
pa := newFakeProcess("b")
|
||||
|
||||
// Loading b must stop a.
|
||||
planner := &stubPlanner{evict: map[string][]string{"b": {"a"}}}
|
||||
b := newTestBase(t, map[string]process.Process{"a": a, "b": pa}, planner)
|
||||
|
||||
// First request starts a swap to A; A's autoReady=false so it parks.
|
||||
w1 := httptest.NewRecorder()
|
||||
done1 := make(chan struct{})
|
||||
go func() {
|
||||
b.ServeHTTP(w1, newRequest("a"))
|
||||
close(done1)
|
||||
}()
|
||||
waitProcessed(t, b.testProcessed, 1)
|
||||
<-a.runStarted
|
||||
|
||||
// Second request for B should queue while A's swap is in flight.
|
||||
w2 := httptest.NewRecorder()
|
||||
done2 := make(chan struct{})
|
||||
go func() {
|
||||
b.ServeHTTP(w2, newRequest("b"))
|
||||
close(done2)
|
||||
}()
|
||||
waitProcessed(t, b.testProcessed, 1)
|
||||
|
||||
if got := pa.runCalls.Load(); got != 0 {
|
||||
t.Errorf("b started early: runCalls=%d want 0 while A's swap is pending", got)
|
||||
}
|
||||
|
||||
// Release A's swap. B's swap should then run.
|
||||
a.markReady()
|
||||
waitProcessed(t, b.testProcessed, 1) // swapDone for A → B's swap kicked off
|
||||
<-pa.runStarted
|
||||
|
||||
select {
|
||||
case <-done1:
|
||||
case <-time.After(time.Second):
|
||||
t.Fatal("A request did not complete")
|
||||
}
|
||||
pa.markReady()
|
||||
select {
|
||||
case <-done2:
|
||||
case <-time.After(time.Second):
|
||||
t.Fatal("queued B request did not complete after A's swap")
|
||||
}
|
||||
if w2.Code != http.StatusOK {
|
||||
t.Errorf("B status=%d body=%q", w2.Code, w2.Body.String())
|
||||
}
|
||||
if got := a.stopCalls.Load(); got != 1 {
|
||||
t.Errorf("a.stopCalls=%d want 1 (B's swap must stop A)", got)
|
||||
}
|
||||
}
|
||||
|
||||
// TestBaseRouter_QueueCollation verifies that incoming requests of the form
|
||||
// a, b, c, a, b, c collapse into three swaps (one per model) and that the
|
||||
// second request for each model rides the fast path — either joining the
|
||||
// active swap, or being pulled out of the queue when handleSwapDone promotes
|
||||
// the next model.
|
||||
func TestBaseRouter_QueueCollation(t *testing.T) {
|
||||
a := newFakeProcess("a")
|
||||
pb := newFakeProcess("b")
|
||||
pc := newFakeProcess("c")
|
||||
|
||||
// Each model evicts the other two so all swaps are mutually exclusive.
|
||||
planner := &stubPlanner{evict: map[string][]string{
|
||||
"a": {"b", "c"},
|
||||
"b": {"a", "c"},
|
||||
"c": {"a", "b"},
|
||||
}}
|
||||
b := newTestBase(t, map[string]process.Process{"a": a, "b": pb, "c": pc}, planner)
|
||||
|
||||
var (
|
||||
completedMu sync.Mutex
|
||||
completed []string
|
||||
)
|
||||
record := func(id string) {
|
||||
completedMu.Lock()
|
||||
defer completedMu.Unlock()
|
||||
completed = append(completed, id)
|
||||
}
|
||||
|
||||
ids := []string{"a", "b", "c", "a", "b", "c"}
|
||||
var wg sync.WaitGroup
|
||||
for _, id := range ids {
|
||||
id := id
|
||||
wg.Add(1)
|
||||
go func() {
|
||||
defer wg.Done()
|
||||
w := httptest.NewRecorder()
|
||||
b.ServeHTTP(w, newRequest(id))
|
||||
if w.Code != http.StatusOK {
|
||||
t.Errorf("%s: status=%d body=%q", id, w.Code, w.Body.String())
|
||||
return
|
||||
}
|
||||
record(id)
|
||||
}()
|
||||
// Wait for run() to absorb this request before launching the next,
|
||||
// so handlerCh receives them in launch order.
|
||||
waitProcessed(t, b.testProcessed, 1)
|
||||
}
|
||||
|
||||
// All 6 are now parked in run()'s waiters/queue. Release each swap in
|
||||
// sequence, waiting deterministically for each promotion to fire.
|
||||
<-a.runStarted
|
||||
a.markReady()
|
||||
waitProcessed(t, b.testProcessed, 1) // swapDone(a) → b swap kicked off
|
||||
|
||||
<-pb.runStarted
|
||||
pb.markReady()
|
||||
waitProcessed(t, b.testProcessed, 1) // swapDone(b) → c swap kicked off
|
||||
|
||||
<-pc.runStarted
|
||||
pc.markReady()
|
||||
wg.Wait()
|
||||
|
||||
if got := len(completed); got != 6 {
|
||||
t.Fatalf("completed=%v want 6", completed)
|
||||
}
|
||||
|
||||
// run() fans out responses in model-grouped order (a1,a2 → b1,b2 → c1,c2)
|
||||
// but waiter goroutines may be scheduled in any order after their respond
|
||||
// channel fires, so completion order isn't deterministic. Per-model counts
|
||||
// (combined with the runCalls checks below) are sufficient to prove queue
|
||||
// collation collapsed each pair into a single swap.
|
||||
aDone, bDone, cDone := 0, 0, 0
|
||||
for _, id := range completed {
|
||||
switch id {
|
||||
case "a":
|
||||
aDone++
|
||||
case "b":
|
||||
bDone++
|
||||
case "c":
|
||||
cDone++
|
||||
}
|
||||
}
|
||||
if aDone != 2 || bDone != 2 || cDone != 2 {
|
||||
t.Errorf("per-model counts: a=%d b=%d c=%d, want 2 each (order=%v)", aDone, bDone, cDone, completed)
|
||||
}
|
||||
|
||||
// Single swap per model — the second request for each must have ridden
|
||||
// the fast path (joined active swap or joined a queued sibling), not
|
||||
// triggered an extra Run.
|
||||
if got := a.runCalls.Load(); got != 1 {
|
||||
t.Errorf("a.runCalls=%d want 1", got)
|
||||
}
|
||||
if got := pb.runCalls.Load(); got != 1 {
|
||||
t.Errorf("b.runCalls=%d want 1", got)
|
||||
}
|
||||
if got := pc.runCalls.Load(); got != 1 {
|
||||
t.Errorf("c.runCalls=%d want 1", got)
|
||||
}
|
||||
}
|
||||
|
||||
// TestBaseRouter_ConcurrentDisjointSwaps verifies that two requests with
|
||||
// non-conflicting evict sets are loaded in parallel: both Run() calls happen
|
||||
// before either process is marked ready.
|
||||
func TestBaseRouter_ConcurrentDisjointSwaps(t *testing.T) {
|
||||
a := newFakeProcess("a")
|
||||
pb := newFakeProcess("b")
|
||||
|
||||
// Empty evict sets for both: they can load in parallel.
|
||||
b := newTestBase(t, map[string]process.Process{"a": a, "b": pb}, &stubPlanner{})
|
||||
|
||||
w1 := httptest.NewRecorder()
|
||||
done1 := make(chan struct{})
|
||||
go func() {
|
||||
b.ServeHTTP(w1, newRequest("a"))
|
||||
close(done1)
|
||||
}()
|
||||
waitProcessed(t, b.testProcessed, 1)
|
||||
|
||||
w2 := httptest.NewRecorder()
|
||||
done2 := make(chan struct{})
|
||||
go func() {
|
||||
b.ServeHTTP(w2, newRequest("b"))
|
||||
close(done2)
|
||||
}()
|
||||
waitProcessed(t, b.testProcessed, 1)
|
||||
|
||||
// Both swaps must have reached Run() before either is marked ready —
|
||||
// proves they ran in parallel rather than serializing.
|
||||
<-a.runStarted
|
||||
<-pb.runStarted
|
||||
|
||||
a.markReady()
|
||||
pb.markReady()
|
||||
|
||||
select {
|
||||
case <-done1:
|
||||
case <-time.After(time.Second):
|
||||
t.Fatal("request A did not complete")
|
||||
}
|
||||
select {
|
||||
case <-done2:
|
||||
case <-time.After(time.Second):
|
||||
t.Fatal("request B did not complete")
|
||||
}
|
||||
|
||||
if w1.Code != http.StatusOK {
|
||||
t.Errorf("A status=%d body=%q", w1.Code, w1.Body.String())
|
||||
}
|
||||
if w2.Code != http.StatusOK {
|
||||
t.Errorf("B status=%d body=%q", w2.Code, w2.Body.String())
|
||||
}
|
||||
if got := a.stopCalls.Load(); got != 0 {
|
||||
t.Errorf("a.stopCalls=%d want 0 (parallel swap, no eviction)", got)
|
||||
}
|
||||
if got := pb.stopCalls.Load(); got != 0 {
|
||||
t.Errorf("b.stopCalls=%d want 0 (parallel swap, no eviction)", got)
|
||||
}
|
||||
}
|
||||
|
||||
// TestBaseRouter_QueueDrainPromotesMultiple verifies that completing one swap
|
||||
// unblocks every queued request that no longer collides — they all start in
|
||||
// parallel rather than one-per-completion.
|
||||
func TestBaseRouter_QueueDrainPromotesMultiple(t *testing.T) {
|
||||
a := newFakeProcess("a")
|
||||
pb := newFakeProcess("b")
|
||||
pc := newFakeProcess("c")
|
||||
|
||||
// A's swap evicts both B and C, so B and C must queue. Once A finishes
|
||||
// B and C themselves have empty evict sets, so they can start together.
|
||||
planner := &stubPlanner{evict: map[string][]string{
|
||||
"a": {"b", "c"},
|
||||
}}
|
||||
b := newTestBase(t, map[string]process.Process{"a": a, "b": pb, "c": pc}, planner)
|
||||
|
||||
w1 := httptest.NewRecorder()
|
||||
done1 := make(chan struct{})
|
||||
go func() {
|
||||
b.ServeHTTP(w1, newRequest("a"))
|
||||
close(done1)
|
||||
}()
|
||||
waitProcessed(t, b.testProcessed, 1)
|
||||
<-a.runStarted
|
||||
|
||||
// B and C arrive while A is loading. evict_b and evict_c are empty,
|
||||
// but collidesWith returns true because they appear in A's evict set.
|
||||
w2 := httptest.NewRecorder()
|
||||
done2 := make(chan struct{})
|
||||
go func() {
|
||||
b.ServeHTTP(w2, newRequest("b"))
|
||||
close(done2)
|
||||
}()
|
||||
waitProcessed(t, b.testProcessed, 1)
|
||||
|
||||
w3 := httptest.NewRecorder()
|
||||
done3 := make(chan struct{})
|
||||
go func() {
|
||||
b.ServeHTTP(w3, newRequest("c"))
|
||||
close(done3)
|
||||
}()
|
||||
waitProcessed(t, b.testProcessed, 1)
|
||||
|
||||
if got := pb.runCalls.Load(); got != 0 {
|
||||
t.Errorf("b started early: runCalls=%d", got)
|
||||
}
|
||||
if got := pc.runCalls.Load(); got != 0 {
|
||||
t.Errorf("c started early: runCalls=%d", got)
|
||||
}
|
||||
|
||||
// Release A. The swapDone handler should drain the queue and start
|
||||
// both B and C in parallel.
|
||||
a.markReady()
|
||||
waitProcessed(t, b.testProcessed, 1) // swapDone(A) → drainQueue starts B and C
|
||||
<-pb.runStarted
|
||||
<-pc.runStarted
|
||||
|
||||
pb.markReady()
|
||||
pc.markReady()
|
||||
|
||||
for i, ch := range []chan struct{}{done1, done2, done3} {
|
||||
select {
|
||||
case <-ch:
|
||||
case <-time.After(time.Second):
|
||||
t.Fatalf("request %d did not complete", i)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TestBaseRouter_Shutdown_FailsAllInFlight verifies that shutdown returns
|
||||
// the shutdown error to every waiter on every active swap AND to every
|
||||
// queued request.
|
||||
func TestBaseRouter_Shutdown_FailsAllInFlight(t *testing.T) {
|
||||
a := newFakeProcess("a")
|
||||
pb := newFakeProcess("b")
|
||||
pc := newFakeProcess("c")
|
||||
|
||||
// a and b load in parallel (empty evicts). c collides with both.
|
||||
planner := &stubPlanner{evict: map[string][]string{
|
||||
"c": {"a", "b"},
|
||||
}}
|
||||
b := newTestBase(t, map[string]process.Process{"a": a, "b": pb, "c": pc}, planner)
|
||||
|
||||
const waitersPer = 2
|
||||
var wg sync.WaitGroup
|
||||
codes := make([]int, 0, 2*waitersPer+1)
|
||||
var codesMu sync.Mutex
|
||||
record := func(code int) {
|
||||
codesMu.Lock()
|
||||
codes = append(codes, code)
|
||||
codesMu.Unlock()
|
||||
}
|
||||
|
||||
launch := func(model string) {
|
||||
wg.Add(1)
|
||||
go func() {
|
||||
defer wg.Done()
|
||||
w := httptest.NewRecorder()
|
||||
b.ServeHTTP(w, newRequest(model))
|
||||
record(w.Code)
|
||||
}()
|
||||
}
|
||||
|
||||
// Active swaps for a and b, each with 2 waiters.
|
||||
for i := 0; i < waitersPer; i++ {
|
||||
launch("a")
|
||||
waitProcessed(t, b.testProcessed, 1)
|
||||
}
|
||||
for i := 0; i < waitersPer; i++ {
|
||||
launch("b")
|
||||
waitProcessed(t, b.testProcessed, 1)
|
||||
}
|
||||
// c collides with both → queues.
|
||||
launch("c")
|
||||
waitProcessed(t, b.testProcessed, 1)
|
||||
|
||||
<-a.runStarted
|
||||
<-pb.runStarted
|
||||
|
||||
if err := b.Shutdown(time.Second); err != nil {
|
||||
t.Fatalf("Shutdown: %v", err)
|
||||
}
|
||||
wg.Wait()
|
||||
|
||||
codesMu.Lock()
|
||||
defer codesMu.Unlock()
|
||||
if len(codes) != 2*waitersPer+1 {
|
||||
t.Fatalf("got %d responses, want %d", len(codes), 2*waitersPer+1)
|
||||
}
|
||||
for i, c := range codes {
|
||||
if c == http.StatusOK {
|
||||
t.Errorf("response %d: status=%d, want non-200 (shutdown)", i, c)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TestBaseRouter_NoSwapWhileServing verifies that an already-loaded model
|
||||
// is not stopped to satisfy another model's swap while it is still handling
|
||||
// a request.
|
||||
//
|
||||
// Sequence:
|
||||
// 1. r1 (A) — A loads; ServeHTTP enters and is pinned via serveBlock.
|
||||
// 2. r2 (B, planner: B evicts A) — must NOT cause A.Stop while r1 is live.
|
||||
// 3. r3 (A) — arrives next; the existing code queues it because B's swap
|
||||
// intent collides with A.
|
||||
// 4. r1 released — A finishes r1, then r3 is served by A.
|
||||
// 5. B's swap then proceeds; r2 is served by B.
|
||||
//
|
||||
// fakeProcess.stoppedWhileServing flips true if Stop is ever called while
|
||||
// a ServeHTTP is in flight — a direct, race-free signal of the violation.
|
||||
func TestBaseRouter_NoSwapWhileServing(t *testing.T) {
|
||||
a := newFakeProcess("a")
|
||||
// autoReady left false: we markReady manually after observing runStarted,
|
||||
// so autoReady's setState(Ready) cannot race with a later Stop and leave
|
||||
// A in Ready, masking the bug.
|
||||
a.serveBlock = make(chan struct{})
|
||||
pb := newFakeProcess("b")
|
||||
// Same reasoning for B: park its swap on WaitReady until we choose.
|
||||
|
||||
planner := &stubPlanner{evict: map[string][]string{"b": {"a"}}}
|
||||
b := newTestBase(t, map[string]process.Process{"a": a, "b": pb}, planner)
|
||||
|
||||
// r1 — load A and enter its ServeHTTP (which blocks on serveBlock).
|
||||
w1 := httptest.NewRecorder()
|
||||
done1 := make(chan struct{})
|
||||
go func() {
|
||||
b.ServeHTTP(w1, newRequest("a"))
|
||||
close(done1)
|
||||
}()
|
||||
waitProcessed(t, b.testProcessed, 1) // handlerReq for r1
|
||||
<-a.runStarted
|
||||
a.markReady()
|
||||
waitProcessed(t, b.testProcessed, 1) // swapDone for A
|
||||
<-a.serveStarted
|
||||
|
||||
// r2 — would evict A. A must not be stopped while r1 is in flight.
|
||||
w2 := httptest.NewRecorder()
|
||||
done2 := make(chan struct{})
|
||||
go func() {
|
||||
b.ServeHTTP(w2, newRequest("b"))
|
||||
close(done2)
|
||||
}()
|
||||
waitProcessed(t, b.testProcessed, 1)
|
||||
|
||||
// r3 — another request for A, arrives behind r2 and queues because
|
||||
// B's swap intent (which evicts A) is recorded as active.
|
||||
w3 := httptest.NewRecorder()
|
||||
done3 := make(chan struct{})
|
||||
go func() {
|
||||
b.ServeHTTP(w3, newRequest("a"))
|
||||
close(done3)
|
||||
}()
|
||||
waitProcessed(t, b.testProcessed, 1)
|
||||
|
||||
// Release r1 (and r3 if it is fast-pathed onto the still-loaded A).
|
||||
// The router must hold off B's swap until A has drained.
|
||||
close(a.serveBlock)
|
||||
|
||||
select {
|
||||
case <-done1:
|
||||
case <-time.After(2 * time.Second):
|
||||
t.Fatal("r1 did not complete after serveBlock release")
|
||||
}
|
||||
|
||||
// Wait for B.Run before marking it ready: markReady before Run would
|
||||
// skip the Run path entirely and leave pb.runCalls at 0. In a correct
|
||||
// implementation B's swap only starts after A has drained; in the
|
||||
// current implementation it has already started — either way runStarted
|
||||
// fires.
|
||||
<-pb.runStarted
|
||||
pb.markReady()
|
||||
|
||||
select {
|
||||
case <-done2:
|
||||
case <-time.After(2 * time.Second):
|
||||
t.Fatal("r2 did not complete after B marked ready")
|
||||
}
|
||||
select {
|
||||
case <-done3:
|
||||
case <-time.After(2 * time.Second):
|
||||
t.Fatal("r3 did not complete")
|
||||
}
|
||||
|
||||
if w1.Code != http.StatusOK || w2.Code != http.StatusOK || w3.Code != http.StatusOK {
|
||||
t.Fatalf("statuses: w1=%d w2=%d w3=%d", w1.Code, w2.Code, w3.Code)
|
||||
}
|
||||
if w1.Body.String() != "ok:a" {
|
||||
t.Errorf("r1 body=%q want ok:a", w1.Body.String())
|
||||
}
|
||||
if w3.Body.String() != "ok:a" {
|
||||
t.Errorf("r3 body=%q want ok:a (r3 must be served by A)", w3.Body.String())
|
||||
}
|
||||
if w2.Body.String() != "ok:b" {
|
||||
t.Errorf("r2 body=%q want ok:b", w2.Body.String())
|
||||
}
|
||||
if a.stoppedWhileServing.Load() {
|
||||
t.Errorf("A.Stop was called while A was still handling a request — the router swapped out a busy process")
|
||||
}
|
||||
}
|
||||
|
||||
func TestBaseRouter_ModelNotFound(t *testing.T) {
|
||||
a := newFakeProcess("a")
|
||||
b := newTestBase(t, map[string]process.Process{"a": a}, &stubPlanner{})
|
||||
|
||||
@@ -0,0 +1,404 @@
|
||||
# Router design
|
||||
|
||||
A developer tutorial for the `internal/router` package and its `scheduler`
|
||||
sub-package.
|
||||
|
||||
## Intro
|
||||
|
||||
A llama-swap router is the component that sits behind the proxy and answers one
|
||||
question for every incoming request: _can this model serve right now, and if
|
||||
not, what has to happen first?_ Answering it means juggling three concerns that
|
||||
used to live tangled together in one type:
|
||||
|
||||
1. **Process machinery** — owning the OS processes, starting and stopping them,
|
||||
running health checks, and shuttling HTTP requests onto the right upstream.
|
||||
2. **Scheduling strategy** — the queue, in-flight bookkeeping, and the decision
|
||||
tree that turns one request into "serve now", "join an existing swap",
|
||||
"queue", or "start a swap".
|
||||
3. **Eviction policy** — given a model we want to load, which currently-running
|
||||
models have to be stopped to make room?
|
||||
|
||||
The design pulls those three apart into separate, independently replaceable
|
||||
pieces:
|
||||
|
||||
| Concern | Type | Lives in |
|
||||
| ------------------- | ------------------------------ | ------------------------------- |
|
||||
| Process machinery | `baseRouter` | `internal/router/base.go` |
|
||||
| Scheduling strategy | `scheduler.Scheduler` (`FIFO`) | `internal/router/scheduler/` |
|
||||
| Eviction policy | `scheduler.Swapper` | `groupSwapper`, `matrixSwapper` |
|
||||
|
||||
`baseRouter` keeps the channels, run loop, process lifecycle, and shutdown
|
||||
teardown, and exposes the side-effects a scheduler needs through the
|
||||
`scheduler.Effects` interface. The scheduler owns the queue and decision tree
|
||||
but performs no side-effects directly — it calls back through `Effects`. The
|
||||
`Swapper` is a pure function from "target model + currently running" to "models
|
||||
to evict", and knows nothing about queues, channels, or processes.
|
||||
|
||||
Because the seams are interfaces, you can replace the scheduling strategy
|
||||
without touching process management, or write a new eviction policy without
|
||||
touching either. `FIFO` is the first and currently only `Scheduler`;
|
||||
`groupSwapper` and `matrixSwapper` are the two `Swapper`s.
|
||||
|
||||
## Key concepts
|
||||
|
||||
### One run loop, no locks
|
||||
|
||||
`baseRouter.run()` is a single goroutine selecting over a handful of channels:
|
||||
|
||||
```go
|
||||
for {
|
||||
select {
|
||||
case req := <-b.shutdownCh: b.handleShutdown(req); return
|
||||
case req := <-b.handlerCh: b.schedule.OnRequest(req)
|
||||
case req := <-b.unloadCh: b.schedule.OnUnload(req.targets, req.timeout); close(req.respond)
|
||||
case ev := <-b.swapDoneCh: b.schedule.OnSwapDone(ev)
|
||||
case ev := <-b.serveDoneCh: b.schedule.OnServeDone(ev)
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Every `Scheduler` method runs on this one goroutine. That is the single most
|
||||
important fact about the design: **the scheduler never needs a mutex for its own
|
||||
state**. All scheduler state is touched only from these callbacks, which are
|
||||
serialized by the run loop. If you write a new scheduler, you get the same
|
||||
guarantee for free — and you must not break it by spinning up goroutines that
|
||||
mutate scheduler state.
|
||||
|
||||
### Events flow in, side-effects flow out
|
||||
|
||||
The run loop turns external happenings into method calls on the scheduler:
|
||||
|
||||
- A new HTTP request becomes `OnRequest(HandlerReq)`.
|
||||
- A swap goroutine finishing becomes `OnSwapDone(SwapDone)`.
|
||||
- A tracked request handler returning becomes `OnServeDone(ServeDoneEvent)`.
|
||||
- An admin unload becomes `OnUnload(targets, timeout)`.
|
||||
- Shutdown becomes `OnShutdown(err)`.
|
||||
|
||||
The scheduler reacts by calling **back out** through `Effects`: inspect a
|
||||
process state, start a swap, grant a response to a caller, or stop processes. It
|
||||
never calls `process.Process` directly and never writes to a channel directly.
|
||||
This keeps the scheduler pure enough to unit-test against a fake `Effects` with
|
||||
no goroutines or real processes involved (see `scheduler/fifo_test.go`).
|
||||
|
||||
```
|
||||
HTTP request admin Unload / Shutdown
|
||||
│ │
|
||||
▼ ▼
|
||||
ServeHTTP ──HandlerReq──▶ baseRouter.run() ◀──unloadCh/shutdownCh
|
||||
│ (single goroutine)
|
||||
▼
|
||||
Scheduler.On*(...)
|
||||
│ calls back through
|
||||
▼
|
||||
Effects: ModelState / StartSwap /
|
||||
GrantServe / GrantError / StopProcesses
|
||||
│
|
||||
▼
|
||||
baseRouter side-effects: doSwap goroutine,
|
||||
grant() to caller, process.Stop()
|
||||
│
|
||||
swap completes ──SwapDone──▶ back into run loop
|
||||
```
|
||||
|
||||
### The swap goroutine
|
||||
|
||||
Scheduling decisions must be quick and non-blocking, but loading a model is
|
||||
slow. The two are reconciled by doing the slow part on a separate goroutine.
|
||||
|
||||
When the scheduler decides to start a swap, inside `OnRequest` it:
|
||||
|
||||
1. records "a swap for X is in flight" in its own state, then
|
||||
2. calls `Effects.StartSwap(modelID, evict)`.
|
||||
|
||||
`StartSwap` does **not** load the model itself — it just launches a detached
|
||||
goroutine (`doSwap`) and returns straight away. `doSwap` is what does the slow
|
||||
work: stop the evicted processes, start the target, wait for it to become ready.
|
||||
Because `StartSwap` returned immediately, `OnRequest` returns too, and the run
|
||||
loop is free to pick up the next event — another request, a serve-done, an
|
||||
unload — while `doSwap` runs in the background.
|
||||
|
||||
The swap's eventual result comes back as just another event: when `doSwap`
|
||||
finishes it posts a `SwapDone` onto `swapDoneCh`, which the run loop delivers as
|
||||
`OnSwapDone`. So a slow load never blocks the run loop; it is bracketed by two
|
||||
quick events (`OnRequest` to start, `OnSwapDone` to finish) and everything in
|
||||
between is handled normally.
|
||||
|
||||
### In-flight tracking and `trackedServe`
|
||||
|
||||
When the scheduler grants a request, the handler it hands back is wrapped by
|
||||
`baseRouter.trackedServe`. The wrapper runs the real `ServeHTTP` and, on return,
|
||||
posts a `ServeDoneEvent` so the run loop can decrement the per-model in-flight
|
||||
count. This is why the scheduler can know whether a process is "busy": it counts
|
||||
grants out and serve-dones in. A swap that would evict a busy process is
|
||||
deferred until that process's in-flight count hits zero (`OnServeDone` then
|
||||
re-drains the queue).
|
||||
|
||||
The subtle contract here is `GrantServe`'s boolean return. The caller's
|
||||
`Respond` channel is unbuffered, so a successful send proves the HTTP goroutine
|
||||
is alive and took the handler. If the caller already disconnected, the send
|
||||
fails, `trackedServe` never runs, and **no** `ServeDoneEvent` will ever arrive —
|
||||
so the scheduler must only increment `inFlight` when `GrantServe` returns true.
|
||||
Incrementing on a false return would strand the counter above zero and the model
|
||||
could never be evicted again.
|
||||
|
||||
## The interfaces
|
||||
|
||||
All three interfaces — `Scheduler`, `Swapper`, and `Effects` — live in `scheduler/scheduler.go`.
|
||||
|
||||
### `Scheduler`
|
||||
|
||||
```go
|
||||
type Scheduler interface {
|
||||
OnRequest(req HandlerReq)
|
||||
OnSwapDone(ev SwapDone)
|
||||
OnServeDone(ev ServeDoneEvent)
|
||||
OnUnload(targets []string, timeout time.Duration)
|
||||
OnShutdown(err error)
|
||||
}
|
||||
```
|
||||
|
||||
Owns the queue, in-flight tracking, and the decision tree. All methods run on
|
||||
the run-loop goroutine, so no internal locking is needed.
|
||||
|
||||
### `Swapper`
|
||||
|
||||
```go
|
||||
type Swapper interface {
|
||||
EvictionFor(target string, running []string) []string
|
||||
OnSwapStart(target string, running []string)
|
||||
}
|
||||
```
|
||||
|
||||
The eviction policy. `EvictionFor` is a **pure decision** — given the target and
|
||||
the complete `running` set, return the running model IDs that must stop. It must
|
||||
not log or mutate anything, and it does **not** inspect process state itself:
|
||||
the scheduler hands it `running` already assembled (every non-stopped process,
|
||||
unioned with the targets of in-flight swaps already committed but not yet
|
||||
visible in process state). That keeps the swapper a pure function of its inputs,
|
||||
with no reference to processes.
|
||||
|
||||
The reason `EvictionFor` must not log is that it is a _speculative_ query — "what would we
|
||||
evict if we started this swap right now?" — called far more often than swaps
|
||||
actually happen. The scheduler calls it once per incoming request, and then
|
||||
**again for every still-queued request on every queue drain** (each `OnSwapDone`,
|
||||
`OnServeDone`, and `OnUnload`). Most of those calls end in "still queued",
|
||||
"collides", or "nothing to evict", not a real swap. Logging there would emit
|
||||
duplicate lines for a request that simply sits in the queue, and lines for
|
||||
decisions that never happen — the log would stop meaning "a swap occurred".
|
||||
|
||||
`OnSwapStart` is the one place a Swapper may log, because it is called exactly
|
||||
once per swap, at the moment that swap is committed. One log line there equals one real swap,
|
||||
with the evict set that is genuinely being applied — which is why `matrixSwapper`
|
||||
re-solves and logs the full decision (set, DSL, cost) in `OnSwapStart` rather
|
||||
than in `EvictionFor`.
|
||||
|
||||
### `Effects`
|
||||
|
||||
```go
|
||||
type Effects interface {
|
||||
ModelState(modelID string) (process.ProcessState, bool)
|
||||
RunningModels() map[string]process.ProcessState
|
||||
StartSwap(modelID string, evict []string)
|
||||
GrantError(req HandlerReq, err error)
|
||||
GrantServe(req HandlerReq, modelID string) bool
|
||||
StopProcesses(timeout time.Duration, ids []string)
|
||||
}
|
||||
```
|
||||
|
||||
Implemented by `baseRouter`. This is the scheduler's entire window onto the
|
||||
outside world; everything else about the router is hidden from it. See the
|
||||
deep-dive below.
|
||||
|
||||
### `Factory` — wiring it together
|
||||
|
||||
```go
|
||||
type Factory func(name string, logger *logmon.Monitor, eff Effects) Scheduler
|
||||
```
|
||||
|
||||
`baseRouter` doesn't know which scheduler or swapper it has — it is handed a
|
||||
`Factory` at construction and calls it once, passing itself as the `Effects`.
|
||||
The concrete router captures its `Swapper` in the closure. From `group.go`:
|
||||
|
||||
```go
|
||||
swapper := &groupSwapper{ /* ... */ }
|
||||
base := newBaseRouter("group", conf, processes, proxylog,
|
||||
func(name string, logger *logmon.Monitor, eff scheduler.Effects) scheduler.Scheduler {
|
||||
return scheduler.NewFIFO(name, logger, swapper, eff)
|
||||
})
|
||||
```
|
||||
|
||||
This closure is the single point where the three pieces meet: it binds a
|
||||
specific `Swapper` (`swapper`) and a specific `Scheduler` (`FIFO`) to the
|
||||
`baseRouter`'s `Effects` (`eff`).
|
||||
|
||||
**The swapper is a separate type from the concrete router.** There are currently two router implementations: `router.Group` and `router.Matrix`. Each has its own swapper that implements `scheduler.Swapper` with its own eviction logic. This decoupling of responsibilities makes it easy to implement custom swapping strategies.
|
||||
|
||||
### The events
|
||||
|
||||
A single goroutine in `baseRouter.run()` owns and serializes all state changes in the router. By processing events one at a time, it ensures correctness and eliminates the need for complex mutex locking.
|
||||
|
||||
These are the events the router currently uses:
|
||||
|
||||
```go
|
||||
type HandlerReq struct { // one in-flight ServeHTTP awaiting a decision
|
||||
Model string
|
||||
Ctx context.Context
|
||||
Respond chan HandlerResp // UNBUFFERED — see GrantServe contract
|
||||
PositionCh chan int // queue-position updates for the loading UI
|
||||
}
|
||||
|
||||
type HandlerResp struct { // the decision handed back to the caller
|
||||
HandleFunc http.HandlerFunc // serve with this, or...
|
||||
Err error // ...fail with this
|
||||
}
|
||||
|
||||
type SwapDone struct{ ModelID string; Err error } // swap goroutine finished
|
||||
type ServeDoneEvent struct{ ModelID string } // tracked handler returned
|
||||
```
|
||||
|
||||
## Deep-dive: the `Effects` interface and why it exists
|
||||
|
||||
`Effects` is the inversion-of-control boundary that makes the split possible.
|
||||
The scheduler _decides_ and `baseRouter` _acts_. Pulling the side effects behind this
|
||||
interface buys three things:
|
||||
|
||||
1. **Purity and testability.** The scheduler performs no I/O, starts no
|
||||
goroutines of its own, and touches no real processes. Its tests drive the
|
||||
`On*` methods directly and assert on a `fakeEffects` that just records the
|
||||
calls — synchronous, deterministic, no sleeps. (`scheduler/fifo_test.go`.)
|
||||
2. **A single, auditable side-effect surface.** Every externally-visible thing a
|
||||
scheduler can do is one of six methods. You can reason about the whole
|
||||
contract by reading one interface.
|
||||
3. **Decoupling lifetime.** The scheduler never holds a `process.Process`,
|
||||
never sees a channel, and never learns how shutdown teardown works. It only
|
||||
knows model IDs and states.
|
||||
|
||||
Method by method, as implemented in `base.go`:
|
||||
|
||||
- **`ModelState(modelID) (state, ok)`** — read-only snapshot of a process's
|
||||
state, and whether this router handles the model at all. The scheduler uses it
|
||||
for the "unknown model" check and the "already ready" fast path. Safe to call
|
||||
any time because the process map is fixed at construction and `State()` is a
|
||||
snapshot.
|
||||
|
||||
- **`RunningModels()`** — the state of every process that isn't stopped or shut
|
||||
down. The scheduler unions its keys with its own in-flight swap targets to
|
||||
build the `running` set it hands the `Swapper`, so the swapper never has to
|
||||
touch process state itself.
|
||||
|
||||
- **`StartSwap(modelID, evict)`** — fire-and-forget. `baseRouter` launches the
|
||||
`doSwap` goroutine and returns immediately; the result comes back later as a
|
||||
`SwapDone`. The scheduler records the swap as active _before_ calling this so
|
||||
that requests arriving in the meantime can join it.
|
||||
|
||||
- **`GrantError(req, err)`** — hand a caller an error response. Used for unknown
|
||||
models, failed swaps, unloads, and shutdown.
|
||||
|
||||
- **`GrantServe(req, modelID) bool`** — hand a caller the tracked handler for a
|
||||
ready model, returning whether the caller was still there to receive it. The
|
||||
scheduler increments the in-flight count **only on a true return** (see the
|
||||
in-flight contract above). This is the one `Effects` method whose return value
|
||||
carries state-machine significance.
|
||||
|
||||
- **`StopProcesses(timeout, ids)`** — stop processes in parallel and **block**
|
||||
until all have stopped. Used by `OnUnload` so an admin `Unload` call can
|
||||
guarantee the process is dead by the time it returns. (Note `StartSwap` is
|
||||
async but `StopProcesses` is sync — the difference is deliberate and tied to
|
||||
the caller's expectations.)
|
||||
|
||||
A useful way to hold it in your head: `Effects` is the scheduler's syscall
|
||||
table. The scheduler is a pure state machine; `Effects` is how it touches the
|
||||
world, and `baseRouter` is the kernel that implements those syscalls with real
|
||||
goroutines, channels, and processes.
|
||||
|
||||
## How to implement a new `Swapper`
|
||||
|
||||
A `Swapper` is a pure decision function plus a logging hook — the easiest of the three pieces to replace.
|
||||
|
||||
1. **Write the swapper type** and give it whatever config it needs to make a
|
||||
decision. It does **not** need the process map — the scheduler supplies the
|
||||
running set as an argument. `groupSwapper` holds only its group config;
|
||||
`matrixSwapper` holds only its solver and logger:
|
||||
|
||||
```go
|
||||
type mySwapper struct {
|
||||
config config.Config
|
||||
}
|
||||
```
|
||||
|
||||
2. **Implement `EvictionFor(target, running)`** as a _pure_ decision:
|
||||
- `running` is the complete live set, already assembled for you: every
|
||||
non-stopped process unioned with the targets of in-flight swaps the
|
||||
scheduler has committed to. You don't filter process state or fold in
|
||||
in-flight targets yourself — that's the scheduler's job. Just decide against the slice you're handed.
|
||||
- Return the list of model IDs in `running` that must stop for `target` to
|
||||
run. Return `nil`/empty when nothing needs evicting.
|
||||
- Do **not** mutate state here.
|
||||
- Do **not** log here. It can be called multiple times per request. Since it is a pure function, have tests verify the expected behaviour.
|
||||
|
||||
3. **Implement `OnSwapStart(target, running)`** — called once when a swap
|
||||
actually begins, with the same `running` set `EvictionFor` saw. This is the
|
||||
right place to log: one call equals one real swap. `matrixSwapper` re-solves
|
||||
and logs the chosen set and cost here; `groupSwapper` logs nothing.
|
||||
|
||||
4. **Wire it in** by instantiating the swapper in your router's constructor and
|
||||
capturing it in the `Factory` closure passed to `newBaseRouter` — exactly as
|
||||
`NewGroup` and `NewMatrix` do. The router struct itself only ever embeds
|
||||
`*baseRouter`; the swapper reaches the scheduler solely through that closure.
|
||||
|
||||
Reference implementations: `groupSwapper` (static group config) in `group.go`
|
||||
and `matrixSwapper` (cost-based set solver) in `matrix.go`.
|
||||
|
||||
## How to implement a new `Scheduler`
|
||||
|
||||
Replacing the scheduler means taking over the queue and the entire decision tree. Read `scheduler/fifo.go` end to end first — it is the reference implementation and the rules below are easiest to understand in context.
|
||||
|
||||
The rules you must honour:
|
||||
|
||||
- **Single goroutine.** Every method runs on the `baseRouter.run()` goroutine. Keep your state in plain maps/slices and never read or write it from another goroutine. If you need slow work done, hand it to `Effects.StartSwap` and react to the resulting `SwapDone` — do not block a method waiting for it.
|
||||
|
||||
- **Never block the run loop.** `OnRequest`, `OnSwapDone`, and `OnServeDone` must make a decision and return. The one method allowed to block is `OnUnload`, and only because it must wait on the synchronous `StopProcesses` so the admin caller's guarantee holds.
|
||||
|
||||
- **Respect the `GrantServe` boolean.** Only count a request as in-flight when `GrantServe` returns true (see the in-flight contract above). A false return means the caller is gone; no `ServeDoneEvent` will ever arrive, so incrementing on false permanently strands the counter.
|
||||
|
||||
- **Account for in-flight swaps in your running set.** When you call `Swapper.EvictionFor`, the running set you pass must include not just live processes (`Effects.RunningModels`) but also the targets of swaps you've already started that aren't yet visible in process state — otherwise the swapper contradicts decisions already in motion.
|
||||
|
||||
What each method must do:
|
||||
|
||||
- **`OnRequest(req)`** — every request must resolve to exactly one of: granted, errored, joined (piggybacks an in-flight swap), queued, or swap-started. No request may be silently dropped.
|
||||
|
||||
- **`OnSwapDone(ev)`** — deliver the result to every waiter that joined this swap (grant on success, error on `ev.Err`), drop the swap from active tracking, then re-examine anything queued — a finished swap may have unblocked it.
|
||||
|
||||
- **`OnServeDone(ev)`** — decrement the model's in-flight count; when it hits zero, re-examine the queue. Do **not** clear in-flight counts by hand; the handlers post their own `ServeDoneEvent`s on return.
|
||||
|
||||
- **`OnUnload(targets, timeout)`** — error out any waiters or queued requests for the unloaded models, call `Effects.StopProcesses` (synchronously — the admin caller relies on the process being dead afterwards), then re-examine the queue.
|
||||
|
||||
- **`OnShutdown(err)`** — error out every waiter you still hold (active swap waiters and queued requests). Don't touch processes; teardown is `baseRouter`'s job.
|
||||
|
||||
Expose a constructor matching the `Factory` shape:
|
||||
|
||||
```go
|
||||
func NewMyScheduler(name string, logger *logmon.Monitor, swapper Swapper, eff Effects) *MyScheduler {
|
||||
// ...
|
||||
}
|
||||
|
||||
// in the concrete router:
|
||||
base := newBaseRouter(name, conf, processes, proxylog,
|
||||
func(name string, logger *logmon.Monitor, eff scheduler.Effects) scheduler.Scheduler {
|
||||
return scheduler.NewMyScheduler(name, logger, swapper, eff)
|
||||
})
|
||||
```
|
||||
|
||||
## Testing
|
||||
|
||||
- **Schedulers** are tested as pure state machines in the `scheduler` package:
|
||||
drive the `On*` methods directly against a `fakeEffects` and assert on the
|
||||
recorded grants/starts/stops. No goroutines, no sleeps. See
|
||||
`scheduler/fifo_test.go` as the reference; follow the `TestSchedulerName_<scenario>`
|
||||
naming convention.
|
||||
- **`baseRouter` mechanism** (run loop, `grant`/`ServeHTTP`, `Unload`,
|
||||
`Shutdown`) is tested in `base_test.go`. The run loop exposes a
|
||||
`testProcessed` channel so tests can wait for an event to be fully processed
|
||||
instead of sleeping.
|
||||
- Run new tests with `go test -v -run TestMyScheduler_... ./internal/router/scheduler/`,
|
||||
then `make test-dev` for a quick `go test` + `staticcheck` pass over `proxy/`.
|
||||
+13
-19
@@ -14,7 +14,7 @@ type Group struct {
|
||||
|
||||
func NewGroup(conf config.Config, proxylog, upstreamlog *logmon.Monitor) (*Group, error) {
|
||||
modelToGroup := make(map[string]string)
|
||||
for gid, gcfg := range conf.Groups {
|
||||
for gid, gcfg := range conf.Routing.Router.Settings.Groups {
|
||||
for _, mid := range gcfg.Members {
|
||||
if existing, dup := modelToGroup[mid]; dup {
|
||||
return nil, fmt.Errorf("model %q is in multiple groups: %q and %q", mid, existing, gid)
|
||||
@@ -23,14 +23,16 @@ func NewGroup(conf config.Config, proxylog, upstreamlog *logmon.Monitor) (*Group
|
||||
}
|
||||
}
|
||||
|
||||
planner := &groupPlanner{
|
||||
swapper := &groupSwapper{
|
||||
config: conf,
|
||||
modelToGroup: modelToGroup,
|
||||
}
|
||||
|
||||
processes := make(map[string]process.Process, len(modelToGroup))
|
||||
base := newBaseRouter("group", conf, processes, planner, proxylog)
|
||||
planner.processes = processes
|
||||
base, err := newBaseRouter("group", conf, processes, proxylog, swapper)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("creating base router: %w", err)
|
||||
}
|
||||
|
||||
for mid := range modelToGroup {
|
||||
modelCfg, _, ok := conf.FindConfig(mid)
|
||||
@@ -54,21 +56,20 @@ func NewGroup(conf config.Config, proxylog, upstreamlog *logmon.Monitor) (*Group
|
||||
return g, nil
|
||||
}
|
||||
|
||||
// groupPlanner decides evictions from static group configuration.
|
||||
// groupSwapper decides evictions from static group configuration.
|
||||
//
|
||||
// Same-group siblings are stopped when the group has swap=true. Cross-group
|
||||
// members are stopped only when the target's group is exclusive; loading a
|
||||
// model from a non-exclusive group leaves running exclusive groups alone,
|
||||
// matching the gotcha in the original ProcessGroup behaviour.
|
||||
type groupPlanner struct {
|
||||
type groupSwapper struct {
|
||||
config config.Config
|
||||
modelToGroup map[string]string
|
||||
processes map[string]process.Process
|
||||
}
|
||||
|
||||
func (p *groupPlanner) EvictionFor(target string, alsoRunning []string) []string {
|
||||
func (p *groupSwapper) EvictionFor(target string, running []string) []string {
|
||||
tg := p.modelToGroup[target]
|
||||
tgCfg := p.config.Groups[tg]
|
||||
tgCfg := p.config.Routing.Router.Settings.Groups[tg]
|
||||
|
||||
seen := make(map[string]struct{})
|
||||
var result []string
|
||||
@@ -89,24 +90,17 @@ func (p *groupPlanner) EvictionFor(target string, alsoRunning []string) []string
|
||||
// for backwards compatibility. The newer swap matrix approach does not
|
||||
// have this issue.
|
||||
case og != tg && tgCfg.Exclusive:
|
||||
if ogCfg := p.config.Groups[og]; !ogCfg.Persistent {
|
||||
if ogCfg := p.config.Routing.Router.Settings.Groups[og]; !ogCfg.Persistent {
|
||||
seen[mID] = struct{}{}
|
||||
result = append(result, mID)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for mID, proc := range p.processes {
|
||||
st := proc.State()
|
||||
if st == process.StateStopped || st == process.StateShutdown {
|
||||
continue
|
||||
}
|
||||
consider(mID)
|
||||
}
|
||||
for _, mID := range alsoRunning {
|
||||
for _, mID := range running {
|
||||
consider(mID)
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
func (p *groupPlanner) OnSwapStart(target string) {}
|
||||
func (p *groupSwapper) OnSwapStart(target string, running []string) {}
|
||||
|
||||
@@ -17,17 +17,19 @@ import (
|
||||
func newTestGroup(t *testing.T, conf config.Config, processes map[string]process.Process) *Group {
|
||||
t.Helper()
|
||||
modelToGroup := make(map[string]string)
|
||||
for gid, gcfg := range conf.Groups {
|
||||
for gid, gcfg := range conf.Routing.Router.Settings.Groups {
|
||||
for _, mid := range gcfg.Members {
|
||||
modelToGroup[mid] = gid
|
||||
}
|
||||
}
|
||||
planner := &groupPlanner{
|
||||
swapper := &groupSwapper{
|
||||
config: conf,
|
||||
modelToGroup: modelToGroup,
|
||||
processes: processes,
|
||||
}
|
||||
base := newBaseRouter("group", conf, processes, planner, logmon.NewWriter(io.Discard))
|
||||
base, err := newBaseRouter("group", conf, processes, logmon.NewWriter(io.Discard), swapper)
|
||||
if err != nil {
|
||||
t.Fatalf("newBaseRouter: %v", err)
|
||||
}
|
||||
base.testProcessed = make(chan struct{}, 64)
|
||||
g := &Group{baseRouter: base}
|
||||
go base.run()
|
||||
@@ -41,10 +43,10 @@ func newTestGroup(t *testing.T, conf config.Config, processes map[string]process
|
||||
|
||||
func TestGroup_NewGroup_DuplicateMembership(t *testing.T) {
|
||||
conf := config.Config{
|
||||
Groups: map[string]config.GroupConfig{
|
||||
Routing: groupRouting(map[string]config.GroupConfig{
|
||||
"g1": {Swap: true, Members: []string{"a"}},
|
||||
"g2": {Swap: true, Members: []string{"a"}},
|
||||
},
|
||||
}),
|
||||
Models: map[string]config.ModelConfig{
|
||||
"a": {},
|
||||
},
|
||||
@@ -65,9 +67,9 @@ func TestGroup_ServeHTTP_SwapStopsPrevious(t *testing.T) {
|
||||
|
||||
conf := config.Config{
|
||||
HealthCheckTimeout: 5,
|
||||
Groups: map[string]config.GroupConfig{
|
||||
Routing: groupRouting(map[string]config.GroupConfig{
|
||||
"g": {Swap: true, Exclusive: true, Members: []string{"a", "b"}},
|
||||
},
|
||||
}),
|
||||
}
|
||||
g := newTestGroup(t, conf, map[string]process.Process{"a": a, "b": b})
|
||||
|
||||
@@ -97,9 +99,9 @@ func TestGroup_NonSwapGroup_NoStop(t *testing.T) {
|
||||
|
||||
conf := config.Config{
|
||||
HealthCheckTimeout: 5,
|
||||
Groups: map[string]config.GroupConfig{
|
||||
Routing: groupRouting(map[string]config.GroupConfig{
|
||||
"g": {Swap: false, Exclusive: false, Members: []string{"a", "b"}},
|
||||
},
|
||||
}),
|
||||
}
|
||||
g := newTestGroup(t, conf, map[string]process.Process{"a": a, "b": b})
|
||||
|
||||
@@ -127,10 +129,10 @@ func TestGroup_CrossGroupExclusive(t *testing.T) {
|
||||
|
||||
conf := config.Config{
|
||||
HealthCheckTimeout: 5,
|
||||
Groups: map[string]config.GroupConfig{
|
||||
Routing: groupRouting(map[string]config.GroupConfig{
|
||||
"g1": {Swap: true, Exclusive: true, Members: []string{"a"}},
|
||||
"g2": {Swap: true, Exclusive: true, Members: []string{"b"}},
|
||||
},
|
||||
}),
|
||||
}
|
||||
g := newTestGroup(t, conf, map[string]process.Process{"a": a, "b": b})
|
||||
|
||||
@@ -154,10 +156,10 @@ func TestGroup_CrossGroupNonExclusiveParallel(t *testing.T) {
|
||||
|
||||
conf := config.Config{
|
||||
HealthCheckTimeout: 5,
|
||||
Groups: map[string]config.GroupConfig{
|
||||
Routing: groupRouting(map[string]config.GroupConfig{
|
||||
"g1": {Swap: true, Exclusive: false, Members: []string{"a"}},
|
||||
"g2": {Swap: true, Exclusive: false, Members: []string{"b"}},
|
||||
},
|
||||
}),
|
||||
}
|
||||
g := newTestGroup(t, conf, map[string]process.Process{"a": a, "b": pb})
|
||||
|
||||
@@ -202,16 +204,17 @@ func TestGroup_CrossGroupNonExclusiveParallel(t *testing.T) {
|
||||
|
||||
// TestGroup_SameGroupSwapSerialises verifies that two same-group requests
|
||||
// (Swap=true) serialise even when both arrive while neither has reached
|
||||
// StateStarting yet — the alsoRunning hint to the planner closes that race.
|
||||
// StateStarting yet — the in-flight swap target the scheduler folds into the
|
||||
// running set closes that race.
|
||||
func TestGroup_SameGroupSwapSerialises(t *testing.T) {
|
||||
a := newFakeProcess("a")
|
||||
pb := newFakeProcess("b")
|
||||
|
||||
conf := config.Config{
|
||||
HealthCheckTimeout: 5,
|
||||
Groups: map[string]config.GroupConfig{
|
||||
Routing: groupRouting(map[string]config.GroupConfig{
|
||||
"g": {Swap: true, Exclusive: false, Members: []string{"a", "b"}},
|
||||
},
|
||||
}),
|
||||
}
|
||||
g := newTestGroup(t, conf, map[string]process.Process{"a": a, "b": pb})
|
||||
|
||||
@@ -224,8 +227,9 @@ func TestGroup_SameGroupSwapSerialises(t *testing.T) {
|
||||
waitProcessed(t, g.testProcessed, 1)
|
||||
|
||||
// Request B arrives before A transitions to StateStarting in the process
|
||||
// state machine. Without the alsoRunning hint, the planner would not see
|
||||
// A as running, and B would start in parallel, violating Swap=true.
|
||||
// state machine. Without folding the in-flight swap target into the running
|
||||
// set, the swapper would not see A as running, and B would start in
|
||||
// parallel, violating Swap=true.
|
||||
w2 := httptest.NewRecorder()
|
||||
done2 := make(chan struct{})
|
||||
go func() {
|
||||
@@ -269,10 +273,10 @@ func TestGroup_PersistentNotEvicted(t *testing.T) {
|
||||
|
||||
conf := config.Config{
|
||||
HealthCheckTimeout: 5,
|
||||
Groups: map[string]config.GroupConfig{
|
||||
Routing: groupRouting(map[string]config.GroupConfig{
|
||||
"persist": {Swap: true, Exclusive: false, Persistent: true, Members: []string{"a"}},
|
||||
"other": {Swap: true, Exclusive: true, Members: []string{"b"}},
|
||||
},
|
||||
}),
|
||||
}
|
||||
g := newTestGroup(t, conf, map[string]process.Process{"a": a, "b": b})
|
||||
|
||||
@@ -306,10 +310,10 @@ func TestGroup_NonExclusiveDoesNotUnloadExclusive(t *testing.T) {
|
||||
|
||||
conf := config.Config{
|
||||
HealthCheckTimeout: 5,
|
||||
Groups: map[string]config.GroupConfig{
|
||||
Routing: groupRouting(map[string]config.GroupConfig{
|
||||
"g1": {Swap: true, Exclusive: true, Members: []string{"a"}},
|
||||
"g2": {Swap: true, Exclusive: false, Members: []string{"b"}},
|
||||
},
|
||||
}),
|
||||
}
|
||||
g := newTestGroup(t, conf, map[string]process.Process{"a": a, "b": b})
|
||||
|
||||
|
||||
@@ -12,10 +12,23 @@ import (
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/mostlygeek/llama-swap/internal/config"
|
||||
"github.com/mostlygeek/llama-swap/internal/logmon"
|
||||
"github.com/mostlygeek/llama-swap/internal/process"
|
||||
)
|
||||
|
||||
// groupRouting builds a normalized RoutingConfig for the group router, mirroring
|
||||
// what config.LoadConfigFromReader produces. Tests use it to populate
|
||||
// config.Config.Routing without going through LoadConfig.
|
||||
func groupRouting(groups map[string]config.GroupConfig) config.RoutingConfig {
|
||||
return config.RoutingConfig{
|
||||
Router: config.RouterConfig{
|
||||
Use: "group",
|
||||
Settings: config.RouterSettings{Groups: groups},
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
// fakeProcess is an in-memory implementation of process.Process used to drive
|
||||
// the routers through their state machine without spawning real upstreams.
|
||||
type fakeProcess struct {
|
||||
|
||||
@@ -226,69 +226,6 @@ func TestIsLoadingPath(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestExtractContext_Streaming_GET(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
query string
|
||||
wantStreaming bool
|
||||
}{
|
||||
{"streaming true", "model=llama3&stream=true", true},
|
||||
{"streaming false", "model=llama3&stream=false", false},
|
||||
{"no stream param", "model=llama3", false},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
r, _ := http.NewRequest(http.MethodGet, "/?"+tt.query, nil)
|
||||
got, err := ExtractContext(r)
|
||||
if err != nil {
|
||||
t.Fatalf("ExtractContext: %v", err)
|
||||
}
|
||||
if got.Streaming != tt.wantStreaming {
|
||||
t.Errorf("Streaming: want %v, got %v", tt.wantStreaming, got.Streaming)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestExtractContext_Streaming_JSON(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
body string
|
||||
wantStreaming bool
|
||||
}{
|
||||
{"streaming true", `{"model":"llama3","stream":true}`, true},
|
||||
{"streaming false", `{"model":"llama3","stream":false}`, false},
|
||||
{"no stream param", `{"model":"llama3"}`, false},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
r, _ := http.NewRequest(http.MethodPost, "/v1/chat/completions", strings.NewReader(tt.body))
|
||||
r.Header.Set("Content-Type", "application/json")
|
||||
got, err := ExtractContext(r)
|
||||
if err != nil {
|
||||
t.Fatalf("ExtractContext: %v", err)
|
||||
}
|
||||
if got.Streaming != tt.wantStreaming {
|
||||
t.Errorf("Streaming: want %v, got %v", tt.wantStreaming, got.Streaming)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestExtractContext_Streaming_URLEncodedForm(t *testing.T) {
|
||||
r, _ := http.NewRequest(http.MethodPost, "/v1/audio/transcriptions", strings.NewReader("model=whisper-1&stream=true"))
|
||||
r.Header.Set("Content-Type", "application/x-www-form-urlencoded")
|
||||
got, err := ExtractContext(r)
|
||||
if err != nil {
|
||||
t.Fatalf("ExtractContext: %v", err)
|
||||
}
|
||||
if !got.Streaming {
|
||||
t.Error("Streaming should be true")
|
||||
}
|
||||
}
|
||||
|
||||
func countSSEMessages(s string) int {
|
||||
scanner := bufio.NewScanner(strings.NewReader(s))
|
||||
count := 0
|
||||
|
||||
+16
-45
@@ -2,7 +2,6 @@ package router
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"sort"
|
||||
|
||||
"github.com/mostlygeek/llama-swap/internal/config"
|
||||
"github.com/mostlygeek/llama-swap/internal/logmon"
|
||||
@@ -14,20 +13,23 @@ type Matrix struct {
|
||||
}
|
||||
|
||||
func NewMatrix(conf config.Config, proxylog, upstreamlog *logmon.Monitor) (*Matrix, error) {
|
||||
if conf.Matrix == nil {
|
||||
mtx := conf.Routing.Router.Settings.Matrix
|
||||
if mtx == nil {
|
||||
return nil, fmt.Errorf("matrix router requires a matrix configuration")
|
||||
}
|
||||
|
||||
planner := &matrixPlanner{
|
||||
solver: newMatrixSolver(conf.ExpandedSets, conf.Matrix.ResolvedEvictCosts()),
|
||||
swapper := &matrixSwapper{
|
||||
solver: newMatrixSolver(mtx.ExpandedSets, mtx.ResolvedEvictCosts()),
|
||||
logger: proxylog,
|
||||
}
|
||||
|
||||
// Build a process for every model in the config. Any model can run alone
|
||||
// even if it is not part of a set; this mirrors proxy.NewMatrix.
|
||||
processes := make(map[string]process.Process, len(conf.Models))
|
||||
base := newBaseRouter("matrix", conf, processes, planner, proxylog)
|
||||
planner.processes = processes
|
||||
base, err := newBaseRouter("matrix", conf, processes, proxylog, swapper)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("creating base router: %w", err)
|
||||
}
|
||||
|
||||
for mid, modelCfg := range conf.Models {
|
||||
procLog := logmon.NewWriter(upstreamlog)
|
||||
@@ -45,20 +47,18 @@ func NewMatrix(conf config.Config, proxylog, upstreamlog *logmon.Monitor) (*Matr
|
||||
return r, nil
|
||||
}
|
||||
|
||||
// matrixPlanner decides evictions by asking the matrix solver against the
|
||||
// current running set.
|
||||
type matrixPlanner struct {
|
||||
solver *matrixSolver
|
||||
processes map[string]process.Process
|
||||
logger *logmon.Monitor
|
||||
// matrixSwapper decides evictions by asking the matrix solver against the
|
||||
// running set the scheduler hands it.
|
||||
type matrixSwapper struct {
|
||||
solver *matrixSolver
|
||||
logger *logmon.Monitor
|
||||
}
|
||||
|
||||
func (p *matrixPlanner) EvictionFor(target string, alsoRunning []string) []string {
|
||||
return p.solver.Solve(target, p.runningSet(alsoRunning)).Evict
|
||||
func (p *matrixSwapper) EvictionFor(target string, running []string) []string {
|
||||
return p.solver.Solve(target, running).Evict
|
||||
}
|
||||
|
||||
func (p *matrixPlanner) OnSwapStart(target string) {
|
||||
running := p.runningModels()
|
||||
func (p *matrixSwapper) OnSwapStart(target string, running []string) {
|
||||
result := p.solver.Solve(target, running)
|
||||
switch {
|
||||
case len(result.Evict) > 0:
|
||||
@@ -70,32 +70,3 @@ func (p *matrixPlanner) OnSwapStart(target string) {
|
||||
p.logger.Debugf("matrix: model=%s already running in set=%s dsl=%q", target, result.SetName, result.DSL)
|
||||
}
|
||||
}
|
||||
|
||||
func (p *matrixPlanner) runningModels() []string {
|
||||
return p.runningSet(nil)
|
||||
}
|
||||
|
||||
// runningSet returns the union of live processes (State != Stopped/Shutdown)
|
||||
// and any extra IDs the baseRouter has already committed to loading but which
|
||||
// the process state machine has not yet reflected.
|
||||
func (p *matrixPlanner) runningSet(alsoRunning []string) []string {
|
||||
seen := make(map[string]struct{}, len(p.processes))
|
||||
var running []string
|
||||
for id, proc := range p.processes {
|
||||
st := proc.State()
|
||||
if st == process.StateStopped || st == process.StateShutdown {
|
||||
continue
|
||||
}
|
||||
seen[id] = struct{}{}
|
||||
running = append(running, id)
|
||||
}
|
||||
for _, id := range alsoRunning {
|
||||
if _, dup := seen[id]; dup {
|
||||
continue
|
||||
}
|
||||
seen[id] = struct{}{}
|
||||
running = append(running, id)
|
||||
}
|
||||
sort.Strings(running)
|
||||
return running
|
||||
}
|
||||
|
||||
@@ -17,12 +17,14 @@ import (
|
||||
func newTestMatrix(t *testing.T, conf config.Config, expanded []config.ExpandedSet, evictCosts map[string]int, processes map[string]process.Process) *Matrix {
|
||||
t.Helper()
|
||||
logger := logmon.NewWriter(io.Discard)
|
||||
planner := &matrixPlanner{
|
||||
solver: newMatrixSolver(expanded, evictCosts),
|
||||
processes: processes,
|
||||
logger: logger,
|
||||
swapper := &matrixSwapper{
|
||||
solver: newMatrixSolver(expanded, evictCosts),
|
||||
logger: logger,
|
||||
}
|
||||
base, err := newBaseRouter("matrix", conf, processes, logger, swapper)
|
||||
if err != nil {
|
||||
t.Fatalf("newBaseRouter: %v", err)
|
||||
}
|
||||
base := newBaseRouter("matrix", conf, processes, planner, logger)
|
||||
base.testProcessed = make(chan struct{}, 64)
|
||||
r := &Matrix{baseRouter: base}
|
||||
go base.run()
|
||||
@@ -153,8 +155,8 @@ func TestMatrix_CoexistingSetParallel(t *testing.T) {
|
||||
|
||||
// TestMatrix_IncompatibleQueues verifies that the second request for a model
|
||||
// that cannot coexist with the in-flight first model queues until the first
|
||||
// completes, and then evicts it. This exercises the alsoRunning hint via the
|
||||
// matrix solver's union into runningSet.
|
||||
// completes, and then evicts it. This exercises the scheduler folding in-flight
|
||||
// swap targets into the running set it hands the swapper.
|
||||
func TestMatrix_IncompatibleQueues(t *testing.T) {
|
||||
a := newFakeProcess("a")
|
||||
pb := newFakeProcess("b")
|
||||
@@ -173,8 +175,9 @@ func TestMatrix_IncompatibleQueues(t *testing.T) {
|
||||
}()
|
||||
waitProcessed(t, r.testProcessed, 1)
|
||||
|
||||
// B arrives before A transitions to StateStarting. The solver sees A via
|
||||
// alsoRunning and returns evict=[a], so collidesWith forces B to queue.
|
||||
// B arrives before A transitions to StateStarting. The running set the
|
||||
// scheduler builds includes A (an in-flight swap target), so the solver
|
||||
// returns evict=[a] and collidesWith forces B to queue.
|
||||
w2 := httptest.NewRecorder()
|
||||
done2 := make(chan struct{})
|
||||
go func() {
|
||||
|
||||
@@ -15,6 +15,7 @@ import (
|
||||
|
||||
"github.com/mostlygeek/llama-swap/internal/config"
|
||||
"github.com/mostlygeek/llama-swap/internal/logmon"
|
||||
"github.com/mostlygeek/llama-swap/internal/shared"
|
||||
)
|
||||
|
||||
type peerMember struct {
|
||||
@@ -146,22 +147,22 @@ func (r *Peer) Shutdown(timeout time.Duration) error {
|
||||
|
||||
func (r *Peer) ServeHTTP(w http.ResponseWriter, req *http.Request) {
|
||||
if r.shuttingDown.Load() {
|
||||
SendError(w, req, fmt.Errorf("peer proxy is shutting down"))
|
||||
shared.SendError(w, req, fmt.Errorf("peer proxy is shutting down"))
|
||||
return
|
||||
}
|
||||
r.inflight.Add(1)
|
||||
defer r.inflight.Done()
|
||||
|
||||
data, err := FetchContext(req, r.cfg)
|
||||
data, err := shared.FetchContext(req, r.cfg)
|
||||
if err != nil {
|
||||
SendError(w, req, err)
|
||||
shared.SendError(w, req, err)
|
||||
return
|
||||
}
|
||||
|
||||
pp, found := r.peers[data.ModelID]
|
||||
if !found {
|
||||
r.logger.Warnf("peer model not found: %s", data.ModelID)
|
||||
SendError(w, req, ErrNoPeerModelFound)
|
||||
shared.SendError(w, req, ErrNoPeerModelFound)
|
||||
return
|
||||
}
|
||||
|
||||
|
||||
@@ -12,6 +12,7 @@ import (
|
||||
|
||||
"github.com/mostlygeek/llama-swap/internal/config"
|
||||
"github.com/mostlygeek/llama-swap/internal/logmon"
|
||||
"github.com/mostlygeek/llama-swap/internal/shared"
|
||||
)
|
||||
|
||||
var testLogger = logmon.NewWriter(os.Stdout)
|
||||
@@ -142,7 +143,7 @@ func TestPeer_ServeHTTP_Success(t *testing.T) {
|
||||
}
|
||||
|
||||
req := httptest.NewRequest("POST", "/v1/chat/completions", nil)
|
||||
*req = *req.WithContext(SetContext(req.Context(), ReqContextData{Model: "test-model", ModelID: "test-model"}))
|
||||
*req = *req.WithContext(shared.SetContext(req.Context(), shared.ReqContextData{Model: "test-model", ModelID: "test-model"}))
|
||||
w := httptest.NewRecorder()
|
||||
|
||||
pr.ServeHTTP(w, req)
|
||||
@@ -178,7 +179,7 @@ func TestPeer_ServeHTTP_PeerModelNotFound(t *testing.T) {
|
||||
}
|
||||
|
||||
req := httptest.NewRequest("POST", "/v1/chat/completions", nil)
|
||||
*req = *req.WithContext(SetContext(req.Context(), ReqContextData{Model: "nonexistent-model", ModelID: "nonexistent-model"}))
|
||||
*req = *req.WithContext(shared.SetContext(req.Context(), shared.ReqContextData{Model: "nonexistent-model", ModelID: "nonexistent-model"}))
|
||||
w := httptest.NewRecorder()
|
||||
|
||||
pr.ServeHTTP(w, req)
|
||||
@@ -212,7 +213,7 @@ func TestPeer_ServeHTTP_ApiKeyInjection(t *testing.T) {
|
||||
}
|
||||
|
||||
req := httptest.NewRequest("POST", "/v1/chat/completions", nil)
|
||||
*req = *req.WithContext(SetContext(req.Context(), ReqContextData{Model: "test-model", ModelID: "test-model"}))
|
||||
*req = *req.WithContext(shared.SetContext(req.Context(), shared.ReqContextData{Model: "test-model", ModelID: "test-model"}))
|
||||
w := httptest.NewRecorder()
|
||||
|
||||
pr.ServeHTTP(w, req)
|
||||
@@ -246,7 +247,7 @@ func TestPeer_ServeHTTP_NoApiKey(t *testing.T) {
|
||||
}
|
||||
|
||||
req := httptest.NewRequest("POST", "/v1/chat/completions", nil)
|
||||
*req = *req.WithContext(SetContext(req.Context(), ReqContextData{Model: "test-model", ModelID: "test-model"}))
|
||||
*req = *req.WithContext(shared.SetContext(req.Context(), shared.ReqContextData{Model: "test-model", ModelID: "test-model"}))
|
||||
w := httptest.NewRecorder()
|
||||
|
||||
pr.ServeHTTP(w, req)
|
||||
@@ -279,7 +280,7 @@ func TestPeer_ServeHTTP_HostHeaderSet(t *testing.T) {
|
||||
}
|
||||
|
||||
req := httptest.NewRequest("POST", "/v1/chat/completions", nil)
|
||||
*req = *req.WithContext(SetContext(req.Context(), ReqContextData{Model: "test-model", ModelID: "test-model"}))
|
||||
*req = *req.WithContext(shared.SetContext(req.Context(), shared.ReqContextData{Model: "test-model", ModelID: "test-model"}))
|
||||
w := httptest.NewRecorder()
|
||||
|
||||
pr.ServeHTTP(w, req)
|
||||
@@ -311,7 +312,7 @@ func TestPeer_ServeHTTP_SSEHeaderModification(t *testing.T) {
|
||||
}
|
||||
|
||||
req := httptest.NewRequest("POST", "/v1/chat/completions", nil)
|
||||
*req = *req.WithContext(SetContext(req.Context(), ReqContextData{Model: "test-model", ModelID: "test-model"}))
|
||||
*req = *req.WithContext(shared.SetContext(req.Context(), shared.ReqContextData{Model: "test-model", ModelID: "test-model"}))
|
||||
w := httptest.NewRecorder()
|
||||
|
||||
pr.ServeHTTP(w, req)
|
||||
@@ -347,7 +348,7 @@ func TestPeer_ServeHTTP_ShutdownRejectsNewRequests(t *testing.T) {
|
||||
}
|
||||
|
||||
req := httptest.NewRequest("POST", "/v1/chat/completions", nil)
|
||||
*req = *req.WithContext(SetContext(req.Context(), ReqContextData{Model: "test-model", ModelID: "test-model"}))
|
||||
*req = *req.WithContext(shared.SetContext(req.Context(), shared.ReqContextData{Model: "test-model", ModelID: "test-model"}))
|
||||
w := httptest.NewRecorder()
|
||||
|
||||
pr.ServeHTTP(w, req)
|
||||
@@ -385,7 +386,7 @@ func TestPeer_ServeHTTP_WaitsForInflightDuringShutdown(t *testing.T) {
|
||||
}
|
||||
|
||||
req := httptest.NewRequest("POST", "/v1/chat/completions", nil)
|
||||
*req = *req.WithContext(SetContext(req.Context(), ReqContextData{Model: "test-model", ModelID: "test-model"}))
|
||||
*req = *req.WithContext(shared.SetContext(req.Context(), shared.ReqContextData{Model: "test-model", ModelID: "test-model"}))
|
||||
|
||||
var wg sync.WaitGroup
|
||||
wg.Add(1)
|
||||
@@ -448,7 +449,7 @@ func TestPeer_ServeHTTP_ShutdownTimeoutCancelsInflight(t *testing.T) {
|
||||
}
|
||||
|
||||
req := httptest.NewRequest("POST", "/v1/chat/completions", nil)
|
||||
*req = *req.WithContext(SetContext(req.Context(), ReqContextData{Model: "test-model", ModelID: "test-model"}))
|
||||
*req = *req.WithContext(shared.SetContext(req.Context(), shared.ReqContextData{Model: "test-model", ModelID: "test-model"}))
|
||||
|
||||
var wg sync.WaitGroup
|
||||
wg.Add(1)
|
||||
@@ -551,7 +552,7 @@ func TestPeer_ServeHTTP_ContextOverridesBodyModel(t *testing.T) {
|
||||
body := strings.NewReader(`{"model":"body-model","prompt":"hello"}`)
|
||||
req := httptest.NewRequest("POST", "/v1/chat/completions", body)
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
*req = *req.WithContext(SetContext(req.Context(), ReqContextData{Model: "context-model", ModelID: "context-model"}))
|
||||
*req = *req.WithContext(shared.SetContext(req.Context(), shared.ReqContextData{Model: "context-model", ModelID: "context-model"}))
|
||||
w := httptest.NewRecorder()
|
||||
|
||||
pr.ServeHTTP(w, req)
|
||||
|
||||
+4
-151
@@ -1,39 +1,18 @@
|
||||
package router
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/mostlygeek/llama-swap/internal/config"
|
||||
"github.com/mostlygeek/llama-swap/internal/logmon"
|
||||
"github.com/mostlygeek/llama-swap/internal/process"
|
||||
"github.com/tidwall/gjson"
|
||||
"github.com/mostlygeek/llama-swap/internal/shared"
|
||||
)
|
||||
|
||||
type contextkey struct {
|
||||
name string
|
||||
}
|
||||
|
||||
type ReqContextData struct {
|
||||
Model string
|
||||
ModelID string
|
||||
Streaming bool
|
||||
SendLoadingState bool
|
||||
}
|
||||
|
||||
var (
|
||||
ErrNoModelInContext = fmt.Errorf("no model in request context")
|
||||
ErrNoRouterFound = fmt.Errorf("no router found for model")
|
||||
ErrNoPeerModelFound = fmt.Errorf("peer model not found")
|
||||
ErrNoLocalModelFound = fmt.Errorf("local model not found")
|
||||
|
||||
ContextKey = &contextkey{"context"}
|
||||
ErrNoRouterFound = shared.ErrNoRouterFound
|
||||
ErrNoPeerModelFound = shared.ErrNoPeerModelFound
|
||||
ErrNoLocalModelFound = shared.ErrNoLocalModelFound
|
||||
)
|
||||
|
||||
type Router interface {
|
||||
@@ -71,129 +50,3 @@ type LocalRouter interface {
|
||||
// model is not known to this router.
|
||||
ProcessLogger(modelID string) (*logmon.Monitor, bool)
|
||||
}
|
||||
|
||||
// FetchContext will attempt to get the model id from the context then
|
||||
// from the model body. If it extracts the model from the body it will
|
||||
// store the model in the context for downstream handlers. An error
|
||||
// will be returned when model can not be fetch from either location.
|
||||
func FetchContext(r *http.Request, cfg config.Config) (ReqContextData, error) {
|
||||
data, ok := ReadContext(r.Context())
|
||||
if ok {
|
||||
return data, nil
|
||||
}
|
||||
|
||||
if data, err := ExtractContext(r); err == nil {
|
||||
realName, _ := cfg.RealModelName(data.Model)
|
||||
if realName == "" {
|
||||
realName = data.Model
|
||||
}
|
||||
data.ModelID = realName
|
||||
if mc, ok := cfg.Models[realName]; ok {
|
||||
data.SendLoadingState = mc.SendLoadingState != nil && *mc.SendLoadingState
|
||||
}
|
||||
*r = *r.WithContext(SetContext(r.Context(), data))
|
||||
return data, nil
|
||||
}
|
||||
|
||||
return ReqContextData{}, ErrNoModelInContext
|
||||
}
|
||||
|
||||
func SetContext(ctx context.Context, data ReqContextData) context.Context {
|
||||
return context.WithValue(ctx, ContextKey, data)
|
||||
}
|
||||
|
||||
func ReadContext(ctx context.Context) (ReqContextData, bool) {
|
||||
data, ok := ctx.Value(ContextKey).(ReqContextData)
|
||||
return data, ok
|
||||
}
|
||||
|
||||
// ExtractContext pulls the model name from an HTTP request without consuming the
|
||||
// body. For GET requests it reads the "model" query parameter. For POST
|
||||
// requests it inspects Content-Type and parses JSON, multipart/form-data, or
|
||||
// application/x-www-form-urlencoded bodies. The request body is always restored
|
||||
// before returning so downstream handlers — including reverse proxies that
|
||||
// forward raw bytes upstream — can still read it.
|
||||
func ExtractContext(r *http.Request) (ReqContextData, error) {
|
||||
if r.Method == http.MethodGet {
|
||||
if model := r.URL.Query().Get("model"); model != "" {
|
||||
return ReqContextData{Model: model, Streaming: r.URL.Query().Get("stream") == "true"}, nil
|
||||
}
|
||||
return ReqContextData{}, fmt.Errorf("missing 'model' query parameter")
|
||||
}
|
||||
|
||||
bodyBytes, err := io.ReadAll(r.Body)
|
||||
if err != nil {
|
||||
return ReqContextData{}, fmt.Errorf("error reading request body: %w", err)
|
||||
}
|
||||
defer func() {
|
||||
r.Body = io.NopCloser(bytes.NewReader(bodyBytes))
|
||||
}()
|
||||
|
||||
contentType := r.Header.Get("Content-Type")
|
||||
|
||||
if strings.Contains(contentType, "application/json") {
|
||||
model := gjson.GetBytes(bodyBytes, "model").String()
|
||||
if model == "" {
|
||||
return ReqContextData{}, fmt.Errorf("missing or empty 'model' in JSON body")
|
||||
}
|
||||
return ReqContextData{Model: model, Streaming: gjson.GetBytes(bodyBytes, "stream").Bool()}, nil
|
||||
}
|
||||
|
||||
// Form parsers read from r.Body, so feed them a fresh reader over the
|
||||
// buffered bytes. The deferred restore above will reset r.Body again
|
||||
// after parsing.
|
||||
r.Body = io.NopCloser(bytes.NewReader(bodyBytes))
|
||||
if strings.Contains(contentType, "multipart/form-data") {
|
||||
if err := r.ParseMultipartForm(32 << 20); err != nil {
|
||||
return ReqContextData{}, fmt.Errorf("error parsing multipart form: %w", err)
|
||||
}
|
||||
} else {
|
||||
if err := r.ParseForm(); err != nil {
|
||||
return ReqContextData{}, fmt.Errorf("error parsing form: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
if model := r.FormValue("model"); model != "" {
|
||||
return ReqContextData{Model: model, Streaming: r.FormValue("stream") == "true"}, nil
|
||||
}
|
||||
|
||||
return ReqContextData{}, fmt.Errorf("missing 'model' parameter")
|
||||
}
|
||||
|
||||
func SendError(w http.ResponseWriter, r *http.Request, err error) {
|
||||
switch {
|
||||
case errors.Is(err, ErrNoModelInContext):
|
||||
SendResponse(w, r, http.StatusNotFound, "no model id could be identified")
|
||||
case errors.Is(err, ErrNoPeerModelFound):
|
||||
SendResponse(w, r, http.StatusNotFound, "no peer found for requested model")
|
||||
case errors.Is(err, ErrNoLocalModelFound):
|
||||
SendResponse(w, r, http.StatusNotFound, "no local server found for requested model")
|
||||
case errors.Is(err, ErrNoRouterFound):
|
||||
SendResponse(w, r, http.StatusNotFound, "no router for requested model")
|
||||
default:
|
||||
SendResponse(w, r, http.StatusInternalServerError, fmt.Sprintf("unspecific error: %v", err))
|
||||
}
|
||||
}
|
||||
|
||||
// SendResponse detects what content type the client prefers and returns an error response in that format.
|
||||
func SendResponse(w http.ResponseWriter, r *http.Request, status int, message string) {
|
||||
// Check Accept header for preferred response format
|
||||
acceptHeader := r.Header.Get("Accept")
|
||||
if strings.Contains(acceptHeader, "text/plain") {
|
||||
w.Header().Set("Content-Type", "text/plain")
|
||||
w.WriteHeader(status)
|
||||
w.Write([]byte(fmt.Sprintf("llama-swap: %s", message)))
|
||||
return
|
||||
}
|
||||
|
||||
if strings.Contains(acceptHeader, "text/html") {
|
||||
w.Header().Set("Content-Type", "text/html")
|
||||
w.WriteHeader(status)
|
||||
w.Write([]byte(fmt.Sprintf(`<html><body><h1>llama-swap</h1><p>%s</p></body></html>`, message)))
|
||||
return
|
||||
}
|
||||
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
w.WriteHeader(status)
|
||||
w.Write([]byte(fmt.Sprintf(`{"src":"llama-swap", "error": "%s"}`, message)))
|
||||
}
|
||||
|
||||
@@ -1,275 +0,0 @@
|
||||
package router
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"io"
|
||||
"mime/multipart"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestExtractContext_GET(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
query string
|
||||
wantModel string
|
||||
wantErr bool
|
||||
}{
|
||||
{"model present", "model=llama3", "llama3", false},
|
||||
{"model with slashes", "model=author/model-7b", "author/model-7b", false},
|
||||
{"model missing", "", "", true},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
r, _ := http.NewRequest(http.MethodGet, "/?"+tt.query, nil)
|
||||
got, err := ExtractContext(r)
|
||||
if (err != nil) != tt.wantErr {
|
||||
t.Fatalf("wantErr=%v got err=%v", tt.wantErr, err)
|
||||
}
|
||||
if got.Model != tt.wantModel {
|
||||
t.Errorf("want %q got %q", tt.wantModel, got.Model)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestExtractContext_JSON(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
body string
|
||||
wantModel string
|
||||
wantErr bool
|
||||
}{
|
||||
{"model present", `{"model":"llama3","stream":true}`, "llama3", false},
|
||||
{"model with slashes", `{"model":"author/model-7b"}`, "author/model-7b", false},
|
||||
{"model empty string", `{"model":""}`, "", true},
|
||||
{"model key missing", `{"stream":true}`, "", true},
|
||||
{"invalid json", `not-json`, "", true},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
r, _ := http.NewRequest(http.MethodPost, "/v1/chat/completions", strings.NewReader(tt.body))
|
||||
r.Header.Set("Content-Type", "application/json")
|
||||
got, err := ExtractContext(r)
|
||||
if (err != nil) != tt.wantErr {
|
||||
t.Fatalf("wantErr=%v got err=%v", tt.wantErr, err)
|
||||
}
|
||||
if got.Model != tt.wantModel {
|
||||
t.Errorf("want %q got %q", tt.wantModel, got.Model)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestExtractContext_URLEncodedForm(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
formModel string
|
||||
wantModel string
|
||||
wantErr bool
|
||||
}{
|
||||
{"model present", "whisper-1", "whisper-1", false},
|
||||
{"model missing", "", "", true},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
form := url.Values{}
|
||||
if tt.formModel != "" {
|
||||
form.Set("model", tt.formModel)
|
||||
}
|
||||
r, _ := http.NewRequest(http.MethodPost, "/v1/audio/transcriptions", strings.NewReader(form.Encode()))
|
||||
r.Header.Set("Content-Type", "application/x-www-form-urlencoded")
|
||||
got, err := ExtractContext(r)
|
||||
if (err != nil) != tt.wantErr {
|
||||
t.Fatalf("wantErr=%v got err=%v", tt.wantErr, err)
|
||||
}
|
||||
if got.Model != tt.wantModel {
|
||||
t.Errorf("want %q got %q", tt.wantModel, got.Model)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestExtractContext_MultipartForm(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
formModel string
|
||||
wantModel string
|
||||
wantErr bool
|
||||
}{
|
||||
{"model present", "whisper-1", "whisper-1", false},
|
||||
{"model missing", "", "", true},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
var buf bytes.Buffer
|
||||
mw := multipart.NewWriter(&buf)
|
||||
if tt.formModel != "" {
|
||||
fw, _ := mw.CreateFormField("model")
|
||||
fw.Write([]byte(tt.formModel))
|
||||
}
|
||||
mw.Close()
|
||||
|
||||
r, _ := http.NewRequest(http.MethodPost, "/v1/audio/transcriptions", &buf)
|
||||
r.Header.Set("Content-Type", mw.FormDataContentType())
|
||||
got, err := ExtractContext(r)
|
||||
if (err != nil) != tt.wantErr {
|
||||
t.Fatalf("wantErr=%v got err=%v", tt.wantErr, err)
|
||||
}
|
||||
if got.Model != tt.wantModel {
|
||||
t.Errorf("want %q got %q", tt.wantModel, got.Model)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestExtractContext_JSONBodyRestored(t *testing.T) {
|
||||
body := `{"model":"llama3","stream":true}`
|
||||
r, _ := http.NewRequest(http.MethodPost, "/v1/chat/completions", strings.NewReader(body))
|
||||
r.Header.Set("Content-Type", "application/json")
|
||||
|
||||
if _, err := ExtractContext(r); err != nil {
|
||||
t.Fatalf("ExtractContext: %v", err)
|
||||
}
|
||||
|
||||
remaining, err := io.ReadAll(r.Body)
|
||||
if err != nil {
|
||||
t.Fatalf("reading body after ExtractContext: %v", err)
|
||||
}
|
||||
if string(remaining) != body {
|
||||
t.Errorf("body not restored: want %q got %q", body, string(remaining))
|
||||
}
|
||||
}
|
||||
|
||||
func TestExtractContext_MultipartBodyRestored(t *testing.T) {
|
||||
var buf bytes.Buffer
|
||||
mw := multipart.NewWriter(&buf)
|
||||
fw, _ := mw.CreateFormField("model")
|
||||
fw.Write([]byte("whisper-1"))
|
||||
ff, _ := mw.CreateFormFile("file", "audio.wav")
|
||||
ff.Write([]byte("fake-audio-bytes"))
|
||||
mw.Close()
|
||||
|
||||
original := buf.Bytes()
|
||||
|
||||
r, _ := http.NewRequest(http.MethodPost, "/v1/audio/transcriptions", bytes.NewReader(original))
|
||||
r.Header.Set("Content-Type", mw.FormDataContentType())
|
||||
|
||||
if _, err := ExtractContext(r); err != nil {
|
||||
t.Fatalf("ExtractContext: %v", err)
|
||||
}
|
||||
|
||||
remaining, err := io.ReadAll(r.Body)
|
||||
if err != nil {
|
||||
t.Fatalf("reading body after ExtractContext: %v", err)
|
||||
}
|
||||
if !bytes.Equal(remaining, original) {
|
||||
t.Errorf("multipart body not restored: want %d bytes got %d bytes", len(original), len(remaining))
|
||||
}
|
||||
}
|
||||
|
||||
func TestExtractContext_URLEncodedBodyRestored(t *testing.T) {
|
||||
body := "model=whisper-1&extra=value"
|
||||
r, _ := http.NewRequest(http.MethodPost, "/v1/audio/transcriptions", strings.NewReader(body))
|
||||
r.Header.Set("Content-Type", "application/x-www-form-urlencoded")
|
||||
|
||||
if _, err := ExtractContext(r); err != nil {
|
||||
t.Fatalf("ExtractContext: %v", err)
|
||||
}
|
||||
|
||||
remaining, err := io.ReadAll(r.Body)
|
||||
if err != nil {
|
||||
t.Fatalf("reading body after ExtractContext: %v", err)
|
||||
}
|
||||
if string(remaining) != body {
|
||||
t.Errorf("url-encoded body not restored: want %q got %q", body, string(remaining))
|
||||
}
|
||||
}
|
||||
|
||||
func TestSetContext(t *testing.T) {
|
||||
ctx := SetContext(context.Background(), ReqContextData{Model: "llama3", ModelID: "llama3"})
|
||||
data, ok := ctx.Value(ContextKey).(ReqContextData)
|
||||
if !ok {
|
||||
t.Fatalf("ContextKey not set or wrong type")
|
||||
}
|
||||
if data.Model != "llama3" {
|
||||
t.Errorf("want %q got %q", "llama3", data.Model)
|
||||
}
|
||||
if data.ModelID != "llama3" {
|
||||
t.Errorf("want %q got %q", "llama3", data.ModelID)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSetContext_WithAlias(t *testing.T) {
|
||||
ctx := SetContext(context.Background(), ReqContextData{Model: "llama", ModelID: "llama3"})
|
||||
data, _ := ctx.Value(ContextKey).(ReqContextData)
|
||||
if data.Model != "llama" {
|
||||
t.Errorf("want requested %q got %q", "llama", data.Model)
|
||||
}
|
||||
if data.ModelID != "llama3" {
|
||||
t.Errorf("want real %q got %q", "llama3", data.ModelID)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSetContext_DoesNotMutateParent(t *testing.T) {
|
||||
parent := context.Background()
|
||||
_ = SetContext(parent, ReqContextData{Model: "llama3", ModelID: "llama3"})
|
||||
if v := parent.Value(ContextKey); v != nil {
|
||||
t.Errorf("parent context was mutated: %v", v)
|
||||
}
|
||||
}
|
||||
|
||||
func TestReadContext(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
ctx context.Context
|
||||
wantReq string
|
||||
wantReal string
|
||||
wantBool bool
|
||||
}{
|
||||
{
|
||||
name: "model present, same name",
|
||||
ctx: SetContext(context.Background(), ReqContextData{Model: "llama3", ModelID: "llama3"}),
|
||||
wantReq: "llama3",
|
||||
wantReal: "llama3",
|
||||
wantBool: true,
|
||||
},
|
||||
{
|
||||
name: "model present, aliased",
|
||||
ctx: SetContext(context.Background(), ReqContextData{Model: "llama", ModelID: "llama3"}),
|
||||
wantReq: "llama",
|
||||
wantReal: "llama3",
|
||||
wantBool: true,
|
||||
},
|
||||
{
|
||||
name: "model absent",
|
||||
ctx: context.Background(),
|
||||
wantReq: "",
|
||||
wantReal: "",
|
||||
wantBool: false,
|
||||
},
|
||||
{
|
||||
name: "model is empty string",
|
||||
ctx: SetContext(context.Background(), ReqContextData{Model: "", ModelID: ""}),
|
||||
wantReq: "",
|
||||
wantReal: "",
|
||||
wantBool: true,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
gotData, ok := ReadContext(tt.ctx)
|
||||
if gotData.Model != tt.wantReq || gotData.ModelID != tt.wantReal || ok != tt.wantBool {
|
||||
t.Errorf("want (%q, %q, %v) got (%q, %q, %v)", tt.wantReq, tt.wantReal, tt.wantBool, gotData.Model, gotData.ModelID, ok)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,489 @@
|
||||
package scheduler
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"sort"
|
||||
"strconv"
|
||||
"time"
|
||||
|
||||
"github.com/mostlygeek/llama-swap/internal/config"
|
||||
"github.com/mostlygeek/llama-swap/internal/logmon"
|
||||
"github.com/mostlygeek/llama-swap/internal/process"
|
||||
"github.com/mostlygeek/llama-swap/internal/shared"
|
||||
)
|
||||
|
||||
// defaultConcurrencyLimit caps simultaneous in-flight requests per model when
|
||||
// the model config leaves concurrencyLimit unset.
|
||||
const defaultConcurrencyLimit = 10
|
||||
|
||||
// activeSwap tracks one in-flight swap and the callers waiting on it.
|
||||
type activeSwap struct {
|
||||
modelID string
|
||||
evict []string
|
||||
waiters []HandlerReq
|
||||
}
|
||||
|
||||
// FIFO is the default scheduler. Requests are handled in a first-in, first-out order.
|
||||
// To reduce swapping requests for a model that is already running will be handled
|
||||
// immediately by the running process.
|
||||
//
|
||||
// Requests into this schedule are handled like this:
|
||||
//
|
||||
// A B C A B C --> A A B B C C
|
||||
//
|
||||
// The strategy is simple and reduces the number of swaps required.
|
||||
type FIFO struct {
|
||||
name string
|
||||
logger *logmon.Monitor
|
||||
planner Swapper
|
||||
cfg config.FifoConfig
|
||||
effects Effects
|
||||
|
||||
limits map[string]int
|
||||
active map[string]*activeSwap
|
||||
inFlight map[string]int
|
||||
queued []HandlerReq
|
||||
}
|
||||
|
||||
// NewFIFO builds a FIFO scheduler. Per-model concurrency limits are derived
|
||||
// from models: each model's ConcurrencyLimit overrides defaultConcurrencyLimit
|
||||
// when set to a value greater than zero.
|
||||
func NewFIFO(name string, logger *logmon.Monitor, planner Swapper, cfg config.FifoConfig, models map[string]config.ModelConfig, eff Effects) *FIFO {
|
||||
limits := make(map[string]int, len(models))
|
||||
for id, mc := range models {
|
||||
limit := defaultConcurrencyLimit
|
||||
if mc.ConcurrencyLimit > 0 {
|
||||
limit = mc.ConcurrencyLimit
|
||||
}
|
||||
limits[id] = limit
|
||||
}
|
||||
|
||||
return &FIFO{
|
||||
name: name,
|
||||
logger: logger,
|
||||
planner: planner,
|
||||
cfg: cfg,
|
||||
effects: eff,
|
||||
limits: limits,
|
||||
active: make(map[string]*activeSwap),
|
||||
inFlight: make(map[string]int),
|
||||
}
|
||||
}
|
||||
|
||||
// OnRequest decides what to do with one incoming ServeHTTP request. It never
|
||||
// blocks indefinitely: any work that has to wait (starting a process, stopping
|
||||
// siblings, waiting for ready) is deferred to a swap goroutine and reported back
|
||||
// via OnSwapDone.
|
||||
//
|
||||
// The decision tree, in order:
|
||||
//
|
||||
// 1. Unknown model — respond with ErrModelNotFound and move on.
|
||||
// 2. A swap to the same model is already in flight — attach this waiter so
|
||||
// one swap serves all callers that asked for the same model.
|
||||
// 3. Fast path — the target process is already ready, the planner sees
|
||||
// nothing to evict, and no in-flight swap is evicting it. Hand back its
|
||||
// ServeHTTP immediately.
|
||||
// 4. Would collide with an in-flight swap (we'd stop their target, or they're
|
||||
// stopping us) — park in the queue for OnSwapDone to drain.
|
||||
// 5. Would evict a process that is still handling requests — park in the
|
||||
// queue. OnServeDone will retry when the busy process drains.
|
||||
// 6. Otherwise — start a new swap. This may run in parallel with other active
|
||||
// swaps when their evict sets don't intersect.
|
||||
func (s *FIFO) OnRequest(req HandlerReq) {
|
||||
// (1) Unknown model.
|
||||
state, ok := s.effects.ModelState(req.Model)
|
||||
if !ok {
|
||||
s.logger.Debugf("%s: model %s not handled by this router", s.name, req.Model)
|
||||
s.effects.GrantError(req, ErrModelNotFound)
|
||||
return
|
||||
}
|
||||
|
||||
// (2) Join an in-flight swap for the same model.
|
||||
if sw, ok := s.active[req.Model]; ok {
|
||||
s.logger.Debugf("%s: joining in-flight swap for model %s (%d waiters)", s.name, req.Model, len(sw.waiters)+1)
|
||||
sw.waiters = append(sw.waiters, req)
|
||||
return
|
||||
}
|
||||
|
||||
running := s.runningSet(req.Model)
|
||||
evict := s.planner.EvictionFor(req.Model, running)
|
||||
|
||||
// (3) Fast path: ready, nothing to evict, and nobody is evicting us.
|
||||
if state == process.StateReady && len(evict) == 0 && !collidesWith(req.Model, evict, s.active) {
|
||||
s.logger.Debugf("%s: fast-path serving model %s (already ready)", s.name, req.Model)
|
||||
s.grantHandler(req, req.Model)
|
||||
return
|
||||
}
|
||||
|
||||
// (4) Collision with an in-flight swap — queue.
|
||||
if collidesWith(req.Model, evict, s.active) {
|
||||
s.logger.Debugf("%s: queuing request for model %s (collides with in-flight swap)", s.name, req.Model)
|
||||
s.enqueue(req)
|
||||
return
|
||||
}
|
||||
|
||||
// (5) Would evict a busy process — queue until it drains.
|
||||
if conflictsWithInFlight(evict, s.inFlight) {
|
||||
s.logger.Debugf("%s: queuing request for model %s (would evict in-flight process)", s.name, req.Model)
|
||||
s.enqueue(req)
|
||||
return
|
||||
}
|
||||
|
||||
// (6) Start a new (possibly parallel) swap.
|
||||
s.logger.Debugf("%s: starting swap for model %s, evicting %v", s.name, req.Model, evict)
|
||||
s.startSwap(req, evict, running)
|
||||
}
|
||||
|
||||
// OnCancel removes a request whose client has disconnected from the queue and
|
||||
// from every in-flight swap's waiters. If the request was the sole waiter of an
|
||||
// active swap, the swap goroutine is left to complete on its own — OnSwapDone
|
||||
// will find no waiters and simply clean up. This prevents drainQueue from ever
|
||||
// starting a model load for a caller that is no longer there.
|
||||
func (s *FIFO) OnCancel(req HandlerReq) {
|
||||
removed := false
|
||||
|
||||
// Prune from the queue.
|
||||
if len(s.queued) > 0 {
|
||||
kept := s.queued[:0]
|
||||
for _, q := range s.queued {
|
||||
if q.Respond == req.Respond {
|
||||
removed = true
|
||||
continue
|
||||
}
|
||||
kept = append(kept, q)
|
||||
}
|
||||
s.queued = kept
|
||||
}
|
||||
|
||||
// Prune from any active swap's waiters.
|
||||
for _, sw := range s.active {
|
||||
filtered := sw.waiters[:0]
|
||||
for _, w := range sw.waiters {
|
||||
if w.Respond == req.Respond {
|
||||
removed = true
|
||||
continue
|
||||
}
|
||||
filtered = append(filtered, w)
|
||||
}
|
||||
sw.waiters = filtered
|
||||
}
|
||||
|
||||
if removed {
|
||||
s.logger.Debugf("%s: cancelled request for model %s pruned from scheduler", s.name, req.Model)
|
||||
broadcastQueuePositions(s.queued)
|
||||
}
|
||||
}
|
||||
|
||||
// OnSwapDone fans the result out to every waiter that joined this swap, removes
|
||||
// the swap from the active map, then walks the queue once, promoting any items
|
||||
// that no longer collide with the remaining active set. FIFO order is preserved:
|
||||
// items still blocked stay in place.
|
||||
func (s *FIFO) OnSwapDone(ev SwapDone) {
|
||||
sw, ok := s.active[ev.ModelID]
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
delete(s.active, ev.ModelID)
|
||||
|
||||
for _, w := range sw.waiters {
|
||||
if ev.Err != nil {
|
||||
s.effects.GrantError(w, ev.Err)
|
||||
} else {
|
||||
s.grantHandler(w, ev.ModelID)
|
||||
}
|
||||
}
|
||||
|
||||
s.drainQueue()
|
||||
}
|
||||
|
||||
// OnServeDone decrements the per-model in-flight count and, when that drops to
|
||||
// zero, retries the queue: requests whose swap was deferred because they would
|
||||
// have evicted this (now-idle) process can now proceed.
|
||||
func (s *FIFO) OnServeDone(ev ServeDoneEvent) {
|
||||
s.inFlight[ev.ModelID]--
|
||||
if s.inFlight[ev.ModelID] <= 0 {
|
||||
delete(s.inFlight, ev.ModelID)
|
||||
s.drainQueue()
|
||||
}
|
||||
}
|
||||
|
||||
// OnUnload reconciles router-owned state with the impending Stop, performs the
|
||||
// Stop (synchronously, via Effects) so callers of Unload remain blocked until
|
||||
// each targeted process has exited, then drains the queue.
|
||||
func (s *FIFO) OnUnload(targets []string, timeout time.Duration) {
|
||||
unloadErr := fmt.Errorf("%s: model unloaded", s.name)
|
||||
|
||||
targetSet := make(map[string]bool, len(targets))
|
||||
for _, id := range targets {
|
||||
targetSet[id] = true
|
||||
}
|
||||
|
||||
// Release waiters of any in-flight swap whose target is being unloaded.
|
||||
// The swap goroutine itself is left to finish on its own; when its
|
||||
// SwapDone arrives, OnSwapDone will find no entry in active and drop it.
|
||||
for id := range targetSet {
|
||||
sw, ok := s.active[id]
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
for _, w := range sw.waiters {
|
||||
s.effects.GrantError(w, unloadErr)
|
||||
}
|
||||
delete(s.active, id)
|
||||
}
|
||||
|
||||
// Drop queued requests addressed to unloaded models. Requests for other
|
||||
// models stay queued and may benefit from drainQueue at the end.
|
||||
if len(s.queued) > 0 {
|
||||
kept := s.queued[:0]
|
||||
for _, w := range s.queued {
|
||||
if targetSet[w.Model] {
|
||||
s.effects.GrantError(w, unloadErr)
|
||||
continue
|
||||
}
|
||||
kept = append(kept, w)
|
||||
}
|
||||
s.queued = kept
|
||||
}
|
||||
|
||||
// Stop the targeted processes. Done synchronously so Unload's caller can
|
||||
// rely on "after Unload returns, the process is stopped". inFlight is
|
||||
// intentionally NOT cleared here: each dying handler will fire its tracked
|
||||
// serve and reach OnServeDone in the normal way.
|
||||
s.effects.StopProcesses(timeout, targets)
|
||||
|
||||
// Removing entries from active above may have unblocked queued requests
|
||||
// that previously collided with the now-cancelled swaps.
|
||||
s.drainQueue()
|
||||
}
|
||||
|
||||
// OnShutdown grants err to every waiter still held by the scheduler.
|
||||
func (s *FIFO) OnShutdown(err error) {
|
||||
for _, sw := range s.active {
|
||||
for _, w := range sw.waiters {
|
||||
s.effects.GrantError(w, err)
|
||||
}
|
||||
}
|
||||
for _, w := range s.queued {
|
||||
s.effects.GrantError(w, err)
|
||||
}
|
||||
}
|
||||
|
||||
// grantHandler hands the caller a tracked handler for modelID and, only if the
|
||||
// caller was still there to receive it, bumps the in-flight count. Incrementing
|
||||
// when the grant failed would strand the counter and block future evictions.
|
||||
// Requests that would exceed the model's concurrency limit are rejected with a
|
||||
// shared.NewConcurrencyLimitError (HTTP 429 with Retry-After).
|
||||
func (s *FIFO) grantHandler(req HandlerReq, modelID string) {
|
||||
if s.inFlight[modelID] >= s.limit(modelID) {
|
||||
s.effects.GrantError(req, shared.ConcurrencyLimitError{})
|
||||
return
|
||||
}
|
||||
|
||||
if err := shared.SetReqData(req.Ctx, "fifo_priority", strconv.Itoa(s.cfg.Priority[req.Model])); err != nil {
|
||||
s.logger.Debugf("failed to set fifo_priority metadata: %v", err)
|
||||
}
|
||||
|
||||
if s.effects.GrantServe(req, modelID) {
|
||||
s.inFlight[modelID]++
|
||||
}
|
||||
}
|
||||
|
||||
// limit returns the per-model concurrency cap, defaulting to
|
||||
// defaultConcurrencyLimit when the model has no explicit entry.
|
||||
func (s *FIFO) limit(modelID string) int {
|
||||
if l, ok := s.limits[modelID]; ok {
|
||||
return l
|
||||
}
|
||||
return defaultConcurrencyLimit
|
||||
}
|
||||
|
||||
// startSwap records the swap as active and launches it via Effects. running is
|
||||
// the set EvictionFor saw, forwarded to OnSwapStart so the planner logs against
|
||||
// the same picture it decided on.
|
||||
func (s *FIFO) startSwap(initial HandlerReq, evict, running []string) {
|
||||
s.active[initial.Model] = &activeSwap{
|
||||
modelID: initial.Model,
|
||||
evict: evict,
|
||||
waiters: []HandlerReq{initial},
|
||||
}
|
||||
s.planner.OnSwapStart(initial.Model, running)
|
||||
s.effects.StartSwap(initial.Model, evict)
|
||||
}
|
||||
|
||||
// enqueue inserts req into the queue in priority order: it goes just before the
|
||||
// first queued item whose priority is strictly lower, so higher-priority models
|
||||
// are serviced first while equal-priority requests keep their arrival (FIFO)
|
||||
// order. Priorities come from the FifoConfig; unlisted models default to 0.
|
||||
func (s *FIFO) enqueue(req HandlerReq) {
|
||||
p := s.cfg.Priority[req.Model]
|
||||
i := len(s.queued)
|
||||
for j, q := range s.queued {
|
||||
if s.cfg.Priority[q.Model] < p {
|
||||
i = j
|
||||
break
|
||||
}
|
||||
}
|
||||
s.queued = append(s.queued, HandlerReq{})
|
||||
copy(s.queued[i+1:], s.queued[i:])
|
||||
s.queued[i] = req
|
||||
broadcastQueuePositions(s.queued)
|
||||
}
|
||||
|
||||
// drainQueue walks the queued requests in order, re-running the OnRequest
|
||||
// decision tree against the (now smaller) active set. Items that can now start
|
||||
// or join become satisfied; items still blocked remain queued in original order
|
||||
// so they get another chance on the next swap completion.
|
||||
func (s *FIFO) drainQueue() {
|
||||
if len(s.queued) == 0 {
|
||||
return
|
||||
}
|
||||
pending := s.queued
|
||||
var remaining []HandlerReq
|
||||
for _, req := range pending {
|
||||
state, ok := s.effects.ModelState(req.Model)
|
||||
if !ok {
|
||||
s.effects.GrantError(req, ErrModelNotFound)
|
||||
continue
|
||||
}
|
||||
if sw, ok := s.active[req.Model]; ok {
|
||||
s.logger.Debugf("%s: queued request for model %s now joining in-flight swap", s.name, req.Model)
|
||||
sw.waiters = append(sw.waiters, req)
|
||||
continue
|
||||
}
|
||||
running := s.runningSet(req.Model)
|
||||
evict := s.planner.EvictionFor(req.Model, running)
|
||||
if state == process.StateReady && len(evict) == 0 && !collidesWith(req.Model, evict, s.active) {
|
||||
s.logger.Debugf("%s: queued request for model %s now served fast-path", s.name, req.Model)
|
||||
s.grantHandler(req, req.Model)
|
||||
continue
|
||||
}
|
||||
if collidesWith(req.Model, evict, s.active) {
|
||||
remaining = append(remaining, req)
|
||||
continue
|
||||
}
|
||||
if conflictsWithInFlight(evict, s.inFlight) {
|
||||
remaining = append(remaining, req)
|
||||
continue
|
||||
}
|
||||
s.logger.Debugf("%s: queued request for model %s now starting swap, evicting %v", s.name, req.Model, evict)
|
||||
s.startSwap(req, evict, running)
|
||||
}
|
||||
s.queued = remaining
|
||||
broadcastQueuePositions(s.queued)
|
||||
}
|
||||
|
||||
// runningSet is the live model set handed to the Swapper: every process the
|
||||
// baseRouter reports as running, unioned with the targets of in-flight swaps
|
||||
// (excluding excludeActive, the model whose own swap is being decided — its
|
||||
// in-flight entry must not count as "already running"). The result is sorted so
|
||||
// eviction decisions derived from it are deterministic.
|
||||
func (s *FIFO) runningSet(excludeActive string) []string {
|
||||
seen := make(map[string]struct{})
|
||||
var out []string
|
||||
add := func(id string) {
|
||||
if _, dup := seen[id]; dup {
|
||||
return
|
||||
}
|
||||
seen[id] = struct{}{}
|
||||
out = append(out, id)
|
||||
}
|
||||
for id := range s.effects.RunningModels() {
|
||||
add(id)
|
||||
}
|
||||
for _, id := range activeTargets(s.active, excludeActive) {
|
||||
add(id)
|
||||
}
|
||||
sort.Strings(out)
|
||||
return out
|
||||
}
|
||||
|
||||
// activeTargets returns the IDs of every in-flight swap target except exclude.
|
||||
// The planner uses this to account for models committed to but not yet reflected
|
||||
// in process state.
|
||||
func activeTargets(active map[string]*activeSwap, exclude string) []string {
|
||||
if len(active) == 0 {
|
||||
return nil
|
||||
}
|
||||
out := make([]string, 0, len(active))
|
||||
for id := range active {
|
||||
if id == exclude {
|
||||
continue
|
||||
}
|
||||
out = append(out, id)
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
// collidesWith reports whether a new swap with this target and evict set can
|
||||
// safely run alongside the currently active swaps. Same-target callers should
|
||||
// JOIN (handled before this) — they do not collide with themselves.
|
||||
func collidesWith(target string, evict []string, active map[string]*activeSwap) bool {
|
||||
for id, sw := range active {
|
||||
if id == target {
|
||||
continue
|
||||
}
|
||||
if containsString(evict, id) {
|
||||
return true
|
||||
}
|
||||
if containsString(sw.evict, target) {
|
||||
return true
|
||||
}
|
||||
if slicesOverlap(evict, sw.evict) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// slicesOverlap reports whether at least one string appears in both xs and ys.
// It is false when either slice is empty.
func slicesOverlap(xs, ys []string) bool {
	if len(xs) == 0 || len(ys) == 0 {
		return false
	}
	inYs := make(map[string]struct{}, len(ys))
	for _, y := range ys {
		inYs[y] = struct{}{}
	}
	for _, x := range xs {
		if _, shared := inYs[x]; shared {
			return true
		}
	}
	return false
}
|
||||
|
||||
// conflictsWithInFlight reports whether any model listed in evict has a
// positive in-flight request count. Stopping a busy process would cancel its
// callers' connections, so the scheduler uses this to defer a swap until those
// callers have finished.
func conflictsWithInFlight(evict []string, inFlight map[string]int) bool {
	for i := 0; i < len(evict); i++ {
		if busy := inFlight[evict[i]]; busy >= 1 {
			return true
		}
	}
	return false
}
|
||||
|
||||
// containsString reports whether s is an element of xs.
func containsString(xs []string, s string) bool {
	for i := range xs {
		if xs[i] == s {
			return true
		}
	}
	return false
}
|
||||
|
||||
// broadcastQueuePositions sends each queued request its current 1-indexed
|
||||
// position. Sends are non-blocking: if the channel is full, the old value is
|
||||
// drained first so the consumer always sees the latest position.
|
||||
func broadcastQueuePositions(queued []HandlerReq) {
|
||||
for i, req := range queued {
|
||||
pos := i + 1
|
||||
select {
|
||||
case req.PositionCh <- pos:
|
||||
default:
|
||||
select {
|
||||
case <-req.PositionCh:
|
||||
default:
|
||||
}
|
||||
select {
|
||||
case req.PositionCh <- pos:
|
||||
default:
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,779 @@
|
||||
package scheduler
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"io"
|
||||
"net/http"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/mostlygeek/llama-swap/internal/config"
|
||||
"github.com/mostlygeek/llama-swap/internal/logmon"
|
||||
"github.com/mostlygeek/llama-swap/internal/process"
|
||||
"github.com/mostlygeek/llama-swap/internal/shared"
|
||||
)
|
||||
|
||||
// FIFO methods all run on the router's single run-loop goroutine, so these
|
||||
// tests drive them directly and synchronously. A swap is "completed" by calling
|
||||
// OnSwapDone, a served request "finishes" by calling OnServeDone — exactly the
|
||||
// events the run loop would deliver. fakeEffects records every side-effect and
|
||||
// stubPlanner supplies a fixed eviction set per target.
|
||||
|
||||
// stubPlanner is a test planner whose eviction answer is a fixed, per-target
// list; the running set it is given is ignored.
type stubPlanner struct {
	evict map[string][]string // target model -> models to evict
}

// EvictionFor returns the eviction list configured for target, or nil when
// none was configured (indexing a nil map yields nil as well).
func (s *stubPlanner) EvictionFor(target string, _ []string) []string {
	return s.evict[target]
}

// OnSwapStart does nothing; the stub keeps no swap history.
func (s *stubPlanner) OnSwapStart(string, []string) {}
|
||||
|
||||
// grantRec records a single GrantError or GrantServe call. A non-nil err marks
// an error grant; otherwise the record is a serve grant and serve tells
// whether the caller received it.
type grantRec struct {
	model string // model the grant was for
	err   error  // set for error grants only
	serve bool   // serve grants: did the caller receive the handler?
}

// startRec records a single StartSwap call.
type startRec struct {
	model string   // swap target
	evict []string // models the swap was asked to evict
}

// stopRec records a single StopProcesses call.
type stopRec struct {
	timeout time.Duration // timeout passed to StopProcesses
	ids     []string      // models that were asked to stop
}
|
||||
|
||||
// fakeEffects is an in-memory scheduler.Effects. Tests program process states
|
||||
// and GrantServe outcomes, then assert on the recorded calls.
|
||||
type fakeEffects struct {
|
||||
states map[string]process.ProcessState // model -> state; missing => not handled
|
||||
serveResult map[string]bool // GrantServe return per model (default true)
|
||||
lastServeReq HandlerReq
|
||||
|
||||
starts []startRec
|
||||
grants []grantRec
|
||||
stops []stopRec
|
||||
}
|
||||
|
||||
func newFakeEffects() *fakeEffects {
|
||||
return &fakeEffects{
|
||||
states: map[string]process.ProcessState{},
|
||||
serveResult: map[string]bool{},
|
||||
}
|
||||
}
|
||||
|
||||
func (f *fakeEffects) ModelState(modelID string) (process.ProcessState, bool) {
|
||||
st, ok := f.states[modelID]
|
||||
return st, ok
|
||||
}
|
||||
|
||||
func (f *fakeEffects) RunningModels() map[string]process.ProcessState {
|
||||
out := make(map[string]process.ProcessState)
|
||||
for id, st := range f.states {
|
||||
if st == process.StateStopped || st == process.StateShutdown {
|
||||
continue
|
||||
}
|
||||
out[id] = st
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
func (f *fakeEffects) StartSwap(modelID string, evict []string) {
|
||||
f.starts = append(f.starts, startRec{model: modelID, evict: evict})
|
||||
}
|
||||
|
||||
func (f *fakeEffects) GrantError(req HandlerReq, err error) {
|
||||
f.grants = append(f.grants, grantRec{model: req.Model, err: err})
|
||||
}
|
||||
|
||||
func (f *fakeEffects) GrantServe(req HandlerReq, modelID string) bool {
|
||||
ok := true
|
||||
if v, set := f.serveResult[modelID]; set {
|
||||
ok = v
|
||||
}
|
||||
f.lastServeReq = req
|
||||
f.grants = append(f.grants, grantRec{model: modelID, serve: ok})
|
||||
return ok
|
||||
}
|
||||
|
||||
func (f *fakeEffects) StopProcesses(timeout time.Duration, ids []string) {
|
||||
f.stops = append(f.stops, stopRec{timeout: timeout, ids: ids})
|
||||
}
|
||||
|
||||
// served counts grants that handed modelID a handler and were received.
|
||||
func (f *fakeEffects) served(modelID string) int {
|
||||
n := 0
|
||||
for _, g := range f.grants {
|
||||
if g.err == nil && g.serve && g.model == modelID {
|
||||
n++
|
||||
}
|
||||
}
|
||||
return n
|
||||
}
|
||||
|
||||
// errored counts error grants, optionally filtered by model ("" = any).
|
||||
func (f *fakeEffects) errored(model string) int {
|
||||
n := 0
|
||||
for _, g := range f.grants {
|
||||
if g.err != nil && (model == "" || g.model == model) {
|
||||
n++
|
||||
}
|
||||
}
|
||||
return n
|
||||
}
|
||||
|
||||
// startsFor counts StartSwap calls for modelID.
|
||||
func (f *fakeEffects) startsFor(modelID string) int {
|
||||
n := 0
|
||||
for _, s := range f.starts {
|
||||
if s.model == modelID {
|
||||
n++
|
||||
}
|
||||
}
|
||||
return n
|
||||
}
|
||||
|
||||
func newFIFO(planner Swapper, eff Effects) *FIFO {
|
||||
return NewFIFO("test", logmon.NewWriter(io.Discard), planner, config.FifoConfig{}, nil, eff)
|
||||
}
|
||||
|
||||
func req(model string) HandlerReq { return HandlerReq{Model: model} }
|
||||
|
||||
// reqCh creates a HandlerReq with a unique Respond channel so OnCancel can
|
||||
// identify it among queued requests and swap waiters.
|
||||
func reqCh(model string) HandlerReq {
|
||||
return HandlerReq{
|
||||
Model: model,
|
||||
Respond: make(chan HandlerResp, 1),
|
||||
}
|
||||
}
|
||||
|
||||
func TestFIFO_FastPath(t *testing.T) {
|
||||
eff := newFakeEffects()
|
||||
eff.states["a"] = process.StateReady
|
||||
s := newFIFO(&stubPlanner{}, eff)
|
||||
|
||||
s.OnRequest(req("a"))
|
||||
|
||||
if got := eff.startsFor("a"); got != 0 {
|
||||
t.Errorf("StartSwap calls=%d want 0 (fast path should not swap)", got)
|
||||
}
|
||||
if got := eff.served("a"); got != 1 {
|
||||
t.Errorf("served(a)=%d want 1", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestFIFO_GrantSetsPriorityMetadata(t *testing.T) {
|
||||
eff := newFakeEffects()
|
||||
eff.states["a"] = process.StateReady
|
||||
cfg := config.FifoConfig{Priority: map[string]int{"a": 7}}
|
||||
s := NewFIFO("test", logmon.NewWriter(io.Discard), &stubPlanner{}, cfg, nil, eff)
|
||||
|
||||
ctx := shared.SetContext(context.Background(), shared.ReqContextData{ModelID: "a", Metadata: make(map[string]string)})
|
||||
s.OnRequest(HandlerReq{Model: "a", Ctx: ctx})
|
||||
|
||||
if got := eff.served("a"); got != 1 {
|
||||
t.Fatalf("served(a)=%d want 1", got)
|
||||
}
|
||||
data, ok := shared.ReadContext(eff.lastServeReq.Ctx)
|
||||
if !ok {
|
||||
t.Fatal("context data missing from granted request")
|
||||
}
|
||||
if data.Metadata["fifo_priority"] != "7" {
|
||||
t.Errorf("fifo_priority = %q, want 7", data.Metadata["fifo_priority"])
|
||||
}
|
||||
}
|
||||
|
||||
func TestFIFO_ModelNotFound(t *testing.T) {
|
||||
eff := newFakeEffects() // no states => model unknown
|
||||
s := newFIFO(&stubPlanner{}, eff)
|
||||
|
||||
s.OnRequest(req("ghost"))
|
||||
|
||||
if got := len(eff.starts); got != 0 {
|
||||
t.Errorf("StartSwap calls=%d want 0", got)
|
||||
}
|
||||
if eff.errored("ghost") != 1 {
|
||||
t.Fatalf("want 1 error grant for ghost, grants=%+v", eff.grants)
|
||||
}
|
||||
if !errors.Is(eff.grants[0].err, ErrModelNotFound) {
|
||||
t.Errorf("err=%v want ErrModelNotFound", eff.grants[0].err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestFIFO_OnDemandStartThenServe(t *testing.T) {
|
||||
eff := newFakeEffects()
|
||||
eff.states["a"] = process.StateStopped
|
||||
s := newFIFO(&stubPlanner{}, eff)
|
||||
|
||||
s.OnRequest(req("a"))
|
||||
if got := eff.startsFor("a"); got != 1 {
|
||||
t.Fatalf("StartSwap(a)=%d want 1", got)
|
||||
}
|
||||
if got := eff.served("a"); got != 0 {
|
||||
t.Errorf("served(a)=%d want 0 before swap completes", got)
|
||||
}
|
||||
|
||||
// Swap finishes, model is now ready.
|
||||
eff.states["a"] = process.StateReady
|
||||
s.OnSwapDone(SwapDone{ModelID: "a"})
|
||||
|
||||
if got := eff.served("a"); got != 1 {
|
||||
t.Errorf("served(a)=%d want 1 after swap done", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestFIFO_JoinInFlightSwap(t *testing.T) {
|
||||
eff := newFakeEffects()
|
||||
eff.states["a"] = process.StateStopped
|
||||
s := newFIFO(&stubPlanner{}, eff)
|
||||
|
||||
s.OnRequest(req("a")) // starts swap
|
||||
s.OnRequest(req("a")) // joins
|
||||
s.OnRequest(req("a")) // joins
|
||||
|
||||
if got := eff.startsFor("a"); got != 1 {
|
||||
t.Fatalf("StartSwap(a)=%d want 1 (all three share one swap)", got)
|
||||
}
|
||||
|
||||
eff.states["a"] = process.StateReady
|
||||
s.OnSwapDone(SwapDone{ModelID: "a"})
|
||||
|
||||
if got := eff.served("a"); got != 3 {
|
||||
t.Errorf("served(a)=%d want 3 (one swap serves all waiters)", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestFIFO_SwapDoneError_FailsAllWaiters(t *testing.T) {
|
||||
eff := newFakeEffects()
|
||||
eff.states["a"] = process.StateStopped
|
||||
s := newFIFO(&stubPlanner{}, eff)
|
||||
|
||||
s.OnRequest(req("a"))
|
||||
s.OnRequest(req("a"))
|
||||
|
||||
s.OnSwapDone(SwapDone{ModelID: "a", Err: errors.New("boom")})
|
||||
|
||||
if eff.served("a") != 0 {
|
||||
t.Errorf("served(a)=%d want 0 on swap error", eff.served("a"))
|
||||
}
|
||||
if eff.errored("a") != 2 {
|
||||
t.Errorf("errored(a)=%d want 2 (both waiters fail)", eff.errored("a"))
|
||||
}
|
||||
}
|
||||
|
||||
// TestFIFO_QueueOnEvictionCollision covers a request whose target evicts the
|
||||
// model currently being swapped: it must queue until that swap finishes AND its
|
||||
// served request drains, because starting it would stop a busy process.
|
||||
func TestFIFO_QueueOnEvictionCollision(t *testing.T) {
|
||||
eff := newFakeEffects()
|
||||
eff.states["a"] = process.StateStopped
|
||||
eff.states["b"] = process.StateStopped
|
||||
// Loading b evicts a.
|
||||
s := newFIFO(&stubPlanner{evict: map[string][]string{"b": {"a"}}}, eff)
|
||||
|
||||
s.OnRequest(req("a")) // StartSwap(a)
|
||||
s.OnRequest(req("b")) // collides with a's in-flight swap -> queue
|
||||
if got := eff.startsFor("b"); got != 0 {
|
||||
t.Fatalf("b started early: StartSwap(b)=%d want 0", got)
|
||||
}
|
||||
|
||||
// a becomes ready and is granted (now serving, inFlight[a]=1).
|
||||
eff.states["a"] = process.StateReady
|
||||
s.OnSwapDone(SwapDone{ModelID: "a"})
|
||||
if got := eff.startsFor("b"); got != 0 {
|
||||
t.Fatalf("b started while a is serving: StartSwap(b)=%d want 0", got)
|
||||
}
|
||||
|
||||
// a's request finishes -> a no longer in-flight -> b may now swap.
|
||||
s.OnServeDone(ServeDoneEvent{ModelID: "a"})
|
||||
if got := eff.startsFor("b"); got != 1 {
|
||||
t.Fatalf("StartSwap(b)=%d want 1 after a drained", got)
|
||||
}
|
||||
if got := eff.starts[len(eff.starts)-1].evict; len(got) != 1 || got[0] != "a" {
|
||||
t.Errorf("b swap evict=%v want [a]", got)
|
||||
}
|
||||
}
|
||||
|
||||
// TestFIFO_DisjointSwapsRunInParallel verifies two requests with
|
||||
// non-conflicting evict sets both start without waiting for each other.
|
||||
func TestFIFO_DisjointSwapsRunInParallel(t *testing.T) {
|
||||
eff := newFakeEffects()
|
||||
eff.states["a"] = process.StateStopped
|
||||
eff.states["b"] = process.StateStopped
|
||||
s := newFIFO(&stubPlanner{}, eff) // empty evicts
|
||||
|
||||
s.OnRequest(req("a"))
|
||||
s.OnRequest(req("b"))
|
||||
|
||||
if eff.startsFor("a") != 1 || eff.startsFor("b") != 1 {
|
||||
t.Fatalf("StartSwap a=%d b=%d want 1 each (parallel)", eff.startsFor("a"), eff.startsFor("b"))
|
||||
}
|
||||
}
|
||||
|
||||
// TestFIFO_OverlappingEvictSetsDoNotRunInParallel verifies two swaps with
|
||||
// different targets that evict the *same* model do not run concurrently: the
|
||||
// second must queue rather than double-evict the shared model. Neither target is
|
||||
// in the other's evict set, so this is only caught by the evict-set overlap
|
||||
// check in collidesWith.
|
||||
func TestFIFO_OverlappingEvictSetsDoNotRunInParallel(t *testing.T) {
|
||||
eff := newFakeEffects()
|
||||
eff.states["a"] = process.StateStopped
|
||||
eff.states["b"] = process.StateStopped
|
||||
eff.states["x"] = process.StateReady // shared eviction target, running
|
||||
// Loading a or b both require evicting x.
|
||||
s := newFIFO(&stubPlanner{evict: map[string][]string{"a": {"x"}, "b": {"x"}}}, eff)
|
||||
|
||||
s.OnRequest(req("a")) // StartSwap(a, [x])
|
||||
s.OnRequest(req("b")) // overlaps a's evict set ([x]) -> queue
|
||||
if eff.startsFor("a") != 1 {
|
||||
t.Fatalf("StartSwap(a)=%d want 1", eff.startsFor("a"))
|
||||
}
|
||||
if got := eff.startsFor("b"); got != 0 {
|
||||
t.Fatalf("b started in parallel while a evicts x: StartSwap(b)=%d want 0", got)
|
||||
}
|
||||
|
||||
// a's swap completes and x is gone; b can now evict nothing and start.
|
||||
eff.states["a"] = process.StateReady
|
||||
eff.states["x"] = process.StateStopped
|
||||
s.OnSwapDone(SwapDone{ModelID: "a"})
|
||||
if got := eff.startsFor("b"); got != 1 {
|
||||
t.Fatalf("StartSwap(b)=%d want 1 after a's swap drained", got)
|
||||
}
|
||||
}
|
||||
|
||||
// TestFIFO_QueueDrainPromotesMultiple verifies completing one swap unblocks
|
||||
// every queued request that no longer collides — they all start together.
|
||||
func TestFIFO_QueueDrainPromotesMultiple(t *testing.T) {
|
||||
eff := newFakeEffects()
|
||||
eff.states["a"] = process.StateStopped
|
||||
eff.states["b"] = process.StateStopped
|
||||
eff.states["c"] = process.StateStopped
|
||||
// a's swap evicts both b and c; b and c evict nothing.
|
||||
s := newFIFO(&stubPlanner{evict: map[string][]string{"a": {"b", "c"}}}, eff)
|
||||
|
||||
s.OnRequest(req("a")) // StartSwap(a, [b,c])
|
||||
s.OnRequest(req("b")) // collides (in a's evict set) -> queue
|
||||
s.OnRequest(req("c")) // collides -> queue
|
||||
if eff.startsFor("b") != 0 || eff.startsFor("c") != 0 {
|
||||
t.Fatalf("b/c started early")
|
||||
}
|
||||
|
||||
eff.states["a"] = process.StateReady
|
||||
s.OnSwapDone(SwapDone{ModelID: "a"})
|
||||
|
||||
// b and c have empty evict sets and don't evict a, so both start now.
|
||||
if eff.startsFor("b") != 1 || eff.startsFor("c") != 1 {
|
||||
t.Fatalf("StartSwap b=%d c=%d want 1 each after a done", eff.startsFor("b"), eff.startsFor("c"))
|
||||
}
|
||||
if eff.served("a") != 1 {
|
||||
t.Errorf("served(a)=%d want 1", eff.served("a"))
|
||||
}
|
||||
}
|
||||
|
||||
// TestFIFO_QueueCollation verifies duplicate requests collapse into one swap
|
||||
// per model: the second request for each model joins the active swap (at arrival
|
||||
// or at drain time) rather than triggering its own swap.
|
||||
func TestFIFO_QueueCollation(t *testing.T) {
|
||||
eff := newFakeEffects()
|
||||
for _, id := range []string{"a", "b", "c"} {
|
||||
eff.states[id] = process.StateStopped
|
||||
}
|
||||
// Each model evicts the other two: all swaps are mutually exclusive.
|
||||
s := newFIFO(&stubPlanner{evict: map[string][]string{
|
||||
"a": {"b", "c"},
|
||||
"b": {"a", "c"},
|
||||
"c": {"a", "b"},
|
||||
}}, eff)
|
||||
|
||||
for _, id := range []string{"a", "b", "c", "a", "b", "c"} {
|
||||
s.OnRequest(req(id))
|
||||
}
|
||||
|
||||
// Drain a, then its served requests, which promotes b; repeat for b -> c.
|
||||
drain := func(model string, waiters int) {
|
||||
eff.states[model] = process.StateReady
|
||||
s.OnSwapDone(SwapDone{ModelID: model})
|
||||
for i := 0; i < waiters; i++ {
|
||||
s.OnServeDone(ServeDoneEvent{ModelID: model})
|
||||
}
|
||||
}
|
||||
drain("a", 2)
|
||||
drain("b", 2)
|
||||
drain("c", 2)
|
||||
|
||||
for _, id := range []string{"a", "b", "c"} {
|
||||
if got := eff.startsFor(id); got != 1 {
|
||||
t.Errorf("StartSwap(%s)=%d want 1 (collation)", id, got)
|
||||
}
|
||||
if got := eff.served(id); got != 2 {
|
||||
t.Errorf("served(%s)=%d want 2", id, got)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TestFIFO_NoSwapWhileServing verifies a model still handling requests is not
|
||||
// evicted: the evicting request waits until every in-flight request drains.
|
||||
func TestFIFO_NoSwapWhileServing(t *testing.T) {
|
||||
eff := newFakeEffects()
|
||||
eff.states["a"] = process.StateReady
|
||||
eff.states["b"] = process.StateStopped
|
||||
s := newFIFO(&stubPlanner{evict: map[string][]string{"b": {"a"}}}, eff)
|
||||
|
||||
s.OnRequest(req("a")) // fast path, inFlight[a]=1
|
||||
s.OnRequest(req("a")) // fast path, inFlight[a]=2
|
||||
s.OnRequest(req("b")) // would evict busy a -> queue
|
||||
if eff.startsFor("b") != 0 {
|
||||
t.Fatalf("b started while a serving")
|
||||
}
|
||||
|
||||
s.OnServeDone(ServeDoneEvent{ModelID: "a"}) // inFlight[a]=1
|
||||
if eff.startsFor("b") != 0 {
|
||||
t.Fatalf("b started while a still serving one request")
|
||||
}
|
||||
|
||||
s.OnServeDone(ServeDoneEvent{ModelID: "a"}) // inFlight[a]=0
|
||||
if eff.startsFor("b") != 1 {
|
||||
t.Fatalf("StartSwap(b)=%d want 1 after a fully drained", eff.startsFor("b"))
|
||||
}
|
||||
}
|
||||
|
||||
// TestFIFO_GrantServeFalseDoesNotLeakInFlight verifies that when a caller has
|
||||
// walked away (GrantServe returns false) the in-flight count is not bumped, so a
|
||||
// later evicting request is not blocked forever.
|
||||
func TestFIFO_GrantServeFalseDoesNotLeakInFlight(t *testing.T) {
|
||||
eff := newFakeEffects()
|
||||
eff.states["a"] = process.StateStopped
|
||||
eff.states["b"] = process.StateStopped
|
||||
eff.serveResult["a"] = false // a's waiter is gone by grant time
|
||||
s := newFIFO(&stubPlanner{evict: map[string][]string{"b": {"a"}}}, eff)
|
||||
|
||||
s.OnRequest(req("a"))
|
||||
eff.states["a"] = process.StateReady
|
||||
s.OnSwapDone(SwapDone{ModelID: "a"}) // grant fails, inFlight[a] stays 0
|
||||
|
||||
// b evicts a; since a is not in-flight, b should start immediately.
|
||||
s.OnRequest(req("b"))
|
||||
if eff.startsFor("b") != 1 {
|
||||
t.Fatalf("StartSwap(b)=%d want 1 (no leaked in-flight on a)", eff.startsFor("b"))
|
||||
}
|
||||
}
|
||||
|
||||
// TestFIFO_OnShutdown_FailsAllWaiters verifies shutdown errors every waiter the
|
||||
// scheduler holds: active-swap waiters and queued requests alike.
|
||||
func TestFIFO_OnShutdown_FailsAllWaiters(t *testing.T) {
|
||||
eff := newFakeEffects()
|
||||
for _, id := range []string{"a", "b", "c"} {
|
||||
eff.states[id] = process.StateStopped
|
||||
}
|
||||
// a and b load in parallel; c collides with both and queues.
|
||||
s := newFIFO(&stubPlanner{evict: map[string][]string{"c": {"a", "b"}}}, eff)
|
||||
|
||||
s.OnRequest(req("a")) // StartSwap(a)
|
||||
s.OnRequest(req("a")) // join a
|
||||
s.OnRequest(req("b")) // StartSwap(b)
|
||||
s.OnRequest(req("b")) // join b
|
||||
s.OnRequest(req("c")) // queued
|
||||
|
||||
s.OnShutdown(errors.New("shutting down"))
|
||||
|
||||
if got := eff.errored(""); got != 5 {
|
||||
t.Errorf("error grants=%d want 5 (2 a + 2 b + 1 c)", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestFIFO_OnUnload_ReleasesActiveWaiters(t *testing.T) {
|
||||
eff := newFakeEffects()
|
||||
eff.states["a"] = process.StateStopped
|
||||
s := newFIFO(&stubPlanner{}, eff)
|
||||
|
||||
s.OnRequest(req("a")) // active swap a with one waiter
|
||||
s.OnRequest(req("a")) // join
|
||||
|
||||
s.OnUnload([]string{"a"}, time.Second)
|
||||
|
||||
if got := eff.errored("a"); got != 2 {
|
||||
t.Errorf("errored(a)=%d want 2 (active swap waiters released)", got)
|
||||
}
|
||||
if len(eff.stops) != 1 || len(eff.stops[0].ids) != 1 || eff.stops[0].ids[0] != "a" {
|
||||
t.Errorf("StopProcesses=%+v want one call stopping [a]", eff.stops)
|
||||
}
|
||||
if eff.stops[0].timeout != time.Second {
|
||||
t.Errorf("StopProcesses timeout=%v want 1s", eff.stops[0].timeout)
|
||||
}
|
||||
}
|
||||
|
||||
func TestFIFO_OnUnload_DropsQueuedRequests(t *testing.T) {
|
||||
eff := newFakeEffects()
|
||||
eff.states["a"] = process.StateStopped
|
||||
eff.states["b"] = process.StateStopped
|
||||
// b evicts a, so a request for b queues while a is loading.
|
||||
s := newFIFO(&stubPlanner{evict: map[string][]string{"b": {"a"}}}, eff)
|
||||
|
||||
s.OnRequest(req("a")) // StartSwap(a)
|
||||
s.OnRequest(req("b")) // queued
|
||||
|
||||
s.OnUnload([]string{"b"}, time.Second)
|
||||
|
||||
if got := eff.errored("b"); got != 1 {
|
||||
t.Errorf("errored(b)=%d want 1 (queued request dropped)", got)
|
||||
}
|
||||
if got := eff.startsFor("b"); got != 0 {
|
||||
t.Errorf("StartSwap(b)=%d want 0 (b should never start)", got)
|
||||
}
|
||||
// a's swap is untouched: its waiter is neither served nor errored yet.
|
||||
if eff.served("a") != 0 || eff.errored("a") != 0 {
|
||||
t.Errorf("a swap should be untouched: served=%d errored=%d", eff.served("a"), eff.errored("a"))
|
||||
}
|
||||
}
|
||||
|
||||
// TestFIFO_PriorityQueueOrder verifies queued requests are ordered by descending
|
||||
// priority, with arrival (FIFO) order preserved among equal-priority models.
|
||||
func TestFIFO_PriorityQueueOrder(t *testing.T) {
|
||||
eff := newFakeEffects()
|
||||
for _, m := range []string{"z", "A", "B", "C", "D"} {
|
||||
eff.states[m] = process.StateStopped
|
||||
}
|
||||
// z's swap evicts every other model, so any request that arrives while z is
|
||||
// loading collides with z's in-flight swap and parks in the queue.
|
||||
planner := &stubPlanner{evict: map[string][]string{"z": {"A", "B", "C", "D"}}}
|
||||
cfg := config.FifoConfig{Priority: map[string]int{"A": 10, "B": 5, "C": 5, "D": 1}}
|
||||
s := NewFIFO("test", logmon.NewWriter(io.Discard), planner, cfg, nil, eff)
|
||||
|
||||
s.OnRequest(req("z")) // StartSwap(z, [A,B,C,D])
|
||||
|
||||
// Arrive out of priority order; B before C exercises FIFO tie-breaking.
|
||||
for _, m := range []string{"B", "D", "C", "A"} {
|
||||
s.OnRequest(req(m))
|
||||
}
|
||||
|
||||
got := make([]string, len(s.queued))
|
||||
for i, q := range s.queued {
|
||||
got[i] = q.Model
|
||||
}
|
||||
want := []string{"A", "B", "C", "D"}
|
||||
if len(got) != len(want) {
|
||||
t.Fatalf("queue=%v want %v", got, want)
|
||||
}
|
||||
for i := range want {
|
||||
if got[i] != want[i] {
|
||||
t.Fatalf("queue=%v want %v", got, want)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TestFIFO_OnCancel_QueuedRequest verifies that cancelling a queued request
|
||||
// prevents drainQueue from ever starting a model load for it. Without OnCancel
|
||||
// the dead request would sit in the queue until a drain triggers a wasted swap.
|
||||
func TestFIFO_OnCancel_QueuedRequest(t *testing.T) {
|
||||
eff := newFakeEffects()
|
||||
eff.states["a"] = process.StateStopped
|
||||
eff.states["b"] = process.StateStopped
|
||||
// b evicts a, so a request for b queues while a is loading.
|
||||
s := newFIFO(&stubPlanner{evict: map[string][]string{"b": {"a"}}}, eff)
|
||||
|
||||
s.OnRequest(req("a")) // StartSwap(a)
|
||||
|
||||
cancelledReq := reqCh("b")
|
||||
s.OnRequest(cancelledReq) // queued (collides with a's in-flight swap)
|
||||
if len(s.queued) != 1 {
|
||||
t.Fatalf("queue len=%d want 1 before cancel", len(s.queued))
|
||||
}
|
||||
|
||||
// Client disconnects.
|
||||
s.OnCancel(cancelledReq)
|
||||
|
||||
if len(s.queued) != 0 {
|
||||
t.Fatalf("queue len=%d want 0 after cancel", len(s.queued))
|
||||
}
|
||||
|
||||
// a's swap finishes; drainQueue runs but b is gone — no swap for b.
|
||||
eff.states["a"] = process.StateReady
|
||||
s.OnSwapDone(SwapDone{ModelID: "a"})
|
||||
|
||||
if got := eff.startsFor("b"); got != 0 {
|
||||
t.Errorf("StartSwap(b)=%d want 0 (cancelled request should not trigger a load)", got)
|
||||
}
|
||||
}
|
||||
|
||||
// TestFIFO_OnCancel_SwapWaiter verifies that cancelling a request that joined an
|
||||
// in-flight swap removes it from the waiter list. When the swap completes, the
|
||||
// cancelled waiter receives no grant and does not bump the in-flight count.
|
||||
func TestFIFO_OnCancel_SwapWaiter(t *testing.T) {
|
||||
eff := newFakeEffects()
|
||||
eff.states["a"] = process.StateStopped
|
||||
s := newFIFO(&stubPlanner{}, eff)
|
||||
|
||||
liveReq := reqCh("a")
|
||||
cancelledReq := reqCh("a")
|
||||
s.OnRequest(liveReq) // starts swap
|
||||
s.OnRequest(cancelledReq) // joins
|
||||
|
||||
if sw := s.active["a"]; len(sw.waiters) != 2 {
|
||||
t.Fatalf("waiters=%d want 2", len(sw.waiters))
|
||||
}
|
||||
|
||||
s.OnCancel(cancelledReq)
|
||||
|
||||
if sw := s.active["a"]; len(sw.waiters) != 1 {
|
||||
t.Fatalf("waiters=%d want 1 after cancel", len(sw.waiters))
|
||||
}
|
||||
|
||||
// Swap finishes: only the live waiter is granted.
|
||||
eff.states["a"] = process.StateReady
|
||||
s.OnSwapDone(SwapDone{ModelID: "a"})
|
||||
|
||||
if got := eff.served("a"); got != 1 {
|
||||
t.Errorf("served(a)=%d want 1 (only the non-cancelled waiter)", got)
|
||||
}
|
||||
}
|
||||
|
||||
// TestFIFO_OnCancel_NotPresent is a no-op: cancelling a request that was already
|
||||
// granted (and is no longer queued or waiting) must not affect anything.
|
||||
func TestFIFO_OnCancel_NotPresent(t *testing.T) {
|
||||
eff := newFakeEffects()
|
||||
eff.states["a"] = process.StateReady
|
||||
s := newFIFO(&stubPlanner{}, eff)
|
||||
|
||||
r := reqCh("a")
|
||||
s.OnRequest(r) // fast-path served immediately
|
||||
|
||||
// Cancel after grant — should be a harmless no-op.
|
||||
s.OnCancel(r)
|
||||
|
||||
if got := eff.served("a"); got != 1 {
|
||||
t.Errorf("served(a)=%d want 1 (cancel of granted request is a no-op)", got)
|
||||
}
|
||||
if len(s.queued) != 0 {
|
||||
t.Errorf("queue should be empty, len=%d", len(s.queued))
|
||||
}
|
||||
}
|
||||
|
||||
// newFIFOWithLimit builds a FIFO whose single model has the given concurrency
|
||||
// limit, already in StateReady so every request exercises the fast path.
|
||||
func newFIFOWithLimit(t *testing.T, model string, limit int) (*FIFO, *fakeEffects) {
|
||||
t.Helper()
|
||||
eff := newFakeEffects()
|
||||
eff.states[model] = process.StateReady
|
||||
models := map[string]config.ModelConfig{
|
||||
model: {ConcurrencyLimit: limit},
|
||||
}
|
||||
s := NewFIFO("test", logmon.NewWriter(io.Discard), &stubPlanner{}, config.FifoConfig{}, models, eff)
|
||||
return s, eff
|
||||
}
|
||||
|
||||
// TestFIFO_ConcurrencyLimit_RejectsOverLimit verifies that a request arriving
|
||||
// while the model is at capacity gets an error grant instead of being served,
|
||||
// and that a new request succeeds once an in-flight one completes.
|
||||
func TestFIFO_ConcurrencyLimit_RejectsOverLimit(t *testing.T) {
|
||||
s, eff := newFIFOWithLimit(t, "a", 1)
|
||||
|
||||
// First request: served (inFlight 0 → 1).
|
||||
s.OnRequest(req("a"))
|
||||
if got := eff.served("a"); got != 1 {
|
||||
t.Fatalf("served(a)=%d want 1", got)
|
||||
}
|
||||
|
||||
// Second request while slot is occupied: rejected with HTTPError 429.
|
||||
s.OnRequest(req("a"))
|
||||
if got := eff.errored("a"); got != 1 {
|
||||
t.Fatalf("errored(a)=%d want 1 (over-limit)", got)
|
||||
}
|
||||
var httpErr shared.HTTPError
|
||||
if !errors.As(eff.grants[len(eff.grants)-1].err, &httpErr) {
|
||||
t.Fatalf("err=%v want HTTPError", eff.grants[len(eff.grants)-1].err)
|
||||
}
|
||||
if httpErr.StatusCode() != http.StatusTooManyRequests {
|
||||
t.Fatalf("StatusCode()=%d want 429", httpErr.StatusCode())
|
||||
}
|
||||
if httpErr.Header().Get("Retry-After") == "" {
|
||||
t.Fatal("missing Retry-After header")
|
||||
}
|
||||
|
||||
// After the in-flight request finishes, a new request succeeds.
|
||||
s.OnServeDone(ServeDoneEvent{ModelID: "a"})
|
||||
s.OnRequest(req("a"))
|
||||
if got := eff.served("a"); got != 2 {
|
||||
t.Fatalf("served(a)=%d want 2 after drain", got)
|
||||
}
|
||||
}
|
||||
|
||||
// TestFIFO_ConcurrencyLimit_DefaultIsTen verifies that a model without an
|
||||
// explicit ConcurrencyLimit gets the default cap of 10.
|
||||
func TestFIFO_ConcurrencyLimit_DefaultIsTen(t *testing.T) {
|
||||
eff := newFakeEffects()
|
||||
eff.states["a"] = process.StateReady
|
||||
// nil models → every model gets defaultConcurrencyLimit (10).
|
||||
s := newFIFO(&stubPlanner{}, eff)
|
||||
|
||||
for i := 0; i < 10; i++ {
|
||||
s.OnRequest(req("a"))
|
||||
}
|
||||
if got := eff.served("a"); got != 10 {
|
||||
t.Fatalf("served(a)=%d want 10 (default limit)", got)
|
||||
}
|
||||
|
||||
// 11th request is rejected.
|
||||
s.OnRequest(req("a"))
|
||||
if got := eff.errored("a"); got != 1 {
|
||||
t.Fatalf("errored(a)=%d want 1 (over default limit)", got)
|
||||
}
|
||||
}
|
||||
|
||||
// TestFIFO_ConcurrencyLimit_CustomLimit verifies a ConcurrencyLimit greater
|
||||
// than zero overrides the default.
|
||||
func TestFIFO_ConcurrencyLimit_CustomLimit(t *testing.T) {
|
||||
s, eff := newFIFOWithLimit(t, "a", 2)
|
||||
|
||||
s.OnRequest(req("a"))
|
||||
s.OnRequest(req("a"))
|
||||
s.OnRequest(req("a"))
|
||||
|
||||
if got := eff.served("a"); got != 2 {
|
||||
t.Fatalf("served(a)=%d want 2 (custom limit)", got)
|
||||
}
|
||||
if got := eff.errored("a"); got != 1 {
|
||||
t.Fatalf("errored(a)=%d want 1 (over custom limit)", got)
|
||||
}
|
||||
}
|
||||
|
||||
// TestFIFO_ConcurrencyLimit_SwapWaiters verifies that when more swap waiters
|
||||
// exist than the concurrency limit, excess waiters are rejected on swap
|
||||
// completion rather than exceeding the limit.
|
||||
func TestFIFO_ConcurrencyLimit_SwapWaiters(t *testing.T) {
|
||||
eff := newFakeEffects()
|
||||
eff.states["a"] = process.StateStopped
|
||||
models := map[string]config.ModelConfig{
|
||||
"a": {ConcurrencyLimit: 2},
|
||||
}
|
||||
s := NewFIFO("test", logmon.NewWriter(io.Discard), &stubPlanner{}, config.FifoConfig{}, models, eff)
|
||||
|
||||
// Three requests arrive while model is loading: one starts swap, two join.
|
||||
s.OnRequest(req("a"))
|
||||
s.OnRequest(req("a"))
|
||||
s.OnRequest(req("a"))
|
||||
|
||||
if got := eff.startsFor("a"); got != 1 {
|
||||
t.Fatalf("StartSwap(a)=%d want 1", got)
|
||||
}
|
||||
|
||||
// Swap completes: two served (limit), one rejected.
|
||||
eff.states["a"] = process.StateReady
|
||||
s.OnSwapDone(SwapDone{ModelID: "a"})
|
||||
|
||||
if got := eff.served("a"); got != 2 {
|
||||
t.Fatalf("served(a)=%d want 2 (limit on swap completion)", got)
|
||||
}
|
||||
if got := eff.errored("a"); got != 1 {
|
||||
t.Fatalf("errored(a)=%d want 1 (excess waiter rejected)", got)
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,135 @@
|
||||
// Package scheduler contains the request-scheduling strategies used by the
|
||||
// router's baseRouter. A Scheduler owns the queue, in-flight tracking, and the
|
||||
// decision tree for when to start a swap versus queue a request. The baseRouter
|
||||
// owns the channels, run loop, and process machinery, and exposes the
|
||||
// side-effects a scheduler needs through the Effects interface.
|
||||
//
|
||||
// Splitting these apart lets the scheduling strategy be swapped out
|
||||
// independently of both the process machinery (baseRouter) and the eviction
|
||||
// policy (Swapper). FIFO is the first and currently only implementation.
|
||||
package scheduler
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"time"
|
||||
|
||||
"github.com/mostlygeek/llama-swap/internal/config"
|
||||
"github.com/mostlygeek/llama-swap/internal/logmon"
|
||||
"github.com/mostlygeek/llama-swap/internal/process"
|
||||
"github.com/mostlygeek/llama-swap/internal/shared"
|
||||
)
|
||||
|
||||
// ErrModelNotFound is granted to callers whose model is not handled by this
|
||||
// router. It is an alias for shared.ErrNoLocalModelFound.
|
||||
var ErrModelNotFound = shared.ErrNoLocalModelFound
|
||||
|
||||
// Swapper is the eviction policy. Given a target model and the set of models
// the scheduler treats as live, it names the models that have to be stopped
// before the target can serve. It is independent of the scheduling strategy:
// any Scheduler can be paired with any Swapper.
type Swapper interface {
	// EvictionFor reports which of the running model IDs must be stopped
	// before target can serve.
	//
	// running is the full set the scheduler considers live: every process that
	// is not stopped, plus the targets of in-flight swaps the scheduler has
	// already committed to, which process state does not show yet. The planner
	// works from this set and does not inspect process state itself.
	//
	// EvictionFor is a pure decision and must not log.
	EvictionFor(target string, running []string) []string

	// OnSwapStart is called once at the start of every swap and receives the
	// same running set that EvictionFor was given for that decision. Planners
	// may log their decision here, at whatever verbosity they choose.
	OnSwapStart(target string, running []string)
}
|
||||
|
||||
// Scheduler decides what happens to each event the router's run loop receives.
|
||||
// All methods run on that single run-loop goroutine, so implementations need no
|
||||
// internal locking for their own state.
|
||||
type Scheduler interface {
|
||||
// OnRequest handles one incoming ServeHTTP request.
|
||||
OnRequest(req HandlerReq)
|
||||
// OnCancel handles a request whose client has disconnected before it was
|
||||
// granted. The scheduler must remove the request from its queue and from
|
||||
// any in-flight swap's waiters so it never triggers a model load or grant
|
||||
// for a caller that is no longer there.
|
||||
OnCancel(req HandlerReq)
|
||||
// OnSwapDone handles a swap goroutine reporting completion.
|
||||
OnSwapDone(ev SwapDone)
|
||||
// OnServeDone handles a tracked ServeHTTP finishing (in-flight decrement).
|
||||
OnServeDone(ev ServeDoneEvent)
|
||||
// OnUnload reconciles scheduler state for an unload, stops the targeted
|
||||
// processes via Effects, and drains the queue. It must block until the
|
||||
// targeted processes have stopped.
|
||||
OnUnload(targets []string, timeout time.Duration)
|
||||
// OnShutdown grants err to every waiter the scheduler still holds (active
|
||||
// swap waiters and queued requests). Process teardown is the baseRouter's
|
||||
// responsibility.
|
||||
OnShutdown(err error)
|
||||
}
|
||||
|
||||
// Effects is implemented by the baseRouter. The scheduler calls back through it
|
||||
// for every side-effect: inspecting process state, launching swaps, responding
|
||||
// to callers, and stopping processes.
|
||||
type Effects interface {
|
||||
// ModelState returns the current state of a model's process. ok is false
|
||||
// when the model is not handled by this router.
|
||||
ModelState(modelID string) (process.ProcessState, bool)
|
||||
// RunningModels returns the state of every process that is not stopped or
|
||||
// shut down, keyed by model ID. The scheduler uses it to build the running
|
||||
// set it hands the Swapper.
|
||||
RunningModels() map[string]process.ProcessState
|
||||
// StartSwap launches the swap goroutine for modelID, stopping evict first.
|
||||
StartSwap(modelID string, evict []string)
|
||||
// GrantError responds to a caller with an error.
|
||||
GrantError(req HandlerReq, err error)
|
||||
// GrantServe hands a caller the wrapped handler for modelID and reports
|
||||
// whether the caller was still there to receive it. The scheduler bumps
|
||||
// its in-flight count only when this returns true.
|
||||
GrantServe(req HandlerReq, modelID string) bool
|
||||
// StopProcesses stops the named processes in parallel and blocks until all
|
||||
// have stopped. Unknown IDs are skipped.
|
||||
StopProcesses(timeout time.Duration, ids []string)
|
||||
}
|
||||
|
||||
// New returns a Scheduler selected by conf.Routing.Scheduler.Use, configured
|
||||
// from conf and bound to the given planner and effects. Currently only "fifo"
|
||||
// (the default) is supported.
|
||||
func New(conf config.Config, name string, logger *logmon.Monitor, planner Swapper, eff Effects) (Scheduler, error) {
|
||||
use := conf.Routing.Scheduler.Use
|
||||
if use == "" {
|
||||
use = "fifo"
|
||||
}
|
||||
switch use {
|
||||
case "fifo":
|
||||
return NewFIFO(name, logger, planner, conf.Routing.Scheduler.Settings.Fifo, conf.Models, eff), nil
|
||||
default:
|
||||
return nil, fmt.Errorf("unsupported scheduler type: %q", use)
|
||||
}
|
||||
}
|
||||
|
||||
// HandlerReq is one in-flight ServeHTTP request waiting for a routing decision.
|
||||
type HandlerReq struct {
|
||||
Model string
|
||||
Ctx context.Context
|
||||
Respond chan HandlerResp
|
||||
PositionCh chan int
|
||||
}
|
||||
|
||||
// HandlerResp carries the routing decision for one HandlerReq back to its
// caller. It holds either a handler to serve the request with or an error.
type HandlerResp struct {
	// HandleFunc is the handler the caller should serve with.
	HandleFunc http.HandlerFunc
	// Err is the error returned in place of a handler.
	Err error
}
|
||||
|
||||
// SwapDone is the event a swap goroutine sends once its target model is ready
// or the swap has failed.
type SwapDone struct {
	// ModelID identifies the swap's target model.
	ModelID string
	// Err is presumably non-nil when the swap failed — confirm against the
	// swap goroutine.
	Err error
}
|
||||
|
||||
// ServeDoneEvent is the event sent when a tracked ServeHTTP handler returns.
type ServeDoneEvent struct {
	// ModelID identifies the model whose handler returned.
	ModelID string
}
|
||||
+149
-43
@@ -2,6 +2,7 @@ package server
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"sort"
|
||||
"strings"
|
||||
@@ -9,7 +10,7 @@ import (
|
||||
|
||||
"github.com/mostlygeek/llama-swap/internal/config"
|
||||
"github.com/mostlygeek/llama-swap/internal/event"
|
||||
"github.com/mostlygeek/llama-swap/internal/router"
|
||||
"github.com/mostlygeek/llama-swap/internal/process"
|
||||
"github.com/mostlygeek/llama-swap/internal/shared"
|
||||
)
|
||||
|
||||
@@ -18,13 +19,118 @@ const apiUnloadTimeout = 10 * time.Second
|
||||
|
||||
// modelRecord is one entry in the OpenAI-compatible /v1/models listing.
|
||||
type modelRecord struct {
|
||||
ID string `json:"id"`
|
||||
Object string `json:"object"`
|
||||
Created int64 `json:"created"`
|
||||
OwnedBy string `json:"owned_by"`
|
||||
Name string `json:"name,omitempty"`
|
||||
Description string `json:"description,omitempty"`
|
||||
Meta map[string]any `json:"meta,omitempty"`
|
||||
ID string `json:"id"`
|
||||
Object string `json:"object"`
|
||||
Created int64 `json:"created"`
|
||||
OwnedBy string `json:"owned_by"`
|
||||
Name string `json:"name,omitempty"`
|
||||
Description string `json:"description,omitempty"`
|
||||
Architecture map[string]any `json:"architecture,omitempty"`
|
||||
Capabilities map[string]any `json:"capabilities,omitempty"`
|
||||
SupportedParameters []string `json:"supported_parameters,omitempty"`
|
||||
ContextLength int `json:"context_length,omitempty"`
|
||||
Meta map[string]any `json:"meta,omitempty"`
|
||||
}
|
||||
|
||||
// cappedMetadataKeys is the set of top-level /v1/models fields that the
// capabilities renderer produces. A key from this set found in a model's
// metadata block is dropped, so the renderer's value is the one reported.
var cappedMetadataKeys = map[string]struct{}{
	"architecture":         {},
	"capabilities":         {},
	"context_length":       {},
	"supported_parameters": {},
}
|
||||
|
||||
// renderCapabilities converts a model's capabilities config into additional
|
||||
// /v1/models fields. Returns zero values when caps.Empty() is true.
|
||||
func renderCapabilities(caps config.ModelCapConfig) (arch map[string]any, capsMap map[string]any, params []string, ctxLen int) {
|
||||
if caps.Empty() {
|
||||
return
|
||||
}
|
||||
|
||||
hasIn := len(caps.In) > 0
|
||||
hasOut := len(caps.Out) > 0
|
||||
|
||||
if hasIn || hasOut {
|
||||
arch = make(map[string]any)
|
||||
}
|
||||
if hasIn {
|
||||
arch["input_modalities"] = caps.In
|
||||
}
|
||||
if hasOut {
|
||||
arch["output_modalities"] = caps.Out
|
||||
}
|
||||
if hasIn && hasOut {
|
||||
arch["modality"] = strings.Join(caps.In, "+") + "->" + strings.Join(caps.Out, "+")
|
||||
}
|
||||
|
||||
// Build capabilities map only if there's something to put in it.
|
||||
if hasIn || hasOut || caps.Tools || caps.Reranker {
|
||||
capsMap = make(map[string]any)
|
||||
}
|
||||
|
||||
if hasIn {
|
||||
if contains(caps.In, "image") {
|
||||
capsMap["vision"] = true
|
||||
}
|
||||
}
|
||||
if hasIn && hasOut {
|
||||
if contains(caps.In, "audio") && contains(caps.Out, "text") {
|
||||
capsMap["audio_transcriptions"] = true
|
||||
}
|
||||
if contains(caps.In, "text") && contains(caps.Out, "audio") {
|
||||
capsMap["audio_speech"] = true
|
||||
}
|
||||
if contains(caps.In, "text") && contains(caps.Out, "image") {
|
||||
capsMap["image_generation"] = true
|
||||
}
|
||||
if contains(caps.In, "image") && contains(caps.Out, "image") {
|
||||
capsMap["image_to_image"] = true
|
||||
}
|
||||
}
|
||||
|
||||
if caps.Tools {
|
||||
capsMap["function_calling"] = true
|
||||
params = []string{"tools", "tool_choice"}
|
||||
}
|
||||
|
||||
if caps.Reranker {
|
||||
capsMap["reranker"] = true
|
||||
}
|
||||
|
||||
if caps.Context > 0 {
|
||||
ctxLen = caps.Context
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
// contains reports whether ss has an element equal to s. A nil or empty ss
// never contains anything.
func contains(ss []string, s string) bool {
	for i := 0; i < len(ss); i++ {
		if ss[i] == s {
			return true
		}
	}
	return false
}
|
||||
|
||||
// filterCappedMetadata returns metadata with renderer-owned keys removed.
|
||||
func filterCappedMetadata(md map[string]any) map[string]any {
|
||||
if len(md) == 0 {
|
||||
return nil
|
||||
}
|
||||
filtered := make(map[string]any, len(md))
|
||||
for k, v := range md {
|
||||
if _, capped := cappedMetadataKeys[k]; !capped {
|
||||
filtered[k] = v
|
||||
}
|
||||
}
|
||||
if len(filtered) == 0 {
|
||||
return nil
|
||||
}
|
||||
return filtered
|
||||
}
|
||||
|
||||
// handleListModels serves the OpenAI-compatible model listing: local models
|
||||
@@ -33,7 +139,7 @@ func (s *Server) handleListModels(w http.ResponseWriter, r *http.Request) {
|
||||
created := time.Now().Unix()
|
||||
data := make([]modelRecord, 0, len(s.cfg.Models))
|
||||
|
||||
newRecord := func(id, name, description string, metadata map[string]any) modelRecord {
|
||||
newRecord := func(id, name, description string, metadata map[string]any, caps config.ModelCapConfig) modelRecord {
|
||||
rec := modelRecord{
|
||||
ID: id,
|
||||
Object: "model",
|
||||
@@ -42,6 +148,10 @@ func (s *Server) handleListModels(w http.ResponseWriter, r *http.Request) {
|
||||
Name: strings.TrimSpace(name),
|
||||
Description: strings.TrimSpace(description),
|
||||
}
|
||||
rec.Architecture, rec.Capabilities, rec.SupportedParameters, rec.ContextLength = renderCapabilities(caps)
|
||||
if !caps.Empty() {
|
||||
metadata = filterCappedMetadata(metadata)
|
||||
}
|
||||
if len(metadata) > 0 {
|
||||
rec.Meta = map[string]any{"llamaswap": metadata}
|
||||
}
|
||||
@@ -52,12 +162,12 @@ func (s *Server) handleListModels(w http.ResponseWriter, r *http.Request) {
|
||||
if mc.Unlisted {
|
||||
continue
|
||||
}
|
||||
data = append(data, newRecord(id, mc.Name, mc.Description, mc.Metadata))
|
||||
data = append(data, newRecord(id, mc.Name, mc.Description, mc.Metadata, mc.Capabilities))
|
||||
|
||||
if s.cfg.IncludeAliasesInList {
|
||||
for _, alias := range mc.Aliases {
|
||||
if alias := strings.TrimSpace(alias); alias != "" {
|
||||
data = append(data, newRecord(alias, mc.Name, mc.Description, mc.Metadata))
|
||||
data = append(data, newRecord(alias, mc.Name, mc.Description, mc.Metadata, mc.Capabilities))
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -65,7 +175,7 @@ func (s *Server) handleListModels(w http.ResponseWriter, r *http.Request) {
|
||||
|
||||
for peerID, peer := range s.cfg.Peers {
|
||||
for _, modelID := range peer.Models {
|
||||
data = append(data, newRecord(modelID, peerID+": "+modelID, "", map[string]any{"peerID": peerID}))
|
||||
data = append(data, newRecord(modelID, peerID+": "+modelID, "", map[string]any{"peerID": peerID}, config.ModelCapConfig{}))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -163,7 +273,7 @@ func (s *Server) startPreload() {
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
req = req.WithContext(router.SetContext(req.Context(), router.ReqContextData{Model: modelID, ModelID: modelID}))
|
||||
req = req.WithContext(shared.SetContext(req.Context(), shared.ReqContextData{Model: modelID, ModelID: modelID, Metadata: make(map[string]string)}))
|
||||
|
||||
dw := &discardResponseWriter{status: http.StatusOK}
|
||||
s.local.ServeHTTP(dw, req)
|
||||
@@ -206,9 +316,9 @@ func handleUpstreamRedirect(w http.ResponseWriter, r *http.Request) {
|
||||
func (s *Server) handleUpstream(w http.ResponseWriter, r *http.Request) {
|
||||
upstreamPath := r.PathValue("upstreamPath")
|
||||
|
||||
searchName, modelID, remainingPath, found := findModelInPath(s.cfg, "/"+upstreamPath)
|
||||
searchName, modelID, remainingPath, found := shared.FindModelInPath(s.cfg, "/"+upstreamPath)
|
||||
if !found {
|
||||
router.SendResponse(w, r, http.StatusNotFound, "model not found")
|
||||
shared.SendResponse(w, r, http.StatusNotFound, "model not found")
|
||||
return
|
||||
}
|
||||
|
||||
@@ -230,7 +340,29 @@ func (s *Server) handleUpstream(w http.ResponseWriter, r *http.Request) {
|
||||
// Strip the /upstream/<model> prefix before forwarding.
|
||||
r.URL.Path = remainingPath
|
||||
// Pin the resolved model so the router skips body/query extraction.
|
||||
*r = *r.WithContext(router.SetContext(r.Context(), router.ReqContextData{Model: searchName, ModelID: modelID}))
|
||||
*r = *r.WithContext(shared.SetContext(r.Context(), shared.ReqContextData{Model: searchName, ModelID: modelID, Metadata: make(map[string]string)}))
|
||||
|
||||
// If the path matches an upstream.ignorePaths entry and the model is
|
||||
// not already loaded, refuse the request without triggering a swap. The
|
||||
// server could not process the request because the model was not
|
||||
// already loaded.
|
||||
for _, re := range s.cfg.Upstream.IgnorePaths {
|
||||
if !re.MatchString(remainingPath) {
|
||||
continue
|
||||
}
|
||||
if s.local.Handles(modelID) {
|
||||
state, ok := s.local.RunningModels()[modelID]
|
||||
if !ok || state != process.StateReady {
|
||||
shared.SendResponse(w, r, http.StatusConflict,
|
||||
fmt.Sprintf("model %s is not loaded; path matches upstream.ignorePaths", modelID))
|
||||
return
|
||||
}
|
||||
}
|
||||
// Either the model is already loaded (no swap would be triggered)
|
||||
// or this is a peer model (peer proxying never swaps). Fall through
|
||||
// to normal dispatch.
|
||||
break
|
||||
}
|
||||
|
||||
switch {
|
||||
case s.local.Handles(modelID):
|
||||
@@ -238,32 +370,6 @@ func (s *Server) handleUpstream(w http.ResponseWriter, r *http.Request) {
|
||||
case s.peer.Handles(modelID):
|
||||
s.peer.ServeHTTP(w, r)
|
||||
default:
|
||||
router.SendResponse(w, r, http.StatusNotFound, "no router for model "+modelID)
|
||||
shared.SendResponse(w, r, http.StatusNotFound, "no router for model "+modelID)
|
||||
}
|
||||
}
|
||||
|
||||
// findModelInPath walks a slash-separated path, building up segments until one
|
||||
// matches a configured model. This resolves model names that contain slashes
|
||||
// (e.g. "author/model"). Returns the matched name, its real model ID, the
|
||||
// remaining path, and whether a match was found.
|
||||
func findModelInPath(cfg config.Config, path string) (searchName, realName, remainingPath string, found bool) {
|
||||
parts := strings.Split(strings.TrimSpace(path), "/")
|
||||
name := ""
|
||||
|
||||
for i, part := range parts {
|
||||
if part == "" {
|
||||
continue
|
||||
}
|
||||
if name == "" {
|
||||
name = part
|
||||
} else {
|
||||
name = name + "/" + part
|
||||
}
|
||||
|
||||
if modelID, ok := cfg.RealModelName(name); ok {
|
||||
return name, modelID, "/" + strings.Join(parts[i+1:], "/"), true
|
||||
}
|
||||
}
|
||||
|
||||
return "", "", "", false
|
||||
}
|
||||
|
||||
+428
-2
@@ -2,11 +2,17 @@ package server
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"io"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"regexp"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/mostlygeek/llama-swap/internal/config"
|
||||
"github.com/mostlygeek/llama-swap/internal/logmon"
|
||||
"github.com/mostlygeek/llama-swap/internal/process"
|
||||
"github.com/mostlygeek/llama-swap/internal/shared"
|
||||
)
|
||||
|
||||
func TestServer_HandleListModels(t *testing.T) {
|
||||
@@ -78,6 +84,7 @@ func TestServer_HandleListModels_Aliases(t *testing.T) {
|
||||
|
||||
func TestServer_FindModelInPath(t *testing.T) {
|
||||
cfg := config.Config{Models: map[string]config.ModelConfig{
|
||||
"author": {},
|
||||
"author/model": {},
|
||||
"simple": {},
|
||||
}}
|
||||
@@ -91,13 +98,14 @@ func TestServer_FindModelInPath(t *testing.T) {
|
||||
{"/simple/v1/chat", "simple", "/v1/chat", true},
|
||||
{"/author/model/v1/chat", "author/model", "/v1/chat", true},
|
||||
{"/author/model", "author/model", "/", true},
|
||||
{"/author/v1/chat", "author", "/v1/chat", true},
|
||||
{"/missing/v1", "", "", false},
|
||||
{"/", "", "", false},
|
||||
}
|
||||
for _, c := range cases {
|
||||
name, _, rem, found := findModelInPath(cfg, c.path)
|
||||
name, _, rem, found := shared.FindModelInPath(cfg, c.path)
|
||||
if found != c.wantFound || name != c.wantName || (found && rem != c.wantRem) {
|
||||
t.Errorf("findModelInPath(%q) = (%q,%q,%v), want (%q,%q,%v)",
|
||||
t.Errorf("FindModelInPath(%q) = (%q,%q,%v), want (%q,%q,%v)",
|
||||
c.path, name, rem, found, c.wantName, c.wantRem, c.wantFound)
|
||||
}
|
||||
}
|
||||
@@ -133,6 +141,165 @@ func TestServer_HandleUpstream(t *testing.T) {
|
||||
})
|
||||
}
|
||||
|
||||
func upstreamMetricsServer(response string) *Server {
|
||||
cfg := config.Config{Models: map[string]config.ModelConfig{"m1": {}}}
|
||||
proxylog := logmon.NewWriter(io.Discard)
|
||||
s := &Server{
|
||||
cfg: cfg,
|
||||
muxlog: logmon.NewWriter(io.Discard),
|
||||
proxylog: proxylog,
|
||||
upstreamlog: logmon.NewWriter(io.Discard),
|
||||
inflight: &inflightCounter{},
|
||||
metrics: newMetricsMonitor(proxylog, 10, 0),
|
||||
local: newStubRouter([]string{"m1"}, response),
|
||||
peer: newStubRouter(nil, ""),
|
||||
}
|
||||
s.routes()
|
||||
return s
|
||||
}
|
||||
|
||||
func TestServer_HandleUpstream_IgnorePaths(t *testing.T) {
|
||||
// Compile a pattern that matches static asset suffixes.
|
||||
pattern := regexp.MustCompile(`.*\.(js|json|css|png|gif|jpg|jpeg|txt)$`)
|
||||
|
||||
t.Run("matched path, model not loaded, returns 409", func(t *testing.T) {
|
||||
local := newStubRouter([]string{"m1"}, "upstream-body")
|
||||
// running is nil/empty: model is not in RunningModels() => not loaded.
|
||||
s := newTestServer(local, newStubRouter(nil, ""))
|
||||
s.cfg = config.Config{
|
||||
Models: map[string]config.ModelConfig{"m1": {}},
|
||||
Upstream: config.UpstreamConfig{
|
||||
IgnorePaths: []*regexp.Regexp{pattern},
|
||||
},
|
||||
}
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
s.ServeHTTP(w, httptest.NewRequest(http.MethodGet, "/upstream/m1/foo.js", nil))
|
||||
|
||||
if w.Code != http.StatusConflict {
|
||||
t.Fatalf("status = %d, want %d (body=%q)", w.Code, http.StatusConflict, w.Body.String())
|
||||
}
|
||||
if !strings.Contains(w.Body.String(), "not loaded") {
|
||||
t.Errorf("body = %q, want it to contain 'not loaded'", w.Body.String())
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("matched path, model already loaded, serves normally", func(t *testing.T) {
|
||||
local := newStubRouter([]string{"m1"}, "upstream-body")
|
||||
local.running = map[string]process.ProcessState{"m1": process.StateReady}
|
||||
s := newTestServer(local, newStubRouter(nil, ""))
|
||||
s.cfg = config.Config{
|
||||
Models: map[string]config.ModelConfig{"m1": {}},
|
||||
Upstream: config.UpstreamConfig{
|
||||
IgnorePaths: []*regexp.Regexp{pattern},
|
||||
},
|
||||
}
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
s.ServeHTTP(w, httptest.NewRequest(http.MethodGet, "/upstream/m1/foo.js", nil))
|
||||
|
||||
if w.Code != http.StatusOK || w.Body.String() != "upstream-body" {
|
||||
t.Fatalf("status=%d body=%q, want 200 'upstream-body'", w.Code, w.Body.String())
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("non-matched path, model not loaded, serves normally", func(t *testing.T) {
|
||||
local := newStubRouter([]string{"m1"}, "upstream-body")
|
||||
s := newTestServer(local, newStubRouter(nil, ""))
|
||||
s.cfg = config.Config{
|
||||
Models: map[string]config.ModelConfig{"m1": {}},
|
||||
Upstream: config.UpstreamConfig{
|
||||
IgnorePaths: []*regexp.Regexp{pattern},
|
||||
},
|
||||
}
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
s.ServeHTTP(w, httptest.NewRequest(http.MethodGet, "/upstream/m1/v1/chat/completions", nil))
|
||||
|
||||
if w.Code != http.StatusOK || w.Body.String() != "upstream-body" {
|
||||
t.Fatalf("status=%d body=%q, want 200 'upstream-body'", w.Code, w.Body.String())
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("matched path, peer model, serves normally", func(t *testing.T) {
|
||||
// Peer routers do not appear via RunningModels on the local router;
|
||||
// they should fall through to normal dispatch without 409.
|
||||
local := newStubRouter(nil, "")
|
||||
peer := newStubRouter([]string{"m1"}, "peer-body")
|
||||
s := newTestServer(local, peer)
|
||||
s.cfg = config.Config{
|
||||
Models: map[string]config.ModelConfig{"m1": {}},
|
||||
Upstream: config.UpstreamConfig{
|
||||
IgnorePaths: []*regexp.Regexp{pattern},
|
||||
},
|
||||
}
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
s.ServeHTTP(w, httptest.NewRequest(http.MethodGet, "/upstream/m1/foo.js", nil))
|
||||
|
||||
if w.Code != http.StatusOK || w.Body.String() != "peer-body" {
|
||||
t.Fatalf("status=%d body=%q, want 200 'peer-body'", w.Code, w.Body.String())
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func TestServer_HandleUpstream_MetricsRecordsSupportedPath(t *testing.T) {
|
||||
resp := `{"usage":{"prompt_tokens":3,"completion_tokens":5}}`
|
||||
s := upstreamMetricsServer(resp)
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
req := httptest.NewRequest(http.MethodPost, "/upstream/m1/v1/chat/completions", strings.NewReader(`{}`))
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
s.ServeHTTP(w, req)
|
||||
|
||||
if w.Code != http.StatusOK || w.Body.String() != resp {
|
||||
t.Fatalf("status=%d body=%q", w.Code, w.Body.String())
|
||||
}
|
||||
entries := s.metrics.getMetrics()
|
||||
if len(entries) != 1 {
|
||||
t.Fatalf("want 1 metrics entry, got %d", len(entries))
|
||||
}
|
||||
if entries[0].Model != "m1" {
|
||||
t.Errorf("model = %q, want m1", entries[0].Model)
|
||||
}
|
||||
if entries[0].ReqPath != "/v1/chat/completions" {
|
||||
t.Errorf("req_path = %q, want /v1/chat/completions", entries[0].ReqPath)
|
||||
}
|
||||
if entries[0].Tokens.InputTokens != 3 || entries[0].Tokens.OutputTokens != 5 {
|
||||
t.Errorf("tokens = %+v, want input=3 output=5", entries[0].Tokens)
|
||||
}
|
||||
}
|
||||
|
||||
func TestServer_HandleUpstream_MetricsSkipsUnsupportedPath(t *testing.T) {
|
||||
s := upstreamMetricsServer("ok")
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
req := httptest.NewRequest(http.MethodPost, "/upstream/m1/probe", strings.NewReader(`{}`))
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
s.ServeHTTP(w, req)
|
||||
|
||||
if w.Code != http.StatusOK || w.Body.String() != "ok" {
|
||||
t.Fatalf("status=%d body=%q", w.Code, w.Body.String())
|
||||
}
|
||||
if len(s.metrics.getMetrics()) != 0 {
|
||||
t.Errorf("want no metrics entries for unsupported path, got %d", len(s.metrics.getMetrics()))
|
||||
}
|
||||
}
|
||||
|
||||
func TestServer_HandleUpstream_MetricsSkipsGET(t *testing.T) {
|
||||
s := upstreamMetricsServer(`{"usage":{}}`)
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
s.ServeHTTP(w, httptest.NewRequest(http.MethodGet, "/upstream/m1/v1/chat/completions", nil))
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Fatalf("status=%d", w.Code)
|
||||
}
|
||||
if len(s.metrics.getMetrics()) != 0 {
|
||||
t.Errorf("want no metrics entries for GET upstream, got %d", len(s.metrics.getMetrics()))
|
||||
}
|
||||
}
|
||||
|
||||
func TestServer_HandleMetrics_Unavailable(t *testing.T) {
|
||||
s := newTestServer(newStubRouter(nil, ""), newStubRouter(nil, ""))
|
||||
|
||||
@@ -157,3 +324,262 @@ func TestServer_Redirects(t *testing.T) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestServer_HandleListModels_Capabilities(t *testing.T) {
|
||||
newServer := func(mc config.ModelConfig) *Server {
|
||||
s := newTestServer(newStubRouter(nil, ""), newStubRouter(nil, ""))
|
||||
s.cfg = config.Config{Models: map[string]config.ModelConfig{"m": mc}}
|
||||
return s
|
||||
}
|
||||
getModel := func(t *testing.T, s *Server) modelRecord {
|
||||
t.Helper()
|
||||
w := httptest.NewRecorder()
|
||||
s.ServeHTTP(w, httptest.NewRequest(http.MethodGet, "/v1/models", nil))
|
||||
var resp struct {
|
||||
Data []modelRecord `json:"data"`
|
||||
}
|
||||
if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
|
||||
t.Fatalf("decode: %v", err)
|
||||
}
|
||||
if len(resp.Data) != 1 {
|
||||
t.Fatalf("expected 1 model, got %d", len(resp.Data))
|
||||
}
|
||||
return resp.Data[0]
|
||||
}
|
||||
|
||||
t.Run("all_fields", func(t *testing.T) {
|
||||
m := getModel(t, newServer(config.ModelConfig{
|
||||
Capabilities: config.ModelCapConfig{
|
||||
In: []string{"text", "image"},
|
||||
Out: []string{"text", "audio"},
|
||||
Tools: true,
|
||||
Context: 100000,
|
||||
},
|
||||
}))
|
||||
if m.Architecture == nil {
|
||||
t.Fatal("architecture is nil")
|
||||
}
|
||||
if !anySliceStrEqual(m.Architecture["input_modalities"], []string{"text", "image"}) {
|
||||
t.Errorf("input_modalities = %v", m.Architecture["input_modalities"])
|
||||
}
|
||||
if !anySliceStrEqual(m.Architecture["output_modalities"], []string{"text", "audio"}) {
|
||||
t.Errorf("output_modalities = %v", m.Architecture["output_modalities"])
|
||||
}
|
||||
if m.Architecture["modality"] != "text+image->text+audio" {
|
||||
t.Errorf("modality = %v", m.Architecture["modality"])
|
||||
}
|
||||
if m.Capabilities == nil || m.Capabilities["vision"] != true {
|
||||
t.Errorf("vision = %v", m.Capabilities)
|
||||
}
|
||||
if m.Capabilities["audio_speech"] != true {
|
||||
t.Errorf("audio_speech = %v", m.Capabilities["audio_speech"])
|
||||
}
|
||||
if m.Capabilities["function_calling"] != true {
|
||||
t.Errorf("function_calling = %v", m.Capabilities["function_calling"])
|
||||
}
|
||||
if !stringSliceEqual(m.SupportedParameters, []string{"tools", "tool_choice"}) {
|
||||
t.Errorf("supported_parameters = %v", m.SupportedParameters)
|
||||
}
|
||||
if m.ContextLength != 100000 {
|
||||
t.Errorf("context_length = %d", m.ContextLength)
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("in_only", func(t *testing.T) {
|
||||
m := getModel(t, newServer(config.ModelConfig{
|
||||
Capabilities: config.ModelCapConfig{In: []string{"text", "image"}},
|
||||
}))
|
||||
if m.Architecture == nil {
|
||||
t.Fatal("architecture is nil")
|
||||
}
|
||||
if _, ok := m.Architecture["output_modalities"]; ok {
|
||||
t.Error("should not have output_modalities")
|
||||
}
|
||||
if _, ok := m.Architecture["modality"]; ok {
|
||||
t.Error("should not have modality")
|
||||
}
|
||||
if m.Capabilities == nil || m.Capabilities["vision"] != true {
|
||||
t.Error("expected vision: true")
|
||||
}
|
||||
if m.SupportedParameters != nil {
|
||||
t.Error("should not have supported_parameters")
|
||||
}
|
||||
if m.ContextLength != 0 {
|
||||
t.Error("should not have context_length")
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("out_only", func(t *testing.T) {
|
||||
m := getModel(t, newServer(config.ModelConfig{
|
||||
Capabilities: config.ModelCapConfig{Out: []string{"audio"}},
|
||||
}))
|
||||
if m.Architecture == nil {
|
||||
t.Fatal("architecture is nil")
|
||||
}
|
||||
if _, ok := m.Architecture["input_modalities"]; ok {
|
||||
t.Error("should not have input_modalities")
|
||||
}
|
||||
if len(m.Capabilities) > 0 {
|
||||
t.Errorf("expected no capabilities, got %v", m.Capabilities)
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("tools", func(t *testing.T) {
|
||||
m := getModel(t, newServer(config.ModelConfig{
|
||||
Capabilities: config.ModelCapConfig{Tools: true},
|
||||
}))
|
||||
if m.Capabilities == nil || m.Capabilities["function_calling"] != true {
|
||||
t.Error("expected function_calling: true")
|
||||
}
|
||||
if !stringSliceEqual(m.SupportedParameters, []string{"tools", "tool_choice"}) {
|
||||
t.Errorf("supported_parameters = %v", m.SupportedParameters)
|
||||
}
|
||||
if m.Architecture != nil {
|
||||
t.Error("should not have architecture")
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("reranker", func(t *testing.T) {
|
||||
m := getModel(t, newServer(config.ModelConfig{
|
||||
Capabilities: config.ModelCapConfig{Reranker: true},
|
||||
}))
|
||||
if m.Capabilities == nil || m.Capabilities["reranker"] != true {
|
||||
t.Error("expected reranker: true")
|
||||
}
|
||||
if m.Architecture != nil {
|
||||
t.Error("should not have architecture")
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("context", func(t *testing.T) {
|
||||
m := getModel(t, newServer(config.ModelConfig{
|
||||
Capabilities: config.ModelCapConfig{Context: 32768},
|
||||
}))
|
||||
if m.ContextLength != 32768 {
|
||||
t.Errorf("context_length = %d", m.ContextLength)
|
||||
}
|
||||
if m.Architecture != nil {
|
||||
t.Error("should not have architecture")
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("audio_transcriptions", func(t *testing.T) {
|
||||
m := getModel(t, newServer(config.ModelConfig{
|
||||
Capabilities: config.ModelCapConfig{In: []string{"audio"}, Out: []string{"text"}},
|
||||
}))
|
||||
if m.Capabilities == nil || m.Capabilities["audio_transcriptions"] != true {
|
||||
t.Error("expected audio_transcriptions: true")
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("image_generation", func(t *testing.T) {
|
||||
m := getModel(t, newServer(config.ModelConfig{
|
||||
Capabilities: config.ModelCapConfig{In: []string{"text"}, Out: []string{"image"}},
|
||||
}))
|
||||
if m.Capabilities == nil || m.Capabilities["image_generation"] != true {
|
||||
t.Error("expected image_generation: true")
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("image_to_image", func(t *testing.T) {
|
||||
m := getModel(t, newServer(config.ModelConfig{
|
||||
Capabilities: config.ModelCapConfig{In: []string{"image"}, Out: []string{"image"}},
|
||||
}))
|
||||
if m.Capabilities == nil || m.Capabilities["image_to_image"] != true {
|
||||
t.Error("expected image_to_image: true")
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("empty_skip", func(t *testing.T) {
|
||||
m := getModel(t, newServer(config.ModelConfig{}))
|
||||
if m.Architecture != nil {
|
||||
t.Error("should not have architecture")
|
||||
}
|
||||
if m.Capabilities != nil {
|
||||
t.Error("should not have capabilities")
|
||||
}
|
||||
if m.SupportedParameters != nil {
|
||||
t.Error("should not have supported_parameters")
|
||||
}
|
||||
if m.ContextLength != 0 {
|
||||
t.Error("should not have context_length")
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("metadata_precedence", func(t *testing.T) {
|
||||
m := getModel(t, newServer(config.ModelConfig{
|
||||
Capabilities: config.ModelCapConfig{In: []string{"text"}},
|
||||
Metadata: map[string]any{
|
||||
"architecture": "should-be-dropped",
|
||||
"custom_field": "should-remain",
|
||||
"capabilities": "also-dropped",
|
||||
"other_metadata": "also-remain",
|
||||
},
|
||||
}))
|
||||
if m.Architecture == nil || m.Architecture["input_modalities"] == nil {
|
||||
t.Fatal("architecture should be rendered, not from metadata")
|
||||
}
|
||||
if m.Meta == nil || m.Meta["llamaswap"] == nil {
|
||||
t.Fatal("meta.llamaswap should exist")
|
||||
}
|
||||
meta := m.Meta["llamaswap"].(map[string]any)
|
||||
if _, ok := meta["architecture"]; ok {
|
||||
t.Error("architecture should be filtered from metadata")
|
||||
}
|
||||
if _, ok := meta["custom_field"]; !ok {
|
||||
t.Error("custom_field should remain in metadata")
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("metadata_passthrough_no_caps", func(t *testing.T) {
|
||||
m := getModel(t, newServer(config.ModelConfig{
|
||||
Metadata: map[string]any{
|
||||
"architecture": "preserved",
|
||||
"context_length": 4096,
|
||||
"capabilities": "preserved",
|
||||
"custom_field": "preserved",
|
||||
},
|
||||
}))
|
||||
if m.Architecture != nil {
|
||||
t.Error("should not have architecture when caps is empty")
|
||||
}
|
||||
if m.Meta == nil || m.Meta["llamaswap"] == nil {
|
||||
t.Fatal("meta.llamaswap should exist")
|
||||
}
|
||||
meta := m.Meta["llamaswap"].(map[string]any)
|
||||
if _, ok := meta["architecture"]; !ok {
|
||||
t.Error("architecture should be preserved in metadata when caps is empty")
|
||||
}
|
||||
if _, ok := meta["context_length"]; !ok {
|
||||
t.Error("context_length should be preserved in metadata when caps is empty")
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
// stringSliceEqual reports whether a and b have the same length and hold the
// same string at every index. Only length is compared for emptiness, so a nil
// slice and an empty slice are considered equal.
func stringSliceEqual(a, b []string) bool {
	mismatch := len(a) != len(b)
	// Stop scanning at the first differing element.
	for i := 0; !mismatch && i < len(a); i++ {
		mismatch = a[i] != b[i]
	}
	return !mismatch
}
|
||||
|
||||
// anySliceStrEqual reports whether v is a []any whose elements are all strings
// matching want position by position. Any other dynamic type for v (including
// nil or []string), a length mismatch, or a non-string element yields false.
func anySliceStrEqual(v any, want []string) bool {
	items, isSlice := v.([]any)
	if !isSlice || len(items) != len(want) {
		return false
	}
	for idx, item := range items {
		str, isStr := item.(string)
		if !isStr || str != want[idx] {
			return false
		}
	}
	return true
}
|
||||
|
||||
+28
-23
@@ -12,19 +12,19 @@ import (
|
||||
|
||||
"github.com/mostlygeek/llama-swap/internal/event"
|
||||
"github.com/mostlygeek/llama-swap/internal/perf"
|
||||
"github.com/mostlygeek/llama-swap/internal/router"
|
||||
"github.com/mostlygeek/llama-swap/internal/shared"
|
||||
)
|
||||
|
||||
// apiModel is one entry in the /api/events modelStatus payload.
|
||||
type apiModel struct {
|
||||
Id string `json:"id"`
|
||||
Name string `json:"name"`
|
||||
Description string `json:"description"`
|
||||
State string `json:"state"`
|
||||
Unlisted bool `json:"unlisted"`
|
||||
PeerID string `json:"peerID"`
|
||||
Aliases []string `json:"aliases,omitempty"`
|
||||
Id string `json:"id"`
|
||||
Name string `json:"name"`
|
||||
Description string `json:"description"`
|
||||
State string `json:"state"`
|
||||
Unlisted bool `json:"unlisted"`
|
||||
PeerID string `json:"peerID"`
|
||||
Aliases []string `json:"aliases,omitempty"`
|
||||
Capabilities map[string]any `json:"capabilities,omitempty"`
|
||||
}
|
||||
|
||||
// modelStatus returns every configured model joined with its current process
|
||||
@@ -45,13 +45,15 @@ func (s *Server) modelStatus() []apiModel {
|
||||
if st, ok := running[id]; ok {
|
||||
state = string(st)
|
||||
}
|
||||
_, capsMap, _, _ := renderCapabilities(mc.Capabilities)
|
||||
models = append(models, apiModel{
|
||||
Id: id,
|
||||
Name: mc.Name,
|
||||
Description: mc.Description,
|
||||
State: state,
|
||||
Unlisted: mc.Unlisted,
|
||||
Aliases: mc.Aliases,
|
||||
Id: id,
|
||||
Name: mc.Name,
|
||||
Description: mc.Description,
|
||||
State: state,
|
||||
Unlisted: mc.Unlisted,
|
||||
Aliases: mc.Aliases,
|
||||
Capabilities: capsMap,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -76,11 +78,11 @@ func (s *Server) handleAPIUnloadModel(w http.ResponseWriter, r *http.Request) {
|
||||
requested := strings.TrimPrefix(r.PathValue("model"), "/")
|
||||
realName, found := s.cfg.RealModelName(requested)
|
||||
if !found {
|
||||
router.SendResponse(w, r, http.StatusNotFound, "model not found")
|
||||
shared.SendResponse(w, r, http.StatusNotFound, "model not found")
|
||||
return
|
||||
}
|
||||
if !s.local.Handles(realName) {
|
||||
router.SendResponse(w, r, http.StatusNotFound, "no local server found for requested model")
|
||||
shared.SendResponse(w, r, http.StatusNotFound, "no local server found for requested model")
|
||||
return
|
||||
}
|
||||
s.local.Unload(apiUnloadTimeout, realName)
|
||||
@@ -92,7 +94,7 @@ func (s *Server) handleAPIUnloadModel(w http.ResponseWriter, r *http.Request) {
|
||||
func (s *Server) handleAPIMetrics(w http.ResponseWriter, r *http.Request) {
|
||||
data, err := s.metrics.getMetricsJSON()
|
||||
if err != nil {
|
||||
router.SendResponse(w, r, http.StatusInternalServerError, "failed to get metrics")
|
||||
shared.SendResponse(w, r, http.StatusInternalServerError, "failed to get metrics")
|
||||
return
|
||||
}
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
@@ -103,7 +105,9 @@ func (s *Server) handleAPIMetrics(w http.ResponseWriter, r *http.Request) {
|
||||
// filtered to samples after the ?after=<RFC3339> timestamp.
|
||||
func (s *Server) handleAPIPerformance(w http.ResponseWriter, r *http.Request) {
|
||||
if s.perf == nil {
|
||||
router.SendResponse(w, r, http.StatusServiceUnavailable, "performance monitor not available")
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
w.WriteHeader(http.StatusServiceUnavailable)
|
||||
json.NewEncoder(w).Encode(map[string]bool{"enabled": false})
|
||||
return
|
||||
}
|
||||
|
||||
@@ -112,7 +116,7 @@ func (s *Server) handleAPIPerformance(w http.ResponseWriter, r *http.Request) {
|
||||
if afterStr := r.URL.Query().Get("after"); afterStr != "" {
|
||||
after, err := time.Parse(time.RFC3339, afterStr)
|
||||
if err != nil {
|
||||
router.SendResponse(w, r, http.StatusBadRequest, "invalid 'after' timestamp, use RFC3339 format")
|
||||
shared.SendResponse(w, r, http.StatusBadRequest, "invalid 'after' timestamp, use RFC3339 format")
|
||||
return
|
||||
}
|
||||
filteredSys := make([]perf.SysStat, 0, len(sysStats))
|
||||
@@ -134,6 +138,7 @@ func (s *Server) handleAPIPerformance(w http.ResponseWriter, r *http.Request) {
|
||||
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
json.NewEncoder(w).Encode(map[string]any{
|
||||
"enabled": true,
|
||||
"sys_stats": sysStats,
|
||||
"gpu_stats": gpuStats,
|
||||
})
|
||||
@@ -153,19 +158,19 @@ func (s *Server) handleAPIVersion(w http.ResponseWriter, r *http.Request) {
|
||||
func (s *Server) handleAPICapture(w http.ResponseWriter, r *http.Request) {
|
||||
id, err := strconv.Atoi(r.PathValue("id"))
|
||||
if err != nil {
|
||||
router.SendResponse(w, r, http.StatusBadRequest, "invalid capture ID")
|
||||
shared.SendResponse(w, r, http.StatusBadRequest, "invalid capture ID")
|
||||
return
|
||||
}
|
||||
|
||||
capture := s.metrics.getCaptureByID(id)
|
||||
if capture == nil {
|
||||
router.SendResponse(w, r, http.StatusNotFound, "capture not found")
|
||||
shared.SendResponse(w, r, http.StatusNotFound, "capture not found")
|
||||
return
|
||||
}
|
||||
|
||||
jsonBytes, err := json.Marshal(capture)
|
||||
if err != nil {
|
||||
router.SendResponse(w, r, http.StatusInternalServerError, "failed to marshal capture")
|
||||
shared.SendResponse(w, r, http.StatusInternalServerError, "failed to marshal capture")
|
||||
return
|
||||
}
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
@@ -198,7 +203,7 @@ func (s *Server) handleAPIEvents(w http.ResponseWriter, r *http.Request) {
|
||||
|
||||
flusher, ok := w.(http.Flusher)
|
||||
if !ok {
|
||||
router.SendResponse(w, r, http.StatusInternalServerError, "streaming unsupported")
|
||||
shared.SendResponse(w, r, http.StatusInternalServerError, "streaming unsupported")
|
||||
return
|
||||
}
|
||||
|
||||
|
||||
+17
-31
@@ -1,19 +1,17 @@
|
||||
package server
|
||||
|
||||
import (
|
||||
"encoding/base64"
|
||||
"net/http"
|
||||
"strings"
|
||||
|
||||
"github.com/mostlygeek/llama-swap/internal/chain"
|
||||
"github.com/mostlygeek/llama-swap/internal/config"
|
||||
"github.com/mostlygeek/llama-swap/internal/router"
|
||||
"github.com/mostlygeek/llama-swap/internal/shared"
|
||||
)
|
||||
|
||||
// CreateAuthMiddleware returns middleware that validates API keys when the
|
||||
// config declares any. It accepts the key via Authorization: Bearer,
|
||||
// Authorization: Basic (password field), or x-api-key. On success the auth
|
||||
// headers are stripped so they never leak to upstream. When no keys are
|
||||
// Authorization: Basic (password field), or x-api-key. When no keys are
|
||||
// configured the middleware is a pass-through.
|
||||
func CreateAuthMiddleware(cfg config.Config) chain.Middleware {
|
||||
keys := cfg.RequiredAPIKeys
|
||||
@@ -22,7 +20,7 @@ func CreateAuthMiddleware(cfg config.Config) chain.Middleware {
|
||||
return next
|
||||
}
|
||||
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
provided := extractAPIKey(r)
|
||||
provided := shared.ExtractAPIKey(r)
|
||||
|
||||
valid := false
|
||||
for _, key := range keys {
|
||||
@@ -33,41 +31,29 @@ func CreateAuthMiddleware(cfg config.Config) chain.Middleware {
|
||||
}
|
||||
if !valid {
|
||||
w.Header().Set("WWW-Authenticate", `Basic realm="llama-swap"`)
|
||||
router.SendResponse(w, r, http.StatusUnauthorized, "unauthorized: invalid or missing API key")
|
||||
shared.SendResponse(w, r, http.StatusUnauthorized, "unauthorized: invalid or missing API key")
|
||||
return
|
||||
}
|
||||
|
||||
r.Header.Del("Authorization")
|
||||
r.Header.Del("x-api-key")
|
||||
next.ServeHTTP(w, r)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// extractAPIKey pulls a candidate API key from the request, preferring Basic,
|
||||
// then Bearer, then x-api-key.
|
||||
func extractAPIKey(r *http.Request) string {
|
||||
var bearerKey, basicKey string
|
||||
if auth := r.Header.Get("Authorization"); auth != "" {
|
||||
if strings.HasPrefix(auth, "Bearer ") {
|
||||
bearerKey = strings.TrimPrefix(auth, "Bearer ")
|
||||
} else if strings.HasPrefix(auth, "Basic ") {
|
||||
encoded := strings.TrimPrefix(auth, "Basic ")
|
||||
if decoded, err := base64.StdEncoding.DecodeString(encoded); err == nil {
|
||||
if parts := strings.SplitN(string(decoded), ":", 2); len(parts) == 2 {
|
||||
basicKey = parts[1] // password field is the API key
|
||||
}
|
||||
// CreateRequestContextMiddleware returns middleware that extracts model and
|
||||
// auth info from the request into the context. Requests where no model can be
|
||||
// identified are rejected with a 404.
|
||||
func CreateRequestContextMiddleware(cfg config.Config) chain.Middleware {
|
||||
return func(next http.Handler) http.Handler {
|
||||
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
data, err := shared.FetchContext(r, cfg)
|
||||
if err != nil {
|
||||
shared.SendError(w, r, shared.ErrNoModelInContext)
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
switch {
|
||||
case basicKey != "":
|
||||
return basicKey
|
||||
case bearerKey != "":
|
||||
return bearerKey
|
||||
default:
|
||||
return r.Header.Get("x-api-key")
|
||||
_ = data
|
||||
next.ServeHTTP(w, r)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -1,48 +1,14 @@
|
||||
package server
|
||||
|
||||
import (
|
||||
"encoding/base64"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/mostlygeek/llama-swap/internal/config"
|
||||
)
|
||||
|
||||
func TestServer_ExtractAPIKey(t *testing.T) {
|
||||
basicHeader := func(user, pass string) string {
|
||||
return "Basic " + base64.StdEncoding.EncodeToString([]byte(user+":"+pass))
|
||||
}
|
||||
cases := []struct {
|
||||
name string
|
||||
auth string
|
||||
xapi string
|
||||
want string
|
||||
}{
|
||||
{"none", "", "", ""},
|
||||
{"bearer", "Bearer tok123", "", "tok123"},
|
||||
{"basic", basicHeader("user", "pw-key"), "", "pw-key"},
|
||||
{"x-api-key", "", "xkey", "xkey"},
|
||||
{"basic beats bearer", basicHeader("u", "bk"), "", "bk"},
|
||||
{"bearer beats x-api-key", "Bearer btok", "xkey", "btok"},
|
||||
{"malformed basic falls back to x-api-key", "Basic !!!notbase64", "xkey", "xkey"},
|
||||
}
|
||||
for _, c := range cases {
|
||||
t.Run(c.name, func(t *testing.T) {
|
||||
r := httptest.NewRequest(http.MethodGet, "/", nil)
|
||||
if c.auth != "" {
|
||||
r.Header.Set("Authorization", c.auth)
|
||||
}
|
||||
if c.xapi != "" {
|
||||
r.Header.Set("x-api-key", c.xapi)
|
||||
}
|
||||
if got := extractAPIKey(r); got != c.want {
|
||||
t.Errorf("extractAPIKey() = %q, want %q", got, c.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestServer_SanitizeAccessControlRequestHeaders(t *testing.T) {
|
||||
cases := []struct {
|
||||
in string
|
||||
@@ -74,11 +40,42 @@ func TestServer_IsTokenChar(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestServer_RequestContextMiddleware(t *testing.T) {
|
||||
cfg := config.Config{
|
||||
Models: map[string]config.ModelConfig{
|
||||
"llama3": {},
|
||||
},
|
||||
}
|
||||
|
||||
final := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.WriteHeader(http.StatusOK)
|
||||
})
|
||||
|
||||
mw := CreateRequestContextMiddleware(cfg)
|
||||
|
||||
t.Run("known model passes through", func(t *testing.T) {
|
||||
r := httptest.NewRequest(http.MethodPost, "/v1/chat/completions", strings.NewReader(`{"model":"llama3"}`))
|
||||
r.Header.Set("Content-Type", "application/json")
|
||||
w := httptest.NewRecorder()
|
||||
mw(final).ServeHTTP(w, r)
|
||||
if w.Code != http.StatusOK {
|
||||
t.Errorf("status = %d, want 200", w.Code)
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("missing model returns 404", func(t *testing.T) {
|
||||
r := httptest.NewRequest(http.MethodPost, "/v1/chat/completions", strings.NewReader(`{}`))
|
||||
r.Header.Set("Content-Type", "application/json")
|
||||
w := httptest.NewRecorder()
|
||||
mw(final).ServeHTTP(w, r)
|
||||
if w.Code != http.StatusNotFound {
|
||||
t.Errorf("status = %d, want 404", w.Code)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func TestServer_AuthMiddleware(t *testing.T) {
|
||||
final := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
if r.Header.Get("Authorization") != "" || r.Header.Get("x-api-key") != "" {
|
||||
t.Error("auth headers leaked to upstream")
|
||||
}
|
||||
w.WriteHeader(http.StatusOK)
|
||||
})
|
||||
|
||||
|
||||
@@ -1,57 +0,0 @@
|
||||
package server
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
|
||||
"golang.org/x/sync/semaphore"
|
||||
|
||||
"github.com/mostlygeek/llama-swap/internal/chain"
|
||||
"github.com/mostlygeek/llama-swap/internal/config"
|
||||
"github.com/mostlygeek/llama-swap/internal/router"
|
||||
)
|
||||
|
||||
// defaultConcurrencyLimit caps simultaneous in-flight requests per model when
|
||||
// the model config leaves concurrencyLimit unset. Matches the legacy
|
||||
// proxy.Process default.
|
||||
const defaultConcurrencyLimit = 10
|
||||
|
||||
// CreateConcurrencyMiddleware returns middleware that limits simultaneous
|
||||
// model-dispatched requests per model. Each model gets a semaphore sized to
|
||||
// its concurrencyLimit (or defaultConcurrencyLimit). A request that cannot
|
||||
// immediately acquire a slot is rejected with 429. Models without a local
|
||||
// config entry (e.g. peer-routed models) are not limited.
|
||||
func CreateConcurrencyMiddleware(cfg config.Config) chain.Middleware {
|
||||
semaphores := make(map[string]*semaphore.Weighted, len(cfg.Models))
|
||||
for id, mc := range cfg.Models {
|
||||
limit := defaultConcurrencyLimit
|
||||
if mc.ConcurrencyLimit > 0 {
|
||||
limit = mc.ConcurrencyLimit
|
||||
}
|
||||
semaphores[id] = semaphore.NewWeighted(int64(limit))
|
||||
}
|
||||
|
||||
return func(next http.Handler) http.Handler {
|
||||
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
data, err := router.FetchContext(r, cfg)
|
||||
if err != nil {
|
||||
router.SendError(w, r, router.ErrNoModelInContext)
|
||||
return
|
||||
}
|
||||
|
||||
// fall through for peer models
|
||||
sem, ok := semaphores[data.ModelID]
|
||||
if !ok {
|
||||
next.ServeHTTP(w, r)
|
||||
return
|
||||
}
|
||||
if !sem.TryAcquire(1) {
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
w.WriteHeader(http.StatusTooManyRequests)
|
||||
w.Write([]byte(`{"error":"Too many requests"}`))
|
||||
return
|
||||
}
|
||||
defer sem.Release(1)
|
||||
next.ServeHTTP(w, r)
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -1,75 +0,0 @@
|
||||
package server
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"sync"
|
||||
"testing"
|
||||
|
||||
"github.com/mostlygeek/llama-swap/internal/config"
|
||||
"github.com/mostlygeek/llama-swap/internal/router"
|
||||
)
|
||||
|
||||
func concurrencyTestReq(model string) *http.Request {
|
||||
r := httptest.NewRequest("GET", "/v1/chat/completions", nil)
|
||||
return r.WithContext(router.SetContext(r.Context(), router.ReqContextData{Model: model, ModelID: model}))
|
||||
}
|
||||
|
||||
func TestServer_ConcurrencyMiddleware_RejectsOverLimit(t *testing.T) {
|
||||
cfg := config.Config{
|
||||
Models: map[string]config.ModelConfig{
|
||||
"m1": {ConcurrencyLimit: 1},
|
||||
},
|
||||
}
|
||||
|
||||
entered := make(chan struct{})
|
||||
release := make(chan struct{})
|
||||
var once sync.Once
|
||||
final := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
once.Do(func() { close(entered) })
|
||||
<-release
|
||||
w.WriteHeader(http.StatusOK)
|
||||
})
|
||||
h := CreateConcurrencyMiddleware(cfg)(final)
|
||||
|
||||
// First request occupies the only slot.
|
||||
done := make(chan struct{})
|
||||
go func() {
|
||||
defer close(done)
|
||||
h.ServeHTTP(httptest.NewRecorder(), concurrencyTestReq("m1"))
|
||||
}()
|
||||
<-entered
|
||||
|
||||
// Second concurrent request is rejected with 429.
|
||||
w := httptest.NewRecorder()
|
||||
h.ServeHTTP(w, concurrencyTestReq("m1"))
|
||||
if w.Code != http.StatusTooManyRequests {
|
||||
t.Fatalf("over-limit status = %d, want 429", w.Code)
|
||||
}
|
||||
|
||||
// Once the slot frees, a new request succeeds.
|
||||
close(release)
|
||||
<-done
|
||||
w = httptest.NewRecorder()
|
||||
h.ServeHTTP(w, concurrencyTestReq("m1"))
|
||||
if w.Code != http.StatusOK {
|
||||
t.Fatalf("post-release status = %d, want 200", w.Code)
|
||||
}
|
||||
}
|
||||
|
||||
func TestServer_ConcurrencyMiddleware_UnconfiguredModelPassesThrough(t *testing.T) {
|
||||
cfg := config.Config{Models: map[string]config.ModelConfig{}}
|
||||
|
||||
called := 0
|
||||
final := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
called++
|
||||
w.WriteHeader(http.StatusOK)
|
||||
})
|
||||
h := CreateConcurrencyMiddleware(cfg)(final)
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
h.ServeHTTP(w, concurrencyTestReq("peer-model"))
|
||||
if w.Code != http.StatusOK || called != 1 {
|
||||
t.Fatalf("unconfigured model: status=%d called=%d, want 200/1", w.Code, called)
|
||||
}
|
||||
}
|
||||
@@ -11,7 +11,7 @@ import (
|
||||
|
||||
"github.com/mostlygeek/llama-swap/internal/chain"
|
||||
"github.com/mostlygeek/llama-swap/internal/config"
|
||||
"github.com/mostlygeek/llama-swap/internal/router"
|
||||
"github.com/mostlygeek/llama-swap/internal/shared"
|
||||
"github.com/tidwall/sjson"
|
||||
)
|
||||
|
||||
@@ -34,9 +34,9 @@ func CreateFilterMiddleware(cfg config.Config) chain.Middleware {
|
||||
return
|
||||
}
|
||||
|
||||
data, err := router.FetchContext(r, cfg)
|
||||
data, err := shared.FetchContext(r, cfg)
|
||||
if err != nil {
|
||||
router.SendError(w, r, router.ErrNoModelInContext)
|
||||
shared.SendError(w, r, shared.ErrNoModelInContext)
|
||||
return
|
||||
}
|
||||
|
||||
@@ -48,13 +48,13 @@ func CreateFilterMiddleware(cfg config.Config) chain.Middleware {
|
||||
|
||||
body, err := io.ReadAll(r.Body)
|
||||
if err != nil {
|
||||
router.SendResponse(w, r, http.StatusBadRequest, "could not read request body")
|
||||
shared.SendResponse(w, r, http.StatusBadRequest, "could not read request body")
|
||||
return
|
||||
}
|
||||
|
||||
body, err = applyFilters(body, data.Model, useModelName, filters)
|
||||
if err != nil {
|
||||
router.SendResponse(w, r, http.StatusInternalServerError, err.Error())
|
||||
shared.SendResponse(w, r, http.StatusInternalServerError, err.Error())
|
||||
return
|
||||
}
|
||||
|
||||
@@ -84,9 +84,9 @@ func CreateFormFilterMiddleware(cfg config.Config) chain.Middleware {
|
||||
return
|
||||
}
|
||||
|
||||
data, err := router.FetchContext(r, cfg)
|
||||
data, err := shared.FetchContext(r, cfg)
|
||||
if err != nil {
|
||||
router.SendError(w, r, router.ErrNoModelInContext)
|
||||
shared.SendError(w, r, shared.ErrNoModelInContext)
|
||||
return
|
||||
}
|
||||
|
||||
@@ -97,13 +97,13 @@ func CreateFormFilterMiddleware(cfg config.Config) chain.Middleware {
|
||||
}
|
||||
|
||||
if err := r.ParseMultipartForm(32 << 20); err != nil {
|
||||
router.SendResponse(w, r, http.StatusBadRequest, fmt.Sprintf("error parsing multipart form: %s", err.Error()))
|
||||
shared.SendResponse(w, r, http.StatusBadRequest, fmt.Sprintf("error parsing multipart form: %s", err.Error()))
|
||||
return
|
||||
}
|
||||
|
||||
body, contentType, err := rewriteMultipartModel(r.MultipartForm, useModelName)
|
||||
if err != nil {
|
||||
router.SendResponse(w, r, http.StatusInternalServerError, err.Error())
|
||||
shared.SendResponse(w, r, http.StatusInternalServerError, err.Error())
|
||||
return
|
||||
}
|
||||
|
||||
|
||||
@@ -14,7 +14,7 @@ import (
|
||||
"github.com/mostlygeek/llama-swap/internal/chain"
|
||||
"github.com/mostlygeek/llama-swap/internal/config"
|
||||
"github.com/mostlygeek/llama-swap/internal/logmon"
|
||||
"github.com/mostlygeek/llama-swap/internal/router"
|
||||
"github.com/mostlygeek/llama-swap/internal/shared"
|
||||
)
|
||||
|
||||
// NewLoggers builds the proxy, upstream, and combined (mux) log monitors,
|
||||
@@ -76,7 +76,7 @@ func (s *Server) getLogger(logMonitorID string) (*logmon.Monitor, error) {
|
||||
case "upstream":
|
||||
return s.upstreamlog, nil
|
||||
default:
|
||||
if _, modelID, _, found := findModelInPath(s.cfg, "/"+logMonitorID); found {
|
||||
if _, modelID, _, found := shared.FindModelInPath(s.cfg, "/"+logMonitorID); found {
|
||||
if log, ok := s.local.ProcessLogger(modelID); ok {
|
||||
return log, nil
|
||||
}
|
||||
@@ -102,13 +102,13 @@ func (s *Server) handleLogStream(w http.ResponseWriter, r *http.Request) {
|
||||
|
||||
logger, err := s.getLogger(logMonitorID)
|
||||
if err != nil {
|
||||
router.SendResponse(w, r, http.StatusBadRequest, err.Error())
|
||||
shared.SendResponse(w, r, http.StatusBadRequest, err.Error())
|
||||
return
|
||||
}
|
||||
|
||||
flusher, ok := w.(http.Flusher)
|
||||
if !ok {
|
||||
router.SendResponse(w, r, http.StatusInternalServerError, "streaming unsupported")
|
||||
shared.SendResponse(w, r, http.StatusInternalServerError, "streaming unsupported")
|
||||
return
|
||||
}
|
||||
|
||||
|
||||
+132
-33
@@ -25,6 +25,8 @@ import (
|
||||
// TokenMetrics holds token usage and performance metrics.
|
||||
type TokenMetrics struct {
|
||||
CachedTokens int `json:"cache_tokens"`
|
||||
DraftTokens int `json:"draft_tokens"`
|
||||
DraftAccTokens int `json:"draft_acc_tokens"`
|
||||
InputTokens int `json:"input_tokens"`
|
||||
OutputTokens int `json:"output_tokens"`
|
||||
PromptPerSecond float64 `json:"prompt_per_second"`
|
||||
@@ -33,15 +35,17 @@ type TokenMetrics struct {
|
||||
|
||||
// ActivityLogEntry represents parsed token statistics from llama-server logs.
|
||||
type ActivityLogEntry struct {
|
||||
ID int `json:"id"`
|
||||
Timestamp time.Time `json:"timestamp"`
|
||||
Model string `json:"model"`
|
||||
ReqPath string `json:"req_path"`
|
||||
RespContentType string `json:"resp_content_type"`
|
||||
RespStatusCode int `json:"resp_status_code"`
|
||||
Tokens TokenMetrics `json:"tokens"`
|
||||
DurationMs int `json:"duration_ms"`
|
||||
HasCapture bool `json:"has_capture"`
|
||||
ID int `json:"id"`
|
||||
Timestamp time.Time `json:"timestamp"`
|
||||
Model string `json:"model"`
|
||||
ReqPath string `json:"req_path"`
|
||||
RespContentType string `json:"resp_content_type"`
|
||||
RespStatusCode int `json:"resp_status_code"`
|
||||
Tokens TokenMetrics `json:"tokens"`
|
||||
DurationMs int `json:"duration_ms"`
|
||||
HasCapture bool `json:"has_capture"`
|
||||
ErrorMsg string `json:"error_msg,omitempty"`
|
||||
Metadata map[string]string `json:"metadata,omitempty"`
|
||||
}
|
||||
|
||||
// ActivityLogEvent carries a single activity log entry to event subscribers.
|
||||
@@ -122,9 +126,11 @@ func (mp *metricsMonitor) getMetricsJSON() ([]byte, error) {
|
||||
}
|
||||
|
||||
// record parses a completed response body and stores/emits an activity entry.
|
||||
// When captures are enabled, a zstd+CBOR capture is stored for successful
|
||||
// requests, with cf controlling which request/response parts are retained.
|
||||
// reqBody and reqHeaders are the request data buffered before dispatch.
|
||||
// Successful requests store a zstd+CBOR capture (when enabled) with cf
|
||||
// controlling which parts are retained. Failed (non-200) requests capture the
|
||||
// request only and set ErrorMsg to a description of the failure, so the error
|
||||
// can be inspected without storing unreadable raw response bytes. reqBody and
|
||||
// reqHeaders are the request data buffered before dispatch.
|
||||
func (mp *metricsMonitor) record(modelID string, r *http.Request, recorder *responseBodyCopier, cf captureFields, reqBody []byte, reqHeaders map[string]string) {
|
||||
tm := ActivityLogEntry{
|
||||
Timestamp: time.Now(),
|
||||
@@ -135,6 +141,13 @@ func (mp *metricsMonitor) record(modelID string, r *http.Request, recorder *resp
|
||||
DurationMs: int(time.Since(recorder.StartTime()).Milliseconds()),
|
||||
}
|
||||
|
||||
if ctxData, ok := shared.ReadContext(r.Context()); ok && len(ctxData.Metadata) > 0 {
|
||||
tm.Metadata = make(map[string]string, len(ctxData.Metadata))
|
||||
for k, v := range ctxData.Metadata {
|
||||
tm.Metadata[k] = v
|
||||
}
|
||||
}
|
||||
|
||||
queueAndEmit := func() {
|
||||
tm.ID = mp.queueMetrics(tm)
|
||||
mp.emitMetric(tm)
|
||||
@@ -142,7 +155,13 @@ func (mp *metricsMonitor) record(modelID string, r *http.Request, recorder *resp
|
||||
|
||||
if recorder.Status() != http.StatusOK {
|
||||
mp.logger.Warnf("non-200 response, recording partial metrics: status=%d, path=%s", recorder.Status(), r.URL.Path)
|
||||
queueAndEmit()
|
||||
decoded, decErr := mp.decodeResponseBody(recorder, r.URL.Path)
|
||||
tm.ErrorMsg = failedErrorMessage(recorder.Status(), decoded, decErr)
|
||||
tm.ID = mp.queueMetrics(tm)
|
||||
// Capture the request only; the failure is surfaced via ErrorMsg
|
||||
// rather than storing the (possibly undisplayable) response body.
|
||||
tm.HasCapture = mp.storeCapture(tm.ID, r, recorder, cf&^captureRespBody, reqBody, reqHeaders, nil)
|
||||
mp.emitMetric(tm)
|
||||
return
|
||||
}
|
||||
|
||||
@@ -157,6 +176,7 @@ func (mp *metricsMonitor) record(modelID string, r *http.Request, recorder *resp
|
||||
decoded, err := decompressBody(body, encoding)
|
||||
if err != nil {
|
||||
mp.logger.Warnf("metrics: decompression failed: %v, path=%s, recording minimal metrics", err, r.URL.Path)
|
||||
tm.ErrorMsg = fmt.Sprintf("response decompression failed: %v", err)
|
||||
queueAndEmit()
|
||||
return
|
||||
}
|
||||
@@ -195,28 +215,99 @@ func (mp *metricsMonitor) record(modelID string, r *http.Request, recorder *resp
|
||||
}
|
||||
|
||||
tm.ID = mp.queueMetrics(tm)
|
||||
if mp.enableCaptures {
|
||||
capture := ReqRespCapture{
|
||||
ID: tm.ID,
|
||||
ReqPath: r.URL.Path,
|
||||
ReqHeaders: reqHeaders,
|
||||
}
|
||||
if cf&captureReqBody != 0 {
|
||||
capture.ReqBody = reqBody
|
||||
}
|
||||
if cf&captureRespHeaders != 0 {
|
||||
capture.RespHeaders = headerMap(recorder.Header())
|
||||
redactHeaders(capture.RespHeaders)
|
||||
delete(capture.RespHeaders, "Content-Encoding")
|
||||
}
|
||||
if cf&captureRespBody != 0 {
|
||||
capture.RespBody = body
|
||||
}
|
||||
if mp.addCapture(capture) {
|
||||
tm.HasCapture = true
|
||||
tm.HasCapture = mp.storeCapture(tm.ID, r, recorder, cf, reqBody, reqHeaders, body)
|
||||
mp.emitMetric(tm)
|
||||
}
|
||||
|
||||
// storeCapture assembles a ReqRespCapture for id, honoring the captureFields
|
||||
// mask, and stores it when captures are enabled. body is the response body to
|
||||
// capture (already decompressed by the caller); pass nil to omit it. Returns
|
||||
// true if a capture was stored.
|
||||
func (mp *metricsMonitor) storeCapture(id int, r *http.Request, recorder *responseBodyCopier, cf captureFields, reqBody []byte, reqHeaders map[string]string, body []byte) bool {
|
||||
if !mp.enableCaptures {
|
||||
return false
|
||||
}
|
||||
capture := ReqRespCapture{
|
||||
ID: id,
|
||||
ReqPath: r.URL.Path,
|
||||
ReqHeaders: reqHeaders,
|
||||
}
|
||||
if cf&captureReqBody != 0 {
|
||||
capture.ReqBody = reqBody
|
||||
}
|
||||
if cf&captureRespHeaders != 0 {
|
||||
capture.RespHeaders = headerMap(recorder.Header())
|
||||
redactHeaders(capture.RespHeaders)
|
||||
delete(capture.RespHeaders, "Content-Encoding")
|
||||
}
|
||||
if cf&captureRespBody != 0 {
|
||||
capture.RespBody = body
|
||||
}
|
||||
return mp.addCapture(capture)
|
||||
}
|
||||
|
||||
// decodeResponseBody returns the buffered response body, decompressing it when
|
||||
// the upstream set a Content-Encoding we recognize. On decompression failure it
|
||||
// logs a warning and returns an error so the caller can record a description
|
||||
// (via ErrorMsg) instead of storing unreadable raw bytes.
|
||||
func (mp *metricsMonitor) decodeResponseBody(recorder *responseBodyCopier, path string) ([]byte, error) {
|
||||
body := recorder.body.Bytes()
|
||||
if len(body) == 0 {
|
||||
return nil, nil
|
||||
}
|
||||
encoding := recorder.Header().Get("Content-Encoding")
|
||||
if encoding == "" {
|
||||
return body, nil
|
||||
}
|
||||
decoded, err := decompressBody(body, encoding)
|
||||
if err != nil {
|
||||
mp.logger.Warnf("metrics: response decompression failed: %v, path=%s", err, path)
|
||||
return nil, err
|
||||
}
|
||||
return decoded, nil
|
||||
}
|
||||
|
||||
// errorMessagePaths holds the JSON paths probed, in order, for a
// human-readable error message in OpenAI- and llama.cpp-style error responses.
// extractErrorMessage returns the first non-blank string it finds.
var errorMessagePaths = []string{
	"error.message",
	"error",
	"message",
	"detail",
}
|
||||
|
||||
// extractErrorMessage pulls a human-readable error string from a JSON error
|
||||
// response. Returns "" if no message is found or the body is not valid JSON.
|
||||
func extractErrorMessage(body []byte) string {
|
||||
if !gjson.ValidBytes(body) {
|
||||
return ""
|
||||
}
|
||||
parsed := gjson.ParseBytes(body)
|
||||
for _, path := range errorMessagePaths {
|
||||
v := parsed.Get(path)
|
||||
if v.Exists() && v.Type == gjson.String {
|
||||
if s := strings.TrimSpace(v.String()); s != "" {
|
||||
return s
|
||||
}
|
||||
}
|
||||
}
|
||||
mp.emitMetric(tm)
|
||||
return ""
|
||||
}
|
||||
|
||||
// failedErrorMessage builds a human-readable description for a non-200 response.
|
||||
// It prefers an error message parsed from the (decompressed) body and falls back
|
||||
// to the HTTP status text. A non-nil decErr indicates the body could not be
|
||||
// decoded, in which case the decode error is described instead.
|
||||
func failedErrorMessage(status int, body []byte, decErr error) string {
|
||||
const maxLen = 500
|
||||
if decErr != nil {
|
||||
return fmt.Sprintf("response decode failed: %v", decErr)
|
||||
}
|
||||
if msg := extractErrorMessage(body); msg != "" {
|
||||
if len(msg) > maxLen {
|
||||
msg = msg[:maxLen] + "..."
|
||||
}
|
||||
return msg
|
||||
}
|
||||
if text := http.StatusText(status); text != "" {
|
||||
return fmt.Sprintf("%d %s", status, text)
|
||||
}
|
||||
return fmt.Sprintf("HTTP %d", status)
|
||||
}
|
||||
|
||||
// usagePaths lists the JSON paths where a per-event usage object can live.
|
||||
@@ -337,6 +428,8 @@ func buildMetrics(modelID string, start time.Time, inputTokens, outputTokens, ca
|
||||
durationMs := wallDurationMs
|
||||
tokensPerSecond := -1.0
|
||||
promptPerSecond := -1.0
|
||||
draftTokens := -1
|
||||
draftAccTokens := -1
|
||||
|
||||
if timings.Exists() {
|
||||
inputTokens = timings.Get("prompt_n").Int()
|
||||
@@ -350,6 +443,10 @@ func buildMetrics(modelID string, start time.Time, inputTokens, outputTokens, ca
|
||||
if cachedValue := timings.Get("cache_n"); cachedValue.Exists() {
|
||||
cachedTokens = cachedValue.Int()
|
||||
}
|
||||
if timings.Get("draft_n").Exists() && timings.Get("draft_n_accepted").Exists() {
|
||||
draftTokens = int(timings.Get("draft_n").Int())
|
||||
draftAccTokens = int(timings.Get("draft_n_accepted").Int())
|
||||
}
|
||||
}
|
||||
|
||||
return ActivityLogEntry{
|
||||
@@ -357,6 +454,8 @@ func buildMetrics(modelID string, start time.Time, inputTokens, outputTokens, ca
|
||||
Model: modelID,
|
||||
Tokens: TokenMetrics{
|
||||
CachedTokens: int(cachedTokens),
|
||||
DraftTokens: draftTokens,
|
||||
DraftAccTokens: draftAccTokens,
|
||||
InputTokens: int(inputTokens),
|
||||
OutputTokens: int(outputTokens),
|
||||
PromptPerSecond: promptPerSecond,
|
||||
|
||||
@@ -4,10 +4,11 @@ import (
|
||||
"bytes"
|
||||
"io"
|
||||
"net/http"
|
||||
"strings"
|
||||
|
||||
"github.com/mostlygeek/llama-swap/internal/chain"
|
||||
"github.com/mostlygeek/llama-swap/internal/config"
|
||||
"github.com/mostlygeek/llama-swap/internal/router"
|
||||
"github.com/mostlygeek/llama-swap/internal/shared"
|
||||
)
|
||||
|
||||
// CreateMetricsMiddleware returns middleware that records token metrics for
|
||||
@@ -21,17 +22,36 @@ func CreateMetricsMiddleware(mm *metricsMonitor, cfg config.Config) chain.Middle
|
||||
return
|
||||
}
|
||||
|
||||
// Determine the model-routed endpoint path. Regular routes are
|
||||
// already meterable; /upstream/<model>/<path> is metered only when
|
||||
// the remaining path matches a model-dispatched endpoint.
|
||||
checkPath := r.URL.Path
|
||||
if strings.HasPrefix(r.URL.Path, "/upstream/") {
|
||||
var found bool
|
||||
_, _, checkPath, found = shared.FindModelInPath(cfg, strings.TrimPrefix(r.URL.Path, "/upstream"))
|
||||
if !found {
|
||||
next.ServeHTTP(w, r)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
if !isMetricsRecordPath(checkPath) {
|
||||
next.ServeHTTP(w, r)
|
||||
return
|
||||
}
|
||||
|
||||
// Resolve the model now so downstream dispatch hits the context
|
||||
// fast path; FetchContext restores the request body.
|
||||
data, err := router.FetchContext(r, cfg)
|
||||
// fast path; FetchContext restores the request body for regular
|
||||
// routes and extracts the model from the URL for /upstream routes.
|
||||
data, err := shared.FetchContext(r, cfg)
|
||||
if err != nil {
|
||||
router.SendError(w, r, router.ErrNoModelInContext)
|
||||
shared.SendError(w, r, shared.ErrNoModelInContext)
|
||||
return
|
||||
}
|
||||
|
||||
// Buffer the request body/headers for capture before dispatch
|
||||
// consumes them.
|
||||
cf := captureFieldsFor(r.URL.Path)
|
||||
cf := captureFieldsFor(checkPath)
|
||||
var reqBody []byte
|
||||
var reqHeaders map[string]string
|
||||
if mm.enableCaptures {
|
||||
|
||||
@@ -1,9 +1,16 @@
|
||||
package server
|
||||
|
||||
import (
|
||||
"io"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/mostlygeek/llama-swap/internal/config"
|
||||
"github.com/mostlygeek/llama-swap/internal/logmon"
|
||||
"github.com/mostlygeek/llama-swap/internal/shared"
|
||||
"github.com/tidwall/gjson"
|
||||
)
|
||||
|
||||
@@ -56,6 +63,199 @@ func TestServer_ProcessStreamingResponse_NoData(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestMetricsMonitor_RecordMetadata(t *testing.T) {
|
||||
mm := newMetricsMonitor(nil, 10, 0)
|
||||
r := httptest.NewRequest(http.MethodPost, "/v1/chat/completions", strings.NewReader(`{"usage":{}}`))
|
||||
r = r.WithContext(shared.SetContext(r.Context(), shared.ReqContextData{
|
||||
ModelID: "m",
|
||||
Metadata: map[string]string{"client": "web", "trace": "abc"},
|
||||
}))
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
copier := newBodyCopier(w)
|
||||
copier.WriteHeader(http.StatusOK)
|
||||
copier.Write([]byte(`{"usage":{"prompt_tokens":1,"completion_tokens":2}}`))
|
||||
|
||||
mm.record("m", r, copier, 0, nil, nil)
|
||||
|
||||
entries := mm.getMetrics()
|
||||
if len(entries) != 1 {
|
||||
t.Fatalf("want 1 entry, got %d", len(entries))
|
||||
}
|
||||
if entries[0].Metadata["client"] != "web" {
|
||||
t.Errorf("client = %q, want web", entries[0].Metadata["client"])
|
||||
}
|
||||
if entries[0].Metadata["trace"] != "abc" {
|
||||
t.Errorf("trace = %q, want abc", entries[0].Metadata["trace"])
|
||||
}
|
||||
}
|
||||
|
||||
func TestMetricsMonitor_RecordFailedRequestCapture(t *testing.T) {
|
||||
mm := newMetricsMonitor(logmon.NewWriter(io.Discard), 10, 5)
|
||||
r := httptest.NewRequest(http.MethodPost, "/v1/chat/completions", nil)
|
||||
reqHeaders := map[string]string{"content-type": "application/json"}
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
copier := newBodyCopier(w)
|
||||
copier.Header().Set("Content-Type", "application/json")
|
||||
copier.WriteHeader(http.StatusBadGateway)
|
||||
copier.Write([]byte(`{"error":{"message":"model unavailable"}}`))
|
||||
|
||||
reqBody := []byte(`{"model":"m","messages":[]}`)
|
||||
mm.record("m", r, copier, captureAll, reqBody, reqHeaders)
|
||||
|
||||
entries := mm.getMetrics()
|
||||
if len(entries) != 1 {
|
||||
t.Fatalf("want 1 entry, got %d", len(entries))
|
||||
}
|
||||
entry := entries[0]
|
||||
if entry.RespStatusCode != http.StatusBadGateway {
|
||||
t.Errorf("status = %d, want %d", entry.RespStatusCode, http.StatusBadGateway)
|
||||
}
|
||||
if entry.ErrorMsg != "model unavailable" {
|
||||
t.Errorf("error_msg = %q, want extracted message", entry.ErrorMsg)
|
||||
}
|
||||
if !entry.HasCapture {
|
||||
t.Fatal("failed request should capture the request so it can be inspected")
|
||||
}
|
||||
|
||||
got := mm.getCaptureByID(entry.ID)
|
||||
if got == nil {
|
||||
t.Fatal("capture not found")
|
||||
}
|
||||
if string(got.ReqBody) != `{"model":"m","messages":[]}` {
|
||||
t.Errorf("req body = %q", got.ReqBody)
|
||||
}
|
||||
if len(got.RespBody) != 0 {
|
||||
t.Errorf("resp body stored for failed request (len=%d); want none", len(got.RespBody))
|
||||
}
|
||||
if got.RespHeaders["Content-Type"] != "application/json" {
|
||||
t.Errorf("resp Content-Type = %q", got.RespHeaders["Content-Type"])
|
||||
}
|
||||
}
|
||||
|
||||
func TestMetricsMonitor_RecordFailedRequestStatusFallback(t *testing.T) {
|
||||
// Non-JSON error body: ErrorMsg falls back to the HTTP status text.
|
||||
mm := newMetricsMonitor(logmon.NewWriter(io.Discard), 10, 5)
|
||||
r := httptest.NewRequest(http.MethodPost, "/v1/chat/completions", nil)
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
copier := newBodyCopier(w)
|
||||
copier.WriteHeader(http.StatusBadGateway)
|
||||
copier.Write([]byte("<html>upstream down</html>"))
|
||||
|
||||
mm.record("m", r, copier, captureAll, nil, nil)
|
||||
|
||||
entries := mm.getMetrics()
|
||||
if len(entries) != 1 {
|
||||
t.Fatalf("want 1 entry, got %d", len(entries))
|
||||
}
|
||||
if entries[0].ErrorMsg != "502 Bad Gateway" {
|
||||
t.Errorf("error_msg = %q, want status text", entries[0].ErrorMsg)
|
||||
}
|
||||
}
|
||||
|
||||
func TestMetricsMonitor_RecordFailedRequestCaptureDisabled(t *testing.T) {
|
||||
mm := newMetricsMonitor(logmon.NewWriter(io.Discard), 10, 0) // captures disabled
|
||||
r := httptest.NewRequest(http.MethodPost, "/v1/chat/completions", nil)
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
copier := newBodyCopier(w)
|
||||
copier.WriteHeader(http.StatusInternalServerError)
|
||||
copier.Write([]byte(`{"error":"boom"}`))
|
||||
|
||||
mm.record("m", r, copier, captureAll, []byte("req"), nil)
|
||||
|
||||
entries := mm.getMetrics()
|
||||
if len(entries) != 1 {
|
||||
t.Fatalf("want 1 entry, got %d", len(entries))
|
||||
}
|
||||
if entries[0].HasCapture {
|
||||
t.Fatal("captures disabled, HasCapture should be false")
|
||||
}
|
||||
// ErrorMsg is independent of whether captures are enabled.
|
||||
if entries[0].ErrorMsg != "boom" {
|
||||
t.Errorf("error_msg = %q, want boom", entries[0].ErrorMsg)
|
||||
}
|
||||
if mm.getCaptureByID(entries[0].ID) != nil {
|
||||
t.Fatal("no capture should be stored when disabled")
|
||||
}
|
||||
}
|
||||
|
||||
func TestMetricsMonitor_RecordDecompressionFailureSetsErrorMsg(t *testing.T) {
|
||||
mm := newMetricsMonitor(logmon.NewWriter(io.Discard), 10, 5)
|
||||
r := httptest.NewRequest(http.MethodPost, "/v1/chat/completions", nil)
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
copier := newBodyCopier(w)
|
||||
copier.Header().Set("Content-Encoding", "gzip")
|
||||
copier.WriteHeader(http.StatusOK)
|
||||
copier.Write([]byte("not-really-gzip"))
|
||||
|
||||
mm.record("m", r, copier, captureAll, []byte("req"), nil)
|
||||
|
||||
entries := mm.getMetrics()
|
||||
if len(entries) != 1 {
|
||||
t.Fatalf("want 1 entry, got %d", len(entries))
|
||||
}
|
||||
if entries[0].ErrorMsg == "" {
|
||||
t.Fatal("expected ErrorMsg for decompression failure")
|
||||
}
|
||||
// Raw bytes must not be stored when the body could not be decoded.
|
||||
if entries[0].HasCapture {
|
||||
t.Fatal("decompression failure should not store a capture")
|
||||
}
|
||||
}
|
||||
|
||||
func TestMetricsMonitor_DecodeResponseBody(t *testing.T) {
|
||||
mm := newMetricsMonitor(logmon.NewWriter(io.Discard), 10, 5)
|
||||
|
||||
// No Content-Encoding: body returned unchanged.
|
||||
w := httptest.NewRecorder()
|
||||
copier := newBodyCopier(w)
|
||||
copier.Write([]byte("plain"))
|
||||
got, err := mm.decodeResponseBody(copier, "/p")
|
||||
if err != nil || string(got) != "plain" {
|
||||
t.Fatalf("plain body = %q, err = %v", got, err)
|
||||
}
|
||||
|
||||
// Bogus gzip payload: returns an error and no body (no raw bytes kept).
|
||||
w2 := httptest.NewRecorder()
|
||||
copier2 := newBodyCopier(w2)
|
||||
copier2.Header().Set("Content-Encoding", "gzip")
|
||||
copier2.Write([]byte("not-really-gzip"))
|
||||
got, err = mm.decodeResponseBody(copier2, "/p")
|
||||
if err == nil {
|
||||
t.Fatal("expected decompression error")
|
||||
}
|
||||
if got != nil {
|
||||
t.Errorf("expected nil body on failure, got %q", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestServer_ExtractErrorMessage(t *testing.T) {
|
||||
cases := []struct {
|
||||
name string
|
||||
body string
|
||||
want string
|
||||
}{
|
||||
{"openai object", `{"error":{"message":"rate limited"}}`, "rate limited"},
|
||||
{"string error", `{"error":"bad request"}`, "bad request"},
|
||||
{"message field", `{"message":"nope"}`, "nope"},
|
||||
{"detail field", `{"detail":"oops"}`, "oops"},
|
||||
{"object error ignored", `{"error":{"code":42}}`, ""},
|
||||
{"no error", `{"usage":{}}`, ""},
|
||||
{"invalid json", `not-json`, ""},
|
||||
}
|
||||
for _, tc := range cases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
if got := extractErrorMessage([]byte(tc.body)); got != tc.want {
|
||||
t.Errorf("extractErrorMessage = %q, want %q", got, tc.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestServer_ParseMetrics_Infill(t *testing.T) {
|
||||
// /infill responses are arrays; timings live in the last element.
|
||||
body := `[{"content":"a"},{"content":"b","timings":{"prompt_n":5,"predicted_n":9,"prompt_ms":10,"predicted_ms":20}}]`
|
||||
@@ -72,3 +272,40 @@ func TestServer_ParseMetrics_Infill(t *testing.T) {
|
||||
t.Fatalf("tokens = %+v", entry.Tokens)
|
||||
}
|
||||
}
|
||||
|
||||
// TestServer_MetricsMiddleware_UpstreamAudioCaptureSkipsRespBody verifies that
|
||||
// an /upstream/<model>/v1/audio/speech request uses the path-specific capture
|
||||
// mask (headers only) rather than falling back to captureAll.
|
||||
func TestServer_MetricsMiddleware_UpstreamAudioCaptureSkipsRespBody(t *testing.T) {
|
||||
mm := newMetricsMonitor(logmon.NewWriter(io.Discard), 100, 5)
|
||||
cfg := config.Config{Models: map[string]config.ModelConfig{"m1": {}}}
|
||||
|
||||
inner := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.Header().Set("Content-Type", "audio/mpeg")
|
||||
w.WriteHeader(http.StatusOK)
|
||||
w.Write([]byte("BINARY-AUDIO-DATA"))
|
||||
})
|
||||
handler := CreateMetricsMiddleware(mm, cfg)(inner)
|
||||
|
||||
req := httptest.NewRequest(http.MethodPost, "/upstream/m1/v1/audio/speech", strings.NewReader(`{"model":"m1"}`))
|
||||
handler.ServeHTTP(httptest.NewRecorder(), req)
|
||||
|
||||
entries := mm.getMetrics()
|
||||
if len(entries) == 0 {
|
||||
t.Fatal("no metrics recorded")
|
||||
}
|
||||
last := entries[len(entries)-1]
|
||||
if !last.HasCapture {
|
||||
t.Fatal("expected capture to be stored")
|
||||
}
|
||||
cap := mm.getCaptureByID(last.ID)
|
||||
if cap == nil {
|
||||
t.Fatal("capture not found")
|
||||
}
|
||||
if len(cap.RespBody) != 0 {
|
||||
t.Errorf("RespBody stored for /upstream audio route (len=%d); want path-specific mask to skip body", len(cap.RespBody))
|
||||
}
|
||||
if len(cap.RespHeaders) == 0 {
|
||||
t.Error("RespHeaders not stored; want captureRespHeaders mask")
|
||||
}
|
||||
}
|
||||
|
||||
+40
-23
@@ -15,6 +15,7 @@ import (
|
||||
"github.com/mostlygeek/llama-swap/internal/logmon"
|
||||
"github.com/mostlygeek/llama-swap/internal/perf"
|
||||
"github.com/mostlygeek/llama-swap/internal/router"
|
||||
"github.com/mostlygeek/llama-swap/internal/shared"
|
||||
)
|
||||
|
||||
// Server owns the HTTP mux, cross-cutting middleware, and the local/peer model
|
||||
@@ -88,6 +89,27 @@ var modelGetRoutes = []string{
|
||||
"/sdapi/v1/loras",
|
||||
}
|
||||
|
||||
// isMetricsRecordPath reports whether path is one of the model-dispatched
|
||||
// endpoints that the metrics middleware records in the activity log.
|
||||
func isMetricsRecordPath(path string) bool {
|
||||
for _, p := range modelPostJSONRoutes {
|
||||
if p == path {
|
||||
return true
|
||||
}
|
||||
}
|
||||
for _, p := range modelPostFormRoutes {
|
||||
if p == path {
|
||||
return true
|
||||
}
|
||||
}
|
||||
for _, p := range modelGetRoutes {
|
||||
if p == path {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// BuildInfo carries version metadata surfaced by GET /api/version.
|
||||
type BuildInfo struct {
|
||||
Version string
|
||||
@@ -99,12 +121,13 @@ func New(cfg config.Config, muxlog *logmon.Monitor, proxylog *logmon.Monitor, up
|
||||
var local router.LocalRouter
|
||||
var err error
|
||||
|
||||
if cfg.Matrix != nil {
|
||||
switch cfg.Routing.Router.Use {
|
||||
case "matrix":
|
||||
local, err = router.NewMatrix(cfg, proxylog, upstreamlog)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("creating matrix router: %w", err)
|
||||
}
|
||||
} else {
|
||||
default: // "group"
|
||||
local, err = router.NewGroup(cfg, proxylog, upstreamlog)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("creating group router: %w", err)
|
||||
@@ -137,13 +160,13 @@ func New(cfg config.Config, muxlog *logmon.Monitor, proxylog *logmon.Monitor, up
|
||||
}
|
||||
|
||||
// localPeerHandler dispatches a model-routed request to the local or peer
|
||||
// router. The model is resolved once via router.FetchContext.
|
||||
// router. The model is resolved once via shared.FetchContext.
|
||||
func (s *Server) localPeerHandler(w http.ResponseWriter, r *http.Request) {
|
||||
stripVersionPrefix(r)
|
||||
|
||||
data, err := router.FetchContext(r, s.cfg)
|
||||
data, err := shared.FetchContext(r, s.cfg)
|
||||
if err != nil {
|
||||
router.SendError(w, r, router.ErrNoModelInContext)
|
||||
shared.SendError(w, r, shared.ErrNoModelInContext)
|
||||
return
|
||||
}
|
||||
|
||||
@@ -155,7 +178,7 @@ func (s *Server) localPeerHandler(w http.ResponseWriter, r *http.Request) {
|
||||
s.proxylog.Debugf("dispatch: using peer for model: %s", data.ModelID)
|
||||
s.peer.ServeHTTP(w, r)
|
||||
default:
|
||||
router.SendError(w, r, router.ErrNoRouterFound)
|
||||
shared.SendError(w, r, router.ErrNoRouterFound)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -170,21 +193,13 @@ func stripVersionPrefix(r *http.Request) {
|
||||
// routes builds the mux, registers every route, and wraps the mux with the
|
||||
// global CORS middleware.
|
||||
func (s *Server) routes() {
|
||||
authMW := CreateAuthMiddleware(s.cfg)
|
||||
filterMW := CreateFilterMiddleware(s.cfg)
|
||||
formFilterMW := CreateFormFilterMiddleware(s.cfg)
|
||||
|
||||
// Model-dispatched routes get auth + per-model concurrency limiting + body
|
||||
// filters + in-flight tracking + token metrics. concurrencyMW rejects with
|
||||
// 429 before the body filters do any rewrite work. filterMW rewrites JSON
|
||||
// bodies and formFilterMW rewrites multipart bodies; each is a no-op for the
|
||||
// other's Content-Type. Both run before the metrics middleware so it buffers
|
||||
// the rewritten body.
|
||||
authMW := CreateAuthMiddleware(s.cfg)
|
||||
modelChain := chain.New(
|
||||
authMW,
|
||||
CreateConcurrencyMiddleware(s.cfg),
|
||||
filterMW,
|
||||
formFilterMW,
|
||||
CreateRequestContextMiddleware(s.cfg),
|
||||
CreateFilterMiddleware(s.cfg),
|
||||
CreateFormFilterMiddleware(s.cfg),
|
||||
CreateInflightMiddleware(s.inflight),
|
||||
CreateMetricsMiddleware(s.metrics, s.cfg),
|
||||
)
|
||||
@@ -215,19 +230,21 @@ func (s *Server) routes() {
|
||||
mux.HandleFunc("GET /{$}", handleRootRedirect)
|
||||
|
||||
// Embedded UI.
|
||||
mux.HandleFunc("GET /ui/", s.handleUI)
|
||||
mux.Handle("GET /ui/", chain.New(authMW).ThenFunc(s.handleUI))
|
||||
mux.HandleFunc("GET /favicon.ico", s.handleFavicon)
|
||||
|
||||
// Prometheus metrics (no auth, matches the legacy endpoint).
|
||||
mux.HandleFunc("GET /metrics", s.handleMetrics)
|
||||
// Prometheus metrics (wrapped by apiChain, matches the legacy endpoint).
|
||||
mux.Handle("GET /metrics", apiChain.ThenFunc(s.handleMetrics))
|
||||
|
||||
// Operations endpoints.
|
||||
mux.Handle("GET /unload", apiChain.ThenFunc(s.handleUnload))
|
||||
mux.Handle("GET /running", apiChain.ThenFunc(s.handleRunning))
|
||||
|
||||
// Upstream passthrough.
|
||||
// Upstream passthrough. Meter only the model-dispatched endpoints that can
|
||||
// produce token usage/timings.
|
||||
upstreamChain := apiChain.Append(CreateMetricsMiddleware(s.metrics, s.cfg))
|
||||
mux.HandleFunc("GET /upstream", handleUpstreamRedirect)
|
||||
mux.Handle("/upstream/{upstreamPath...}", apiChain.ThenFunc(s.handleUpstream))
|
||||
mux.Handle("/upstream/{upstreamPath...}", upstreamChain.ThenFunc(s.handleUpstream))
|
||||
|
||||
// API group (API-key protected) consumed by the UI.
|
||||
mux.Handle("POST /api/models/unload", apiChain.ThenFunc(s.handleAPIUnloadAll))
|
||||
|
||||
@@ -84,10 +84,15 @@ func chatRequest(model string) *http.Request {
|
||||
|
||||
func TestServer_New_GroupConfig(t *testing.T) {
|
||||
discard := logmon.NewWriter(io.Discard)
|
||||
s, err := New(config.Config{HealthCheckTimeout: 15}, discard, discard, discard, nil, BuildInfo{})
|
||||
cfg := config.Config{HealthCheckTimeout: 15}
|
||||
cfg.Routing.Router.Use = "group"
|
||||
s, err := New(cfg, discard, discard, discard, nil, BuildInfo{})
|
||||
if err != nil {
|
||||
t.Fatalf("New (group): %v", err)
|
||||
}
|
||||
if _, ok := s.local.(*router.Group); !ok {
|
||||
t.Fatalf("localRouter=%T want *router.Group", s.local)
|
||||
}
|
||||
if err := s.Shutdown(time.Second); err != nil {
|
||||
t.Fatalf("Shutdown: %v", err)
|
||||
}
|
||||
@@ -95,11 +100,16 @@ func TestServer_New_GroupConfig(t *testing.T) {
|
||||
|
||||
func TestServer_New_MatrixConfig(t *testing.T) {
|
||||
discard := logmon.NewWriter(io.Discard)
|
||||
cfg := config.Config{HealthCheckTimeout: 15, Matrix: &config.MatrixConfig{}}
|
||||
cfg := config.Config{HealthCheckTimeout: 15}
|
||||
cfg.Routing.Router.Use = "matrix"
|
||||
cfg.Routing.Router.Settings.Matrix = &config.MatrixConfig{}
|
||||
s, err := New(cfg, discard, discard, discard, nil, BuildInfo{})
|
||||
if err != nil {
|
||||
t.Fatalf("New (matrix): %v", err)
|
||||
}
|
||||
if _, ok := s.local.(*router.Matrix); !ok {
|
||||
t.Fatalf("localRouter=%T want *router.Matrix", s.local)
|
||||
}
|
||||
if err := s.Shutdown(time.Second); err != nil {
|
||||
t.Fatalf("Shutdown: %v", err)
|
||||
}
|
||||
|
||||
@@ -0,0 +1,298 @@
|
||||
package shared
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/base64"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"html"
|
||||
"io"
|
||||
"net/http"
|
||||
"strings"
|
||||
|
||||
"github.com/mostlygeek/llama-swap/internal/config"
|
||||
"github.com/tidwall/gjson"
|
||||
)
|
||||
|
||||
// contextkey is the unexported type of this package's context keys; name
// labels the key.
type contextkey struct{ name string }
|
||||
|
||||
// ReqContextData is the per-request data stored in the request context by
// SetContext and read back with ReadContext.
type ReqContextData struct {
	// ApiKey is the API key extracted from the request.
	// Model is the model name as it was requested; ModelID is the configured
	// real model name it resolved to.
	ApiKey, Model, ModelID string

	// Streaming reports whether the request asked for a streamed response.
	// SendLoadingState mirrors the resolved model's SendLoadingState setting.
	Streaming, SendLoadingState bool

	// Metadata is a request-scoped key/value bag that handlers may mutate
	// while processing. The metrics middleware copies it into ActivityLogEntry.
	Metadata map[string]string
}
|
||||
|
||||
var (
|
||||
ReqContextKey = &contextkey{"context"}
|
||||
ErrNoModelInContext = fmt.Errorf("no model in request context")
|
||||
ErrNoRouterFound = fmt.Errorf("no router found for model")
|
||||
ErrNoPeerModelFound = fmt.Errorf("peer model not found")
|
||||
ErrNoLocalModelFound = fmt.Errorf("local model not found")
|
||||
)
|
||||
|
||||
func SendError(w http.ResponseWriter, r *http.Request, err error) {
|
||||
var httpErr HTTPError
|
||||
if errors.As(err, &httpErr) {
|
||||
for k, v := range httpErr.Header() {
|
||||
w.Header()[k] = v
|
||||
}
|
||||
w.WriteHeader(httpErr.StatusCode())
|
||||
w.Write(httpErr.Body())
|
||||
return
|
||||
}
|
||||
|
||||
switch {
|
||||
case errors.Is(err, ErrNoModelInContext):
|
||||
SendResponse(w, r, http.StatusNotFound, "no model id could be identified")
|
||||
case errors.Is(err, ErrNoPeerModelFound):
|
||||
SendResponse(w, r, http.StatusNotFound, "no peer found for requested model")
|
||||
case errors.Is(err, ErrNoLocalModelFound):
|
||||
SendResponse(w, r, http.StatusNotFound, "no local server found for requested model")
|
||||
case errors.Is(err, ErrNoRouterFound):
|
||||
SendResponse(w, r, http.StatusNotFound, "no router for requested model")
|
||||
default:
|
||||
SendResponse(w, r, http.StatusInternalServerError, fmt.Sprintf("unspecific error: %v", err))
|
||||
}
|
||||
}
|
||||
|
||||
// SendResponse detects what content type the client prefers and returns an error response in that format.
|
||||
func SendResponse(w http.ResponseWriter, r *http.Request, status int, message string) {
|
||||
acceptHeader := r.Header.Get("Accept")
|
||||
if strings.Contains(acceptHeader, "text/plain") {
|
||||
w.Header().Set("Content-Type", "text/plain")
|
||||
w.WriteHeader(status)
|
||||
w.Write([]byte(fmt.Sprintf("llama-swap: %s", message)))
|
||||
return
|
||||
}
|
||||
|
||||
if strings.Contains(acceptHeader, "text/html") {
|
||||
w.Header().Set("Content-Type", "text/html")
|
||||
w.WriteHeader(status)
|
||||
w.Write([]byte(fmt.Sprintf(`<html><body><h1>llama-swap</h1><p>%s</p></body></html>`, html.EscapeString(message))))
|
||||
return
|
||||
}
|
||||
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
w.WriteHeader(status)
|
||||
resp, err := json.Marshal(map[string]string{"src": "llama-swap", "error": message})
|
||||
if err != nil {
|
||||
w.Write([]byte(`{"src":"llama-swap", "error": "failed to marshal response"}`))
|
||||
return
|
||||
}
|
||||
w.Write(resp)
|
||||
}
|
||||
|
||||
// FetchContext will attempt to get the model id from the context, then
|
||||
// from an /upstream/<model> path prefix, then from the request body/query.
|
||||
// If it extracts the model it will store it in the context for downstream
|
||||
// handlers. An error will be returned when a model cannot be identified.
|
||||
func FetchContext(r *http.Request, cfg config.Config) (ReqContextData, error) {
|
||||
data, ok := ReadContext(r.Context())
|
||||
if ok {
|
||||
return data, nil
|
||||
}
|
||||
|
||||
if strings.HasPrefix(r.URL.Path, "/upstream/") {
|
||||
if data, ok := extractUpstreamContext(r, cfg); ok {
|
||||
*r = *r.WithContext(SetContext(r.Context(), data))
|
||||
return data, nil
|
||||
}
|
||||
return ReqContextData{}, ErrNoModelInContext
|
||||
}
|
||||
|
||||
if data, err := extractContext(r); err == nil && data.Model != "" {
|
||||
realName, _ := cfg.RealModelName(data.Model)
|
||||
if realName == "" {
|
||||
realName = data.Model
|
||||
}
|
||||
data.ModelID = realName
|
||||
if mc, ok := cfg.Models[realName]; ok {
|
||||
data.SendLoadingState = mc.SendLoadingState != nil && *mc.SendLoadingState
|
||||
}
|
||||
*r = *r.WithContext(SetContext(r.Context(), data))
|
||||
return data, nil
|
||||
}
|
||||
|
||||
return ReqContextData{}, ErrNoModelInContext
|
||||
}
|
||||
|
||||
// extractUpstreamContext resolves the model from an /upstream/<model>/... path.
|
||||
func extractUpstreamContext(r *http.Request, cfg config.Config) (ReqContextData, bool) {
|
||||
searchName, realName, _, found := FindModelInPath(cfg, strings.TrimPrefix(r.URL.Path, "/upstream"))
|
||||
if !found {
|
||||
return ReqContextData{}, false
|
||||
}
|
||||
return ReqContextData{
|
||||
Model: searchName,
|
||||
ModelID: realName,
|
||||
ApiKey: ExtractAPIKey(r),
|
||||
Streaming: r.URL.Query().Get("stream") == "true",
|
||||
SendLoadingState: sendLoadingState(cfg, realName),
|
||||
Metadata: make(map[string]string),
|
||||
}, true
|
||||
}
|
||||
|
||||
// sendLoadingState reports whether the configured model wants loading-state SSEs.
|
||||
func sendLoadingState(cfg config.Config, modelID string) bool {
|
||||
if mc, ok := cfg.Models[modelID]; ok {
|
||||
return mc.SendLoadingState != nil && *mc.SendLoadingState
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// FindModelInPath walks a slash-separated path, building up segments until one
|
||||
// matches a configured model. This resolves model names that contain slashes
|
||||
// (e.g. "author/model"). Returns the matched name, its real model ID, the
|
||||
// remaining path, and whether a match was found.
|
||||
func FindModelInPath(cfg config.Config, path string) (searchName, realName, remainingPath string, found bool) {
|
||||
parts := strings.Split(strings.TrimSpace(path), "/")
|
||||
name := ""
|
||||
|
||||
for i, part := range parts {
|
||||
if part == "" {
|
||||
continue
|
||||
}
|
||||
if name == "" {
|
||||
name = part
|
||||
} else {
|
||||
name = name + "/" + part
|
||||
}
|
||||
|
||||
if modelID, ok := cfg.RealModelName(name); ok {
|
||||
searchName = name
|
||||
realName = modelID
|
||||
remainingPath = "/" + strings.Join(parts[i+1:], "/")
|
||||
found = true
|
||||
}
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
func SetContext(ctx context.Context, data ReqContextData) context.Context {
|
||||
return context.WithValue(ctx, ReqContextKey, data)
|
||||
}
|
||||
|
||||
func ReadContext(ctx context.Context) (ReqContextData, bool) {
|
||||
data, ok := ctx.Value(ReqContextKey).(ReqContextData)
|
||||
return data, ok
|
||||
}
|
||||
|
||||
// SetReqData attaches a key/value pair to the request context's metadata map.
|
||||
// The metadata map must already exist in the context's ReqContextData; callers
|
||||
// should ensure FetchContext has run or initialize the map themselves.
|
||||
// It returns an error for nil contexts or contexts without request data.
|
||||
func SetReqData(ctx context.Context, key, value string) error {
|
||||
if ctx == nil {
|
||||
return fmt.Errorf("cannot set request metadata on nil context")
|
||||
}
|
||||
data, ok := ReadContext(ctx)
|
||||
if !ok {
|
||||
return fmt.Errorf("no request context data found")
|
||||
}
|
||||
if data.Metadata == nil {
|
||||
return fmt.Errorf("no metadata map in request context")
|
||||
}
|
||||
data.Metadata[key] = value
|
||||
return nil
|
||||
}
|
||||
|
||||
// extractContext pulls fields from an HTTP request into a ReqContextData,
|
||||
// returning whatever is available. For GET requests it reads query parameters.
|
||||
// For POST requests it inspects Content-Type and parses JSON,
|
||||
// multipart/form-data, or application/x-www-form-urlencoded bodies. The
|
||||
// request body is always restored before returning. An error is returned only
|
||||
// for I/O or parse failures, not for missing fields.
|
||||
func extractContext(r *http.Request) (ReqContextData, error) {
|
||||
|
||||
apiKey := ExtractAPIKey(r)
|
||||
|
||||
if r.Method == http.MethodGet {
|
||||
q := r.URL.Query()
|
||||
return ReqContextData{
|
||||
Model: q.Get("model"),
|
||||
Streaming: q.Get("stream") == "true",
|
||||
ApiKey: apiKey,
|
||||
Metadata: make(map[string]string),
|
||||
}, nil
|
||||
}
|
||||
|
||||
bodyBytes, err := io.ReadAll(r.Body)
|
||||
if err != nil {
|
||||
return ReqContextData{}, fmt.Errorf("error reading request body: %w", err)
|
||||
}
|
||||
defer func() {
|
||||
r.Body = io.NopCloser(bytes.NewReader(bodyBytes))
|
||||
}()
|
||||
|
||||
contentType := r.Header.Get("Content-Type")
|
||||
|
||||
if strings.Contains(contentType, "application/json") {
|
||||
return ReqContextData{
|
||||
Model: gjson.GetBytes(bodyBytes, "model").String(),
|
||||
Streaming: gjson.GetBytes(bodyBytes, "stream").Bool(),
|
||||
ApiKey: apiKey,
|
||||
Metadata: make(map[string]string),
|
||||
}, nil
|
||||
}
|
||||
|
||||
// Form parsers read from r.Body, so feed them a fresh reader over the
|
||||
// buffered bytes. The deferred restore above will reset r.Body again
|
||||
// after parsing.
|
||||
r.Body = io.NopCloser(bytes.NewReader(bodyBytes))
|
||||
if strings.Contains(contentType, "multipart/form-data") {
|
||||
if err := r.ParseMultipartForm(32 << 20); err != nil {
|
||||
return ReqContextData{}, fmt.Errorf("error parsing multipart form: %w", err)
|
||||
}
|
||||
} else {
|
||||
if err := r.ParseForm(); err != nil {
|
||||
return ReqContextData{}, fmt.Errorf("error parsing form: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
return ReqContextData{
|
||||
Model: r.FormValue("model"),
|
||||
Streaming: r.FormValue("stream") == "true",
|
||||
ApiKey: apiKey,
|
||||
Metadata: make(map[string]string),
|
||||
}, nil
|
||||
}
|
||||
|
||||
// extractAPIKey pulls a candidate API key from the request, preferring Basic,
|
||||
// then Bearer, then x-api-key.
|
||||
func ExtractAPIKey(r *http.Request) string {
|
||||
var bearerKey, basicKey string
|
||||
if auth := r.Header.Get("Authorization"); auth != "" {
|
||||
scheme, credentials, ok := strings.Cut(auth, " ")
|
||||
if ok {
|
||||
switch strings.ToLower(scheme) {
|
||||
case "bearer":
|
||||
bearerKey = credentials
|
||||
case "basic":
|
||||
if decoded, err := base64.StdEncoding.DecodeString(credentials); err == nil {
|
||||
if parts := strings.SplitN(string(decoded), ":", 2); len(parts) == 2 {
|
||||
basicKey = parts[1] // password field is the API key
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
switch {
|
||||
case basicKey != "":
|
||||
return basicKey
|
||||
case bearerKey != "":
|
||||
return bearerKey
|
||||
default:
|
||||
return r.Header.Get("x-api-key")
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,525 @@
|
||||
package shared
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/base64"
|
||||
"io"
|
||||
"mime/multipart"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"net/url"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/mostlygeek/llama-swap/internal/config"
|
||||
)
|
||||
|
||||
func TestExtractContext_GET(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
query string
|
||||
wantModel string
|
||||
wantErr bool
|
||||
}{
|
||||
{"model present", "model=llama3", "llama3", false},
|
||||
{"model with slashes", "model=author/model-7b", "author/model-7b", false},
|
||||
{"model missing", "", "", false},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
r, _ := http.NewRequest(http.MethodGet, "/?"+tt.query, nil)
|
||||
got, err := extractContext(r)
|
||||
if (err != nil) != tt.wantErr {
|
||||
t.Fatalf("wantErr=%v got err=%v", tt.wantErr, err)
|
||||
}
|
||||
if got.Model != tt.wantModel {
|
||||
t.Errorf("want %q got %q", tt.wantModel, got.Model)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestExtractContext_JSON(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
body string
|
||||
wantModel string
|
||||
wantErr bool
|
||||
}{
|
||||
{"model present", `{"model":"llama3","stream":true}`, "llama3", false},
|
||||
{"model with slashes", `{"model":"author/model-7b"}`, "author/model-7b", false},
|
||||
{"model empty string", `{"model":""}`, "", false},
|
||||
{"model key missing", `{"stream":true}`, "", false},
|
||||
{"invalid json", `not-json`, "", false},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
r, _ := http.NewRequest(http.MethodPost, "/v1/chat/completions", strings.NewReader(tt.body))
|
||||
r.Header.Set("Content-Type", "application/json")
|
||||
got, err := extractContext(r)
|
||||
if (err != nil) != tt.wantErr {
|
||||
t.Fatalf("wantErr=%v got err=%v", tt.wantErr, err)
|
||||
}
|
||||
if got.Model != tt.wantModel {
|
||||
t.Errorf("want %q got %q", tt.wantModel, got.Model)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestExtractContext_URLEncodedForm(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
formModel string
|
||||
wantModel string
|
||||
wantErr bool
|
||||
}{
|
||||
{"model present", "whisper-1", "whisper-1", false},
|
||||
{"model missing", "", "", false},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
form := url.Values{}
|
||||
if tt.formModel != "" {
|
||||
form.Set("model", tt.formModel)
|
||||
}
|
||||
r, _ := http.NewRequest(http.MethodPost, "/v1/audio/transcriptions", strings.NewReader(form.Encode()))
|
||||
r.Header.Set("Content-Type", "application/x-www-form-urlencoded")
|
||||
got, err := extractContext(r)
|
||||
if (err != nil) != tt.wantErr {
|
||||
t.Fatalf("wantErr=%v got err=%v", tt.wantErr, err)
|
||||
}
|
||||
if got.Model != tt.wantModel {
|
||||
t.Errorf("want %q got %q", tt.wantModel, got.Model)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestExtractContext_MultipartForm(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
formModel string
|
||||
wantModel string
|
||||
wantErr bool
|
||||
}{
|
||||
{"model present", "whisper-1", "whisper-1", false},
|
||||
{"model missing", "", "", false},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
var buf bytes.Buffer
|
||||
mw := multipart.NewWriter(&buf)
|
||||
if tt.formModel != "" {
|
||||
fw, _ := mw.CreateFormField("model")
|
||||
fw.Write([]byte(tt.formModel))
|
||||
}
|
||||
mw.Close()
|
||||
|
||||
r, _ := http.NewRequest(http.MethodPost, "/v1/audio/transcriptions", &buf)
|
||||
r.Header.Set("Content-Type", mw.FormDataContentType())
|
||||
got, err := extractContext(r)
|
||||
if (err != nil) != tt.wantErr {
|
||||
t.Fatalf("wantErr=%v got err=%v", tt.wantErr, err)
|
||||
}
|
||||
if got.Model != tt.wantModel {
|
||||
t.Errorf("want %q got %q", tt.wantModel, got.Model)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestExtractContext_JSONBodyRestored(t *testing.T) {
|
||||
body := `{"model":"llama3","stream":true}`
|
||||
r, _ := http.NewRequest(http.MethodPost, "/v1/chat/completions", strings.NewReader(body))
|
||||
r.Header.Set("Content-Type", "application/json")
|
||||
|
||||
if _, err := extractContext(r); err != nil {
|
||||
t.Fatalf("ExtractContext: %v", err)
|
||||
}
|
||||
|
||||
remaining, err := io.ReadAll(r.Body)
|
||||
if err != nil {
|
||||
t.Fatalf("reading body after ExtractContext: %v", err)
|
||||
}
|
||||
if string(remaining) != body {
|
||||
t.Errorf("body not restored: want %q got %q", body, string(remaining))
|
||||
}
|
||||
}
|
||||
|
||||
func TestExtractContext_MultipartBodyRestored(t *testing.T) {
|
||||
var buf bytes.Buffer
|
||||
mw := multipart.NewWriter(&buf)
|
||||
fw, _ := mw.CreateFormField("model")
|
||||
fw.Write([]byte("whisper-1"))
|
||||
ff, _ := mw.CreateFormFile("file", "audio.wav")
|
||||
ff.Write([]byte("fake-audio-bytes"))
|
||||
mw.Close()
|
||||
|
||||
original := buf.Bytes()
|
||||
|
||||
r, _ := http.NewRequest(http.MethodPost, "/v1/audio/transcriptions", bytes.NewReader(original))
|
||||
r.Header.Set("Content-Type", mw.FormDataContentType())
|
||||
|
||||
if _, err := extractContext(r); err != nil {
|
||||
t.Fatalf("ExtractContext: %v", err)
|
||||
}
|
||||
|
||||
remaining, err := io.ReadAll(r.Body)
|
||||
if err != nil {
|
||||
t.Fatalf("reading body after ExtractContext: %v", err)
|
||||
}
|
||||
if !bytes.Equal(remaining, original) {
|
||||
t.Errorf("multipart body not restored: want %d bytes got %d bytes", len(original), len(remaining))
|
||||
}
|
||||
}
|
||||
|
||||
func TestExtractContext_URLEncodedBodyRestored(t *testing.T) {
|
||||
body := "model=whisper-1&extra=value"
|
||||
r, _ := http.NewRequest(http.MethodPost, "/v1/audio/transcriptions", strings.NewReader(body))
|
||||
r.Header.Set("Content-Type", "application/x-www-form-urlencoded")
|
||||
|
||||
if _, err := extractContext(r); err != nil {
|
||||
t.Fatalf("ExtractContext: %v", err)
|
||||
}
|
||||
|
||||
remaining, err := io.ReadAll(r.Body)
|
||||
if err != nil {
|
||||
t.Fatalf("reading body after ExtractContext: %v", err)
|
||||
}
|
||||
if string(remaining) != body {
|
||||
t.Errorf("url-encoded body not restored: want %q got %q", body, string(remaining))
|
||||
}
|
||||
}
|
||||
|
||||
func TestSetContext(t *testing.T) {
|
||||
ctx := SetContext(context.Background(), ReqContextData{Model: "llama3", ModelID: "llama3"})
|
||||
data, ok := ctx.Value(ReqContextKey).(ReqContextData)
|
||||
if !ok {
|
||||
t.Fatalf("ContextKey not set or wrong type")
|
||||
}
|
||||
if data.Model != "llama3" {
|
||||
t.Errorf("want %q got %q", "llama3", data.Model)
|
||||
}
|
||||
if data.ModelID != "llama3" {
|
||||
t.Errorf("want %q got %q", "llama3", data.ModelID)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSetContext_WithAlias(t *testing.T) {
|
||||
ctx := SetContext(context.Background(), ReqContextData{Model: "llama", ModelID: "llama3"})
|
||||
data, _ := ctx.Value(ReqContextKey).(ReqContextData)
|
||||
if data.Model != "llama" {
|
||||
t.Errorf("want requested %q got %q", "llama", data.Model)
|
||||
}
|
||||
if data.ModelID != "llama3" {
|
||||
t.Errorf("want real %q got %q", "llama3", data.ModelID)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSetContext_DoesNotMutateParent(t *testing.T) {
|
||||
parent := context.Background()
|
||||
_ = SetContext(parent, ReqContextData{Model: "llama3", ModelID: "llama3"})
|
||||
if v := parent.Value(ReqContextKey); v != nil {
|
||||
t.Errorf("parent context was mutated: %v", v)
|
||||
}
|
||||
}
|
||||
|
||||
func TestReadContext(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
ctx context.Context
|
||||
wantReq string
|
||||
wantReal string
|
||||
wantBool bool
|
||||
}{
|
||||
{
|
||||
name: "model present, same name",
|
||||
ctx: SetContext(context.Background(), ReqContextData{Model: "llama3", ModelID: "llama3"}),
|
||||
wantReq: "llama3",
|
||||
wantReal: "llama3",
|
||||
wantBool: true,
|
||||
},
|
||||
{
|
||||
name: "model present, aliased",
|
||||
ctx: SetContext(context.Background(), ReqContextData{Model: "llama", ModelID: "llama3"}),
|
||||
wantReq: "llama",
|
||||
wantReal: "llama3",
|
||||
wantBool: true,
|
||||
},
|
||||
{
|
||||
name: "model absent",
|
||||
ctx: context.Background(),
|
||||
wantReq: "",
|
||||
wantReal: "",
|
||||
wantBool: false,
|
||||
},
|
||||
{
|
||||
name: "model is empty string",
|
||||
ctx: SetContext(context.Background(), ReqContextData{Model: "", ModelID: ""}),
|
||||
wantReq: "",
|
||||
wantReal: "",
|
||||
wantBool: true,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
gotData, ok := ReadContext(tt.ctx)
|
||||
if gotData.Model != tt.wantReq || gotData.ModelID != tt.wantReal || ok != tt.wantBool {
|
||||
t.Errorf("want (%q, %q, %v) got (%q, %q, %v)", tt.wantReq, tt.wantReal, tt.wantBool, gotData.Model, gotData.ModelID, ok)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestExtractContext_Streaming_GET(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
query string
|
||||
wantStreaming bool
|
||||
}{
|
||||
{"streaming true", "model=llama3&stream=true", true},
|
||||
{"streaming false", "model=llama3&stream=false", false},
|
||||
{"no stream param", "model=llama3", false},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
r, _ := http.NewRequest(http.MethodGet, "/?"+tt.query, nil)
|
||||
got, err := extractContext(r)
|
||||
if err != nil {
|
||||
t.Fatalf("ExtractContext: %v", err)
|
||||
}
|
||||
if got.Streaming != tt.wantStreaming {
|
||||
t.Errorf("Streaming: want %v, got %v", tt.wantStreaming, got.Streaming)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestExtractContext_Streaming_JSON(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
body string
|
||||
wantStreaming bool
|
||||
}{
|
||||
{"streaming true", `{"model":"llama3","stream":true}`, true},
|
||||
{"streaming false", `{"model":"llama3","stream":false}`, false},
|
||||
{"no stream param", `{"model":"llama3"}`, false},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
r, _ := http.NewRequest(http.MethodPost, "/v1/chat/completions", strings.NewReader(tt.body))
|
||||
r.Header.Set("Content-Type", "application/json")
|
||||
got, err := extractContext(r)
|
||||
if err != nil {
|
||||
t.Fatalf("ExtractContext: %v", err)
|
||||
}
|
||||
if got.Streaming != tt.wantStreaming {
|
||||
t.Errorf("Streaming: want %v, got %v", tt.wantStreaming, got.Streaming)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestExtractContext_Streaming_URLEncodedForm(t *testing.T) {
|
||||
r, _ := http.NewRequest(http.MethodPost, "/v1/audio/transcriptions", strings.NewReader("model=whisper-1&stream=true"))
|
||||
r.Header.Set("Content-Type", "application/x-www-form-urlencoded")
|
||||
got, err := extractContext(r)
|
||||
if err != nil {
|
||||
t.Fatalf("ExtractContext: %v", err)
|
||||
}
|
||||
if !got.Streaming {
|
||||
t.Error("Streaming should be true")
|
||||
}
|
||||
}
|
||||
|
||||
func TestExtractContext_ApiKey(t *testing.T) {
|
||||
basicHeader := func(user, pass string) string {
|
||||
return "Basic " + base64.StdEncoding.EncodeToString([]byte(user+":"+pass))
|
||||
}
|
||||
cases := []struct {
|
||||
name string
|
||||
method string
|
||||
ct string
|
||||
body string
|
||||
auth string
|
||||
xapi string
|
||||
wantKey string
|
||||
}{
|
||||
{"GET bearer", http.MethodGet, "", "", "Bearer sk-get", "", "sk-get"},
|
||||
{"GET x-api-key", http.MethodGet, "", "", "", "xk-get", "xk-get"},
|
||||
{"GET basic", http.MethodGet, "", "", basicHeader("u", "pw-get"), "", "pw-get"},
|
||||
{"JSON bearer", http.MethodPost, "application/json", `{"model":"m"}`, "Bearer sk-json", "", "sk-json"},
|
||||
{"JSON x-api-key", http.MethodPost, "application/json", `{"model":"m"}`, "", "xk-json", "xk-json"},
|
||||
{"form bearer", http.MethodPost, "application/x-www-form-urlencoded", "model=m", "Bearer sk-form", "", "sk-form"},
|
||||
{"no key", http.MethodGet, "", "", "", "", ""},
|
||||
}
|
||||
for _, c := range cases {
|
||||
t.Run(c.name, func(t *testing.T) {
|
||||
var body io.Reader
|
||||
if c.body != "" {
|
||||
body = strings.NewReader(c.body)
|
||||
}
|
||||
r, _ := http.NewRequest(c.method, "/", body)
|
||||
if c.ct != "" {
|
||||
r.Header.Set("Content-Type", c.ct)
|
||||
}
|
||||
if c.auth != "" {
|
||||
r.Header.Set("Authorization", c.auth)
|
||||
}
|
||||
if c.xapi != "" {
|
||||
r.Header.Set("x-api-key", c.xapi)
|
||||
}
|
||||
got, err := extractContext(r)
|
||||
if err != nil {
|
||||
t.Fatalf("extractContext: %v", err)
|
||||
}
|
||||
if got.ApiKey != c.wantKey {
|
||||
t.Errorf("ApiKey = %q, want %q", got.ApiKey, c.wantKey)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestSetReqData(t *testing.T) {
|
||||
ctx := SetContext(context.Background(), ReqContextData{Model: "llama3", ModelID: "llama3", Metadata: make(map[string]string)})
|
||||
|
||||
if err := SetReqData(ctx, "client", "web"); err != nil {
|
||||
t.Fatalf("SetReqData: %v", err)
|
||||
}
|
||||
if err := SetReqData(ctx, "trace", "abc123"); err != nil {
|
||||
t.Fatalf("SetReqData: %v", err)
|
||||
}
|
||||
|
||||
data, ok := ReadContext(ctx)
|
||||
if !ok {
|
||||
t.Fatal("context data missing")
|
||||
}
|
||||
if data.Metadata["client"] != "web" {
|
||||
t.Errorf("client = %q, want %q", data.Metadata["client"], "web")
|
||||
}
|
||||
if data.Metadata["trace"] != "abc123" {
|
||||
t.Errorf("trace = %q, want %q", data.Metadata["trace"], "abc123")
|
||||
}
|
||||
}
|
||||
|
||||
func TestSetReqData_Errors(t *testing.T) {
|
||||
if err := SetReqData(context.Background(), "k", "v"); err == nil {
|
||||
t.Error("expected error when no request context data exists")
|
||||
}
|
||||
ctx := SetContext(context.Background(), ReqContextData{Model: "llama3", ModelID: "llama3"})
|
||||
if err := SetReqData(ctx, "k", "v"); err == nil {
|
||||
t.Error("expected error when metadata map is missing")
|
||||
}
|
||||
}
|
||||
|
||||
func TestServer_ExtractAPIKey(t *testing.T) {
|
||||
basicHeader := func(user, pass string) string {
|
||||
return "Basic " + base64.StdEncoding.EncodeToString([]byte(user+":"+pass))
|
||||
}
|
||||
cases := []struct {
|
||||
name string
|
||||
auth string
|
||||
xapi string
|
||||
want string
|
||||
}{
|
||||
{"none", "", "", ""},
|
||||
{"bearer", "Bearer tok123", "", "tok123"},
|
||||
{"basic", basicHeader("user", "pw-key"), "", "pw-key"},
|
||||
{"x-api-key", "", "xkey", "xkey"},
|
||||
{"basic beats bearer", basicHeader("u", "bk"), "", "bk"},
|
||||
{"bearer beats x-api-key", "Bearer btok", "xkey", "btok"},
|
||||
{"malformed basic falls back to x-api-key", "Basic !!!notbase64", "xkey", "xkey"},
|
||||
{"lowercase bearer", "bearer tok123", "", "tok123"},
|
||||
{"lowercase basic", "basic " + base64.StdEncoding.EncodeToString([]byte("user:pw-key")), "", "pw-key"},
|
||||
{"mixed case BEARER", "BEARER tok456", "", "tok456"},
|
||||
{"mixed case bAsIc", "bAsIc " + base64.StdEncoding.EncodeToString([]byte("u:bk")), "", "bk"},
|
||||
}
|
||||
for _, c := range cases {
|
||||
t.Run(c.name, func(t *testing.T) {
|
||||
r := httptest.NewRequest(http.MethodGet, "/", nil)
|
||||
if c.auth != "" {
|
||||
r.Header.Set("Authorization", c.auth)
|
||||
}
|
||||
if c.xapi != "" {
|
||||
r.Header.Set("x-api-key", c.xapi)
|
||||
}
|
||||
if got := ExtractAPIKey(r); got != c.want {
|
||||
t.Errorf("extractAPIKey() = %q, want %q", got, c.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestFetchContext_UpstreamPath(t *testing.T) {
|
||||
cfg := config.Config{
|
||||
Models: map[string]config.ModelConfig{
|
||||
"m1": {},
|
||||
"author/model": {},
|
||||
"real": {Aliases: []string{"nick"}},
|
||||
},
|
||||
}
|
||||
|
||||
cases := []struct {
|
||||
name string
|
||||
path string
|
||||
wantModel string
|
||||
wantModelID string
|
||||
wantErr bool
|
||||
}{
|
||||
{"known model", "/upstream/m1/v1/chat/completions", "m1", "m1", false},
|
||||
{"model with slash", "/upstream/author/model/v1/chat", "author/model", "author/model", false},
|
||||
{"unknown model", "/upstream/nope/v1/chat/completions", "", "", true},
|
||||
{"bare model path", "/upstream/m1/", "m1", "m1", false},
|
||||
}
|
||||
|
||||
for _, c := range cases {
|
||||
t.Run(c.name, func(t *testing.T) {
|
||||
r := httptest.NewRequest(http.MethodPost, c.path, strings.NewReader(`{}`))
|
||||
data, err := FetchContext(r, cfg)
|
||||
if (err != nil) != c.wantErr {
|
||||
t.Fatalf("wantErr=%v got err=%v", c.wantErr, err)
|
||||
}
|
||||
if c.wantErr {
|
||||
return
|
||||
}
|
||||
if data.Model != c.wantModel {
|
||||
t.Errorf("model = %q, want %q", data.Model, c.wantModel)
|
||||
}
|
||||
if data.ModelID != c.wantModelID {
|
||||
t.Errorf("modelID = %q, want %q", data.ModelID, c.wantModelID)
|
||||
}
|
||||
if data.Metadata == nil {
|
||||
t.Error("metadata map not initialized")
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestFetchContext_UpstreamPath_DoesNotReadBody(t *testing.T) {
|
||||
cfg := config.Config{Models: map[string]config.ModelConfig{"m1": {}}}
|
||||
body := `{"model":"should-not-matter"}`
|
||||
r := httptest.NewRequest(http.MethodPost, "/upstream/m1/v1/chat/completions", strings.NewReader(body))
|
||||
|
||||
_, err := FetchContext(r, cfg)
|
||||
if err != nil {
|
||||
t.Fatalf("FetchContext: %v", err)
|
||||
}
|
||||
|
||||
// The body should be untouched so the upstream handler can still read it.
|
||||
got, err := io.ReadAll(r.Body)
|
||||
if err != nil {
|
||||
t.Fatalf("read body: %v", err)
|
||||
}
|
||||
if string(got) != body {
|
||||
t.Errorf("body was consumed: %q", string(got))
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,63 @@
|
||||
package shared
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"net/http"
|
||||
"strconv"
|
||||
)
|
||||
|
||||
// HTTPError is an error that carries a complete HTTP response. A producer (e.g.
// a scheduler shedding a request) returns one of these; a renderer (e.g.
// router.SendError) writes the status, headers, and body verbatim instead of
// mapping the error to a generic status. It is the seam that lets a component
// shed a request with a rich response (e.g. a 429 with rate-limit headers and a
// JSON hint body) without the renderer knowing the producer's internals.
type HTTPError interface {
	error
	// StatusCode is the HTTP status to send.
	StatusCode() int
	// Header holds the response headers to send with the error.
	Header() http.Header
	// Body is the raw response body.
	Body() []byte
}
|
||||
|
||||
// ConcurrencyLimitError is the HTTPError used for a 429 concurrency-limit
// rejection. The zero value is usable: it advertises a 1-second Retry-After
// and a JSON body with a generic hint.
type ConcurrencyLimitError struct {
	// RetryAfter is the Retry-After header value in seconds. Values <= 0
	// fall back to 1.
	RetryAfter int

	// Message is the "error" field of the JSON body. Empty falls back to
	// "Too many requests".
	Message string
}

// Error implements error. The text is fixed and does not include Message.
func (e ConcurrencyLimitError) Error() string {
	return "concurrency limit reached"
}

// StatusCode always reports 429 Too Many Requests.
func (e ConcurrencyLimitError) StatusCode() int {
	return http.StatusTooManyRequests
}

// Header returns a fresh header set carrying the JSON content type and the
// Retry-After hint.
func (e ConcurrencyLimitError) Header() http.Header {
	headers := make(http.Header, 2)
	headers.Set("Content-Type", "application/json")
	headers.Set("Retry-After", e.retryAfter())
	return headers
}

// Body returns the JSON document {"error": <message>}.
func (e ConcurrencyLimitError) Body() []byte {
	payload := map[string]string{"error": e.message()}
	encoded, _ := json.Marshal(payload)
	return encoded
}

// retryAfter renders the Retry-After value, applying the 1-second default.
func (e ConcurrencyLimitError) retryAfter() string {
	if e.RetryAfter <= 0 {
		return "1"
	}
	return strconv.Itoa(e.RetryAfter)
}

// message returns Message, or the default hint when it is empty.
func (e ConcurrencyLimitError) message() string {
	if e.Message == "" {
		return "Too many requests"
	}
	return e.Message
}
|
||||
@@ -0,0 +1,31 @@
|
||||
package shared
|
||||
|
||||
import "net"
|
||||
|
||||
// IsLoopbackAddr reports whether listenAddr binds exclusively to loopback.
//
// listenAddr must be in host:port form; anything net.SplitHostPort rejects
// yields false. An empty or wildcard host (e.g. ":8080", "0.0.0.0:8080",
// "[::]:8080") binds on all interfaces and yields false. A host that is not
// an IP literal (e.g. "localhost") is resolved, and counts as loopback only
// if the lookup succeeds and every returned address is a loopback address.
func IsLoopbackAddr(listenAddr string) bool {
	host, _, splitErr := net.SplitHostPort(listenAddr)
	if splitErr != nil || host == "" {
		return false
	}

	// IP literal: answer directly, no name resolution needed.
	if literal := net.ParseIP(host); literal != nil {
		return literal.IsLoopback()
	}

	// Hostname: every resolved address must be loopback.
	resolved, lookupErr := net.LookupHost(host)
	if lookupErr != nil || len(resolved) == 0 {
		return false
	}
	for _, addr := range resolved {
		if !net.ParseIP(addr).IsLoopback() {
			return false
		}
	}
	return true
}
|
||||
@@ -0,0 +1,137 @@
|
||||
package configwatcher
|
||||
|
||||
import (
|
||||
"context"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"sort"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
// DirWatcher polls a directory for changes to its set of *.yml / *.yaml files.
// It fires OnChange when a file is added, removed, or has its mod time/size
// change. Like Watcher it is poll-based so it works in Docker bind-mounts and
// k8s ConfigMap projections where inotify is unreliable.
//
// The baseline poll establishes initial state and does not fire OnChange.
type DirWatcher struct {
	// Path is the directory that Run scans.
	Path string
	// Interval is the time between scans; values <= 0 make Run use
	// DefaultInterval.
	Interval time.Duration
	// OnChange is called from Run when a change is detected. It may be nil,
	// in which case changes are detected but nothing is called.
	OnChange func()
}
|
||||
|
||||
// dirSnapshot is an ordered map of file name -> file state. The ordering is
// derived from sorted filenames so two snapshots compare deterministically
// regardless of readdir order. exists reflects whether the directory was
// readable at scan time; a missing directory yields exists=false.
type dirSnapshot struct {
	// exists is true when the directory could be read.
	exists bool
	// names lists the recorded file names in sorted order.
	names []string
	// states maps each entry of names to its recorded file state.
	states map[string]snapshot
}
|
||||
|
||||
func newDirSnapshot() dirSnapshot {
|
||||
return dirSnapshot{states: make(map[string]snapshot)}
|
||||
}
|
||||
|
||||
// equal reports whether two snapshots describe the same file set and per-file
|
||||
// state. A missing directory (exists=false) is treated as equal to any other
|
||||
// missing directory regardless of cached names.
|
||||
func (s dirSnapshot) equal(other dirSnapshot) bool {
|
||||
if !s.exists && !other.exists {
|
||||
return true
|
||||
}
|
||||
if s.exists != other.exists {
|
||||
return false
|
||||
}
|
||||
if len(s.names) != len(other.names) {
|
||||
return false
|
||||
}
|
||||
for i, n := range s.names {
|
||||
if other.names[i] != n {
|
||||
return false
|
||||
}
|
||||
}
|
||||
for _, n := range s.names {
|
||||
a, b := s.states[n], other.states[n]
|
||||
if a.exists != b.exists || a.size != b.size || !a.modTime.Equal(b.modTime) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// Run blocks until ctx is canceled. It polls Path on Interval and invokes
|
||||
// OnChange whenever the directory's YAML file set changes.
|
||||
//
|
||||
// Policy mirrors the single-file Watcher: disappearance (directory missing or
|
||||
// empty) is treated as a transient rename-style write and stays quiet; the
|
||||
// transition back to present-with-content fires OnChange.
|
||||
func (w *DirWatcher) Run(ctx context.Context) {
|
||||
interval := w.Interval
|
||||
if interval <= 0 {
|
||||
interval = DefaultInterval
|
||||
}
|
||||
|
||||
prev := scanDir(w.Path)
|
||||
|
||||
ticker := time.NewTicker(interval)
|
||||
defer ticker.Stop()
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return
|
||||
case <-ticker.C:
|
||||
cur := scanDir(w.Path)
|
||||
// Suppress transitions involving an empty or missing directory —
|
||||
// these are treated as transient rename-style writes, mirroring
|
||||
// the single-file Watcher. Only present-with-content →
|
||||
// present-with-content (changed) or no-content →
|
||||
// present-with-content fires OnChange.
|
||||
prevHasContent := prev.exists && len(prev.names) > 0
|
||||
curHasContent := cur.exists && len(cur.names) > 0
|
||||
if curHasContent && (!prevHasContent || !prev.equal(cur)) && w.OnChange != nil {
|
||||
w.OnChange()
|
||||
}
|
||||
prev = cur
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// scanDir returns a snapshot of the *.yml/*.yaml files in dir. If the
|
||||
// directory cannot be read (missing, permission denied) the snapshot reports
|
||||
// exists=false; the next successful scan will detect the recovery and fire
|
||||
// OnChange.
|
||||
func scanDir(dir string) dirSnapshot {
|
||||
snap := newDirSnapshot()
|
||||
entries, err := os.ReadDir(dir)
|
||||
if err != nil {
|
||||
return snap // exists=false
|
||||
}
|
||||
snap.exists = true
|
||||
for _, e := range entries {
|
||||
if e.IsDir() {
|
||||
continue
|
||||
}
|
||||
name := e.Name()
|
||||
if !strings.HasSuffix(name, ".yml") && !strings.HasSuffix(name, ".yaml") {
|
||||
continue
|
||||
}
|
||||
fi, err := os.Stat(filepath.Join(dir, name))
|
||||
if err != nil {
|
||||
// File disappeared between ReadDir and Stat; skip it — the
|
||||
// next poll will observe the removal cleanly.
|
||||
continue
|
||||
}
|
||||
snap.names = append(snap.names, name)
|
||||
snap.states[name] = snapshot{
|
||||
exists: true,
|
||||
modTime: fi.ModTime(),
|
||||
size: fi.Size(),
|
||||
}
|
||||
}
|
||||
sort.Strings(snap.names)
|
||||
return snap
|
||||
}
|
||||
@@ -0,0 +1,199 @@
|
||||
package configwatcher
|
||||
|
||||
import (
|
||||
"context"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"sync/atomic"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
// startDirWatcher launches w.Run in a goroutine and returns a function that
|
||||
// cancels the context and waits for Run to return.
|
||||
func startDirWatcher(t *testing.T, w *DirWatcher) func() {
|
||||
t.Helper()
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
done := make(chan struct{})
|
||||
go func() {
|
||||
w.Run(ctx)
|
||||
close(done)
|
||||
}()
|
||||
return func() {
|
||||
cancel()
|
||||
select {
|
||||
case <-done:
|
||||
case <-time.After(2 * time.Second):
|
||||
t.Fatal("DirWatcher did not stop within 2s of cancel")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func writeYAMLInDir(t *testing.T, dir, name, content string) {
|
||||
t.Helper()
|
||||
require.NoError(t, os.WriteFile(filepath.Join(dir, name), []byte(content), 0o644))
|
||||
}
|
||||
|
||||
func TestDirWatcher_NoFireOnBaseline(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
writeYAMLInDir(t, dir, "a.yaml", "a")
|
||||
|
||||
var n int64
|
||||
stop := startDirWatcher(t, &DirWatcher{
|
||||
Path: dir,
|
||||
Interval: testInterval,
|
||||
OnChange: func() { atomic.AddInt64(&n, 1) },
|
||||
})
|
||||
defer stop()
|
||||
|
||||
time.Sleep(testInterval * 5)
|
||||
require.Equal(t, int64(0), atomic.LoadInt64(&n), "baseline poll must not fire")
|
||||
}
|
||||
|
||||
func TestDirWatcher_DetectsFileAdd(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
writeYAMLInDir(t, dir, "a.yaml", "a")
|
||||
|
||||
var n int64
|
||||
stop := startDirWatcher(t, &DirWatcher{
|
||||
Path: dir,
|
||||
Interval: testInterval,
|
||||
OnChange: func() { atomic.AddInt64(&n, 1) },
|
||||
})
|
||||
defer stop()
|
||||
time.Sleep(testInterval * 2)
|
||||
|
||||
writeYAMLInDir(t, dir, "b.yaml", "b")
|
||||
require.True(t, waitForCount(t, &n, 1, time.Second), "callback should fire when a file is added")
|
||||
}
|
||||
|
||||
func TestDirWatcher_DetectsFileRemoval(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
writeYAMLInDir(t, dir, "a.yaml", "a")
|
||||
writeYAMLInDir(t, dir, "b.yaml", "b")
|
||||
|
||||
var n int64
|
||||
stop := startDirWatcher(t, &DirWatcher{
|
||||
Path: dir,
|
||||
Interval: testInterval,
|
||||
OnChange: func() { atomic.AddInt64(&n, 1) },
|
||||
})
|
||||
defer stop()
|
||||
time.Sleep(testInterval * 2)
|
||||
|
||||
require.NoError(t, os.Remove(filepath.Join(dir, "b.yaml")))
|
||||
require.True(t, waitForCount(t, &n, 1, time.Second), "callback should fire when a file is removed")
|
||||
}
|
||||
|
||||
func TestDirWatcher_DetectsModTimeChange(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
writeYAMLInDir(t, dir, "a.yaml", "a")
|
||||
|
||||
base := time.Now().Add(-1 * time.Hour).Truncate(time.Second)
|
||||
require.NoError(t, os.Chtimes(filepath.Join(dir, "a.yaml"), base, base))
|
||||
|
||||
var n int64
|
||||
stop := startDirWatcher(t, &DirWatcher{
|
||||
Path: dir,
|
||||
Interval: testInterval,
|
||||
OnChange: func() { atomic.AddInt64(&n, 1) },
|
||||
})
|
||||
defer stop()
|
||||
time.Sleep(testInterval * 2)
|
||||
|
||||
require.NoError(t, os.Chtimes(filepath.Join(dir, "a.yaml"), base.Add(10*time.Second), base.Add(10*time.Second)))
|
||||
require.True(t, waitForCount(t, &n, 1, time.Second), "callback should fire after mtime change")
|
||||
}
|
||||
|
||||
func TestDirWatcher_IgnoresNonYAMLFiles(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
writeYAMLInDir(t, dir, "a.yaml", "a")
|
||||
|
||||
var n int64
|
||||
stop := startDirWatcher(t, &DirWatcher{
|
||||
Path: dir,
|
||||
Interval: testInterval,
|
||||
OnChange: func() { atomic.AddInt64(&n, 1) },
|
||||
})
|
||||
defer stop()
|
||||
time.Sleep(testInterval * 2)
|
||||
|
||||
// Adding a .txt file must not fire.
|
||||
require.NoError(t, os.WriteFile(filepath.Join(dir, "notes.txt"), []byte("hi"), 0o644))
|
||||
time.Sleep(testInterval * 4)
|
||||
require.Equal(t, int64(0), atomic.LoadInt64(&n), "non-YAML files must be ignored")
|
||||
|
||||
// Adding a .yml file must fire.
|
||||
writeYAMLInDir(t, dir, "b.yml", "b")
|
||||
require.True(t, waitForCount(t, &n, 1, time.Second), "callback should fire for *.yml files")
|
||||
}
|
||||
|
||||
func TestDirWatcher_MissingDirRecovers(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
writeYAMLInDir(t, dir, "a.yaml", "a")
|
||||
|
||||
var n int64
|
||||
stop := startDirWatcher(t, &DirWatcher{
|
||||
Path: dir,
|
||||
Interval: testInterval,
|
||||
OnChange: func() { atomic.AddInt64(&n, 1) },
|
||||
})
|
||||
defer stop()
|
||||
time.Sleep(testInterval * 2)
|
||||
|
||||
// Remove the directory. No fire expected on disappearance alone.
|
||||
require.NoError(t, os.RemoveAll(dir))
|
||||
time.Sleep(testInterval * 3)
|
||||
require.Equal(t, int64(0), atomic.LoadInt64(&n), "directory removal alone must not fire")
|
||||
|
||||
// Recreate the directory and a YAML file; the recovery should fire.
|
||||
require.NoError(t, os.MkdirAll(dir, 0o755))
|
||||
writeYAMLInDir(t, dir, "recovered.yaml", "r")
|
||||
require.True(t, waitForCount(t, &n, 1, time.Second), "callback should fire when dir returns with content")
|
||||
}
|
||||
|
||||
func TestDirWatcher_EmptyDirSuppressedThenRecovers(t *testing.T) {
|
||||
// Present-with-content → empty (all YAML removed, dir still exists)
|
||||
// must stay quiet — treated as transient per the documented policy.
|
||||
// The transition back to content fires.
|
||||
dir := t.TempDir()
|
||||
writeYAMLInDir(t, dir, "a.yaml", "a")
|
||||
|
||||
var n int64
|
||||
stop := startDirWatcher(t, &DirWatcher{
|
||||
Path: dir,
|
||||
Interval: testInterval,
|
||||
OnChange: func() { atomic.AddInt64(&n, 1) },
|
||||
})
|
||||
defer stop()
|
||||
time.Sleep(testInterval * 2)
|
||||
|
||||
// Remove the only YAML file. Dir still exists but is empty of YAML.
|
||||
require.NoError(t, os.Remove(filepath.Join(dir, "a.yaml")))
|
||||
time.Sleep(testInterval * 4)
|
||||
require.Equal(t, int64(0), atomic.LoadInt64(&n), "emptying the directory must not fire")
|
||||
|
||||
// Add a YAML file back; transition to present-with-content fires.
|
||||
writeYAMLInDir(t, dir, "c.yaml", "c")
|
||||
require.True(t, waitForCount(t, &n, 1, time.Second), "callback should fire when content returns")
|
||||
}
|
||||
|
||||
func TestDirWatcher_ContextCancelStopsRun(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
writeYAMLInDir(t, dir, "a.yaml", "a")
|
||||
|
||||
w := &DirWatcher{Path: dir, Interval: testInterval}
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
done := make(chan struct{})
|
||||
go func() { w.Run(ctx); close(done) }()
|
||||
|
||||
time.Sleep(testInterval * 2)
|
||||
cancel()
|
||||
select {
|
||||
case <-done:
|
||||
case <-time.After(2 * time.Second):
|
||||
t.Fatal("Run did not return within 2s of cancel")
|
||||
}
|
||||
}
|
||||
+43
-19
@@ -6,6 +6,7 @@ import (
|
||||
"flag"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"net"
|
||||
"net/http"
|
||||
"os"
|
||||
"os/signal"
|
||||
@@ -54,7 +55,8 @@ var logTimeFormats = map[string]string{
|
||||
}
|
||||
|
||||
func main() {
|
||||
flagConfig := flag.String("config", "", "path to config file (required)")
|
||||
flagConfig := flag.String("config", "", "path to config file")
|
||||
flagConfigDir := flag.String("config-dir", "", "directory of *.yml/*.yaml config files (additive to -config)")
|
||||
flagListen := flag.String("listen", "", "listen address (default :8080 or :8443 for TLS)")
|
||||
flagCertFile := flag.String("tls-cert-file", "", "TLS certificate file")
|
||||
flagKeyFile := flag.String("tls-key-file", "", "TLS key file")
|
||||
@@ -67,8 +69,8 @@ func main() {
|
||||
os.Exit(0)
|
||||
}
|
||||
|
||||
if *flagConfig == "" {
|
||||
slog.Error("-config is required")
|
||||
if *flagConfig == "" && *flagConfigDir == "" {
|
||||
slog.Error("at least one of -config or -config-dir must be provided")
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
@@ -87,10 +89,9 @@ func main() {
|
||||
}
|
||||
}
|
||||
|
||||
configPath := *flagConfig
|
||||
cfg, err := config.LoadConfig(configPath)
|
||||
cfg, err := config.LoadConfigSources(*flagConfig, *flagConfigDir)
|
||||
if err != nil {
|
||||
slog.Error("failed to load config", "path", configPath, "error", err)
|
||||
slog.Error("failed to load config", "config", *flagConfig, "config-dir", *flagConfigDir, "error", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
@@ -186,7 +187,7 @@ func main() {
|
||||
|
||||
proxyLog.Info("reloading configuration")
|
||||
|
||||
newCfg, err := config.LoadConfig(configPath)
|
||||
newCfg, err := config.LoadConfigSources(*flagConfig, *flagConfigDir)
|
||||
if err != nil {
|
||||
proxyLog.Warnf("failed to reload config: %v", err)
|
||||
return
|
||||
@@ -229,19 +230,37 @@ func main() {
|
||||
defer watcherCancel()
|
||||
|
||||
if *flagWatchConfig {
|
||||
absConfigPath, err := filepath.Abs(configPath)
|
||||
if err != nil {
|
||||
slog.Error("watch-config: failed to resolve config path", "error", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
proxyLog.Info("watching configuration for changes (poll-based, 2s interval)")
|
||||
go func() {
|
||||
(&configwatcher.Watcher{
|
||||
Path: absConfigPath,
|
||||
Interval: configwatcher.DefaultInterval,
|
||||
OnChange: reload,
|
||||
}).Run(watcherCtx)
|
||||
}()
|
||||
|
||||
if *flagConfig != "" {
|
||||
absConfigPath, err := filepath.Abs(*flagConfig)
|
||||
if err != nil {
|
||||
slog.Error("watch-config: failed to resolve config path", "error", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
go func() {
|
||||
(&configwatcher.Watcher{
|
||||
Path: absConfigPath,
|
||||
Interval: configwatcher.DefaultInterval,
|
||||
OnChange: reload,
|
||||
}).Run(watcherCtx)
|
||||
}()
|
||||
}
|
||||
|
||||
if *flagConfigDir != "" {
|
||||
absConfigDir, err := filepath.Abs(*flagConfigDir)
|
||||
if err != nil {
|
||||
slog.Error("watch-config: failed to resolve config-dir path", "error", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
go func() {
|
||||
(&configwatcher.DirWatcher{
|
||||
Path: absConfigDir,
|
||||
Interval: configwatcher.DefaultInterval,
|
||||
OnChange: reload,
|
||||
}).Run(watcherCtx)
|
||||
}()
|
||||
}
|
||||
}
|
||||
|
||||
sigChan := make(chan os.Signal, 1)
|
||||
@@ -262,6 +281,11 @@ func main() {
|
||||
}
|
||||
}()
|
||||
|
||||
if !shared.IsLoopbackAddr(listenAddr) {
|
||||
_, port, _ := net.SplitHostPort(listenAddr)
|
||||
proxyLog.Infof("llama-swap is reachable by all hosts on the network, use -listen localhost:%s to restrict to loopback only", port)
|
||||
}
|
||||
|
||||
exitChan := make(chan struct{})
|
||||
|
||||
go func() {
|
||||
|
||||
@@ -0,0 +1,17 @@
|
||||
{
|
||||
"$schema": "https://shadcn-svelte.com/schema.json",
|
||||
"tailwind": {
|
||||
"css": "src/index.css",
|
||||
"baseColor": "zinc"
|
||||
},
|
||||
"aliases": {
|
||||
"components": "$lib/components",
|
||||
"utils": "$lib/utils",
|
||||
"ui": "$lib/components/ui",
|
||||
"hooks": "$lib/hooks",
|
||||
"lib": "$lib"
|
||||
},
|
||||
"typescript": true,
|
||||
"registry": "https://shadcn-svelte.com/registry",
|
||||
"iconLibrary": "lucide"
|
||||
}
|
||||
Generated
+421
-120
@@ -8,10 +8,10 @@
|
||||
"name": "ui-svelte",
|
||||
"version": "0.0.0",
|
||||
"dependencies": {
|
||||
"@tanstack/table-core": "^8.21.3",
|
||||
"chart.js": "4.5.1",
|
||||
"highlight.js": "^11.11.1",
|
||||
"katex": "^0.16.28",
|
||||
"lucide-svelte": "^0.563.0",
|
||||
"rehype-katex": "^7.0.1",
|
||||
"rehype-stringify": "^10.0.1",
|
||||
"remark-gfm": "^4.0.1",
|
||||
@@ -23,14 +23,22 @@
|
||||
"unist-util-visit": "^5.1.0"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@internationalized/date": "^3.12.2",
|
||||
"@lucide/svelte": "^1.21.0",
|
||||
"@sveltejs/vite-plugin-svelte": "^7.0.0",
|
||||
"@tailwindcss/vite": "^4.1.8",
|
||||
"@tsconfig/svelte": "^5.0.4",
|
||||
"@types/hast": "^3.0.4",
|
||||
"@types/node": "^25.1.0",
|
||||
"bits-ui": "^2.18.1",
|
||||
"clsx": "^2.1.1",
|
||||
"paneforge": "^1.0.2",
|
||||
"svelte": "^5.46.4",
|
||||
"svelte-check": "^4.1.4",
|
||||
"tailwind-merge": "^3.6.0",
|
||||
"tailwind-variants": "^3.2.2",
|
||||
"tailwindcss": "^4.1.8",
|
||||
"tw-animate-css": "^1.4.0",
|
||||
"typescript": "~5.8.3",
|
||||
"vite": "^8.0.0",
|
||||
"vite-plugin-compression2": "^2.5.1",
|
||||
@@ -38,21 +46,21 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@emnapi/core": {
|
||||
"version": "1.9.2",
|
||||
"resolved": "https://registry.npmjs.org/@emnapi/core/-/core-1.9.2.tgz",
|
||||
"integrity": "sha512-UC+ZhH3XtczQYfOlu3lNEkdW/p4dsJ1r/bP7H8+rhao3TTTMO1ATq/4DdIi23XuGoFY+Cz0JmCbdVl0hz9jZcA==",
|
||||
"version": "1.11.1",
|
||||
"resolved": "https://registry.npmjs.org/@emnapi/core/-/core-1.11.1.tgz",
|
||||
"integrity": "sha512-RSvbQmHzdKzNsLYa/wHrbc3KN4sYLKAdPZxqiM2HATqv/SBk2/ENSHpvXGaLOMcsAyz0poEGqkmmKYG3OWiJEQ==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"dependencies": {
|
||||
"@emnapi/wasi-threads": "1.2.1",
|
||||
"@emnapi/wasi-threads": "1.2.2",
|
||||
"tslib": "^2.4.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@emnapi/runtime": {
|
||||
"version": "1.9.2",
|
||||
"resolved": "https://registry.npmjs.org/@emnapi/runtime/-/runtime-1.9.2.tgz",
|
||||
"integrity": "sha512-3U4+MIWHImeyu1wnmVygh5WlgfYDtyf0k8AbLhMFxOipihf6nrWC4syIm/SwEeec0mNSafiiNnMJwbza/Is6Lw==",
|
||||
"version": "1.11.1",
|
||||
"resolved": "https://registry.npmjs.org/@emnapi/runtime/-/runtime-1.11.1.tgz",
|
||||
"integrity": "sha512-vgj7R3y3Wgx24IQaGPA/R6YFXLHVMOZ0uVEyIQPaWs+rd1AzfEMXlAC22FYwO1XkKR6NPsq7mUandH8oIRdZFw==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
@@ -61,9 +69,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@emnapi/wasi-threads": {
|
||||
"version": "1.2.1",
|
||||
"resolved": "https://registry.npmjs.org/@emnapi/wasi-threads/-/wasi-threads-1.2.1.tgz",
|
||||
"integrity": "sha512-uTII7OYF+/Mes/MrcIOYp5yOtSMLBWSIoLPpcgwipoiKbli6k322tcoFsxoIIxPDqW01SQGAgko4EzZi2BNv2w==",
|
||||
"version": "1.2.2",
|
||||
"resolved": "https://registry.npmjs.org/@emnapi/wasi-threads/-/wasi-threads-1.2.2.tgz",
|
||||
"integrity": "sha512-c95qOXkHdydNKhscBTebqEC1CVAZpyqOfVfBzQ1qgzyl3gfeldUjIggDbIZgDKsHLgnsM+igH7TJ/eAasaVuMA==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
@@ -71,6 +79,44 @@
|
||||
"tslib": "^2.4.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@floating-ui/core": {
|
||||
"version": "1.7.5",
|
||||
"resolved": "https://registry.npmjs.org/@floating-ui/core/-/core-1.7.5.tgz",
|
||||
"integrity": "sha512-1Ih4WTWyw0+lKyFMcBHGbb5U5FtuHJuujoyyr5zTaWS5EYMeT6Jb2AuDeftsCsEuchO+mM2ij5+q9crhydzLhQ==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@floating-ui/utils": "^0.2.11"
|
||||
}
|
||||
},
|
||||
"node_modules/@floating-ui/dom": {
|
||||
"version": "1.7.6",
|
||||
"resolved": "https://registry.npmjs.org/@floating-ui/dom/-/dom-1.7.6.tgz",
|
||||
"integrity": "sha512-9gZSAI5XM36880PPMm//9dfiEngYoC6Am2izES1FF406YFsjvyBMmeJ2g4SAju3xWwtuynNRFL2s9hgxpLI5SQ==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@floating-ui/core": "^1.7.5",
|
||||
"@floating-ui/utils": "^0.2.11"
|
||||
}
|
||||
},
|
||||
"node_modules/@floating-ui/utils": {
|
||||
"version": "0.2.11",
|
||||
"resolved": "https://registry.npmjs.org/@floating-ui/utils/-/utils-0.2.11.tgz",
|
||||
"integrity": "sha512-RiB/yIh78pcIxl6lLMG0CgBXAZ2Y0eVHqMPYugu+9U0AeT6YBeiJpf7lbdJNIugFP5SIjwNRgo4DhR1Qxi26Gg==",
|
||||
"dev": true,
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/@internationalized/date": {
|
||||
"version": "3.12.2",
|
||||
"resolved": "https://registry.npmjs.org/@internationalized/date/-/date-3.12.2.tgz",
|
||||
"integrity": "sha512-FY1Y+H64NDs+HAF6omlnWxm3mEpfgaCSWtL5l551ZZfImA+kGjPFgrnJrGjH6lfmLL0g8Z/mBu1R3kufeCp6Jw==",
|
||||
"dev": true,
|
||||
"license": "Apache-2.0",
|
||||
"dependencies": {
|
||||
"@swc/helpers": "^0.5.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@jridgewell/gen-mapping": {
|
||||
"version": "0.3.13",
|
||||
"resolved": "https://registry.npmjs.org/@jridgewell/gen-mapping/-/gen-mapping-0.3.13.tgz",
|
||||
@@ -127,15 +173,25 @@
|
||||
"integrity": "sha512-M5UknZPHRu3DEDWoipU6sE8PdkZ6Z/S+v4dD+Ke8IaNlpdSQah50lz1KtcFBa2vsdOnwbbnxJwVM4wty6udA5w==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/@lucide/svelte": {
|
||||
"version": "1.21.0",
|
||||
"resolved": "https://registry.npmjs.org/@lucide/svelte/-/svelte-1.21.0.tgz",
|
||||
"integrity": "sha512-MEv//A7Jv3kHukZowv/DWp1MAtUzJKYwtJsmnQ7X98lCgtac3z3NbaToDl3Q6jO3gS9sougFpcD+t+YuxOkRMw==",
|
||||
"dev": true,
|
||||
"license": "ISC",
|
||||
"peerDependencies": {
|
||||
"svelte": "^5"
|
||||
}
|
||||
},
|
||||
"node_modules/@napi-rs/wasm-runtime": {
|
||||
"version": "1.1.3",
|
||||
"resolved": "https://registry.npmjs.org/@napi-rs/wasm-runtime/-/wasm-runtime-1.1.3.tgz",
|
||||
"integrity": "sha512-xK9sGVbJWYb08+mTJt3/YV24WxvxpXcXtP6B172paPZ+Ts69Re9dAr7lKwJoeIx8OoeuimEiRZ7umkiUVClmmQ==",
|
||||
"version": "1.1.6",
|
||||
"resolved": "https://registry.npmjs.org/@napi-rs/wasm-runtime/-/wasm-runtime-1.1.6.tgz",
|
||||
"integrity": "sha512-ZLv/JdUfkvOy9eCnnBaGfiO+XimbjebAeO+MRQqD/B+FR1tnRN0tpKSJHRbE8sFfS6aqsXZ67TQjfwfsxULVbg==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"dependencies": {
|
||||
"@tybys/wasm-util": "^0.10.1"
|
||||
"@tybys/wasm-util": "^0.10.3"
|
||||
},
|
||||
"funding": {
|
||||
"type": "github",
|
||||
@@ -147,9 +203,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@oxc-project/types": {
|
||||
"version": "0.124.0",
|
||||
"resolved": "https://registry.npmjs.org/@oxc-project/types/-/types-0.124.0.tgz",
|
||||
"integrity": "sha512-VBFWMTBvHxS11Z5Lvlr3IWgrwhMTXV+Md+EQF0Xf60+wAdsGFTBx7X7K/hP4pi8N7dcm1RvcHwDxZ16Qx8keUg==",
|
||||
"version": "0.137.0",
|
||||
"resolved": "https://registry.npmjs.org/@oxc-project/types/-/types-0.137.0.tgz",
|
||||
"integrity": "sha512-WT+Gb24i8hmvo85AIv2oEYouEXkRlKAlT9WaCa3TfLgNCN+GhrJOGZuIlMouAh38Qe4QOx26eUOVsq70qXrywA==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"funding": {
|
||||
@@ -157,9 +213,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@rolldown/binding-android-arm64": {
|
||||
"version": "1.0.0-rc.15",
|
||||
"resolved": "https://registry.npmjs.org/@rolldown/binding-android-arm64/-/binding-android-arm64-1.0.0-rc.15.tgz",
|
||||
"integrity": "sha512-YYe6aWruPZDtHNpwu7+qAHEMbQ/yRl6atqb/AhznLTnD3UY99Q1jE7ihLSahNWkF4EqRPVC4SiR4O0UkLK02tA==",
|
||||
"version": "1.1.3",
|
||||
"resolved": "https://registry.npmjs.org/@rolldown/binding-android-arm64/-/binding-android-arm64-1.1.3.tgz",
|
||||
"integrity": "sha512-DT6Z3PhvioeHMvxo+xHc3KtqggrI7CCTXCmC2h/5zUlp5jVitv7XEy+9q5/7v8IolhlioawpMo8Kg0EEBy7J0g==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
@@ -174,9 +230,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@rolldown/binding-darwin-arm64": {
|
||||
"version": "1.0.0-rc.15",
|
||||
"resolved": "https://registry.npmjs.org/@rolldown/binding-darwin-arm64/-/binding-darwin-arm64-1.0.0-rc.15.tgz",
|
||||
"integrity": "sha512-oArR/ig8wNTPYsXL+Mzhs0oxhxfuHRfG7Ikw7jXsw8mYOtk71W0OkF2VEVh699pdmzjPQsTjlD1JIOoHkLP1Fg==",
|
||||
"version": "1.1.3",
|
||||
"resolved": "https://registry.npmjs.org/@rolldown/binding-darwin-arm64/-/binding-darwin-arm64-1.1.3.tgz",
|
||||
"integrity": "sha512-0NwgwsjM7LrsuVnXMK3koTpagBNOhloc/BNjKqZjv4V5zI5r13qx69uVhRx+o5Z0yy4Hzq+lpy7TAgUG/ocvrw==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
@@ -191,9 +247,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@rolldown/binding-darwin-x64": {
|
||||
"version": "1.0.0-rc.15",
|
||||
"resolved": "https://registry.npmjs.org/@rolldown/binding-darwin-x64/-/binding-darwin-x64-1.0.0-rc.15.tgz",
|
||||
"integrity": "sha512-YzeVqOqjPYvUbJSWJ4EDL8ahbmsIXQpgL3JVipmN+MX0XnXMeWomLN3Fb+nwCmP/jfyqte5I3XRSm7OfQrbyxw==",
|
||||
"version": "1.1.3",
|
||||
"resolved": "https://registry.npmjs.org/@rolldown/binding-darwin-x64/-/binding-darwin-x64-1.1.3.tgz",
|
||||
"integrity": "sha512-YtiBp4disu6V560loT6PjMdiRaWmVvDNrUunAalbiFx2ggeJwxdAsgZMcoGP17uyAsTwAj5V1niksxlHnVQ1Sw==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
@@ -208,9 +264,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@rolldown/binding-freebsd-x64": {
|
||||
"version": "1.0.0-rc.15",
|
||||
"resolved": "https://registry.npmjs.org/@rolldown/binding-freebsd-x64/-/binding-freebsd-x64-1.0.0-rc.15.tgz",
|
||||
"integrity": "sha512-9Erhx956jeQ0nNTyif1+QWAXDRD38ZNjr//bSHrt6wDwB+QkAfl2q6Mn1k6OBPerznjRmbM10lgRb1Pli4xZPw==",
|
||||
"version": "1.1.3",
|
||||
"resolved": "https://registry.npmjs.org/@rolldown/binding-freebsd-x64/-/binding-freebsd-x64-1.1.3.tgz",
|
||||
"integrity": "sha512-yD3EkEdXk2LypPxnf/kSZHirarsI8gcPzc62SukhR9VJTyvV+F9Q/GxWNuCojc7sXyuVC4DxRGhdDK4X8VSsbw==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
@@ -225,9 +281,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@rolldown/binding-linux-arm-gnueabihf": {
|
||||
"version": "1.0.0-rc.15",
|
||||
"resolved": "https://registry.npmjs.org/@rolldown/binding-linux-arm-gnueabihf/-/binding-linux-arm-gnueabihf-1.0.0-rc.15.tgz",
|
||||
"integrity": "sha512-cVwk0w8QbZJGTnP/AHQBs5yNwmpgGYStL88t4UIaqcvYJWBfS0s3oqVLZPwsPU6M0zlW4GqjP0Zq5MnAGwFeGA==",
|
||||
"version": "1.1.3",
|
||||
"resolved": "https://registry.npmjs.org/@rolldown/binding-linux-arm-gnueabihf/-/binding-linux-arm-gnueabihf-1.1.3.tgz",
|
||||
"integrity": "sha512-c+8vieQbsD7HNAHKIA34w0GJ9FedFFuJGD+7E6vz7Q3uqAIugL5p45fhlsj4UaAsHpcmlqugBWMhA0/j7o0sIg==",
|
||||
"cpu": [
|
||||
"arm"
|
||||
],
|
||||
@@ -242,13 +298,16 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@rolldown/binding-linux-arm64-gnu": {
|
||||
"version": "1.0.0-rc.15",
|
||||
"resolved": "https://registry.npmjs.org/@rolldown/binding-linux-arm64-gnu/-/binding-linux-arm64-gnu-1.0.0-rc.15.tgz",
|
||||
"integrity": "sha512-eBZ/u8iAK9SoHGanqe/jrPnY0JvBN6iXbVOsbO38mbz+ZJsaobExAm1Iu+rxa4S1l2FjG0qEZn4Rc6X8n+9M+w==",
|
||||
"version": "1.1.3",
|
||||
"resolved": "https://registry.npmjs.org/@rolldown/binding-linux-arm64-gnu/-/binding-linux-arm64-gnu-1.1.3.tgz",
|
||||
"integrity": "sha512-50jD0uUwLvur7Zz9LHz17kaAdTPjn5wN93hEgjvmYFRZwiR7ZJYovTd5ipyWJDAnXKvZ+wgc+/Ika6dwSF5OcA==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
"dev": true,
|
||||
"libc": [
|
||||
"glibc"
|
||||
],
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
@@ -259,13 +318,16 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@rolldown/binding-linux-arm64-musl": {
|
||||
"version": "1.0.0-rc.15",
|
||||
"resolved": "https://registry.npmjs.org/@rolldown/binding-linux-arm64-musl/-/binding-linux-arm64-musl-1.0.0-rc.15.tgz",
|
||||
"integrity": "sha512-ZvRYMGrAklV9PEkgt4LQM6MjQX2P58HPAuecwYObY2DhS2t35R0I810bKi0wmaYORt6m/2Sm+Z+nFgb0WhXNcQ==",
|
||||
"version": "1.1.3",
|
||||
"resolved": "https://registry.npmjs.org/@rolldown/binding-linux-arm64-musl/-/binding-linux-arm64-musl-1.1.3.tgz",
|
||||
"integrity": "sha512-BO9+oPL8K9poZJBfYPsXNtYjPE5uM3qeehT3aFcW4LITOl+iSqhp0abzjR2nWBUNjIZeKXjAEWBZ64WjNoHd6w==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
"dev": true,
|
||||
"libc": [
|
||||
"musl"
|
||||
],
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
@@ -276,13 +338,16 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@rolldown/binding-linux-ppc64-gnu": {
|
||||
"version": "1.0.0-rc.15",
|
||||
"resolved": "https://registry.npmjs.org/@rolldown/binding-linux-ppc64-gnu/-/binding-linux-ppc64-gnu-1.0.0-rc.15.tgz",
|
||||
"integrity": "sha512-VDpgGBzgfg5hLg+uBpCLoFG5kVvEyafmfxGUV0UHLcL5irxAK7PKNeC2MwClgk6ZAiNhmo9FLhRYgvMmedLtnQ==",
|
||||
"version": "1.1.3",
|
||||
"resolved": "https://registry.npmjs.org/@rolldown/binding-linux-ppc64-gnu/-/binding-linux-ppc64-gnu-1.1.3.tgz",
|
||||
"integrity": "sha512-f3VpLB1vQ0Eo6ecr/6cekLnvYMFF4YBFoVGkfkvPLq1bAkbAwHYQPZKoAmG6OJyTcxxoC+AvezGx/S1obNC0Mw==",
|
||||
"cpu": [
|
||||
"ppc64"
|
||||
],
|
||||
"dev": true,
|
||||
"libc": [
|
||||
"glibc"
|
||||
],
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
@@ -293,13 +358,16 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@rolldown/binding-linux-s390x-gnu": {
|
||||
"version": "1.0.0-rc.15",
|
||||
"resolved": "https://registry.npmjs.org/@rolldown/binding-linux-s390x-gnu/-/binding-linux-s390x-gnu-1.0.0-rc.15.tgz",
|
||||
"integrity": "sha512-y1uXY3qQWCzcPgRJATPSOUP4tCemh4uBdY7e3EZbVwCJTY3gLJWnQABgeUetvED+bt1FQ01OeZwvhLS2bpNrAQ==",
|
||||
"version": "1.1.3",
|
||||
"resolved": "https://registry.npmjs.org/@rolldown/binding-linux-s390x-gnu/-/binding-linux-s390x-gnu-1.1.3.tgz",
|
||||
"integrity": "sha512-AmurZ26Pqx/RI9N1gzEOCklkKXl927yjfXWUUS0O7Puh8ARM/Ob8qfrD3qnWksScdw6cSrW5PSHE9DyLu7+PtA==",
|
||||
"cpu": [
|
||||
"s390x"
|
||||
],
|
||||
"dev": true,
|
||||
"libc": [
|
||||
"glibc"
|
||||
],
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
@@ -310,13 +378,16 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@rolldown/binding-linux-x64-gnu": {
|
||||
"version": "1.0.0-rc.15",
|
||||
"resolved": "https://registry.npmjs.org/@rolldown/binding-linux-x64-gnu/-/binding-linux-x64-gnu-1.0.0-rc.15.tgz",
|
||||
"integrity": "sha512-023bTPBod7J3Y/4fzAN6QtpkSABR0rigtrwaP+qSEabUh5zf6ELr9Nc7GujaROuPY3uwdSIXWrvhn1KxOvurWA==",
|
||||
"version": "1.1.3",
|
||||
"resolved": "https://registry.npmjs.org/@rolldown/binding-linux-x64-gnu/-/binding-linux-x64-gnu-1.1.3.tgz",
|
||||
"integrity": "sha512-JJpqs8bRGITDOdbkNKnlojzBabbOHrqjSvDr0IVsZObE1lBcPjxItUEY9eWIDbxaJ3cGrXPWGfGkIxFijg/URg==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"dev": true,
|
||||
"libc": [
|
||||
"glibc"
|
||||
],
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
@@ -327,13 +398,16 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@rolldown/binding-linux-x64-musl": {
|
||||
"version": "1.0.0-rc.15",
|
||||
"resolved": "https://registry.npmjs.org/@rolldown/binding-linux-x64-musl/-/binding-linux-x64-musl-1.0.0-rc.15.tgz",
|
||||
"integrity": "sha512-witB2O0/hU4CgfOOKUoeFgQ4GktPi1eEbAhaLAIpgD6+ZnhcPkUtPsoKKHRzmOoWPZue46IThdSgdo4XneOLYw==",
|
||||
"version": "1.1.3",
|
||||
"resolved": "https://registry.npmjs.org/@rolldown/binding-linux-x64-musl/-/binding-linux-x64-musl-1.1.3.tgz",
|
||||
"integrity": "sha512-rSJcdjPxzA/by/6/rYs+v+bXU7UjvnbUWz8MJb6kh6+knqB1dCrtHg0uu7C/4haqJvqdkYHQ5IGn+tCH9GLW/g==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"dev": true,
|
||||
"libc": [
|
||||
"musl"
|
||||
],
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
@@ -344,9 +418,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@rolldown/binding-openharmony-arm64": {
|
||||
"version": "1.0.0-rc.15",
|
||||
"resolved": "https://registry.npmjs.org/@rolldown/binding-openharmony-arm64/-/binding-openharmony-arm64-1.0.0-rc.15.tgz",
|
||||
"integrity": "sha512-UCL68NJ0Ud5zRipXZE9dF5PmirzJE4E4BCIOOssEnM7wLDsxjc6Qb0sGDxTNRTP53I6MZpygyCpY8Aa8sPfKPg==",
|
||||
"version": "1.1.3",
|
||||
"resolved": "https://registry.npmjs.org/@rolldown/binding-openharmony-arm64/-/binding-openharmony-arm64-1.1.3.tgz",
|
||||
"integrity": "sha512-hQ3/PYkDJICgevvyNcVrihVeqq7k1Pp3VZ9lY+dauAYUJKO+auqApvANhvR1An9BhmqYKvW2Mu1F9u4DXSMLxQ==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
@@ -361,9 +435,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@rolldown/binding-wasm32-wasi": {
|
||||
"version": "1.0.0-rc.15",
|
||||
"resolved": "https://registry.npmjs.org/@rolldown/binding-wasm32-wasi/-/binding-wasm32-wasi-1.0.0-rc.15.tgz",
|
||||
"integrity": "sha512-ApLruZq/ig+nhaE7OJm4lDjayUnOHVUa77zGeqnqZ9pn0ovdVbbNPerVibLXDmWeUZXjIYIT8V3xkT58Rm9u5Q==",
|
||||
"version": "1.1.3",
|
||||
"resolved": "https://registry.npmjs.org/@rolldown/binding-wasm32-wasi/-/binding-wasm32-wasi-1.1.3.tgz",
|
||||
"integrity": "sha512-Elcv/BtML9lXrV6JuKITc/grN2kYV9gjsQpW8Jfw4ioK0TOkjBjye0nnyqQNy9STNaI20lXNaQBRrD5gSgR0Yg==",
|
||||
"cpu": [
|
||||
"wasm32"
|
||||
],
|
||||
@@ -371,18 +445,18 @@
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"dependencies": {
|
||||
"@emnapi/core": "1.9.2",
|
||||
"@emnapi/runtime": "1.9.2",
|
||||
"@napi-rs/wasm-runtime": "^1.1.3"
|
||||
"@emnapi/core": "1.11.1",
|
||||
"@emnapi/runtime": "1.11.1",
|
||||
"@napi-rs/wasm-runtime": "^1.1.6"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=14.0.0"
|
||||
"node": "^20.19.0 || >=22.12.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@rolldown/binding-win32-arm64-msvc": {
|
||||
"version": "1.0.0-rc.15",
|
||||
"resolved": "https://registry.npmjs.org/@rolldown/binding-win32-arm64-msvc/-/binding-win32-arm64-msvc-1.0.0-rc.15.tgz",
|
||||
"integrity": "sha512-KmoUoU7HnN+Si5YWJigfTws1jz1bKBYDQKdbLspz0UaqjjFkddHsqorgiW1mxcAj88lYUE6NC/zJNwT+SloqtA==",
|
||||
"version": "1.1.3",
|
||||
"resolved": "https://registry.npmjs.org/@rolldown/binding-win32-arm64-msvc/-/binding-win32-arm64-msvc-1.1.3.tgz",
|
||||
"integrity": "sha512-2DrEfhluH9yhiaFApmsjsjwrSYbNcY1oFTzYSP1a535jDbV98zCFanA/96TBUd0iDFcxGmw9QRExwGCXz3U+/g==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
@@ -397,9 +471,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@rolldown/binding-win32-x64-msvc": {
|
||||
"version": "1.0.0-rc.15",
|
||||
"resolved": "https://registry.npmjs.org/@rolldown/binding-win32-x64-msvc/-/binding-win32-x64-msvc-1.0.0-rc.15.tgz",
|
||||
"integrity": "sha512-3P2A8L+x75qavWLe/Dll3EYBJLQmtkJN8rfh+U/eR3MqMgL/h98PhYI+JFfXuDPgPeCB7iZAKiqii5vqOvnA0g==",
|
||||
"version": "1.1.3",
|
||||
"resolved": "https://registry.npmjs.org/@rolldown/binding-win32-x64-msvc/-/binding-win32-x64-msvc-1.1.3.tgz",
|
||||
"integrity": "sha512-OL4OMk7UPXOeVGGd3qo5zJyPIljf4AFgk5QAkPPS+OoLuOOozhuaQGC18MxVTnw/06q93gShAJzlwnSCY9YtqA==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
@@ -414,9 +488,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@rolldown/pluginutils": {
|
||||
"version": "1.0.0-rc.15",
|
||||
"resolved": "https://registry.npmjs.org/@rolldown/pluginutils/-/pluginutils-1.0.0-rc.15.tgz",
|
||||
"integrity": "sha512-UromN0peaE53IaBRe9W7CjrZgXl90fqGpK+mIZbA3qSTeYqg3pqpROBdIPvOG3F5ereDHNwoHBI2e50n1BDr1g==",
|
||||
"version": "1.0.1",
|
||||
"resolved": "https://registry.npmjs.org/@rolldown/pluginutils/-/pluginutils-1.0.1.tgz",
|
||||
"integrity": "sha512-2j9bGt5Jh8hj+vPtgzPtl72j0yRxHAyumoo6TNfAjsLB04UtpSvPbPcDcBMxz7n+9CYB0c1GxQFxYRg2jimqGw==",
|
||||
"dev": true,
|
||||
"license": "MIT"
|
||||
},
|
||||
@@ -480,6 +554,16 @@
|
||||
"vite": "^8.0.0-beta.7 || ^8.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@swc/helpers": {
|
||||
"version": "0.5.23",
|
||||
"resolved": "https://registry.npmjs.org/@swc/helpers/-/helpers-0.5.23.tgz",
|
||||
"integrity": "sha512-5lSsMOTXURePglDfvuAQUqkGek9Hg2kksOYay2m0+XR++b2NWYL/4sWyuvVBIs8oKnJaxkdi9whaL/sqN13afw==",
|
||||
"dev": true,
|
||||
"license": "Apache-2.0",
|
||||
"dependencies": {
|
||||
"tslib": "^2.8.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@tailwindcss/node": {
|
||||
"version": "4.2.1",
|
||||
"resolved": "https://registry.npmjs.org/@tailwindcss/node/-/node-4.2.1.tgz",
|
||||
@@ -752,6 +836,19 @@
|
||||
"vite": "^5.2.0 || ^6 || ^7"
|
||||
}
|
||||
},
|
||||
"node_modules/@tanstack/table-core": {
|
||||
"version": "8.21.3",
|
||||
"resolved": "https://registry.npmjs.org/@tanstack/table-core/-/table-core-8.21.3.tgz",
|
||||
"integrity": "sha512-ldZXEhOBb8Is7xLs01fR3YEc3DERiz5silj8tnGkFZytt1abEvl/GhUmCE0PMLaMPTa3Jk4HbKmRlHmu+gCftg==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
},
|
||||
"funding": {
|
||||
"type": "github",
|
||||
"url": "https://github.com/sponsors/tannerlinsley"
|
||||
}
|
||||
},
|
||||
"node_modules/@tsconfig/svelte": {
|
||||
"version": "5.0.8",
|
||||
"resolved": "https://registry.npmjs.org/@tsconfig/svelte/-/svelte-5.0.8.tgz",
|
||||
@@ -760,9 +857,9 @@
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/@tybys/wasm-util": {
|
||||
"version": "0.10.1",
|
||||
"resolved": "https://registry.npmjs.org/@tybys/wasm-util/-/wasm-util-0.10.1.tgz",
|
||||
"integrity": "sha512-9tTaPJLSiejZKx+Bmog4uSubteqTvFrVrURwkmHixBo0G4seD0zUxp98E1DzUBJxLQ3NPwXrGKDiVjwx/DpPsg==",
|
||||
"version": "0.10.3",
|
||||
"resolved": "https://registry.npmjs.org/@tybys/wasm-util/-/wasm-util-0.10.3.tgz",
|
||||
"integrity": "sha512-F3fo1MYrRJYL3zER0OUOmkutjr1Vp23m7OsSgp7nq4SP6OqX6C/56XFIPAl5bt3zaBRjmW7SGz3u/6LwFpYcOg==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
@@ -1053,6 +1150,31 @@
|
||||
"url": "https://github.com/sponsors/wooorm"
|
||||
}
|
||||
},
|
||||
"node_modules/bits-ui": {
|
||||
"version": "2.18.1",
|
||||
"resolved": "https://registry.npmjs.org/bits-ui/-/bits-ui-2.18.1.tgz",
|
||||
"integrity": "sha512-KkemzKFH4T3gt3H+P86JcnAWExjByv/6vlwjm/BoCwTPHu03yiCdxbghdJLvFReQTe0acCAiRcKfmixxD6XvlA==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@floating-ui/core": "^1.7.1",
|
||||
"@floating-ui/dom": "^1.7.1",
|
||||
"esm-env": "^1.1.2",
|
||||
"runed": "^0.35.1",
|
||||
"svelte-toolbelt": "^0.10.6",
|
||||
"tabbable": "^6.2.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=20"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://github.com/sponsors/huntabyte"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"@internationalized/date": "^3.8.1",
|
||||
"svelte": "^5.33.0"
|
||||
}
|
||||
},
|
||||
"node_modules/ccount": {
|
||||
"version": "2.0.1",
|
||||
"resolved": "https://registry.npmjs.org/ccount/-/ccount-2.0.1.tgz",
|
||||
@@ -1555,6 +1677,13 @@
|
||||
"url": "https://github.com/sponsors/wooorm"
|
||||
}
|
||||
},
|
||||
"node_modules/inline-style-parser": {
|
||||
"version": "0.2.7",
|
||||
"resolved": "https://registry.npmjs.org/inline-style-parser/-/inline-style-parser-0.2.7.tgz",
|
||||
"integrity": "sha512-Nb2ctOyNR8DqQoR0OwRG95uNWIC0C1lCgf5Naz5H6Ji72KZ8OcFZLz2P5sNgwlyoJ8Yif11oMuYs5pBQa86csA==",
|
||||
"dev": true,
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/is-plain-obj": {
|
||||
"version": "4.1.0",
|
||||
"resolved": "https://registry.npmjs.org/is-plain-obj/-/is-plain-obj-4.1.0.tgz",
|
||||
@@ -1881,13 +2010,14 @@
|
||||
"url": "https://github.com/sponsors/wooorm"
|
||||
}
|
||||
},
|
||||
"node_modules/lucide-svelte": {
|
||||
"version": "0.563.0",
|
||||
"resolved": "https://registry.npmjs.org/lucide-svelte/-/lucide-svelte-0.563.0.tgz",
|
||||
"integrity": "sha512-pjZKw7TpQcamfQrx7YdbOHgmrcNeKiGGMD0tKZQaVktwSsbqw28CsKc2Q97ttwjytiCWkJyOa8ij2Q+Og0nPfQ==",
|
||||
"license": "ISC",
|
||||
"peerDependencies": {
|
||||
"svelte": "^3 || ^4 || ^5.0.0-next.42"
|
||||
"node_modules/lz-string": {
|
||||
"version": "1.5.0",
|
||||
"resolved": "https://registry.npmjs.org/lz-string/-/lz-string-1.5.0.tgz",
|
||||
"integrity": "sha512-h5bgJWpxJNswbU7qCrV0tIKQCaS3blPDrqKWx+QxzuzL1zGUzij9XCWLrSLsJPu5t+eWA/ycetzYAO5IOMcWAQ==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"bin": {
|
||||
"lz-string": "bin/bin.js"
|
||||
}
|
||||
},
|
||||
"node_modules/magic-string": {
|
||||
@@ -2738,9 +2868,9 @@
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/nanoid": {
|
||||
"version": "3.3.11",
|
||||
"resolved": "https://registry.npmjs.org/nanoid/-/nanoid-3.3.11.tgz",
|
||||
"integrity": "sha512-N8SpfPUnUp1bK+PMYW8qSWdl9U+wwNWI4QKxOYDy9JAro3WMX7p2OeVRF9v+347pnakNevPmiHhNmZ2HbFA76w==",
|
||||
"version": "3.3.15",
|
||||
"resolved": "https://registry.npmjs.org/nanoid/-/nanoid-3.3.15.tgz",
|
||||
"integrity": "sha512-y7Wygv/7mEOvxTuEQDB8StXdMRBWf1kR/tlhAzBRUFkB2jfcLOAxO/SHmOO2zgz1pVgK29/kyupn059/bCHdjA==",
|
||||
"dev": true,
|
||||
"funding": [
|
||||
{
|
||||
@@ -2767,6 +2897,74 @@
|
||||
],
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/paneforge": {
|
||||
"version": "1.0.2",
|
||||
"resolved": "https://registry.npmjs.org/paneforge/-/paneforge-1.0.2.tgz",
|
||||
"integrity": "sha512-KzmIXQH1wCfwZ4RsMohD/IUtEjVhteR+c+ulb/CHYJHX8SuDXoJmChtsc/Xs5Wl8NHS4L5Q7cxL8MG40gSU1bA==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"runed": "^0.23.4",
|
||||
"svelte-toolbelt": "^0.9.2"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"svelte": "^5.29.0"
|
||||
}
|
||||
},
|
||||
"node_modules/paneforge/node_modules/runed": {
|
||||
"version": "0.23.4",
|
||||
"resolved": "https://registry.npmjs.org/runed/-/runed-0.23.4.tgz",
|
||||
"integrity": "sha512-9q8oUiBYeXIDLWNK5DfCWlkL0EW3oGbk845VdKlPeia28l751VpfesaB/+7pI6rnbx1I6rqoZ2fZxptOJLxILA==",
|
||||
"dev": true,
|
||||
"funding": [
|
||||
"https://github.com/sponsors/huntabyte",
|
||||
"https://github.com/sponsors/tglide"
|
||||
],
|
||||
"dependencies": {
|
||||
"esm-env": "^1.0.0"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"svelte": "^5.7.0"
|
||||
}
|
||||
},
|
||||
"node_modules/paneforge/node_modules/svelte-toolbelt": {
|
||||
"version": "0.9.3",
|
||||
"resolved": "https://registry.npmjs.org/svelte-toolbelt/-/svelte-toolbelt-0.9.3.tgz",
|
||||
"integrity": "sha512-HCSWxCtVmv+c6g1ACb8LTwHVbDqLKJvHpo6J8TaqwUme2hj9ATJCpjCPNISR1OCq2Q4U1KT41if9ON0isINQZw==",
|
||||
"dev": true,
|
||||
"funding": [
|
||||
"https://github.com/sponsors/huntabyte"
|
||||
],
|
||||
"dependencies": {
|
||||
"clsx": "^2.1.1",
|
||||
"runed": "^0.29.0",
|
||||
"style-to-object": "^1.0.8"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=18",
|
||||
"pnpm": ">=8.7.0"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"svelte": "^5.30.2"
|
||||
}
|
||||
},
|
||||
"node_modules/paneforge/node_modules/svelte-toolbelt/node_modules/runed": {
|
||||
"version": "0.29.2",
|
||||
"resolved": "https://registry.npmjs.org/runed/-/runed-0.29.2.tgz",
|
||||
"integrity": "sha512-0cq6cA6sYGZwl/FvVqjx9YN+1xEBu9sDDyuWdDW1yWX7JF2wmvmVKfH+hVCZs+csW+P3ARH92MjI3H9QTagOQA==",
|
||||
"dev": true,
|
||||
"funding": [
|
||||
"https://github.com/sponsors/huntabyte",
|
||||
"https://github.com/sponsors/tglide"
|
||||
],
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"esm-env": "^1.0.0"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"svelte": "^5.7.0"
|
||||
}
|
||||
},
|
||||
"node_modules/parse5": {
|
||||
"version": "7.3.0",
|
||||
"resolved": "https://registry.npmjs.org/parse5/-/parse5-7.3.0.tgz",
|
||||
@@ -2807,9 +3005,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/postcss": {
|
||||
"version": "8.5.12",
|
||||
"resolved": "https://registry.npmjs.org/postcss/-/postcss-8.5.12.tgz",
|
||||
"integrity": "sha512-W62t/Se6rA0Az3DfCL0AqJwXuKwBeYg6nOaIgzP+xZ7N5BFCI7DYi1qs6ygUYT6rvfi6t9k65UMLJC+PHZpDAA==",
|
||||
"version": "8.5.15",
|
||||
"resolved": "https://registry.npmjs.org/postcss/-/postcss-8.5.15.tgz",
|
||||
"integrity": "sha512-FfR8sjd4em2T6fb3I2MwAJU7HWVMr9zba+enmQeeWFfCbm+UOC/0X4DS8XtpUTMwWMGbjKYP7xjfNekzyGmB3A==",
|
||||
"dev": true,
|
||||
"funding": [
|
||||
{
|
||||
@@ -2827,7 +3025,7 @@
|
||||
],
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"nanoid": "^3.3.11",
|
||||
"nanoid": "^3.3.12",
|
||||
"picocolors": "^1.1.1",
|
||||
"source-map-js": "^1.2.1"
|
||||
},
|
||||
@@ -2985,14 +3183,14 @@
|
||||
}
|
||||
},
|
||||
"node_modules/rolldown": {
|
||||
"version": "1.0.0-rc.15",
|
||||
"resolved": "https://registry.npmjs.org/rolldown/-/rolldown-1.0.0-rc.15.tgz",
|
||||
"integrity": "sha512-Ff31guA5zT6WjnGp0SXw76X6hzGRk/OQq2hE+1lcDe+lJdHSgnSX6nK3erbONHyCbpSj9a9E+uX/OvytZoWp2g==",
|
||||
"version": "1.1.3",
|
||||
"resolved": "https://registry.npmjs.org/rolldown/-/rolldown-1.1.3.tgz",
|
||||
"integrity": "sha512-1F1eEtUBtFvcGm1HQ9TiUIUHPQG7mSAODrhIzjxoUEFuo8OcbrGLiVLkevNgj84TE4lnHvnumwFjhJO5Eu135g==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@oxc-project/types": "=0.124.0",
|
||||
"@rolldown/pluginutils": "1.0.0-rc.15"
|
||||
"@oxc-project/types": "=0.137.0",
|
||||
"@rolldown/pluginutils": "^1.0.0"
|
||||
},
|
||||
"bin": {
|
||||
"rolldown": "bin/cli.mjs"
|
||||
@@ -3001,21 +3199,46 @@
|
||||
"node": "^20.19.0 || >=22.12.0"
|
||||
},
|
||||
"optionalDependencies": {
|
||||
"@rolldown/binding-android-arm64": "1.0.0-rc.15",
|
||||
"@rolldown/binding-darwin-arm64": "1.0.0-rc.15",
|
||||
"@rolldown/binding-darwin-x64": "1.0.0-rc.15",
|
||||
"@rolldown/binding-freebsd-x64": "1.0.0-rc.15",
|
||||
"@rolldown/binding-linux-arm-gnueabihf": "1.0.0-rc.15",
|
||||
"@rolldown/binding-linux-arm64-gnu": "1.0.0-rc.15",
|
||||
"@rolldown/binding-linux-arm64-musl": "1.0.0-rc.15",
|
||||
"@rolldown/binding-linux-ppc64-gnu": "1.0.0-rc.15",
|
||||
"@rolldown/binding-linux-s390x-gnu": "1.0.0-rc.15",
|
||||
"@rolldown/binding-linux-x64-gnu": "1.0.0-rc.15",
|
||||
"@rolldown/binding-linux-x64-musl": "1.0.0-rc.15",
|
||||
"@rolldown/binding-openharmony-arm64": "1.0.0-rc.15",
|
||||
"@rolldown/binding-wasm32-wasi": "1.0.0-rc.15",
|
||||
"@rolldown/binding-win32-arm64-msvc": "1.0.0-rc.15",
|
||||
"@rolldown/binding-win32-x64-msvc": "1.0.0-rc.15"
|
||||
"@rolldown/binding-android-arm64": "1.1.3",
|
||||
"@rolldown/binding-darwin-arm64": "1.1.3",
|
||||
"@rolldown/binding-darwin-x64": "1.1.3",
|
||||
"@rolldown/binding-freebsd-x64": "1.1.3",
|
||||
"@rolldown/binding-linux-arm-gnueabihf": "1.1.3",
|
||||
"@rolldown/binding-linux-arm64-gnu": "1.1.3",
|
||||
"@rolldown/binding-linux-arm64-musl": "1.1.3",
|
||||
"@rolldown/binding-linux-ppc64-gnu": "1.1.3",
|
||||
"@rolldown/binding-linux-s390x-gnu": "1.1.3",
|
||||
"@rolldown/binding-linux-x64-gnu": "1.1.3",
|
||||
"@rolldown/binding-linux-x64-musl": "1.1.3",
|
||||
"@rolldown/binding-openharmony-arm64": "1.1.3",
|
||||
"@rolldown/binding-wasm32-wasi": "1.1.3",
|
||||
"@rolldown/binding-win32-arm64-msvc": "1.1.3",
|
||||
"@rolldown/binding-win32-x64-msvc": "1.1.3"
|
||||
}
|
||||
},
|
||||
"node_modules/runed": {
|
||||
"version": "0.35.1",
|
||||
"resolved": "https://registry.npmjs.org/runed/-/runed-0.35.1.tgz",
|
||||
"integrity": "sha512-2F4Q/FZzbeJTFdIS/PuOoPRSm92sA2LhzTnv6FXhCoENb3huf5+fDuNOg1LNvGOouy3u/225qxmuJvcV3IZK5Q==",
|
||||
"dev": true,
|
||||
"funding": [
|
||||
"https://github.com/sponsors/huntabyte",
|
||||
"https://github.com/sponsors/tglide"
|
||||
],
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"dequal": "^2.0.3",
|
||||
"esm-env": "^1.0.0",
|
||||
"lz-string": "^1.5.0"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"@sveltejs/kit": "^2.21.0",
|
||||
"svelte": "^5.7.0"
|
||||
},
|
||||
"peerDependenciesMeta": {
|
||||
"@sveltejs/kit": {
|
||||
"optional": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"node_modules/sade": {
|
||||
@@ -3086,6 +3309,16 @@
|
||||
"url": "https://github.com/sponsors/wooorm"
|
||||
}
|
||||
},
|
||||
"node_modules/style-to-object": {
|
||||
"version": "1.0.14",
|
||||
"resolved": "https://registry.npmjs.org/style-to-object/-/style-to-object-1.0.14.tgz",
|
||||
"integrity": "sha512-LIN7rULI0jBscWQYaSswptyderlarFkjQ+t79nzty8tcIAceVomEVlLzH5VP4Cmsv6MtKhs7qaAiwlcp+Mgaxw==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"inline-style-parser": "0.2.7"
|
||||
}
|
||||
},
|
||||
"node_modules/svelte": {
|
||||
"version": "5.55.7",
|
||||
"resolved": "https://registry.npmjs.org/svelte/-/svelte-5.55.7.tgz",
|
||||
@@ -3150,6 +3383,65 @@
|
||||
"url": "https://github.com/sponsors/ItalyPaleAle"
|
||||
}
|
||||
},
|
||||
"node_modules/svelte-toolbelt": {
|
||||
"version": "0.10.6",
|
||||
"resolved": "https://registry.npmjs.org/svelte-toolbelt/-/svelte-toolbelt-0.10.6.tgz",
|
||||
"integrity": "sha512-YWuX+RE+CnWYx09yseAe4ZVMM7e7GRFZM6OYWpBKOb++s+SQ8RBIMMe+Bs/CznBMc0QPLjr+vDBxTAkozXsFXQ==",
|
||||
"dev": true,
|
||||
"funding": [
|
||||
"https://github.com/sponsors/huntabyte"
|
||||
],
|
||||
"dependencies": {
|
||||
"clsx": "^2.1.1",
|
||||
"runed": "^0.35.1",
|
||||
"style-to-object": "^1.0.8"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=18",
|
||||
"pnpm": ">=8.7.0"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"svelte": "^5.30.2"
|
||||
}
|
||||
},
|
||||
"node_modules/tabbable": {
|
||||
"version": "6.5.0",
|
||||
"resolved": "https://registry.npmjs.org/tabbable/-/tabbable-6.5.0.tgz",
|
||||
"integrity": "sha512-wieBHXygIm7OyQOu5hQlkk62/WyCFYGlWg7L6/ZCUZwx0o398Zkn4pVmMyfYhfMG8kGrj/Krt8eIk6UKC6VzwA==",
|
||||
"dev": true,
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/tailwind-merge": {
|
||||
"version": "3.6.0",
|
||||
"resolved": "https://registry.npmjs.org/tailwind-merge/-/tailwind-merge-3.6.0.tgz",
|
||||
"integrity": "sha512-uxL7qAVQriqRQPAyK3pj66VqskWqoZ37PW94jwOTwNfq/z9oyu1V+eqrZqtR2+fCiXdYOZe/Modt8GtvqNzu+w==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"funding": {
|
||||
"type": "github",
|
||||
"url": "https://github.com/sponsors/dcastil"
|
||||
}
|
||||
},
|
||||
"node_modules/tailwind-variants": {
|
||||
"version": "3.2.2",
|
||||
"resolved": "https://registry.npmjs.org/tailwind-variants/-/tailwind-variants-3.2.2.tgz",
|
||||
"integrity": "sha512-Mi4kHeMTLvKlM98XPnK+7HoBPmf4gygdFmqQPaDivc3DpYS6aIY6KiG/PgThrGvii5YZJqRsPz0aPyhoFzmZgg==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">=16.x",
|
||||
"pnpm": ">=7.x"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"tailwind-merge": ">=3.0.0",
|
||||
"tailwindcss": "*"
|
||||
},
|
||||
"peerDependenciesMeta": {
|
||||
"tailwind-merge": {
|
||||
"optional": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"node_modules/tailwindcss": {
|
||||
"version": "4.2.1",
|
||||
"resolved": "https://registry.npmjs.org/tailwindcss/-/tailwindcss-4.2.1.tgz",
|
||||
@@ -3196,14 +3488,14 @@
|
||||
}
|
||||
},
|
||||
"node_modules/tinyglobby": {
|
||||
"version": "0.2.15",
|
||||
"resolved": "https://registry.npmjs.org/tinyglobby/-/tinyglobby-0.2.15.tgz",
|
||||
"integrity": "sha512-j2Zq4NyQYG5XMST4cbs02Ak8iJUdxRM0XI5QyxXuZOzKOINmWurp3smXu3y5wDcJrptwpSjgXHzIQxR0omXljQ==",
|
||||
"version": "0.2.17",
|
||||
"resolved": "https://registry.npmjs.org/tinyglobby/-/tinyglobby-0.2.17.tgz",
|
||||
"integrity": "sha512-wXR/dYpcqKmfWpEdZjiKJOwCNFndD0DMnrW/cYjVGttEkBfVgcLFHoNrlj47mjOVic9yyNu65alsgF4NQyTa2g==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"fdir": "^6.5.0",
|
||||
"picomatch": "^4.0.3"
|
||||
"picomatch": "^4.0.4"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=12.0.0"
|
||||
@@ -3247,8 +3539,17 @@
|
||||
"resolved": "https://registry.npmjs.org/tslib/-/tslib-2.8.1.tgz",
|
||||
"integrity": "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==",
|
||||
"dev": true,
|
||||
"license": "0BSD",
|
||||
"optional": true
|
||||
"license": "0BSD"
|
||||
},
|
||||
"node_modules/tw-animate-css": {
|
||||
"version": "1.4.0",
|
||||
"resolved": "https://registry.npmjs.org/tw-animate-css/-/tw-animate-css-1.4.0.tgz",
|
||||
"integrity": "sha512-7bziOlRqH0hJx80h/3mbicLW7o8qLsH5+RaLR2t+OHM3D0JlWGODQKQ4cxbK7WlvmUxpcj6Kgu6EKqjrGFe3QQ==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"funding": {
|
||||
"url": "https://github.com/sponsors/Wombosvideo"
|
||||
}
|
||||
},
|
||||
"node_modules/typescript": {
|
||||
"version": "5.8.3",
|
||||
@@ -3429,17 +3730,17 @@
|
||||
}
|
||||
},
|
||||
"node_modules/vite": {
|
||||
"version": "8.0.8",
|
||||
"resolved": "https://registry.npmjs.org/vite/-/vite-8.0.8.tgz",
|
||||
"integrity": "sha512-dbU7/iLVa8KZALJyLOBOQ88nOXtNG8vxKuOT4I2mD+Ya70KPceF4IAmDsmU0h1Qsn5bPrvsY9HJstCRh3hG6Uw==",
|
||||
"version": "8.1.0",
|
||||
"resolved": "https://registry.npmjs.org/vite/-/vite-8.1.0.tgz",
|
||||
"integrity": "sha512-BuJcQK/56NQTWDGn4ABea3q4SSBdNPWwNZKTkkUpcMPnLoquSYH8llRtSUIgoL1KSCpHt5eghLShn50mH36y7Q==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"lightningcss": "^1.32.0",
|
||||
"picomatch": "^4.0.4",
|
||||
"postcss": "^8.5.8",
|
||||
"rolldown": "1.0.0-rc.15",
|
||||
"tinyglobby": "^0.2.15"
|
||||
"postcss": "^8.5.15",
|
||||
"rolldown": "~1.1.2",
|
||||
"tinyglobby": "^0.2.17"
|
||||
},
|
||||
"bin": {
|
||||
"vite": "bin/vite.js"
|
||||
@@ -3455,7 +3756,7 @@
|
||||
},
|
||||
"peerDependencies": {
|
||||
"@types/node": "^20.19.0 || >=22.12.0",
|
||||
"@vitejs/devtools": "^0.1.0",
|
||||
"@vitejs/devtools": "^0.3.0",
|
||||
"esbuild": "^0.27.0 || ^0.28.0",
|
||||
"jiti": ">=1.21.0",
|
||||
"less": "^4.0.0",
|
||||
|
||||
+10
-2
@@ -12,30 +12,38 @@
|
||||
"test:watch": "vitest"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@internationalized/date": "^3.12.2",
|
||||
"@lucide/svelte": "^1.21.0",
|
||||
"@sveltejs/vite-plugin-svelte": "^7.0.0",
|
||||
"@tailwindcss/vite": "^4.1.8",
|
||||
"@tsconfig/svelte": "^5.0.4",
|
||||
"@types/hast": "^3.0.4",
|
||||
"@types/node": "^25.1.0",
|
||||
"bits-ui": "^2.18.1",
|
||||
"clsx": "^2.1.1",
|
||||
"paneforge": "^1.0.2",
|
||||
"svelte": "^5.46.4",
|
||||
"svelte-check": "^4.1.4",
|
||||
"tailwind-merge": "^3.6.0",
|
||||
"tailwind-variants": "^3.2.2",
|
||||
"tailwindcss": "^4.1.8",
|
||||
"tw-animate-css": "^1.4.0",
|
||||
"typescript": "~5.8.3",
|
||||
"vite": "^8.0.0",
|
||||
"vite-plugin-compression2": "^2.5.1",
|
||||
"vitest": "^4.1.0"
|
||||
},
|
||||
"dependencies": {
|
||||
"@tanstack/table-core": "^8.21.3",
|
||||
"chart.js": "4.5.1",
|
||||
"highlight.js": "^11.11.1",
|
||||
"katex": "^0.16.28",
|
||||
"lucide-svelte": "^0.563.0",
|
||||
"rehype-katex": "^7.0.1",
|
||||
"rehype-stringify": "^10.0.1",
|
||||
"remark-gfm": "^4.0.1",
|
||||
"remark-math": "^6.0.0",
|
||||
"remark-parse": "^11.0.0",
|
||||
"remark-rehype": "^11.1.2",
|
||||
"chart.js": "4.5.1",
|
||||
"svelte-spa-router": "^4.0.1",
|
||||
"unified": "^11.0.5",
|
||||
"unist-util-visit": "^5.1.0"
|
||||
|
||||
+65
-18
@@ -1,33 +1,68 @@
|
||||
<script lang="ts">
|
||||
import { onMount } from "svelte";
|
||||
import Router from "svelte-spa-router";
|
||||
import Header from "./components/Header.svelte";
|
||||
import AppSidebar from "./components/AppSidebar.svelte";
|
||||
import LogViewer from "./routes/LogViewer.svelte";
|
||||
import Models from "./routes/Models.svelte";
|
||||
import ModelDetail from "./routes/ModelDetail.svelte";
|
||||
import ModelsDash from "./routes/ModelsDash.svelte";
|
||||
import Activity from "./routes/Activity.svelte";
|
||||
import Performance from "./routes/Performance.svelte";
|
||||
import Playground from "./routes/Playground.svelte";
|
||||
import PlaygroundStub from "./routes/PlaygroundStub.svelte";
|
||||
import { enableAPIEvents } from "./stores/api";
|
||||
import Settings from "./routes/Settings.svelte";
|
||||
import * as Sidebar from "$lib/components/ui/sidebar/index.js";
|
||||
import * as Tooltip from "$lib/components/ui/tooltip/index.js";
|
||||
import { Separator } from "$lib/components/ui/separator/index.js";
|
||||
import { enableAPIEvents, checkPerformanceEnabled } from "./stores/api";
|
||||
import { initScreenWidth, initSystemThemeListener, isDarkMode, appTitle, connectionState } from "./stores/theme";
|
||||
import { currentRoute } from "./stores/route";
|
||||
import { selectedPlaygroundTab, playgroundTabs } from "./stores/playground";
|
||||
|
||||
const routes = {
|
||||
"/": PlaygroundStub,
|
||||
"/models": Models,
|
||||
"/models": ModelsDash,
|
||||
"/models/:id": ModelDetail,
|
||||
"/logs": LogViewer,
|
||||
"/activity": Activity,
|
||||
"/settings": Settings,
|
||||
"/performance": Performance,
|
||||
"*": PlaygroundStub,
|
||||
};
|
||||
|
||||
function handleRouteLoaded(event: { detail: { route: string | RegExp } }) {
|
||||
const routeTitles: Record<string, string> = {
|
||||
"/": "Playground",
|
||||
"/models": "Models",
|
||||
"/activity": "Activity",
|
||||
"/logs": "Logs",
|
||||
"/settings": "Settings",
|
||||
"/performance": "Performance",
|
||||
};
|
||||
|
||||
// Header title for the current route.
//   "/"              -> "Playground / <label of the selected playground tab>"
//   "/models/<id>"   -> "Models / <decoded id>" ("Models" when the id is empty)
//   "/models"        -> "Models"
//   anything else    -> routeTitles lookup, defaulting to "Playground"
let sectionTitle = $derived.by(() => {
  if ($currentRoute === "/") {
    const tab = playgroundTabs.find((t) => t.id === $selectedPlaygroundTab);
    return `Playground / ${tab?.label ?? ""}`;
  }
  if ($currentRoute.startsWith("/models/")) {
    const id = $currentRoute.slice("/models/".length);
    if (!id) {
      return "Models";
    }
    // decodeURIComponent throws URIError on a malformed percent-escape
    // (e.g. a hand-edited "#/models/50%"). Fall back to the raw segment so a
    // bad URL cannot break the header.
    let label = id;
    try {
      label = decodeURIComponent(id);
    } catch {
      // keep the undecoded id
    }
    return `Models / ${label}`;
  }
  if ($currentRoute === "/models") {
    return "Models";
  }
  return routeTitles[$currentRoute] ?? "Playground";
});
|
||||
|
||||
function handleRouteLoaded(event: { detail: { route: string | RegExp; location?: string } }) {
|
||||
const route = event.detail.route;
|
||||
currentRoute.set(typeof route === "string" ? route : "/");
|
||||
// Prefer the actual URL path so parameterised routes (e.g. /models/:id)
|
||||
// are reflected accurately in currentRoute for sidebar highlighting.
|
||||
const loc = event.detail.location;
|
||||
currentRoute.set(loc ?? (typeof route === "string" ? route : "/"));
|
||||
}
|
||||
|
||||
$effect(() => {
|
||||
document.documentElement.setAttribute("data-theme", $isDarkMode ? "dark" : "light");
|
||||
document.documentElement.classList.toggle("dark", $isDarkMode);
|
||||
});
|
||||
|
||||
$effect(() => {
|
||||
@@ -39,6 +74,7 @@
|
||||
const cleanupScreenWidth = initScreenWidth();
|
||||
const cleanupSystemTheme = initSystemThemeListener();
|
||||
enableAPIEvents(true);
|
||||
checkPerformanceEnabled();
|
||||
|
||||
return () => {
|
||||
cleanupScreenWidth();
|
||||
@@ -48,15 +84,26 @@
|
||||
});
|
||||
</script>
|
||||
|
||||
<div class="flex flex-col h-screen">
|
||||
<Header />
|
||||
<Tooltip.Provider>
|
||||
<Sidebar.Provider>
|
||||
<AppSidebar />
|
||||
<Sidebar.Inset class="h-screen min-w-0 overflow-hidden">
|
||||
<header
|
||||
class="bg-background sticky top-0 z-10 flex h-14 shrink-0 items-center gap-2 border-b px-4"
|
||||
>
|
||||
<Sidebar.Trigger class="-ml-1" />
|
||||
<Separator orientation="vertical" class="mr-2 !h-4" />
|
||||
<h2 class="truncate pb-0 text-sm font-semibold">{sectionTitle}</h2>
|
||||
</header>
|
||||
|
||||
<main class="flex-1 overflow-auto p-4">
|
||||
<div class="h-full" class:hidden={$currentRoute !== "/"}>
|
||||
<Playground />
|
||||
</div>
|
||||
<div class="h-full" class:hidden={$currentRoute === "/"}>
|
||||
<Router {routes} on:routeLoaded={handleRouteLoaded} />
|
||||
</div>
|
||||
</main>
|
||||
</div>
|
||||
<main class="min-h-0 flex-1 overflow-auto p-4">
|
||||
<div class="h-full" class:hidden={$currentRoute !== "/"}>
|
||||
<Playground />
|
||||
</div>
|
||||
<div class="h-full" class:hidden={$currentRoute === "/"}>
|
||||
<Router {routes} on:routeLoaded={handleRouteLoaded} />
|
||||
</div>
|
||||
</main>
|
||||
</Sidebar.Inset>
|
||||
</Sidebar.Provider>
|
||||
</Tooltip.Provider>
|
||||
|
||||
@@ -3,6 +3,9 @@
|
||||
import { persistentStore } from "../stores/persistent";
|
||||
import { calculateHistogramData } from "../lib/histogram";
|
||||
import TokenHistogram from "./TokenHistogram.svelte";
|
||||
import { ChevronDown, X } from "@lucide/svelte";
|
||||
import * as Card from "$lib/components/ui/card/index.js";
|
||||
import { Button } from "$lib/components/ui/button/index.js";
|
||||
|
||||
const nf = new Intl.NumberFormat();
|
||||
const histogramCollapsed = persistentStore<boolean>("activity-histogram-collapsed", false);
|
||||
@@ -35,26 +38,24 @@
|
||||
});
|
||||
</script>
|
||||
|
||||
<div class="card relative p-3">
|
||||
<button
|
||||
class="absolute top-2 right-2 w-6 h-6 flex items-center justify-center rounded-full border border-gray-300 dark:border-gray-600 text-gray-400 dark:text-gray-500 hover:text-gray-600 dark:hover:text-gray-300 hover:border-gray-400 dark:hover:border-gray-400 transition-colors"
|
||||
<Card.Root class="relative p-3">
|
||||
<Button
|
||||
variant="ghost"
|
||||
size="icon-xs"
|
||||
class="text-muted-foreground absolute right-2 top-2 rounded-full"
|
||||
onclick={() => ($histogramCollapsed = !$histogramCollapsed)}
|
||||
title={$histogramCollapsed ? "Show histograms" : "Hide histograms"}
|
||||
>
|
||||
{#if $histogramCollapsed}
|
||||
<svg class="w-3.5 h-3.5" viewBox="0 0 16 16" fill="currentColor">
|
||||
<path d="M4.5 6l3.5 4 3.5-4H4.5z" />
|
||||
</svg>
|
||||
<ChevronDown />
|
||||
{:else}
|
||||
<svg class="w-3 h-3" viewBox="0 0 16 16" fill="currentColor">
|
||||
<path d="M3.5 3.5l9 9M12.5 3.5l-9 9" stroke="currentColor" stroke-width="2" stroke-linecap="round" fill="none" />
|
||||
</svg>
|
||||
<X />
|
||||
{/if}
|
||||
</button>
|
||||
</Button>
|
||||
{#if !$histogramCollapsed}
|
||||
<div class="flex flex-col sm:flex-row gap-6 mb-3">
|
||||
<div class="w-full sm:w-1/2 min-w-0">
|
||||
<div class="text-sm font-medium text-gray-500 dark:text-gray-400 mb-1">Prompt Processing</div>
|
||||
<div class="mb-3 flex flex-col gap-6 sm:flex-row">
|
||||
<div class="w-full min-w-0 sm:w-1/2">
|
||||
<div class="text-muted-foreground mb-1 text-sm font-medium">Prompt Processing</div>
|
||||
{#if stats.promptHistogramData}
|
||||
<TokenHistogram
|
||||
data={stats.promptHistogramData}
|
||||
@@ -62,36 +63,36 @@
|
||||
colorClass="text-amber-500 dark:text-amber-400"
|
||||
/>
|
||||
{:else}
|
||||
<div class="py-6 text-center text-sm text-gray-500 dark:text-gray-400">No prompt speed data yet</div>
|
||||
<div class="text-muted-foreground py-6 text-center text-sm">No prompt speed data yet</div>
|
||||
{/if}
|
||||
</div>
|
||||
<div class="w-full sm:w-1/2 min-w-0">
|
||||
<div class="text-sm font-medium text-gray-500 dark:text-gray-400 mb-1">Token Generation</div>
|
||||
<div class="w-full min-w-0 sm:w-1/2">
|
||||
<div class="text-muted-foreground mb-1 text-sm font-medium">Token Generation</div>
|
||||
{#if stats.genHistogramData}
|
||||
<TokenHistogram data={stats.genHistogramData} unit="tokens/sec" />
|
||||
{:else}
|
||||
<div class="py-6 text-center text-sm text-gray-500 dark:text-gray-400">No generation speed data yet</div>
|
||||
<div class="text-muted-foreground py-6 text-center text-sm">No generation speed data yet</div>
|
||||
{/if}
|
||||
</div>
|
||||
</div>
|
||||
{/if}
|
||||
<div class="grid grid-cols-4 gap-x-6 gap-y-1 text-sm">
|
||||
<div class="text-xs uppercase tracking-wider text-gray-500 dark:text-gray-400">Requests</div>
|
||||
<div class="text-xs uppercase tracking-wider text-gray-500 dark:text-gray-400">Cached</div>
|
||||
<div class="text-xs uppercase tracking-wider text-gray-500 dark:text-gray-400">Processed</div>
|
||||
<div class="text-xs uppercase tracking-wider text-gray-500 dark:text-gray-400">Generated</div>
|
||||
<div class="text-sm text-gray-700 dark:text-gray-300">
|
||||
<div class="text-muted-foreground text-xs uppercase tracking-wider">Requests</div>
|
||||
<div class="text-muted-foreground text-xs uppercase tracking-wider">Cached</div>
|
||||
<div class="text-muted-foreground text-xs uppercase tracking-wider">Processed</div>
|
||||
<div class="text-muted-foreground text-xs uppercase tracking-wider">Generated</div>
|
||||
<div class="text-sm">
|
||||
<span class="font-semibold">{nf.format(stats.totalRequests)}</span> completed,
|
||||
<span class="font-semibold">{nf.format(stats.inFlightRequests)}</span> waiting
|
||||
</div>
|
||||
<div class="text-sm text-gray-700 dark:text-gray-300">
|
||||
<div class="text-sm">
|
||||
<span class="font-semibold">{nf.format(stats.totalCacheTokens)}</span> tokens
|
||||
</div>
|
||||
<div class="text-sm text-gray-700 dark:text-gray-300">
|
||||
<div class="text-sm">
|
||||
<span class="font-semibold">{nf.format(stats.totalInputTokens)}</span> tokens
|
||||
</div>
|
||||
<div class="text-sm text-gray-700 dark:text-gray-300">
|
||||
<div class="text-sm">
|
||||
<span class="font-semibold">{nf.format(stats.totalOutputTokens)}</span> tokens
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</Card.Root>
|
||||
|
||||
@@ -0,0 +1,449 @@
|
||||
<script lang="ts">
|
||||
import { untrack } from "svelte";
|
||||
import type { ActivityLogEntry, ReqRespCapture } from "../lib/types";
|
||||
import { getCapture } from "../stores/api";
|
||||
import { persistentStore } from "../stores/persistent";
|
||||
import CaptureDialog from "./CaptureDialog.svelte";
|
||||
import {
|
||||
type ColumnDef,
|
||||
type PaginationState,
|
||||
type VisibilityState,
|
||||
getCoreRowModel,
|
||||
getPaginationRowModel,
|
||||
} from "@tanstack/table-core";
|
||||
import {
|
||||
FlexRender,
|
||||
createSvelteTable,
|
||||
renderComponent,
|
||||
} from "$lib/components/ui/data-table/index.js";
|
||||
import * as Table from "$lib/components/ui/table/index.js";
|
||||
import * as Card from "$lib/components/ui/card/index.js";
|
||||
import * as Select from "$lib/components/ui/select/index.js";
|
||||
import * as DropdownMenu from "$lib/components/ui/dropdown-menu/index.js";
|
||||
import { Button } from "$lib/components/ui/button/index.js";
|
||||
import {
|
||||
Columns3,
|
||||
ChevronLeft,
|
||||
ChevronRight,
|
||||
ChevronsLeft,
|
||||
ChevronsRight,
|
||||
} from "@lucide/svelte";
|
||||
import HeaderLabel from "./activity-table/HeaderLabel.svelte";
|
||||
import ViewCaptureButton from "./activity-table/ViewCaptureButton.svelte";
|
||||
import MetaCell from "./activity-table/MetaCell.svelte";
|
||||
|
||||
interface Props {
|
||||
metrics: ActivityLogEntry[];
|
||||
storagePrefix: string;
|
||||
showModelColumn?: boolean;
|
||||
showPagination?: boolean;
|
||||
title?: string;
|
||||
compact?: boolean;
|
||||
emptyMessage?: string;
|
||||
cardClass?: string;
|
||||
}
|
||||
|
||||
let {
|
||||
metrics,
|
||||
storagePrefix,
|
||||
showModelColumn = true,
|
||||
showPagination = false,
|
||||
title,
|
||||
compact = false,
|
||||
emptyMessage = "No activity recorded",
|
||||
cardClass = "",
|
||||
}: Props = $props();
|
||||
|
||||
/**
 * Renders a speed as "<value> t/s" with two decimals.
 * A negative value is rendered as "unknown".
 */
function formatSpeed(speed: number): string {
  if (speed < 0) {
    return "unknown";
  }
  return `${speed.toFixed(2)} t/s`;
}
|
||||
|
||||
/** Converts a duration in milliseconds to seconds with two decimals, e.g. 1500 -> "1.50s". */
function formatDuration(ms: number): string {
  const seconds = ms / 1000;
  return `${seconds.toFixed(2)}s`;
}
|
||||
|
||||
/**
 * Describes how long ago `timestamp` was, relative to the current time.
 *
 * Buckets: under 5s -> "now", under 60s -> "<n>s ago", under 60m -> "<n>m ago",
 * under 24h -> "<n>h ago", otherwise "a while ago". A timestamp that cannot be
 * parsed fails every comparison and therefore also yields "a while ago".
 */
function formatRelativeTime(timestamp: string): string {
  const elapsedMs = Date.now() - new Date(timestamp).getTime();
  const seconds = Math.floor(elapsedMs / 1000);
  if (seconds < 5) return "now";
  if (seconds < 60) return `${seconds}s ago`;

  const minutes = Math.floor(seconds / 60);
  if (minutes < 60) return `${minutes}m ago`;

  const hours = Math.floor(minutes / 60);
  return hours < 24 ? `${hours}h ago` : "a while ago";
}
|
||||
|
||||
/**
 * Formats the share of drafted tokens that were accepted as
 * "<pct>% (<accepted>/<drafted>)" with one decimal, or "-" when `drafted`
 * is not greater than zero.
 */
function formatDrafted(drafted: number, accepted: number): string {
  if (!(drafted > 0)) {
    return "-";
  }
  const pct = ((accepted * 100) / drafted).toFixed(1);
  return `${pct}% (${accepted}/${drafted})`;
}
|
||||
|
||||
/** Display metadata for one table column. */
interface ColMeta {
  /** Column identifier. */
  id: string;
  /** Human-readable column name. */
  label: string;
  /** Whether the column is shown when no other visibility setting applies. */
  defaultVisible: boolean;
}

/**
 * Returns the column metadata in display order. The "model" column is
 * placed right after "time" and only when `withModel` is true.
 */
function buildColumnMeta(withModel: boolean): ColMeta[] {
  const leading: ColMeta[] = [
    { id: "id", label: "ID", defaultVisible: true },
    { id: "time", label: "Time", defaultVisible: true },
  ];
  const modelColumn: ColMeta[] = withModel
    ? [{ id: "model", label: "Model", defaultVisible: true }]
    : [];
  const trailing: ColMeta[] = [
    { id: "req_path", label: "Path", defaultVisible: false },
    { id: "resp_status_code", label: "Status", defaultVisible: true },
    { id: "resp_content_type", label: "Content-Type", defaultVisible: false },
    { id: "cached", label: "Cached", defaultVisible: true },
    { id: "prompt", label: "Prompt", defaultVisible: true },
    { id: "generated", label: "Generated", defaultVisible: true },
    { id: "drafted", label: "Drafted", defaultVisible: false },
    { id: "prompt_speed", label: "Prompt Speed", defaultVisible: true },
    { id: "gen_speed", label: "Gen Speed", defaultVisible: true },
    { id: "duration", label: "Duration", defaultVisible: true },
    { id: "capture", label: "Capture", defaultVisible: true },
    { id: "meta", label: "Meta", defaultVisible: false },
  ];
  return [...leading, ...modelColumn, ...trailing];
}
|
||||
|
||||
// Column metadata for the current `showModelColumn` setting.
let columnMeta = $derived(buildColumnMeta(showModelColumn));

// Lookup table: column id -> label.
let columnLabelMap = $derived.by(() => {
  const labels: Record<string, string> = {};
  for (const { id, label } of columnMeta) {
    labels[id] = label;
  }
  return labels;
});

// Lookup table: column id -> its defaultVisible flag.
let defaultVisibility = $derived(
  Object.fromEntries(columnMeta.map((col) => [col.id, col.defaultVisible])) as VisibilityState
);
|
||||
|
||||
// svelte-ignore state_referenced_locally
|
||||
const storedVisibility = persistentStore<VisibilityState>(
|
||||
`${storagePrefix}-columns`,
|
||||
{}
|
||||
);
|
||||
|
||||
// svelte-ignore state_referenced_locally
|
||||
let columnVisibility = $state<VisibilityState>(
|
||||
Object.keys($storedVisibility).length > 0 ? $storedVisibility : defaultVisibility
|
||||
);
|
||||
|
||||
// svelte-ignore state_referenced_locally
|
||||
const storedPageSize = persistentStore<number>(`${storagePrefix}-page-size`, 10);
|
||||
|
||||
// When not paginating, use a large page size so all rows render in one page.
|
||||
// svelte-ignore state_referenced_locally
|
||||
let pagination = $state<PaginationState>({
|
||||
pageIndex: 0,
|
||||
pageSize: showPagination ? $storedPageSize : Number.MAX_SAFE_INTEGER,
|
||||
});
|
||||
|
||||
// Reset to the first page when the data source changes. We deliberately do
|
||||
// NOT track pagination here — page-size changes reset pageIndex inside
|
||||
// onPaginationChange instead, to avoid clobbering page navigation.
|
||||
$effect(() => {
|
||||
metrics;
|
||||
untrack(() => {
|
||||
pagination = { ...pagination, pageIndex: 0 };
|
||||
});
|
||||
});
|
||||
|
||||
let selectedCapture = $state<ReqRespCapture | null>(null);
|
||||
let dialogOpen = $state(false);
|
||||
let loadingCaptureId = $state<number | null>(null);
|
||||
|
||||
/**
 * Fetch the capture with the given id and show it in the dialog.
 *
 * While the request is in flight `loadingCaptureId` holds the id so the
 * matching row can render a loading state. The indicator is cleared in a
 * `finally` block so a rejected `getCapture` call cannot leave the row stuck
 * in its loading state; the rejection itself still propagates to the caller.
 *
 * @param id id of the capture to load.
 */
async function viewCapture(id: number) {
  loadingCaptureId = id;
  try {
    selectedCapture = await getCapture(id);
    dialogOpen = true;
  } finally {
    // Only clear the indicator if a newer request has not taken it over.
    if (loadingCaptureId === id) loadingCaptureId = null;
  }
}
|
||||
|
||||
/** Hide the capture dialog and forget the capture it was showing. */
function closeDialog() {
  selectedCapture = null;
  dialogOpen = false;
}
|
||||
|
||||
/**
 * Build the column definitions handed to `createSvelteTable`.
 *
 * Column ids and their order mirror `buildColumnMeta`. Text cells fall back
 * to "-" when the underlying value is missing or falsy.
 *
 * @param withModel when true, a "model" column is placed right after "time".
 * @returns a freshly built array on every call.
 */
function buildColumns(withModel: boolean): ColumnDef<ActivityLogEntry>[] {
  type Column = ColumnDef<ActivityLogEntry>;

  // Header renderer for columns whose title carries an explanatory tooltip.
  const tooltipHeader = (label: string, tooltip: string) => () =>
    renderComponent(HeaderLabel, { label, tooltip });

  // Columns that always come first.
  const leading: Column[] = [
    {
      id: "id",
      header: "ID",
      // The id is displayed as id + 1.
      cell: ({ row }) => String(row.original.id + 1),
    },
    {
      id: "time",
      header: "Time",
      cell: ({ row }) => formatRelativeTime(row.original.timestamp),
    },
  ];

  // Optional column, present only when the caller asks for it.
  const modelColumns: Column[] = withModel
    ? [
        {
          id: "model",
          header: "Model",
          cell: ({ row }) => row.original.model ?? "-",
        },
      ]
    : [];

  // Everything after the optional model column.
  const trailing: Column[] = [
    {
      id: "req_path",
      header: "Path",
      cell: ({ row }) => row.original.req_path || "-",
    },
    {
      id: "resp_status_code",
      header: "Status",
      cell: ({ row }) => String(row.original.resp_status_code || "-"),
    },
    {
      id: "resp_content_type",
      header: "Content-Type",
      cell: ({ row }) => row.original.resp_content_type || "-",
    },
    {
      id: "cached",
      header: tooltipHeader("Cached", "prompt tokens from cache"),
      cell: ({ row }) => {
        const cached = row.original.tokens.cache_tokens;
        return cached > 0 ? cached.toLocaleString() : "-";
      },
    },
    {
      id: "prompt",
      header: tooltipHeader("Prompt", "new prompt tokens processed"),
      cell: ({ row }) => row.original.tokens.input_tokens.toLocaleString(),
    },
    {
      id: "generated",
      header: "Generated",
      cell: ({ row }) => row.original.tokens.output_tokens.toLocaleString(),
    },
    {
      id: "drafted",
      header: tooltipHeader("Drafted", "acceptance rate (accepted/drafted)"),
      cell: ({ row }) => {
        const { draft_tokens, draft_acc_tokens } = row.original.tokens;
        return formatDrafted(draft_tokens, draft_acc_tokens);
      },
    },
    {
      id: "prompt_speed",
      header: "Prompt Speed",
      cell: ({ row }) => formatSpeed(row.original.tokens.prompt_per_second),
    },
    {
      id: "gen_speed",
      header: "Gen Speed",
      cell: ({ row }) => formatSpeed(row.original.tokens.tokens_per_second),
    },
    {
      id: "duration",
      header: "Duration",
      cell: ({ row }) => formatDuration(row.original.duration_ms),
    },
    {
      id: "capture",
      header: "Capture",
      cell: ({ row }) => {
        const entry = row.original;
        return renderComponent(ViewCaptureButton, {
          hasCapture: entry.has_capture,
          // True only for the row whose capture is currently being fetched.
          loading: loadingCaptureId === entry.id,
          onclick: () => viewCapture(entry.id),
        });
      },
    },
    {
      id: "meta",
      header: "Meta",
      cell: ({ row }) => renderComponent(MetaCell, { metadata: row.original.metadata }),
    },
  ];

  return [...leading, ...modelColumns, ...trailing];
}
|
||||
|
||||
let columns = $derived(buildColumns(showModelColumn));
|
||||
|
||||
const table = createSvelteTable({
|
||||
get data() {
|
||||
return metrics;
|
||||
},
|
||||
get columns() {
|
||||
return columns;
|
||||
},
|
||||
state: {
|
||||
get pagination() {
|
||||
return pagination;
|
||||
},
|
||||
get columnVisibility() {
|
||||
return columnVisibility;
|
||||
},
|
||||
},
|
||||
onPaginationChange: (updater) => {
|
||||
const prev = pagination;
|
||||
const next =
|
||||
typeof updater === "function" ? updater(prev) : updater;
|
||||
// Reassign so the table's $effect.pre (which reads state.pagination)
|
||||
// picks up the new value. Reset to first page when the page size
|
||||
// changes so we don't land on an empty page.
|
||||
pagination =
|
||||
next.pageSize !== prev.pageSize
|
||||
? { pageIndex: 0, pageSize: next.pageSize }
|
||||
: next;
|
||||
if (showPagination) storedPageSize.set(pagination.pageSize);
|
||||
},
|
||||
onColumnVisibilityChange: (updater) => {
|
||||
columnVisibility =
|
||||
typeof updater === "function" ? updater(columnVisibility) : updater;
|
||||
storedVisibility.set(columnVisibility);
|
||||
},
|
||||
getCoreRowModel: getCoreRowModel(),
|
||||
getPaginationRowModel: getPaginationRowModel(),
|
||||
});
|
||||
|
||||
let thClass = $derived(compact ? "px-4 py-2 h-9" : "px-6 py-3 h-12");
|
||||
let tdClass = $derived(compact ? "px-4 py-2" : "px-6 py-4");
|
||||
</script>
|
||||
|
||||
<Card.Root class="shrink-0 gap-0 overflow-hidden py-0 {cardClass}">
|
||||
<Card.Header class="flex items-center justify-between border-b px-4 py-2">
|
||||
<div class="flex items-center gap-2">
|
||||
{#if title}
|
||||
<Card.Title class="text-sm font-semibold">
|
||||
{title}
|
||||
<span class="text-muted-foreground text-xs font-normal">({metrics.length})</span>
|
||||
</Card.Title>
|
||||
{/if}
|
||||
</div>
|
||||
<div class="flex items-center gap-2">
|
||||
{#if showPagination}
|
||||
<span class="text-muted-foreground text-xs">Rows</span>
|
||||
<Select.Root
|
||||
type="single"
|
||||
value={String(pagination.pageSize)}
|
||||
onValueChange={(v) => table.setPageSize(Number(v))}
|
||||
>
|
||||
<Select.Trigger size="sm" class="h-7 w-[4.5rem] text-xs">
|
||||
{pagination.pageSize}
|
||||
</Select.Trigger>
|
||||
<Select.Content>
|
||||
{#each [5, 10, 25, 50] as size (size)}
|
||||
<Select.Item value={String(size)}>{size}</Select.Item>
|
||||
{/each}
|
||||
</Select.Content>
|
||||
</Select.Root>
|
||||
{/if}
|
||||
<DropdownMenu.Root>
|
||||
<DropdownMenu.Trigger
|
||||
class="hover:bg-muted inline-flex size-7 items-center justify-center rounded-[min(var(--radius-md),12px)]"
|
||||
title="Select columns"
|
||||
>
|
||||
<Columns3 class="size-4" />
|
||||
</DropdownMenu.Trigger>
|
||||
<DropdownMenu.Content align="end" class="min-w-[16rem] p-0">
|
||||
<DropdownMenu.Label class="text-muted-foreground border-b px-3 py-2 text-xs font-medium uppercase tracking-wider">
|
||||
Columns
|
||||
</DropdownMenu.Label>
|
||||
{#each table.getAllColumns() as column (column.id)}
|
||||
{#if column.getCanHide()}
|
||||
<DropdownMenu.CheckboxItem
|
||||
checked={column.getIsVisible()}
|
||||
onCheckedChange={(v) => column.toggleVisibility(!!v)}
|
||||
closeOnSelect={false}
|
||||
>
|
||||
{columnLabelMap[column.id] ?? column.id}
|
||||
</DropdownMenu.CheckboxItem>
|
||||
{/if}
|
||||
{/each}
|
||||
</DropdownMenu.Content>
|
||||
</DropdownMenu.Root>
|
||||
</div>
|
||||
</Card.Header>
|
||||
<Card.Content class="overflow-x-auto p-0">
|
||||
<Table.Root class="min-w-full">
|
||||
<Table.Header>
|
||||
{#each table.getHeaderGroups() as headerGroup (headerGroup.id)}
|
||||
<Table.Row>
|
||||
{#each headerGroup.headers as header (header.id)}
|
||||
<Table.Head class={thClass} colspan={header.colSpan}>
|
||||
{#if !header.isPlaceholder}
|
||||
<FlexRender content={header.column.columnDef.header} context={header.getContext()} />
|
||||
{/if}
|
||||
</Table.Head>
|
||||
{/each}
|
||||
</Table.Row>
|
||||
{/each}
|
||||
</Table.Header>
|
||||
<Table.Body>
|
||||
{#each table.getRowModel().rows as row (row.id)}
|
||||
<Table.Row>
|
||||
{#each row.getVisibleCells() as cell (cell.id)}
|
||||
<Table.Cell class={tdClass}>
|
||||
<FlexRender content={cell.column.columnDef.cell} context={cell.getContext()} />
|
||||
</Table.Cell>
|
||||
{/each}
|
||||
</Table.Row>
|
||||
{:else}
|
||||
<Table.Row>
|
||||
<Table.Cell colspan={columns.length} class="text-muted-foreground py-6 text-center text-sm">
|
||||
{emptyMessage}
|
||||
</Table.Cell>
|
||||
</Table.Row>
|
||||
{/each}
|
||||
</Table.Body>
|
||||
</Table.Root>
|
||||
|
||||
{#if showPagination && metrics.length > 0}
|
||||
<div class="flex items-center justify-between gap-2 border-t px-4 py-2 text-sm">
|
||||
<span class="text-muted-foreground text-xs">
|
||||
Page {pagination.pageIndex + 1} of {table.getPageCount()} · {metrics.length} total
|
||||
</span>
|
||||
<div class="flex items-center gap-1">
|
||||
<Button
|
||||
variant="ghost"
|
||||
size="icon-sm"
|
||||
onclick={() => table.setPageIndex(0)}
|
||||
disabled={!table.getCanPreviousPage()}
|
||||
title="First page"
|
||||
>
|
||||
<ChevronsLeft />
|
||||
</Button>
|
||||
<Button
|
||||
variant="ghost"
|
||||
size="icon-sm"
|
||||
onclick={() => table.previousPage()}
|
||||
disabled={!table.getCanPreviousPage()}
|
||||
title="Previous page"
|
||||
>
|
||||
<ChevronLeft />
|
||||
</Button>
|
||||
<Button
|
||||
variant="ghost"
|
||||
size="icon-sm"
|
||||
onclick={() => table.nextPage()}
|
||||
disabled={!table.getCanNextPage()}
|
||||
title="Next page"
|
||||
>
|
||||
<ChevronRight />
|
||||
</Button>
|
||||
<Button
|
||||
variant="ghost"
|
||||
size="icon-sm"
|
||||
onclick={() => table.setPageIndex(table.getPageCount() - 1)}
|
||||
disabled={!table.getCanNextPage()}
|
||||
title="Last page"
|
||||
>
|
||||
<ChevronsRight />
|
||||
</Button>
|
||||
</div>
|
||||
</div>
|
||||
{/if}
|
||||
</Card.Content>
|
||||
</Card.Root>
|
||||
|
||||
<CaptureDialog capture={selectedCapture} open={dialogOpen} onclose={closeDialog} />
|
||||
@@ -0,0 +1,266 @@
|
||||
<script lang="ts">
|
||||
import { link } from "svelte-spa-router";
|
||||
import { House, Boxes, Activity, ScrollText, Gauge, Sun, Moon, Monitor, ChevronRight, Settings } from "@lucide/svelte";
|
||||
import * as Sidebar from "$lib/components/ui/sidebar/index.js";
|
||||
import * as Collapsible from "$lib/components/ui/collapsible/index.js";
|
||||
import { Button } from "$lib/components/ui/button/index.js";
|
||||
import { toggleTheme, themeMode, appTitle } from "../stores/theme";
|
||||
import { currentRoute } from "../stores/route";
|
||||
import { playgroundActivity } from "../stores/playgroundActivity";
|
||||
import { performanceEnabled, models } from "../stores/api";
|
||||
import { selectedPlaygroundTab, playgroundTabs, playgroundMenuOpen } from "../stores/playground";
|
||||
import { modelsMenuOpen } from "../stores/sidebar";
|
||||
import type { Model } from "../lib/types";
|
||||
import ConnectionStatus from "./ConnectionStatus.svelte";
|
||||
|
||||
/**
 * Normalise a user-entered title and store it in `appTitle`.
 *
 * Newlines are removed, surrounding whitespace is trimmed and the result is
 * cut to at most 64 characters. An empty result falls back to "llama-swap".
 *
 * @param newTitle raw text taken from the editable heading.
 */
function handleTitleChange(newTitle: string): void {
  const singleLine = newTitle.replace(/\n/g, "");
  const clipped = singleLine.trim().substring(0, 64);
  appTitle.set(clipped || "llama-swap");
}
|
||||
|
||||
/**
 * Commit the edited title when Enter is pressed.
 *
 * The default action is suppressed, the element's current text is passed to
 * `handleTitleChange` and the element is blurred. Other keys are left alone.
 *
 * @param e keydown event from the editable heading.
 */
function handleKeyDown(e: KeyboardEvent): void {
  if (e.key !== "Enter") return;

  e.preventDefault();
  const heading = e.currentTarget as HTMLElement;
  handleTitleChange(heading.textContent || "(set title)");
  heading.blur();
}
|
||||
|
||||
/**
 * Commit the edited title when the heading loses focus.
 *
 * @param e blur event from the editable heading.
 */
function handleBlur(e: FocusEvent): void {
  const { textContent } = e.currentTarget as HTMLElement;
  handleTitleChange(textContent || "(set title)");
}
|
||||
|
||||
/**
 * Decide whether the navigation entry for `path` should be highlighted for
 * the route `current`.
 *
 * The root path "/" matches only itself. Any other path matches when
 * `current` equals it or continues past it at a segment boundary ("/", "?"
 * or "#"). A plain prefix test would also mark "/logs" active for an
 * unrelated route such as "/logsearch".
 *
 * @param path route the navigation entry links to.
 * @param current route currently being shown.
 * @returns true when the entry should be rendered as active.
 */
function isActive(path: string, current: string): boolean {
  if (path === "/") return current === "/";
  if (!current.startsWith(path)) return false;
  // A path that already ends in "/" is its own segment boundary.
  if (path.endsWith("/")) return true;

  // charAt returns "" when `current` ends exactly where `path` does.
  const boundary = current.charAt(path.length);
  return boundary === "" || boundary === "/" || boundary === "?" || boundary === "#";
}
|
||||
|
||||
type DotColor = "grey" | "yellow" | "green";
|
||||
/**
 * Pick the status-dot colour for a model from its `state`.
 *
 * @param model model whose state is inspected.
 * @returns "green" for "ready", "yellow" for "starting" or "stopping",
 *          "grey" for any other state.
 */
function statusDotColor(model: Model): DotColor {
  switch (model.state) {
    case "ready":
      return "green";
    case "starting":
    case "stopping":
      return "yellow";
    default:
      return "grey";
  }
}
|
||||
|
||||
const dotClass: Record<DotColor, string> = {
|
||||
grey: "bg-muted-foreground/40",
|
||||
yellow: "bg-warning",
|
||||
green: "bg-success",
|
||||
};
|
||||
</script>
|
||||
|
||||
<Sidebar.Root collapsible="icon">
|
||||
<Sidebar.Header>
|
||||
<div class="flex items-center gap-2 px-2 py-1.5">
|
||||
<div class="flex shrink-0 items-center justify-center">
|
||||
<ConnectionStatus />
|
||||
</div>
|
||||
<h1
|
||||
contenteditable="true"
|
||||
class="truncate pb-0 text-base font-semibold outline-none rounded-md px-1 hover:bg-sidebar-accent group-data-[collapsible=icon]:hidden"
|
||||
onblur={handleBlur}
|
||||
onkeydown={handleKeyDown}
|
||||
>
|
||||
{$appTitle}
|
||||
</h1>
|
||||
</div>
|
||||
</Sidebar.Header>
|
||||
|
||||
<Sidebar.Content>
|
||||
<Sidebar.Group>
|
||||
<Sidebar.GroupContent>
|
||||
<Sidebar.Menu class="gap-1">
|
||||
<Sidebar.MenuItem>
|
||||
<Sidebar.MenuButton isActive={isActive("/activity", $currentRoute)} tooltipContent="Activity">
|
||||
{#snippet child({ props })}
|
||||
<a href="/activity" use:link {...props}>
|
||||
<Activity />
|
||||
<span>Activity</span>
|
||||
</a>
|
||||
{/snippet}
|
||||
</Sidebar.MenuButton>
|
||||
</Sidebar.MenuItem>
|
||||
|
||||
<Sidebar.MenuItem>
|
||||
<Collapsible.Root
|
||||
open={$playgroundMenuOpen}
|
||||
onOpenChange={(v) => playgroundMenuOpen.set(v)}
|
||||
class="gap-0"
|
||||
>
|
||||
<Sidebar.MenuButton
|
||||
isActive={isActive("/", $currentRoute)}
|
||||
tooltipContent="Playground"
|
||||
>
|
||||
{#snippet child({ props })}
|
||||
<a href="/" use:link {...props}>
|
||||
<House />
|
||||
<span class={$playgroundActivity ? "activity-link" : ""}>Playground</span>
|
||||
<span
|
||||
class="ml-auto transition-transform duration-200 {$playgroundMenuOpen ? 'rotate-90' : ''}"
|
||||
role="button"
|
||||
tabindex="0"
|
||||
aria-label="Toggle playground section"
|
||||
onclick={(e) => {
|
||||
e.preventDefault();
|
||||
e.stopPropagation();
|
||||
playgroundMenuOpen.update((v) => !v);
|
||||
}}
|
||||
onkeydown={(e) => {
|
||||
if (e.key === 'Enter' || e.key === ' ') {
|
||||
e.preventDefault();
|
||||
e.stopPropagation();
|
||||
playgroundMenuOpen.update((v) => !v);
|
||||
}
|
||||
}}
|
||||
>
|
||||
<ChevronRight />
|
||||
</span>
|
||||
</a>
|
||||
{/snippet}
|
||||
</Sidebar.MenuButton>
|
||||
<Collapsible.Content>
|
||||
<Sidebar.MenuSub>
|
||||
{#each playgroundTabs as tab (tab.id)}
|
||||
<Sidebar.MenuSubItem>
|
||||
<Sidebar.MenuSubButton
|
||||
isActive={isActive("/", $currentRoute) && $selectedPlaygroundTab === tab.id}
|
||||
>
|
||||
{#snippet child({ props })}
|
||||
<a
|
||||
href="/"
|
||||
use:link
|
||||
{...props}
|
||||
onclick={() => selectedPlaygroundTab.set(tab.id)}
|
||||
>
|
||||
<span>{tab.label}</span>
|
||||
</a>
|
||||
{/snippet}
|
||||
</Sidebar.MenuSubButton>
|
||||
</Sidebar.MenuSubItem>
|
||||
{/each}
|
||||
</Sidebar.MenuSub>
|
||||
</Collapsible.Content>
|
||||
</Collapsible.Root>
|
||||
</Sidebar.MenuItem>
|
||||
|
||||
<Sidebar.MenuItem>
|
||||
<Collapsible.Root
|
||||
open={$modelsMenuOpen}
|
||||
onOpenChange={(v) => modelsMenuOpen.set(v)}
|
||||
class="gap-0"
|
||||
>
|
||||
<Sidebar.MenuButton
|
||||
isActive={$currentRoute.startsWith("/models")}
|
||||
tooltipContent="Models"
|
||||
>
|
||||
{#snippet child({ props })}
|
||||
<a href="/models" use:link {...props}>
|
||||
<Boxes />
|
||||
<span>Models</span>
|
||||
<span
|
||||
class="ml-auto transition-transform duration-200 {$modelsMenuOpen ? 'rotate-90' : ''}"
|
||||
role="button"
|
||||
tabindex="0"
|
||||
aria-label="Toggle models section"
|
||||
onclick={(e) => {
|
||||
e.preventDefault();
|
||||
e.stopPropagation();
|
||||
modelsMenuOpen.update((v) => !v);
|
||||
}}
|
||||
onkeydown={(e) => {
|
||||
if (e.key === 'Enter' || e.key === ' ') {
|
||||
e.preventDefault();
|
||||
e.stopPropagation();
|
||||
modelsMenuOpen.update((v) => !v);
|
||||
}
|
||||
}}
|
||||
>
|
||||
<ChevronRight />
|
||||
</span>
|
||||
</a>
|
||||
{/snippet}
|
||||
</Sidebar.MenuButton>
|
||||
<Collapsible.Content>
|
||||
<Sidebar.MenuSub>
|
||||
{#each $models as model (model.id)}
|
||||
<Sidebar.MenuSubItem>
|
||||
<Sidebar.MenuSubButton
|
||||
isActive={$currentRoute === `/models/${encodeURIComponent(model.id)}`}
|
||||
>
|
||||
{#snippet child({ props })}
|
||||
<a href="/models/{encodeURIComponent(model.id)}" use:link {...props}>
|
||||
<span class={`size-2 shrink-0 rounded-full ${dotClass[statusDotColor(model)]}`}></span>
|
||||
<span class="flex-1 truncate">{model.id}</span>
|
||||
</a>
|
||||
{/snippet}
|
||||
</Sidebar.MenuSubButton>
|
||||
</Sidebar.MenuSubItem>
|
||||
{/each}
|
||||
</Sidebar.MenuSub>
|
||||
</Collapsible.Content>
|
||||
</Collapsible.Root>
|
||||
</Sidebar.MenuItem>
|
||||
|
||||
<Sidebar.MenuItem>
|
||||
<Sidebar.MenuButton isActive={isActive("/logs", $currentRoute)} tooltipContent="Logs">
|
||||
{#snippet child({ props })}
|
||||
<a href="/logs" use:link {...props}>
|
||||
<ScrollText />
|
||||
<span>Logs</span>
|
||||
</a>
|
||||
{/snippet}
|
||||
</Sidebar.MenuButton>
|
||||
</Sidebar.MenuItem>
|
||||
|
||||
{#if $performanceEnabled}
|
||||
<Sidebar.MenuItem>
|
||||
<Sidebar.MenuButton isActive={isActive("/performance", $currentRoute)} tooltipContent="Performance">
|
||||
{#snippet child({ props })}
|
||||
<a href="/performance" use:link {...props}>
|
||||
<Gauge />
|
||||
<span>Performance</span>
|
||||
</a>
|
||||
{/snippet}
|
||||
</Sidebar.MenuButton>
|
||||
</Sidebar.MenuItem>
|
||||
{/if}
|
||||
</Sidebar.Menu>
|
||||
</Sidebar.GroupContent>
|
||||
</Sidebar.Group>
|
||||
</Sidebar.Content>
|
||||
|
||||
<Sidebar.Footer>
|
||||
<div
|
||||
class="flex items-center justify-between gap-2 px-1 group-data-[collapsible=icon]:flex-col-reverse"
|
||||
>
|
||||
<Sidebar.MenuButton
|
||||
isActive={isActive("/settings", $currentRoute)}
|
||||
tooltipContent="Settings"
|
||||
>
|
||||
{#snippet child({ props })}
|
||||
<a href="/settings" use:link {...props}>
|
||||
<Settings />
|
||||
<span>Settings</span>
|
||||
</a>
|
||||
{/snippet}
|
||||
</Sidebar.MenuButton>
|
||||
<Button
|
||||
variant="ghost"
|
||||
size="icon"
|
||||
onclick={toggleTheme}
|
||||
title="Toggle theme (current: {$themeMode})"
|
||||
>
|
||||
{#if $themeMode === "system"}
|
||||
<Monitor />
|
||||
{:else if $themeMode === "light"}
|
||||
<Sun />
|
||||
{:else}
|
||||
<Moon />
|
||||
{/if}
|
||||
<span class="sr-only">Toggle theme</span>
|
||||
</Button>
|
||||
</div>
|
||||
</Sidebar.Footer>
|
||||
<Sidebar.Rail />
|
||||
</Sidebar.Root>
|
||||
@@ -1,5 +1,7 @@
|
||||
<script lang="ts">
|
||||
import type { ReqRespCapture } from "../lib/types";
|
||||
import { Button } from "$lib/components/ui/button/index.js";
|
||||
import * as Dialog from "$lib/components/ui/dialog/index.js";
|
||||
|
||||
interface Props {
|
||||
capture: ReqRespCapture | null;
|
||||
@@ -9,22 +11,12 @@
|
||||
|
||||
let { capture, open, onclose }: Props = $props();
|
||||
|
||||
let dialogEl: HTMLDialogElement | undefined = $state();
|
||||
|
||||
type BodyTab = "raw" | "pretty" | "chat";
|
||||
let reqBodyTab: BodyTab = $state("pretty");
|
||||
let respBodyTab: BodyTab = $state("pretty");
|
||||
let copiedReq = $state(false);
|
||||
let copiedResp = $state(false);
|
||||
|
||||
$effect(() => {
|
||||
if (open && dialogEl) {
|
||||
dialogEl.showModal();
|
||||
} else if (!open && dialogEl) {
|
||||
dialogEl.close();
|
||||
}
|
||||
});
|
||||
|
||||
// Reset tabs when capture changes
|
||||
$effect(() => {
|
||||
if (capture) {
|
||||
@@ -39,10 +31,6 @@
|
||||
}
|
||||
});
|
||||
|
||||
function handleDialogClose() {
|
||||
onclose();
|
||||
}
|
||||
|
||||
function decodeBody(body: string | null | undefined): string {
|
||||
if (!body) return "";
|
||||
try {
|
||||
@@ -190,40 +178,36 @@
|
||||
});
|
||||
</script>
|
||||
|
||||
<dialog
|
||||
bind:this={dialogEl}
|
||||
onclose={handleDialogClose}
|
||||
class="bg-surface text-txtmain rounded-lg shadow-xl max-w-4xl w-full max-h-[90vh] p-0 backdrop:bg-black/50 m-auto"
|
||||
<Dialog.Root
|
||||
{open}
|
||||
onOpenChange={(v) => {
|
||||
if (!v) onclose();
|
||||
}}
|
||||
>
|
||||
{#if capture}
|
||||
<div class="flex flex-col max-h-[90vh]">
|
||||
<div
|
||||
class="flex justify-between items-center p-4 border-b border-card-border"
|
||||
>
|
||||
<h2 class="text-xl font-bold pb-0">Capture #{capture.id + 1}{#if capture.req_path} <span class="text-base font-mono font-normal text-txtsecondary">{capture.req_path}</span>{/if}</h2>
|
||||
<button
|
||||
onclick={() => dialogEl?.close()}
|
||||
class="text-txtsecondary hover:text-txtmain text-2xl leading-none"
|
||||
>
|
||||
×
|
||||
</button>
|
||||
</div>
|
||||
<Dialog.Content class="flex max-h-[90vh] w-[90%] sm:max-w-[90%] flex-col gap-0 p-0">
|
||||
{#if capture}
|
||||
<Dialog.Header class="border-b border-border px-4 py-3">
|
||||
<Dialog.Title class="text-lg font-bold">
|
||||
Capture #{capture.id + 1}{#if capture.req_path}
|
||||
<span class="font-mono text-base font-normal text-muted-foreground">{capture.req_path}</span>{/if}
|
||||
</Dialog.Title>
|
||||
</Dialog.Header>
|
||||
|
||||
<div class="overflow-y-auto flex-1 p-4 space-y-4">
|
||||
<div class="min-h-0 flex-1 overflow-y-auto space-y-4 p-4">
|
||||
<!-- Request Headers -->
|
||||
<details class="group" open>
|
||||
<summary
|
||||
class="cursor-pointer font-semibold text-sm uppercase tracking-wider text-txtsecondary hover:text-txtmain"
|
||||
class="cursor-pointer font-semibold text-sm uppercase tracking-wider text-muted-foreground hover:text-foreground"
|
||||
>
|
||||
Request Headers
|
||||
</summary>
|
||||
<div
|
||||
class="mt-2 bg-background rounded border border-card-border overflow-auto max-h-48"
|
||||
class="mt-2 bg-background rounded-md border border-border overflow-auto max-h-48"
|
||||
>
|
||||
<table class="w-full text-sm">
|
||||
<tbody>
|
||||
{#each Object.entries(capture.req_headers || {}) as [key, value]}
|
||||
<tr class="border-b border-card-border-inner last:border-0">
|
||||
<tr class="border-b border-border last:border-0">
|
||||
<td class="px-3 py-1 font-mono text-primary whitespace-nowrap"
|
||||
>{key}</td
|
||||
>
|
||||
@@ -238,7 +222,7 @@
|
||||
<!-- Request Body -->
|
||||
<details class="group" open>
|
||||
<summary
|
||||
class="cursor-pointer font-semibold text-sm uppercase tracking-wider text-txtsecondary hover:text-txtmain"
|
||||
class="cursor-pointer font-semibold text-sm uppercase tracking-wider text-muted-foreground hover:text-foreground"
|
||||
>
|
||||
Request Body
|
||||
</summary>
|
||||
@@ -271,14 +255,14 @@
|
||||
</button>
|
||||
</div>
|
||||
<div
|
||||
class="mt-1 bg-background rounded border border-card-border overflow-auto max-h-96"
|
||||
class="mt-1 bg-background rounded-md border border-border overflow-auto max-h-96"
|
||||
>
|
||||
<pre
|
||||
class="p-3 text-sm font-mono whitespace-pre-wrap break-all">{displayedRequestBody}</pre>
|
||||
</div>
|
||||
{:else}
|
||||
<div
|
||||
class="mt-2 bg-background rounded border border-card-border overflow-auto max-h-96"
|
||||
class="mt-2 bg-background rounded-md border border-border overflow-auto max-h-96"
|
||||
>
|
||||
<pre class="p-3 text-sm font-mono whitespace-pre-wrap break-all"
|
||||
>(empty)</pre
|
||||
@@ -290,17 +274,17 @@
|
||||
<!-- Response Headers -->
|
||||
<details class="group" open>
|
||||
<summary
|
||||
class="cursor-pointer font-semibold text-sm uppercase tracking-wider text-txtsecondary hover:text-txtmain"
|
||||
class="cursor-pointer font-semibold text-sm uppercase tracking-wider text-muted-foreground hover:text-foreground"
|
||||
>
|
||||
Response Headers
|
||||
</summary>
|
||||
<div
|
||||
class="mt-2 bg-background rounded border border-card-border overflow-auto max-h-48"
|
||||
class="mt-2 bg-background rounded-md border border-border overflow-auto max-h-48"
|
||||
>
|
||||
<table class="w-full text-sm">
|
||||
<tbody>
|
||||
{#each Object.entries(capture.resp_headers || {}) as [key, value]}
|
||||
<tr class="border-b border-card-border-inner last:border-0">
|
||||
<tr class="border-b border-border last:border-0">
|
||||
<td class="px-3 py-1 font-mono text-primary whitespace-nowrap"
|
||||
>{key}</td
|
||||
>
|
||||
@@ -315,13 +299,13 @@
|
||||
<!-- Response Body -->
|
||||
<details class="group" open>
|
||||
<summary
|
||||
class="cursor-pointer font-semibold text-sm uppercase tracking-wider text-txtsecondary hover:text-txtmain"
|
||||
class="cursor-pointer font-semibold text-sm uppercase tracking-wider text-muted-foreground hover:text-foreground"
|
||||
>
|
||||
Response Body
|
||||
</summary>
|
||||
{#if isResponseImage && capture.resp_body}
|
||||
<div
|
||||
class="mt-2 bg-background rounded border border-card-border overflow-auto max-h-96"
|
||||
class="mt-2 bg-background rounded-md border border-border overflow-auto max-h-96"
|
||||
>
|
||||
<div class="p-3 flex justify-center">
|
||||
<img
|
||||
@@ -368,26 +352,26 @@
|
||||
</button>
|
||||
</div>
|
||||
<div
|
||||
class="mt-1 bg-background rounded border border-card-border overflow-auto max-h-96"
|
||||
class="mt-1 bg-background rounded-md border border-border overflow-auto max-h-96"
|
||||
>
|
||||
{#if respBodyTab === "chat"}
|
||||
<div class="p-3 text-sm space-y-3">
|
||||
{#if sseChat.reasoning}
|
||||
<div>
|
||||
<div
|
||||
class="text-xs font-semibold uppercase tracking-wider text-txtsecondary mb-1"
|
||||
class="text-xs font-semibold uppercase tracking-wider text-muted-foreground mb-1"
|
||||
>
|
||||
Reasoning
|
||||
</div>
|
||||
<pre
|
||||
class="font-mono whitespace-pre-wrap break-all text-txtsecondary">{sseChat.reasoning}</pre>
|
||||
class="font-mono whitespace-pre-wrap break-all text-muted-foreground">{sseChat.reasoning}</pre>
|
||||
</div>
|
||||
{/if}
|
||||
{#if sseChat.content}
|
||||
<div>
|
||||
{#if sseChat.reasoning}
|
||||
<div
|
||||
class="text-xs font-semibold uppercase tracking-wider text-txtsecondary mb-1"
|
||||
class="text-xs font-semibold uppercase tracking-wider text-muted-foreground mb-1"
|
||||
>
|
||||
Response
|
||||
</div>
|
||||
@@ -407,15 +391,15 @@
|
||||
</div>
|
||||
{:else if responseBodyRaw}
|
||||
<div
|
||||
class="mt-2 bg-background rounded border border-card-border overflow-auto max-h-96"
|
||||
class="mt-2 bg-background rounded-md border border-border overflow-auto max-h-96"
|
||||
>
|
||||
<div class="p-3 text-sm text-txtsecondary italic">
|
||||
<div class="p-3 text-sm text-muted-foreground italic">
|
||||
(binary data - {responseContentType || "unknown content type"})
|
||||
</div>
|
||||
</div>
|
||||
{:else}
|
||||
<div
|
||||
class="mt-2 bg-background rounded border border-card-border overflow-auto max-h-96"
|
||||
class="mt-2 bg-background rounded-md border border-border overflow-auto max-h-96"
|
||||
>
|
||||
<pre class="p-3 text-sm font-mono">(empty)</pre>
|
||||
</div>
|
||||
@@ -423,39 +407,39 @@
|
||||
</details>
|
||||
</div>
|
||||
|
||||
<div class="p-4 border-t border-card-border flex justify-end">
|
||||
<button onclick={() => dialogEl?.close()} class="btn"> Close </button>
|
||||
<Dialog.Footer class="border-t border-border px-4 py-3 sm:justify-end">
|
||||
<Button variant="outline" onclick={onclose}>Close</Button>
|
||||
</Dialog.Footer>
|
||||
{:else}
|
||||
<div class="flex flex-col items-center justify-center p-12">
|
||||
<p class="text-lg text-muted-foreground">Capture not found</p>
|
||||
<p class="text-sm text-muted-foreground mt-1">The capture may have expired or was never recorded.</p>
|
||||
<div class="mt-4">
|
||||
<Button variant="outline" onclick={onclose}>Close</Button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
{:else}
|
||||
<div class="flex flex-col items-center justify-center p-12">
|
||||
<p class="text-lg text-txtsecondary">Capture not found</p>
|
||||
<p class="text-sm text-txtsecondary mt-1">The capture may have expired or was never recorded.</p>
|
||||
<div class="mt-4">
|
||||
<button onclick={() => dialogEl?.close()} class="btn">Close</button>
|
||||
</div>
|
||||
</div>
|
||||
{/if}
|
||||
</dialog>
|
||||
{/if}
|
||||
</Dialog.Content>
|
||||
</Dialog.Root>
|
||||
|
||||
<style>
|
||||
.tab-btn {
|
||||
padding: 2px 10px;
|
||||
font-size: 0.75rem;
|
||||
border-radius: 4px;
|
||||
color: var(--color-txtsecondary);
|
||||
border-radius: 0;
|
||||
color: var(--muted-foreground);
|
||||
cursor: pointer;
|
||||
border: 1px solid transparent;
|
||||
background: transparent;
|
||||
transition: all 0.15s;
|
||||
}
|
||||
.tab-btn:hover {
|
||||
color: var(--color-txtmain);
|
||||
background: var(--color-secondary);
|
||||
color: var(--foreground);
|
||||
background: var(--accent);
|
||||
}
|
||||
.tab-btn-active {
|
||||
color: var(--color-primary);
|
||||
background: color-mix(in srgb, var(--color-primary) 12%, transparent);
|
||||
border-color: color-mix(in srgb, var(--color-primary) 25%, transparent);
|
||||
color: var(--primary);
|
||||
background: color-mix(in srgb, var(--primary) 12%, transparent);
|
||||
border-color: color-mix(in srgb, var(--primary) 25%, transparent);
|
||||
}
|
||||
</style>
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
<script lang="ts">
|
||||
import { connectionState } from "../stores/theme";
|
||||
import { versionInfo } from "../stores/api";
|
||||
|
||||
let eventStatusColor = $derived.by(() => {
|
||||
switch ($connectionState) {
|
||||
@@ -14,9 +13,7 @@
|
||||
}
|
||||
});
|
||||
|
||||
let tooltipText = $derived(
|
||||
`Event Stream: ${$connectionState ?? "unknown"}\nAPI Version: ${$versionInfo?.version ?? "unknown"}\nCommit Hash: ${$versionInfo?.commit?.substring(0, 7) ?? "unknown"}\nBuild Date: ${$versionInfo?.build_date ?? "unknown"}`
|
||||
);
|
||||
let tooltipText = $derived(`Event Stream: ${$connectionState ?? "unknown"}`);
|
||||
</script>
|
||||
|
||||
<div class="flex items-center" title={tooltipText}>
|
||||
|
||||
@@ -1,141 +0,0 @@
|
||||
<script lang="ts">
|
||||
import { link } from "svelte-spa-router";
|
||||
import { screenWidth, toggleTheme, themeMode, appTitle, isNarrow } from "../stores/theme";
|
||||
import { currentRoute } from "../stores/route";
|
||||
import { playgroundActivity } from "../stores/playgroundActivity";
|
||||
import ConnectionStatus from "./ConnectionStatus.svelte";
|
||||
|
||||
function handleTitleChange(newTitle: string): void {
|
||||
const sanitized = newTitle.replace(/\n/g, "").trim().substring(0, 64) || "llama-swap";
|
||||
appTitle.set(sanitized);
|
||||
}
|
||||
|
||||
function handleKeyDown(e: KeyboardEvent): void {
|
||||
if (e.key === "Enter") {
|
||||
e.preventDefault();
|
||||
const target = e.currentTarget as HTMLElement;
|
||||
handleTitleChange(target.textContent || "(set title)");
|
||||
target.blur();
|
||||
}
|
||||
}
|
||||
|
||||
function handleBlur(e: FocusEvent): void {
|
||||
const target = e.currentTarget as HTMLElement;
|
||||
handleTitleChange(target.textContent || "(set title)");
|
||||
}
|
||||
|
||||
function isActive(path: string, current: string): boolean {
|
||||
return path === "/" ? current === "/" : current.startsWith(path);
|
||||
}
|
||||
|
||||
</script>
|
||||
|
||||
<header
|
||||
class="flex items-center justify-between bg-surface border-b border-border px-4 {$isNarrow
|
||||
? 'py-1 h-[60px]'
|
||||
: 'p-2 h-[75px]'}"
|
||||
>
|
||||
{#if $screenWidth !== "xs" && $screenWidth !== "sm"}
|
||||
<h1
|
||||
contenteditable="true"
|
||||
class="p-0 outline-none hover:bg-gray-100 dark:hover:bg-gray-700 rounded"
|
||||
onblur={handleBlur}
|
||||
onkeydown={handleKeyDown}
|
||||
>
|
||||
{$appTitle}
|
||||
</h1>
|
||||
{/if}
|
||||
|
||||
<menu class="flex items-center gap-4 overflow-x-auto">
|
||||
<a
|
||||
href="/"
|
||||
use:link
|
||||
class="p-1 whitespace-nowrap {isActive('/', $currentRoute) ? 'font-semibold underline underline-offset-4' : ''} {$playgroundActivity ? 'activity-link' : 'text-gray-600 hover:text-black dark:text-gray-300 dark:hover:text-gray-100'}"
|
||||
>
|
||||
Playground
|
||||
</a>
|
||||
<a
|
||||
href="/models"
|
||||
use:link
|
||||
class="text-gray-600 hover:text-black dark:text-gray-300 dark:hover:text-gray-100 p-1 whitespace-nowrap"
|
||||
class:font-semibold={isActive("/models", $currentRoute)}
|
||||
class:underline={isActive("/models", $currentRoute)}
|
||||
class:underline-offset-4={isActive("/models", $currentRoute)}
|
||||
>
|
||||
Models
|
||||
</a>
|
||||
<a
|
||||
href="/activity"
|
||||
use:link
|
||||
class="text-gray-600 hover:text-black dark:text-gray-300 dark:hover:text-gray-100 p-1 whitespace-nowrap"
|
||||
class:font-semibold={isActive("/activity", $currentRoute)}
|
||||
class:underline={isActive("/activity", $currentRoute)}
|
||||
class:underline-offset-4={isActive("/activity", $currentRoute)}
|
||||
>
|
||||
Activity
|
||||
</a>
|
||||
<a
|
||||
href="/logs"
|
||||
use:link
|
||||
class="text-gray-600 hover:text-black dark:text-gray-300 dark:hover:text-gray-100 p-1 whitespace-nowrap"
|
||||
class:font-semibold={isActive("/logs", $currentRoute)}
|
||||
class:underline={isActive("/logs", $currentRoute)}
|
||||
class:underline-offset-4={isActive("/logs", $currentRoute)}
|
||||
>
|
||||
Logs
|
||||
</a>
|
||||
<a
|
||||
href="/performance"
|
||||
use:link
|
||||
class="text-gray-600 hover:text-black dark:text-gray-300 dark:hover:text-gray-100 p-1 whitespace-nowrap"
|
||||
class:font-semibold={isActive("/performance", $currentRoute)}
|
||||
class:underline={isActive("/performance", $currentRoute)}
|
||||
class:underline-offset-4={isActive("/performance", $currentRoute)}
|
||||
>
|
||||
Performance
|
||||
</a>
|
||||
<button onclick={toggleTheme} title="Toggle theme (current: {$themeMode})">
|
||||
{#if $themeMode === "system"}
|
||||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="currentColor" class="w-5 h-5">
|
||||
<path d="M0,9c0-.552,.448-1,1-1H3.108c.147-.874,.472-1.721,1.006-2.471l-1.478-1.478c-.391-.391-.391-1.023,0-1.414s1.023-.391,1.414,0l1.478,1.478c.751-.534,1.598-.859,2.471-1.006V1c0-.552,.448-1,1-1s1,.448,1,1V3.108c.874,.147,1.725,.466,2.477,1.001l1.473-1.473c.391-.391,1.023-.391,1.414,0s.391,1.023,0,1.414L3.963,15.45c-.195,.195-.451,.293-.707,.293s-.512-.098-.707-.293c-.391-.391-.391-1.023,0-1.414l1.56-1.56c-.535-.751-.854-1.602-1.001-2.477H1c-.552,0-1-.448-1-1ZM23.707,.293c-.391-.391-1.023-.391-1.414,0L.293,22.293c-.391,.391-.391,1.023,0,1.414,.195,.195,.451,.293,.707,.293s.512-.098,.707-.293L23.707,1.707c.391-.391,.391-1.023,0-1.414Zm-.283,10.954c.32-.15,.538-.458,.572-.81,.034-.353-.121-.696-.407-.904-.858-.625-1.833-1.066-2.897-1.315-.335-.078-.69,.022-.934,.267l-8.392,8.391c-.244,.244-.345,.597-.267,.933,.843,3.646,4.047,6.191,7.792,6.191,1.695,0,3.32-.53,4.697-1.533,.286-.208,.441-.553,.407-.904-.034-.353-.251-.66-.572-.811-1.842-.861-3.033-2.727-3.033-4.752s1.19-3.891,3.033-4.753Z"/>
|
||||
</svg>
|
||||
{:else if $themeMode === "light"}
|
||||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="currentColor" class="w-5 h-5">
|
||||
<path
|
||||
fill-rule="evenodd"
|
||||
d="M12 2.25a.75.75 0 0 1 .75.75v2.25a.75.75 0 0 1-1.5 0V3a.75.75 0 0 1 .75-.75ZM7.5 12a4.5 4.5 0 1 1 9 0 4.5 4.5 0 0 1-9 0ZM18.894 6.166a.75.75 0 0 0-1.06-1.06l-1.591 1.59a.75.75 0 1 0 1.06 1.061l1.591-1.59ZM21.75 12a.75.75 0 0 1-.75.75h-2.25a.75.75 0 0 1 0-1.5H21a.75.75 0 0 1 .75.75ZM17.834 18.894a.75.75 0 0 0 1.06-1.06l-1.59-1.591a.75.75 0 1 0-1.061 1.06l1.591 1.591ZM12 18a.75.75 0 0 1 .75.75V21a.75.75 0 0 1-1.5 0v-2.25A.75.75 0 0 1 12 18ZM7.758 17.303a.75.75 0 0 0-1.061-1.06l-1.591 1.59a.75.75 0 0 0 1.06 1.061l1.591-1.59ZM6 12a.75.75 0 0 1-.75.75H3a.75.75 0 0 1 0-1.5h2.25A.75.75 0 0 1 6 12ZM6.697 7.757a.75.75 0 0 0 1.06-1.06l-1.59-1.591a.75.75 0 0 0-1.061 1.06l1.59 1.591Z"
|
||||
/>
|
||||
</svg>
|
||||
{:else}
|
||||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="currentColor" class="w-5 h-5">
|
||||
<path
|
||||
fill-rule="evenodd"
|
||||
d="M9.528 1.718a.75.75 0 0 1 .162.819A8.97 8.97 0 0 0 9 6a9 9 0 0 0 9 9 8.97 8.97 0 0 0 3.463-.69.75.75 0 0 1 .981.98 10.503 10.503 0 0 1-9.694 6.46c-5.799 0-10.5-4.7-10.5-10.5 0-4.368 2.667-8.112 6.46-9.694a.75.75 0 0 1 .818.162Z"
|
||||
clip-rule="evenodd"
|
||||
/>
|
||||
</svg>
|
||||
{/if}
|
||||
</button>
|
||||
<ConnectionStatus />
|
||||
</menu>
|
||||
</header>
|
||||
|
||||
<style>
|
||||
.activity-link {
|
||||
background: linear-gradient(90deg, #6366f1, #8b5cf6, #a855f7, #8b5cf6, #6366f1);
|
||||
background-size: 200% 100%;
|
||||
-webkit-background-clip: text;
|
||||
background-clip: text;
|
||||
-webkit-text-fill-color: transparent;
|
||||
animation: gradient-shift 2s linear infinite;
|
||||
}
|
||||
|
||||
@keyframes gradient-shift {
|
||||
0% {
|
||||
background-position: 0% 50%;
|
||||
}
|
||||
100% {
|
||||
background-position: 200% 50%;
|
||||
}
|
||||
}
|
||||
</style>
|
||||
@@ -1,5 +1,9 @@
|
||||
<script lang="ts">
|
||||
import { persistentStore } from "../stores/persistent";
|
||||
import { Type, WrapText, Search, SearchX, CircleX } from "@lucide/svelte";
|
||||
import { Button } from "$lib/components/ui/button/index.js";
|
||||
import { Input } from "$lib/components/ui/input/index.js";
|
||||
import * as Card from "$lib/components/ui/card/index.js";
|
||||
|
||||
interface Props {
|
||||
id: string;
|
||||
@@ -81,59 +85,32 @@
|
||||
});
|
||||
</script>
|
||||
|
||||
<div class="rounded-lg overflow-hidden flex flex-col bg-gray-950/5 dark:bg-white/10 h-full w-full p-1">
|
||||
<div class="p-4">
|
||||
<div class="flex items-center justify-between">
|
||||
<h3 class="m-0 text-lg p-0">{title}</h3>
|
||||
|
||||
<div class="flex gap-2 items-center">
|
||||
<button class="btn border-0" onclick={toggleFontSize} title="Change font size">
|
||||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="currentColor" class="w-4 h-4">
|
||||
<path d="M2 4v3h5v12h3V7h5V4H2zm19 5h-9v3h3v7h3v-7h3V9z"/>
|
||||
</svg>
|
||||
</button>
|
||||
<button class="btn border-0" onclick={toggleWrapText} title="Toggle text wrap">
|
||||
{#if $wrapTextStore}
|
||||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="currentColor" class="w-4 h-4">
|
||||
<path fill-rule="evenodd" d="M3 6.75A.75.75 0 0 1 3.75 6h16.5a.75.75 0 0 1 0 1.5H3.75A.75.75 0 0 1 3 6.75ZM3 12a.75.75 0 0 1 .75-.75h16.5a.75.75 0 0 1 0 1.5H3.75A.75.75 0 0 1 3 12Zm0 5.25a.75.75 0 0 1 .75-.75h16.5a.75.75 0 0 1 0 1.5H3.75a.75.75 0 0 1-.75-.75Z" clip-rule="evenodd" />
|
||||
</svg>
|
||||
{:else}
|
||||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="currentColor" class="w-4 h-4">
|
||||
<path fill-rule="evenodd" d="M3 6.75A.75.75 0 0 1 3.75 6h16.5a.75.75 0 0 1 0 1.5H3.75A.75.75 0 0 1 3 6.75ZM3 12a.75.75 0 0 1 .75-.75h10.5a.75.75 0 0 1 0 1.5H3.75A.75.75 0 0 1 3 12Zm0 5.25a.75.75 0 0 1 .75-.75h16.5a.75.75 0 0 1 0 1.5H3.75a.75.75 0 0 1-.75-.75Z" clip-rule="evenodd" />
|
||||
</svg>
|
||||
{/if}
|
||||
</button>
|
||||
<button class="btn border-0" onclick={toggleFilter} title="Toggle filter">
|
||||
{#if $showFilterStore}
|
||||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="currentColor" class="w-4 h-4">
|
||||
<path fill-rule="evenodd" d="M10.5 3.75a6.75 6.75 0 1 0 0 13.5 6.75 6.75 0 0 0 0-13.5ZM2.25 10.5a8.25 8.25 0 1 1 14.59 5.28l4.69 4.69a.75.75 0 1 1-1.06 1.06l-4.69-4.69A8.25 8.25 0 0 1 2.25 10.5Z" clip-rule="evenodd" />
|
||||
</svg>
|
||||
{:else}
|
||||
<svg xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24" stroke-width="1.5" stroke="currentColor" class="w-4 h-4">
|
||||
<path stroke-linecap="round" stroke-linejoin="round" d="m21 21-5.197-5.197m0 0A7.5 7.5 0 1 0 5.196 5.196a7.5 7.5 0 0 0 10.607 10.607Z" />
|
||||
</svg>
|
||||
{/if}
|
||||
</button>
|
||||
<Card.Root class="bg-muted/30 h-full w-full gap-0 overflow-hidden py-0">
|
||||
<Card.Header class="border-b px-4 py-2">
|
||||
<Card.Title class="text-sm font-semibold">{title}</Card.Title>
|
||||
<Card.Action>
|
||||
<div class="flex items-center gap-1">
|
||||
<Button variant="ghost" size="icon-sm" onclick={toggleFontSize} title="Change font size">
|
||||
<Type />
|
||||
</Button>
|
||||
<Button variant="ghost" size="icon-sm" onclick={toggleWrapText} title="Toggle text wrap">
|
||||
<WrapText class={$wrapTextStore ? "text-primary" : ""} />
|
||||
</Button>
|
||||
<Button variant="ghost" size="icon-sm" onclick={toggleFilter} title="Toggle filter">
|
||||
{#if $showFilterStore}<SearchX />{:else}<Search />{/if}
|
||||
</Button>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
</Card.Action>
|
||||
{#if $showFilterStore}
|
||||
<div class="mt-2 flex gap-2 items-center w-full">
|
||||
<input
|
||||
type="text"
|
||||
class="w-full text-sm border border-gray-950/10 dark:border-white/5 p-2 rounded outline-none"
|
||||
placeholder="Filter logs (regex)..."
|
||||
bind:value={filterRegex}
|
||||
/>
|
||||
<button class="pl-2" onclick={() => (filterRegex = "")} aria-label="Clear filter">
|
||||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="currentColor" class="w-6 h-6">
|
||||
<path fill-rule="evenodd" d="M12 2.25c-5.385 0-9.75 4.365-9.75 9.75s4.365 9.75 9.75 9.75 9.75-4.365 9.75-9.75S17.385 2.25 12 2.25Zm-1.72 6.97a.75.75 0 1 0-1.06 1.06L10.94 12l-1.72 1.72a.75.75 0 1 0 1.06 1.06L12 13.06l1.72 1.72a.75.75 0 1 0 1.06-1.06L13.06 12l1.72-1.72a.75.75 0 1 0-1.06-1.06L12 10.94l-1.72-1.72Z" clip-rule="evenodd" />
|
||||
</svg>
|
||||
</button>
|
||||
<div class="flex w-full items-center gap-2 pt-2">
|
||||
<Input type="text" class="h-8" placeholder="Filter logs (regex)..." bind:value={filterRegex} />
|
||||
<Button variant="ghost" size="icon-sm" onclick={() => (filterRegex = "")} aria-label="Clear filter">
|
||||
<CircleX />
|
||||
</Button>
|
||||
</div>
|
||||
{/if}
|
||||
</div>
|
||||
<div class="rounded-lg bg-background font-mono text-sm flex-1 overflow-hidden">
|
||||
</Card.Header>
|
||||
<Card.Content class="bg-background min-h-0 flex-1 p-0 font-mono text-sm">
|
||||
<pre bind:this={preElement} onscroll={handleScroll} class="{textWrapClass} {fontSizeClass} h-full overflow-auto p-4">{filteredLogs}</pre>
|
||||
</div>
|
||||
</div>
|
||||
</Card.Content>
|
||||
</Card.Root>
|
||||
|
||||
@@ -0,0 +1,37 @@
|
||||
<script lang="ts">
|
||||
import type { Snippet } from "svelte";
|
||||
import * as Tooltip from "$lib/components/ui/tooltip/index.js";
|
||||
|
||||
interface Props {
|
||||
metadata: Record<string, string> | undefined;
|
||||
children: Snippet;
|
||||
}
|
||||
|
||||
let { metadata, children }: Props = $props();
|
||||
|
||||
let entries = $derived(Object.entries(metadata || {}));
|
||||
</script>
|
||||
|
||||
{#if entries.length > 0}
|
||||
<Tooltip.Provider>
|
||||
<Tooltip.Root>
|
||||
<Tooltip.Trigger>
|
||||
{@render children()}
|
||||
</Tooltip.Trigger>
|
||||
<Tooltip.Content class="min-w-[12rem] max-w-[24rem] normal-case">
|
||||
<table class="w-full text-left">
|
||||
<tbody>
|
||||
{#each entries as [key, value]}
|
||||
<tr class="border-b border-white/10 last:border-0">
|
||||
<td class="py-1 pr-3 font-medium whitespace-nowrap text-primary">{key}</td>
|
||||
<td class="py-1 break-all">{value}</td>
|
||||
</tr>
|
||||
{/each}
|
||||
</tbody>
|
||||
</table>
|
||||
</Tooltip.Content>
|
||||
</Tooltip.Root>
|
||||
</Tooltip.Provider>
|
||||
{:else}
|
||||
{@render children()}
|
||||
{/if}
|
||||
@@ -1,211 +0,0 @@
|
||||
<script lang="ts">
|
||||
import { models, loadModel, unloadAllModels, unloadSingleModel } from "../stores/api";
|
||||
import { isNarrow } from "../stores/theme";
|
||||
import { persistentStore } from "../stores/persistent";
|
||||
import type { Model } from "../lib/types";
|
||||
|
||||
let isUnloading = $state(false);
|
||||
let menuOpen = $state(false);
|
||||
|
||||
const showUnlistedStore = persistentStore<boolean>("showUnlisted", true);
|
||||
const showIdorNameStore = persistentStore<"id" | "name">("showIdorName", "id");
|
||||
|
||||
let filteredModels = $derived.by(() => {
|
||||
const filtered = $models.filter((model) => $showUnlistedStore || !model.unlisted);
|
||||
const peerModels = filtered.filter((m) => m.peerID);
|
||||
|
||||
// Group peer models by peerID
|
||||
const grouped = peerModels.reduce(
|
||||
(acc, model) => {
|
||||
const peerId = model.peerID || "unknown";
|
||||
if (!acc[peerId]) acc[peerId] = [];
|
||||
acc[peerId].push(model);
|
||||
return acc;
|
||||
},
|
||||
{} as Record<string, Model[]>
|
||||
);
|
||||
|
||||
return {
|
||||
regularModels: filtered.filter((m) => !m.peerID),
|
||||
peerModelsByPeerId: grouped,
|
||||
};
|
||||
});
|
||||
|
||||
async function handleUnloadAllModels(): Promise<void> {
|
||||
isUnloading = true;
|
||||
try {
|
||||
await unloadAllModels();
|
||||
} catch (e) {
|
||||
console.error(e);
|
||||
} finally {
|
||||
setTimeout(() => (isUnloading = false), 1000);
|
||||
}
|
||||
}
|
||||
|
||||
function toggleIdorName(): void {
|
||||
showIdorNameStore.update((prev) => (prev === "name" ? "id" : "name"));
|
||||
}
|
||||
|
||||
function toggleShowUnlisted(): void {
|
||||
showUnlistedStore.update((prev) => !prev);
|
||||
}
|
||||
|
||||
function getModelDisplay(model: Model): string {
|
||||
return $showIdorNameStore === "id" ? model.id : (model.name || model.id);
|
||||
}
|
||||
</script>
|
||||
|
||||
<div class="card h-full flex flex-col">
|
||||
<div class="shrink-0">
|
||||
<div class="flex justify-between items-baseline">
|
||||
<h2 class={$isNarrow ? "text-xl" : ""}>Models</h2>
|
||||
{#if $isNarrow}
|
||||
<div class="relative">
|
||||
<button class="btn text-base flex items-center gap-2 py-1" onclick={() => (menuOpen = !menuOpen)} aria-label="Toggle menu">
|
||||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="currentColor" class="w-5 h-5">
|
||||
<path fill-rule="evenodd" d="M3 6.75A.75.75 0 0 1 3.75 6h16.5a.75.75 0 0 1 0 1.5H3.75A.75.75 0 0 1 3 6.75ZM3 12a.75.75 0 0 1 .75-.75h16.5a.75.75 0 0 1 0 1.5H3.75A.75.75 0 0 1 3 12Zm0 5.25a.75.75 0 0 1 .75-.75h16.5a.75.75 0 0 1 0 1.5H3.75a.75.75 0 0 1-.75-.75Z" clip-rule="evenodd" />
|
||||
</svg>
|
||||
</button>
|
||||
{#if menuOpen}
|
||||
<div class="absolute right-0 mt-2 w-48 bg-surface border border-gray-200 dark:border-white/10 rounded shadow-lg z-20">
|
||||
<button
|
||||
class="w-full text-left px-4 py-2 hover:bg-secondary-hover flex items-center gap-2"
|
||||
onclick={() => { toggleIdorName(); menuOpen = false; }}
|
||||
>
|
||||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="currentColor" class="w-5 h-5">
|
||||
<path fill-rule="evenodd" d="M15.97 2.47a.75.75 0 0 1 1.06 0l4.5 4.5a.75.75 0 0 1 0 1.06l-4.5 4.5a.75.75 0 1 1-1.06-1.06l3.22-3.22H7.5a.75.75 0 0 1 0-1.5h11.69l-3.22-3.22a.75.75 0 0 1 0-1.06Zm-7.94 9a.75.75 0 0 1 0 1.06l-3.22 3.22H16.5a.75.75 0 0 1 0 1.5H4.81l3.22 3.22a.75.75 0 1 1-1.06 1.06l-4.5-4.5a.75.75 0 0 1 0-1.06l4.5-4.5a.75.75 0 0 1 1.06 0Z" clip-rule="evenodd" />
|
||||
</svg>
|
||||
{$showIdorNameStore === "id" ? "Show Name" : "Show ID"}
|
||||
</button>
|
||||
<button
|
||||
class="w-full text-left px-4 py-2 hover:bg-secondary-hover flex items-center gap-2"
|
||||
onclick={() => { toggleShowUnlisted(); menuOpen = false; }}
|
||||
>
|
||||
{#if $showUnlistedStore}
|
||||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="currentColor" class="w-5 h-5">
|
||||
<path d="M3.53 2.47a.75.75 0 0 0-1.06 1.06l18 18a.75.75 0 1 0 1.06-1.06l-18-18ZM22.676 12.553a11.249 11.249 0 0 1-2.631 4.31l-3.099-3.099a5.25 5.25 0 0 0-6.71-6.71L7.759 4.577a11.217 11.217 0 0 1 4.242-.827c4.97 0 9.185 3.223 10.675 7.69.12.362.12.752 0 1.113Z" />
|
||||
<path d="M15.75 12c0 .18-.013.357-.037.53l-4.244-4.243A3.75 3.75 0 0 1 15.75 12ZM12.53 15.713l-4.243-4.244a3.75 3.75 0 0 0 4.244 4.243Z" />
|
||||
<path d="M6.75 12c0-.619.107-1.213.304-1.764l-3.1-3.1a11.25 11.25 0 0 0-2.63 4.31c-.12.362-.12.752 0 1.114 1.489 4.467 5.704 7.69 10.675 7.69 1.5 0 2.933-.294 4.242-.827l-2.477-2.477A5.25 5.25 0 0 1 6.75 12Z" />
|
||||
</svg>
|
||||
{:else}
|
||||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="currentColor" class="w-5 h-5">
|
||||
<path d="M12 15a3 3 0 1 0 0-6 3 3 0 0 0 0 6Z" />
|
||||
<path fill-rule="evenodd" d="M1.323 11.447C2.811 6.976 7.028 3.75 12.001 3.75c4.97 0 9.185 3.223 10.675 7.69.12.362.12.752 0 1.113-1.487 4.471-5.705 7.697-10.677 7.697-4.97 0-9.186-3.223-10.675-7.69a1.762 1.762 0 0 1 0-1.113ZM17.25 12a5.25 5.25 0 1 1-10.5 0 5.25 5.25 0 0 1 10.5 0Z" clip-rule="evenodd" />
|
||||
</svg>
|
||||
{/if}
|
||||
{$showUnlistedStore ? "Hide Unlisted" : "Show Unlisted"}
|
||||
</button>
|
||||
<button
|
||||
class="w-full text-left px-4 py-2 hover:bg-secondary-hover flex items-center gap-2"
|
||||
onclick={() => { handleUnloadAllModels(); menuOpen = false; }}
|
||||
disabled={isUnloading}
|
||||
>
|
||||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="currentColor" class="w-6 h-6">
|
||||
<path fill-rule="evenodd" d="M12 2.25c-5.385 0-9.75 4.365-9.75 9.75s4.365 9.75 9.75 9.75 9.75-4.365 9.75-9.75S17.385 2.25 12 2.25Zm.53 5.47a.75.75 0 0 0-1.06 0l-3 3a.75.75 0 1 0 1.06 1.06l1.72-1.72v5.69a.75.75 0 0 0 1.5 0v-5.69l1.72 1.72a.75.75 0 1 0 1.06-1.06l-3-3Z" clip-rule="evenodd" />
|
||||
</svg>
|
||||
{isUnloading ? "Unloading..." : "Unload All"}
|
||||
</button>
|
||||
</div>
|
||||
{/if}
|
||||
</div>
|
||||
{/if}
|
||||
</div>
|
||||
{#if !$isNarrow}
|
||||
<div class="flex justify-between">
|
||||
<div class="flex gap-2">
|
||||
<button class="btn text-base flex items-center gap-2" onclick={toggleIdorName} style="line-height: 1.2">
|
||||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="currentColor" class="w-5 h-5">
|
||||
<path fill-rule="evenodd" d="M15.97 2.47a.75.75 0 0 1 1.06 0l4.5 4.5a.75.75 0 0 1 0 1.06l-4.5 4.5a.75.75 0 1 1-1.06-1.06l3.22-3.22H7.5a.75.75 0 0 1 0-1.5h11.69l-3.22-3.22a.75.75 0 0 1 0-1.06Zm-7.94 9a.75.75 0 0 1 0 1.06l-3.22 3.22H16.5a.75.75 0 0 1 0 1.5H4.81l3.22 3.22a.75.75 0 1 1-1.06 1.06l-4.5-4.5a.75.75 0 0 1 0-1.06l4.5-4.5a.75.75 0 0 1 1.06 0Z" clip-rule="evenodd" />
|
||||
</svg>
|
||||
{$showIdorNameStore === "id" ? "ID" : "Name"}
|
||||
</button>
|
||||
|
||||
<button class="btn text-base flex items-center gap-2" onclick={toggleShowUnlisted} style="line-height: 1.2">
|
||||
{#if $showUnlistedStore}
|
||||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="currentColor" class="w-5 h-5">
|
||||
<path d="M12 15a3 3 0 1 0 0-6 3 3 0 0 0 0 6Z" />
|
||||
<path fill-rule="evenodd" d="M1.323 11.447C2.811 6.976 7.028 3.75 12.001 3.75c4.97 0 9.185 3.223 10.675 7.69.12.362.12.752 0 1.113-1.487 4.471-5.705 7.697-10.677 7.697-4.97 0-9.186-3.223-10.675-7.69a1.762 1.762 0 0 1 0-1.113ZM17.25 12a5.25 5.25 0 1 1-10.5 0 5.25 5.25 0 0 1 10.5 0Z" clip-rule="evenodd" />
|
||||
</svg>
|
||||
{:else}
|
||||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="currentColor" class="w-5 h-5">
|
||||
<path d="M3.53 2.47a.75.75 0 0 0-1.06 1.06l18 18a.75.75 0 1 0 1.06-1.06l-18-18ZM22.676 12.553a11.249 11.249 0 0 1-2.631 4.31l-3.099-3.099a5.25 5.25 0 0 0-6.71-6.71L7.759 4.577a11.217 11.217 0 0 1 4.242-.827c4.97 0 9.185 3.223 10.675 7.69.12.362.12.752 0 1.113Z" />
|
||||
<path d="M15.75 12c0 .18-.013.357-.037.53l-4.244-4.243A3.75 3.75 0 0 1 15.75 12ZM12.53 15.713l-4.243-4.244a3.75 3.75 0 0 0 4.244 4.243Z" />
|
||||
<path d="M6.75 12c0-.619.107-1.213.304-1.764l-3.1-3.1a11.25 11.25 0 0 0-2.63 4.31c-.12.362-.12.752 0 1.114 1.489 4.467 5.704 7.69 10.675 7.69 1.5 0 2.933-.294 4.242-.827l-2.477-2.477A5.25 5.25 0 0 1 6.75 12Z" />
|
||||
</svg>
|
||||
{/if}
|
||||
unlisted
|
||||
</button>
|
||||
</div>
|
||||
<button class="btn text-base flex items-center gap-2" onclick={handleUnloadAllModels} disabled={isUnloading}>
|
||||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="currentColor" class="w-6 h-6">
|
||||
<path fill-rule="evenodd" d="M12 2.25c-5.385 0-9.75 4.365-9.75 9.75s4.365 9.75 9.75 9.75 9.75-4.365 9.75-9.75S17.385 2.25 12 2.25Zm.53 5.47a.75.75 0 0 0-1.06 0l-3 3a.75.75 0 1 0 1.06 1.06l1.72-1.72v5.69a.75.75 0 0 0 1.5 0v-5.69l1.72 1.72a.75.75 0 1 0 1.06-1.06l-3-3Z" clip-rule="evenodd" />
|
||||
</svg>
|
||||
{isUnloading ? "Unloading..." : "Unload All"}
|
||||
</button>
|
||||
</div>
|
||||
{/if}
|
||||
</div>
|
||||
|
||||
<div class="flex-1 overflow-y-auto">
|
||||
<table class="w-full">
|
||||
<thead class="sticky top-0 bg-card z-10">
|
||||
<tr class="text-left border-b border-gray-200 dark:border-white/10 bg-surface">
|
||||
<th>{$showIdorNameStore === "id" ? "Model ID" : "Name"}</th>
|
||||
<th></th>
|
||||
<th>State</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
{#each filteredModels.regularModels as model (model.id)}
|
||||
<tr class="border-b hover:bg-secondary-hover border-gray-200">
|
||||
<td class={model.unlisted ? "text-txtsecondary" : ""}>
|
||||
<a href="/upstream/{model.id}/" class="font-semibold" target="_blank">
|
||||
{getModelDisplay(model)}
|
||||
</a>
|
||||
{#if model.description}
|
||||
<p class={model.unlisted ? "text-opacity-70" : ""}><em>{model.description}</em></p>
|
||||
{/if}
|
||||
{#if model.aliases && model.aliases.length > 0}
|
||||
<p class="text-xs text-txtsecondary">Aliases: {model.aliases.join(", ")}</p>
|
||||
{/if}
|
||||
</td>
|
||||
<td class="w-12">
|
||||
{#if model.state === "stopped"}
|
||||
<button class="btn btn--sm" onclick={() => loadModel(model.id)}>Load</button>
|
||||
{:else}
|
||||
<button class="btn btn--sm" onclick={() => unloadSingleModel(model.id)} disabled={model.state !== "ready"}>Unload</button>
|
||||
{/if}
|
||||
</td>
|
||||
<td class="w-20">
|
||||
<span class="w-16 text-center status status--{model.state}">{model.state}</span>
|
||||
</td>
|
||||
</tr>
|
||||
{/each}
|
||||
</tbody>
|
||||
</table>
|
||||
|
||||
{#if Object.keys(filteredModels.peerModelsByPeerId).length > 0}
|
||||
<h3 class="mt-8 mb-2">Peer Models</h3>
|
||||
{#each Object.entries(filteredModels.peerModelsByPeerId).sort(([a], [b]) => a.localeCompare(b)) as [peerId, peerModels] (peerId)}
|
||||
<div class="mb-4">
|
||||
<table class="w-full">
|
||||
<thead class="sticky top-0 bg-card z-10">
|
||||
<tr class="text-left border-b border-gray-200 dark:border-white/10 bg-surface">
|
||||
<th class="font-semibold">{peerId}</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
{#each peerModels as model (model.id)}
|
||||
<tr class="border-b hover:bg-secondary-hover border-gray-200">
|
||||
<td class="pl-8 {model.unlisted ? 'text-txtsecondary' : ''}">
|
||||
<span>{model.id}</span>
|
||||
</td>
|
||||
</tr>
|
||||
{/each}
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
{/each}
|
||||
{/if}
|
||||
</div>
|
||||
</div>
|
||||
@@ -2,6 +2,7 @@
|
||||
import { onMount } from "svelte";
|
||||
import { Chart, registerables } from "chart.js";
|
||||
import { isDarkMode } from "../stores/theme";
|
||||
import * as Card from "$lib/components/ui/card/index.js";
|
||||
|
||||
Chart.register(...registerables);
|
||||
|
||||
@@ -143,6 +144,8 @@
|
||||
});
|
||||
</script>
|
||||
|
||||
<div class="card p-4 h-[300px]">
|
||||
<canvas bind:this={canvas}></canvas>
|
||||
</div>
|
||||
<Card.Root class="h-[300px] py-0">
|
||||
<Card.Content class="h-full p-4">
|
||||
<canvas bind:this={canvas}></canvas>
|
||||
</Card.Content>
|
||||
</Card.Root>
|
||||
|
||||
@@ -135,7 +135,7 @@
|
||||
<div
|
||||
role="separator"
|
||||
tabindex="0"
|
||||
class="{handleClass} bg-primary hover:bg-success transition-colors rounded flex-shrink-0"
|
||||
class="{handleClass} bg-primary hover:bg-success transition-colors rounded-md flex-shrink-0"
|
||||
onmousedown={handleMouseDown}
|
||||
ontouchstart={handleTouchStart}
|
||||
onkeydown={handleKeyDown}
|
||||
|
||||
@@ -1,4 +1,6 @@
|
||||
<script lang="ts">
|
||||
import * as Tooltip from "$lib/components/ui/tooltip/index.js";
|
||||
|
||||
interface Props {
|
||||
content: string;
|
||||
}
|
||||
@@ -6,15 +8,7 @@
|
||||
let { content }: Props = $props();
|
||||
</script>
|
||||
|
||||
<div class="relative group inline-block">
|
||||
<span class="cursor-help">ⓘ</span>
|
||||
<div
|
||||
class="absolute top-full left-1/2 transform -translate-x-1/2 mt-2
|
||||
px-3 py-2 bg-gray-900 text-white text-sm rounded-md
|
||||
opacity-0 group-hover:opacity-100 transition-opacity
|
||||
duration-200 pointer-events-none whitespace-nowrap z-50 normal-case"
|
||||
>
|
||||
{content}
|
||||
<div class="absolute bottom-full left-1/2 transform -translate-x-1/2 border-4 border-transparent border-b-gray-900"></div>
|
||||
</div>
|
||||
</div>
|
||||
<Tooltip.Root>
|
||||
<Tooltip.Trigger class="cursor-help align-middle normal-case">ⓘ</Tooltip.Trigger>
|
||||
<Tooltip.Content>{content}</Tooltip.Content>
|
||||
</Tooltip.Root>
|
||||
|
||||
@@ -0,0 +1,21 @@
|
||||
<script lang="ts">
|
||||
import * as Tooltip from "$lib/components/ui/tooltip/index.js";
|
||||
|
||||
interface Props {
|
||||
label: string;
|
||||
tooltip?: string;
|
||||
}
|
||||
|
||||
let { label, tooltip }: Props = $props();
|
||||
</script>
|
||||
|
||||
{#if tooltip}
|
||||
<Tooltip.Root>
|
||||
<Tooltip.Trigger class="cursor-help border-b border-dotted border-current align-middle">
|
||||
{label}
|
||||
</Tooltip.Trigger>
|
||||
<Tooltip.Content>{tooltip}</Tooltip.Content>
|
||||
</Tooltip.Root>
|
||||
{:else}
|
||||
{label}
|
||||
{/if}
|
||||
@@ -0,0 +1,33 @@
|
||||
<script lang="ts">
|
||||
import * as Tooltip from "$lib/components/ui/tooltip/index.js";
|
||||
|
||||
interface Props {
|
||||
metadata: Record<string, string> | undefined;
|
||||
}
|
||||
|
||||
let { metadata }: Props = $props();
|
||||
|
||||
let entries = $derived(Object.entries(metadata || {}));
|
||||
</script>
|
||||
|
||||
{#if entries.length > 0}
|
||||
<Tooltip.Root>
|
||||
<Tooltip.Trigger>
|
||||
<span class="text-muted-foreground hover:text-foreground cursor-help">...</span>
|
||||
</Tooltip.Trigger>
|
||||
<Tooltip.Content class="min-w-[12rem] max-w-[24rem] normal-case">
|
||||
<table class="w-full text-left">
|
||||
<tbody>
|
||||
{#each entries as [key, value]}
|
||||
<tr class="border-b border-white/10 last:border-0">
|
||||
<td class="py-1 pr-3 font-medium whitespace-nowrap text-primary">{key}</td>
|
||||
<td class="py-1 break-all">{value}</td>
|
||||
</tr>
|
||||
{/each}
|
||||
</tbody>
|
||||
</table>
|
||||
</Tooltip.Content>
|
||||
</Tooltip.Root>
|
||||
{:else}
|
||||
<span class="text-muted-foreground">-</span>
|
||||
{/if}
|
||||
@@ -0,0 +1,19 @@
|
||||
<script lang="ts">
|
||||
import { Button } from "$lib/components/ui/button/index.js";
|
||||
|
||||
interface Props {
|
||||
hasCapture: boolean;
|
||||
loading: boolean;
|
||||
onclick: () => void;
|
||||
}
|
||||
|
||||
let { hasCapture, loading, onclick }: Props = $props();
|
||||
</script>
|
||||
|
||||
{#if hasCapture}
|
||||
<Button variant="outline" size="xs" {onclick} disabled={loading}>
|
||||
{loading ? "..." : "View"}
|
||||
</Button>
|
||||
{:else}
|
||||
<span class="text-muted-foreground">-</span>
|
||||
{/if}
|
||||
@@ -0,0 +1,24 @@
|
||||
<script lang="ts">
|
||||
import { metrics } from "../../stores/api";
|
||||
import ActivityTable from "../ActivityTable.svelte";
|
||||
|
||||
interface Props {
|
||||
modelId: string;
|
||||
}
|
||||
|
||||
let { modelId }: Props = $props();
|
||||
|
||||
let modelMetrics = $derived(
|
||||
[...$metrics].filter((m) => m.model === modelId).sort((a, b) => b.id - a.id)
|
||||
);
|
||||
</script>
|
||||
|
||||
<ActivityTable
|
||||
metrics={modelMetrics}
|
||||
storagePrefix="model-detail"
|
||||
showModelColumn={false}
|
||||
showPagination={true}
|
||||
compact={true}
|
||||
title="Recent Activity"
|
||||
emptyMessage="No activity recorded for this model"
|
||||
/>
|
||||
@@ -0,0 +1,44 @@
|
||||
<script lang="ts">
|
||||
import type { Model } from "../../lib/types";
|
||||
import * as Card from "$lib/components/ui/card/index.js";
|
||||
|
||||
interface Props {
|
||||
model: Model;
|
||||
}
|
||||
|
||||
let { model }: Props = $props();
|
||||
|
||||
const capabilityLabels: Record<string, string> = {
|
||||
vision: "Vision",
|
||||
audio_transcriptions: "Transcription",
|
||||
audio_speech: "Speech",
|
||||
image_generation: "Image Gen",
|
||||
image_to_image: "Img→Img",
|
||||
function_calling: "Function Calling",
|
||||
reranker: "Reranker",
|
||||
};
|
||||
|
||||
let capabilities = $derived.by(() => {
|
||||
const caps = model?.capabilities ?? {};
|
||||
return Object.entries(caps).filter(([, v]) => v);
|
||||
});
|
||||
</script>
|
||||
|
||||
<Card.Root class="shrink-0 gap-0 overflow-hidden py-0">
|
||||
<Card.Header class="border-b px-4 py-2">
|
||||
<Card.Title class="text-sm font-semibold">Capabilities</Card.Title>
|
||||
</Card.Header>
|
||||
<Card.Content class="p-3">
|
||||
{#if capabilities.length === 0}
|
||||
<span class="text-muted-foreground text-sm">No capabilities reported.</span>
|
||||
{:else}
|
||||
<div class="flex flex-wrap gap-1.5">
|
||||
{#each capabilities as [key] (key)}
|
||||
<span class="bg-muted text-muted-foreground rounded-md px-2 py-0.5 text-xs font-medium">
|
||||
{capabilityLabels[key] ?? key}
|
||||
</span>
|
||||
{/each}
|
||||
</div>
|
||||
{/if}
|
||||
</Card.Content>
|
||||
</Card.Root>
|
||||
@@ -0,0 +1,26 @@
|
||||
<script lang="ts">
|
||||
import { streamModelLog } from "../../stores/modelLogs";
|
||||
import LogPanel from "../LogPanel.svelte";
|
||||
|
||||
interface Props {
|
||||
modelId: string;
|
||||
}
|
||||
|
||||
let { modelId }: Props = $props();
|
||||
|
||||
let logData = $state("");
|
||||
$effect(() => {
|
||||
const id = modelId;
|
||||
if (!id) {
|
||||
logData = "";
|
||||
return;
|
||||
}
|
||||
const store = streamModelLog(id);
|
||||
const unsub = store.subscribe((v) => (logData = v));
|
||||
return () => unsub();
|
||||
});
|
||||
</script>
|
||||
|
||||
<div class="h-full">
|
||||
<LogPanel id={`model-${modelId}`} title="Model Logs" {logData} />
|
||||
</div>
|
||||
@@ -4,6 +4,8 @@
|
||||
import { transcribeAudio } from "../../lib/audioApi";
|
||||
import { playgroundStores } from "../../stores/playgroundActivity";
|
||||
import ModelSelector from "./ModelSelector.svelte";
|
||||
import { Button } from "$lib/components/ui/button/index.js";
|
||||
import { Copy, Check } from "@lucide/svelte";
|
||||
|
||||
const selectedModelStore = persistentStore<string>("playground-audio-model", "");
|
||||
|
||||
@@ -145,19 +147,19 @@
|
||||
<div class="flex flex-col h-full">
|
||||
<!-- Model selector -->
|
||||
<div class="shrink-0 flex flex-wrap gap-2 mb-4">
|
||||
<ModelSelector bind:value={$selectedModelStore} placeholder="Select an audio model..." disabled={isTranscribing} />
|
||||
<ModelSelector bind:value={$selectedModelStore} placeholder="Select an audio model..." disabled={isTranscribing} capabilities={["audio_transcriptions"]} />
|
||||
</div>
|
||||
|
||||
<!-- Empty state for no models configured -->
|
||||
{#if !hasModels}
|
||||
<div class="flex-1 flex items-center justify-center text-txtsecondary">
|
||||
<div class="flex-1 flex items-center justify-center text-muted-foreground">
|
||||
<p>No models configured. Add models to your configuration to transcribe audio.</p>
|
||||
</div>
|
||||
{:else}
|
||||
<!-- File upload / Result display area -->
|
||||
<div class="flex-1 overflow-auto mb-4 flex items-center justify-center bg-surface border border-gray-200 dark:border-white/10 rounded">
|
||||
<div class="flex-1 overflow-auto mb-4 flex items-center justify-center bg-background border border-border rounded-md">
|
||||
{#if isTranscribing}
|
||||
<div class="text-center text-txtsecondary">
|
||||
<div class="text-center text-muted-foreground">
|
||||
<div class="inline-block w-8 h-8 border-4 border-primary border-t-transparent rounded-full animate-spin mb-2"></div>
|
||||
<p>Transcribing audio...</p>
|
||||
</div>
|
||||
@@ -169,29 +171,26 @@
|
||||
{:else if transcriptionResult}
|
||||
<div class="w-full h-full flex flex-col p-4">
|
||||
<div class="flex justify-between items-center mb-2">
|
||||
<h3 class="font-medium">Transcription Result</h3>
|
||||
<button
|
||||
class="btn btn-sm"
|
||||
<h3 class="pb-0 font-medium">Transcription Result</h3>
|
||||
<Button
|
||||
variant="outline"
|
||||
size="icon-sm"
|
||||
onclick={copyToClipboard}
|
||||
title={copied ? 'Copied!' : 'Copy to clipboard'}
|
||||
>
|
||||
{#if copied}
|
||||
<svg class="w-5 h-5 text-green-500" fill="none" stroke="currentColor" viewBox="0 0 24 24">
|
||||
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M5 13l4 4L19 7"></path>
|
||||
</svg>
|
||||
<Check class="text-success" />
|
||||
{:else}
|
||||
<svg class="w-5 h-5" fill="none" stroke="currentColor" viewBox="0 0 24 24">
|
||||
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M8 16H6a2 2 0 01-2-2V6a2 2 0 012-2h8a2 2 0 012 2v2m-6 12h8a2 2 0 002-2v-8a2 2 0 00-2-2h-8a2 2 0 00-2 2v8a2 2 0 002 2z"></path>
|
||||
</svg>
|
||||
<Copy />
|
||||
{/if}
|
||||
</button>
|
||||
</Button>
|
||||
</div>
|
||||
<div class="flex-1 overflow-auto p-3 rounded border border-gray-200 dark:border-white/10 bg-background whitespace-pre-wrap">
|
||||
<div class="flex-1 overflow-auto p-3 rounded-md border border-border bg-background whitespace-pre-wrap">
|
||||
{transcriptionResult}
|
||||
</div>
|
||||
</div>
|
||||
{:else if selectedFile}
|
||||
<div class="text-center text-txtsecondary p-4">
|
||||
<div class="text-center text-muted-foreground p-4">
|
||||
<p class="font-medium mb-2">File Selected</p>
|
||||
<p class="text-sm">{selectedFile.name}</p>
|
||||
<p class="text-xs mt-1">{formatFileSize(selectedFile.size)}</p>
|
||||
@@ -200,7 +199,7 @@
|
||||
<div
|
||||
role="region"
|
||||
aria-label="Audio file drop zone"
|
||||
class="w-full h-full flex items-center justify-center text-center text-txtsecondary p-8 {isDragging ? 'bg-primary/10' : ''}"
|
||||
class="w-full h-full flex items-center justify-center text-center text-muted-foreground p-8 {isDragging ? 'bg-primary/10' : ''}"
|
||||
ondragover={handleDragOver}
|
||||
ondragleave={handleDragLeave}
|
||||
ondrop={handleDrop}
|
||||
@@ -223,33 +222,21 @@
|
||||
onchange={handleFileSelect}
|
||||
bind:this={fileInput}
|
||||
/>
|
||||
<button
|
||||
class="btn"
|
||||
onclick={() => fileInput?.click()}
|
||||
disabled={isTranscribing}
|
||||
>
|
||||
<Button variant="outline" onclick={() => fileInput?.click()} disabled={isTranscribing}>
|
||||
Browse Files
|
||||
</button>
|
||||
</Button>
|
||||
<div class="flex-1"></div>
|
||||
{#if isTranscribing}
|
||||
<button class="btn bg-red-500 hover:bg-red-600 text-white" onclick={cancelTranscription}>
|
||||
Cancel
|
||||
</button>
|
||||
<Button variant="destructive" onclick={cancelTranscription}>Cancel</Button>
|
||||
{:else}
|
||||
<button
|
||||
class="btn bg-primary text-btn-primary-text hover:opacity-90"
|
||||
onclick={transcribe}
|
||||
disabled={!canTranscribe}
|
||||
>
|
||||
Transcribe
|
||||
</button>
|
||||
<button
|
||||
class="btn"
|
||||
<Button onclick={transcribe} disabled={!canTranscribe}>Transcribe</Button>
|
||||
<Button
|
||||
variant="outline"
|
||||
onclick={clearAll}
|
||||
disabled={!selectedFile && !transcriptionResult && !error}
|
||||
>
|
||||
Clear
|
||||
</button>
|
||||
</Button>
|
||||
{/if}
|
||||
</div>
|
||||
{/if}
|
||||
|
||||
@@ -7,6 +7,14 @@
|
||||
import ChatMessageComponent from "./ChatMessage.svelte";
|
||||
import ModelSelector from "./ModelSelector.svelte";
|
||||
import ExpandableTextarea from "./ExpandableTextarea.svelte";
|
||||
import { Settings, Paperclip } from "@lucide/svelte";
|
||||
import { Button } from "$lib/components/ui/button/index.js";
|
||||
import { Input } from "$lib/components/ui/input/index.js";
|
||||
import { Textarea } from "$lib/components/ui/textarea/index.js";
|
||||
import { Label } from "$lib/components/ui/label/index.js";
|
||||
import * as Select from "$lib/components/ui/select/index.js";
|
||||
import * as Dialog from "$lib/components/ui/dialog/index.js";
|
||||
import { X } from "@lucide/svelte";
|
||||
|
||||
const selectedModelStore = persistentStore<string>("playground-selected-model", "");
|
||||
const systemPromptStore = persistentStore<string>("playground-system-prompt", "");
|
||||
@@ -30,6 +38,7 @@
|
||||
let reasoningStartTime = $state<number>(0);
|
||||
let abortController = $state<AbortController | null>(null);
|
||||
let messagesContainer: HTMLDivElement | undefined = $state();
|
||||
let inputRef: HTMLTextAreaElement | null = $state(null);
|
||||
let showSettings = $state(false);
|
||||
let attachedImages = $state<string[]>([]);
|
||||
let fileInput = $state<HTMLInputElement | null>(null);
|
||||
@@ -42,6 +51,14 @@
|
||||
playgroundStores.chatStreaming.set(isStreaming);
|
||||
});
|
||||
|
||||
let wasStreaming = $state(false);
|
||||
$effect(() => {
|
||||
if (wasStreaming && !isStreaming) {
|
||||
inputRef?.focus();
|
||||
}
|
||||
wasStreaming = isStreaming;
|
||||
});
|
||||
|
||||
function handleMessagesScroll() {
|
||||
if (!messagesContainer) return;
|
||||
const { scrollTop, scrollHeight, clientHeight } = messagesContainer;
|
||||
@@ -303,96 +320,95 @@
|
||||
<div class="shrink-0 flex flex-wrap gap-2 mb-4">
|
||||
<ModelSelector bind:value={$selectedModelStore} placeholder="Select a model..." disabled={isStreaming} />
|
||||
<div class="flex gap-2">
|
||||
<button
|
||||
class="btn"
|
||||
onclick={() => (showSettings = !showSettings)}
|
||||
title="Settings"
|
||||
>
|
||||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 20 20" fill="currentColor" class="w-5 h-5">
|
||||
<path fill-rule="evenodd" d="M8.34 1.804A1 1 0 0 1 9.32 1h1.36a1 1 0 0 1 .98.804l.295 1.473c.497.144.971.342 1.416.587l1.25-.834a1 1 0 0 1 1.262.125l.962.962a1 1 0 0 1 .125 1.262l-.834 1.25c.245.445.443.919.587 1.416l1.473.295a1 1 0 0 1 .804.98v1.36a1 1 0 0 1-.804.98l-1.473.295a6.95 6.95 0 0 1-.587 1.416l.834 1.25a1 1 0 0 1-.125 1.262l-.962.962a1 1 0 0 1-1.262.125l-1.25-.834a6.953 6.953 0 0 1-1.416.587l-.295 1.473a1 1 0 0 1-.98.804H9.32a1 1 0 0 1-.98-.804l-.295-1.473a6.957 6.957 0 0 1-1.416-.587l-1.25.834a1 1 0 0 1-1.262-.125l-.962-.962a1 1 0 0 1-.125-1.262l.834-1.25a6.957 6.957 0 0 1-.587-1.416l-1.473-.295A1 1 0 0 1 1 10.68V9.32a1 1 0 0 1 .804-.98l1.473-.295c.144-.497.342-.971.587-1.416l-.834-1.25a1 1 0 0 1 .125-1.262l.962-.962A1 1 0 0 1 5.38 3.03l1.25.834a6.957 6.957 0 0 1 1.416-.587l.294-1.473ZM13 10a3 3 0 1 1-6 0 3 3 0 0 1 6 0Z" clip-rule="evenodd" />
|
||||
</svg>
|
||||
</button>
|
||||
<button class="btn" onclick={newChat} disabled={messages.length === 0 && !isStreaming}>
|
||||
<Button variant="outline" size="icon" onclick={() => (showSettings = true)} title="Settings">
|
||||
<Settings />
|
||||
</Button>
|
||||
<Button variant="outline" onclick={newChat} disabled={messages.length === 0 && !isStreaming}>
|
||||
New Chat
|
||||
</button>
|
||||
</Button>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Settings panel -->
|
||||
{#if showSettings}
|
||||
<div class="shrink-0 mb-4 p-4 bg-surface border border-gray-200 dark:border-white/10 rounded">
|
||||
<div class="mb-4">
|
||||
<label class="block text-sm font-medium mb-1" for="endpoint">Endpoint</label>
|
||||
<select
|
||||
id="endpoint"
|
||||
class="w-full px-3 py-2 rounded border border-gray-200 dark:border-white/10 bg-card focus:outline-none focus:ring-2 focus:ring-primary"
|
||||
bind:value={$endpointStore}
|
||||
disabled={isStreaming}
|
||||
>
|
||||
<option value="v1/chat/completions">/v1/chat/completions</option>
|
||||
<option value="v1/messages">/v1/messages</option>
|
||||
<option value="v1/responses">/v1/responses</option>
|
||||
</select>
|
||||
</div>
|
||||
<div class="mb-4">
|
||||
<label class="block text-sm font-medium mb-1" for="system-prompt">System Prompt</label>
|
||||
<textarea
|
||||
id="system-prompt"
|
||||
class="w-full px-3 py-2 rounded border border-gray-200 dark:border-white/10 bg-card focus:outline-none focus:ring-2 focus:ring-primary resize-none"
|
||||
placeholder="You are a helpful assistant..."
|
||||
rows="3"
|
||||
bind:value={$systemPromptStore}
|
||||
disabled={isStreaming}
|
||||
></textarea>
|
||||
</div>
|
||||
<div class="mb-4">
|
||||
<label class="block text-sm font-medium mb-1" for="temperature">
|
||||
Temperature: {$temperatureStore.toFixed(2)}
|
||||
</label>
|
||||
<input
|
||||
id="temperature"
|
||||
type="range"
|
||||
min="0"
|
||||
max="2"
|
||||
step="0.05"
|
||||
class="w-full"
|
||||
bind:value={$temperatureStore}
|
||||
disabled={isStreaming}
|
||||
/>
|
||||
<div class="flex justify-between text-xs text-txtsecondary mt-1">
|
||||
<span>Precise (0)</span>
|
||||
<span>Creative (2)</span>
|
||||
<!-- Settings dialog -->
|
||||
<Dialog.Root bind:open={showSettings}>
|
||||
<Dialog.Content class="max-w-xl">
|
||||
<Dialog.Header>
|
||||
<Dialog.Title>Chat Settings</Dialog.Title>
|
||||
</Dialog.Header>
|
||||
|
||||
<div class="space-y-4">
|
||||
<div>
|
||||
<Label class="mb-1" for="endpoint">Endpoint</Label>
|
||||
<Select.Root
|
||||
type="single"
|
||||
value={$endpointStore}
|
||||
onValueChange={(v) => v && endpointStore.set(v as Endpoint)}
|
||||
>
|
||||
<Select.Trigger class="w-full">/{$endpointStore}</Select.Trigger>
|
||||
<Select.Content>
|
||||
<Select.Item value="v1/chat/completions">/v1/chat/completions</Select.Item>
|
||||
<Select.Item value="v1/messages">/v1/messages</Select.Item>
|
||||
<Select.Item value="v1/responses">/v1/responses</Select.Item>
|
||||
</Select.Content>
|
||||
</Select.Root>
|
||||
</div>
|
||||
<div>
|
||||
<Label class="mb-1" for="system-prompt">System Prompt</Label>
|
||||
<Textarea
|
||||
id="system-prompt"
|
||||
class="resize-none"
|
||||
placeholder="You are a helpful assistant..."
|
||||
rows={3}
|
||||
bind:value={$systemPromptStore}
|
||||
disabled={isStreaming}
|
||||
/>
|
||||
</div>
|
||||
<div>
|
||||
<Label class="mb-1" for="temperature">
|
||||
Temperature: {$temperatureStore.toFixed(2)}
|
||||
</Label>
|
||||
<input
|
||||
id="temperature"
|
||||
type="range"
|
||||
min="0"
|
||||
max="2"
|
||||
step="0.05"
|
||||
class="accent-primary w-full"
|
||||
bind:value={$temperatureStore}
|
||||
disabled={isStreaming}
|
||||
/>
|
||||
<div class="text-muted-foreground mt-1 flex justify-between text-xs">
|
||||
<span>Precise (0)</span>
|
||||
<span>Creative (2)</span>
|
||||
</div>
|
||||
</div>
|
||||
<div>
|
||||
<Label class="mb-1" for="max-tokens">Max Tokens</Label>
|
||||
<Input id="max-tokens" type="number" min="1" bind:value={$maxTokensStore} disabled={isStreaming} />
|
||||
<p class="text-muted-foreground mt-1 text-xs">Required for /v1/messages.</p>
|
||||
</div>
|
||||
</div>
|
||||
<div>
|
||||
<label class="block text-sm font-medium mb-1" for="max-tokens">Max Tokens</label>
|
||||
<input
|
||||
id="max-tokens"
|
||||
type="number"
|
||||
min="1"
|
||||
class="w-full px-3 py-2 rounded border border-gray-200 dark:border-white/10 bg-card focus:outline-none focus:ring-2 focus:ring-primary"
|
||||
bind:value={$maxTokensStore}
|
||||
disabled={isStreaming}
|
||||
/>
|
||||
<p class="text-xs text-txtsecondary mt-1">Required for /v1/messages.</p>
|
||||
</div>
|
||||
</div>
|
||||
{/if}
|
||||
|
||||
<Dialog.Footer>
|
||||
<Button variant="outline" onclick={() => (showSettings = false)}>Done</Button>
|
||||
</Dialog.Footer>
|
||||
</Dialog.Content>
|
||||
</Dialog.Root>
|
||||
|
||||
<!-- Empty state for no models configured -->
|
||||
{#if !hasModels}
|
||||
<div class="flex-1 flex items-center justify-center text-txtsecondary">
|
||||
<div class="text-muted-foreground flex flex-1 items-center justify-center">
|
||||
<p>No models configured. Add models to your configuration to start chatting.</p>
|
||||
</div>
|
||||
{:else}
|
||||
<!-- Messages area -->
|
||||
<div
|
||||
class="flex-1 overflow-y-auto mb-4 px-2"
|
||||
class="mb-4 flex-1 overflow-y-auto px-2"
|
||||
bind:this={messagesContainer}
|
||||
onscroll={handleMessagesScroll}
|
||||
>
|
||||
{#if messages.length === 0}
|
||||
<div class="h-full flex items-center justify-center text-txtsecondary">
|
||||
<div class="text-muted-foreground flex h-full items-center justify-center">
|
||||
<p>Start a conversation by typing a message below.</p>
|
||||
</div>
|
||||
{:else}
|
||||
@@ -419,19 +435,21 @@
|
||||
{#if attachedImages.length > 0}
|
||||
<div class="mb-2 flex flex-wrap gap-2">
|
||||
{#each attachedImages as imageUrl, idx (idx)}
|
||||
<div class="relative group">
|
||||
<div class="group relative">
|
||||
<img
|
||||
src={imageUrl}
|
||||
alt="Attached image {idx + 1}"
|
||||
class="w-20 h-20 object-cover rounded border border-gray-200 dark:border-white/10"
|
||||
class="h-20 w-20 rounded-md border object-cover"
|
||||
/>
|
||||
<button
|
||||
class="absolute -top-2 -right-2 bg-red-500 text-white rounded-full w-6 h-6 flex items-center justify-center opacity-0 group-hover:opacity-100 transition-opacity"
|
||||
<Button
|
||||
variant="destructive"
|
||||
size="icon-sm"
|
||||
class="absolute -right-2 -top-2 h-6 w-6 rounded-full opacity-0 transition-opacity group-hover:opacity-100"
|
||||
onclick={() => removeImage(idx)}
|
||||
title="Remove image"
|
||||
>
|
||||
×
|
||||
</button>
|
||||
<X class="size-3" />
|
||||
</Button>
|
||||
</div>
|
||||
{/each}
|
||||
</div>
|
||||
@@ -439,7 +457,7 @@
|
||||
|
||||
<!-- Error message -->
|
||||
{#if imageError}
|
||||
<div class="mb-2 p-2 bg-red-100 dark:bg-red-900/20 text-red-700 dark:text-red-400 rounded text-sm">
|
||||
<div class="bg-destructive/10 text-destructive mb-2 rounded-md p-2 text-sm">
|
||||
{imageError}
|
||||
</div>
|
||||
{/if}
|
||||
@@ -456,6 +474,7 @@
|
||||
/>
|
||||
|
||||
<ExpandableTextarea
|
||||
bind:ref={inputRef}
|
||||
bind:value={userInput}
|
||||
placeholder="Type a message..."
|
||||
rows={3}
|
||||
@@ -464,27 +483,23 @@
|
||||
/>
|
||||
<div class="flex flex-col gap-2">
|
||||
{#if isStreaming}
|
||||
<button class="btn bg-red-500 hover:bg-red-600 text-white" onclick={cancelStreaming}>
|
||||
Cancel
|
||||
</button>
|
||||
<Button variant="destructive" onclick={cancelStreaming}>Cancel</Button>
|
||||
{:else}
|
||||
<button
|
||||
class="btn"
|
||||
<Button
|
||||
variant="outline"
|
||||
size="icon"
|
||||
onclick={() => fileInput?.click()}
|
||||
disabled={isStreaming || !$selectedModelStore}
|
||||
title="Attach image"
|
||||
>
|
||||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 20 20" fill="currentColor" class="w-5 h-5">
|
||||
<path fill-rule="evenodd" d="M1 5.25A2.25 2.25 0 0 1 3.25 3h13.5A2.25 2.25 0 0 1 19 5.25v9.5A2.25 2.25 0 0 1 16.75 17H3.25A2.25 2.25 0 0 1 1 14.75v-9.5Zm1.5 5.81v3.69c0 .414.336.75.75.75h13.5a.75.75 0 0 0 .75-.75v-2.69l-2.22-2.219a.75.75 0 0 0-1.06 0l-1.91 1.909.47.47a.75.75 0 1 1-1.06 1.06L6.53 8.091a.75.75 0 0 0-1.06 0l-2.97 2.97ZM12 7a1 1 0 1 1-2 0 1 1 0 0 1 2 0Z" clip-rule="evenodd" />
|
||||
</svg>
|
||||
</button>
|
||||
<button
|
||||
class="btn bg-primary text-btn-primary-text hover:opacity-90"
|
||||
<Paperclip />
|
||||
</Button>
|
||||
<Button
|
||||
onclick={sendMessage}
|
||||
disabled={(!userInput.trim() && attachedImages.length === 0) || !$selectedModelStore}
|
||||
>
|
||||
Send
|
||||
</button>
|
||||
</Button>
|
||||
{/if}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
@@ -1,7 +1,9 @@
|
||||
<script lang="ts">
|
||||
import { renderMarkdown, escapeHtml, renderStreamingMarkdown, createStreamingCache } from "../../lib/markdown";
|
||||
import type { RenderedBlock } from "../../lib/markdown";
|
||||
import { Copy, Check, Pencil, X, Save, RefreshCw, ChevronDown, ChevronRight, Brain, Code } from "lucide-svelte";
|
||||
import { Copy, Check, Pencil, X, Save, RefreshCw, ChevronDown, ChevronRight, Brain, Code } from "@lucide/svelte";
|
||||
import { Button } from "$lib/components/ui/button/index.js";
|
||||
import { Textarea } from "$lib/components/ui/textarea/index.js";
|
||||
import { getTextContent, getImageUrls } from "../../lib/types";
|
||||
import type { ContentPart } from "../../lib/types";
|
||||
|
||||
@@ -161,37 +163,37 @@
|
||||
|
||||
<div class="flex {role === 'user' ? 'justify-end' : 'justify-start'} mb-4">
|
||||
<div
|
||||
class="relative group rounded-lg px-4 py-2 {role === 'user'
|
||||
? 'max-w-[85%] bg-primary text-btn-primary-text'
|
||||
: 'w-full sm:w-4/5 bg-surface border border-gray-200 dark:border-white/10'}"
|
||||
class="group relative rounded-lg px-4 py-2 {role === 'user'
|
||||
? 'bg-primary text-primary-foreground max-w-[85%]'
|
||||
: 'bg-card w-full border sm:w-4/5'}"
|
||||
>
|
||||
{#if role === "assistant"}
|
||||
{#if reasoning_content || isReasoning}
|
||||
<div class="mb-3 border border-gray-200 dark:border-white/10 rounded overflow-hidden">
|
||||
<div class="mb-3 overflow-hidden rounded-md border">
|
||||
<button
|
||||
class="w-full flex items-center gap-2 px-3 py-2 bg-gray-50 dark:bg-white/5 hover:bg-gray-100 dark:hover:bg-white/10 transition-colors text-sm"
|
||||
class="bg-muted/50 hover:bg-muted flex w-full items-center gap-2 px-3 py-2 text-sm transition-colors"
|
||||
onclick={() => showReasoning = !showReasoning}
|
||||
>
|
||||
{#if showReasoning}
|
||||
<ChevronDown class="w-4 h-4" />
|
||||
<ChevronDown class="size-4" />
|
||||
{:else}
|
||||
<ChevronRight class="w-4 h-4" />
|
||||
<ChevronRight class="size-4" />
|
||||
{/if}
|
||||
<Brain class="w-4 h-4" />
|
||||
<Brain class="size-4" />
|
||||
<span class="font-medium">Reasoning</span>
|
||||
<span class="text-txtsecondary ml-2">
|
||||
<span class="text-muted-foreground ml-2">
|
||||
({reasoning_content.length} chars{#if !isReasoning && reasoningTimeMs > 0}, {formatDuration(reasoningTimeMs)}{/if})
|
||||
</span>
|
||||
{#if isReasoning}
|
||||
<span class="ml-auto flex items-center gap-1 text-txtsecondary">
|
||||
<span class="w-1.5 h-1.5 bg-primary rounded-full animate-pulse"></span>
|
||||
<span class="text-muted-foreground ml-auto flex items-center gap-1">
|
||||
<span class="bg-primary h-1.5 w-1.5 animate-pulse rounded-full"></span>
|
||||
reasoning...
|
||||
</span>
|
||||
{/if}
|
||||
</button>
|
||||
{#if showReasoning}
|
||||
<div class="px-3 py-2 bg-gray-50/50 dark:bg-white/[0.02] text-sm text-txtsecondary whitespace-pre-wrap font-mono">
|
||||
{reasoning_content}{#if isReasoning}<span class="inline-block w-1.5 h-4 bg-current animate-pulse ml-0.5"></span>{/if}
|
||||
<div class="bg-muted/30 text-muted-foreground whitespace-pre-wrap px-3 py-2 font-mono text-sm">
|
||||
{reasoning_content}{#if isReasoning}<span class="ml-0.5 inline-block h-4 w-1.5 animate-pulse bg-current"></span>{/if}
|
||||
</div>
|
||||
{/if}
|
||||
</div>
|
||||
@@ -201,12 +203,12 @@
|
||||
{#each imageUrls as imageUrl, idx (idx)}
|
||||
<button
|
||||
onclick={() => openModal(imageUrl)}
|
||||
class="cursor-pointer rounded border border-gray-200 dark:border-white/10 hover:opacity-80 transition-opacity"
|
||||
class="cursor-pointer rounded-md border transition-opacity hover:opacity-80"
|
||||
>
|
||||
<img
|
||||
src={imageUrl}
|
||||
alt="Image {idx + 1}"
|
||||
class="max-h-64 rounded"
|
||||
class="max-h-64 rounded-md"
|
||||
/>
|
||||
</button>
|
||||
{/each}
|
||||
@@ -226,60 +228,47 @@
|
||||
</div>
|
||||
{/if}
|
||||
{#if !isStreaming}
|
||||
<div class="flex gap-1 mt-2 pt-1 border-t border-gray-200 dark:border-white/10">
|
||||
<div class="mt-2 flex gap-1 border-t pt-1">
|
||||
{#if onRegenerate}
|
||||
<button
|
||||
class="p-1 rounded hover:bg-black/10 dark:hover:bg-white/10 text-txtsecondary"
|
||||
onclick={onRegenerate}
|
||||
title="Regenerate response"
|
||||
>
|
||||
<RefreshCw class="w-4 h-4" />
|
||||
</button>
|
||||
<Button variant="ghost" size="icon-xs" class="text-muted-foreground" onclick={onRegenerate} title="Regenerate response">
|
||||
<RefreshCw />
|
||||
</Button>
|
||||
{/if}
|
||||
<button
|
||||
class="p-1 rounded hover:bg-black/10 dark:hover:bg-white/10 text-txtsecondary"
|
||||
<Button
|
||||
variant="ghost"
|
||||
size="icon-xs"
|
||||
class="text-muted-foreground"
|
||||
onclick={copyToClipboard}
|
||||
title={copied ? "Copied!" : "Copy to clipboard"}
|
||||
>
|
||||
{#if copied}
|
||||
<Check class="w-4 h-4 text-green-500" />
|
||||
<Check class="text-success" />
|
||||
{:else}
|
||||
<Copy class="w-4 h-4" />
|
||||
<Copy />
|
||||
{/if}
|
||||
</button>
|
||||
<button
|
||||
class="p-1 rounded hover:bg-black/10 dark:hover:bg-white/10 {showRaw ? 'text-primary' : 'text-txtsecondary'}"
|
||||
</Button>
|
||||
<Button
|
||||
variant="ghost"
|
||||
size="icon-xs"
|
||||
class={showRaw ? "text-primary" : "text-muted-foreground"}
|
||||
onclick={() => showRaw = !showRaw}
|
||||
title={showRaw ? "Show rendered" : "Show raw"}
|
||||
>
|
||||
<Code class="w-4 h-4" />
|
||||
</button>
|
||||
<Code />
|
||||
</Button>
|
||||
</div>
|
||||
{/if}
|
||||
{:else}
|
||||
{#if isEditing}
|
||||
<div class="flex flex-col gap-2 min-w-[300px]">
|
||||
<textarea
|
||||
class="w-full px-3 py-2 rounded border border-gray-200 dark:border-white/10 bg-surface text-txtmain focus:outline-none focus:ring-2 focus:ring-primary resize-none"
|
||||
rows="3"
|
||||
bind:value={editContent}
|
||||
onkeydown={handleKeyDown}
|
||||
></textarea>
|
||||
<div class="flex min-w-[300px] flex-col gap-2">
|
||||
<Textarea class="resize-none" rows={3} bind:value={editContent} onkeydown={handleKeyDown} />
|
||||
<div class="flex justify-end gap-2">
|
||||
<button
|
||||
class="p-1.5 rounded hover:bg-white/20"
|
||||
onclick={cancelEdit}
|
||||
title="Cancel"
|
||||
>
|
||||
<X class="w-4 h-4" />
|
||||
</button>
|
||||
<button
|
||||
class="p-1.5 rounded hover:bg-white/20"
|
||||
onclick={saveEdit}
|
||||
title="Save"
|
||||
>
|
||||
<Save class="w-4 h-4" />
|
||||
</button>
|
||||
<Button variant="ghost" size="icon-sm" onclick={cancelEdit} title="Cancel">
|
||||
<X />
|
||||
</Button>
|
||||
<Button variant="ghost" size="icon-sm" onclick={saveEdit} title="Save">
|
||||
<Save />
|
||||
</Button>
|
||||
</div>
|
||||
</div>
|
||||
{:else}
|
||||
@@ -288,12 +277,12 @@
|
||||
{#each imageUrls as imageUrl, idx (idx)}
|
||||
<button
|
||||
onclick={() => openModal(imageUrl)}
|
||||
class="cursor-pointer rounded border border-white/20 hover:opacity-80 transition-opacity"
|
||||
class="cursor-pointer rounded-md border border-white/20 transition-opacity hover:opacity-80"
|
||||
>
|
||||
<img
|
||||
src={imageUrl}
|
||||
alt="Image {idx + 1}"
|
||||
class="max-w-[200px] rounded"
|
||||
class="max-w-[200px] rounded-md"
|
||||
/>
|
||||
</button>
|
||||
{/each}
|
||||
@@ -302,11 +291,11 @@
|
||||
<div class="whitespace-pre-wrap pr-8">{textContent}</div>
|
||||
{#if canEdit}
|
||||
<button
|
||||
class="absolute top-2 right-2 p-1.5 rounded-lg opacity-0 group-hover:opacity-100 transition-opacity bg-white/20 hover:bg-white/30 shadow-sm"
|
||||
class="absolute right-2 top-2 rounded-lg bg-white/20 p-1.5 opacity-0 shadow-sm transition-opacity hover:bg-white/30 group-hover:opacity-100"
|
||||
onclick={startEdit}
|
||||
title="Edit message"
|
||||
>
|
||||
<Pencil class="w-4 h-4" />
|
||||
<Pencil class="size-4" />
|
||||
</button>
|
||||
{/if}
|
||||
{/if}
|
||||
@@ -324,16 +313,16 @@
|
||||
tabindex="-1"
|
||||
>
|
||||
<button
|
||||
class="absolute top-4 right-4 p-2 rounded-lg bg-white/10 hover:bg-white/20 text-white transition-colors"
|
||||
class="absolute right-4 top-4 rounded-lg bg-white/10 p-2 text-white transition-colors hover:bg-white/20"
|
||||
onclick={() => closeModal()}
|
||||
title="Close"
|
||||
>
|
||||
<X class="w-6 h-6" />
|
||||
<X class="size-6" />
|
||||
</button>
|
||||
<img
|
||||
src={modalImageUrl}
|
||||
alt=""
|
||||
class="max-w-full max-h-full rounded pointer-events-none"
|
||||
class="max-w-full max-h-full rounded-md pointer-events-none"
|
||||
/>
|
||||
</div>
|
||||
{/if}
|
||||
@@ -341,8 +330,8 @@
|
||||
<style>
|
||||
.prose :global(pre) {
|
||||
position: relative;
|
||||
background-color: var(--color-surface);
|
||||
border: 1px solid var(--color-border, rgba(128, 128, 128, 0.2));
|
||||
background-color: var(--muted);
|
||||
border: 1px solid var(--border);
|
||||
border-radius: 0.375rem;
|
||||
padding: 0.75rem;
|
||||
padding-right: 2.5rem;
|
||||
@@ -359,20 +348,20 @@
|
||||
justify-content: center;
|
||||
padding: 0.25rem;
|
||||
border-radius: 0.25rem;
|
||||
border: 1px solid var(--color-border);
|
||||
background: var(--color-surface);
|
||||
color: var(--color-txtsecondary);
|
||||
border: 1px solid var(--border);
|
||||
background: var(--muted);
|
||||
color: var(--muted-foreground);
|
||||
cursor: pointer;
|
||||
transition: background-color 0.15s;
|
||||
line-height: 0;
|
||||
}
|
||||
|
||||
.prose :global(.code-copy-btn:hover) {
|
||||
background: var(--color-secondary);
|
||||
background: var(--accent);
|
||||
}
|
||||
|
||||
.prose :global(.code-copy-btn.copied) {
|
||||
color: var(--color-success);
|
||||
color: var(--success);
|
||||
opacity: 1;
|
||||
}
|
||||
|
||||
@@ -387,10 +376,10 @@
|
||||
}
|
||||
|
||||
.prose :global(code:not(pre code)) {
|
||||
background-color: var(--color-surface);
|
||||
background-color: var(--muted);
|
||||
padding: 0.125rem 0.25rem;
|
||||
border-radius: 0.25rem;
|
||||
border: 1px solid var(--color-border, rgba(128, 128, 128, 0.2));
|
||||
border: 1px solid var(--border);
|
||||
}
|
||||
|
||||
.prose :global(p) {
|
||||
@@ -431,14 +420,14 @@
|
||||
}
|
||||
|
||||
.prose :global(blockquote) {
|
||||
border-left: 3px solid var(--color-primary);
|
||||
border-left: 3px solid var(--primary);
|
||||
padding-left: 1rem;
|
||||
margin: 0.5rem 0;
|
||||
font-style: italic;
|
||||
}
|
||||
|
||||
.prose :global(a) {
|
||||
color: var(--color-primary);
|
||||
color: var(--primary);
|
||||
text-decoration: underline;
|
||||
}
|
||||
|
||||
@@ -450,13 +439,13 @@
|
||||
|
||||
.prose :global(th),
|
||||
.prose :global(td) {
|
||||
border: 1px solid var(--color-border, rgba(128, 128, 128, 0.2));
|
||||
border: 1px solid var(--border);
|
||||
padding: 0.5rem;
|
||||
text-align: left;
|
||||
}
|
||||
|
||||
.prose :global(th) {
|
||||
background-color: var(--color-surface);
|
||||
background-color: var(--muted);
|
||||
font-weight: 600;
|
||||
}
|
||||
|
||||
|
||||
@@ -2,6 +2,10 @@
|
||||
import { models } from "../../stores/api";
|
||||
import { persistentStore } from "../../stores/persistent";
|
||||
import { streamChatCompletion } from "../../lib/chatApi";
|
||||
import { Button } from "$lib/components/ui/button/index.js";
|
||||
import { Input } from "$lib/components/ui/input/index.js";
|
||||
import { Textarea } from "$lib/components/ui/textarea/index.js";
|
||||
import { X } from "@lucide/svelte";
|
||||
|
||||
type Status = "waiting" | "streaming" | "done" | "error";
|
||||
type Phase = "waiting" | "loading" | "reasoning" | "content";
|
||||
@@ -366,77 +370,77 @@
|
||||
<!-- Run controls -->
|
||||
<div class="flex items-center gap-2">
|
||||
{#if isRunning}
|
||||
<button class="btn bg-red-500 hover:bg-red-600 text-white border-red-500" onclick={stop}>
|
||||
<span class="inline-block w-3 h-3 bg-white align-middle mr-2"></span>Stop
|
||||
</button>
|
||||
<Button variant="destructive" onclick={stop}>
|
||||
<span class="mr-1 inline-block h-3 w-3 bg-current align-middle"></span>Stop
|
||||
</Button>
|
||||
{:else}
|
||||
<button
|
||||
class="btn bg-primary text-btn-primary-text hover:opacity-90"
|
||||
<Button
|
||||
onclick={run}
|
||||
disabled={!canRun}
|
||||
title={$testListStore.length === 0 ? "Add models from the list below" : "Run concurrent requests"}
|
||||
>
|
||||
<span class="inline-block align-middle mr-2" aria-hidden="true">▶</span>Go
|
||||
</button>
|
||||
<span class="mr-1 inline-block align-middle" aria-hidden="true">▶</span>Go
|
||||
</Button>
|
||||
{/if}
|
||||
<button class="btn btn--sm" onclick={clearAll} disabled={isRunning || $testListStore.length === 0}>
|
||||
<Button variant="outline" size="sm" onclick={clearAll} disabled={isRunning || $testListStore.length === 0}>
|
||||
Clear ({$testListStore.length})
|
||||
</button>
|
||||
</Button>
|
||||
</div>
|
||||
|
||||
<!-- Available models -->
|
||||
<div class="flex flex-col min-h-0 flex-1">
|
||||
<div class="text-xs font-medium text-txtsecondary mb-1">
|
||||
<div class="text-xs font-medium text-muted-foreground mb-1">
|
||||
Models <span class="text-[10px] font-normal">— click to queue (add the same model more than once to test parallel requests)</span>
|
||||
</div>
|
||||
<div class="flex-1 border border-gray-200 dark:border-white/10 rounded overflow-y-auto min-h-0">
|
||||
<div class="flex-1 border border-border rounded-md overflow-y-auto min-h-0">
|
||||
{#if !hasModels}
|
||||
<div class="p-3 text-sm text-txtsecondary text-center">No models configured.</div>
|
||||
<div class="p-3 text-sm text-muted-foreground text-center">No models configured.</div>
|
||||
{:else}
|
||||
<ul class="divide-y divide-gray-100 dark:divide-white/5">
|
||||
<div class="divide-y divide-gray-100 dark:divide-white/5">
|
||||
{#each availableModels as m (m.id)}
|
||||
<li>
|
||||
<button
|
||||
class="w-full text-left px-2 py-1.5 text-sm hover:bg-secondary-hover transition-colors disabled:opacity-50 disabled:cursor-not-allowed flex items-center gap-2"
|
||||
onclick={() => addModel(m.id)}
|
||||
disabled={isRunning}
|
||||
title="Add {m.id}"
|
||||
>
|
||||
<span class="text-primary" aria-hidden="true">+</span>
|
||||
<span class="truncate flex-1">{m.id}</span>
|
||||
</button>
|
||||
</li>
|
||||
<button
|
||||
type="button"
|
||||
class="hover:bg-accent hover:text-foreground flex w-full items-center gap-1.5 px-2 py-1.5 text-left text-sm font-normal transition-colors disabled:pointer-events-none disabled:opacity-50"
|
||||
onclick={() => addModel(m.id)}
|
||||
disabled={isRunning}
|
||||
title="Add {m.id}"
|
||||
>
|
||||
<span class="text-primary" aria-hidden="true">+</span>
|
||||
<span class="truncate flex-1">{m.id}</span>
|
||||
</button>
|
||||
{/each}
|
||||
</ul>
|
||||
</div>
|
||||
{/if}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Settings -->
|
||||
<div class="flex flex-col gap-2 border-t border-gray-200 dark:border-white/10 pt-3">
|
||||
<div class="flex flex-col gap-2 border-t border-border pt-3">
|
||||
<div class="flex items-center justify-between">
|
||||
<label for="concurrency-prompt" class="text-xs font-medium text-txtsecondary">Prompt</label>
|
||||
<button
|
||||
class="text-[10px] text-txtsecondary hover:text-txtmain underline"
|
||||
<label for="concurrency-prompt" class="text-xs font-medium text-muted-foreground">Prompt</label>
|
||||
<Button
|
||||
variant="link"
|
||||
size="sm"
|
||||
class="h-auto p-0 text-[10px]"
|
||||
onclick={resetDefaults}
|
||||
disabled={isRunning}
|
||||
>
|
||||
reset defaults
|
||||
</button>
|
||||
</Button>
|
||||
</div>
|
||||
<textarea
|
||||
<Textarea
|
||||
id="concurrency-prompt"
|
||||
class="w-full px-2 py-1.5 text-sm rounded border border-gray-200 dark:border-white/10 bg-surface focus:outline-none focus:ring-2 focus:ring-primary resize-none"
|
||||
rows="3"
|
||||
class="resize-none text-sm"
|
||||
rows={3}
|
||||
bind:value={$promptStore}
|
||||
disabled={isRunning}
|
||||
></textarea>
|
||||
<label for="concurrency-max-tokens" class="text-xs font-medium text-txtsecondary">max_tokens</label>
|
||||
<input
|
||||
></Textarea>
|
||||
<label for="concurrency-max-tokens" class="text-xs font-medium text-muted-foreground">max_tokens</label>
|
||||
<Input
|
||||
id="concurrency-max-tokens"
|
||||
type="number"
|
||||
min="1"
|
||||
class="w-full px-2 py-1.5 text-sm rounded border border-gray-200 dark:border-white/10 bg-surface focus:outline-none focus:ring-2 focus:ring-primary"
|
||||
class="h-8 text-sm"
|
||||
bind:value={$maxTokensStore}
|
||||
disabled={isRunning}
|
||||
/>
|
||||
@@ -447,8 +451,8 @@
|
||||
<div class="flex-1 min-w-0 min-h-0 overflow-y-auto">
|
||||
{#if $testListStore.length === 0}
|
||||
<div class="h-full flex items-center justify-center px-6">
|
||||
<div class="max-w-md text-sm text-txtsecondary space-y-4">
|
||||
<h4 class="text-base font-semibold text-txtmain pb-0">Load Test</h4>
|
||||
<div class="max-w-md text-sm text-muted-foreground space-y-4">
|
||||
<h4 class="text-base font-semibold text-foreground pb-0">Load Test</h4>
|
||||
<p>
|
||||
Fire several streaming chat completions at llama-swap at the same time to see how it handles parallel
|
||||
loading and concurrent inference. Each request streams into its own panel with a live timer and status.
|
||||
@@ -456,16 +460,16 @@
|
||||
<ol class="list-decimal list-inside space-y-1">
|
||||
<li>Click models on the left to queue them — repeat a model to hit it with parallel requests.</li>
|
||||
<li>Tweak the prompt and <code>max_tokens</code> if you want.</li>
|
||||
<li>Press <span class="font-semibold text-txtmain">Go</span> to launch them concurrently.</li>
|
||||
<li>Press <span class="font-semibold text-foreground">Go</span> to launch them concurrently.</li>
|
||||
</ol>
|
||||
<p class="text-xs">Tip: drag a result card's header to reorder, or hit × to drop it.</p>
|
||||
</div>
|
||||
</div>
|
||||
{:else}
|
||||
<!-- Gantt-style timeline -->
|
||||
<div class="mb-3 border border-gray-200 dark:border-white/10 rounded">
|
||||
<div class="mb-3 border border-border rounded-md">
|
||||
<button
|
||||
class="w-full flex items-center gap-2 px-2 py-1.5 text-xs font-medium text-txtsecondary hover:bg-secondary-hover transition-colors {$timelineCollapsedStore ? 'rounded' : 'rounded-t border-b border-gray-200 dark:border-white/10'}"
|
||||
class="w-full flex items-center gap-2 px-2 py-1.5 text-xs font-medium text-muted-foreground hover:bg-accent transition-colors {$timelineCollapsedStore ? 'rounded-md' : 'rounded-t border-b border-border'}"
|
||||
onclick={() => timelineCollapsedStore.update((v) => !v)}
|
||||
aria-expanded={!$timelineCollapsedStore}
|
||||
>
|
||||
@@ -480,7 +484,7 @@
|
||||
</svg>
|
||||
<span>Timeline</span>
|
||||
{#if !$timelineCollapsedStore}
|
||||
<span class="flex items-center gap-3 text-[10px] text-txtsecondary font-normal ml-3" aria-hidden="true">
|
||||
<span class="flex items-center gap-3 text-[10px] text-muted-foreground font-normal ml-3" aria-hidden="true">
|
||||
<span class="flex items-center gap-1"><span class="inline-block w-2.5 h-2.5 rounded-sm bg-slate-200 dark:bg-white/10 border border-gray-300 dark:border-white/10"></span>waiting</span>
|
||||
<span class="flex items-center gap-1"><span class="inline-block w-2.5 h-2.5 rounded-sm bg-slate-400 dark:bg-slate-500"></span>loading</span>
|
||||
<span class="flex items-center gap-1"><span class="inline-block w-2.5 h-2.5 rounded-sm bg-purple-500"></span>reasoning</span>
|
||||
@@ -489,7 +493,7 @@
|
||||
<span class="flex items-center gap-1"><span class="inline-block w-2.5 h-2.5 rounded-sm bg-red-500"></span>error</span>
|
||||
</span>
|
||||
{/if}
|
||||
<span class="ml-auto tabular-nums text-txtsecondary">
|
||||
<span class="ml-auto tabular-nums text-muted-foreground">
|
||||
max {formatElapsed(timelineMaxMs)} · {$testListStore.length} request{$testListStore.length === 1 ? "" : "s"}
|
||||
</span>
|
||||
</button>
|
||||
@@ -498,13 +502,13 @@
|
||||
<!-- X axis ticks -->
|
||||
<div class="flex" aria-hidden="true">
|
||||
<div class="w-40 shrink-0"></div>
|
||||
<div class="relative flex-1 h-4 border-b border-gray-200 dark:border-white/10">
|
||||
<div class="relative flex-1 h-4 border-b border-border">
|
||||
{#each timelineTicks as t (t)}
|
||||
<div
|
||||
class="absolute top-0 bottom-0 border-l border-gray-200 dark:border-white/10"
|
||||
class="absolute top-0 bottom-0 border-l border-border"
|
||||
style="left: {(t / timelineMaxMs) * 100}%;"
|
||||
>
|
||||
<span class="absolute -top-0.5 left-1 text-[10px] text-txtsecondary tabular-nums">{formatTickMs(t)}</span>
|
||||
<span class="absolute -top-0.5 left-1 text-[10px] text-muted-foreground tabular-nums">{formatTickMs(t)}</span>
|
||||
</div>
|
||||
{/each}
|
||||
</div>
|
||||
@@ -519,14 +523,14 @@
|
||||
{@const reasoningPct = run ? (run.reasoningMs / timelineMaxMs) * 100 : 0}
|
||||
{@const contentPct = run ? (run.contentMs / timelineMaxMs) * 100 : 0}
|
||||
<div class="flex items-center text-xs">
|
||||
<div class="w-40 shrink-0 flex items-center gap-1 pr-2 text-txtsecondary">
|
||||
<div class="w-40 shrink-0 flex items-center gap-1 pr-2 text-muted-foreground">
|
||||
<span class="tabular-nums w-5 text-right">{i + 1}.</span>
|
||||
<span class="truncate" title={entry.model}>{entry.model}</span>
|
||||
</div>
|
||||
<div class="relative flex-1 h-4">
|
||||
{#each timelineTicks as t (t)}
|
||||
<div
|
||||
class="absolute top-0 bottom-0 border-l border-gray-100 dark:border-white/5"
|
||||
class="absolute top-0 bottom-0 border-l border-border"
|
||||
style="left: {(t / timelineMaxMs) * 100}%;"
|
||||
aria-hidden="true"
|
||||
></div>
|
||||
@@ -560,7 +564,7 @@
|
||||
></div>
|
||||
{/if}
|
||||
</div>
|
||||
<div class="w-16 shrink-0 pl-2 tabular-nums text-txtsecondary text-right">
|
||||
<div class="w-16 shrink-0 pl-2 tabular-nums text-muted-foreground text-right">
|
||||
{run ? formatElapsed(run.elapsedMs) : "—"}
|
||||
</div>
|
||||
</div>
|
||||
@@ -574,16 +578,16 @@
|
||||
{@const run = runs[entry.id]}
|
||||
{@const status = run?.status ?? "waiting"}
|
||||
<div
|
||||
class="border rounded flex flex-col min-h-0 transition-colors {dragOverIndex === i && dragIndex !== i
|
||||
class="border rounded-md flex flex-col min-h-0 transition-colors {dragOverIndex === i && dragIndex !== i
|
||||
? 'border-primary ring-2 ring-primary/40'
|
||||
: 'border-gray-200 dark:border-white/10'} {dragIndex === i ? 'opacity-40' : ''}"
|
||||
: 'border-border'} {dragIndex === i ? 'opacity-40' : ''}"
|
||||
style="height: 280px;"
|
||||
role="listitem"
|
||||
ondragover={(e) => onDragOver(i, e)}
|
||||
ondrop={(e) => onDrop(i, e)}
|
||||
>
|
||||
<div
|
||||
class="shrink-0 flex items-center gap-2 px-2 py-1.5 border-b border-gray-200 dark:border-white/10 bg-secondary/40 rounded-t"
|
||||
class="shrink-0 flex items-center gap-2 px-2 py-1.5 border-b border-border bg-secondary/40 rounded-t"
|
||||
draggable={!isRunning}
|
||||
role="button"
|
||||
tabindex="-1"
|
||||
@@ -593,26 +597,28 @@
|
||||
class:cursor-grab={!isRunning}
|
||||
title={isRunning ? "" : "Drag to reorder"}
|
||||
>
|
||||
<span class="text-txtsecondary select-none" aria-hidden="true">⋮⋮</span>
|
||||
<span class="text-txtsecondary tabular-nums text-xs w-5 text-right">{i + 1}.</span>
|
||||
<span class="text-muted-foreground select-none" aria-hidden="true">⋮⋮</span>
|
||||
<span class="text-muted-foreground tabular-nums text-xs w-5 text-right">{i + 1}.</span>
|
||||
<span class="flex-1 truncate text-sm font-medium" title={entry.model}>{entry.model}</span>
|
||||
<span class="text-xs tabular-nums text-txtsecondary">
|
||||
<span class="text-xs tabular-nums text-muted-foreground">
|
||||
{run ? formatElapsed(run.elapsedMs) : "—"}
|
||||
</span>
|
||||
<span class="status text-[10px] {statusBadgeClass(status)}">{status}</span>
|
||||
<button
|
||||
class="w-5 h-5 flex items-center justify-center text-txtsecondary hover:text-red-500 transition-colors rounded disabled:opacity-30 disabled:cursor-not-allowed"
|
||||
<Button
|
||||
variant="ghost"
|
||||
size="icon-sm"
|
||||
class="h-5 w-5 text-muted-foreground hover:text-red-500"
|
||||
onclick={() => removeEntry(entry.id)}
|
||||
disabled={isRunning}
|
||||
aria-label="Remove"
|
||||
tabindex="-1"
|
||||
tabindex={-1}
|
||||
>
|
||||
×
|
||||
</button>
|
||||
<X class="size-3" />
|
||||
</Button>
|
||||
</div>
|
||||
<div class="flex-1 min-h-0 overflow-y-auto font-mono text-xs px-2 py-1.5">
|
||||
{#if run?.loadingText}
|
||||
<div class="bg-secondary/40 dark:bg-white/5 text-txtsecondary rounded px-2 py-1 mb-2 whitespace-pre-wrap">{run.loadingText.trim()}</div>
|
||||
<div class="bg-secondary/40 dark:bg-white/5 text-muted-foreground rounded-md px-2 py-1 mb-2 whitespace-pre-wrap">{run.loadingText.trim()}</div>
|
||||
{/if}
|
||||
{#if run?.reasoningContent}
|
||||
<div class="text-purple-700 dark:text-purple-300 whitespace-pre-wrap">{run.reasoningContent}</div>
|
||||
|
||||
@@ -1,9 +1,12 @@
|
||||
<script lang="ts">
|
||||
import { untrack } from "svelte";
|
||||
import { Maximize2, X } from "lucide-svelte";
|
||||
import { Maximize2, X } from "@lucide/svelte";
|
||||
import { Button } from "$lib/components/ui/button/index.js";
|
||||
import { Textarea } from "$lib/components/ui/textarea/index.js";
|
||||
|
||||
interface Props {
|
||||
value: string;
|
||||
ref?: HTMLTextAreaElement | null;
|
||||
placeholder?: string;
|
||||
rows?: number;
|
||||
disabled?: boolean;
|
||||
@@ -12,6 +15,7 @@
|
||||
|
||||
let {
|
||||
value = $bindable(),
|
||||
ref = $bindable(null),
|
||||
placeholder = "",
|
||||
rows = 3,
|
||||
disabled = false,
|
||||
@@ -52,69 +56,55 @@
|
||||
});
|
||||
</script>
|
||||
|
||||
<div class="flex-1 relative group flex items-stretch min-h-0">
|
||||
<textarea
|
||||
class="w-full px-3 py-2 pr-10 rounded border border-gray-200 dark:border-white/10 bg-surface focus:outline-none focus:ring-2 focus:ring-inset focus:ring-primary resize-none"
|
||||
<div class="group relative flex min-h-0 flex-1 items-stretch">
|
||||
<Textarea
|
||||
class="resize-none pr-10"
|
||||
bind:ref
|
||||
{placeholder}
|
||||
{rows}
|
||||
bind:value
|
||||
{onkeydown}
|
||||
{disabled}
|
||||
></textarea>
|
||||
<button
|
||||
class="absolute top-2 right-2 p-1.5 rounded-lg opacity-60 md:opacity-0 group-hover:opacity-100 transition-opacity bg-surface/90 hover:bg-surface border border-gray-200 dark:border-white/10 shadow-sm"
|
||||
/>
|
||||
<Button
|
||||
variant="outline"
|
||||
size="icon-sm"
|
||||
class="absolute right-2 top-2 opacity-60 transition-opacity group-hover:opacity-100 md:opacity-0"
|
||||
onclick={openExpanded}
|
||||
title="Expand to edit"
|
||||
type="button"
|
||||
{disabled}
|
||||
>
|
||||
<Maximize2 class="w-4 h-4" />
|
||||
</button>
|
||||
<Maximize2 />
|
||||
</Button>
|
||||
</div>
|
||||
|
||||
{#if isExpanded}
|
||||
<div class="fixed inset-0 z-50 flex items-center justify-center bg-black/50 p-4">
|
||||
<div class="w-full max-w-4xl h-[80vh] flex flex-col bg-surface rounded-lg shadow-xl border border-gray-200 dark:border-white/10">
|
||||
<div class="bg-card flex h-[80vh] w-full max-w-4xl flex-col rounded-lg border shadow-xl">
|
||||
<!-- Header -->
|
||||
<div class="flex justify-between items-center p-4 border-b border-gray-200 dark:border-white/10">
|
||||
<h3 class="font-medium">Edit Text</h3>
|
||||
<button
|
||||
class="p-1.5 rounded-lg hover:bg-gray-100 dark:hover:bg-white/10"
|
||||
onclick={closeExpanded}
|
||||
title="Close"
|
||||
type="button"
|
||||
>
|
||||
<X class="w-5 h-5" />
|
||||
</button>
|
||||
<div class="flex items-center justify-between border-b p-4">
|
||||
<h3 class="pb-0 font-medium">Edit Text</h3>
|
||||
<Button variant="ghost" size="icon-sm" onclick={closeExpanded} title="Close" type="button">
|
||||
<X />
|
||||
</Button>
|
||||
</div>
|
||||
|
||||
<!-- Textarea -->
|
||||
<div class="flex-1 p-4">
|
||||
<textarea
|
||||
bind:this={expandedTextarea}
|
||||
class="w-full h-full px-4 py-3 rounded border border-gray-200 dark:border-white/10 bg-card focus:outline-none focus:ring-2 focus:ring-primary resize-none"
|
||||
placeholder={placeholder}
|
||||
<Textarea
|
||||
bind:ref={expandedTextarea}
|
||||
class="h-full resize-none"
|
||||
{placeholder}
|
||||
bind:value={expandedValue}
|
||||
onkeydown={handleKeyDown}
|
||||
></textarea>
|
||||
/>
|
||||
</div>
|
||||
|
||||
<!-- Footer -->
|
||||
<div class="flex justify-end gap-2 p-4 border-t border-gray-200 dark:border-white/10">
|
||||
<button
|
||||
class="btn"
|
||||
onclick={closeExpanded}
|
||||
type="button"
|
||||
>
|
||||
Cancel
|
||||
</button>
|
||||
<button
|
||||
class="btn bg-primary text-btn-primary-text hover:opacity-90"
|
||||
onclick={saveExpanded}
|
||||
type="button"
|
||||
>
|
||||
Done
|
||||
</button>
|
||||
<div class="flex justify-end gap-2 border-t p-4">
|
||||
<Button variant="outline" onclick={closeExpanded} type="button">Cancel</Button>
|
||||
<Button onclick={saveExpanded} type="button">Done</Button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
@@ -7,6 +7,11 @@
|
||||
import ModelSelector from "./ModelSelector.svelte";
|
||||
import ExpandableTextarea from "./ExpandableTextarea.svelte";
|
||||
import type { ImageApiMode, SdApiLora, SdApiLoraRef } from "../../lib/types";
|
||||
import { Button } from "$lib/components/ui/button/index.js";
|
||||
import { Input } from "$lib/components/ui/input/index.js";
|
||||
import { Textarea } from "$lib/components/ui/textarea/index.js";
|
||||
import * as Select from "$lib/components/ui/select/index.js";
|
||||
import { Download, X } from "@lucide/svelte";
|
||||
|
||||
const selectedModelStore = persistentStore<string>("playground-image-model", "");
|
||||
const selectedSizeStore = persistentStore<string>("playground-image-size", "1024x1024");
|
||||
@@ -61,18 +66,6 @@
|
||||
}
|
||||
}
|
||||
|
||||
function addLora(event: Event) {
|
||||
const select = event.target as HTMLSelectElement;
|
||||
const path = select.value;
|
||||
if (!path) return;
|
||||
|
||||
const lora = availableLoras.find((l) => l.path === path);
|
||||
if (lora && !selectedLoras.some((l) => l.path === path)) {
|
||||
selectedLoras = [...selectedLoras, { path: lora.path, multiplier: 1.0 }];
|
||||
}
|
||||
select.value = "";
|
||||
}
|
||||
|
||||
function removeLora(path: string) {
|
||||
selectedLoras = selectedLoras.filter((l) => l.path !== path);
|
||||
}
|
||||
@@ -193,67 +186,75 @@
|
||||
<div class="flex flex-col h-full">
|
||||
<!-- Model selector and mode toggle -->
|
||||
<div class="shrink-0 flex flex-wrap gap-2 mb-4">
|
||||
<ModelSelector bind:value={$selectedModelStore} placeholder="Select an image model..." disabled={isGenerating} />
|
||||
<ModelSelector bind:value={$selectedModelStore} placeholder="Select an image model..." disabled={isGenerating} capabilities={["image_generation", "image_to_image"]} matchAny={true} />
|
||||
|
||||
<select
|
||||
class="px-3 py-2 rounded border border-gray-200 dark:border-white/10 bg-surface focus:outline-none focus:ring-2 focus:ring-primary"
|
||||
bind:value={$apiModeStore}
|
||||
disabled={isGenerating}
|
||||
<Select.Root
|
||||
type="single"
|
||||
value={$apiModeStore}
|
||||
onValueChange={(v) => v && apiModeStore.set(v as ImageApiMode)}
|
||||
>
|
||||
<option value="openai">OpenAI</option>
|
||||
<option value="sdapi">SDAPI</option>
|
||||
</select>
|
||||
<Select.Trigger class="h-9 w-32">{$apiModeStore}</Select.Trigger>
|
||||
<Select.Content>
|
||||
<Select.Item value="openai">OpenAI</Select.Item>
|
||||
<Select.Item value="sdapi">SDAPI</Select.Item>
|
||||
</Select.Content>
|
||||
</Select.Root>
|
||||
|
||||
<select
|
||||
class="px-3 py-2 rounded border border-gray-200 dark:border-white/10 bg-surface focus:outline-none focus:ring-2 focus:ring-primary"
|
||||
bind:value={$selectedSizeStore}
|
||||
disabled={isGenerating}
|
||||
<Select.Root
|
||||
type="single"
|
||||
value={$selectedSizeStore}
|
||||
onValueChange={(v) => v && selectedSizeStore.set(v)}
|
||||
>
|
||||
<optgroup label="Square">
|
||||
<option value="512x512">512x512</option>
|
||||
<option value="1024x1024">1024x1024</option>
|
||||
</optgroup>
|
||||
<optgroup label="Landscape">
|
||||
<option value="1024x768">1024x768 (4:3)</option>
|
||||
<option value="1280x720">1280x720 (16:9)</option>
|
||||
<option value="1792x1024">1792x1024 (SDXL)</option>
|
||||
</optgroup>
|
||||
<optgroup label="Portrait">
|
||||
<option value="768x1024">768x1024 (3:4)</option>
|
||||
<option value="720x1280">720x1280 (9:16)</option>
|
||||
<option value="1024x1792">1024x1792 (SDXL)</option>
|
||||
</optgroup>
|
||||
</select>
|
||||
<Select.Trigger class="h-9 w-40">{$selectedSizeStore}</Select.Trigger>
|
||||
<Select.Content>
|
||||
<Select.Group>
|
||||
<Select.Label>Square</Select.Label>
|
||||
<Select.Item value="512x512">512x512</Select.Item>
|
||||
<Select.Item value="1024x1024">1024x1024</Select.Item>
|
||||
</Select.Group>
|
||||
<Select.Separator />
|
||||
<Select.Group>
|
||||
<Select.Label>Landscape</Select.Label>
|
||||
<Select.Item value="1024x768">1024x768 (4:3)</Select.Item>
|
||||
<Select.Item value="1280x720">1280x720 (16:9)</Select.Item>
|
||||
<Select.Item value="1792x1024">1792x1024 (SDXL)</Select.Item>
|
||||
</Select.Group>
|
||||
<Select.Separator />
|
||||
<Select.Group>
|
||||
<Select.Label>Portrait</Select.Label>
|
||||
<Select.Item value="768x1024">768x1024 (3:4)</Select.Item>
|
||||
<Select.Item value="720x1280">720x1280 (9:16)</Select.Item>
|
||||
<Select.Item value="1024x1792">1024x1792 (SDXL)</Select.Item>
|
||||
</Select.Group>
|
||||
</Select.Content>
|
||||
</Select.Root>
|
||||
|
||||
{#if isSdapi}
|
||||
<button
|
||||
class="px-3 py-2 rounded border border-gray-200 dark:border-white/10 bg-surface hover:bg-secondary-hover transition-colors"
|
||||
onclick={() => showSettings = !showSettings}
|
||||
>
|
||||
<Button variant="outline" onclick={() => showSettings = !showSettings}>
|
||||
{showSettings ? "Hide Settings" : "Settings"}
|
||||
</button>
|
||||
</Button>
|
||||
{/if}
|
||||
</div>
|
||||
|
||||
<!-- SDAPI Settings Panel -->
|
||||
{#if isSdapi && showSettings}
|
||||
<div class="shrink-0 mb-4 p-4 rounded border border-gray-200 dark:border-white/10 bg-surface">
|
||||
<div class="shrink-0 mb-4 p-4 rounded-md border border-border bg-background">
|
||||
<div class="grid grid-cols-2 md:grid-cols-4 gap-3 mb-3">
|
||||
<label class="flex flex-col gap-1">
|
||||
<span class="text-xs text-txtsecondary">Steps</span>
|
||||
<input
|
||||
<span class="text-xs text-muted-foreground">Steps</span>
|
||||
<Input
|
||||
type="number"
|
||||
class="px-2 py-1 rounded border border-gray-200 dark:border-white/10 bg-surface focus:outline-none focus:ring-2 focus:ring-primary"
|
||||
class="h-8"
|
||||
bind:value={$sdStepsStore}
|
||||
min="1"
|
||||
max="150"
|
||||
/>
|
||||
</label>
|
||||
<label class="flex flex-col gap-1">
|
||||
<span class="text-xs text-txtsecondary">CFG Scale</span>
|
||||
<input
|
||||
<span class="text-xs text-muted-foreground">CFG Scale</span>
|
||||
<Input
|
||||
type="number"
|
||||
class="px-2 py-1 rounded border border-gray-200 dark:border-white/10 bg-surface focus:outline-none focus:ring-2 focus:ring-primary"
|
||||
class="h-8"
|
||||
bind:value={$sdCfgScaleStore}
|
||||
min="1"
|
||||
max="30"
|
||||
@@ -261,121 +262,141 @@
|
||||
/>
|
||||
</label>
|
||||
<label class="flex flex-col gap-1">
|
||||
<span class="text-xs text-txtsecondary">Seed (-1 = random)</span>
|
||||
<input
|
||||
<span class="text-xs text-muted-foreground">Seed (-1 = random)</span>
|
||||
<Input
|
||||
type="number"
|
||||
class="px-2 py-1 rounded border border-gray-200 dark:border-white/10 bg-surface focus:outline-none focus:ring-2 focus:ring-primary"
|
||||
class="h-8"
|
||||
bind:value={$sdSeedStore}
|
||||
min="-1"
|
||||
/>
|
||||
</label>
|
||||
<label class="flex flex-col gap-1">
|
||||
<span class="text-xs text-txtsecondary">Batch Size</span>
|
||||
<input
|
||||
<span class="text-xs text-muted-foreground">Batch Size</span>
|
||||
<Input
|
||||
type="number"
|
||||
class="px-2 py-1 rounded border border-gray-200 dark:border-white/10 bg-surface focus:outline-none focus:ring-2 focus:ring-primary"
|
||||
class="h-8"
|
||||
bind:value={$sdBatchSizeStore}
|
||||
min="1"
|
||||
max="8"
|
||||
/>
|
||||
</label>
|
||||
<label class="flex flex-col gap-1">
|
||||
<span class="text-xs text-txtsecondary">Sampler</span>
|
||||
<select
|
||||
class="px-2 py-1 rounded border border-gray-200 dark:border-white/10 bg-surface focus:outline-none focus:ring-2 focus:ring-primary"
|
||||
bind:value={$sdSamplerStore}
|
||||
<span class="text-xs text-muted-foreground">Sampler</span>
|
||||
<Select.Root
|
||||
type="single"
|
||||
value={$sdSamplerStore}
|
||||
onValueChange={(v) => sdSamplerStore.set(v ?? "")}
|
||||
>
|
||||
<option value="">Default</option>
|
||||
<option value="euler_a">euler_a</option>
|
||||
<option value="euler">euler</option>
|
||||
<option value="heun">heun</option>
|
||||
<option value="dpm2">dpm2</option>
|
||||
<option value="dpmpp2s_a">dpmpp2s_a</option>
|
||||
<option value="dpmpp2m">dpmpp2m</option>
|
||||
<option value="dpmpp2mv2">dpmpp2mv2</option>
|
||||
<option value="ipndm">ipndm</option>
|
||||
<option value="ipndm_v">ipndm_v</option>
|
||||
<option value="lcm">lcm</option>
|
||||
<option value="ddim_trailing">ddim_trailing</option>
|
||||
<option value="tcd">tcd</option>
|
||||
</select>
|
||||
<Select.Trigger class="h-8">{$sdSamplerStore || "Default"}</Select.Trigger>
|
||||
<Select.Content>
|
||||
<Select.Item value="">Default</Select.Item>
|
||||
<Select.Item value="euler_a">euler_a</Select.Item>
|
||||
<Select.Item value="euler">euler</Select.Item>
|
||||
<Select.Item value="heun">heun</Select.Item>
|
||||
<Select.Item value="dpm2">dpm2</Select.Item>
|
||||
<Select.Item value="dpmpp2s_a">dpmpp2s_a</Select.Item>
|
||||
<Select.Item value="dpmpp2m">dpmpp2m</Select.Item>
|
||||
<Select.Item value="dpmpp2mv2">dpmpp2mv2</Select.Item>
|
||||
<Select.Item value="ipndm">ipndm</Select.Item>
|
||||
<Select.Item value="ipndm_v">ipndm_v</Select.Item>
|
||||
<Select.Item value="lcm">lcm</Select.Item>
|
||||
<Select.Item value="ddim_trailing">ddim_trailing</Select.Item>
|
||||
<Select.Item value="tcd">tcd</Select.Item>
|
||||
</Select.Content>
|
||||
</Select.Root>
|
||||
</label>
|
||||
<label class="flex flex-col gap-1">
|
||||
<span class="text-xs text-txtsecondary">Scheduler</span>
|
||||
<select
|
||||
class="px-2 py-1 rounded border border-gray-200 dark:border-white/10 bg-surface focus:outline-none focus:ring-2 focus:ring-primary"
|
||||
bind:value={$sdSchedulerStore}
|
||||
<span class="text-xs text-muted-foreground">Scheduler</span>
|
||||
<Select.Root
|
||||
type="single"
|
||||
value={$sdSchedulerStore}
|
||||
onValueChange={(v) => sdSchedulerStore.set(v ?? "")}
|
||||
>
|
||||
<option value="">Auto for model</option>
|
||||
<option value="discrete">discrete</option>
|
||||
<option value="karras">karras</option>
|
||||
<option value="exponential">exponential</option>
|
||||
<option value="ays">ays</option>
|
||||
<option value="gits">gits</option>
|
||||
</select>
|
||||
<Select.Trigger class="h-8">{$sdSchedulerStore || "Auto for model"}</Select.Trigger>
|
||||
<Select.Content>
|
||||
<Select.Item value="">Auto for model</Select.Item>
|
||||
<Select.Item value="discrete">discrete</Select.Item>
|
||||
<Select.Item value="karras">karras</Select.Item>
|
||||
<Select.Item value="exponential">exponential</Select.Item>
|
||||
<Select.Item value="ays">ays</Select.Item>
|
||||
<Select.Item value="gits">gits</Select.Item>
|
||||
</Select.Content>
|
||||
</Select.Root>
|
||||
</label>
|
||||
</div>
|
||||
|
||||
<label class="flex flex-col gap-1 mb-3">
|
||||
<span class="text-xs text-txtsecondary">Negative Prompt</span>
|
||||
<textarea
|
||||
class="px-2 py-1 rounded border border-gray-200 dark:border-white/10 bg-surface focus:outline-none focus:ring-2 focus:ring-primary resize-y text-sm"
|
||||
<span class="text-xs text-muted-foreground">Negative Prompt</span>
|
||||
<Textarea
|
||||
bind:value={$sdNegativePromptStore}
|
||||
rows="2"
|
||||
rows={2}
|
||||
placeholder="Elements to avoid..."
|
||||
></textarea>
|
||||
></Textarea>
|
||||
</label>
|
||||
|
||||
<!-- LoRA Selection -->
|
||||
<div>
|
||||
<span class="text-xs text-txtsecondary block mb-1">LoRAs</span>
|
||||
<span class="text-xs text-muted-foreground block mb-1">LoRAs</span>
|
||||
<div class="flex items-center gap-2 mb-2">
|
||||
<button
|
||||
class="px-3 py-1.5 text-sm rounded border border-gray-200 dark:border-white/10 bg-surface hover:bg-secondary-hover transition-colors disabled:opacity-50"
|
||||
<Button
|
||||
variant="outline"
|
||||
size="sm"
|
||||
onclick={loadLoras}
|
||||
disabled={!$selectedModelStore || isLoadingLoras}
|
||||
>
|
||||
{isLoadingLoras ? "Loading..." : lorasLoaded ? "Reload LoRAs" : "Load LoRAs"}
|
||||
</button>
|
||||
</Button>
|
||||
{#if lorasLoaded && availableLoras.length > 0}
|
||||
<select
|
||||
class="flex-1 px-2 py-1.5 text-sm rounded border border-gray-200 dark:border-white/10 bg-surface focus:outline-none focus:ring-2 focus:ring-primary"
|
||||
onchange={addLora}
|
||||
<Select.Root
|
||||
type="single"
|
||||
value=""
|
||||
onValueChange={(v) => {
|
||||
if (v) {
|
||||
const lora = availableLoras.find((l) => l.path === v);
|
||||
if (lora && !selectedLoras.some((s) => s.path === v)) {
|
||||
selectedLoras = [...selectedLoras, { path: lora.path, multiplier: 1.0 }];
|
||||
}
|
||||
}
|
||||
}}
|
||||
>
|
||||
<option value="">Add a LoRA...</option>
|
||||
{#each availableLoras.filter((l) => !selectedLoras.some((s) => s.path === l.path)) as lora}
|
||||
<option value={lora.path}>{lora.name}</option>
|
||||
{/each}
|
||||
</select>
|
||||
<Select.Trigger class="h-8 flex-1">Add a LoRA...</Select.Trigger>
|
||||
<Select.Content>
|
||||
{#each availableLoras.filter((l) => !selectedLoras.some((s) => s.path === l.path)) as lora (lora.path)}
|
||||
<Select.Item value={lora.path}>{lora.name}</Select.Item>
|
||||
{/each}
|
||||
</Select.Content>
|
||||
</Select.Root>
|
||||
{/if}
|
||||
</div>
|
||||
{#if loraError}
|
||||
<p class="text-xs text-red-500 mb-1">{loraError}</p>
|
||||
{/if}
|
||||
{#if lorasLoaded && availableLoras.length === 0}
|
||||
<p class="text-xs text-txtsecondary">No LoRAs available</p>
|
||||
<p class="text-xs text-muted-foreground">No LoRAs available</p>
|
||||
{/if}
|
||||
{#if selectedLoras.length > 0}
|
||||
<div class="flex flex-col gap-1.5">
|
||||
{#each selectedLoras as lora}
|
||||
<div class="flex items-center gap-2 text-sm">
|
||||
<span class="flex-1 truncate">{getLoraName(lora.path)}</span>
|
||||
<input
|
||||
<Input
|
||||
type="number"
|
||||
class="w-20 px-1.5 py-1 text-xs rounded border border-gray-200 dark:border-white/10 bg-surface focus:outline-none focus:ring-1 focus:ring-primary"
|
||||
class="h-7 w-20 text-xs"
|
||||
value={lora.multiplier}
|
||||
oninput={(e) => updateLoraMultiplier(lora.path, parseFloat((e.target as HTMLInputElement).value) || 1)}
|
||||
min="0"
|
||||
max="2"
|
||||
step="0.1"
|
||||
/>
|
||||
<button
|
||||
class="px-1.5 py-0.5 text-xs rounded border border-gray-200 dark:border-white/10 hover:bg-red-500 hover:text-white hover:border-red-500 transition-colors"
|
||||
<Button
|
||||
variant="outline"
|
||||
size="sm"
|
||||
class="h-7 px-1.5 text-xs hover:bg-destructive hover:text-destructive-foreground"
|
||||
onclick={() => removeLora(lora.path)}
|
||||
aria-label="Remove LoRA"
|
||||
>
|
||||
x
|
||||
</button>
|
||||
<X class="size-3" />
|
||||
</Button>
|
||||
</div>
|
||||
{/each}
|
||||
</div>
|
||||
@@ -386,14 +407,14 @@
|
||||
|
||||
<!-- Empty state for no models configured -->
|
||||
{#if !hasModels}
|
||||
<div class="flex-1 flex items-center justify-center text-txtsecondary">
|
||||
<div class="flex-1 flex items-center justify-center text-muted-foreground">
|
||||
<p>No models configured. Add models to your configuration to generate images.</p>
|
||||
</div>
|
||||
{:else}
|
||||
<!-- Image display area -->
|
||||
<div class="flex-1 overflow-auto mb-4 flex items-center justify-center bg-surface border border-gray-200 dark:border-white/10 rounded">
|
||||
<div class="flex-1 overflow-auto mb-4 flex items-center justify-center bg-background border border-border rounded-md">
|
||||
{#if isGenerating}
|
||||
<div class="text-center text-txtsecondary">
|
||||
<div class="text-center text-muted-foreground">
|
||||
<div class="inline-block w-8 h-8 border-4 border-primary border-t-transparent rounded-full animate-spin mb-2"></div>
|
||||
<p>Generating image...</p>
|
||||
</div>
|
||||
@@ -403,7 +424,6 @@
|
||||
<p class="text-sm mt-1">{error}</p>
|
||||
</div>
|
||||
{:else if generatedImages.length > 1}
|
||||
<!-- Grid for multiple images (batch) -->
|
||||
<div class="grid grid-cols-2 gap-2 p-2 w-full h-full overflow-auto">
|
||||
{#each generatedImages as img, i}
|
||||
<div class="relative flex items-center justify-center">
|
||||
@@ -418,15 +438,15 @@
|
||||
class="max-w-full max-h-full object-contain hover:opacity-90 transition-opacity"
|
||||
/>
|
||||
</button>
|
||||
<button
|
||||
class="absolute bottom-2 right-2 p-1.5 bg-black/60 hover:bg-black/80 text-white rounded-full transition-colors"
|
||||
<Button
|
||||
variant="secondary"
|
||||
size="icon"
|
||||
class="absolute bottom-2 right-2 h-8 w-8 bg-black/60 hover:bg-black/80 text-white"
|
||||
onclick={(e) => { e.stopPropagation(); downloadImage(i); }}
|
||||
aria-label="Download image"
|
||||
>
|
||||
<svg class="w-4 h-4" fill="none" stroke="currentColor" viewBox="0 0 24 24">
|
||||
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M4 16v1a3 3 0 003 3h10a3 3 0 003-3v-1m-4-4l-4 4m0 0l-4-4m4 4V4"></path>
|
||||
</svg>
|
||||
</button>
|
||||
<Download class="size-4" />
|
||||
</Button>
|
||||
</div>
|
||||
{/each}
|
||||
</div>
|
||||
@@ -443,18 +463,18 @@
|
||||
class="max-w-full max-h-full object-contain hover:opacity-90 transition-opacity"
|
||||
/>
|
||||
</button>
|
||||
<button
|
||||
class="absolute bottom-2 right-2 p-2 bg-black/60 hover:bg-black/80 text-white rounded-full transition-colors"
|
||||
<Button
|
||||
variant="secondary"
|
||||
size="icon"
|
||||
class="absolute bottom-2 right-2 bg-black/60 hover:bg-black/80 text-white"
|
||||
onclick={(e) => { e.stopPropagation(); downloadImage(0); }}
|
||||
aria-label="Download image"
|
||||
>
|
||||
<svg class="w-5 h-5" fill="none" stroke="currentColor" viewBox="0 0 24 24">
|
||||
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M4 16v1a3 3 0 003 3h10a3 3 0 003-3v-1m-4-4l-4 4m0 0l-4-4m4 4V4"></path>
|
||||
</svg>
|
||||
</button>
|
||||
<Download class="size-5" />
|
||||
</Button>
|
||||
</div>
|
||||
{:else}
|
||||
<div class="text-center text-txtsecondary">
|
||||
<div class="text-center text-muted-foreground">
|
||||
<p>Enter a prompt below to generate an image</p>
|
||||
</div>
|
||||
{/if}
|
||||
@@ -471,24 +491,25 @@
|
||||
/>
|
||||
<div class="flex flex-row md:flex-col gap-2">
|
||||
{#if isGenerating}
|
||||
<button class="btn bg-red-500 hover:bg-red-600 text-white flex-1 md:flex-none" onclick={cancelGeneration}>
|
||||
<Button variant="destructive" class="flex-1 md:flex-none" onclick={cancelGeneration}>
|
||||
Cancel
|
||||
</button>
|
||||
</Button>
|
||||
{:else}
|
||||
<button
|
||||
class="btn bg-primary text-btn-primary-text hover:opacity-90 flex-1 md:flex-none"
|
||||
<Button
|
||||
class="flex-1 md:flex-none"
|
||||
onclick={generate}
|
||||
disabled={!prompt.trim() || !$selectedModelStore}
|
||||
>
|
||||
Generate
|
||||
</button>
|
||||
<button
|
||||
class="btn flex-1 md:flex-none"
|
||||
</Button>
|
||||
<Button
|
||||
variant="outline"
|
||||
class="flex-1 md:flex-none"
|
||||
onclick={clearImage}
|
||||
disabled={generatedImages.length === 0 && !error && !prompt.trim()}
|
||||
>
|
||||
Clear
|
||||
</button>
|
||||
</Button>
|
||||
{/if}
|
||||
</div>
|
||||
</div>
|
||||
@@ -505,13 +526,15 @@
|
||||
aria-modal="true"
|
||||
tabindex="-1"
|
||||
>
|
||||
<button
|
||||
class="absolute top-4 right-4 text-white hover:text-gray-300 text-2xl w-10 h-10 flex items-center justify-center rounded-full hover:bg-white/10 transition-colors"
|
||||
<Button
|
||||
variant="secondary"
|
||||
size="icon"
|
||||
class="absolute top-4 right-4 bg-black/60 hover:bg-black/80 text-white"
|
||||
onclick={() => closeFullscreen()}
|
||||
aria-label="Close fullscreen"
|
||||
>
|
||||
×
|
||||
</button>
|
||||
<X class="size-6" />
|
||||
</Button>
|
||||
<img
|
||||
src={generatedImages[fullscreenIndex]}
|
||||
alt="AI generated content"
|
||||
|
||||
@@ -1,44 +1,69 @@
|
||||
<script lang="ts">
|
||||
import { models } from "../../stores/api";
|
||||
import { groupModels } from "../../lib/modelUtils";
|
||||
import * as Select from "$lib/components/ui/select/index.js";
|
||||
|
||||
interface Props {
|
||||
value: string;
|
||||
placeholder?: string;
|
||||
disabled?: boolean;
|
||||
capabilities?: string[];
|
||||
matchAny?: boolean;
|
||||
}
|
||||
|
||||
let { value = $bindable(), placeholder = "Select a model...", disabled = false }: Props = $props();
|
||||
let { value = $bindable(), placeholder = "Select a model...", disabled = false, capabilities, matchAny = false }: Props = $props();
|
||||
|
||||
let grouped = $derived(groupModels($models));
|
||||
let hasModels = $derived(grouped.local.length > 0 || Object.keys(grouped.peersByProvider).length > 0);
|
||||
let grouped = $derived(groupModels($models, capabilities, matchAny));
|
||||
let hasMatching = $derived(grouped.localMatching.length > 0);
|
||||
let hasModels = $derived(hasMatching || grouped.local.length > 0 || Object.keys(grouped.peersByProvider).length > 0);
|
||||
</script>
|
||||
|
||||
{#if hasModels}
|
||||
<select
|
||||
class="min-w-0 flex-1 basis-48 px-3 py-2 rounded border border-gray-200 dark:border-white/10 bg-surface focus:outline-none focus:ring-2 focus:ring-primary"
|
||||
bind:value
|
||||
<Select.Root
|
||||
type="single"
|
||||
{value}
|
||||
onValueChange={(v) => v !== undefined && (value = v)}
|
||||
{disabled}
|
||||
>
|
||||
<option value="">{placeholder}</option>
|
||||
{#if grouped.local.length > 0}
|
||||
<optgroup label="Local">
|
||||
{#each grouped.local as model (model.id)}
|
||||
<option value={model.id}>{model.id}</option>
|
||||
{#if model.aliases}
|
||||
{#each model.aliases as alias (alias)}
|
||||
<option value={alias}> ↳ {alias}</option>
|
||||
{/each}
|
||||
{/if}
|
||||
{/each}
|
||||
</optgroup>
|
||||
{/if}
|
||||
{#each Object.entries(grouped.peersByProvider).sort(([a], [b]) => a.localeCompare(b)) as [peerId, peerModels] (peerId)}
|
||||
<optgroup label="Peer: {peerId}">
|
||||
{#each peerModels as model (model.id)}
|
||||
<option value={model.id}>{model.id}</option>
|
||||
{/each}
|
||||
</optgroup>
|
||||
{/each}
|
||||
</select>
|
||||
<Select.Trigger class="min-w-0 flex-1 basis-48">{value || placeholder}</Select.Trigger>
|
||||
<Select.Content>
|
||||
<Select.Item value="">{placeholder}</Select.Item>
|
||||
{#if hasMatching}
|
||||
<Select.Group>
|
||||
<Select.Label>Matching Capabilities</Select.Label>
|
||||
{#each grouped.localMatching as model (model.id)}
|
||||
<Select.Item value={model.id}>{model.id}</Select.Item>
|
||||
{#if model.aliases}
|
||||
{#each model.aliases as alias (alias)}
|
||||
<Select.Item value={alias}>↳ {alias}</Select.Item>
|
||||
{/each}
|
||||
{/if}
|
||||
{/each}
|
||||
</Select.Group>
|
||||
<Select.Separator />
|
||||
{/if}
|
||||
{#if grouped.local.length > 0}
|
||||
<Select.Group>
|
||||
<Select.Label>Local</Select.Label>
|
||||
{#each grouped.local as model (model.id)}
|
||||
<Select.Item value={model.id}>{model.id}</Select.Item>
|
||||
{#if model.aliases}
|
||||
{#each model.aliases as alias (alias)}
|
||||
<Select.Item value={alias}>↳ {alias}</Select.Item>
|
||||
{/each}
|
||||
{/if}
|
||||
{/each}
|
||||
</Select.Group>
|
||||
<Select.Separator />
|
||||
{/if}
|
||||
{#each Object.entries(grouped.peersByProvider).sort(([a], [b]) => a.localeCompare(b)) as [peerId, peerModels] (peerId)}
|
||||
<Select.Group>
|
||||
<Select.Label>Peer: {peerId}</Select.Label>
|
||||
{#each peerModels as model (model.id)}
|
||||
<Select.Item value={model.id}>{model.id}</Select.Item>
|
||||
{/each}
|
||||
</Select.Group>
|
||||
{/each}
|
||||
</Select.Content>
|
||||
</Select.Root>
|
||||
{/if}
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
</script>
|
||||
|
||||
<div class="flex items-center justify-center h-full">
|
||||
<div class="text-center text-txtsecondary">
|
||||
<div class="text-muted-foreground text-center">
|
||||
<p class="text-lg">{featureName}</p>
|
||||
<p class="text-sm mt-2">To be implemented</p>
|
||||
</div>
|
||||
|
||||
@@ -4,6 +4,10 @@
|
||||
import { rerank } from "../../lib/rerankApi";
|
||||
import { playgroundStores } from "../../stores/playgroundActivity";
|
||||
import ModelSelector from "./ModelSelector.svelte";
|
||||
import { Button } from "$lib/components/ui/button/index.js";
|
||||
import { Input } from "$lib/components/ui/input/index.js";
|
||||
import { Textarea } from "$lib/components/ui/textarea/index.js";
|
||||
import * as ToggleGroup from "$lib/components/ui/toggle-group/index.js";
|
||||
|
||||
type RerankRow = { doc: string; score: number | null };
|
||||
type SortOrder = "none" | "asc" | "desc";
|
||||
@@ -234,9 +238,9 @@
|
||||
}
|
||||
|
||||
function scoreColor(score: number | null): string {
|
||||
if (score === null) return "text-txtsecondary";
|
||||
if (score === null) return "text-muted-foreground";
|
||||
if (score > 0) return "text-green-600 dark:text-green-400";
|
||||
return "text-red-500 dark:text-red-400";
|
||||
return "text-destructive";
|
||||
}
|
||||
|
||||
function formatScore(score: number | null): string {
|
||||
@@ -264,11 +268,11 @@
|
||||
<div class="flex flex-col h-full">
|
||||
<!-- Top bar: model selector + query input (table mode) + mode toggle -->
|
||||
<div class="shrink-0 flex flex-wrap gap-2 mb-4">
|
||||
<ModelSelector bind:value={$selectedModelStore} placeholder="Select a rerank model..." disabled={isLoading} />
|
||||
<ModelSelector bind:value={$selectedModelStore} placeholder="Select a rerank model..." disabled={isLoading} capabilities={["reranker"]} />
|
||||
{#if editorMode === "table"}
|
||||
<input
|
||||
<Input
|
||||
type="text"
|
||||
class="min-w-0 flex-1 basis-48 px-3 py-2 rounded border border-gray-200 dark:border-white/10 bg-surface focus:outline-none focus:ring-2 focus:ring-primary"
|
||||
class="min-w-0 flex-1 basis-48"
|
||||
placeholder="Query..."
|
||||
bind:value={query}
|
||||
disabled={isLoading}
|
||||
@@ -276,60 +280,50 @@
|
||||
/>
|
||||
{/if}
|
||||
<!-- Table / JSON toggle -->
|
||||
<div class="flex rounded border border-gray-200 dark:border-white/10 overflow-hidden shrink-0">
|
||||
<button
|
||||
class="px-3 py-1.5 text-sm transition-colors {editorMode === 'table'
|
||||
? 'bg-primary text-btn-primary-text'
|
||||
: 'bg-surface hover:bg-secondary-hover'}"
|
||||
onclick={switchToTable}
|
||||
disabled={isLoading}
|
||||
>
|
||||
Table
|
||||
</button>
|
||||
<button
|
||||
class="px-3 py-1.5 text-sm border-l border-gray-200 dark:border-white/10 transition-colors {editorMode === 'json'
|
||||
? 'bg-primary text-btn-primary-text'
|
||||
: 'bg-surface hover:bg-secondary-hover'}"
|
||||
onclick={switchToJson}
|
||||
disabled={isLoading}
|
||||
>
|
||||
JSON
|
||||
</button>
|
||||
</div>
|
||||
<ToggleGroup.Root
|
||||
type="single"
|
||||
variant="outline"
|
||||
value={editorMode}
|
||||
onValueChange={(v) => v && (v === "table" ? switchToTable() : switchToJson())}
|
||||
class="shrink-0"
|
||||
>
|
||||
<ToggleGroup.Item value="table" disabled={isLoading}>Table</ToggleGroup.Item>
|
||||
<ToggleGroup.Item value="json" disabled={isLoading}>JSON</ToggleGroup.Item>
|
||||
</ToggleGroup.Root>
|
||||
</div>
|
||||
|
||||
{#if !hasModels}
|
||||
<div class="flex-1 flex items-center justify-center text-txtsecondary">
|
||||
<div class="text-muted-foreground flex flex-1 items-center justify-center">
|
||||
<p>No models configured. Add models to your configuration to use reranking.</p>
|
||||
</div>
|
||||
{:else if editorMode === "json"}
|
||||
<!-- JSON editor -->
|
||||
<div class="flex-1 flex flex-col min-h-0 mb-4">
|
||||
<textarea
|
||||
class="flex-1 w-full font-mono text-sm px-3 py-2 rounded border border-gray-200 dark:border-white/10 bg-surface focus:outline-none focus:ring-2 focus:ring-primary resize-none"
|
||||
<div class="mb-4 flex min-h-0 flex-1 flex-col">
|
||||
<Textarea
|
||||
class="w-full flex-1 resize-none font-mono text-sm"
|
||||
bind:value={jsonText}
|
||||
disabled={isLoading}
|
||||
placeholder={'{\n "query": "your search query",\n "documents": [\n "document one",\n "document two"\n ]\n}'}
|
||||
spellcheck={false}
|
||||
></textarea>
|
||||
/>
|
||||
{#if jsonError}
|
||||
<p class="mt-1 text-sm text-red-500">{jsonError}</p>
|
||||
<p class="text-destructive mt-1 text-sm">{jsonError}</p>
|
||||
{/if}
|
||||
</div>
|
||||
{:else}
|
||||
<!-- Document table -->
|
||||
<div class="flex-1 overflow-y-auto mb-4 border border-gray-200 dark:border-white/10 rounded">
|
||||
<table class="w-full border-collapse table-fixed">
|
||||
<div class="mb-4 flex-1 overflow-y-auto rounded-lg border">
|
||||
<table class="w-full table-fixed border-collapse">
|
||||
<colgroup>
|
||||
<col class="w-auto" />
|
||||
<col style="width: 120px" />
|
||||
<col style="width: 40px" />
|
||||
</colgroup>
|
||||
<thead class="sticky top-0 bg-surface border-b border-gray-200 dark:border-white/10">
|
||||
<thead class="bg-card sticky top-0 border-b">
|
||||
<tr>
|
||||
<th class="px-3 py-2 text-left text-sm font-medium text-txtsecondary">Document</th>
|
||||
<th class="text-muted-foreground px-3 py-2 text-left text-sm font-medium">Document</th>
|
||||
<th
|
||||
class="px-3 py-2 text-right text-sm font-medium text-txtsecondary cursor-pointer select-none hover:text-txtprimary transition-colors"
|
||||
class="text-muted-foreground hover:text-foreground cursor-pointer select-none px-3 py-2 text-right text-sm font-medium transition-colors"
|
||||
onclick={cycleSortOrder}
|
||||
>
|
||||
Score{sortIndicator()}
|
||||
@@ -339,11 +333,11 @@
|
||||
</thead>
|
||||
<tbody>
|
||||
{#each displayRows as { row, i } (i)}
|
||||
<tr class="border-b border-gray-100 dark:border-white/5 last:border-0">
|
||||
<tr class="border-b last:border-0">
|
||||
<td class="px-3 py-1.5">
|
||||
<input
|
||||
<Input
|
||||
type="text"
|
||||
class="w-full bg-transparent focus:outline-none focus:ring-1 focus:ring-primary rounded px-1 py-0.5"
|
||||
class="border-0 focus-visible:ring-1 h-7 px-1 py-0.5 bg-transparent"
|
||||
placeholder={i === rows.length - 1 ? "Add document..." : "Document text..."}
|
||||
value={row.doc}
|
||||
oninput={(e) => updateDoc(i, (e.target as HTMLInputElement).value)}
|
||||
@@ -353,21 +347,23 @@
|
||||
</td>
|
||||
<td class="px-3 py-1.5 text-right font-mono text-sm {scoreColor(row.score)}">
|
||||
{#if isLoading && row.score === null && row.doc.trim() !== ""}
|
||||
<span class="inline-block w-4 h-4 border-2 border-current border-t-transparent rounded-full animate-spin align-middle"></span>
|
||||
<span class="inline-block h-4 w-4 animate-spin rounded-full border-2 border-current border-t-transparent align-middle"></span>
|
||||
{:else}
|
||||
{formatScore(row.score)}
|
||||
{/if}
|
||||
</td>
|
||||
<td class="px-2 py-1.5 text-center">
|
||||
<button
|
||||
class="w-7 h-7 flex items-center justify-center text-txtsecondary hover:text-red-500 transition-colors rounded disabled:opacity-30 disabled:cursor-not-allowed"
|
||||
<Button
|
||||
variant="ghost"
|
||||
size="icon-sm"
|
||||
class="h-7 w-7 text-muted-foreground hover:text-destructive"
|
||||
onclick={() => deleteRow(i)}
|
||||
disabled={rows.length <= 1}
|
||||
tabindex="-1"
|
||||
tabindex={-1}
|
||||
aria-label="Remove row"
|
||||
>
|
||||
×
|
||||
</button>
|
||||
</Button>
|
||||
</td>
|
||||
</tr>
|
||||
{/each}
|
||||
@@ -378,28 +374,18 @@
|
||||
|
||||
<!-- Bottom toolbar -->
|
||||
{#if hasModels}
|
||||
<div class="shrink-0 flex flex-wrap items-center gap-2">
|
||||
<div class="flex shrink-0 flex-wrap items-center gap-2">
|
||||
{#if isLoading}
|
||||
<button class="btn bg-red-500 hover:bg-red-600 text-white" onclick={cancel}>
|
||||
Cancel
|
||||
</button>
|
||||
<Button variant="destructive" onclick={cancel}>Cancel</Button>
|
||||
{:else}
|
||||
<button
|
||||
class="btn bg-primary text-btn-primary-text hover:opacity-90"
|
||||
onclick={submit}
|
||||
disabled={!canSubmit}
|
||||
>
|
||||
Rerank
|
||||
</button>
|
||||
<button class="btn" onclick={clear} disabled={isCleared}>
|
||||
Clear
|
||||
</button>
|
||||
<Button onclick={submit} disabled={!canSubmit}>Rerank</Button>
|
||||
<Button variant="outline" onclick={clear} disabled={isCleared}>Clear</Button>
|
||||
{/if}
|
||||
|
||||
{#if error}
|
||||
<span class="text-sm text-red-500 ml-2">{error}</span>
|
||||
<span class="text-destructive ml-2 text-sm">{error}</span>
|
||||
{:else if usage}
|
||||
<span class="text-sm text-txtsecondary ml-2">{usage.total_tokens} tokens</span>
|
||||
<span class="text-muted-foreground ml-2 text-sm">{usage.total_tokens} tokens</span>
|
||||
{/if}
|
||||
</div>
|
||||
{/if}
|
||||
|
||||
@@ -5,6 +5,9 @@
|
||||
import { playgroundStores } from "../../stores/playgroundActivity";
|
||||
import ModelSelector from "./ModelSelector.svelte";
|
||||
import ExpandableTextarea from "./ExpandableTextarea.svelte";
|
||||
import { Button } from "$lib/components/ui/button/index.js";
|
||||
import * as Select from "$lib/components/ui/select/index.js";
|
||||
import { RefreshCw, Download } from "@lucide/svelte";
|
||||
|
||||
const selectedModelStore = persistentStore<string>("playground-speech-model", "");
|
||||
const selectedVoiceStore = persistentStore<string>("playground-speech-voice", "coral");
|
||||
@@ -106,8 +109,7 @@
|
||||
}
|
||||
}
|
||||
|
||||
function handleVoiceChange(event: Event) {
|
||||
const value = (event.target as HTMLSelectElement).value;
|
||||
function handleVoiceChange(value: string) {
|
||||
if (value === "(refresh)") {
|
||||
refreshVoices();
|
||||
} else {
|
||||
@@ -206,51 +208,46 @@
|
||||
<div class="flex flex-col h-full">
|
||||
<!-- Model and voice selectors -->
|
||||
<div class="shrink-0 flex gap-2 mb-4">
|
||||
<ModelSelector bind:value={$selectedModelStore} placeholder="Select a speech model..." disabled={isGenerating} />
|
||||
<ModelSelector bind:value={$selectedModelStore} placeholder="Select a speech model..." disabled={isGenerating} capabilities={["audio_speech"]} />
|
||||
<div class="flex gap-2">
|
||||
<select
|
||||
class="shrink-0 px-3 py-2 rounded border border-gray-200 dark:border-white/10 bg-surface focus:outline-none focus:ring-2 focus:ring-primary"
|
||||
<Select.Root
|
||||
type="single"
|
||||
value={$selectedVoiceStore}
|
||||
onchange={handleVoiceChange}
|
||||
disabled={isGenerating || isLoadingVoices || !$selectedModelStore}
|
||||
onValueChange={(v) => v && handleVoiceChange(v)}
|
||||
>
|
||||
{#each availableVoices as voice (voice)}
|
||||
<option value={voice}>{voice}</option>
|
||||
{/each}
|
||||
<option value="(refresh)">(refresh)</option>
|
||||
</select>
|
||||
<Select.Trigger class="h-9 w-40">{$selectedVoiceStore}</Select.Trigger>
|
||||
<Select.Content>
|
||||
{#each availableVoices as voice (voice)}
|
||||
<Select.Item value={voice}>{voice}</Select.Item>
|
||||
{/each}
|
||||
<Select.Item value="(refresh)">(refresh)</Select.Item>
|
||||
</Select.Content>
|
||||
</Select.Root>
|
||||
{#if $selectedModelStore && !getVoicesCache()[$selectedModelStore]}
|
||||
<button
|
||||
class="btn shrink-0"
|
||||
<Button
|
||||
variant="outline"
|
||||
size="icon"
|
||||
class="shrink-0"
|
||||
onclick={refreshVoices}
|
||||
disabled={isLoadingVoices}
|
||||
title={isLoadingVoices ? "Loading voices..." : "Load voices for this model"}
|
||||
>
|
||||
{#if isLoadingVoices}
|
||||
<svg class="w-5 h-5 animate-spin" fill="none" viewBox="0 0 24 24">
|
||||
<circle class="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" stroke-width="4"></circle>
|
||||
<path class="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4zm2 5.291A7.962 7.962 0 014 12H0c0 3.042 1.135 5.824 3 7.938l3-2.647z"></path>
|
||||
</svg>
|
||||
{:else}
|
||||
<svg class="w-5 h-5" fill="none" stroke="currentColor" viewBox="0 0 24 24">
|
||||
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M4 4v5h.582m15.356 2A8.001 8.001 0 004.582 9m0 0H9m11 11v-5h-.581m0 0a8.003 8.003 0 01-15.357-2m15.357 2H15"></path>
|
||||
</svg>
|
||||
{/if}
|
||||
</button>
|
||||
<RefreshCw class={isLoadingVoices ? "animate-spin" : ""} />
|
||||
</Button>
|
||||
{/if}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Empty state for no models configured -->
|
||||
{#if !hasModels}
|
||||
<div class="flex-1 flex items-center justify-center text-txtsecondary">
|
||||
<div class="flex-1 flex items-center justify-center text-muted-foreground">
|
||||
<p>No models configured. Add models to your configuration to generate speech.</p>
|
||||
</div>
|
||||
{:else}
|
||||
<!-- Audio display area -->
|
||||
<div class="shrink-0 mb-4 bg-surface border border-gray-200 dark:border-white/10 rounded p-4 md:p-6">
|
||||
<div class="shrink-0 mb-4 bg-background border border-border rounded-md p-4 md:p-6">
|
||||
{#if isGenerating}
|
||||
<div class="flex items-center justify-center text-txtsecondary py-8">
|
||||
<div class="flex items-center justify-center text-muted-foreground py-8">
|
||||
<div class="text-center">
|
||||
<div class="inline-block w-8 h-8 border-4 border-primary border-t-transparent rounded-full animate-spin mb-2"></div>
|
||||
<p>Generating speech...</p>
|
||||
@@ -267,7 +264,7 @@
|
||||
<div class="flex flex-col gap-4">
|
||||
<!-- Header with metadata and download -->
|
||||
<div class="flex items-center justify-between gap-4">
|
||||
<div class="flex flex-wrap gap-3 text-sm text-txtsecondary">
|
||||
<div class="flex flex-wrap gap-3 text-sm text-muted-foreground">
|
||||
{#if generatedVoice}
|
||||
<span class="flex items-center gap-1">
|
||||
<svg class="w-4 h-4" fill="none" stroke="currentColor" viewBox="0 0 24 24">
|
||||
@@ -285,15 +282,9 @@
|
||||
</span>
|
||||
{/if}
|
||||
</div>
|
||||
<button
|
||||
class="btn shrink-0"
|
||||
onclick={downloadAudio}
|
||||
title="Download audio file"
|
||||
>
|
||||
<svg class="w-5 h-5" fill="none" stroke="currentColor" viewBox="0 0 24 24">
|
||||
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M4 16v1a3 3 0 003 3h10a3 3 0 003-3v-1m-4-4l-4 4m0 0l-4-4m4 4V4"></path>
|
||||
</svg>
|
||||
</button>
|
||||
<Button variant="outline" size="icon" class="shrink-0" onclick={downloadAudio} title="Download audio file">
|
||||
<Download />
|
||||
</Button>
|
||||
</div>
|
||||
|
||||
<!-- Audio player with larger controls -->
|
||||
@@ -305,7 +296,7 @@
|
||||
</div>
|
||||
</div>
|
||||
{:else}
|
||||
<div class="flex items-center justify-center text-txtsecondary py-8">
|
||||
<div class="flex items-center justify-center text-muted-foreground py-8">
|
||||
<div class="text-center">
|
||||
<svg class="w-12 h-12 md:w-16 md:h-16 mx-auto mb-2 opacity-40" fill="none" stroke="currentColor" viewBox="0 0 24 24">
|
||||
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M19 11a7 7 0 01-7 7m0 0a7 7 0 01-7-7m7 7v4m0 0H8m4 0h4m-4-8a3 3 0 01-3-3V5a3 3 0 116 0v6a3 3 0 01-3 3z"></path>
|
||||
@@ -327,24 +318,25 @@
|
||||
/>
|
||||
<div class="shrink-0 flex md:flex-col gap-2">
|
||||
{#if isGenerating}
|
||||
<button class="btn bg-red-500 hover:bg-red-600 text-white flex-1 md:flex-none" onclick={cancelGeneration}>
|
||||
<Button variant="destructive" class="flex-1 md:flex-none" onclick={cancelGeneration}>
|
||||
Cancel
|
||||
</button>
|
||||
</Button>
|
||||
{:else}
|
||||
<button
|
||||
class="btn bg-primary text-btn-primary-text hover:opacity-90 flex-1 md:flex-none"
|
||||
<Button
|
||||
class="flex-1 md:flex-none"
|
||||
onclick={generate}
|
||||
disabled={!inputText.trim() || !$selectedModelStore}
|
||||
>
|
||||
Generate
|
||||
</button>
|
||||
<button
|
||||
class="btn flex-1 md:flex-none"
|
||||
</Button>
|
||||
<Button
|
||||
variant="outline"
|
||||
class="flex-1 md:flex-none"
|
||||
onclick={clearInput}
|
||||
disabled={!inputText.trim()}
|
||||
>
|
||||
Clear
|
||||
</button>
|
||||
</Button>
|
||||
<label class="flex items-center justify-center gap-2 text-sm cursor-pointer">
|
||||
<input
|
||||
type="checkbox"
|
||||
|
||||
+154
-135
@@ -1,177 +1,196 @@
|
||||
@import "tailwindcss";
|
||||
@import "tw-animate-css";
|
||||
@import "katex/dist/katex.min.css";
|
||||
@custom-variant dark (&:where([data-theme=dark], [data-theme=dark] *));
|
||||
|
||||
@theme {
|
||||
--color-background: rgba(252, 252, 249, 1);
|
||||
--color-surface: rgba(255, 255, 253, 1);
|
||||
@custom-variant dark (&:is(.dark *));
|
||||
|
||||
/* text colors */
|
||||
--color-txtmain: rgba(19, 52, 59, 1);
|
||||
--color-txtsecondary: rgba(98, 108, 113, 1);
|
||||
--color-navlink-active: rgba(245, 245, 245, 1);
|
||||
:root {
|
||||
--radius: 0;
|
||||
|
||||
--color-primary: rgba(50, 184, 198, 1);
|
||||
/* shadcn base palette (zinc) */
|
||||
--background: oklch(1 0 0);
|
||||
--foreground: oklch(0.141 0.005 285.823);
|
||||
--card: oklch(1 0 0);
|
||||
--card-foreground: oklch(0.141 0.005 285.823);
|
||||
--popover: oklch(1 0 0);
|
||||
--popover-foreground: oklch(0.141 0.005 285.823);
|
||||
|
||||
--color-primary-hover: rgba(29, 116, 128, 1);
|
||||
--color-primary-active: rgba(26, 104, 115, 1);
|
||||
--color-secondary: rgba(94, 82, 64, 0.12);
|
||||
--color-secondary-hover: rgba(94, 82, 64, 0.2);
|
||||
--color-secondary-active: rgba(94, 82, 64, 0.25);
|
||||
--color-border: rgba(94, 82, 64, 0.3);
|
||||
--color-btn-primary-text: rgba(252, 252, 249, 1);
|
||||
--color-card-border: rgba(94, 82, 64, 0.12);
|
||||
--color-card-border-inner: rgba(94, 82, 64, 0.12);
|
||||
--color-error: rgba(192, 21, 47, 1);
|
||||
--color-success: rgba(33, 128, 141, 1);
|
||||
--color-warning: rgb(244, 155, 0);
|
||||
--color-info: rgba(98, 108, 113, 1);
|
||||
--color-focus-ring: rgba(33, 128, 141, 0.4);
|
||||
--color-select-caret: rgba(19, 52, 59, 0.8);
|
||||
--color-btn-border: rgba(94, 82, 64, 0.7);
|
||||
/* brand accent: llama-swap teal */
|
||||
--primary: rgb(50 184 198);
|
||||
--primary-foreground: oklch(0.985 0 0);
|
||||
|
||||
--secondary: oklch(0.967 0.001 286.375);
|
||||
--secondary-foreground: oklch(0.21 0.006 285.885);
|
||||
--muted: oklch(0.967 0.001 286.375);
|
||||
--muted-foreground: oklch(0.552 0.016 285.938);
|
||||
--accent: oklch(0.967 0.001 286.375);
|
||||
--accent-foreground: oklch(0.21 0.006 285.885);
|
||||
--destructive: oklch(0.577 0.245 27.325);
|
||||
--border: oklch(0.92 0.004 286.32);
|
||||
--input: oklch(0.92 0.004 286.32);
|
||||
--ring: rgb(50 184 198);
|
||||
|
||||
--chart-1: rgb(50 184 198);
|
||||
--chart-2: oklch(0.6 0.118 184.704);
|
||||
--chart-3: oklch(0.398 0.07 227.392);
|
||||
--chart-4: oklch(0.828 0.189 84.429);
|
||||
--chart-5: oklch(0.769 0.188 70.08);
|
||||
|
||||
--sidebar: oklch(0.985 0 0);
|
||||
--sidebar-foreground: oklch(0.141 0.005 285.823);
|
||||
--sidebar-primary: rgb(50 184 198);
|
||||
--sidebar-primary-foreground: oklch(0.985 0 0);
|
||||
--sidebar-accent: oklch(0.967 0.001 286.375);
|
||||
--sidebar-accent-foreground: oklch(0.21 0.006 285.885);
|
||||
--sidebar-border: oklch(0.92 0.004 286.32);
|
||||
--sidebar-ring: rgb(50 184 198);
|
||||
|
||||
/* semantic status colors (shared light/dark-aware below) */
|
||||
--success: oklch(0.6 0.118 184.704);
|
||||
--warning: oklch(0.769 0.17 70.08);
|
||||
--info: oklch(0.552 0.016 285.938);
|
||||
}
|
||||
|
||||
@layer theme {
|
||||
/* over ride theme for dark mode */
|
||||
[data-theme="dark"] {
|
||||
--color-background: rgba(31, 33, 33, 1);
|
||||
--color-surface: rgba(38, 40, 40, 1);
|
||||
/* text colors */
|
||||
--color-txtmain: rgba(245, 245, 245, 1);
|
||||
--color-txtsecondary: rgba(167, 169, 169, 0.7);
|
||||
.dark {
|
||||
--background: oklch(0.141 0.005 285.823);
|
||||
--foreground: oklch(0.985 0 0);
|
||||
--card: oklch(0.21 0.006 285.885);
|
||||
--card-foreground: oklch(0.985 0 0);
|
||||
--popover: oklch(0.21 0.006 285.885);
|
||||
--popover-foreground: oklch(0.985 0 0);
|
||||
|
||||
--color-navlink-active: rgba(245, 245, 245, 1);
|
||||
/* brand accent: deeper teal for dark surfaces */
|
||||
--primary: rgb(45 166 178);
|
||||
--primary-foreground: oklch(0.141 0.005 285.823);
|
||||
|
||||
--color-primary: rgba(33, 128, 141, 1);
|
||||
--color-primary-hover: rgba(45, 166, 178, 1);
|
||||
--color-primary-active: rgba(41, 150, 161, 1);
|
||||
--color-secondary: rgba(119, 124, 124, 0.15);
|
||||
--color-secondary-hover: rgba(119, 124, 124, 0.25);
|
||||
--color-secondary-active: rgba(119, 124, 124, 0.3);
|
||||
--color-border: rgba(119, 124, 124, 0.3);
|
||||
--color-error: rgba(255, 84, 89, 1);
|
||||
--color-success: rgba(50, 184, 198, 1);
|
||||
--color-warning: rgb(244, 155, 0);
|
||||
--color-info: rgba(167, 169, 169, 1);
|
||||
--color-focus-ring: rgba(50, 184, 198, 0.4);
|
||||
--color-btn-primary-text: rgba(19, 52, 59, 1);
|
||||
--color-card-border: rgba(119, 124, 124, 0.2);
|
||||
--color-card-border-inner: rgba(119, 124, 124, 0.15);
|
||||
--shadow-inset-sm: inset 0 1px 0 rgba(255, 255, 255, 0.1), inset 0 -1px 0 rgba(0, 0, 0, 0.15);
|
||||
--button-border-secondary: rgba(119, 124, 124, 0.2);
|
||||
}
|
||||
--secondary: oklch(0.274 0.006 286.033);
|
||||
--secondary-foreground: oklch(0.985 0 0);
|
||||
--muted: oklch(0.274 0.006 286.033);
|
||||
--muted-foreground: oklch(0.705 0.015 286.067);
|
||||
--accent: oklch(0.274 0.006 286.033);
|
||||
--accent-foreground: oklch(0.985 0 0);
|
||||
--destructive: oklch(0.704 0.191 22.216);
|
||||
--border: oklch(1 0 0 / 10%);
|
||||
--input: oklch(1 0 0 / 15%);
|
||||
--ring: rgb(50 184 198);
|
||||
|
||||
--chart-1: rgb(50 184 198);
|
||||
--chart-2: oklch(0.696 0.17 162.48);
|
||||
--chart-3: oklch(0.769 0.188 70.08);
|
||||
--chart-4: oklch(0.627 0.265 303.9);
|
||||
--chart-5: oklch(0.645 0.246 16.439);
|
||||
|
||||
--sidebar: oklch(0.21 0.006 285.885);
|
||||
--sidebar-foreground: oklch(0.985 0 0);
|
||||
--sidebar-primary: rgb(50 184 198);
|
||||
--sidebar-primary-foreground: oklch(0.141 0.005 285.823);
|
||||
--sidebar-accent: oklch(0.274 0.006 286.033);
|
||||
--sidebar-accent-foreground: oklch(0.985 0 0);
|
||||
--sidebar-border: oklch(1 0 0 / 10%);
|
||||
--sidebar-ring: rgb(50 184 198);
|
||||
|
||||
--success: oklch(0.696 0.17 162.48);
|
||||
--warning: oklch(0.769 0.17 70.08);
|
||||
--info: oklch(0.705 0.015 286.067);
|
||||
}
|
||||
|
||||
@theme inline {
|
||||
--radius-sm: calc(var(--radius) - 4px);
|
||||
--radius-md: calc(var(--radius) - 2px);
|
||||
--radius-lg: var(--radius);
|
||||
--radius-xl: calc(var(--radius) + 4px);
|
||||
|
||||
--color-background: var(--background);
|
||||
--color-foreground: var(--foreground);
|
||||
--color-card: var(--card);
|
||||
--color-card-foreground: var(--card-foreground);
|
||||
--color-popover: var(--popover);
|
||||
--color-popover-foreground: var(--popover-foreground);
|
||||
--color-primary: var(--primary);
|
||||
--color-primary-foreground: var(--primary-foreground);
|
||||
--color-secondary: var(--secondary);
|
||||
--color-secondary-foreground: var(--secondary-foreground);
|
||||
--color-muted: var(--muted);
|
||||
--color-muted-foreground: var(--muted-foreground);
|
||||
--color-accent: var(--accent);
|
||||
--color-accent-foreground: var(--accent-foreground);
|
||||
--color-destructive: var(--destructive);
|
||||
--color-border: var(--border);
|
||||
--color-input: var(--input);
|
||||
--color-ring: var(--ring);
|
||||
--color-chart-1: var(--chart-1);
|
||||
--color-chart-2: var(--chart-2);
|
||||
--color-chart-3: var(--chart-3);
|
||||
--color-chart-4: var(--chart-4);
|
||||
--color-chart-5: var(--chart-5);
|
||||
--color-sidebar: var(--sidebar);
|
||||
--color-sidebar-foreground: var(--sidebar-foreground);
|
||||
--color-sidebar-primary: var(--sidebar-primary);
|
||||
--color-sidebar-primary-foreground: var(--sidebar-primary-foreground);
|
||||
--color-sidebar-accent: var(--sidebar-accent);
|
||||
--color-sidebar-accent-foreground: var(--sidebar-accent-foreground);
|
||||
--color-sidebar-border: var(--sidebar-border);
|
||||
--color-sidebar-ring: var(--sidebar-ring);
|
||||
|
||||
--color-success: var(--success);
|
||||
--color-warning: var(--warning);
|
||||
--color-info: var(--info);
|
||||
--color-error: var(--destructive);
|
||||
}
|
||||
|
||||
@layer base {
|
||||
* {
|
||||
@apply border-border outline-ring/50;
|
||||
}
|
||||
|
||||
body {
|
||||
/* example of how colors using theme colors*/
|
||||
@apply bg-background text-txtmain;
|
||||
@apply bg-background text-foreground;
|
||||
}
|
||||
|
||||
h1 {
|
||||
@apply text-4xl text-txtmain font-bold pb-4;
|
||||
@apply text-3xl font-bold tracking-tight pb-4;
|
||||
}
|
||||
h2 {
|
||||
@apply text-3xl text-txtmain font-bold pb-4;
|
||||
@apply text-2xl font-bold tracking-tight pb-4;
|
||||
}
|
||||
h3 {
|
||||
@apply text-2xl text-txtmain font-bold pb-4;
|
||||
@apply text-xl font-semibold tracking-tight pb-4;
|
||||
}
|
||||
h4 {
|
||||
@apply text-xl text-txtmain font-bold pb-4;
|
||||
@apply text-lg font-semibold pb-4;
|
||||
}
|
||||
h5 {
|
||||
@apply text-lg text-txtmain font-bold pb-4;
|
||||
@apply text-base font-semibold pb-4;
|
||||
}
|
||||
h6 {
|
||||
@apply text-base text-txtmain font-bold pb-4;
|
||||
@apply text-sm font-semibold pb-4;
|
||||
}
|
||||
}
|
||||
|
||||
/* define CSS classes here for specific types of components */
|
||||
@layer components {
|
||||
.container {
|
||||
@apply px-4;
|
||||
}
|
||||
|
||||
/* Tables */
|
||||
/* default padding for ad-hoc tables (header/detail views) */
|
||||
table th {
|
||||
@apply p-2 font-semibold;
|
||||
}
|
||||
table td {
|
||||
@apply p-2;
|
||||
}
|
||||
|
||||
/* Navigation Header */
|
||||
|
||||
.navlink {
|
||||
@apply text-txtsecondary hover:bg-secondary hover:text-txtmain rounded-lg p-2;
|
||||
}
|
||||
.navlink.active {
|
||||
@apply bg-primary text-navlink-active;
|
||||
}
|
||||
|
||||
/* Card component */
|
||||
.card {
|
||||
@apply bg-surface rounded-lg border border-card-border shadow-sm overflow-hidden p-4;
|
||||
}
|
||||
|
||||
.card:hover {
|
||||
@apply shadow-md;
|
||||
}
|
||||
|
||||
.card__body {
|
||||
@apply p-4;
|
||||
}
|
||||
|
||||
.card__header,
|
||||
.card__footer {
|
||||
@apply p-4 border-b border-card-border-inner;
|
||||
}
|
||||
|
||||
/* Status Badges */
|
||||
.status {
|
||||
@apply inline-block px-2 py-1 text-xs font-medium rounded-lg;
|
||||
}
|
||||
|
||||
.status--ready {
|
||||
@apply bg-success/10 text-success;
|
||||
}
|
||||
|
||||
.status--starting,
|
||||
.status--stopping {
|
||||
@apply bg-warning/10 text-warning;
|
||||
}
|
||||
|
||||
.status--stopped {
|
||||
@apply bg-error/10 text-error;
|
||||
}
|
||||
|
||||
/* Buttons */
|
||||
.btn {
|
||||
@apply bg-surface py-2 px-4 text-sm rounded-md border transition-colors duration-200 border-btn-border;
|
||||
}
|
||||
|
||||
.btn:hover {
|
||||
cursor: pointer;
|
||||
}
|
||||
|
||||
.btn--sm {
|
||||
@apply px-2 py-0.5 text-xs;
|
||||
}
|
||||
|
||||
.btn:disabled {
|
||||
@apply opacity-50 cursor-not-allowed;
|
||||
}
|
||||
}
|
||||
|
||||
@layer utilities {
|
||||
.ml-2 {
|
||||
margin-left: 0.5rem;
|
||||
}
|
||||
@utility activity-link {
|
||||
background: linear-gradient(90deg, #6366f1, #8b5cf6, #a855f7, #8b5cf6, #6366f1);
|
||||
background-size: 200% 100%;
|
||||
-webkit-background-clip: text;
|
||||
background-clip: text;
|
||||
-webkit-text-fill-color: transparent;
|
||||
animation: gradient-shift 2s linear infinite;
|
||||
}
|
||||
|
||||
.my-8 {
|
||||
margin-top: 2rem;
|
||||
margin-bottom: 2rem;
|
||||
@keyframes gradient-shift {
|
||||
0% {
|
||||
background-position: 0% 50%;
|
||||
}
|
||||
100% {
|
||||
background-position: 200% 50%;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,49 @@
|
||||
<script lang="ts" module>
|
||||
import { type VariantProps, tv } from "tailwind-variants";
|
||||
|
||||
export const badgeVariants = tv({
|
||||
base: "h-5 gap-1 rounded-none border border-transparent px-2 py-0.5 text-xs font-medium transition-all has-data-[icon=inline-end]:pr-1.5 has-data-[icon=inline-start]:pl-1.5 [&>svg]:size-3! focus-visible:border-ring focus-visible:ring-ring/50 aria-invalid:ring-destructive/20 dark:aria-invalid:ring-destructive/40 aria-invalid:border-destructive group/badge inline-flex w-fit shrink-0 items-center justify-center overflow-hidden whitespace-nowrap transition-colors focus-visible:ring-[3px] [&>svg]:pointer-events-none",
|
||||
variants: {
|
||||
variant: {
|
||||
default: "bg-primary text-primary-foreground [a]:hover:bg-primary/80",
|
||||
secondary: "bg-secondary text-secondary-foreground [a]:hover:bg-secondary/80",
|
||||
destructive: "bg-destructive/10 [a]:hover:bg-destructive/20 focus-visible:ring-destructive/20 dark:focus-visible:ring-destructive/40 text-destructive dark:bg-destructive/20",
|
||||
outline: "border-border text-foreground [a]:hover:bg-muted [a]:hover:text-muted-foreground",
|
||||
ghost: "hover:bg-muted hover:text-muted-foreground dark:hover:bg-muted/50",
|
||||
link: "text-primary underline-offset-4 hover:underline",
|
||||
},
|
||||
},
|
||||
defaultVariants: {
|
||||
variant: "default",
|
||||
},
|
||||
});
|
||||
|
||||
export type BadgeVariant = VariantProps<typeof badgeVariants>["variant"];
|
||||
</script>
|
||||
|
||||
<script lang="ts">
|
||||
import type { HTMLAnchorAttributes } from "svelte/elements";
|
||||
import { cn, type WithElementRef } from "$lib/utils.js";
|
||||
|
||||
let {
|
||||
ref = $bindable(null),
|
||||
href,
|
||||
class: className,
|
||||
variant = "default",
|
||||
children,
|
||||
...restProps
|
||||
}: WithElementRef<HTMLAnchorAttributes> & {
|
||||
variant?: BadgeVariant;
|
||||
} = $props();
|
||||
</script>
|
||||
|
||||
<svelte:element
|
||||
this={href ? "a" : "span"}
|
||||
bind:this={ref}
|
||||
data-slot="badge"
|
||||
{href}
|
||||
class={cn(badgeVariants({ variant }), className)}
|
||||
{...restProps}
|
||||
>
|
||||
{@render children?.()}
|
||||
</svelte:element>
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user