Compare commits
1 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 4b4ee70154 |
+1
-1
@@ -4,7 +4,7 @@ early_access: false
|
|||||||
reviews:
|
reviews:
|
||||||
profile: "chill"
|
profile: "chill"
|
||||||
request_changes_workflow: false
|
request_changes_workflow: false
|
||||||
high_level_summary: false
|
high_level_summary: true
|
||||||
poem: false
|
poem: false
|
||||||
review_status: true
|
review_status: true
|
||||||
collapse_walkthrough: false
|
collapse_walkthrough: false
|
||||||
|
|||||||
@@ -4,15 +4,11 @@ on:
|
|||||||
pull_request:
|
pull_request:
|
||||||
paths:
|
paths:
|
||||||
- "config-schema.json"
|
- "config-schema.json"
|
||||||
- "config.example.yaml"
|
|
||||||
- ".github/workflows/config-schema.yml"
|
|
||||||
push:
|
push:
|
||||||
branches:
|
branches:
|
||||||
- main
|
- main
|
||||||
paths:
|
paths:
|
||||||
- "config-schema.json"
|
- "config-schema.json"
|
||||||
- "config.example.yaml"
|
|
||||||
- ".github/workflows/config-schema.yml"
|
|
||||||
|
|
||||||
workflow_dispatch:
|
workflow_dispatch:
|
||||||
|
|
||||||
@@ -43,14 +39,3 @@ jobs:
|
|||||||
fi
|
fi
|
||||||
|
|
||||||
echo "✓ config-schema.json is valid"
|
echo "✓ config-schema.json is valid"
|
||||||
|
|
||||||
- name: Set up Python
|
|
||||||
uses: actions/setup-python@v5
|
|
||||||
with:
|
|
||||||
python-version: "3.x"
|
|
||||||
|
|
||||||
- name: Install check-jsonschema
|
|
||||||
run: pip install check-jsonschema
|
|
||||||
|
|
||||||
- name: Validate config.example.yaml against schema
|
|
||||||
run: check-jsonschema --schemafile config-schema.json config.example.yaml
|
|
||||||
|
|||||||
@@ -17,19 +17,12 @@ on:
|
|||||||
- 'docker/build-container.sh'
|
- 'docker/build-container.sh'
|
||||||
- 'docker/*.Containerfile'
|
- 'docker/*.Containerfile'
|
||||||
|
|
||||||
# grant permissions on GITHUB_TOKEN to publish packages
|
|
||||||
# ref: https://docs.github.com/en/packages/managing-github-packages-using-github-actions-workflows/publishing-and-installing-a-package-with-github-actions#publishing-a-package-using-an-action
|
|
||||||
permissions:
|
|
||||||
contents: read
|
|
||||||
packages: write
|
|
||||||
id-token: write
|
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
build-and-push:
|
build-and-push:
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
strategy:
|
strategy:
|
||||||
matrix:
|
matrix:
|
||||||
platform: [intel, cuda, cuda13, vulkan, cpu, musa, rocm]
|
platform: [intel, cuda, vulkan, cpu, musa, rocm]
|
||||||
fail-fast: false
|
fail-fast: false
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout code
|
- name: Checkout code
|
||||||
|
|||||||
@@ -36,7 +36,7 @@ jobs:
|
|||||||
- name: Set up Go
|
- name: Set up Go
|
||||||
uses: actions/setup-go@v4
|
uses: actions/setup-go@v4
|
||||||
with:
|
with:
|
||||||
go-version-file: go.mod
|
go-version: '1.23'
|
||||||
|
|
||||||
# Only run in this linux based runner
|
# Only run in this linux based runner
|
||||||
- name: Check Formatting
|
- name: Check Formatting
|
||||||
@@ -51,7 +51,7 @@ jobs:
|
|||||||
uses: actions/cache/restore@v4
|
uses: actions/cache/restore@v4
|
||||||
with:
|
with:
|
||||||
path: ./build
|
path: ./build
|
||||||
key: ${{ runner.os }}-simple-responder-${{ hashFiles('cmd/simple-responder/simple-responder.go') }}
|
key: ${{ runner.os }}-simple-responder-${{ hashFiles('misc/simple-responder/simple-responder.go') }}
|
||||||
|
|
||||||
# necessary for testing proxy/Process swapping
|
# necessary for testing proxy/Process swapping
|
||||||
- name: Create simple-responder
|
- name: Create simple-responder
|
||||||
@@ -67,4 +67,4 @@ jobs:
|
|||||||
key: ${{ runner.os }}-simple-responder-${{ hashFiles('misc/simple-responder/simple-responder.go') }}
|
key: ${{ runner.os }}-simple-responder-${{ hashFiles('misc/simple-responder/simple-responder.go') }}
|
||||||
|
|
||||||
- name: Test all
|
- name: Test all
|
||||||
run: make test-all
|
run: make test-all
|
||||||
@@ -1,131 +0,0 @@
|
|||||||
name: Build Unified Docker Image
|
|
||||||
|
|
||||||
on:
|
|
||||||
schedule:
|
|
||||||
- cron: "37 5 * * *"
|
|
||||||
|
|
||||||
workflow_dispatch:
|
|
||||||
inputs:
|
|
||||||
llama_cpp_ref:
|
|
||||||
description: "llama.cpp commit hash, tag, or branch"
|
|
||||||
required: false
|
|
||||||
default: "master"
|
|
||||||
whisper_ref:
|
|
||||||
description: "whisper.cpp commit hash, tag, or branch"
|
|
||||||
required: false
|
|
||||||
default: "master"
|
|
||||||
sd_ref:
|
|
||||||
description: "stable-diffusion.cpp commit hash, tag, or branch"
|
|
||||||
required: false
|
|
||||||
default: "master"
|
|
||||||
ik_llama_ref:
|
|
||||||
description: "ik_llama.cpp commit hash, tag, or branch (CUDA only)"
|
|
||||||
required: false
|
|
||||||
default: "main"
|
|
||||||
llama_swap_version:
|
|
||||||
description: "llama-swap version (e.g. v198, latest, main)"
|
|
||||||
required: false
|
|
||||||
default: "main"
|
|
||||||
build_cuda:
|
|
||||||
description: "Build CUDA image"
|
|
||||||
type: boolean
|
|
||||||
required: false
|
|
||||||
default: true
|
|
||||||
build_vulkan:
|
|
||||||
description: "Build Vulkan image"
|
|
||||||
type: boolean
|
|
||||||
required: false
|
|
||||||
default: true
|
|
||||||
|
|
||||||
permissions:
|
|
||||||
contents: read
|
|
||||||
packages: write
|
|
||||||
|
|
||||||
jobs:
|
|
||||||
setup:
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
outputs:
|
|
||||||
matrix: ${{ steps.set-matrix.outputs.matrix }}
|
|
||||||
steps:
|
|
||||||
- id: set-matrix
|
|
||||||
run: |
|
|
||||||
backends=()
|
|
||||||
# schedule uses defaults (build both); workflow_dispatch respects inputs
|
|
||||||
if [[ "${{ github.event_name }}" == "schedule" ]] || [[ "${{ inputs.build_cuda }}" == "true" ]]; then
|
|
||||||
backends+=("cuda")
|
|
||||||
fi
|
|
||||||
if [[ "${{ github.event_name }}" == "schedule" ]] || [[ "${{ inputs.build_vulkan }}" == "true" ]]; then
|
|
||||||
backends+=("vulkan")
|
|
||||||
fi
|
|
||||||
matrix=$(printf '%s\n' "${backends[@]}" | jq -R . | jq -sc .)
|
|
||||||
echo "matrix=$matrix" >> $GITHUB_OUTPUT
|
|
||||||
|
|
||||||
build:
|
|
||||||
needs: setup
|
|
||||||
if: ${{ needs.setup.outputs.matrix != '[]' }}
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
strategy:
|
|
||||||
fail-fast: false
|
|
||||||
matrix:
|
|
||||||
backend: ${{ fromJSON(needs.setup.outputs.matrix) }}
|
|
||||||
steps:
|
|
||||||
- name: Checkout code
|
|
||||||
uses: actions/checkout@v4
|
|
||||||
|
|
||||||
- name: Free up disk space
|
|
||||||
run: |
|
|
||||||
echo "Before cleanup:"
|
|
||||||
df -h
|
|
||||||
sudo rm -rf /usr/share/dotnet
|
|
||||||
sudo rm -rf /usr/local/lib/android
|
|
||||||
sudo rm -rf /opt/ghc
|
|
||||||
sudo rm -rf /opt/hostedtoolcache/CodeQL
|
|
||||||
sudo docker system prune -af
|
|
||||||
echo "After cleanup:"
|
|
||||||
df -h
|
|
||||||
|
|
||||||
# On GitHub Actions runners, create a fresh builder.
|
|
||||||
# When running locally under act, skip this and reuse the existing
|
|
||||||
# llama-swap-builder (which has ccache warm) to avoid exhausting disk.
|
|
||||||
- name: Set up Docker Buildx
|
|
||||||
if: ${{ !env.ACT }}
|
|
||||||
uses: docker/setup-buildx-action@v3
|
|
||||||
|
|
||||||
- name: Log in to GitHub Container Registry
|
|
||||||
if: ${{ !env.ACT }}
|
|
||||||
uses: docker/login-action@v3
|
|
||||||
with:
|
|
||||||
registry: ghcr.io
|
|
||||||
username: ${{ github.actor }}
|
|
||||||
password: ${{ secrets.GITHUB_TOKEN }}
|
|
||||||
|
|
||||||
- name: Build unified Docker image (${{ matrix.backend }})
|
|
||||||
env:
|
|
||||||
LLAMA_REF: ${{ inputs.llama_cpp_ref || 'master' }}
|
|
||||||
WHISPER_REF: ${{ inputs.whisper_ref || 'master' }}
|
|
||||||
SD_REF: ${{ inputs.sd_ref || 'master' }}
|
|
||||||
IK_LLAMA_REF: ${{ inputs.ik_llama_ref || 'main' }}
|
|
||||||
LS_VERSION: ${{ inputs.llama_swap_version || 'main' }}
|
|
||||||
DOCKER_IMAGE_TAG: ghcr.io/mostlygeek/llama-swap:unified-${{ matrix.backend }}
|
|
||||||
# When running under act, use the local builder that has warm ccache.
|
|
||||||
# On GitHub Actions, BUILDX_BUILDER is unset so docker uses the builder
|
|
||||||
# created by setup-buildx-action above.
|
|
||||||
BUILDX_BUILDER: ${{ env.ACT == 'true' && 'llama-swap-builder' || '' }}
|
|
||||||
run: |
|
|
||||||
chmod +x docker/unified/build-image.sh
|
|
||||||
docker/unified/build-image.sh --${{ matrix.backend }}
|
|
||||||
|
|
||||||
- name: Push to GitHub Container Registry
|
|
||||||
if: ${{ !env.ACT }}
|
|
||||||
run: |
|
|
||||||
BASE_TAG="ghcr.io/mostlygeek/llama-swap:unified-${{ matrix.backend }}"
|
|
||||||
DATE_TAG=$(date -u +%Y-%m-%d)
|
|
||||||
|
|
||||||
docker push "${BASE_TAG}"
|
|
||||||
docker tag "${BASE_TAG}" "${BASE_TAG}-${DATE_TAG}"
|
|
||||||
docker push "${BASE_TAG}-${DATE_TAG}"
|
|
||||||
|
|
||||||
ROOTLESS_TAG="${BASE_TAG}-rootless"
|
|
||||||
docker push "${ROOTLESS_TAG}"
|
|
||||||
docker tag "${ROOTLESS_TAG}" "${ROOTLESS_TAG}-${DATE_TAG}"
|
|
||||||
docker push "${ROOTLESS_TAG}-${DATE_TAG}"
|
|
||||||
@@ -1,51 +0,0 @@
|
|||||||
## Project Description:
|
|
||||||
|
|
||||||
llama-swap is a lightweight, transparent proxy server that provides automatic model swapping to llama.cpp's server.
|
|
||||||
|
|
||||||
## Tech stack
|
|
||||||
|
|
||||||
- golang
|
|
||||||
- typescript, vite and svelte 5 for UI (located in ui/)
|
|
||||||
|
|
||||||
## Workflow Tasks
|
|
||||||
|
|
||||||
- when summarizing changes only include details that require further action
|
|
||||||
- just say "Done." when there is no further action
|
|
||||||
- use the github CLI `gh` to create pull requests and work with github
|
|
||||||
- Rules for creating pull requests:
|
|
||||||
- keep them short and focused on changes.
|
|
||||||
- never include a test plan
|
|
||||||
- write the summary using the same style rules as commit message
|
|
||||||
|
|
||||||
## Testing
|
|
||||||
|
|
||||||
- Follow test naming conventions like `TestProxyManager_<test name>`, `TestProcessGroup_<test name>`, etc.
|
|
||||||
- Use `go test -v -run <name pattern for new tests>` to run any new tests you've written.
|
|
||||||
- Run `gofmt -l .` before committing to verify formatting. Fix any reported files with `gofmt -w <file>`.
|
|
||||||
- Use `make test-dev` after running new tests for a quick overall test run. This runs `go test` and `staticcheck`. Fix any static checking errors. Use this only when changes are made to any code under the `proxy/` directory.
|
|
||||||
- Use `make test-all` before completing work. This includes long running concurrency tests.
|
|
||||||
|
|
||||||
### Commit message example format:
|
|
||||||
|
|
||||||
```
|
|
||||||
proxy: add new feature
|
|
||||||
|
|
||||||
Add new feature that implements functionality X and Y.
|
|
||||||
|
|
||||||
- key change 1
|
|
||||||
- key change 2
|
|
||||||
- key change 3
|
|
||||||
|
|
||||||
fixes #123
|
|
||||||
```
|
|
||||||
|
|
||||||
## Code Reviews
|
|
||||||
|
|
||||||
- use three levels High, Medium, Low severity
|
|
||||||
- label each discovered issue with a label like H1, M2, L3 respectively
|
|
||||||
- High severity are must fix issues (security, race conditions, critical bugs)
|
|
||||||
- Medium severity are recommended improvements (coding style, missing functionality, inconsistencies)
|
|
||||||
- Low severity are nice to have changes and nits
|
|
||||||
- Include a suggestion with each discovered item
|
|
||||||
- Limit your code review to three items with the highest priority first
|
|
||||||
- Double check your discovered items and recommended remediations
|
|
||||||
@@ -1 +1,49 @@
|
|||||||
@AGENTS.md
|
## Project Description:
|
||||||
|
|
||||||
|
llama-swap is a lightweight, transparent proxy server that provides automatic model swapping to llama.cpp's server.
|
||||||
|
|
||||||
|
## Tech stack
|
||||||
|
|
||||||
|
- golang
|
||||||
|
- typescript, vite and react for UI (located in ui/)
|
||||||
|
|
||||||
|
## Workflow Tasks
|
||||||
|
|
||||||
|
- when summarizing changes only include details that require further action
|
||||||
|
- just say "Done." when there is no further action
|
||||||
|
- use `gh` to create PRs and load issues
|
||||||
|
- do include Co-Authored-By or created by when committing changes or creating PRs
|
||||||
|
- keep PR descriptions short and focused on changes.
|
||||||
|
- never include a test plan
|
||||||
|
|
||||||
|
## Testing
|
||||||
|
|
||||||
|
- Follow test naming conventions like `TestProxyManager_<test name>`, `TestProcessGroup_<test name>`, etc.
|
||||||
|
- Use `go test -v -run <name pattern for new tests>` to run any new tests you've written.
|
||||||
|
- Use `make test-dev` after running new tests for a quick overall test run. This runs `go test` and `staticcheck`. Fix any static checking errors. Use this only when changes are made to any code under the `proxy/` directory.
|
||||||
|
- Use `make test-all` before completing work. This includes long running concurrency tests.
|
||||||
|
|
||||||
|
### Commit message example format:
|
||||||
|
|
||||||
|
```
|
||||||
|
proxy: add new feature
|
||||||
|
|
||||||
|
Add new feature that implements functionality X and Y.
|
||||||
|
|
||||||
|
- key change 1
|
||||||
|
- key change 2
|
||||||
|
- key change 3
|
||||||
|
|
||||||
|
fixes #123
|
||||||
|
```
|
||||||
|
|
||||||
|
## Code Reviews
|
||||||
|
|
||||||
|
- use three levels High, Medium, Low severity
|
||||||
|
- label each discovered issue with a label like H1, M2, L3 respectively
|
||||||
|
- High severity are must fix issues (security, race conditions, critical bugs)
|
||||||
|
- Medium severity are recommended improvements (coding style, missing functionality, inconsistencies)
|
||||||
|
- Low severity are nice to have changes and nits
|
||||||
|
- Include a suggestion with each discovered item
|
||||||
|
- Limit your code review to three items with the highest priority first
|
||||||
|
- Double check your discovered items and recommended remediations
|
||||||
|
|||||||
@@ -51,7 +51,7 @@ mac: ui
|
|||||||
linux: ui
|
linux: ui
|
||||||
@echo "Building Linux binary..."
|
@echo "Building Linux binary..."
|
||||||
GOOS=linux GOARCH=amd64 go build -ldflags="-X main.commit=${GIT_HASH} -X main.version=local_${GIT_HASH} -X main.date=${BUILD_DATE}" -o $(BUILD_DIR)/$(APP_NAME)-linux-amd64
|
GOOS=linux GOARCH=amd64 go build -ldflags="-X main.commit=${GIT_HASH} -X main.version=local_${GIT_HASH} -X main.date=${BUILD_DATE}" -o $(BUILD_DIR)/$(APP_NAME)-linux-amd64
|
||||||
#GOOS=linux GOARCH=arm64 go build -ldflags="-X main.commit=${GIT_HASH} -X main.version=local_${GIT_HASH} -X main.date=${BUILD_DATE}" -o $(BUILD_DIR)/$(APP_NAME)-linux-arm64
|
GOOS=linux GOARCH=arm64 go build -ldflags="-X main.commit=${GIT_HASH} -X main.version=local_${GIT_HASH} -X main.date=${BUILD_DATE}" -o $(BUILD_DIR)/$(APP_NAME)-linux-arm64
|
||||||
|
|
||||||
# Build Windows binary
|
# Build Windows binary
|
||||||
windows: ui
|
windows: ui
|
||||||
|
|||||||
@@ -5,7 +5,7 @@
|
|||||||
|
|
||||||
# llama-swap
|
# llama-swap
|
||||||
|
|
||||||
Run multiple generative AI models on your machine and hot-swap between them on demand. llama-swap works with any OpenAI and Anthropic API compatible server and is used by thousands of people to power their local AI workflows.
|
Run multiple LLM models on your machine and hot-swap between them as needed. llama-swap works with any OpenAI API-compatible server, giving you the flexibility to switch models without restarting your applications.
|
||||||
|
|
||||||
Built in Go for performance and simplicity, llama-swap has zero dependencies and is incredibly easy to set up. Get started in minutes - just one binary and one configuration file.
|
Built in Go for performance and simplicity, llama-swap has zero dependencies and is incredibly easy to set up. Get started in minutes - just one binary and one configuration file.
|
||||||
|
|
||||||
@@ -32,10 +32,6 @@ Built in Go for performance and simplicity, llama-swap has zero dependencies and
|
|||||||
- `v1/rerank`, `v1/reranking`, `/rerank`
|
- `v1/rerank`, `v1/reranking`, `/rerank`
|
||||||
- `/infill` - for code infilling
|
- `/infill` - for code infilling
|
||||||
- `/completion` - for completion endpoint
|
- `/completion` - for completion endpoint
|
||||||
- ✅ SDAPI via [stable-diffusion.cpp's server](https://github.com/leejet/stable-diffusion.cpp/tree/master/examples/server)
|
|
||||||
- `/sdapi/v1/txt2img`
|
|
||||||
- `/sdapi/v1/img2img`
|
|
||||||
- `/sdapi/v1/loras` - requires `model` in request body to fetch the correct loras
|
|
||||||
- ✅ llama-swap API
|
- ✅ llama-swap API
|
||||||
- `/ui` - web UI
|
- `/ui` - web UI
|
||||||
- `/upstream/:model_id` - direct access to upstream server ([demo](https://github.com/mostlygeek/llama-swap/pull/31))
|
- `/upstream/:model_id` - direct access to upstream server ([demo](https://github.com/mostlygeek/llama-swap/pull/31))
|
||||||
@@ -52,27 +48,13 @@ Built in Go for performance and simplicity, llama-swap has zero dependencies and
|
|||||||
|
|
||||||
### Web UI
|
### Web UI
|
||||||
|
|
||||||
llama-swap includes a real time web interface with a playground for testing out all sorts of local models:
|
llama-swap includes a real time web interface for monitoring logs and controlling models:
|
||||||
|
|
||||||
<img width="1125" height="876" alt="image" src="https://github.com/user-attachments/assets/8ee41947-97af-463d-b0f0-8e9c478fac07" />
|
<img width="1164" height="745" alt="image" src="https://github.com/user-attachments/assets/bacf3f9d-819f-430b-9ed2-1bfaa8d54579" />
|
||||||
|
|
||||||
View detailed token metrics:
|
The Activity Page shows recent requests:
|
||||||
|
|
||||||
<img width="1111" height="515" alt="image" src="https://github.com/user-attachments/assets/64bfb280-d7a3-4126-971a-a128fd40410c" />
|
|
||||||
|
|
||||||
Inspect request and responses:
|
|
||||||
|
|
||||||
<img width="1111" height="720" alt="image" src="https://github.com/user-attachments/assets/24fe4aca-1448-4d7c-b9e8-a967589bda6c" />
|
|
||||||
|
|
||||||
Manually load and unload models:
|
|
||||||
|
|
||||||
<img width="1109" height="719" alt="image" src="https://github.com/user-attachments/assets/02b1e1f2-abd0-4050-84ae-facd66ff01c4" />
|
|
||||||
|
|
||||||
|
|
||||||
Real time log streaming:
|
|
||||||
|
|
||||||
<img width="1107" height="559" alt="image" src="https://github.com/user-attachments/assets/39669a10-cff2-409e-836a-5bad8bd0140c" />
|
|
||||||
|
|
||||||
|
<img width="1360" height="963" alt="image" src="https://github.com/user-attachments/assets/5f3edee6-d03a-4ae5-ae06-b20ac1f135bd" />
|
||||||
|
|
||||||
## Installation
|
## Installation
|
||||||
|
|
||||||
|
|||||||
@@ -274,43 +274,6 @@ func main() {
|
|||||||
c.String(200, fmt.Sprintf("%s %s", c.Request.Method, c.Request.URL.Path))
|
c.String(200, fmt.Sprintf("%s %s", c.Request.Method, c.Request.URL.Path))
|
||||||
})
|
})
|
||||||
|
|
||||||
// SD API endpoints
|
|
||||||
r.POST("/sdapi/v1/txt2img", func(c *gin.Context) {
|
|
||||||
body, err := io.ReadAll(c.Request.Body)
|
|
||||||
if err != nil {
|
|
||||||
c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to read request body"})
|
|
||||||
return
|
|
||||||
}
|
|
||||||
defer c.Request.Body.Close()
|
|
||||||
|
|
||||||
modelName := gjson.GetBytes(body, "model").String()
|
|
||||||
c.JSON(http.StatusOK, gin.H{
|
|
||||||
"model": modelName,
|
|
||||||
"images": []string{},
|
|
||||||
})
|
|
||||||
})
|
|
||||||
|
|
||||||
r.POST("/sdapi/v1/img2img", func(c *gin.Context) {
|
|
||||||
body, err := io.ReadAll(c.Request.Body)
|
|
||||||
if err != nil {
|
|
||||||
c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to read request body"})
|
|
||||||
return
|
|
||||||
}
|
|
||||||
defer c.Request.Body.Close()
|
|
||||||
|
|
||||||
modelName := gjson.GetBytes(body, "model").String()
|
|
||||||
c.JSON(http.StatusOK, gin.H{
|
|
||||||
"model": modelName,
|
|
||||||
"images": []string{},
|
|
||||||
})
|
|
||||||
})
|
|
||||||
|
|
||||||
r.GET("/sdapi/v1/loras", func(c *gin.Context) {
|
|
||||||
c.JSON(http.StatusOK, gin.H{
|
|
||||||
"loras": []string{},
|
|
||||||
})
|
|
||||||
})
|
|
||||||
|
|
||||||
address := "127.0.0.1:" + *port // Address with the specified port
|
address := "127.0.0.1:" + *port // Address with the specified port
|
||||||
|
|
||||||
srv := &http.Server{
|
srv := &http.Server{
|
||||||
|
|||||||
+5
-103
@@ -39,49 +39,6 @@
|
|||||||
},
|
},
|
||||||
"default": {},
|
"default": {},
|
||||||
"description": "A dictionary of string substitutions. Macros are reusable snippets used in model cmd, cmdStop, proxy, checkEndpoint, filters.stripParams. Macro names must be <64 chars, match ^[a-zA-Z0-9_-]+$, and not be PORT or MODEL_ID. Values can be string, number, or boolean. Macros can reference other macros defined before them."
|
"description": "A dictionary of string substitutions. Macros are reusable snippets used in model cmd, cmdStop, proxy, checkEndpoint, filters.stripParams. Macro names must be <64 chars, match ^[a-zA-Z0-9_-]+$, and not be PORT or MODEL_ID. Values can be string, number, or boolean. Macros can reference other macros defined before them."
|
||||||
},
|
|
||||||
"timeouts": {
|
|
||||||
"type": "object",
|
|
||||||
"properties": {
|
|
||||||
"connect": {
|
|
||||||
"type": "integer",
|
|
||||||
"minimum": 0,
|
|
||||||
"default": 30,
|
|
||||||
"description": "TCP connection timeout in seconds. Set to 0 to disable."
|
|
||||||
},
|
|
||||||
"keepalive": {
|
|
||||||
"type": "integer",
|
|
||||||
"minimum": 0,
|
|
||||||
"default": 30,
|
|
||||||
"description": "TCP keepalive timeout in seconds. Set to 0 to disable."
|
|
||||||
},
|
|
||||||
"responseHeader": {
|
|
||||||
"type": "integer",
|
|
||||||
"minimum": 0,
|
|
||||||
"default": 0,
|
|
||||||
"description": "Time to wait for response headers in seconds. Set to 0 to disable."
|
|
||||||
},
|
|
||||||
"tlsHandshake": {
|
|
||||||
"type": "integer",
|
|
||||||
"minimum": 0,
|
|
||||||
"default": 10,
|
|
||||||
"description": "TLS handshake timeout in seconds. Set to 0 to disable."
|
|
||||||
},
|
|
||||||
"expectContinue": {
|
|
||||||
"type": "integer",
|
|
||||||
"minimum": 0,
|
|
||||||
"default": 1,
|
|
||||||
"description": "Expect-Continue timeout in seconds. Set to 0 to disable."
|
|
||||||
},
|
|
||||||
"idleConn": {
|
|
||||||
"type": "integer",
|
|
||||||
"minimum": 0,
|
|
||||||
"default": 90,
|
|
||||||
"description": "Idle connection timeout in seconds. Set to 0 to disable."
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"additionalProperties": false,
|
|
||||||
"description": "Timeout settings for proxy connections."
|
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"properties": {
|
"properties": {
|
||||||
@@ -91,12 +48,6 @@
|
|||||||
"default": 120,
|
"default": 120,
|
||||||
"description": "Number of seconds to wait for a model to be ready to serve requests."
|
"description": "Number of seconds to wait for a model to be ready to serve requests."
|
||||||
},
|
},
|
||||||
"globalTTL": {
|
|
||||||
"type": "integer",
|
|
||||||
"minimum": 0,
|
|
||||||
"default": 0,
|
|
||||||
"description": "Default TTL for all models in seconds, 0 means no TTL and models will never be automatically unloaded"
|
|
||||||
},
|
|
||||||
"logLevel": {
|
"logLevel": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"enum": [
|
"enum": [
|
||||||
@@ -226,9 +177,9 @@
|
|||||||
},
|
},
|
||||||
"ttl": {
|
"ttl": {
|
||||||
"type": "integer",
|
"type": "integer",
|
||||||
"minimum": -1,
|
"minimum": 0,
|
||||||
"default": -1,
|
"default": 0,
|
||||||
"description": "Automatically unload the model after ttl seconds. -1 uses the global TTL value, 0 disables unloading. Must be >0 to enable."
|
"description": "Automatically unload the model after ttl seconds. 0 disables unloading. Must be >0 to enable."
|
||||||
},
|
},
|
||||||
"useModelName": {
|
"useModelName": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
@@ -249,20 +200,11 @@
|
|||||||
"additionalProperties": true,
|
"additionalProperties": true,
|
||||||
"default": {},
|
"default": {},
|
||||||
"description": "Dictionary of parameters to set/override in requests. Useful for enforcing specific parameter values. Protected params like 'model' cannot be overridden. Values can be strings, numbers, booleans, arrays, or objects."
|
"description": "Dictionary of parameters to set/override in requests. Useful for enforcing specific parameter values. Protected params like 'model' cannot be overridden. Values can be strings, numbers, booleans, arrays, or objects."
|
||||||
},
|
|
||||||
"setParamsByID": {
|
|
||||||
"type": "object",
|
|
||||||
"additionalProperties": {
|
|
||||||
"type": "object",
|
|
||||||
"additionalProperties": true
|
|
||||||
},
|
|
||||||
"default": {},
|
|
||||||
"description": "Dictionary mapping requested model IDs (or aliases) to parameters to set/override in requests. Applied after setParams and can override those values. Useful with aliases to vary behaviour depending on which alias the client used (e.g. different reasoning_effort per alias). Keys support ${MODEL_ID} macro substitution. Protected params like 'model' cannot be overridden."
|
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"additionalProperties": false,
|
"additionalProperties": false,
|
||||||
"default": {},
|
"default": {},
|
||||||
"description": "Dictionary of filter settings. Supports stripParams, setParams, and setParamsByID."
|
"description": "Dictionary of filter settings. Supports stripParams and setParams."
|
||||||
},
|
},
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
@@ -284,9 +226,6 @@
|
|||||||
"type": "boolean",
|
"type": "boolean",
|
||||||
"default": false,
|
"default": false,
|
||||||
"description": "If true the model will not show up in /v1/models responses. It can still be used as normal in API requests."
|
"description": "If true the model will not show up in /v1/models responses. It can still be used as normal in API requests."
|
||||||
},
|
|
||||||
"timeouts": {
|
|
||||||
"$ref": "#/definitions/timeouts"
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -413,43 +352,6 @@
|
|||||||
"additionalProperties": false,
|
"additionalProperties": false,
|
||||||
"default": {},
|
"default": {},
|
||||||
"description": "Dictionary of filter settings for peer requests. Supports stripParams and setParams."
|
"description": "Dictionary of filter settings for peer requests. Supports stripParams and setParams."
|
||||||
},
|
|
||||||
"timeouts": {
|
|
||||||
"type": "object",
|
|
||||||
"properties": {
|
|
||||||
"connect": {
|
|
||||||
"type": "integer",
|
|
||||||
"minimum": 0,
|
|
||||||
"default": 30,
|
|
||||||
"description": "TCP connection timeout in seconds."
|
|
||||||
},
|
|
||||||
"keepalive": {
|
|
||||||
"type": "integer",
|
|
||||||
"minimum": 0,
|
|
||||||
"default": 30,
|
|
||||||
"description": "TCP keepalive connection timeout in seconds."
|
|
||||||
},
|
|
||||||
"responseHeader": {
|
|
||||||
"type": "integer",
|
|
||||||
"minimum": 0,
|
|
||||||
"default": 0,
|
|
||||||
"description": "Time to wait for response headers in seconds."
|
|
||||||
},
|
|
||||||
"tlsHandshake": {
|
|
||||||
"type": "integer",
|
|
||||||
"minimum": 0,
|
|
||||||
"default": 10,
|
|
||||||
"description": "TLS handshake timeout in seconds."
|
|
||||||
},
|
|
||||||
"idleConn": {
|
|
||||||
"type": "integer",
|
|
||||||
"minimum": 0,
|
|
||||||
"default": 90,
|
|
||||||
"description": "Idle connection timeout in seconds."
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"additionalProperties": false,
|
|
||||||
"description": "Timeout settings for proxy connections to this peer."
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
@@ -457,4 +359,4 @@
|
|||||||
"description": "A dictionary of remote peers and models they provide. Peers can be another llama-swap or any server that provides the /v1/ generative API endpoints supported by llama-swap."
|
"description": "A dictionary of remote peers and models they provide. Peers can be another llama-swap or any server that provides the /v1/ generative API endpoints supported by llama-swap."
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
+15
-68
@@ -75,11 +75,6 @@ sendLoadingState: true
|
|||||||
# all fields except for Id so chat UIs can use the alias equivalent to the original.
|
# all fields except for Id so chat UIs can use the alias equivalent to the original.
|
||||||
includeAliasesInList: false
|
includeAliasesInList: false
|
||||||
|
|
||||||
# globalTTL: the default TTL in seconds before unloading a model
|
|
||||||
# - optional, default: 0 (never automatically unload)
|
|
||||||
# - must be >= 0
|
|
||||||
globalTTL: 0
|
|
||||||
|
|
||||||
# macros: a dictionary of string substitutions
|
# macros: a dictionary of string substitutions
|
||||||
# - optional, default: empty dictionary
|
# - optional, default: empty dictionary
|
||||||
# - macros are reusable snippets
|
# - macros are reusable snippets
|
||||||
@@ -131,7 +126,7 @@ apiKeys:
|
|||||||
# - below are examples of all the settings a model can have
|
# - below are examples of all the settings a model can have
|
||||||
models:
|
models:
|
||||||
# keys are the model names used in API requests
|
# keys are the model names used in API requests
|
||||||
"gpt-oss-120b":
|
"llama":
|
||||||
# macros: a dictionary of string substitutions specific to this model
|
# macros: a dictionary of string substitutions specific to this model
|
||||||
# - optional, default: empty dictionary
|
# - optional, default: empty dictionary
|
||||||
# - macros defined here override macros defined in the global macros section
|
# - macros defined here override macros defined in the global macros section
|
||||||
@@ -148,7 +143,7 @@ models:
|
|||||||
cmd: |
|
cmd: |
|
||||||
# ${latest-llama} is a macro that is defined above
|
# ${latest-llama} is a macro that is defined above
|
||||||
${latest-llama}
|
${latest-llama}
|
||||||
--model path/to/gpt-oss-120B.gguf
|
--model path/to/llama-8B-Q4_K_M.gguf
|
||||||
--ctx-size ${default_ctx}
|
--ctx-size ${default_ctx}
|
||||||
--temperature ${temp}
|
--temperature ${temp}
|
||||||
|
|
||||||
@@ -156,13 +151,13 @@ models:
|
|||||||
# - optional, default: empty string
|
# - optional, default: empty string
|
||||||
# - if set, it will be used in the v1/models API response
|
# - if set, it will be used in the v1/models API response
|
||||||
# - if not set, it will be omitted in the JSON model record
|
# - if not set, it will be omitted in the JSON model record
|
||||||
name: "gpt-oss 120B"
|
name: "llama 3.1 8B"
|
||||||
|
|
||||||
# description: a description for the model
|
# description: a description for the model
|
||||||
# - optional, default: empty string
|
# - optional, default: empty string
|
||||||
# - if set, it will be used in the v1/models API response
|
# - if set, it will be used in the v1/models API response
|
||||||
# - if not set, it will be omitted in the JSON model record
|
# - if not set, it will be omitted in the JSON model record
|
||||||
description: "A thinking model from OpenAI"
|
description: "A small but capable model used for quick testing"
|
||||||
|
|
||||||
# env: define an array of environment variables to inject into cmd's environment
|
# env: define an array of environment variables to inject into cmd's environment
|
||||||
# - optional, default: empty array
|
# - optional, default: empty array
|
||||||
@@ -177,6 +172,14 @@ models:
|
|||||||
# - if you use a custom port in cmd this *must* be set
|
# - if you use a custom port in cmd this *must* be set
|
||||||
proxy: http://127.0.0.1:8999
|
proxy: http://127.0.0.1:8999
|
||||||
|
|
||||||
|
# aliases: alternative model names that this model configuration is used for
|
||||||
|
# - optional, default: empty array
|
||||||
|
# - aliases must be unique globally
|
||||||
|
# - useful for impersonating a specific model
|
||||||
|
aliases:
|
||||||
|
- "gpt-4o-mini"
|
||||||
|
- "gpt-3.5-turbo"
|
||||||
|
|
||||||
# checkEndpoint: URL path to check if the server is ready
|
# checkEndpoint: URL path to check if the server is ready
|
||||||
# - optional, default: /health
|
# - optional, default: /health
|
||||||
# - endpoint is expected to return an HTTP 200 response
|
# - endpoint is expected to return an HTTP 200 response
|
||||||
@@ -185,10 +188,8 @@ models:
|
|||||||
checkEndpoint: /custom-endpoint
|
checkEndpoint: /custom-endpoint
|
||||||
|
|
||||||
# ttl: automatically unload the model after ttl seconds
|
# ttl: automatically unload the model after ttl seconds
|
||||||
# - optional, default: -1 (use global default)
|
# - optional, default: 0
|
||||||
# - ttl must be greater than or equal to 0, or -1 to use the global default
|
# - ttl values must be a value greater than 0
|
||||||
# - a ttl of -1 will use the global TTL value as the default
|
|
||||||
# - a ttl of 0 will mean never unload
|
|
||||||
# - a value of 0 disables automatic unloading of the model
|
# - a value of 0 disables automatic unloading of the model
|
||||||
ttl: 60
|
ttl: 60
|
||||||
|
|
||||||
@@ -196,7 +197,7 @@ models:
|
|||||||
# - optional, default: ""
|
# - optional, default: ""
|
||||||
# - useful for when the upstream server expects a specific model name that
|
# - useful for when the upstream server expects a specific model name that
|
||||||
# is different from the model's ID
|
# is different from the model's ID
|
||||||
useModelName: "openai/gpt-oss-120B"
|
useModelName: "qwen:qwq"
|
||||||
|
|
||||||
# filters: a dictionary of filter settings
|
# filters: a dictionary of filter settings
|
||||||
# - optional, default: empty dictionary
|
# - optional, default: empty dictionary
|
||||||
@@ -215,38 +216,11 @@ models:
|
|||||||
# - useful for enforcing specific parameter values
|
# - useful for enforcing specific parameter values
|
||||||
# - protected params like "model" cannot be overridden
|
# - protected params like "model" cannot be overridden
|
||||||
# - values can be strings, numbers, booleans, arrays, or objects
|
# - values can be strings, numbers, booleans, arrays, or objects
|
||||||
# - always runs for the model
|
|
||||||
setParams:
|
setParams:
|
||||||
# Example: enforce specific sampling parameters
|
# Example: enforce specific sampling parameters
|
||||||
temperature: 0.7
|
temperature: 0.7
|
||||||
top_p: 0.9
|
top_p: 0.9
|
||||||
|
|
||||||
# setParamsByID: a dictionary of parameters to set based on the model ID
|
|
||||||
# - optional, default: empty dictionary
|
|
||||||
# - combine with aliases to create variant behaviour without reloading the model
|
|
||||||
# - parameters are set in the request body JSON
|
|
||||||
# - runs after setParams, so its values override those set by setParams
|
|
||||||
# - protected params like "model" cannot be overridden
|
|
||||||
# - values can be strings, numbers, booleans, arrays, or objects
|
|
||||||
# - model aliases will be automatically created for each key
|
|
||||||
setParamsByID:
|
|
||||||
"${MODEL_ID}":
|
|
||||||
chat_template_kwargs:
|
|
||||||
reasoning_effort: medium
|
|
||||||
"${MODEL_ID}:high":
|
|
||||||
chat_template_kwargs:
|
|
||||||
reasoning_effort: high
|
|
||||||
"${MODEL_ID}:low":
|
|
||||||
chat_template_kwargs:
|
|
||||||
reasoning_effort: low
|
|
||||||
|
|
||||||
# aliases: alternative model names that this model configuration is used for
|
|
||||||
# - optional, default: empty array
|
|
||||||
# - aliases must be unique globally
|
|
||||||
# - useful for impersonating a specific model
|
|
||||||
aliases:
|
|
||||||
- "gpt-4o-mini"
|
|
||||||
|
|
||||||
# metadata: a dictionary of arbitrary values that are included in /v1/models
|
# metadata: a dictionary of arbitrary values that are included in /v1/models
|
||||||
# - optional, default: empty dictionary
|
# - optional, default: empty dictionary
|
||||||
# - while metadata can contain complex types, it is recommended to keep it simple
|
# - while metadata can contain complex types, it is recommended to keep it simple
|
||||||
@@ -284,22 +258,6 @@ models:
|
|||||||
# - optional, default: undefined (use global setting)
|
# - optional, default: undefined (use global setting)
|
||||||
sendLoadingState: false
|
sendLoadingState: false
|
||||||
|
|
||||||
# timeouts: configure proxy connection timeouts for this model
|
|
||||||
# - optional, defaults shown below
|
|
||||||
# - useful for models running on slower hardware that need longer timeouts
|
|
||||||
# - connect: TCP dial connection timeout in seconds, default: 30 seconds
|
|
||||||
# - keepalive: TCP connection keepalive timeout, default: 30 seconds
|
|
||||||
# - responseHeader: time to wait for response headers in seconds, default: 0 (no timeout)
|
|
||||||
# - tlsHandshake: TLS handshake timeout in seconds, default: 10 seconds
|
|
||||||
# - idleConn: idle connection timeout in seconds, default: 90 seconds
|
|
||||||
# - set any value to 0 to disable that timeout (not recommended)
|
|
||||||
timeouts:
|
|
||||||
connect: 30
|
|
||||||
keepalive: 0
|
|
||||||
responseHeader: 60
|
|
||||||
tlsHandshake: 10
|
|
||||||
idleConn: 90
|
|
||||||
|
|
||||||
# Unlisted model example:
|
# Unlisted model example:
|
||||||
"qwen-unlisted":
|
"qwen-unlisted":
|
||||||
# unlisted: boolean, true or false
|
# unlisted: boolean, true or false
|
||||||
@@ -442,17 +400,6 @@ peers:
|
|||||||
- z-ai/glm-4.7
|
- z-ai/glm-4.7
|
||||||
- moonshotai/kimi-k2-0905
|
- moonshotai/kimi-k2-0905
|
||||||
- minimax/minimax-m2.1
|
- minimax/minimax-m2.1
|
||||||
# timeouts: configure proxy connection timeouts for this peer
|
|
||||||
# - optional, defaults shown below
|
|
||||||
# - useful when the peer runs on slower hardware
|
|
||||||
# - set any value to 0 to disable that timeout (not recommended)
|
|
||||||
timeouts:
|
|
||||||
connect: 30
|
|
||||||
keepalive: 30
|
|
||||||
responseHeader: 60
|
|
||||||
tlsHandshake: 10
|
|
||||||
idleConn: 90
|
|
||||||
|
|
||||||
# filters: a dictionary of filter settings for peer requests
|
# filters: a dictionary of filter settings for peer requests
|
||||||
# - optional, default: empty dictionary
|
# - optional, default: empty dictionary
|
||||||
# - same capabilities as model filters (stripParams, setParams)
|
# - same capabilities as model filters (stripParams, setParams)
|
||||||
|
|||||||
@@ -27,7 +27,7 @@ ARCH=$1
|
|||||||
PUSH_IMAGES=${2:-false}
|
PUSH_IMAGES=${2:-false}
|
||||||
|
|
||||||
# List of allowed architectures
|
# List of allowed architectures
|
||||||
ALLOWED_ARCHS=("intel" "vulkan" "musa" "cuda" "cuda13" "cpu" "rocm")
|
ALLOWED_ARCHS=("intel" "vulkan" "musa" "cuda" "cpu" "rocm")
|
||||||
|
|
||||||
# Check if ARCH is in the allowed list
|
# Check if ARCH is in the allowed list
|
||||||
if [[ ! " ${ALLOWED_ARCHS[@]} " =~ " ${ARCH} " ]]; then
|
if [[ ! " ${ALLOWED_ARCHS[@]} " =~ " ${ARCH} " ]]; then
|
||||||
@@ -142,7 +142,7 @@ for CONTAINER_TYPE in non-root root; do
|
|||||||
fi
|
fi
|
||||||
|
|
||||||
log_info "Building $CONTAINER_TYPE $CONTAINER_TAG $LS_VER"
|
log_info "Building $CONTAINER_TYPE $CONTAINER_TAG $LS_VER"
|
||||||
docker build --provenance=false -f llama-swap.Containerfile --build-arg BASE_TAG=${BASE_TAG} --build-arg LS_VER=${LS_VER} --build-arg UID=${USER_UID} \
|
docker build -f llama-swap.Containerfile --build-arg BASE_TAG=${BASE_TAG} --build-arg LS_VER=${LS_VER} --build-arg UID=${USER_UID} \
|
||||||
--build-arg LS_REPO=${LS_REPO} --build-arg GID=${USER_GID} --build-arg USER_HOME=${USER_HOME} -t ${CONTAINER_TAG} -t ${CONTAINER_LATEST} \
|
--build-arg LS_REPO=${LS_REPO} --build-arg GID=${USER_GID} --build-arg USER_HOME=${USER_HOME} -t ${CONTAINER_TAG} -t ${CONTAINER_LATEST} \
|
||||||
--build-arg BASE_IMAGE=${BASE_IMAGE} .
|
--build-arg BASE_IMAGE=${BASE_IMAGE} .
|
||||||
|
|
||||||
@@ -150,7 +150,7 @@ for CONTAINER_TYPE in non-root root; do
|
|||||||
case "$ARCH" in
|
case "$ARCH" in
|
||||||
"musa" | "vulkan")
|
"musa" | "vulkan")
|
||||||
log_info "Adding sd-server to $CONTAINER_TAG"
|
log_info "Adding sd-server to $CONTAINER_TAG"
|
||||||
docker build --provenance=false -f llama-swap-sd.Containerfile \
|
docker build -f llama-swap-sd.Containerfile \
|
||||||
--build-arg BASE=${CONTAINER_TAG} \
|
--build-arg BASE=${CONTAINER_TAG} \
|
||||||
--build-arg SD_IMAGE=${SD_IMAGE} --build-arg SD_TAG=${SD_TAG} \
|
--build-arg SD_IMAGE=${SD_IMAGE} --build-arg SD_TAG=${SD_TAG} \
|
||||||
--build-arg UID=${USER_UID} --build-arg GID=${USER_GID} \
|
--build-arg UID=${USER_UID} --build-arg GID=${USER_GID} \
|
||||||
|
|||||||
@@ -1,305 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
#
|
|
||||||
# Build script for llama-swap-docker with commit hash pinning
|
|
||||||
#
|
|
||||||
# Usage:
|
|
||||||
# ./build-image.sh --cuda # Build CUDA image
|
|
||||||
# ./build-image.sh --vulkan # Build Vulkan image
|
|
||||||
# ./build-image.sh --cuda --no-cache # Build CUDA image without cache
|
|
||||||
# LLAMA_COMMIT_HASH=abc123 ./build-image.sh --cuda # Override llama.cpp commit
|
|
||||||
# LLAMA_COMMIT_HASH=b8429 ./build-image.sh --vulkan # Override llama.cpp release tag (vulkan uses prebuilt binaries)
|
|
||||||
# WHISPER_COMMIT_HASH=def456 ./build-image.sh --vulkan # Override whisper.cpp commit
|
|
||||||
# SD_COMMIT_HASH=ghi789 ./build-image.sh --cuda # Override stable-diffusion.cpp commit
|
|
||||||
#
|
|
||||||
# Features:
|
|
||||||
# - Auto-detects latest commit hashes from git repos
|
|
||||||
# - Builds llama-swap from local source code
|
|
||||||
# - Allows environment variable overrides for reproducible builds
|
|
||||||
# - Cache-friendly: changing commit hash busts cache appropriately
|
|
||||||
# - Supports both CUDA and Vulkan backends (requires explicit flag)
|
|
||||||
#
|
|
||||||
|
|
||||||
set -euo pipefail
|
|
||||||
|
|
||||||
# Parse command line arguments
|
|
||||||
BACKEND=""
|
|
||||||
NO_CACHE=false
|
|
||||||
|
|
||||||
if [[ $# -eq 0 ]]; then
|
|
||||||
echo "Error: No backend specified. Please use --cuda or --vulkan."
|
|
||||||
echo ""
|
|
||||||
echo "Usage: ./build-image.sh --cuda|--vulkan [--no-cache]"
|
|
||||||
echo ""
|
|
||||||
echo "Options:"
|
|
||||||
echo " --cuda Build CUDA image (NVIDIA GPUs)"
|
|
||||||
echo " --vulkan Build Vulkan image (AMD GPUs and compatible hardware)"
|
|
||||||
echo " --no-cache Force rebuild without using Docker cache"
|
|
||||||
echo " --help, -h Show this help message"
|
|
||||||
echo ""
|
|
||||||
echo "Environment variables:"
|
|
||||||
echo " DOCKER_IMAGE_TAG Set custom image tag (default: llama-swap:cuda or llama-swap:vulkan)"
|
|
||||||
echo " LLAMA_COMMIT_HASH Override llama.cpp commit hash"
|
|
||||||
echo " WHISPER_COMMIT_HASH Override whisper.cpp commit hash"
|
|
||||||
echo " SD_COMMIT_HASH Override stable-diffusion.cpp commit hash"
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
|
|
||||||
for arg in "$@"; do
|
|
||||||
case $arg in
|
|
||||||
--cuda)
|
|
||||||
BACKEND="cuda"
|
|
||||||
;;
|
|
||||||
--vulkan)
|
|
||||||
BACKEND="vulkan"
|
|
||||||
;;
|
|
||||||
--no-cache)
|
|
||||||
NO_CACHE=true
|
|
||||||
;;
|
|
||||||
--help|-h)
|
|
||||||
echo "Usage: ./build-image.sh --cuda|--vulkan [--no-cache]"
|
|
||||||
echo ""
|
|
||||||
echo "Options:"
|
|
||||||
echo " --cuda Build CUDA image (NVIDIA GPUs)"
|
|
||||||
echo " --vulkan Build Vulkan image (AMD GPUs and compatible hardware)"
|
|
||||||
echo " --no-cache Force rebuild without using Docker cache"
|
|
||||||
echo " --help, -h Show this help message"
|
|
||||||
echo ""
|
|
||||||
echo "Environment variables:"
|
|
||||||
echo " DOCKER_IMAGE_TAG Set custom image tag (default: llama-swap:cuda or llama-swap:vulkan)"
|
|
||||||
echo " LLAMA_COMMIT_HASH Override llama.cpp commit hash"
|
|
||||||
echo " WHISPER_COMMIT_HASH Override whisper.cpp commit hash"
|
|
||||||
echo " SD_COMMIT_HASH Override stable-diffusion.cpp commit hash"
|
|
||||||
exit 0
|
|
||||||
;;
|
|
||||||
esac
|
|
||||||
done
|
|
||||||
|
|
||||||
# Validate backend selection
|
|
||||||
if [[ -z "$BACKEND" ]]; then
|
|
||||||
echo "Error: No backend specified. Please use --cuda or --vulkan."
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
|
|
||||||
# Configuration
|
|
||||||
if [[ -n "${DOCKER_IMAGE_TAG:-}" ]]; then
|
|
||||||
# User provided a custom tag, use it as-is
|
|
||||||
:
|
|
||||||
elif [[ "$BACKEND" == "vulkan" ]]; then
|
|
||||||
DOCKER_IMAGE_TAG="llama-swap:vulkan"
|
|
||||||
else
|
|
||||||
DOCKER_IMAGE_TAG="llama-swap:cuda"
|
|
||||||
fi
|
|
||||||
DOCKER_BUILDKIT="${DOCKER_BUILDKIT:-1}"
|
|
||||||
|
|
||||||
# Single unified Dockerfile, backend selected via build arg
|
|
||||||
DOCKERFILE="Dockerfile"
|
|
||||||
if [[ "$BACKEND" == "vulkan" ]]; then
|
|
||||||
echo "Building for: Vulkan (AMD GPUs and compatible hardware)"
|
|
||||||
else
|
|
||||||
echo "Building for: CUDA (NVIDIA GPUs)"
|
|
||||||
fi
|
|
||||||
|
|
||||||
# Git repository URLs
|
|
||||||
LLAMA_REPO="https://github.com/ggml-org/llama.cpp.git"
|
|
||||||
WHISPER_REPO="https://github.com/ggml-org/whisper.cpp.git"
|
|
||||||
SD_REPO="https://github.com/leejet/stable-diffusion.cpp.git"
|
|
||||||
|
|
||||||
# Function to get the latest commit hash from a git repo's default branch
|
|
||||||
get_latest_commit() {
|
|
||||||
local repo_url="$1"
|
|
||||||
local branch="${2:-master}"
|
|
||||||
|
|
||||||
# Try to get the latest commit hash for the specified branch
|
|
||||||
git ls-remote --heads "${repo_url}" "${branch}" 2>/dev/null | head -1 | cut -f1
|
|
||||||
}
|
|
||||||
|
|
||||||
# Function to get the default branch name (master or main)
|
|
||||||
get_default_branch() {
|
|
||||||
local repo_url="$1"
|
|
||||||
|
|
||||||
# Check for master first
|
|
||||||
if git ls-remote --heads "${repo_url}" master &>/dev/null; then
|
|
||||||
echo "master"
|
|
||||||
elif git ls-remote --heads "${repo_url}" main &>/dev/null; then
|
|
||||||
echo "main"
|
|
||||||
else
|
|
||||||
echo "master" # fallback
|
|
||||||
fi
|
|
||||||
}
|
|
||||||
|
|
||||||
# Function to get the latest release tag from a GitHub repo
|
|
||||||
get_latest_release_tag() {
|
|
||||||
local owner_repo="$1"
|
|
||||||
curl -fsSL "https://api.github.com/repos/${owner_repo}/releases/latest" \
|
|
||||||
| grep '"tag_name"' | head -1 | cut -d'"' -f4
|
|
||||||
}
|
|
||||||
|
|
||||||
echo "=========================================="
|
|
||||||
echo "llama-swap-docker Build Script"
|
|
||||||
echo "=========================================="
|
|
||||||
echo ""
|
|
||||||
|
|
||||||
# Determine commit hashes / release tags - use env vars or auto-detect
|
|
||||||
# For vulkan builds, llama and sd use GitHub release tags (prebuilt binaries).
|
|
||||||
# For cuda builds (or whisper on any backend), use git commit hashes.
|
|
||||||
if [[ -n "${LLAMA_COMMIT_HASH:-}" ]]; then
|
|
||||||
LLAMA_HASH="${LLAMA_COMMIT_HASH}"
|
|
||||||
echo "llama.cpp: Using provided version: ${LLAMA_HASH}"
|
|
||||||
elif [[ "$BACKEND" == "vulkan" ]]; then
|
|
||||||
LLAMA_HASH=$(get_latest_release_tag "ggml-org/llama.cpp")
|
|
||||||
if [[ -z "${LLAMA_HASH}" ]]; then
|
|
||||||
echo "ERROR: Could not determine latest release tag for llama.cpp" >&2
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
echo "llama.cpp: Auto-detected latest release tag: ${LLAMA_HASH}"
|
|
||||||
else
|
|
||||||
LLAMA_BRANCH=$(get_default_branch "${LLAMA_REPO}")
|
|
||||||
LLAMA_HASH=$(get_latest_commit "${LLAMA_REPO}" "${LLAMA_BRANCH}")
|
|
||||||
if [[ -z "${LLAMA_HASH}" ]]; then
|
|
||||||
echo "ERROR: Could not determine latest commit for llama.cpp" >&2
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
echo "llama.cpp: Auto-detected latest commit (${LLAMA_BRANCH}): ${LLAMA_HASH}"
|
|
||||||
fi
|
|
||||||
|
|
||||||
if [[ -n "${WHISPER_COMMIT_HASH:-}" ]]; then
|
|
||||||
WHISPER_HASH="${WHISPER_COMMIT_HASH}"
|
|
||||||
echo "whisper.cpp: Using provided commit hash: ${WHISPER_HASH}"
|
|
||||||
else
|
|
||||||
WHISPER_BRANCH=$(get_default_branch "${WHISPER_REPO}")
|
|
||||||
WHISPER_HASH=$(get_latest_commit "${WHISPER_REPO}" "${WHISPER_BRANCH}")
|
|
||||||
if [[ -z "${WHISPER_HASH}" ]]; then
|
|
||||||
echo "ERROR: Could not determine latest commit for whisper.cpp" >&2
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
echo "whisper.cpp: Auto-detected latest commit (${WHISPER_BRANCH}): ${WHISPER_HASH}"
|
|
||||||
fi
|
|
||||||
|
|
||||||
if [[ -n "${SD_COMMIT_HASH:-}" ]]; then
|
|
||||||
SD_HASH="${SD_COMMIT_HASH}"
|
|
||||||
echo "stable-diffusion.cpp: Using provided version: ${SD_HASH}"
|
|
||||||
elif [[ "$BACKEND" == "vulkan" ]]; then
|
|
||||||
SD_HASH=$(get_latest_release_tag "leejet/stable-diffusion.cpp")
|
|
||||||
if [[ -z "${SD_HASH}" ]]; then
|
|
||||||
echo "ERROR: Could not determine latest release tag for stable-diffusion.cpp" >&2
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
echo "stable-diffusion.cpp: Auto-detected latest release tag: ${SD_HASH}"
|
|
||||||
else
|
|
||||||
SD_BRANCH=$(get_default_branch "${SD_REPO}")
|
|
||||||
SD_HASH=$(get_latest_commit "${SD_REPO}" "${SD_BRANCH}")
|
|
||||||
if [[ -z "${SD_HASH}" ]]; then
|
|
||||||
echo "ERROR: Could not determine latest commit for stable-diffusion.cpp" >&2
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
echo "stable-diffusion.cpp: Auto-detected latest commit (${SD_BRANCH}): ${SD_HASH}"
|
|
||||||
fi
|
|
||||||
|
|
||||||
echo ""
|
|
||||||
echo "=========================================="
|
|
||||||
echo "Starting Docker build..."
|
|
||||||
echo "=========================================="
|
|
||||||
echo ""
|
|
||||||
|
|
||||||
# Build the Docker image with commit hashes as build args
|
|
||||||
# Build context is the repository root (..) so the Dockerfile can access Go source
|
|
||||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
|
||||||
REPO_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)"
|
|
||||||
BUILD_ARGS=(
|
|
||||||
--build-arg "BACKEND=${BACKEND}"
|
|
||||||
--build-arg "LLAMA_COMMIT_HASH=${LLAMA_HASH}"
|
|
||||||
--build-arg "WHISPER_COMMIT_HASH=${WHISPER_HASH}"
|
|
||||||
--build-arg "SD_COMMIT_HASH=${SD_HASH}"
|
|
||||||
-t "${DOCKER_IMAGE_TAG}"
|
|
||||||
-f "${SCRIPT_DIR}/${DOCKERFILE}"
|
|
||||||
)
|
|
||||||
|
|
||||||
if [[ "$NO_CACHE" == true ]]; then
|
|
||||||
BUILD_ARGS+=(--no-cache)
|
|
||||||
echo "Note: Building without cache"
|
|
||||||
fi
|
|
||||||
|
|
||||||
# Use docker buildx with a custom builder for parallelism control
|
|
||||||
# The legacy DOCKER_BUILDKIT=1 docker build doesn't respect BUILDKIT_MAX_PARALLELISM env var
|
|
||||||
# We need to use a custom builder with a buildkitd.toml config file
|
|
||||||
BUILDER_NAME="llama-swap-builder"
|
|
||||||
|
|
||||||
# Check if our custom builder exists; create it if missing, otherwise switch to it
|
|
||||||
if ! docker buildx inspect "$BUILDER_NAME" >/dev/null 2>&1; then
|
|
||||||
echo "Creating custom buildx builder with max-parallelism=1..."
|
|
||||||
|
|
||||||
# Create buildkitd.toml config file
|
|
||||||
cat > buildkitd.toml << 'BUILDKIT_EOF'
|
|
||||||
[worker.oci]
|
|
||||||
max-parallelism = 1
|
|
||||||
BUILDKIT_EOF
|
|
||||||
|
|
||||||
# Create the builder with the config
|
|
||||||
docker buildx create --name "$BUILDER_NAME" \
|
|
||||||
--driver docker-container \
|
|
||||||
--buildkitd-config buildkitd.toml \
|
|
||||||
--use
|
|
||||||
else
|
|
||||||
# Switch to our builder
|
|
||||||
docker buildx use "$BUILDER_NAME"
|
|
||||||
fi
|
|
||||||
|
|
||||||
echo "Building with sequential stages (one at a time), each using all CPU cores..."
|
|
||||||
echo "Using builder: $BUILDER_NAME"
|
|
||||||
|
|
||||||
# Use docker buildx build with --load to load the image into Docker
|
|
||||||
# The --builder flag ensures we use our custom builder with max-parallelism=1
|
|
||||||
# Build context is the repository root so we can access Go source files
|
|
||||||
docker buildx build --builder "$BUILDER_NAME" --load "${BUILD_ARGS[@]}" "${REPO_ROOT}"
|
|
||||||
|
|
||||||
echo ""
|
|
||||||
echo "=========================================="
|
|
||||||
echo "Verifying build artifacts..."
|
|
||||||
echo "=========================================="
|
|
||||||
echo ""
|
|
||||||
|
|
||||||
# Verify all expected binaries exist in the image
|
|
||||||
MISSING_BINARIES=()
|
|
||||||
|
|
||||||
for binary in llama-server llama-cli whisper-server whisper-cli sd-server sd-cli llama-swap; do
|
|
||||||
if ! docker run --rm "${DOCKER_IMAGE_TAG}" which "${binary}" >/dev/null 2>&1; then
|
|
||||||
MISSING_BINARIES+=("${binary}")
|
|
||||||
fi
|
|
||||||
done
|
|
||||||
|
|
||||||
if [[ ${#MISSING_BINARIES[@]} -gt 0 ]]; then
|
|
||||||
echo "ERROR: Build succeeded but the following binaries are missing from the image:"
|
|
||||||
for binary in "${MISSING_BINARIES[@]}"; do
|
|
||||||
echo " - ${binary}"
|
|
||||||
done
|
|
||||||
echo ""
|
|
||||||
echo "This usually indicates a build stage failure. Try running with --no-cache flag:"
|
|
||||||
echo " ./build-image.sh --vulkan --no-cache"
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
|
|
||||||
echo "All expected binaries verified: llama-server, llama-cli, whisper-server, whisper-cli, sd-server, sd-cli, llama-swap"
|
|
||||||
|
|
||||||
echo ""
|
|
||||||
echo "=========================================="
|
|
||||||
echo "Build complete!"
|
|
||||||
echo "=========================================="
|
|
||||||
echo ""
|
|
||||||
echo "Image tag: ${DOCKER_IMAGE_TAG}"
|
|
||||||
echo ""
|
|
||||||
echo "Built with:"
|
|
||||||
echo " llama.cpp: ${LLAMA_HASH}"
|
|
||||||
echo " whisper.cpp: ${WHISPER_HASH}"
|
|
||||||
echo " stable-diffusion.cpp: ${SD_HASH}"
|
|
||||||
echo " llama-swap: $(docker run --rm "${DOCKER_IMAGE_TAG}" cat /versions.txt | grep llama-swap | cut -d' ' -f2-)"
|
|
||||||
echo ""
|
|
||||||
if [[ "$BACKEND" == "vulkan" ]]; then
|
|
||||||
echo "Run with:"
|
|
||||||
echo " docker run -it --rm --device /dev/dri:/dev/dri ${DOCKER_IMAGE_TAG}"
|
|
||||||
echo ""
|
|
||||||
echo "Note: For AMD GPUs, you may also need to mount render devices:"
|
|
||||||
echo " docker run -it --rm --device /dev/dri:/dev/dri --group-add video ${DOCKER_IMAGE_TAG}"
|
|
||||||
else
|
|
||||||
echo "Run with:"
|
|
||||||
echo " docker run -it --rm --gpus all ${DOCKER_IMAGE_TAG}"
|
|
||||||
fi
|
|
||||||
@@ -1,203 +0,0 @@
|
|||||||
# Unified multi-stage Dockerfile for AI inference tools
|
|
||||||
# Supports CUDA and Vulkan backends via BACKEND build arg
|
|
||||||
#
|
|
||||||
# Usage:
|
|
||||||
# docker buildx build --build-arg BACKEND=cuda -t llama-swap:unified-cuda .
|
|
||||||
# docker buildx build --build-arg BACKEND=vulkan -t llama-swap:unified-vulkan .
|
|
||||||
# docker buildx build --build-arg BACKEND=cuda --build-arg CMAKE_CUDA_ARCHITECTURES="86;89" -t llama-swap:unified-cuda .
|
|
||||||
#
|
|
||||||
# Each project has its own install script that handles cloning, building,
|
|
||||||
# and installing binaries. Build stages are independent for cache efficiency.
|
|
||||||
|
|
||||||
ARG BACKEND=cuda
|
|
||||||
|
|
||||||
# ── Builder bases ──────────────────────────────────────────────────────
|
|
||||||
|
|
||||||
FROM nvidia/cuda:12.9.1-devel-ubuntu24.04 AS builder-base-cuda
|
|
||||||
|
|
||||||
ARG CMAKE_CUDA_ARCHITECTURES="60;61;75;86;89"
|
|
||||||
ENV DEBIAN_FRONTEND=noninteractive
|
|
||||||
ENV CMAKE_CUDA_ARCHITECTURES=${CMAKE_CUDA_ARCHITECTURES}
|
|
||||||
ENV CCACHE_DIR=/ccache
|
|
||||||
ENV CCACHE_MAXSIZE=2G
|
|
||||||
ENV PATH="/usr/lib/ccache:${PATH}"
|
|
||||||
|
|
||||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
|
||||||
build-essential cmake git python3 python3-pip libssl-dev \
|
|
||||||
curl ca-certificates ccache make wget \
|
|
||||||
&& rm -rf /var/lib/apt/lists/*
|
|
||||||
|
|
||||||
WORKDIR /build
|
|
||||||
|
|
||||||
# ──
|
|
||||||
|
|
||||||
FROM ubuntu:24.04 AS builder-base-vulkan
|
|
||||||
|
|
||||||
ENV DEBIAN_FRONTEND=noninteractive
|
|
||||||
ENV CCACHE_DIR=/ccache
|
|
||||||
ENV CCACHE_MAXSIZE=2G
|
|
||||||
ENV PATH="/usr/lib/ccache:${PATH}"
|
|
||||||
|
|
||||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
|
||||||
build-essential cmake git python3 python3-pip libssl-dev \
|
|
||||||
curl ca-certificates ccache make wget software-properties-common \
|
|
||||||
libvulkan-dev glslang-tools spirv-tools vulkan-validationlayers glslc \
|
|
||||||
&& rm -rf /var/lib/apt/lists/*
|
|
||||||
|
|
||||||
WORKDIR /build
|
|
||||||
|
|
||||||
# ── Select builder base by BACKEND ────────────────────────────────────
|
|
||||||
|
|
||||||
FROM builder-base-${BACKEND} AS builder-base
|
|
||||||
|
|
||||||
# ── Build whisper.cpp (fastest build, run first) ──────────────────────
|
|
||||||
|
|
||||||
FROM builder-base AS whisper-build
|
|
||||||
ARG BACKEND=cuda
|
|
||||||
ARG WHISPER_COMMIT_HASH=master
|
|
||||||
COPY install-whisper.sh /build/
|
|
||||||
RUN --mount=type=cache,id=ccache-${BACKEND},target=/ccache \
|
|
||||||
--mount=type=cache,id=whisper-${BACKEND},target=/src/whisper.cpp/build \
|
|
||||||
BACKEND=${BACKEND} bash /build/install-whisper.sh "${WHISPER_COMMIT_HASH}"
|
|
||||||
|
|
||||||
# ── Build stable-diffusion.cpp ────────────────────────────────────────
|
|
||||||
|
|
||||||
FROM builder-base AS sd-build
|
|
||||||
ARG BACKEND=cuda
|
|
||||||
ARG SD_COMMIT_HASH=master
|
|
||||||
COPY install-sd.sh /build/
|
|
||||||
RUN --mount=type=cache,id=ccache-${BACKEND},target=/ccache \
|
|
||||||
--mount=type=cache,id=sd-${BACKEND},target=/src/stable-diffusion.cpp/build \
|
|
||||||
BACKEND=${BACKEND} bash /build/install-sd.sh "${SD_COMMIT_HASH}"
|
|
||||||
|
|
||||||
# ── Build llama.cpp (slowest build, run last) ─────────────────────────
|
|
||||||
|
|
||||||
FROM builder-base AS llama-build
|
|
||||||
ARG BACKEND=cuda
|
|
||||||
ARG LLAMA_COMMIT_HASH=master
|
|
||||||
COPY install-llama.sh /build/
|
|
||||||
RUN --mount=type=cache,id=ccache-${BACKEND},target=/ccache \
|
|
||||||
--mount=type=cache,id=llama-${BACKEND},target=/src/llama.cpp/build \
|
|
||||||
BACKEND=${BACKEND} bash /build/install-llama.sh "${LLAMA_COMMIT_HASH}"
|
|
||||||
|
|
||||||
# ── Build ik_llama.cpp (CUDA only) ────────────────────────────────────
|
|
||||||
#
|
|
||||||
# Two named stages allow ARG BACKEND to select at build time:
|
|
||||||
# - ik-llama-cuda : real build (from builder-base-cuda)
|
|
||||||
# - ik-llama-vulkan: no-op (empty /install/bin, skips CUDA pull entirely)
|
|
||||||
# BuildKit only evaluates the selected branch, so vulkan builds never
|
|
||||||
# pull nvidia/cuda:*-devel or compile ik_llama.cpp.
|
|
||||||
|
|
||||||
FROM builder-base-vulkan AS ik-llama-vulkan
|
|
||||||
RUN mkdir -p /install/bin
|
|
||||||
|
|
||||||
FROM builder-base-cuda AS ik-llama-cuda
|
|
||||||
ARG IK_LLAMA_COMMIT_HASH=main
|
|
||||||
COPY install-ik-llama.sh /build/
|
|
||||||
RUN --mount=type=cache,id=ccache-cuda,target=/ccache \
|
|
||||||
--mount=type=cache,id=ik-llama-cuda,target=/src/ik_llama.cpp/build \
|
|
||||||
bash /build/install-ik-llama.sh "${IK_LLAMA_COMMIT_HASH}"
|
|
||||||
|
|
||||||
ARG BACKEND=cuda
|
|
||||||
FROM ik-llama-${BACKEND} AS ik-llama-build
|
|
||||||
|
|
||||||
# ── Download llama-swap release binary ────────────────────────────────
|
|
||||||
|
|
||||||
FROM builder-base AS llama-swap-download
|
|
||||||
ARG LS_VERSION=latest
|
|
||||||
COPY install-llama-swap.sh /build/
|
|
||||||
RUN bash /build/install-llama-swap.sh "${LS_VERSION}"
|
|
||||||
|
|
||||||
# ── Runtime bases ─────────────────────────────────────────────────────
|
|
||||||
|
|
||||||
FROM nvidia/cuda:12.9.1-runtime-ubuntu24.04 AS runtime-cuda
|
|
||||||
|
|
||||||
ENV DEBIAN_FRONTEND=noninteractive
|
|
||||||
ENV LD_LIBRARY_PATH="/usr/local/cuda/lib64:${LD_LIBRARY_PATH}"
|
|
||||||
ENV PATH="/usr/local/bin:${PATH}"
|
|
||||||
|
|
||||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
|
||||||
libgomp1 python3 curl ca-certificates \
|
|
||||||
&& rm -rf /var/lib/apt/lists/*
|
|
||||||
|
|
||||||
# CUDA stub drivers for container compatibility
|
|
||||||
COPY --from=builder-base-cuda /usr/local/cuda/lib64/stubs/libcuda.so /usr/local/cuda/lib64/stubs/libcuda.so
|
|
||||||
COPY --from=builder-base-cuda /usr/local/cuda/lib64/stubs/libcuda.so /usr/local/cuda/lib64/stubs/libcuda.so.1
|
|
||||||
|
|
||||||
# ──
|
|
||||||
|
|
||||||
FROM ubuntu:24.04 AS runtime-vulkan
|
|
||||||
|
|
||||||
ENV DEBIAN_FRONTEND=noninteractive
|
|
||||||
ENV PATH="/usr/local/bin:${PATH}"
|
|
||||||
|
|
||||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
|
||||||
libgomp1 libvulkan1 mesa-vulkan-drivers \
|
|
||||||
python3 curl ca-certificates \
|
|
||||||
&& rm -rf /var/lib/apt/lists/*
|
|
||||||
|
|
||||||
# ── Select runtime base by BACKEND ────────────────────────────────────
|
|
||||||
|
|
||||||
FROM runtime-${BACKEND} AS runtime
|
|
||||||
|
|
||||||
ARG BACKEND=cuda
|
|
||||||
ARG LLAMA_COMMIT_HASH=unknown
|
|
||||||
ARG WHISPER_COMMIT_HASH=unknown
|
|
||||||
ARG SD_COMMIT_HASH=unknown
|
|
||||||
ARG IK_LLAMA_COMMIT_HASH=unknown
|
|
||||||
ARG RUN_UID=0
|
|
||||||
|
|
||||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
|
||||||
python3-numpy python3-sentencepiece \
|
|
||||||
&& rm -rf /var/lib/apt/lists/*
|
|
||||||
|
|
||||||
# Create non-root user when RUN_UID != 0
|
|
||||||
RUN if [ "$RUN_UID" != "0" ]; then \
|
|
||||||
groupadd --system --gid $RUN_UID llama-swap && \
|
|
||||||
useradd --system --uid $RUN_UID --gid $RUN_UID \
|
|
||||||
--home /app --shell /sbin/nologin llama-swap; \
|
|
||||||
fi && \
|
|
||||||
mkdir -p /etc/llama-swap/config && \
|
|
||||||
chown -R ${RUN_UID}:${RUN_UID} /etc/llama-swap
|
|
||||||
|
|
||||||
WORKDIR /app
|
|
||||||
|
|
||||||
# Copy whisper.cpp binaries and libraries
|
|
||||||
COPY --from=whisper-build /install/bin/whisper-server /usr/local/bin/
|
|
||||||
COPY --from=whisper-build /install/bin/whisper-cli /usr/local/bin/
|
|
||||||
COPY --from=whisper-build /install/lib/ /usr/local/lib/
|
|
||||||
|
|
||||||
# Copy stable-diffusion.cpp binaries and libraries
|
|
||||||
COPY --from=sd-build /install/bin/sd-server /usr/local/bin/
|
|
||||||
COPY --from=sd-build /install/bin/sd-cli /usr/local/bin/
|
|
||||||
COPY --from=sd-build /install/lib/ /usr/local/lib/
|
|
||||||
|
|
||||||
# Copy llama.cpp binaries (statically linked)
|
|
||||||
COPY --from=llama-build /install/bin/llama-server /usr/local/bin/
|
|
||||||
COPY --from=llama-build /install/bin/llama-cli /usr/local/bin/
|
|
||||||
|
|
||||||
# Copy ik-llama-server (CUDA only; empty copy for vulkan)
|
|
||||||
COPY --from=ik-llama-build /install/bin/ /usr/local/bin/
|
|
||||||
|
|
||||||
# Copy llama-swap binary
|
|
||||||
COPY --from=llama-swap-download /install/bin/llama-swap /usr/local/bin/
|
|
||||||
COPY --from=llama-swap-download /install/llama-swap-version /tmp/
|
|
||||||
|
|
||||||
RUN ldconfig
|
|
||||||
|
|
||||||
COPY config.example.yaml /etc/llama-swap/config/config.yaml
|
|
||||||
|
|
||||||
# Version tracking
|
|
||||||
RUN echo "llama.cpp: ${LLAMA_COMMIT_HASH}" > /versions.txt && \
|
|
||||||
echo "whisper.cpp: ${WHISPER_COMMIT_HASH}" >> /versions.txt && \
|
|
||||||
echo "stable-diffusion.cpp: ${SD_COMMIT_HASH}" >> /versions.txt && \
|
|
||||||
echo "ik_llama.cpp: ${IK_LLAMA_COMMIT_HASH}" >> /versions.txt && \
|
|
||||||
echo "llama-swap: $(cat /tmp/llama-swap-version)" >> /versions.txt && \
|
|
||||||
echo "backend: ${BACKEND}" >> /versions.txt && \
|
|
||||||
echo "build_timestamp: $(date -u +%Y-%m-%dT%H:%M:%SZ)" >> /versions.txt
|
|
||||||
|
|
||||||
RUN mkdir -p /models && chown ${RUN_UID}:${RUN_UID} /models
|
|
||||||
WORKDIR /models
|
|
||||||
USER ${RUN_UID}
|
|
||||||
ENTRYPOINT ["llama-swap"]
|
|
||||||
CMD ["-config", "/etc/llama-swap/config/config.yaml", "-listen", "0.0.0.0:8080"]
|
|
||||||
@@ -1,8 +0,0 @@
|
|||||||
# Unified Docker Container
|
|
||||||
|
|
||||||
These scripts create a custom llama-swap container that contains:
|
|
||||||
|
|
||||||
- llama-server for LLMs, rerank and embedding model support
|
|
||||||
- sd-server (stable-diffusion.cpp) for image generation
|
|
||||||
- whisper.cpp for ASR
|
|
||||||
|
|
||||||
@@ -1,303 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
#
|
|
||||||
# Build script for unified container with version pinning
|
|
||||||
#
|
|
||||||
# Usage:
|
|
||||||
# ./build-image.sh --cuda # Build CUDA image
|
|
||||||
# ./build-image.sh --vulkan # Build Vulkan image
|
|
||||||
# ./build-image.sh --cuda --no-cache # Build without cache
|
|
||||||
# LLAMA_REF=b1234 ./build-image.sh --vulkan # Pin llama.cpp to a commit hash
|
|
||||||
# LLAMA_REF=v1.2.3 ./build-image.sh --cuda # Pin llama.cpp to a tag
|
|
||||||
# WHISPER_REF=v1.0.0 ./build-image.sh --vulkan # Pin whisper.cpp to a tag
|
|
||||||
# SD_REF=master ./build-image.sh --cuda # Pin stable-diffusion.cpp to a branch
|
|
||||||
# LS_VERSION=170 ./build-image.sh --cuda # Override llama-swap version
|
|
||||||
# IK_LLAMA_REF=main ./build-image.sh --cuda # Pin ik_llama.cpp to main branch (CUDA only)
|
|
||||||
#
|
|
||||||
|
|
||||||
# Abort on any command failure, on use of an unset variable, and on a failure
# anywhere inside a pipeline.
set -euo pipefail

# GPU backend to build for; stays empty until --cuda or --vulkan is seen.
BACKEND=""
# Switched to true by --no-cache.
NO_CACHE=false

for arg in "$@"; do
    case $arg in
        --cuda)
            BACKEND="cuda"
            ;;
        --vulkan)
            BACKEND="vulkan"
            ;;
        --no-cache)
            NO_CACHE=true
            ;;
        --help|-h)
            echo "Usage: ./build-image.sh --cuda|--vulkan [--no-cache]"
            echo ""
            echo "Options:"
            echo " --cuda Build CUDA image (NVIDIA GPUs)"
            echo " --vulkan Build Vulkan image (AMD GPUs and compatible hardware)"
            echo " --no-cache Force rebuild without using Docker cache"
            echo " --help, -h Show this help message"
            echo ""
            echo "Environment variables:"
            echo " DOCKER_IMAGE_TAG Set custom image tag (default: llama-swap:unified-cuda or llama-swap:unified-vulkan)"
            echo " LLAMA_REF Pin llama.cpp to a commit, tag, or branch"
            echo " WHISPER_REF Pin whisper.cpp to a commit, tag, or branch"
            echo " SD_REF Pin stable-diffusion.cpp to a commit, tag, or branch"
            echo " IK_LLAMA_REF Pin ik_llama.cpp to a commit, tag, or branch (CUDA only)"
            echo " LS_VERSION Override llama-swap version (e.g., '170' or 'latest')"
            exit 0
            ;;
        *)
            # Unknown arguments are still tolerated, but no longer silently:
            # a typo such as --nocache would otherwise go unnoticed.
            echo "Warning: ignoring unrecognized argument '${arg}'" >&2
            ;;
    esac
done

# A backend is mandatory; report the problem on stderr like the other errors
# in this script.
if [[ -z "$BACKEND" ]]; then
    echo "Error: No backend specified. Please use --cuda or --vulkan." >&2
    echo "" >&2
    echo "Usage: ./build-image.sh --cuda|--vulkan [--no-cache]" >&2
    exit 1
fi

# The caller may override the tag through the environment.
DOCKER_IMAGE_TAG="${DOCKER_IMAGE_TAG:-llama-swap:unified-${BACKEND}}"

# Git repository URLs
LLAMA_REPO="https://github.com/ggml-org/llama.cpp.git"
WHISPER_REPO="https://github.com/ggml-org/whisper.cpp.git"
SD_REPO="https://github.com/leejet/stable-diffusion.cpp.git"
LLAMA_SWAP_REPO="https://github.com/mostlygeek/llama-swap.git"
IK_LLAMA_REPO="https://github.com/ikawrakow/ik_llama.cpp.git"
|
|
||||||
|
|
||||||
# Resolve a git ref (commit hash, tag, or branch) to a hash printed on stdout.
# Requires only: git, network access to the remote.
#
# Arguments:
#   $1  repository URL handed to `git ls-remote`
#   $2  ref to resolve: a full 40-char SHA, a tag name, a branch name, or a
#       7+ char short hash that prefixes the tip of an advertised ref
# Returns:
#   0 with the hash on stdout; 1 with an explanation on stderr.
resolve_ref() {
    local repo_url="$1"
    local ref="$2"
    local hash namespace

    # Full 40-char SHA — use as-is, no remote lookup needed
    if [[ "${ref}" =~ ^[0-9a-f]{40}$ ]]; then
        echo "${ref}"
        return
    fi

    # Try tag then branch (exact match). The namespaces are queried one at a
    # time: a combined query returns refs in the remote's listing order
    # (normally refs/heads/* before refs/tags/*), so taking its first line
    # would pick the branch whenever both exist.
    # `|| true` keeps a failed lookup from aborting a caller that runs with
    # errexit + pipefail; the empty result is handled below.
    for namespace in refs/tags refs/heads; do
        hash=$(git ls-remote "${repo_url}" "${namespace}/${ref}" 2>/dev/null | head -1 | cut -f1) || true
        if [[ -n "${hash}" ]]; then
            echo "${hash}"
            return
        fi
    done

    # Short hash (7+ chars): scan all refs for a SHA with this prefix.
    # Only ref tips are advertised, so commits that are not a tip do not match.
    if [[ "${ref}" =~ ^[0-9a-f]{7,}$ ]]; then
        hash=$(git ls-remote "${repo_url}" 2>/dev/null | grep "^${ref}" | head -1 | cut -f1) || true
        if [[ -n "${hash}" ]]; then
            echo "${hash}"
            return
        fi
    fi

    echo "ERROR: Could not resolve ref '${ref}' for ${repo_url}" >&2
    if [[ "${ref}" =~ ^[0-9a-f]+$ && ${#ref} -lt 7 ]]; then
        echo " Short hashes must be at least 7 characters (got ${#ref})." >&2
    else
        echo " Tried: tag, branch, git ls-remote prefix match" >&2
    fi
    echo " Use a full 40-char SHA, a tag name, a branch name, or a 7+ char short hash." >&2
    return 1
}
|
|
||||||
|
|
||||||
# Resolve HEAD of a repo without needing to know the default branch name.
#
# Arguments:
#   $1  repository URL handed to `git ls-remote`
# Output:
#   The hash of the remote HEAD on stdout, or nothing when the lookup fails.
# Returns:
#   Always 0. The script runs with `set -euo pipefail`, so a non-zero status
#   here would abort `VAR=$(get_latest_hash ...)` before the caller's
#   empty-output check could print its error message.
get_latest_hash() {
    git ls-remote "${1}" HEAD 2>/dev/null | head -1 | cut -f1 || true
}
|
|
||||||
|
|
||||||
echo "=========================================="
echo "llama-swap Unified Build (${BACKEND})"
echo "=========================================="
echo ""

# Resolve one component to a commit hash and store it in a named variable.
#
# Arguments:
#   $1  name of the global variable that receives the hash
#   $2  component label used in progress and error messages
#   $3  repository URL
#   $4  ref to resolve; empty means "use the remote HEAD"
#   $5  optional text shown as the requested ref (defaults to $4)
# Exits the script with status 1 when nothing can be resolved.
resolve_component() {
    local target_var="$1" label="$2" repo_url="$3" ref="$4"
    local shown_ref="${5:-$4}"
    local resolved

    if [[ -n "${ref}" ]]; then
        resolved=$(resolve_ref "${repo_url}" "${ref}") || exit 1
        echo "${label}: ${shown_ref} -> ${resolved}"
    else
        # Without `|| true`, a failing lookup would trip `set -e` on the
        # assignment and end the script before the message below is printed.
        resolved=$(get_latest_hash "${repo_url}") || true
        if [[ -z "${resolved}" ]]; then
            echo "ERROR: Could not determine latest commit for ${label}" >&2
            exit 1
        fi
        echo "${label}: latest HEAD: ${resolved}"
    fi

    printf -v "${target_var}" '%s' "${resolved}"
}

# Resolve llama.cpp, whisper.cpp and stable-diffusion.cpp refs
resolve_component LLAMA_HASH "llama.cpp" "${LLAMA_REPO}" "${LLAMA_REF:-}"
resolve_component WHISPER_HASH "whisper.cpp" "${WHISPER_REPO}" "${WHISPER_REF:-}"
resolve_component SD_HASH "stable-diffusion.cpp" "${SD_REPO}" "${SD_REF:-}"

# Resolve ik_llama.cpp ref (CUDA only)
if [[ "$BACKEND" == "cuda" ]]; then
    resolve_component IK_LLAMA_HASH "ik_llama.cpp" "${IK_LLAMA_REPO}" "${IK_LLAMA_REF:-}"
else
    IK_LLAMA_HASH="n/a"
    echo "ik_llama.cpp: skipped (vulkan build)"
fi

# Resolve llama-swap ref. The help text advertises LS_VERSION values such as
# '170' and 'latest', neither of which is a ref name on its own:
#   - 'latest' is treated like an unset LS_VERSION (remote HEAD);
#   - a bare release number gets the 'v' prefix that llama-swap release tags
#     appear to carry (the release download URLs use v<N>).
# The pattern is limited to six digits so that a 7+ digit value can still be
# interpreted as a short commit hash.
LS_REF="${LS_VERSION:-}"
if [[ "${LS_REF}" == "latest" ]]; then
    LS_REF=""
elif [[ "${LS_REF}" =~ ^[0-9]{1,6}$ ]]; then
    LS_REF="v${LS_REF}"
fi
resolve_component LS_HASH "llama-swap" "${LLAMA_SWAP_REPO}" "${LS_REF}" "${LS_VERSION:-}"
|
|
||||||
|
|
||||||
printf '%s\n' \
    "" \
    "==========================================" \
    "Starting Docker build..." \
    "==========================================" \
    ""

# Directory holding this script; it serves as both the Dockerfile location
# and the build context.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

# Assemble the buildx arguments: one --build-arg per pinned value, then the
# image tag and the Dockerfile path.
BUILD_ARGS=()
for build_arg in \
    "BACKEND=${BACKEND}" \
    "LLAMA_COMMIT_HASH=${LLAMA_HASH}" \
    "WHISPER_COMMIT_HASH=${WHISPER_HASH}" \
    "SD_COMMIT_HASH=${SD_HASH}" \
    "IK_LLAMA_COMMIT_HASH=${IK_LLAMA_HASH}" \
    "LS_VERSION=${LS_HASH}"; do
    BUILD_ARGS+=(--build-arg "${build_arg}")
done
BUILD_ARGS+=(-t "${DOCKER_IMAGE_TAG}")
BUILD_ARGS+=(-f "${SCRIPT_DIR}/Dockerfile")

# Cache policy: --no-cache wins; otherwise a registry cache is used when
# running in GitHub Actions (and not under `act`).
if [[ "$NO_CACHE" == true ]]; then
    BUILD_ARGS+=(--no-cache)
    echo "Note: Building without cache"
elif [[ "${GITHUB_ACTIONS:-}" == "true" ]] && [[ "${ACT:-}" != "true" ]]; then
    CACHE_REF="ghcr.io/mostlygeek/llama-swap:unified-${BACKEND}-cache"
    BUILD_ARGS+=(--cache-from "type=registry,ref=${CACHE_REF}")
    BUILD_ARGS+=(--cache-to "type=registry,ref=${CACHE_REF},mode=max")
    echo "Note: Using registry cache (${CACHE_REF})"
fi

DOCKER_BUILDKIT=1 docker buildx build --load "${BUILD_ARGS[@]}" "${SCRIPT_DIR}"
|
|
||||||
|
|
||||||
echo ""
echo "=========================================="
echo "Verifying build artifacts..."
echo "=========================================="
echo ""

# Binaries that must be on PATH inside the image; ik-llama-server is only
# built for CUDA.
EXPECTED_BINARIES=(llama-server llama-cli whisper-server whisper-cli sd-server sd-cli llama-swap)
if [[ "$BACKEND" == "cuda" ]]; then
    EXPECTED_BINARIES+=(ik-llama-server)
fi

# Probe each binary by overriding the image entrypoint with `which`.
MISSING_BINARIES=()
for binary in "${EXPECTED_BINARIES[@]}"; do
    if ! docker run --rm --entrypoint which "${DOCKER_IMAGE_TAG}" "${binary}" >/dev/null 2>&1; then
        MISSING_BINARIES+=("${binary}")
    fi
done

# Report failures on stderr, like the other errors in this script.
if [[ ${#MISSING_BINARIES[@]} -gt 0 ]]; then
    {
        echo "ERROR: Build succeeded but the following binaries are missing:"
        for binary in "${MISSING_BINARIES[@]}"; do
            echo " - ${binary}"
        done
        echo ""
        echo "Try running with --no-cache flag:"
        echo " ./build-image.sh --${BACKEND} --no-cache"
    } >&2
    exit 1
fi

# Derive the success message from the list that was actually checked, so the
# two cannot drift apart. printf repeats the format for every element; the
# trailing separator is stripped afterwards.
printf -v VERIFIED_LIST '%s, ' "${EXPECTED_BINARIES[@]}"
VERIFIED_LIST="${VERIFIED_LIST%, }"
echo "All expected binaries verified: ${VERIFIED_LIST}"
|
|
||||||
|
|
||||||
printf '%s\n' \
    "" \
    "==========================================" \
    "Building rootless image..." \
    "==========================================" \
    ""

# The rootless variant layers a dedicated system user (uid/gid 10001) on top
# of the image built above. Its Dockerfile is generated on the fly and fed to
# buildx on stdin, so no build context is involved.
ROOTLESS_TAG="${DOCKER_IMAGE_TAG}-rootless"
printf '%s\n' \
    "FROM ${DOCKER_IMAGE_TAG}" \
    'USER root' \
    'RUN groupadd --system --gid 10001 llama-swap && \' \
    'useradd --system --uid 10001 --gid 10001 \' \
    '--home /app --shell /sbin/nologin llama-swap && \' \
    'chown -R 10001:10001 /etc/llama-swap /models' \
    'USER 10001' \
    | docker buildx build --load -t "${ROOTLESS_TAG}" -

echo "Rootless image built: ${ROOTLESS_TAG}"
|
|
||||||
|
|
||||||
# Final report: image tags, the pinned component hashes, and a sample
# `docker run` command for the selected backend.
cat <<EOF

==========================================
Build complete!
==========================================

Image tags:
 ${DOCKER_IMAGE_TAG}
 ${ROOTLESS_TAG}

Built with:
 llama.cpp: ${LLAMA_HASH}
 whisper.cpp: ${WHISPER_HASH}
 stable-diffusion.cpp: ${SD_HASH}
EOF
# ik_llama.cpp is only part of CUDA builds.
[[ "$BACKEND" != "cuda" ]] || echo " ik_llama.cpp: ${IK_LLAMA_HASH}"
# The llama-swap version is read back from /versions.txt inside the image.
printf ' llama-swap: %s\n' "$(docker run --rm --entrypoint cat "${DOCKER_IMAGE_TAG}" /versions.txt | grep llama-swap | cut -d' ' -f2-)"
echo ""

case "$BACKEND" in
    vulkan)
        cat <<EOF
Run with:
 docker run -it --rm --device /dev/dri:/dev/dri ${DOCKER_IMAGE_TAG}

Note: For AMD GPUs, you may also need:
 docker run -it --rm --device /dev/dri:/dev/dri --group-add video ${DOCKER_IMAGE_TAG}
EOF
        ;;
    *)
        cat <<EOF
Run with:
 docker run -it --rm --gpus all ${DOCKER_IMAGE_TAG}
EOF
        ;;
esac
|
|
||||||
@@ -1,33 +0,0 @@
|
|||||||
# placeholder example configuration
#
# Every command below is referenced by bare name, so the binaries are looked
# up on PATH.
healthCheckTimeout: 300
logRequests: true

models:
  "llama":
    cmd: >
      llama-server
      -hf bartowski/Qwen2.5-0.5B-Instruct-GGUF:Q4_K_M
      --port ${PORT}

  "whisper":
    checkEndpoint: /v1/audio/transcriptions/
    # the model path is passed with -m (long form: --model)
    cmd: >
      whisper-server
      --port ${PORT}
      -m /models/whisper.bin
      --flash-attn
      --request-path /v1/audio/transcriptions --inference-path ""

  "image":
    checkEndpoint: /
    # listen on ${PORT}: no explicit proxy is configured for this model, so
    # requests are presumably forwarded to the port llama-swap assigns
    cmd: |
      sd-server
      --listen-port ${PORT}
      --diffusion-fa
      --diffusion-model /models/z_image_turbo-Q8_0.gguf
      --vae /models/ae.safetensors
      --llm /models/qwen3-4b-instruct-2507-q8_0.gguf
      --offload-to-cpu
      --cfg-scale 1.0
      --height 512 --width 512
      --steps 8
|
|
||||||
@@ -1,48 +0,0 @@
|
|||||||
#!/bin/bash
# Builds the llama-server target of ik_llama.cpp with CUDA enabled and
# installs it as /install/bin/ik-llama-server.
# Usage: ./install-ik-llama.sh <commit_hash>
#   <commit_hash>  ref to fetch from the upstream repository (default: main)
# Environment: CMAKE_CUDA_ARCHITECTURES must be set.
# Note: CUDA only; always built against builder-base-cuda
set -e

GIT_REF="${1:-main}"
SRC_DIR=/src/ik_llama.cpp
OUT_DIR=/install/bin

mkdir -p "${OUT_DIR}"

# Fetch the requested revision. `git init` + `fetch` is used rather than
# `git clone` so that a cache-mounted build dir inside the source tree
# doesn't break the checkout.
echo "=== Cloning ik_llama.cpp at ${GIT_REF} ==="
mkdir -p "${SRC_DIR}"
cd "${SRC_DIR}"
[ -d .git ] || {
    git init
    git remote add origin https://github.com/ikawrakow/ik_llama.cpp.git
}
git fetch --depth=1 origin "${GIT_REF}"
git checkout FETCH_HEAD

# Generic release settings: non-native code, static libraries, ccache.
CMAKE_FLAGS=()
CMAKE_FLAGS+=(-DGGML_NATIVE=OFF -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=Release)
CMAKE_FLAGS+=(-DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache)
# CUDA settings; the link step resolves libcuda through the toolkit's stubs
# directory.
CMAKE_FLAGS+=(-DGGML_CUDA=ON)
CMAKE_FLAGS+=("-DCMAKE_CUDA_ARCHITECTURES=${CMAKE_CUDA_ARCHITECTURES:?CMAKE_CUDA_ARCHITECTURES must be set}")
CMAKE_FLAGS+=("-DCMAKE_CUDA_FLAGS=-allow-unsupported-compiler")
CMAKE_FLAGS+=("-DCMAKE_EXE_LINKER_FLAGS=-Wl,-rpath-link,/usr/local/cuda/lib64/stubs -lcuda -Wl,--allow-shlib-undefined")

# Discard any previous CMake configuration while keeping compiled objects.
rm -rf build/CMakeCache.txt build/CMakeFiles 2>/dev/null || true

echo "=== Building ik_llama.cpp ==="
cmake -B build "${CMAKE_FLAGS[@]}"
cmake --build build --config Release -j"$(nproc)" --target llama-server

BUILT_SERVER="build/bin/llama-server"
[ -f "${BUILT_SERVER}" ] || {
    echo "FATAL: llama-server not found in build/bin/" >&2
    exit 1
}

# Install as ik-llama-server to avoid collision with llama.cpp's llama-server
cp "${BUILT_SERVER}" "${OUT_DIR}/ik-llama-server"
echo "=== ik_llama.cpp build complete ==="
ls -la "${OUT_DIR}/"
|
|
||||||
@@ -1,59 +0,0 @@
|
|||||||
#!/bin/bash
# Install llama-swap - download a release binary from GitHub
# Usage: ./install-llama-swap.sh [version]
# version: release version number (e.g., "170"), "latest" (default), or a
#          full 40-char commit hash that a release tag points at
# Output: /install/bin/llama-swap and /install/llama-swap-version
set -e

VERSION="${1:-latest}"
REPO="mostlygeek/llama-swap"

mkdir -p /install/bin

# If a full commit hash is given, find the release tag that points to it
if echo "${VERSION}" | grep -qE '^[0-9a-f]{40}$'; then
    echo "=== Resolving commit ${VERSION:0:7} to release tag ==="
    # `git ls-remote --tags` lists an annotated tag twice: once with the hash
    # of the tag object and once, suffixed with ^{}, with the hash of the
    # commit it points at. Strip the suffix rather than dropping those lines;
    # otherwise a commit hash never matches an annotated tag and the script
    # silently falls back to "latest".
    TAG=$(git ls-remote --tags "https://github.com/${REPO}.git" 2>/dev/null \
        | grep "^${VERSION}" | sed -e 's|.*refs/tags/||' -e 's|\^{}$||' | head -1)
    if [ -n "${TAG}" ]; then
        echo "Resolved to tag: ${TAG}"
        VERSION="${TAG#v}"
    else
        echo "No release tag found for commit ${VERSION:0:7}, using latest"
        VERSION="latest"
    fi
fi

# Strip leading 'v' prefix so both "198" and "v198" work
VERSION="${VERSION#v}"

# Resolve "latest" to actual version number
if [ "$VERSION" = "latest" ]; then
    echo "=== Resolving latest llama-swap release ==="
    # Pull tag_name out of the GitHub API response without a JSON parser.
    VERSION=$(curl -fsSL "https://api.github.com/repos/${REPO}/releases/latest" \
        | grep '"tag_name"' | head -1 | cut -d'"' -f4 | sed 's/^v//')
    if [ -z "$VERSION" ]; then
        echo "FATAL: Could not determine latest release version" >&2
        exit 1
    fi
    echo "Latest version: ${VERSION}"
fi

# Download and extract (the release asset is the linux/amd64 tarball)
URL="https://github.com/${REPO}/releases/download/v${VERSION}/llama-swap_${VERSION}_linux_amd64.tar.gz"
echo "=== Downloading llama-swap v${VERSION} ==="
echo "URL: $URL"
curl -fSL -o /tmp/llama-swap.tar.gz "$URL"
tar -xzf /tmp/llama-swap.tar.gz -C /install/bin/
rm /tmp/llama-swap.tar.gz

# Validate
if [ ! -x "/install/bin/llama-swap" ]; then
    echo "FATAL: llama-swap binary not found or not executable" >&2
    ls -la /install/bin/ >&2
    exit 1
fi

# Record the installed version next to the binary.
echo "$VERSION" > /install/llama-swap-version

echo "=== llama-swap v${VERSION} installed ==="
ls -la /install/bin/llama-swap
|
|
||||||
@@ -1,63 +0,0 @@
|
|||||||
#!/bin/bash
# Install llama.cpp - clone, build, and install binaries
# Usage: BACKEND=cuda|vulkan ./install-llama.sh <commit_hash>
#   <commit_hash>             ref to fetch (default: master)
#   BACKEND                   "cuda" (default) or "vulkan"
#   CMAKE_CUDA_ARCHITECTURES  required when BACKEND=cuda
# Output: llama-cli and llama-server in /install/bin
set -e

COMMIT_HASH="${1:-master}"
BACKEND="${BACKEND:-cuda}"

# Reject unsupported backends up front. Without this check an unknown value
# skips both branches below and quietly yields a build without GPU flags.
case "$BACKEND" in
    cuda|vulkan) ;;
    *)
        echo "FATAL: unsupported BACKEND '${BACKEND}' (expected cuda or vulkan)" >&2
        exit 1
        ;;
esac

mkdir -p /install/bin

# Clone and checkout (init-based so cache-mounted /src/llama.cpp/build dir doesn't break clone)
echo "=== Cloning llama.cpp at ${COMMIT_HASH} ==="
mkdir -p /src/llama.cpp
cd /src/llama.cpp
if [ ! -d .git ]; then
    git init
    git remote add origin https://github.com/ggml-org/llama.cpp.git
fi
git fetch --depth=1 origin "${COMMIT_HASH}"
git checkout FETCH_HEAD

# Common cmake flags
CMAKE_FLAGS=(
    -DGGML_NATIVE=OFF
    -DBUILD_SHARED_LIBS=OFF
    -DCMAKE_BUILD_TYPE=Release
    -DCMAKE_C_COMPILER_LAUNCHER=ccache
    -DCMAKE_CXX_COMPILER_LAUNCHER=ccache
    -DLLAMA_BUILD_TESTS=OFF
)

# Backend-specific flags
if [ "$BACKEND" = "cuda" ]; then
    CMAKE_FLAGS+=(
        -DGGML_CUDA=ON
        -DGGML_VULKAN=OFF
        "-DCMAKE_CUDA_ARCHITECTURES=${CMAKE_CUDA_ARCHITECTURES:?CMAKE_CUDA_ARCHITECTURES must be set}"
        "-DCMAKE_CUDA_FLAGS=-allow-unsupported-compiler"
        "-DCMAKE_EXE_LINKER_FLAGS=-Wl,-rpath-link,/usr/local/cuda/lib64/stubs -lcuda"
    )
elif [ "$BACKEND" = "vulkan" ]; then
    CMAKE_FLAGS+=(
        -DGGML_CUDA=OFF
        -DGGML_VULKAN=ON
    )
fi

TARGETS=(llama-cli llama-server)

# Discard any previous CMake configuration while keeping compiled objects.
rm -rf build/CMakeCache.txt build/CMakeFiles 2>/dev/null || true

echo "=== Building llama.cpp for ${BACKEND} ==="
cmake -B build "${CMAKE_FLAGS[@]}"
cmake --build build --config Release -j"$(nproc)" --target "${TARGETS[@]}"

# Verify every target was produced, then install it.
for bin in "${TARGETS[@]}"; do
    if [ ! -f "build/bin/$bin" ]; then
        echo "FATAL: $bin not found in build/bin/" >&2
        exit 1
    fi
    cp "build/bin/$bin" "/install/bin/"
done
echo "=== llama.cpp build complete ==="
ls -la /install/bin/
|
|
||||||
@@ -1,68 +0,0 @@
|
|||||||
#!/bin/bash
# Install stable-diffusion.cpp - clone, build, and install binaries and library
# Usage: BACKEND=cuda|vulkan ./install-sd.sh <commit_hash>
#   <commit_hash>             ref to fetch (default: master)
#   BACKEND                   "cuda" (default) or "vulkan"
#   CMAKE_CUDA_ARCHITECTURES  required when BACKEND=cuda
# Output: sd-cli and sd-server in /install/bin, shared libraries in /install/lib
set -e

COMMIT_HASH="${1:-master}"
BACKEND="${BACKEND:-cuda}"

# Reject unsupported backends up front. Without this check an unknown value
# skips both branches below and quietly yields a build without GPU flags.
case "$BACKEND" in
    cuda|vulkan) ;;
    *)
        echo "FATAL: unsupported BACKEND '${BACKEND}' (expected cuda or vulkan)" >&2
        exit 1
        ;;
esac

mkdir -p /install/bin /install/lib

# Clone and checkout (init-based so cache-mounted /src/stable-diffusion.cpp/build dir doesn't break clone)
echo "=== Cloning stable-diffusion.cpp at ${COMMIT_HASH} ==="
mkdir -p /src/stable-diffusion.cpp
cd /src/stable-diffusion.cpp
if [ ! -d .git ]; then
    git init
    git remote add origin https://github.com/leejet/stable-diffusion.cpp.git
fi
git fetch --depth=1 origin "${COMMIT_HASH}"
git checkout FETCH_HEAD
# Submodules are fetched as well, shallowly.
git submodule update --init --recursive --depth=1

# Common cmake flags
CMAKE_FLAGS=(
    -DGGML_NATIVE=OFF
    -DCMAKE_BUILD_TYPE=Release
    -DCMAKE_C_COMPILER_LAUNCHER=ccache
    -DCMAKE_CXX_COMPILER_LAUNCHER=ccache
    -DSD_BUILD_EXAMPLES=ON
)

# Backend-specific flags
if [ "$BACKEND" = "cuda" ]; then
    CMAKE_FLAGS+=(
        -DGGML_CUDA=ON
        -DGGML_VULKAN=OFF
        "-DCMAKE_CUDA_ARCHITECTURES=${CMAKE_CUDA_ARCHITECTURES:?CMAKE_CUDA_ARCHITECTURES must be set}"
        "-DCMAKE_CUDA_FLAGS=-allow-unsupported-compiler"
        "-DCMAKE_EXE_LINKER_FLAGS=-Wl,-rpath-link,/usr/local/cuda/lib64/stubs -lcuda"
        "-DCMAKE_SHARED_LINKER_FLAGS=-Wl,-rpath-link,/usr/local/cuda/lib64/stubs -lcuda"
        -DSD_CUDA=ON
    )
elif [ "$BACKEND" = "vulkan" ]; then
    CMAKE_FLAGS+=(
        -DGGML_CUDA=OFF
        -DGGML_VULKAN=ON
        -DSD_VULKAN=ON
    )
fi

TARGETS=(stable-diffusion sd-cli sd-server)

# Discard any previous CMake configuration while keeping compiled objects.
rm -rf build/CMakeCache.txt build/CMakeFiles 2>/dev/null || true

echo "=== Building stable-diffusion.cpp for ${BACKEND} ==="
cmake -B build "${CMAKE_FLAGS[@]}"
cmake --build build --config Release -j"$(nproc)" --target "${TARGETS[@]}"

# Only the two executables are installed into bin; the stable-diffusion
# target is a library and is not expected in build/bin.
for bin in sd-cli sd-server; do
    if [ ! -f "build/bin/$bin" ]; then
        echo "FATAL: $bin not found in build/bin/" >&2
        exit 1
    fi
    cp "build/bin/$bin" "/install/bin/"
done
# Collect any shared libraries the build produced (regular files only).
find build -name "*.so*" -type f -exec cp {} /install/lib/ \;

echo "=== stable-diffusion.cpp build complete ==="
ls -la /install/bin/ /install/lib/
|
|
||||||
@@ -1,64 +0,0 @@
|
|||||||
#!/bin/bash
# Install whisper.cpp - clone, build, and install binaries
# Usage: BACKEND=cuda|vulkan ./install-whisper.sh <commit_hash>
#   <commit_hash>             ref to fetch (default: master)
#   BACKEND                   "cuda" (default) or "vulkan"
#   CMAKE_CUDA_ARCHITECTURES  required when BACKEND=cuda
# Output: whisper-cli and whisper-server in /install/bin, shared libraries in
#         /install/lib
set -e

COMMIT_HASH="${1:-master}"
BACKEND="${BACKEND:-cuda}"

# Reject unsupported backends up front. Without this check an unknown value
# skips both branches below and quietly yields a build without GPU flags.
case "$BACKEND" in
    cuda|vulkan) ;;
    *)
        echo "FATAL: unsupported BACKEND '${BACKEND}' (expected cuda or vulkan)" >&2
        exit 1
        ;;
esac

mkdir -p /install/bin /install/lib

# Clone and checkout (init-based so cache-mounted /src/whisper.cpp/build dir doesn't break clone)
echo "=== Cloning whisper.cpp at ${COMMIT_HASH} ==="
mkdir -p /src/whisper.cpp
cd /src/whisper.cpp
if [ ! -d .git ]; then
    git init
    git remote add origin https://github.com/ggml-org/whisper.cpp.git
fi
git fetch --depth=1 origin "${COMMIT_HASH}"
git checkout FETCH_HEAD

# Common cmake flags
CMAKE_FLAGS=(
    -DGGML_NATIVE=OFF
    -DCMAKE_BUILD_TYPE=Release
    -DCMAKE_C_COMPILER_LAUNCHER=ccache
    -DCMAKE_CXX_COMPILER_LAUNCHER=ccache
)

# Backend-specific flags
if [ "$BACKEND" = "cuda" ]; then
    CMAKE_FLAGS+=(
        -DGGML_CUDA=ON
        -DGGML_VULKAN=OFF
        "-DCMAKE_CUDA_ARCHITECTURES=${CMAKE_CUDA_ARCHITECTURES:?CMAKE_CUDA_ARCHITECTURES must be set}"
        "-DCMAKE_CUDA_FLAGS=-allow-unsupported-compiler"
        "-DCMAKE_EXE_LINKER_FLAGS=-Wl,-rpath-link,/usr/local/cuda/lib64/stubs -lcuda"
        "-DCMAKE_SHARED_LINKER_FLAGS=-Wl,-rpath-link,/usr/local/cuda/lib64/stubs -lcuda"
    )
elif [ "$BACKEND" = "vulkan" ]; then
    CMAKE_FLAGS+=(
        -DGGML_CUDA=OFF
        -DGGML_VULKAN=ON
    )
fi

TARGETS=(whisper-cli whisper-server)

# Discard any previous CMake configuration while keeping compiled objects.
rm -rf build/CMakeCache.txt build/CMakeFiles 2>/dev/null || true

echo "=== Building whisper.cpp for ${BACKEND} ==="
cmake -B build "${CMAKE_FLAGS[@]}"
cmake --build build --config Release -j"$(nproc)" --target "${TARGETS[@]}"

# Verify every target was produced, then install it.
for bin in "${TARGETS[@]}"; do
    if [ ! -f "build/bin/$bin" ]; then
        echo "FATAL: $bin not found in build/bin/" >&2
        exit 1
    fi
    cp "build/bin/$bin" "/install/bin/"
done
# Collect any shared libraries the build produced (regular files only).
find build -name "*.so*" -type f -exec cp {} /install/lib/ \;

echo "=== whisper.cpp build complete ==="
ls -la /install/bin/
|
|
||||||
@@ -319,29 +319,6 @@ models:
|
|||||||
# - recommended to be omitted and the default used
|
# - recommended to be omitted and the default used
|
||||||
concurrencyLimit: 0
|
concurrencyLimit: 0
|
||||||
|
|
||||||
# timeouts: configure proxy connection timeouts for this model
|
|
||||||
# - optional, defaults shown below
|
|
||||||
# - useful for models on slower hardware that need longer timeouts
|
|
||||||
# - increase responseHeader to avoid "timeout awaiting response headers" errors
|
|
||||||
# - set any value to 0 to disable that timeout (not recommended)
|
|
||||||
timeouts:
|
|
||||||
# connect: TCP connection timeout in seconds
|
|
||||||
# - default: 30
|
|
||||||
connect: 30
|
|
||||||
|
|
||||||
# responseHeader: time to wait for response headers in seconds
|
|
||||||
# - default: 60
|
|
||||||
# - for slow image generation or large models, consider increasing to 300+ seconds
|
|
||||||
responseHeader: 60
|
|
||||||
|
|
||||||
# tlsHandshake: TLS handshake timeout in seconds
|
|
||||||
# - default: 10
|
|
||||||
tlsHandshake: 10
|
|
||||||
|
|
||||||
# idleConn: idle connection timeout in seconds
|
|
||||||
# - default: 90
|
|
||||||
idleConn: 90
|
|
||||||
|
|
||||||
# sendLoadingState: overrides the global sendLoadingState setting for this model
|
# sendLoadingState: overrides the global sendLoadingState setting for this model
|
||||||
# - optional, default: undefined (use global setting)
|
# - optional, default: undefined (use global setting)
|
||||||
sendLoadingState: false
|
sendLoadingState: false
|
||||||
@@ -467,17 +444,6 @@ peers:
|
|||||||
# - required
|
# - required
|
||||||
# - requested path to llama-swap will be appended to the end of the proxy value
|
# - requested path to llama-swap will be appended to the end of the proxy value
|
||||||
proxy: http://192.168.1.23
|
proxy: http://192.168.1.23
|
||||||
|
|
||||||
# timeouts: configure proxy connection timeouts for this peer
|
|
||||||
# - optional, defaults shown below
|
|
||||||
# - useful when the peer runs on slower hardware
|
|
||||||
# - set any value to 0 to disable that timeout (not recommended)
|
|
||||||
timeouts:
|
|
||||||
connect: 30
|
|
||||||
responseHeader: 60
|
|
||||||
tlsHandshake: 10
|
|
||||||
idleConn: 90
|
|
||||||
|
|
||||||
# models: a list of models served by the peer
|
# models: a list of models served by the peer
|
||||||
# - required
|
# - required
|
||||||
models:
|
models:
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
module github.com/mostlygeek/llama-swap
|
module github.com/mostlygeek/llama-swap
|
||||||
|
|
||||||
go 1.26.1
|
go 1.25.4
|
||||||
|
|
||||||
require (
|
require (
|
||||||
github.com/billziss-gh/golib v0.2.0
|
github.com/billziss-gh/golib v0.2.0
|
||||||
|
|||||||
@@ -124,7 +124,6 @@ type Config struct {
|
|||||||
LogToStdout string `yaml:"logToStdout"`
|
LogToStdout string `yaml:"logToStdout"`
|
||||||
MetricsMaxInMemory int `yaml:"metricsMaxInMemory"`
|
MetricsMaxInMemory int `yaml:"metricsMaxInMemory"`
|
||||||
CaptureBuffer int `yaml:"captureBuffer"`
|
CaptureBuffer int `yaml:"captureBuffer"`
|
||||||
GlobalTTL int `yaml:"globalTTL"`
|
|
||||||
Models map[string]ModelConfig `yaml:"models"` /* key is model ID */
|
Models map[string]ModelConfig `yaml:"models"` /* key is model ID */
|
||||||
Profiles map[string][]string `yaml:"profiles"`
|
Profiles map[string][]string `yaml:"profiles"`
|
||||||
Groups map[string]GroupConfig `yaml:"groups"` /* key is group ID */
|
Groups map[string]GroupConfig `yaml:"groups"` /* key is group ID */
|
||||||
@@ -204,7 +203,6 @@ func LoadConfigFromReader(r io.Reader) (Config, error) {
|
|||||||
LogToStdout: LogToStdoutProxy,
|
LogToStdout: LogToStdoutProxy,
|
||||||
MetricsMaxInMemory: 1000,
|
MetricsMaxInMemory: 1000,
|
||||||
CaptureBuffer: 5,
|
CaptureBuffer: 5,
|
||||||
GlobalTTL: 0,
|
|
||||||
}
|
}
|
||||||
if err = yaml.Unmarshal([]byte(yamlStr), &config); err != nil {
|
if err = yaml.Unmarshal([]byte(yamlStr), &config); err != nil {
|
||||||
return Config{}, err
|
return Config{}, err
|
||||||
@@ -218,10 +216,6 @@ func LoadConfigFromReader(r io.Reader) (Config, error) {
|
|||||||
return Config{}, fmt.Errorf("startPort must be greater than 1")
|
return Config{}, fmt.Errorf("startPort must be greater than 1")
|
||||||
}
|
}
|
||||||
|
|
||||||
if config.GlobalTTL < 0 {
|
|
||||||
return Config{}, fmt.Errorf("globalTTL must be >= 0")
|
|
||||||
}
|
|
||||||
|
|
||||||
switch config.LogToStdout {
|
switch config.LogToStdout {
|
||||||
case LogToStdoutProxy, LogToStdoutUpstream, LogToStdoutBoth, LogToStdoutNone:
|
case LogToStdoutProxy, LogToStdoutUpstream, LogToStdoutBoth, LogToStdoutNone:
|
||||||
default:
|
default:
|
||||||
@@ -261,15 +255,6 @@ func LoadConfigFromReader(r io.Reader) (Config, error) {
|
|||||||
modelConfig.Cmd = StripComments(modelConfig.Cmd)
|
modelConfig.Cmd = StripComments(modelConfig.Cmd)
|
||||||
modelConfig.CmdStop = StripComments(modelConfig.CmdStop)
|
modelConfig.CmdStop = StripComments(modelConfig.CmdStop)
|
||||||
|
|
||||||
// set model TTL to globalTTL it is the default value
|
|
||||||
if modelConfig.UnloadAfter == MODEL_CONFIG_DEFAULT_TTL {
|
|
||||||
modelConfig.UnloadAfter = config.GlobalTTL
|
|
||||||
}
|
|
||||||
|
|
||||||
if modelConfig.UnloadAfter < 0 {
|
|
||||||
return Config{}, fmt.Errorf("model %s: invalid TTL value %d", modelId, modelConfig.UnloadAfter)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Validate model macros
|
// Validate model macros
|
||||||
for _, macro := range modelConfig.Macros {
|
for _, macro := range modelConfig.Macros {
|
||||||
if err = validateMacro(macro.Name, macro.Value); err != nil {
|
if err = validateMacro(macro.Name, macro.Value); err != nil {
|
||||||
@@ -308,26 +293,6 @@ func LoadConfigFromReader(r io.Reader) (Config, error) {
|
|||||||
modelConfig.Proxy = strings.ReplaceAll(modelConfig.Proxy, macroSlug, macroStr)
|
modelConfig.Proxy = strings.ReplaceAll(modelConfig.Proxy, macroSlug, macroStr)
|
||||||
modelConfig.CheckEndpoint = strings.ReplaceAll(modelConfig.CheckEndpoint, macroSlug, macroStr)
|
modelConfig.CheckEndpoint = strings.ReplaceAll(modelConfig.CheckEndpoint, macroSlug, macroStr)
|
||||||
modelConfig.Filters.StripParams = strings.ReplaceAll(modelConfig.Filters.StripParams, macroSlug, macroStr)
|
modelConfig.Filters.StripParams = strings.ReplaceAll(modelConfig.Filters.StripParams, macroSlug, macroStr)
|
||||||
modelConfig.Name = strings.ReplaceAll(modelConfig.Name, macroSlug, macroStr)
|
|
||||||
modelConfig.Description = strings.ReplaceAll(modelConfig.Description, macroSlug, macroStr)
|
|
||||||
|
|
||||||
// Substitute macros in SetParamsByID keys and values
|
|
||||||
if len(modelConfig.Filters.SetParamsByID) > 0 {
|
|
||||||
newSetParamsByID := make(map[string]map[string]any, len(modelConfig.Filters.SetParamsByID))
|
|
||||||
for key, paramMap := range modelConfig.Filters.SetParamsByID {
|
|
||||||
newKey := strings.ReplaceAll(key, macroSlug, macroStr)
|
|
||||||
newValAny, err := substituteMacroInValue(any(paramMap), entry.Name, entry.Value)
|
|
||||||
if err != nil {
|
|
||||||
return Config{}, fmt.Errorf("model %s filters.setParamsByID: %s", modelId, err.Error())
|
|
||||||
}
|
|
||||||
newParamMap, ok := newValAny.(map[string]any)
|
|
||||||
if !ok {
|
|
||||||
return Config{}, fmt.Errorf("model %s filters.setParamsByID: unexpected type after macro substitution", modelId)
|
|
||||||
}
|
|
||||||
newSetParamsByID[newKey] = newParamMap
|
|
||||||
}
|
|
||||||
modelConfig.Filters.SetParamsByID = newSetParamsByID
|
|
||||||
}
|
|
||||||
|
|
||||||
// Substitute in metadata (type-preserving)
|
// Substitute in metadata (type-preserving)
|
||||||
if len(modelConfig.Metadata) > 0 {
|
if len(modelConfig.Metadata) > 0 {
|
||||||
@@ -353,8 +318,6 @@ func LoadConfigFromReader(r io.Reader) (Config, error) {
|
|||||||
modelConfig.Cmd = strings.ReplaceAll(modelConfig.Cmd, macroSlug, macroStr)
|
modelConfig.Cmd = strings.ReplaceAll(modelConfig.Cmd, macroSlug, macroStr)
|
||||||
modelConfig.CmdStop = strings.ReplaceAll(modelConfig.CmdStop, macroSlug, macroStr)
|
modelConfig.CmdStop = strings.ReplaceAll(modelConfig.CmdStop, macroSlug, macroStr)
|
||||||
modelConfig.Proxy = strings.ReplaceAll(modelConfig.Proxy, macroSlug, macroStr)
|
modelConfig.Proxy = strings.ReplaceAll(modelConfig.Proxy, macroSlug, macroStr)
|
||||||
modelConfig.Name = strings.ReplaceAll(modelConfig.Name, macroSlug, macroStr)
|
|
||||||
modelConfig.Description = strings.ReplaceAll(modelConfig.Description, macroSlug, macroStr)
|
|
||||||
|
|
||||||
if len(modelConfig.Metadata) > 0 {
|
if len(modelConfig.Metadata) > 0 {
|
||||||
result, err := substituteMacroInValue(modelConfig.Metadata, "PORT", nextPort)
|
result, err := substituteMacroInValue(modelConfig.Metadata, "PORT", nextPort)
|
||||||
@@ -374,8 +337,6 @@ func LoadConfigFromReader(r io.Reader) (Config, error) {
|
|||||||
"proxy": modelConfig.Proxy,
|
"proxy": modelConfig.Proxy,
|
||||||
"checkEndpoint": modelConfig.CheckEndpoint,
|
"checkEndpoint": modelConfig.CheckEndpoint,
|
||||||
"filters.stripParams": modelConfig.Filters.StripParams,
|
"filters.stripParams": modelConfig.Filters.StripParams,
|
||||||
"name": modelConfig.Name,
|
|
||||||
"description": modelConfig.Description,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
for fieldName, fieldValue := range fieldMap {
|
for fieldName, fieldValue := range fieldMap {
|
||||||
@@ -398,34 +359,6 @@ func LoadConfigFromReader(r io.Reader) (Config, error) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Validate SetParamsByID keys and values
|
|
||||||
for key, paramMap := range modelConfig.Filters.SetParamsByID {
|
|
||||||
if matches := macroPatternRegex.FindAllStringSubmatch(key, -1); len(matches) > 0 {
|
|
||||||
return Config{}, fmt.Errorf("unknown macro '${%s}' found in model %s filters.setParamsByID key", matches[0][1], modelId)
|
|
||||||
}
|
|
||||||
if err := validateNestedForUnknownMacros(any(paramMap), fmt.Sprintf("model %s filters.setParamsByID[%s]", modelId, key)); err != nil {
|
|
||||||
return Config{}, err
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Auto-register setParamsByID keys as aliases (skip the model's own ID)
|
|
||||||
for key := range modelConfig.Filters.SetParamsByID {
|
|
||||||
if key == modelId {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
if _, exists := config.Models[key]; exists {
|
|
||||||
return Config{}, fmt.Errorf("model %s filters.setParamsByID: key '%s' conflicts with an existing model ID", modelId, key)
|
|
||||||
}
|
|
||||||
if existingModel, exists := config.aliases[key]; exists {
|
|
||||||
if existingModel != modelId {
|
|
||||||
return Config{}, fmt.Errorf("duplicate alias '%s' in model %s filters.setParamsByID, already used by model %s", key, modelId, existingModel)
|
|
||||||
}
|
|
||||||
continue // already registered as explicit alias for this model
|
|
||||||
}
|
|
||||||
config.aliases[key] = modelId
|
|
||||||
modelConfig.Aliases = append(modelConfig.Aliases, key)
|
|
||||||
}
|
|
||||||
|
|
||||||
if _, err := url.Parse(modelConfig.Proxy); err != nil {
|
if _, err := url.Parse(modelConfig.Proxy); err != nil {
|
||||||
return Config{}, fmt.Errorf("model %s: invalid proxy URL: %w", modelId, err)
|
return Config{}, fmt.Errorf("model %s: invalid proxy URL: %w", modelId, err)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -163,15 +163,6 @@ groups:
|
|||||||
|
|
||||||
modelLoadingState := false
|
modelLoadingState := false
|
||||||
|
|
||||||
defaultTimeout := TimeoutsConfig{
|
|
||||||
Connect: 30,
|
|
||||||
KeepAlive: 30,
|
|
||||||
ResponseHeader: 0,
|
|
||||||
TLSHandshake: 10,
|
|
||||||
ExpectContinue: 1,
|
|
||||||
IdleConn: 90,
|
|
||||||
}
|
|
||||||
|
|
||||||
expected := Config{
|
expected := Config{
|
||||||
LogLevel: "info",
|
LogLevel: "info",
|
||||||
LogTimeFormat: "",
|
LogTimeFormat: "",
|
||||||
@@ -196,7 +187,6 @@ groups:
|
|||||||
Name: "Model 1",
|
Name: "Model 1",
|
||||||
Description: "This is model 1",
|
Description: "This is model 1",
|
||||||
SendLoadingState: &modelLoadingState,
|
SendLoadingState: &modelLoadingState,
|
||||||
Timeouts: defaultTimeout,
|
|
||||||
},
|
},
|
||||||
"model2": {
|
"model2": {
|
||||||
Cmd: "path/to/server --arg1 one",
|
Cmd: "path/to/server --arg1 one",
|
||||||
@@ -205,7 +195,6 @@ groups:
|
|||||||
Env: []string{},
|
Env: []string{},
|
||||||
CheckEndpoint: "/",
|
CheckEndpoint: "/",
|
||||||
SendLoadingState: &modelLoadingState,
|
SendLoadingState: &modelLoadingState,
|
||||||
Timeouts: defaultTimeout,
|
|
||||||
},
|
},
|
||||||
"model3": {
|
"model3": {
|
||||||
Cmd: "path/to/cmd --arg1 one",
|
Cmd: "path/to/cmd --arg1 one",
|
||||||
@@ -214,7 +203,6 @@ groups:
|
|||||||
Env: []string{},
|
Env: []string{},
|
||||||
CheckEndpoint: "/",
|
CheckEndpoint: "/",
|
||||||
SendLoadingState: &modelLoadingState,
|
SendLoadingState: &modelLoadingState,
|
||||||
Timeouts: defaultTimeout,
|
|
||||||
},
|
},
|
||||||
"model4": {
|
"model4": {
|
||||||
Cmd: "path/to/cmd --arg1 one",
|
Cmd: "path/to/cmd --arg1 one",
|
||||||
@@ -223,7 +211,6 @@ groups:
|
|||||||
Aliases: []string{},
|
Aliases: []string{},
|
||||||
Env: []string{},
|
Env: []string{},
|
||||||
SendLoadingState: &modelLoadingState,
|
SendLoadingState: &modelLoadingState,
|
||||||
Timeouts: defaultTimeout,
|
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
HealthCheckTimeout: 15,
|
HealthCheckTimeout: 15,
|
||||||
|
|||||||
@@ -6,7 +6,6 @@ import (
|
|||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
"github.com/stretchr/testify/assert"
|
"github.com/stretchr/testify/assert"
|
||||||
"github.com/stretchr/testify/require"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
func TestConfig_GroupMemberIsUnique(t *testing.T) {
|
func TestConfig_GroupMemberIsUnique(t *testing.T) {
|
||||||
@@ -849,71 +848,6 @@ func TestConfig_APIKeys_EnvMacros(t *testing.T) {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestConfig_GlobalTTL(t *testing.T) {
|
|
||||||
t.Run("globalTTL sets default for models", func(t *testing.T) {
|
|
||||||
content := `
|
|
||||||
globalTTL: 300
|
|
||||||
models:
|
|
||||||
model1:
|
|
||||||
cmd: server --port ${PORT}
|
|
||||||
`
|
|
||||||
config, err := LoadConfigFromReader(strings.NewReader(content))
|
|
||||||
assert.NoError(t, err)
|
|
||||||
assert.Equal(t, 300, config.GlobalTTL)
|
|
||||||
assert.Equal(t, 300, config.Models["model1"].UnloadAfter)
|
|
||||||
})
|
|
||||||
|
|
||||||
t.Run("model ttl=0 overrides globalTTL", func(t *testing.T) {
|
|
||||||
content := `
|
|
||||||
globalTTL: 300
|
|
||||||
models:
|
|
||||||
model1:
|
|
||||||
cmd: server --port ${PORT}
|
|
||||||
ttl: 0
|
|
||||||
`
|
|
||||||
config, err := LoadConfigFromReader(strings.NewReader(content))
|
|
||||||
assert.NoError(t, err)
|
|
||||||
assert.Equal(t, 0, config.Models["model1"].UnloadAfter)
|
|
||||||
})
|
|
||||||
|
|
||||||
t.Run("model explicit ttl overrides globalTTL", func(t *testing.T) {
|
|
||||||
content := `
|
|
||||||
globalTTL: 300
|
|
||||||
models:
|
|
||||||
model1:
|
|
||||||
cmd: server --port ${PORT}
|
|
||||||
ttl: 600
|
|
||||||
`
|
|
||||||
config, err := LoadConfigFromReader(strings.NewReader(content))
|
|
||||||
assert.NoError(t, err)
|
|
||||||
assert.Equal(t, 600, config.Models["model1"].UnloadAfter)
|
|
||||||
})
|
|
||||||
|
|
||||||
t.Run("globalTTL defaults to 0", func(t *testing.T) {
|
|
||||||
content := `
|
|
||||||
models:
|
|
||||||
model1:
|
|
||||||
cmd: server --port ${PORT}
|
|
||||||
`
|
|
||||||
config, err := LoadConfigFromReader(strings.NewReader(content))
|
|
||||||
assert.NoError(t, err)
|
|
||||||
assert.Equal(t, 0, config.GlobalTTL)
|
|
||||||
assert.Equal(t, 0, config.Models["model1"].UnloadAfter)
|
|
||||||
})
|
|
||||||
|
|
||||||
t.Run("negative globalTTL rejected", func(t *testing.T) {
|
|
||||||
content := `
|
|
||||||
globalTTL: -1
|
|
||||||
models:
|
|
||||||
model1:
|
|
||||||
cmd: server --port ${PORT}
|
|
||||||
`
|
|
||||||
_, err := LoadConfigFromReader(strings.NewReader(content))
|
|
||||||
assert.Error(t, err)
|
|
||||||
assert.Contains(t, err.Error(), "globalTTL must be >= 0")
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestConfig_EnvMacros(t *testing.T) {
|
func TestConfig_EnvMacros(t *testing.T) {
|
||||||
t.Run("basic env substitution in cmd", func(t *testing.T) {
|
t.Run("basic env substitution in cmd", func(t *testing.T) {
|
||||||
t.Setenv("TEST_MODEL_PATH", "/opt/models")
|
t.Setenv("TEST_MODEL_PATH", "/opt/models")
|
||||||
@@ -1439,108 +1373,3 @@ models:
|
|||||||
})
|
})
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestConfig_TimeoutsParsing(t *testing.T) {
|
|
||||||
configYaml := `
|
|
||||||
models:
|
|
||||||
model1:
|
|
||||||
cmd: test-server --port ${PORT}
|
|
||||||
timeouts:
|
|
||||||
connect: 45
|
|
||||||
responseHeader: 120
|
|
||||||
`
|
|
||||||
|
|
||||||
config, err := LoadConfigFromReader(strings.NewReader(configYaml))
|
|
||||||
require.NoError(t, err)
|
|
||||||
|
|
||||||
modelConfig, found := config.Models["model1"]
|
|
||||||
require.True(t, found, "model1 should exist in config")
|
|
||||||
|
|
||||||
assert.Equal(t, 45, modelConfig.Timeouts.Connect)
|
|
||||||
assert.Equal(t, 120, modelConfig.Timeouts.ResponseHeader)
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestConfig_TimeoutsDefaults(t *testing.T) {
|
|
||||||
configYaml := `
|
|
||||||
models:
|
|
||||||
model1:
|
|
||||||
cmd: test-server --port ${PORT}
|
|
||||||
`
|
|
||||||
|
|
||||||
config, err := LoadConfigFromReader(strings.NewReader(configYaml))
|
|
||||||
require.NoError(t, err)
|
|
||||||
|
|
||||||
modelConfig, found := config.Models["model1"]
|
|
||||||
require.True(t, found, "model1 should exist in config")
|
|
||||||
|
|
||||||
// Default values should be set during unmarshaling
|
|
||||||
assert.Equal(t, 30, modelConfig.Timeouts.Connect)
|
|
||||||
assert.Equal(t, 0, modelConfig.Timeouts.ResponseHeader)
|
|
||||||
assert.Equal(t, 10, modelConfig.Timeouts.TLSHandshake)
|
|
||||||
assert.Equal(t, 1, modelConfig.Timeouts.ExpectContinue)
|
|
||||||
assert.Equal(t, 90, modelConfig.Timeouts.IdleConn)
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestConfig_TimeoutsZeroAllowed(t *testing.T) {
|
|
||||||
configYaml := `
|
|
||||||
models:
|
|
||||||
model1:
|
|
||||||
cmd: test-server --port ${PORT}
|
|
||||||
timeouts:
|
|
||||||
connect: 0
|
|
||||||
responseHeader: 0
|
|
||||||
`
|
|
||||||
|
|
||||||
config, err := LoadConfigFromReader(strings.NewReader(configYaml))
|
|
||||||
require.NoError(t, err)
|
|
||||||
|
|
||||||
modelConfig, found := config.Models["model1"]
|
|
||||||
require.True(t, found, "model1 should exist in config")
|
|
||||||
|
|
||||||
// Explicit 0 should be preserved (disables timeout)
|
|
||||||
assert.Equal(t, 0, modelConfig.Timeouts.Connect)
|
|
||||||
assert.Equal(t, 0, modelConfig.Timeouts.ResponseHeader)
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestConfig_PeerTimeoutsParsing(t *testing.T) {
|
|
||||||
configYaml := `
|
|
||||||
peers:
|
|
||||||
peer1:
|
|
||||||
proxy: http://example.com
|
|
||||||
models: [model1]
|
|
||||||
timeouts:
|
|
||||||
connect: 45
|
|
||||||
responseHeader: 120
|
|
||||||
`
|
|
||||||
|
|
||||||
config, err := LoadConfigFromReader(strings.NewReader(configYaml))
|
|
||||||
require.NoError(t, err)
|
|
||||||
|
|
||||||
peerConfig, found := config.Peers["peer1"]
|
|
||||||
require.True(t, found, "peer1 should exist in config")
|
|
||||||
|
|
||||||
assert.Equal(t, 45, peerConfig.Timeouts.Connect)
|
|
||||||
assert.Equal(t, 120, peerConfig.Timeouts.ResponseHeader)
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestConfig_PeerTimeoutsDefaults(t *testing.T) {
|
|
||||||
configYaml := `
|
|
||||||
peers:
|
|
||||||
peer1:
|
|
||||||
proxy: http://example.com
|
|
||||||
models: [model1]
|
|
||||||
`
|
|
||||||
|
|
||||||
config, err := LoadConfigFromReader(strings.NewReader(configYaml))
|
|
||||||
require.NoError(t, err)
|
|
||||||
|
|
||||||
peerConfig, found := config.Peers["peer1"]
|
|
||||||
require.True(t, found, "peer1 should exist in config")
|
|
||||||
|
|
||||||
// Default values should be set during unmarshaling
|
|
||||||
assert.Equal(t, 30, peerConfig.Timeouts.Connect)
|
|
||||||
assert.Equal(t, 60, peerConfig.Timeouts.ResponseHeader)
|
|
||||||
assert.Equal(t, 10, peerConfig.Timeouts.TLSHandshake)
|
|
||||||
assert.Equal(t, 1, peerConfig.Timeouts.ExpectContinue)
|
|
||||||
assert.Equal(t, 90, peerConfig.Timeouts.IdleConn)
|
|
||||||
}
|
|
||||||
|
|||||||
@@ -155,15 +155,6 @@ groups:
|
|||||||
|
|
||||||
modelLoadingState := false
|
modelLoadingState := false
|
||||||
|
|
||||||
defaultTimeout := TimeoutsConfig{
|
|
||||||
Connect: 30,
|
|
||||||
KeepAlive: 30,
|
|
||||||
ResponseHeader: 0,
|
|
||||||
TLSHandshake: 10,
|
|
||||||
ExpectContinue: 1,
|
|
||||||
IdleConn: 90,
|
|
||||||
}
|
|
||||||
|
|
||||||
expected := Config{
|
expected := Config{
|
||||||
LogLevel: "info",
|
LogLevel: "info",
|
||||||
LogTimeFormat: "",
|
LogTimeFormat: "",
|
||||||
@@ -182,7 +173,6 @@ groups:
|
|||||||
Env: []string{"VAR1=value1", "VAR2=value2"},
|
Env: []string{"VAR1=value1", "VAR2=value2"},
|
||||||
CheckEndpoint: "/health",
|
CheckEndpoint: "/health",
|
||||||
SendLoadingState: &modelLoadingState,
|
SendLoadingState: &modelLoadingState,
|
||||||
Timeouts: defaultTimeout,
|
|
||||||
},
|
},
|
||||||
"model2": {
|
"model2": {
|
||||||
Cmd: "path/to/server --arg1 one",
|
Cmd: "path/to/server --arg1 one",
|
||||||
@@ -192,7 +182,6 @@ groups:
|
|||||||
Env: []string{},
|
Env: []string{},
|
||||||
CheckEndpoint: "/",
|
CheckEndpoint: "/",
|
||||||
SendLoadingState: &modelLoadingState,
|
SendLoadingState: &modelLoadingState,
|
||||||
Timeouts: defaultTimeout,
|
|
||||||
},
|
},
|
||||||
"model3": {
|
"model3": {
|
||||||
Cmd: "path/to/cmd --arg1 one",
|
Cmd: "path/to/cmd --arg1 one",
|
||||||
@@ -202,7 +191,6 @@ groups:
|
|||||||
Env: []string{},
|
Env: []string{},
|
||||||
CheckEndpoint: "/",
|
CheckEndpoint: "/",
|
||||||
SendLoadingState: &modelLoadingState,
|
SendLoadingState: &modelLoadingState,
|
||||||
Timeouts: defaultTimeout,
|
|
||||||
},
|
},
|
||||||
"model4": {
|
"model4": {
|
||||||
Cmd: "path/to/cmd --arg1 one",
|
Cmd: "path/to/cmd --arg1 one",
|
||||||
@@ -212,7 +200,6 @@ groups:
|
|||||||
Aliases: []string{},
|
Aliases: []string{},
|
||||||
Env: []string{},
|
Env: []string{},
|
||||||
SendLoadingState: &modelLoadingState,
|
SendLoadingState: &modelLoadingState,
|
||||||
Timeouts: defaultTimeout,
|
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
HealthCheckTimeout: 15,
|
HealthCheckTimeout: 15,
|
||||||
|
|||||||
@@ -20,12 +20,6 @@ type Filters struct {
|
|||||||
// SetParams is a dictionary of parameters to set/override in requests
|
// SetParams is a dictionary of parameters to set/override in requests
|
||||||
// Protected params (like "model") cannot be set
|
// Protected params (like "model") cannot be set
|
||||||
SetParams map[string]any `yaml:"setParams"`
|
SetParams map[string]any `yaml:"setParams"`
|
||||||
|
|
||||||
// SetParamsByID maps requested model IDs to parameters to set/override in requests.
|
|
||||||
// Useful with aliases: a single loaded model can behave differently depending on
|
|
||||||
// which alias the client used. Applied after SetParams, so it can override those values.
|
|
||||||
// Protected params (like "model") cannot be set.
|
|
||||||
SetParamsByID map[string]map[string]any `yaml:"setParamsByID"`
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// SanitizedStripParams returns a sorted list of parameters to strip,
|
// SanitizedStripParams returns a sorted list of parameters to strip,
|
||||||
@@ -57,33 +51,6 @@ func (f Filters) SanitizedStripParams() []string {
|
|||||||
return cleaned
|
return cleaned
|
||||||
}
|
}
|
||||||
|
|
||||||
// SanitizedSetParamsByID returns the params to set for the given requestedModelID,
|
|
||||||
// with protected params removed and keys sorted for consistent iteration order.
|
|
||||||
// Returns nil if the ID has no entry or all its params are protected.
|
|
||||||
func (f Filters) SanitizedSetParamsByID(requestedModelID string) (map[string]any, []string) {
|
|
||||||
if len(f.SetParamsByID) == 0 {
|
|
||||||
return nil, nil
|
|
||||||
}
|
|
||||||
params, found := f.SetParamsByID[requestedModelID]
|
|
||||||
if !found || len(params) == 0 {
|
|
||||||
return nil, nil
|
|
||||||
}
|
|
||||||
result := make(map[string]any, len(params))
|
|
||||||
keys := make([]string, 0, len(params))
|
|
||||||
for key, value := range params {
|
|
||||||
if slices.Contains(ProtectedParams, key) {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
result[key] = value
|
|
||||||
keys = append(keys, key)
|
|
||||||
}
|
|
||||||
sort.Strings(keys)
|
|
||||||
if len(result) == 0 {
|
|
||||||
return nil, nil
|
|
||||||
}
|
|
||||||
return result, keys
|
|
||||||
}
|
|
||||||
|
|
||||||
// SanitizedSetParams returns a copy of SetParams with protected params removed
|
// SanitizedSetParams returns a copy of SetParams with protected params removed
|
||||||
// and keys sorted for consistent iteration order
|
// and keys sorted for consistent iteration order
|
||||||
func (f Filters) SanitizedSetParams() (map[string]any, []string) {
|
func (f Filters) SanitizedSetParams() (map[string]any, []string) {
|
||||||
|
|||||||
@@ -162,123 +162,6 @@ func TestFilters_SanitizedSetParams(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestFilters_SanitizedSetParamsByID(t *testing.T) {
|
|
||||||
tests := []struct {
|
|
||||||
name string
|
|
||||||
setParamsByID map[string]map[string]any
|
|
||||||
requestedModelID string
|
|
||||||
wantParams map[string]any
|
|
||||||
wantKeys []string
|
|
||||||
}{
|
|
||||||
{
|
|
||||||
name: "empty SetParamsByID returns nil",
|
|
||||||
setParamsByID: nil,
|
|
||||||
requestedModelID: "model1",
|
|
||||||
wantParams: nil,
|
|
||||||
wantKeys: nil,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "empty map returns nil",
|
|
||||||
setParamsByID: map[string]map[string]any{},
|
|
||||||
requestedModelID: "model1",
|
|
||||||
wantParams: nil,
|
|
||||||
wantKeys: nil,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "non-matching model ID returns nil",
|
|
||||||
setParamsByID: map[string]map[string]any{
|
|
||||||
"model2": {"temperature": 0.9},
|
|
||||||
},
|
|
||||||
requestedModelID: "model1",
|
|
||||||
wantParams: nil,
|
|
||||||
wantKeys: nil,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "matching model ID returns correct params",
|
|
||||||
setParamsByID: map[string]map[string]any{
|
|
||||||
"model1": {"temperature": 0.7, "top_p": 0.9},
|
|
||||||
"model2": {"temperature": 0.5},
|
|
||||||
},
|
|
||||||
requestedModelID: "model1",
|
|
||||||
wantParams: map[string]any{
|
|
||||||
"temperature": 0.7,
|
|
||||||
"top_p": 0.9,
|
|
||||||
},
|
|
||||||
wantKeys: []string{"temperature", "top_p"},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "protected param model is filtered out",
|
|
||||||
setParamsByID: map[string]map[string]any{
|
|
||||||
"model1": {
|
|
||||||
"model": "should-be-filtered",
|
|
||||||
"temperature": 0.7,
|
|
||||||
},
|
|
||||||
},
|
|
||||||
requestedModelID: "model1",
|
|
||||||
wantParams: map[string]any{
|
|
||||||
"temperature": 0.7,
|
|
||||||
},
|
|
||||||
wantKeys: []string{"temperature"},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "only protected param returns nil",
|
|
||||||
setParamsByID: map[string]map[string]any{
|
|
||||||
"model1": {
|
|
||||||
"model": "should-be-filtered",
|
|
||||||
},
|
|
||||||
},
|
|
||||||
requestedModelID: "model1",
|
|
||||||
wantParams: nil,
|
|
||||||
wantKeys: nil,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "keys are sorted",
|
|
||||||
setParamsByID: map[string]map[string]any{
|
|
||||||
"model1": {
|
|
||||||
"z_param": "z",
|
|
||||||
"a_param": "a",
|
|
||||||
"m_param": "m",
|
|
||||||
},
|
|
||||||
},
|
|
||||||
requestedModelID: "model1",
|
|
||||||
wantParams: map[string]any{
|
|
||||||
"z_param": "z",
|
|
||||||
"a_param": "a",
|
|
||||||
"m_param": "m",
|
|
||||||
},
|
|
||||||
wantKeys: []string{"a_param", "m_param", "z_param"},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "alias style key lookup",
|
|
||||||
setParamsByID: map[string]map[string]any{
|
|
||||||
"model1:high": {"reasoning_effort": "high"},
|
|
||||||
"model1:low": {"reasoning_effort": "low"},
|
|
||||||
},
|
|
||||||
requestedModelID: "model1:high",
|
|
||||||
wantParams: map[string]any{
|
|
||||||
"reasoning_effort": "high",
|
|
||||||
},
|
|
||||||
wantKeys: []string{"reasoning_effort"},
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
for _, tt := range tests {
|
|
||||||
t.Run(tt.name, func(t *testing.T) {
|
|
||||||
f := Filters{SetParamsByID: tt.setParamsByID}
|
|
||||||
gotParams, gotKeys := f.SanitizedSetParamsByID(tt.requestedModelID)
|
|
||||||
|
|
||||||
if tt.wantParams == nil {
|
|
||||||
assert.Nil(t, gotParams)
|
|
||||||
assert.Nil(t, gotKeys)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
assert.Equal(t, tt.wantKeys, gotKeys)
|
|
||||||
assert.Equal(t, tt.wantParams, gotParams)
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestProtectedParams(t *testing.T) {
|
func TestProtectedParams(t *testing.T) {
|
||||||
// Verify that "model" is protected
|
// Verify that "model" is protected
|
||||||
assert.Contains(t, ProtectedParams, "model")
|
assert.Contains(t, ProtectedParams, "model")
|
||||||
|
|||||||
@@ -104,62 +104,6 @@ models:
|
|||||||
assert.Contains(t, err.Error(), "self-reference")
|
assert.Contains(t, err.Error(), "self-reference")
|
||||||
}
|
}
|
||||||
|
|
||||||
// Test macro substitution in name and description fields
|
|
||||||
func TestConfig_MacroInNameAndDescription(t *testing.T) {
|
|
||||||
content := `
|
|
||||||
startPort: 10000
|
|
||||||
macros:
|
|
||||||
"VARIANT": "Q4_K_M"
|
|
||||||
"FAMILY": "llama"
|
|
||||||
|
|
||||||
models:
|
|
||||||
my-model:
|
|
||||||
cmd: echo ok
|
|
||||||
proxy: http://localhost:8080
|
|
||||||
name: "${FAMILY} ${VARIANT}"
|
|
||||||
description: "A ${FAMILY} model in ${VARIANT} format"
|
|
||||||
`
|
|
||||||
|
|
||||||
config, err := LoadConfigFromReader(strings.NewReader(content))
|
|
||||||
assert.NoError(t, err)
|
|
||||||
assert.Equal(t, "llama Q4_K_M", config.Models["my-model"].Name)
|
|
||||||
assert.Equal(t, "A llama model in Q4_K_M format", config.Models["my-model"].Description)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Test MODEL_ID macro in name and description fields
|
|
||||||
func TestConfig_ModelIDInNameAndDescription(t *testing.T) {
|
|
||||||
content := `
|
|
||||||
startPort: 10000
|
|
||||||
models:
|
|
||||||
llama-3b:
|
|
||||||
cmd: echo ok
|
|
||||||
proxy: http://localhost:8080
|
|
||||||
name: "Model: ${MODEL_ID}"
|
|
||||||
description: "Running ${MODEL_ID}"
|
|
||||||
`
|
|
||||||
|
|
||||||
config, err := LoadConfigFromReader(strings.NewReader(content))
|
|
||||||
assert.NoError(t, err)
|
|
||||||
assert.Equal(t, "Model: llama-3b", config.Models["llama-3b"].Name)
|
|
||||||
assert.Equal(t, "Running llama-3b", config.Models["llama-3b"].Description)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Test unknown macro in name or description returns an error
|
|
||||||
func TestConfig_UnknownMacroInNameDescription(t *testing.T) {
|
|
||||||
content := `
|
|
||||||
startPort: 10000
|
|
||||||
models:
|
|
||||||
test:
|
|
||||||
cmd: echo ok
|
|
||||||
proxy: http://localhost:8080
|
|
||||||
name: "Model ${UNDEFINED}"
|
|
||||||
`
|
|
||||||
|
|
||||||
_, err := LoadConfigFromReader(strings.NewReader(content))
|
|
||||||
assert.Error(t, err)
|
|
||||||
assert.Contains(t, err.Error(), "UNDEFINED")
|
|
||||||
}
|
|
||||||
|
|
||||||
// Test undefined macro reference error
|
// Test undefined macro reference error
|
||||||
func TestConfig_UndefinedMacroReference(t *testing.T) {
|
func TestConfig_UndefinedMacroReference(t *testing.T) {
|
||||||
content := `
|
content := `
|
||||||
|
|||||||
@@ -5,21 +5,6 @@ import (
|
|||||||
"runtime"
|
"runtime"
|
||||||
)
|
)
|
||||||
|
|
||||||
const (
|
|
||||||
MODEL_CONFIG_DEFAULT_TTL = -1
|
|
||||||
)
|
|
||||||
|
|
||||||
// TimeoutsConfig holds timeout settings for proxy connections
|
|
||||||
// 0 = no timeout
|
|
||||||
type TimeoutsConfig struct {
|
|
||||||
Connect int `yaml:"connect"`
|
|
||||||
KeepAlive int `yaml:"keepalive"`
|
|
||||||
ResponseHeader int `yaml:"responseHeader"`
|
|
||||||
TLSHandshake int `yaml:"tlsHandshake"`
|
|
||||||
ExpectContinue int `yaml:"expectContinue"`
|
|
||||||
IdleConn int `yaml:"idleConn"`
|
|
||||||
}
|
|
||||||
|
|
||||||
type ModelConfig struct {
|
type ModelConfig struct {
|
||||||
Cmd string `yaml:"cmd"`
|
Cmd string `yaml:"cmd"`
|
||||||
CmdStop string `yaml:"cmdStop"`
|
CmdStop string `yaml:"cmdStop"`
|
||||||
@@ -51,9 +36,6 @@ type ModelConfig struct {
|
|||||||
|
|
||||||
// override global setting
|
// override global setting
|
||||||
SendLoadingState *bool `yaml:"sendLoadingState"`
|
SendLoadingState *bool `yaml:"sendLoadingState"`
|
||||||
|
|
||||||
// Timeout settings for proxy connections
|
|
||||||
Timeouts TimeoutsConfig `yaml:"timeouts"`
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *ModelConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
|
func (m *ModelConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
|
||||||
@@ -65,22 +47,12 @@ func (m *ModelConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
|
|||||||
Aliases: []string{},
|
Aliases: []string{},
|
||||||
Env: []string{},
|
Env: []string{},
|
||||||
CheckEndpoint: "/health",
|
CheckEndpoint: "/health",
|
||||||
UnloadAfter: MODEL_CONFIG_DEFAULT_TTL, // use GlobalTTL
|
UnloadAfter: 0,
|
||||||
Unlisted: false,
|
Unlisted: false,
|
||||||
UseModelName: "",
|
UseModelName: "",
|
||||||
ConcurrencyLimit: 0,
|
ConcurrencyLimit: 0,
|
||||||
Name: "",
|
Name: "",
|
||||||
Description: "",
|
Description: "",
|
||||||
|
|
||||||
// matches http.DefaultTransport
|
|
||||||
Timeouts: TimeoutsConfig{
|
|
||||||
Connect: 30,
|
|
||||||
KeepAlive: 30,
|
|
||||||
ResponseHeader: 0,
|
|
||||||
TLSHandshake: 10,
|
|
||||||
ExpectContinue: 1,
|
|
||||||
IdleConn: 90,
|
|
||||||
},
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// the default cmdStop to taskkill /f /t /pid ${PID}
|
// the default cmdStop to taskkill /f /t /pid ${PID}
|
||||||
|
|||||||
@@ -73,72 +73,6 @@ models:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestConfig_SetParamsByIDAutoAlias(t *testing.T) {
|
|
||||||
content := `
|
|
||||||
models:
|
|
||||||
model1:
|
|
||||||
cmd: path/to/cmd --port ${PORT}
|
|
||||||
filters:
|
|
||||||
setParamsByID:
|
|
||||||
"${MODEL_ID}:high":
|
|
||||||
reasoning_effort: high
|
|
||||||
"${MODEL_ID}:low":
|
|
||||||
reasoning_effort: low
|
|
||||||
`
|
|
||||||
cfg, err := LoadConfigFromReader(strings.NewReader(content))
|
|
||||||
assert.NoError(t, err)
|
|
||||||
|
|
||||||
// Keys (other than the model's own ID) should be registered as aliases
|
|
||||||
realName, found := cfg.RealModelName("model1:high")
|
|
||||||
assert.True(t, found, "model1:high should be an auto-registered alias")
|
|
||||||
assert.Equal(t, "model1", realName)
|
|
||||||
|
|
||||||
realName, found = cfg.RealModelName("model1:low")
|
|
||||||
assert.True(t, found, "model1:low should be an auto-registered alias")
|
|
||||||
assert.Equal(t, "model1", realName)
|
|
||||||
|
|
||||||
// Auto-aliases should also appear in modelConfig.Aliases
|
|
||||||
aliases := cfg.Models["model1"].Aliases
|
|
||||||
assert.Contains(t, aliases, "model1:high")
|
|
||||||
assert.Contains(t, aliases, "model1:low")
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestConfig_SetParamsByIDAutoAliasConflictWithModelID(t *testing.T) {
|
|
||||||
content := `
|
|
||||||
models:
|
|
||||||
model1:
|
|
||||||
cmd: path/to/cmd --port ${PORT}
|
|
||||||
filters:
|
|
||||||
setParamsByID:
|
|
||||||
model2:
|
|
||||||
reasoning_effort: high
|
|
||||||
model2:
|
|
||||||
cmd: path/to/cmd --port ${PORT}
|
|
||||||
`
|
|
||||||
_, err := LoadConfigFromReader(strings.NewReader(content))
|
|
||||||
assert.ErrorContains(t, err, "conflicts with an existing model ID")
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestConfig_SetParamsByIDAutoAliasConflictWithOtherModel(t *testing.T) {
|
|
||||||
content := `
|
|
||||||
models:
|
|
||||||
model1:
|
|
||||||
cmd: path/to/cmd --port ${PORT}
|
|
||||||
filters:
|
|
||||||
setParamsByID:
|
|
||||||
"shared-alias":
|
|
||||||
reasoning_effort: high
|
|
||||||
model2:
|
|
||||||
cmd: path/to/cmd --port ${PORT}
|
|
||||||
filters:
|
|
||||||
setParamsByID:
|
|
||||||
"shared-alias":
|
|
||||||
reasoning_effort: low
|
|
||||||
`
|
|
||||||
_, err := LoadConfigFromReader(strings.NewReader(content))
|
|
||||||
assert.ErrorContains(t, err, "duplicate alias")
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestConfig_ModelFiltersWithSetParams(t *testing.T) {
|
func TestConfig_ModelFiltersWithSetParams(t *testing.T) {
|
||||||
content := `
|
content := `
|
||||||
models:
|
models:
|
||||||
|
|||||||
@@ -12,9 +12,6 @@ type PeerConfig struct {
|
|||||||
ApiKey string `yaml:"apiKey"`
|
ApiKey string `yaml:"apiKey"`
|
||||||
Models []string `yaml:"models"`
|
Models []string `yaml:"models"`
|
||||||
Filters Filters `yaml:"filters"`
|
Filters Filters `yaml:"filters"`
|
||||||
|
|
||||||
// Timeout settings for proxy connections
|
|
||||||
Timeouts TimeoutsConfig `yaml:"timeouts"`
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c *PeerConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
|
func (c *PeerConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
|
||||||
@@ -24,17 +21,6 @@ func (c *PeerConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
|
|||||||
ApiKey: "",
|
ApiKey: "",
|
||||||
Models: []string{},
|
Models: []string{},
|
||||||
Filters: Filters{},
|
Filters: Filters{},
|
||||||
|
|
||||||
// mostly matches http.DefaultTransport but with a 60s ResponseHeader timeout
|
|
||||||
// to match the pre PR #619 functionality
|
|
||||||
Timeouts: TimeoutsConfig{
|
|
||||||
Connect: 30,
|
|
||||||
KeepAlive: 30,
|
|
||||||
ResponseHeader: 60,
|
|
||||||
TLSHandshake: 10,
|
|
||||||
ExpectContinue: 1,
|
|
||||||
IdleConn: 90,
|
|
||||||
},
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := unmarshal(&defaults); err != nil {
|
if err := unmarshal(&defaults); err != nil {
|
||||||
|
|||||||
@@ -8,7 +8,6 @@ const ConfigFileChangedEventID = 0x03
|
|||||||
const LogDataEventID = 0x04
|
const LogDataEventID = 0x04
|
||||||
const TokenMetricsEventID = 0x05
|
const TokenMetricsEventID = 0x05
|
||||||
const ModelPreloadedEventID = 0x06
|
const ModelPreloadedEventID = 0x06
|
||||||
const InFlightRequestsEventID = 0x07
|
|
||||||
|
|
||||||
type ProcessStateChangeEvent struct {
|
type ProcessStateChangeEvent struct {
|
||||||
ProcessName string
|
ProcessName string
|
||||||
@@ -59,11 +58,3 @@ type ModelPreloadedEvent struct {
|
|||||||
func (e ModelPreloadedEvent) Type() uint32 {
|
func (e ModelPreloadedEvent) Type() uint32 {
|
||||||
return ModelPreloadedEventID
|
return ModelPreloadedEventID
|
||||||
}
|
}
|
||||||
|
|
||||||
type InFlightRequestsEvent struct {
|
|
||||||
Total int
|
|
||||||
}
|
|
||||||
|
|
||||||
func (e InFlightRequestsEvent) Type() uint32 {
|
|
||||||
return InFlightRequestsEventID
|
|
||||||
}
|
|
||||||
|
|||||||
@@ -350,11 +350,6 @@ func processStreamingResponse(modelID string, start time.Time, body []byte) (Tok
|
|||||||
usage := parsed.Get("usage")
|
usage := parsed.Get("usage")
|
||||||
timings := parsed.Get("timings")
|
timings := parsed.Get("timings")
|
||||||
|
|
||||||
// v1/responses format nests usage under response.usage
|
|
||||||
if !usage.Exists() {
|
|
||||||
usage = parsed.Get("response.usage")
|
|
||||||
}
|
|
||||||
|
|
||||||
if usage.Exists() || timings.Exists() {
|
if usage.Exists() || timings.Exists() {
|
||||||
return parseMetrics(modelID, start, usage, timings)
|
return parseMetrics(modelID, start, usage, timings)
|
||||||
}
|
}
|
||||||
@@ -365,8 +360,6 @@ func processStreamingResponse(modelID string, start time.Time, body []byte) (Tok
|
|||||||
}
|
}
|
||||||
|
|
||||||
func parseMetrics(modelID string, start time.Time, usage, timings gjson.Result) (TokenMetrics, error) {
|
func parseMetrics(modelID string, start time.Time, usage, timings gjson.Result) (TokenMetrics, error) {
|
||||||
wallDurationMs := int(time.Since(start).Milliseconds())
|
|
||||||
|
|
||||||
// default values
|
// default values
|
||||||
cachedTokens := -1 // unknown or missing data
|
cachedTokens := -1 // unknown or missing data
|
||||||
outputTokens := 0
|
outputTokens := 0
|
||||||
@@ -375,7 +368,7 @@ func parseMetrics(modelID string, start time.Time, usage, timings gjson.Result)
|
|||||||
// timings data
|
// timings data
|
||||||
tokensPerSecond := -1.0
|
tokensPerSecond := -1.0
|
||||||
promptPerSecond := -1.0
|
promptPerSecond := -1.0
|
||||||
durationMs := wallDurationMs
|
durationMs := int(time.Since(start).Milliseconds())
|
||||||
|
|
||||||
if usage.Exists() {
|
if usage.Exists() {
|
||||||
if pt := usage.Get("prompt_tokens"); pt.Exists() {
|
if pt := usage.Get("prompt_tokens"); pt.Exists() {
|
||||||
@@ -404,10 +397,7 @@ func parseMetrics(modelID string, start time.Time, usage, timings gjson.Result)
|
|||||||
outputTokens = int(timings.Get("predicted_n").Int())
|
outputTokens = int(timings.Get("predicted_n").Int())
|
||||||
promptPerSecond = timings.Get("prompt_per_second").Float()
|
promptPerSecond = timings.Get("prompt_per_second").Float()
|
||||||
tokensPerSecond = timings.Get("predicted_per_second").Float()
|
tokensPerSecond = timings.Get("predicted_per_second").Float()
|
||||||
timingsDurationMs := int(timings.Get("prompt_ms").Float() + timings.Get("predicted_ms").Float())
|
durationMs = int(timings.Get("prompt_ms").Float() + timings.Get("predicted_ms").Float())
|
||||||
if timingsDurationMs > durationMs {
|
|
||||||
durationMs = timingsDurationMs
|
|
||||||
}
|
|
||||||
|
|
||||||
if cachedValue := timings.Get("cache_n"); cachedValue.Exists() {
|
if cachedValue := timings.Get("cache_n"); cachedValue.Exists() {
|
||||||
cachedTokens = int(cachedValue.Int())
|
cachedTokens = int(cachedValue.Int())
|
||||||
@@ -513,9 +503,9 @@ func filterAcceptEncoding(acceptEncoding string) string {
|
|||||||
supported := map[string]bool{"gzip": true, "deflate": true}
|
supported := map[string]bool{"gzip": true, "deflate": true}
|
||||||
var filtered []string
|
var filtered []string
|
||||||
|
|
||||||
for part := range strings.SplitSeq(acceptEncoding, ",") {
|
for _, part := range strings.Split(acceptEncoding, ",") {
|
||||||
// Parse encoding and optional quality value (e.g., "gzip;q=1.0")
|
// Parse encoding and optional quality value (e.g., "gzip;q=1.0")
|
||||||
encoding, _, _ := strings.Cut(strings.TrimSpace(part), ";")
|
encoding := strings.TrimSpace(strings.Split(part, ";")[0])
|
||||||
if supported[strings.ToLower(encoding)] {
|
if supported[strings.ToLower(encoding)] {
|
||||||
filtered = append(filtered, strings.TrimSpace(part))
|
filtered = append(filtered, strings.TrimSpace(part))
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -14,7 +14,6 @@ import (
|
|||||||
"github.com/gin-gonic/gin"
|
"github.com/gin-gonic/gin"
|
||||||
"github.com/mostlygeek/llama-swap/event"
|
"github.com/mostlygeek/llama-swap/event"
|
||||||
"github.com/stretchr/testify/assert"
|
"github.com/stretchr/testify/assert"
|
||||||
"github.com/tidwall/gjson"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
func TestMetricsMonitor_AddMetrics(t *testing.T) {
|
func TestMetricsMonitor_AddMetrics(t *testing.T) {
|
||||||
@@ -571,27 +570,6 @@ func TestMetricsMonitor_Concurrent(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func TestMetricsMonitor_ParseMetrics(t *testing.T) {
|
func TestMetricsMonitor_ParseMetrics(t *testing.T) {
|
||||||
t.Run("keeps wall clock duration when timings underreport request time", func(t *testing.T) {
|
|
||||||
start := time.Now().Add(-5 * time.Second)
|
|
||||||
usage := gjson.Parse(`{"prompt_tokens": 5, "completion_tokens": 1}`)
|
|
||||||
timings := gjson.Parse(`{
|
|
||||||
"prompt_n": 5,
|
|
||||||
"predicted_n": 1,
|
|
||||||
"prompt_per_second": 10.0,
|
|
||||||
"predicted_per_second": 2.0,
|
|
||||||
"prompt_ms": 5.0,
|
|
||||||
"predicted_ms": 15.0
|
|
||||||
}`)
|
|
||||||
|
|
||||||
metrics, err := parseMetrics("test-model", start, usage, timings)
|
|
||||||
assert.NoError(t, err)
|
|
||||||
assert.Equal(t, 5, metrics.InputTokens)
|
|
||||||
assert.Equal(t, 1, metrics.OutputTokens)
|
|
||||||
assert.Equal(t, 10.0, metrics.PromptPerSecond)
|
|
||||||
assert.Equal(t, 2.0, metrics.TokensPerSecond)
|
|
||||||
assert.GreaterOrEqual(t, metrics.DurationMs, 5000)
|
|
||||||
})
|
|
||||||
|
|
||||||
t.Run("prefers timings over usage data", func(t *testing.T) {
|
t.Run("prefers timings over usage data", func(t *testing.T) {
|
||||||
mm := newMetricsMonitor(testLogger, 10, 0)
|
mm := newMetricsMonitor(testLogger, 10, 0)
|
||||||
|
|
||||||
@@ -731,35 +709,6 @@ data: [DONE]
|
|||||||
assert.Equal(t, 0, metrics[0].OutputTokens)
|
assert.Equal(t, 0, metrics[0].OutputTokens)
|
||||||
})
|
})
|
||||||
|
|
||||||
t.Run("v1/responses format with nested response.usage", func(t *testing.T) {
|
|
||||||
mm := newMetricsMonitor(testLogger, 10, 0)
|
|
||||||
|
|
||||||
// v1/responses SSE format: usage is nested under response.usage
|
|
||||||
responseBody := "event: response.completed\n" +
|
|
||||||
`data: {"type":"response.completed","response":{"id":"resp_abc","object":"response","created_at":1773416985,"status":"completed","model":"test-model","output":[],"usage":{"input_tokens":17,"output_tokens":23,"total_tokens":40}}}` +
|
|
||||||
"\n\n"
|
|
||||||
|
|
||||||
nextHandler := func(modelID string, w http.ResponseWriter, r *http.Request) error {
|
|
||||||
w.Header().Set("Content-Type", "text/event-stream")
|
|
||||||
w.WriteHeader(http.StatusOK)
|
|
||||||
w.Write([]byte(responseBody))
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
req := httptest.NewRequest("POST", "/v1/responses", nil)
|
|
||||||
rec := httptest.NewRecorder()
|
|
||||||
ginCtx, _ := gin.CreateTestContext(rec)
|
|
||||||
|
|
||||||
err := mm.wrapHandler("test-model", ginCtx.Writer, req, nextHandler)
|
|
||||||
assert.NoError(t, err)
|
|
||||||
|
|
||||||
metrics := mm.getMetrics()
|
|
||||||
assert.Equal(t, 1, len(metrics))
|
|
||||||
assert.Equal(t, "test-model", metrics[0].Model)
|
|
||||||
assert.Equal(t, 17, metrics[0].InputTokens)
|
|
||||||
assert.Equal(t, 23, metrics[0].OutputTokens)
|
|
||||||
})
|
|
||||||
|
|
||||||
t.Run("handles empty streaming response records minimal metrics", func(t *testing.T) {
|
t.Run("handles empty streaming response records minimal metrics", func(t *testing.T) {
|
||||||
mm := newMetricsMonitor(testLogger, 10, 0)
|
mm := newMetricsMonitor(testLogger, 10, 0)
|
||||||
|
|
||||||
|
|||||||
+15
-17
@@ -34,25 +34,23 @@ func NewPeerProxy(peers config.PeerDictionaryConfig, proxyLogger *LogMonitor) (*
|
|||||||
}
|
}
|
||||||
sort.Strings(peerIDs)
|
sort.Strings(peerIDs)
|
||||||
|
|
||||||
|
// Create a shared transport with reasonable timeouts for peer connections
|
||||||
|
// these can be tuned with feedback later
|
||||||
|
peerTransport := &http.Transport{
|
||||||
|
DialContext: (&net.Dialer{
|
||||||
|
Timeout: 30 * time.Second, // Connection timeout
|
||||||
|
KeepAlive: 30 * time.Second,
|
||||||
|
}).DialContext,
|
||||||
|
TLSHandshakeTimeout: 10 * time.Second,
|
||||||
|
ResponseHeaderTimeout: 60 * time.Second, // Time to wait for response headers
|
||||||
|
ExpectContinueTimeout: 1 * time.Second,
|
||||||
|
MaxIdleConns: 100,
|
||||||
|
MaxIdleConnsPerHost: 10,
|
||||||
|
IdleConnTimeout: 90 * time.Second,
|
||||||
|
}
|
||||||
|
|
||||||
for _, peerID := range peerIDs {
|
for _, peerID := range peerIDs {
|
||||||
peer := peers[peerID]
|
peer := peers[peerID]
|
||||||
|
|
||||||
// Create a transport with per-peer timeout configuration
|
|
||||||
peerTransport := &http.Transport{
|
|
||||||
Proxy: http.ProxyFromEnvironment,
|
|
||||||
DialContext: (&net.Dialer{
|
|
||||||
Timeout: time.Duration(peer.Timeouts.Connect) * time.Second,
|
|
||||||
KeepAlive: time.Duration(peer.Timeouts.KeepAlive) * time.Second,
|
|
||||||
}).DialContext,
|
|
||||||
TLSHandshakeTimeout: time.Duration(peer.Timeouts.TLSHandshake) * time.Second,
|
|
||||||
ResponseHeaderTimeout: time.Duration(peer.Timeouts.ResponseHeader) * time.Second,
|
|
||||||
ExpectContinueTimeout: time.Duration(peer.Timeouts.ExpectContinue) * time.Second,
|
|
||||||
ForceAttemptHTTP2: true,
|
|
||||||
MaxIdleConns: 100,
|
|
||||||
MaxIdleConnsPerHost: 10,
|
|
||||||
IdleConnTimeout: time.Duration(peer.Timeouts.IdleConn) * time.Second,
|
|
||||||
}
|
|
||||||
|
|
||||||
// Create reverse proxy for this peer
|
// Create reverse proxy for this peer
|
||||||
reverseProxy := httputil.NewSingleHostReverseProxy(peer.ProxyURL)
|
reverseProxy := httputil.NewSingleHostReverseProxy(peer.ProxyURL)
|
||||||
reverseProxy.Transport = peerTransport
|
reverseProxy.Transport = peerTransport
|
||||||
|
|||||||
@@ -6,7 +6,6 @@ import (
|
|||||||
"net/url"
|
"net/url"
|
||||||
"strings"
|
"strings"
|
||||||
"testing"
|
"testing"
|
||||||
"time"
|
|
||||||
|
|
||||||
"github.com/mostlygeek/llama-swap/proxy/config"
|
"github.com/mostlygeek/llama-swap/proxy/config"
|
||||||
"github.com/stretchr/testify/assert"
|
"github.com/stretchr/testify/assert"
|
||||||
@@ -267,45 +266,3 @@ func TestProxyRequest_SSEHeaderModification(t *testing.T) {
|
|||||||
// The X-Accel-Buffering header should be set to "no" for SSE
|
// The X-Accel-Buffering header should be set to "no" for SSE
|
||||||
assert.Equal(t, "no", w.Header().Get("X-Accel-Buffering"))
|
assert.Equal(t, "no", w.Header().Get("X-Accel-Buffering"))
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestNewPeerProxy_CustomTimeouts(t *testing.T) {
|
|
||||||
proxyURL, _ := url.Parse("http://localhost:8080")
|
|
||||||
|
|
||||||
peers := config.PeerDictionaryConfig{
|
|
||||||
"test-peer": config.PeerConfig{
|
|
||||||
Proxy: "http://localhost:8080",
|
|
||||||
ProxyURL: proxyURL,
|
|
||||||
Models: []string{"model1"},
|
|
||||||
Timeouts: config.TimeoutsConfig{
|
|
||||||
Connect: 45,
|
|
||||||
ResponseHeader: 300,
|
|
||||||
TLSHandshake: 15,
|
|
||||||
ExpectContinue: 2,
|
|
||||||
IdleConn: 120,
|
|
||||||
},
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
peerProxy, err := NewPeerProxy(peers, testLogger)
|
|
||||||
|
|
||||||
assert.NoError(t, err)
|
|
||||||
assert.NotNil(t, peerProxy)
|
|
||||||
assert.True(t, peerProxy.HasPeerModel("model1"))
|
|
||||||
|
|
||||||
// Verify the timeout values are actually applied to the transport
|
|
||||||
member, found := peerProxy.proxyMap["model1"]
|
|
||||||
require.True(t, found, "model1 should exist in proxyMap")
|
|
||||||
assert.NotNil(t, member.reverseProxy)
|
|
||||||
assert.NotNil(t, member.reverseProxy.Transport)
|
|
||||||
|
|
||||||
transport, ok := member.reverseProxy.Transport.(*http.Transport)
|
|
||||||
require.True(t, ok, "Transport should be *http.Transport")
|
|
||||||
|
|
||||||
// Verify all timeout values are correctly applied
|
|
||||||
assert.Equal(t, 300*time.Second, transport.ResponseHeaderTimeout)
|
|
||||||
assert.Equal(t, 15*time.Second, transport.TLSHandshakeTimeout)
|
|
||||||
assert.Equal(t, 2*time.Second, transport.ExpectContinueTimeout)
|
|
||||||
assert.Equal(t, 120*time.Second, transport.IdleConnTimeout)
|
|
||||||
// ForceAttemptHTTP2 should be enabled
|
|
||||||
assert.True(t, transport.ForceAttemptHTTP2)
|
|
||||||
}
|
|
||||||
|
|||||||
@@ -96,24 +96,6 @@ func NewProcess(ID string, healthCheckTimeout int, config config.ModelConfig, pr
|
|||||||
var reverseProxy *httputil.ReverseProxy
|
var reverseProxy *httputil.ReverseProxy
|
||||||
if proxyURL != nil {
|
if proxyURL != nil {
|
||||||
reverseProxy = httputil.NewSingleHostReverseProxy(proxyURL)
|
reverseProxy = httputil.NewSingleHostReverseProxy(proxyURL)
|
||||||
|
|
||||||
// Create custom transport with configured timeouts
|
|
||||||
transport := &http.Transport{
|
|
||||||
Proxy: http.ProxyFromEnvironment,
|
|
||||||
DialContext: (&net.Dialer{
|
|
||||||
Timeout: time.Duration(config.Timeouts.Connect) * time.Second,
|
|
||||||
KeepAlive: time.Duration(config.Timeouts.KeepAlive) * time.Second,
|
|
||||||
}).DialContext,
|
|
||||||
TLSHandshakeTimeout: time.Duration(config.Timeouts.TLSHandshake) * time.Second,
|
|
||||||
ResponseHeaderTimeout: time.Duration(config.Timeouts.ResponseHeader) * time.Second,
|
|
||||||
ExpectContinueTimeout: time.Duration(config.Timeouts.ExpectContinue) * time.Second,
|
|
||||||
ForceAttemptHTTP2: true,
|
|
||||||
MaxIdleConns: 100,
|
|
||||||
MaxIdleConnsPerHost: 10,
|
|
||||||
IdleConnTimeout: time.Duration(config.Timeouts.IdleConn) * time.Second,
|
|
||||||
}
|
|
||||||
reverseProxy.Transport = transport
|
|
||||||
|
|
||||||
reverseProxy.ModifyResponse = func(resp *http.Response) error {
|
reverseProxy.ModifyResponse = func(resp *http.Response) error {
|
||||||
// prevent nginx from buffering streaming responses (e.g., SSE)
|
// prevent nginx from buffering streaming responses (e.g., SSE)
|
||||||
if strings.Contains(strings.ToLower(resp.Header.Get("Content-Type")), "text/event-stream") {
|
if strings.Contains(strings.ToLower(resp.Header.Get("Content-Type")), "text/event-stream") {
|
||||||
|
|||||||
+10
-47
@@ -2,7 +2,6 @@ package proxy
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
|
||||||
"net/http"
|
"net/http"
|
||||||
"net/http/httptest"
|
"net/http/httptest"
|
||||||
"os"
|
"os"
|
||||||
@@ -118,12 +117,12 @@ func TestProcess_UnloadAfterTTL(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
expectedMessage := "I_sense_imminent_danger"
|
expectedMessage := "I_sense_imminent_danger"
|
||||||
conf := getTestSimpleResponderConfig(expectedMessage)
|
config := getTestSimpleResponderConfig(expectedMessage)
|
||||||
assert.Equal(t, config.MODEL_CONFIG_DEFAULT_TTL, conf.UnloadAfter)
|
assert.Equal(t, 0, config.UnloadAfter)
|
||||||
conf.UnloadAfter = 3 // seconds
|
config.UnloadAfter = 3 // seconds
|
||||||
assert.Equal(t, 3, conf.UnloadAfter)
|
assert.Equal(t, 3, config.UnloadAfter)
|
||||||
|
|
||||||
process := NewProcess("ttl_test", 2, conf, debugLogger, debugLogger)
|
process := NewProcess("ttl_test", 2, config, debugLogger, debugLogger)
|
||||||
defer process.Stop()
|
defer process.Stop()
|
||||||
|
|
||||||
// this should take 4 seconds
|
// this should take 4 seconds
|
||||||
@@ -160,12 +159,12 @@ func TestProcess_LowTTLValue(t *testing.T) {
|
|||||||
t.Skip("skipping test, edit process_test.go to run it ")
|
t.Skip("skipping test, edit process_test.go to run it ")
|
||||||
}
|
}
|
||||||
|
|
||||||
conf := getTestSimpleResponderConfig("fast_ttl")
|
config := getTestSimpleResponderConfig("fast_ttl")
|
||||||
assert.Equal(t, config.MODEL_CONFIG_DEFAULT_TTL, conf.UnloadAfter)
|
assert.Equal(t, 0, config.UnloadAfter)
|
||||||
conf.UnloadAfter = 1 // second
|
config.UnloadAfter = 1 // second
|
||||||
assert.Equal(t, 1, conf.UnloadAfter)
|
assert.Equal(t, 1, config.UnloadAfter)
|
||||||
|
|
||||||
process := NewProcess("ttl", 2, conf, debugLogger, debugLogger)
|
process := NewProcess("ttl", 2, config, debugLogger, debugLogger)
|
||||||
defer process.Stop()
|
defer process.Stop()
|
||||||
|
|
||||||
for i := 0; i < 100; i++ {
|
for i := 0; i < 100; i++ {
|
||||||
@@ -570,39 +569,3 @@ func (w *panicOnWriteResponseWriter) Write(b []byte) (int, error) {
|
|||||||
}
|
}
|
||||||
return w.ResponseRecorder.Write(b)
|
return w.ResponseRecorder.Write(b)
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestProcess_CustomTimeouts(t *testing.T) {
|
|
||||||
modelConfig := config.ModelConfig{
|
|
||||||
Cmd: "echo test",
|
|
||||||
Proxy: "http://localhost:8080",
|
|
||||||
CheckEndpoint: "/health",
|
|
||||||
Timeouts: config.TimeoutsConfig{
|
|
||||||
Connect: 45,
|
|
||||||
ResponseHeader: 120,
|
|
||||||
TLSHandshake: 15,
|
|
||||||
ExpectContinue: 2,
|
|
||||||
IdleConn: 120,
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
debugLogger := NewLogMonitorWriter(io.Discard)
|
|
||||||
process := NewProcess("test-model", 30, modelConfig, debugLogger, debugLogger)
|
|
||||||
|
|
||||||
// Verify the process was created successfully
|
|
||||||
assert.NotNil(t, process)
|
|
||||||
assert.Equal(t, "test-model", process.ID)
|
|
||||||
assert.NotNil(t, process.reverseProxy)
|
|
||||||
assert.NotNil(t, process.reverseProxy.Transport)
|
|
||||||
|
|
||||||
// Verify it's using http.Transport (not some other type)
|
|
||||||
transport, ok := process.reverseProxy.Transport.(*http.Transport)
|
|
||||||
assert.True(t, ok, "Transport should be *http.Transport")
|
|
||||||
assert.NotNil(t, transport)
|
|
||||||
|
|
||||||
// Verify the timeouts are correctly applied
|
|
||||||
assert.Equal(t, 120*time.Second, transport.ResponseHeaderTimeout)
|
|
||||||
assert.Equal(t, 15*time.Second, transport.TLSHandshakeTimeout)
|
|
||||||
assert.Equal(t, 2*time.Second, transport.ExpectContinueTimeout)
|
|
||||||
assert.Equal(t, 120*time.Second, transport.IdleConnTimeout)
|
|
||||||
assert.True(t, transport.ForceAttemptHTTP2)
|
|
||||||
}
|
|
||||||
|
|||||||
+19
-81
@@ -28,40 +28,6 @@ const (
|
|||||||
|
|
||||||
type proxyCtxKey string
|
type proxyCtxKey string
|
||||||
|
|
||||||
type InflightCounter struct {
|
|
||||||
mu sync.Mutex
|
|
||||||
total int
|
|
||||||
}
|
|
||||||
|
|
||||||
func newInflightCounter() *InflightCounter {
|
|
||||||
return &InflightCounter{}
|
|
||||||
}
|
|
||||||
|
|
||||||
func (ic *InflightCounter) Current() int {
|
|
||||||
ic.mu.Lock()
|
|
||||||
total := ic.total
|
|
||||||
ic.mu.Unlock()
|
|
||||||
return total
|
|
||||||
}
|
|
||||||
|
|
||||||
func (ic *InflightCounter) Increment() int {
|
|
||||||
ic.mu.Lock()
|
|
||||||
ic.total++
|
|
||||||
total := ic.total
|
|
||||||
ic.mu.Unlock()
|
|
||||||
return total
|
|
||||||
}
|
|
||||||
|
|
||||||
func (ic *InflightCounter) Decrement() int {
|
|
||||||
ic.mu.Lock()
|
|
||||||
if ic.total > 0 {
|
|
||||||
ic.total--
|
|
||||||
}
|
|
||||||
total := ic.total
|
|
||||||
ic.mu.Unlock()
|
|
||||||
return total
|
|
||||||
}
|
|
||||||
|
|
||||||
type ProxyManager struct {
|
type ProxyManager struct {
|
||||||
sync.Mutex
|
sync.Mutex
|
||||||
|
|
||||||
@@ -77,8 +43,6 @@ type ProxyManager struct {
|
|||||||
|
|
||||||
processGroups map[string]*ProcessGroup
|
processGroups map[string]*ProcessGroup
|
||||||
|
|
||||||
inFlightCounter *InflightCounter
|
|
||||||
|
|
||||||
// shutdown signaling
|
// shutdown signaling
|
||||||
shutdownCtx context.Context
|
shutdownCtx context.Context
|
||||||
shutdownCancel context.CancelFunc
|
shutdownCancel context.CancelFunc
|
||||||
@@ -191,8 +155,6 @@ func New(proxyConfig config.Config) *ProxyManager {
|
|||||||
|
|
||||||
processGroups: make(map[string]*ProcessGroup),
|
processGroups: make(map[string]*ProcessGroup),
|
||||||
|
|
||||||
inFlightCounter: newInflightCounter(),
|
|
||||||
|
|
||||||
shutdownCtx: shutdownCtx,
|
shutdownCtx: shutdownCtx,
|
||||||
shutdownCancel: shutdownCancel,
|
shutdownCancel: shutdownCancel,
|
||||||
|
|
||||||
@@ -314,42 +276,37 @@ func (pm *ProxyManager) setupGinEngine() {
|
|||||||
|
|
||||||
// Set up routes using the Gin engine
|
// Set up routes using the Gin engine
|
||||||
// Protected routes use pm.apiKeyAuth() middleware
|
// Protected routes use pm.apiKeyAuth() middleware
|
||||||
pm.ginEngine.POST("/v1/chat/completions", pm.apiKeyAuth(), pm.trackInflight(), pm.proxyInferenceHandler)
|
pm.ginEngine.POST("/v1/chat/completions", pm.apiKeyAuth(), pm.proxyInferenceHandler)
|
||||||
pm.ginEngine.POST("/v1/responses", pm.apiKeyAuth(), pm.trackInflight(), pm.proxyInferenceHandler)
|
pm.ginEngine.POST("/v1/responses", pm.apiKeyAuth(), pm.proxyInferenceHandler)
|
||||||
// Support legacy /v1/completions api, see issue #12
|
// Support legacy /v1/completions api, see issue #12
|
||||||
pm.ginEngine.POST("/v1/completions", pm.apiKeyAuth(), pm.trackInflight(), pm.proxyInferenceHandler)
|
pm.ginEngine.POST("/v1/completions", pm.apiKeyAuth(), pm.proxyInferenceHandler)
|
||||||
// Support anthropic /v1/messages (added https://github.com/ggml-org/llama.cpp/pull/17570)
|
// Support anthropic /v1/messages (added https://github.com/ggml-org/llama.cpp/pull/17570)
|
||||||
pm.ginEngine.POST("/v1/messages", pm.apiKeyAuth(), pm.trackInflight(), pm.proxyInferenceHandler)
|
pm.ginEngine.POST("/v1/messages", pm.apiKeyAuth(), pm.proxyInferenceHandler)
|
||||||
// Support anthropic count_tokens API (Also added in the above PR)
|
// Support anthropic count_tokens API (Also added in the above PR)
|
||||||
pm.ginEngine.POST("/v1/messages/count_tokens", pm.apiKeyAuth(), pm.trackInflight(), pm.proxyInferenceHandler)
|
pm.ginEngine.POST("/v1/messages/count_tokens", pm.apiKeyAuth(), pm.proxyInferenceHandler)
|
||||||
|
|
||||||
// Support embeddings and reranking
|
// Support embeddings and reranking
|
||||||
pm.ginEngine.POST("/v1/embeddings", pm.apiKeyAuth(), pm.trackInflight(), pm.proxyInferenceHandler)
|
pm.ginEngine.POST("/v1/embeddings", pm.apiKeyAuth(), pm.proxyInferenceHandler)
|
||||||
|
|
||||||
// llama-server's /reranking endpoint + aliases
|
// llama-server's /reranking endpoint + aliases
|
||||||
pm.ginEngine.POST("/reranking", pm.apiKeyAuth(), pm.trackInflight(), pm.proxyInferenceHandler)
|
pm.ginEngine.POST("/reranking", pm.apiKeyAuth(), pm.proxyInferenceHandler)
|
||||||
pm.ginEngine.POST("/rerank", pm.apiKeyAuth(), pm.trackInflight(), pm.proxyInferenceHandler)
|
pm.ginEngine.POST("/rerank", pm.apiKeyAuth(), pm.proxyInferenceHandler)
|
||||||
pm.ginEngine.POST("/v1/rerank", pm.apiKeyAuth(), pm.trackInflight(), pm.proxyInferenceHandler)
|
pm.ginEngine.POST("/v1/rerank", pm.apiKeyAuth(), pm.proxyInferenceHandler)
|
||||||
pm.ginEngine.POST("/v1/reranking", pm.apiKeyAuth(), pm.trackInflight(), pm.proxyInferenceHandler)
|
pm.ginEngine.POST("/v1/reranking", pm.apiKeyAuth(), pm.proxyInferenceHandler)
|
||||||
|
|
||||||
// llama-server's /infill endpoint for code infilling
|
// llama-server's /infill endpoint for code infilling
|
||||||
pm.ginEngine.POST("/infill", pm.apiKeyAuth(), pm.trackInflight(), pm.proxyInferenceHandler)
|
pm.ginEngine.POST("/infill", pm.apiKeyAuth(), pm.proxyInferenceHandler)
|
||||||
|
|
||||||
// llama-server's /completion endpoint
|
// llama-server's /completion endpoint
|
||||||
pm.ginEngine.POST("/completion", pm.apiKeyAuth(), pm.trackInflight(), pm.proxyInferenceHandler)
|
pm.ginEngine.POST("/completion", pm.apiKeyAuth(), pm.proxyInferenceHandler)
|
||||||
|
|
||||||
// Support audio/speech endpoint
|
// Support audio/speech endpoint
|
||||||
pm.ginEngine.POST("/v1/audio/speech", pm.apiKeyAuth(), pm.trackInflight(), pm.proxyInferenceHandler)
|
pm.ginEngine.POST("/v1/audio/speech", pm.apiKeyAuth(), pm.proxyInferenceHandler)
|
||||||
pm.ginEngine.POST("/v1/audio/voices", pm.apiKeyAuth(), pm.trackInflight(), pm.proxyInferenceHandler)
|
pm.ginEngine.POST("/v1/audio/voices", pm.apiKeyAuth(), pm.proxyInferenceHandler)
|
||||||
pm.ginEngine.GET("/v1/audio/voices", pm.apiKeyAuth(), pm.trackInflight(), pm.proxyGETModelHandler)
|
pm.ginEngine.GET("/v1/audio/voices", pm.apiKeyAuth(), pm.proxyGETModelHandler)
|
||||||
pm.ginEngine.POST("/v1/audio/transcriptions", pm.apiKeyAuth(), pm.trackInflight(), pm.proxyOAIPostFormHandler)
|
pm.ginEngine.POST("/v1/audio/transcriptions", pm.apiKeyAuth(), pm.proxyOAIPostFormHandler)
|
||||||
pm.ginEngine.POST("/v1/images/generations", pm.apiKeyAuth(), pm.trackInflight(), pm.proxyInferenceHandler)
|
pm.ginEngine.POST("/v1/images/generations", pm.apiKeyAuth(), pm.proxyInferenceHandler)
|
||||||
pm.ginEngine.POST("/v1/images/edits", pm.apiKeyAuth(), pm.trackInflight(), pm.proxyOAIPostFormHandler)
|
pm.ginEngine.POST("/v1/images/edits", pm.apiKeyAuth(), pm.proxyOAIPostFormHandler)
|
||||||
|
|
||||||
// sd.cpp /sdapi/v1 endpoints
|
|
||||||
pm.ginEngine.POST("/sdapi/v1/txt2img", pm.apiKeyAuth(), pm.trackInflight(), pm.proxyInferenceHandler)
|
|
||||||
pm.ginEngine.POST("/sdapi/v1/img2img", pm.apiKeyAuth(), pm.trackInflight(), pm.proxyInferenceHandler)
|
|
||||||
pm.ginEngine.GET("/sdapi/v1/loras", pm.apiKeyAuth(), pm.trackInflight(), pm.proxyGETModelHandler)
|
|
||||||
|
|
||||||
pm.ginEngine.GET("/v1/models", pm.apiKeyAuth(), pm.listModelsHandler)
|
pm.ginEngine.GET("/v1/models", pm.apiKeyAuth(), pm.listModelsHandler)
|
||||||
|
|
||||||
@@ -368,7 +325,7 @@ func (pm *ProxyManager) setupGinEngine() {
|
|||||||
pm.ginEngine.GET("/upstream", func(c *gin.Context) {
|
pm.ginEngine.GET("/upstream", func(c *gin.Context) {
|
||||||
c.Redirect(http.StatusFound, "/ui/models")
|
c.Redirect(http.StatusFound, "/ui/models")
|
||||||
})
|
})
|
||||||
pm.ginEngine.Any("/upstream/*upstreamPath", pm.apiKeyAuth(), pm.trackInflight(), pm.proxyToUpstream)
|
pm.ginEngine.Any("/upstream/*upstreamPath", pm.apiKeyAuth(), pm.proxyToUpstream)
|
||||||
pm.ginEngine.GET("/unload", pm.apiKeyAuth(), pm.unloadAllModelsHandler)
|
pm.ginEngine.GET("/unload", pm.apiKeyAuth(), pm.unloadAllModelsHandler)
|
||||||
pm.ginEngine.GET("/running", pm.apiKeyAuth(), pm.listRunningProcessesHandler)
|
pm.ginEngine.GET("/running", pm.apiKeyAuth(), pm.listRunningProcessesHandler)
|
||||||
pm.ginEngine.GET("/health", func(c *gin.Context) {
|
pm.ginEngine.GET("/health", func(c *gin.Context) {
|
||||||
@@ -432,14 +389,6 @@ func (pm *ProxyManager) setupGinEngine() {
|
|||||||
gin.DisableConsoleColor()
|
gin.DisableConsoleColor()
|
||||||
}
|
}
|
||||||
|
|
||||||
func (pm *ProxyManager) trackInflight() gin.HandlerFunc {
|
|
||||||
return func(c *gin.Context) {
|
|
||||||
event.Emit(InFlightRequestsEvent{Total: pm.inFlightCounter.Increment()})
|
|
||||||
defer event.Emit(InFlightRequestsEvent{Total: pm.inFlightCounter.Decrement()})
|
|
||||||
c.Next()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// ServeHTTP implements http.Handler interface
|
// ServeHTTP implements http.Handler interface
|
||||||
func (pm *ProxyManager) ServeHTTP(w http.ResponseWriter, r *http.Request) {
|
func (pm *ProxyManager) ServeHTTP(w http.ResponseWriter, r *http.Request) {
|
||||||
pm.ginEngine.ServeHTTP(w, r)
|
pm.ginEngine.ServeHTTP(w, r)
|
||||||
@@ -725,17 +674,6 @@ func (pm *ProxyManager) proxyInferenceHandler(c *gin.Context) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// setParamsByID: set params based on the requested model ID (runs after setParams, can override it)
|
|
||||||
setParamsByIDParams, setParamsByIDKeys := pm.config.Models[modelID].Filters.SanitizedSetParamsByID(requestedModel)
|
|
||||||
for _, key := range setParamsByIDKeys {
|
|
||||||
pm.proxyLogger.Debugf("<%s> setting param by id: %s", requestedModel, key)
|
|
||||||
bodyBytes, err = sjson.SetBytes(bodyBytes, key, setParamsByIDParams[key])
|
|
||||||
if err != nil {
|
|
||||||
pm.sendErrorResponse(c, http.StatusInternalServerError, fmt.Sprintf("error setting parameter %s in request", key))
|
|
||||||
return
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pm.proxyLogger.Debugf("ProxyManager using local Process for model: %s", requestedModel)
|
pm.proxyLogger.Debugf("ProxyManager using local Process for model: %s", requestedModel)
|
||||||
nextHandler = processGroup.ProxyRequest
|
nextHandler = processGroup.ProxyRequest
|
||||||
} else if pm.peerProxy != nil && pm.peerProxy.HasPeerModel(requestedModel) {
|
} else if pm.peerProxy != nil && pm.peerProxy.HasPeerModel(requestedModel) {
|
||||||
|
|||||||
@@ -14,13 +14,12 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
type Model struct {
|
type Model struct {
|
||||||
Id string `json:"id"`
|
Id string `json:"id"`
|
||||||
Name string `json:"name"`
|
Name string `json:"name"`
|
||||||
Description string `json:"description"`
|
Description string `json:"description"`
|
||||||
State string `json:"state"`
|
State string `json:"state"`
|
||||||
Unlisted bool `json:"unlisted"`
|
Unlisted bool `json:"unlisted"`
|
||||||
PeerID string `json:"peerID"`
|
PeerID string `json:"peerID"`
|
||||||
Aliases []string `json:"aliases,omitempty"`
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func addApiHandlers(pm *ProxyManager) {
|
func addApiHandlers(pm *ProxyManager) {
|
||||||
@@ -84,7 +83,6 @@ func (pm *ProxyManager) getModelStatus() []Model {
|
|||||||
Description: pm.config.Models[modelID].Description,
|
Description: pm.config.Models[modelID].Description,
|
||||||
State: state,
|
State: state,
|
||||||
Unlisted: pm.config.Models[modelID].Unlisted,
|
Unlisted: pm.config.Models[modelID].Unlisted,
|
||||||
Aliases: pm.config.Models[modelID].Aliases,
|
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -109,7 +107,6 @@ const (
|
|||||||
msgTypeModelStatus messageType = "modelStatus"
|
msgTypeModelStatus messageType = "modelStatus"
|
||||||
msgTypeLogData messageType = "logData"
|
msgTypeLogData messageType = "logData"
|
||||||
msgTypeMetrics messageType = "metrics"
|
msgTypeMetrics messageType = "metrics"
|
||||||
msgTypeInFlight messageType = "inflight"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
type messageEnvelope struct {
|
type messageEnvelope struct {
|
||||||
@@ -169,18 +166,6 @@ func (pm *ProxyManager) apiSendEvents(c *gin.Context) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
sendInFlight := func(total int) {
|
|
||||||
jsonData, err := json.Marshal(gin.H{"total": total})
|
|
||||||
if err == nil {
|
|
||||||
select {
|
|
||||||
case sendBuffer <- messageEnvelope{Type: msgTypeInFlight, Data: string(jsonData)}:
|
|
||||||
case <-ctx.Done():
|
|
||||||
return
|
|
||||||
default:
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Send updated models list
|
* Send updated models list
|
||||||
*/
|
*/
|
||||||
@@ -208,19 +193,11 @@ func (pm *ProxyManager) apiSendEvents(c *gin.Context) {
|
|||||||
sendMetrics([]TokenMetrics{e.Metrics})
|
sendMetrics([]TokenMetrics{e.Metrics})
|
||||||
})()
|
})()
|
||||||
|
|
||||||
/**
|
|
||||||
* Send in-flight request stats related to token stats "Waiting: N" count.
|
|
||||||
*/
|
|
||||||
defer event.On(func(e InFlightRequestsEvent) {
|
|
||||||
sendInFlight(e.Total)
|
|
||||||
})()
|
|
||||||
|
|
||||||
// send initial batch of data
|
// send initial batch of data
|
||||||
sendLogData("proxy", pm.proxyLogger.GetHistory())
|
sendLogData("proxy", pm.proxyLogger.GetHistory())
|
||||||
sendLogData("upstream", pm.upstreamLogger.GetHistory())
|
sendLogData("upstream", pm.upstreamLogger.GetHistory())
|
||||||
sendModels()
|
sendModels()
|
||||||
sendMetrics(pm.metricsMonitor.getMetrics())
|
sendMetrics(pm.metricsMonitor.getMetrics())
|
||||||
sendInFlight(pm.inFlightCounter.Current())
|
|
||||||
|
|
||||||
for {
|
for {
|
||||||
select {
|
select {
|
||||||
|
|||||||
+1
-135
@@ -730,7 +730,7 @@ func TestProxyManager_RunningEndpoint(t *testing.T) {
|
|||||||
// Verify extended fields are present
|
// Verify extended fields are present
|
||||||
assert.NotEmpty(t, response.Running[0].Cmd, "cmd should be populated")
|
assert.NotEmpty(t, response.Running[0].Cmd, "cmd should be populated")
|
||||||
assert.NotEmpty(t, response.Running[0].Proxy, "proxy should be populated")
|
assert.NotEmpty(t, response.Running[0].Proxy, "proxy should be populated")
|
||||||
assert.Equal(t, -1, response.Running[0].TTL, "ttl should default to -1 (use globalTTL)")
|
assert.Equal(t, 0, response.Running[0].TTL, "ttl should default to 0")
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1046,61 +1046,6 @@ func TestProxyManager_FiltersStripParams(t *testing.T) {
|
|||||||
// t.Logf("%v", response)
|
// t.Logf("%v", response)
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestProxyManager_FiltersSetParamsByID(t *testing.T) {
|
|
||||||
// no explicit aliases — setParamsByID keys are auto-registered as aliases
|
|
||||||
configStr := strings.Replace(`
|
|
||||||
logLevel: error
|
|
||||||
models:
|
|
||||||
model1:
|
|
||||||
cmd: 'SRPATH --port ${PORT} --silent --respond model1'
|
|
||||||
proxy: "http://127.0.0.1:${PORT}"
|
|
||||||
filters:
|
|
||||||
setParams:
|
|
||||||
reasoning_effort: medium
|
|
||||||
setParamsByID:
|
|
||||||
"${MODEL_ID}:high":
|
|
||||||
reasoning_effort: high
|
|
||||||
"${MODEL_ID}:low":
|
|
||||||
reasoning_effort: low
|
|
||||||
`, "SRPATH", simpleResponderPath, -1)
|
|
||||||
|
|
||||||
cfg, err := config.LoadConfigFromReader(strings.NewReader(configStr))
|
|
||||||
if !assert.NoError(t, err, "invalid test configuration") {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
proxy := New(cfg)
|
|
||||||
defer proxy.StopProcesses(StopWaitForInflightRequest)
|
|
||||||
|
|
||||||
tests := []struct {
|
|
||||||
requestedModel string
|
|
||||||
wantEffort string
|
|
||||||
}{
|
|
||||||
// setParams applies, no setParamsByID match
|
|
||||||
{requestedModel: "model1", wantEffort: "medium"},
|
|
||||||
// setParamsByID overrides setParams
|
|
||||||
{requestedModel: "model1:high", wantEffort: "high"},
|
|
||||||
{requestedModel: "model1:low", wantEffort: "low"},
|
|
||||||
}
|
|
||||||
|
|
||||||
for _, tt := range tests {
|
|
||||||
t.Run(tt.requestedModel, func(t *testing.T) {
|
|
||||||
reqBody := fmt.Sprintf(`{"model":%q}`, tt.requestedModel)
|
|
||||||
req := httptest.NewRequest("POST", "/v1/chat/completions", bytes.NewBufferString(reqBody))
|
|
||||||
w := CreateTestResponseRecorder()
|
|
||||||
proxy.ServeHTTP(w, req)
|
|
||||||
assert.Equal(t, http.StatusOK, w.Code)
|
|
||||||
|
|
||||||
var response map[string]interface{}
|
|
||||||
assert.NoError(t, json.Unmarshal(w.Body.Bytes(), &response))
|
|
||||||
|
|
||||||
requestBody, _ := response["request_body"].(string)
|
|
||||||
gotEffort := gjson.Get(requestBody, "reasoning_effort").String()
|
|
||||||
assert.Equal(t, tt.wantEffort, gotEffort, "reasoning_effort mismatch for model %s", tt.requestedModel)
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestProxyManager_HealthEndpoint(t *testing.T) {
|
func TestProxyManager_HealthEndpoint(t *testing.T) {
|
||||||
config := config.AddDefaultGroupToConfig(config.Config{
|
config := config.AddDefaultGroupToConfig(config.Config{
|
||||||
HealthCheckTimeout: 15,
|
HealthCheckTimeout: 15,
|
||||||
@@ -1659,82 +1604,3 @@ models:
|
|||||||
assert.Equal(t, "no", w.Header().Get("X-Accel-Buffering"))
|
assert.Equal(t, "no", w.Header().Get("X-Accel-Buffering"))
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestProxyManager_SdApiTxt2ImgRouting(t *testing.T) {
|
|
||||||
conf := config.AddDefaultGroupToConfig(config.Config{
|
|
||||||
HealthCheckTimeout: 15,
|
|
||||||
Models: map[string]config.ModelConfig{
|
|
||||||
"sd-model": getTestSimpleResponderConfig("sd-model"),
|
|
||||||
},
|
|
||||||
LogLevel: "error",
|
|
||||||
})
|
|
||||||
|
|
||||||
proxy := New(conf)
|
|
||||||
defer proxy.StopProcesses(StopWaitForInflightRequest)
|
|
||||||
|
|
||||||
t.Run("successful txt2img with model", func(t *testing.T) {
|
|
||||||
reqBody := `{"model":"sd-model","prompt":"a cat"}`
|
|
||||||
req := httptest.NewRequest("POST", "/sdapi/v1/txt2img", bytes.NewBufferString(reqBody))
|
|
||||||
w := CreateTestResponseRecorder()
|
|
||||||
|
|
||||||
proxy.ServeHTTP(w, req)
|
|
||||||
assert.Equal(t, http.StatusOK, w.Code)
|
|
||||||
assert.Contains(t, w.Body.String(), "sd-model")
|
|
||||||
})
|
|
||||||
|
|
||||||
t.Run("successful img2img with model", func(t *testing.T) {
|
|
||||||
reqBody := `{"model":"sd-model","prompt":"a cat","init_images":[]}`
|
|
||||||
req := httptest.NewRequest("POST", "/sdapi/v1/img2img", bytes.NewBufferString(reqBody))
|
|
||||||
w := CreateTestResponseRecorder()
|
|
||||||
|
|
||||||
proxy.ServeHTTP(w, req)
|
|
||||||
assert.Equal(t, http.StatusOK, w.Code)
|
|
||||||
assert.Contains(t, w.Body.String(), "sd-model")
|
|
||||||
})
|
|
||||||
|
|
||||||
t.Run("missing model returns 400", func(t *testing.T) {
|
|
||||||
reqBody := `{"prompt":"a cat"}`
|
|
||||||
req := httptest.NewRequest("POST", "/sdapi/v1/txt2img", bytes.NewBufferString(reqBody))
|
|
||||||
w := CreateTestResponseRecorder()
|
|
||||||
|
|
||||||
proxy.ServeHTTP(w, req)
|
|
||||||
assert.Equal(t, http.StatusBadRequest, w.Code)
|
|
||||||
assert.Contains(t, w.Body.String(), "missing or invalid 'model' key")
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestProxyManager_SdApiGetLoras(t *testing.T) {
|
|
||||||
conf := config.AddDefaultGroupToConfig(config.Config{
|
|
||||||
HealthCheckTimeout: 15,
|
|
||||||
Models: map[string]config.ModelConfig{
|
|
||||||
"sd-model": getTestSimpleResponderConfig("sd-model"),
|
|
||||||
},
|
|
||||||
LogLevel: "error",
|
|
||||||
})
|
|
||||||
|
|
||||||
proxy := New(conf)
|
|
||||||
defer proxy.StopProcesses(StopWaitForInflightRequest)
|
|
||||||
|
|
||||||
t.Run("successful GET loras with model query param", func(t *testing.T) {
|
|
||||||
req := httptest.NewRequest("GET", "/sdapi/v1/loras?model=sd-model", nil)
|
|
||||||
w := CreateTestResponseRecorder()
|
|
||||||
proxy.ServeHTTP(w, req)
|
|
||||||
assert.Equal(t, http.StatusOK, w.Code)
|
|
||||||
})
|
|
||||||
|
|
||||||
t.Run("missing model query param returns 400", func(t *testing.T) {
|
|
||||||
req := httptest.NewRequest("GET", "/sdapi/v1/loras", nil)
|
|
||||||
w := CreateTestResponseRecorder()
|
|
||||||
proxy.ServeHTTP(w, req)
|
|
||||||
assert.Equal(t, http.StatusBadRequest, w.Code)
|
|
||||||
assert.Contains(t, w.Body.String(), "missing required 'model' query parameter")
|
|
||||||
})
|
|
||||||
|
|
||||||
t.Run("unknown model returns 400", func(t *testing.T) {
|
|
||||||
req := httptest.NewRequest("GET", "/sdapi/v1/loras?model=nonexistent", nil)
|
|
||||||
w := CreateTestResponseRecorder()
|
|
||||||
proxy.ServeHTTP(w, req)
|
|
||||||
assert.Equal(t, http.StatusBadRequest, w.Code)
|
|
||||||
assert.Contains(t, w.Body.String(), "could not find suitable handler")
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|||||||
@@ -1 +0,0 @@
|
|||||||
legacy-peer-deps=true
|
|
||||||
Generated
+1060
-850
File diff suppressed because it is too large
Load Diff
@@ -12,18 +12,18 @@
|
|||||||
"test:watch": "vitest"
|
"test:watch": "vitest"
|
||||||
},
|
},
|
||||||
"devDependencies": {
|
"devDependencies": {
|
||||||
"@sveltejs/vite-plugin-svelte": "^7.0.0",
|
"@sveltejs/vite-plugin-svelte": "^5.0.3",
|
||||||
"@tailwindcss/vite": "^4.1.8",
|
"@tailwindcss/vite": "^4.1.8",
|
||||||
"@tsconfig/svelte": "^5.0.4",
|
"@tsconfig/svelte": "^5.0.4",
|
||||||
"@types/hast": "^3.0.4",
|
"@types/hast": "^3.0.4",
|
||||||
"@types/node": "^25.1.0",
|
"@types/node": "^25.1.0",
|
||||||
"svelte": "^5.46.4",
|
"svelte": "^5.19.0",
|
||||||
"svelte-check": "^4.1.4",
|
"svelte-check": "^4.1.4",
|
||||||
"tailwindcss": "^4.1.8",
|
"tailwindcss": "^4.1.8",
|
||||||
"typescript": "~5.8.3",
|
"typescript": "~5.8.3",
|
||||||
"vite": "^8.0.0",
|
"vite": "^6.3.5",
|
||||||
"vite-plugin-compression2": "^2.5.1",
|
"vite-plugin-compression2": "^2.4.0",
|
||||||
"vitest": "^4.1.0"
|
"vitest": "^4.0.18"
|
||||||
},
|
},
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"highlight.js": "^11.11.1",
|
"highlight.js": "^11.11.1",
|
||||||
|
|||||||
@@ -6,28 +6,23 @@
|
|||||||
import Models from "./routes/Models.svelte";
|
import Models from "./routes/Models.svelte";
|
||||||
import Activity from "./routes/Activity.svelte";
|
import Activity from "./routes/Activity.svelte";
|
||||||
import Playground from "./routes/Playground.svelte";
|
import Playground from "./routes/Playground.svelte";
|
||||||
import PlaygroundStub from "./routes/PlaygroundStub.svelte";
|
|
||||||
import { enableAPIEvents } from "./stores/api";
|
import { enableAPIEvents } from "./stores/api";
|
||||||
import { initScreenWidth, isDarkMode, appTitle, connectionState } from "./stores/theme";
|
import { initScreenWidth, isDarkMode, appTitle, connectionState } from "./stores/theme";
|
||||||
import { currentRoute } from "./stores/route";
|
|
||||||
|
|
||||||
const routes = {
|
const routes = {
|
||||||
"/": PlaygroundStub,
|
"/": Playground,
|
||||||
"/models": Models,
|
"/models": Models,
|
||||||
"/logs": LogViewer,
|
"/logs": LogViewer,
|
||||||
"/activity": Activity,
|
"/activity": Activity,
|
||||||
"*": PlaygroundStub,
|
"*": Playground,
|
||||||
};
|
};
|
||||||
|
|
||||||
function handleRouteLoaded(event: { detail: { route: string | RegExp } }) {
|
// Sync theme to document attribute
|
||||||
const route = event.detail.route;
|
|
||||||
currentRoute.set(typeof route === "string" ? route : "/");
|
|
||||||
}
|
|
||||||
|
|
||||||
$effect(() => {
|
$effect(() => {
|
||||||
document.documentElement.setAttribute("data-theme", $isDarkMode ? "dark" : "light");
|
document.documentElement.setAttribute("data-theme", $isDarkMode ? "dark" : "light");
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// Sync title to document
|
||||||
$effect(() => {
|
$effect(() => {
|
||||||
const icon = $connectionState === "connecting" ? "\u{1F7E1}" : $connectionState === "connected" ? "\u{1F7E2}" : "\u{1F534}";
|
const icon = $connectionState === "connecting" ? "\u{1F7E1}" : $connectionState === "connected" ? "\u{1F7E2}" : "\u{1F534}";
|
||||||
document.title = `${icon} ${$appTitle}`;
|
document.title = `${icon} ${$appTitle}`;
|
||||||
@@ -48,11 +43,6 @@
|
|||||||
<Header />
|
<Header />
|
||||||
|
|
||||||
<main class="flex-1 overflow-auto p-4">
|
<main class="flex-1 overflow-auto p-4">
|
||||||
<div class="h-full" class:hidden={$currentRoute !== "/"}>
|
<Router {routes} />
|
||||||
<Playground />
|
|
||||||
</div>
|
|
||||||
<div class="h-full" class:hidden={$currentRoute === "/"}>
|
|
||||||
<Router {routes} on:routeLoaded={handleRouteLoaded} />
|
|
||||||
</div>
|
|
||||||
</main>
|
</main>
|
||||||
</div>
|
</div>
|
||||||
|
|||||||
@@ -1,8 +1,6 @@
|
|||||||
<script lang="ts">
|
<script lang="ts">
|
||||||
import { link } from "svelte-spa-router";
|
import { link, location } from "svelte-spa-router";
|
||||||
import { screenWidth, toggleTheme, isDarkMode, appTitle, isNarrow } from "../stores/theme";
|
import { screenWidth, toggleTheme, isDarkMode, appTitle, isNarrow } from "../stores/theme";
|
||||||
import { currentRoute } from "../stores/route";
|
|
||||||
import { playgroundActivity } from "../stores/playgroundActivity";
|
|
||||||
import ConnectionStatus from "./ConnectionStatus.svelte";
|
import ConnectionStatus from "./ConnectionStatus.svelte";
|
||||||
|
|
||||||
function handleTitleChange(newTitle: string): void {
|
function handleTitleChange(newTitle: string): void {
|
||||||
@@ -24,10 +22,9 @@
|
|||||||
handleTitleChange(target.textContent || "(set title)");
|
handleTitleChange(target.textContent || "(set title)");
|
||||||
}
|
}
|
||||||
|
|
||||||
function isActive(path: string, current: string): boolean {
|
function isActive(path: string, currentLocation: string): boolean {
|
||||||
return path === "/" ? current === "/" : current.startsWith(path);
|
return path === "/" ? currentLocation === "/" : currentLocation.startsWith(path);
|
||||||
}
|
}
|
||||||
|
|
||||||
</script>
|
</script>
|
||||||
|
|
||||||
<header
|
<header
|
||||||
@@ -50,7 +47,8 @@
|
|||||||
<a
|
<a
|
||||||
href="/"
|
href="/"
|
||||||
use:link
|
use:link
|
||||||
class="p-1 whitespace-nowrap {isActive('/', $currentRoute) ? 'font-semibold' : ''} {$playgroundActivity ? 'activity-link' : 'text-gray-600 hover:text-black dark:text-gray-300 dark:hover:text-gray-100'}"
|
class="text-gray-600 hover:text-black dark:text-gray-300 dark:hover:text-gray-100 p-1 whitespace-nowrap"
|
||||||
|
class:font-semibold={isActive("/", $location)}
|
||||||
>
|
>
|
||||||
Playground
|
Playground
|
||||||
</a>
|
</a>
|
||||||
@@ -58,7 +56,7 @@
|
|||||||
href="/models"
|
href="/models"
|
||||||
use:link
|
use:link
|
||||||
class="text-gray-600 hover:text-black dark:text-gray-300 dark:hover:text-gray-100 p-1 whitespace-nowrap"
|
class="text-gray-600 hover:text-black dark:text-gray-300 dark:hover:text-gray-100 p-1 whitespace-nowrap"
|
||||||
class:font-semibold={isActive("/models", $currentRoute)}
|
class:font-semibold={isActive("/models", $location)}
|
||||||
>
|
>
|
||||||
Models
|
Models
|
||||||
</a>
|
</a>
|
||||||
@@ -66,7 +64,7 @@
|
|||||||
href="/activity"
|
href="/activity"
|
||||||
use:link
|
use:link
|
||||||
class="text-gray-600 hover:text-black dark:text-gray-300 dark:hover:text-gray-100 p-1 whitespace-nowrap"
|
class="text-gray-600 hover:text-black dark:text-gray-300 dark:hover:text-gray-100 p-1 whitespace-nowrap"
|
||||||
class:font-semibold={isActive("/activity", $currentRoute)}
|
class:font-semibold={isActive("/activity", $location)}
|
||||||
>
|
>
|
||||||
Activity
|
Activity
|
||||||
</a>
|
</a>
|
||||||
@@ -74,7 +72,7 @@
|
|||||||
href="/logs"
|
href="/logs"
|
||||||
use:link
|
use:link
|
||||||
class="text-gray-600 hover:text-black dark:text-gray-300 dark:hover:text-gray-100 p-1 whitespace-nowrap"
|
class="text-gray-600 hover:text-black dark:text-gray-300 dark:hover:text-gray-100 p-1 whitespace-nowrap"
|
||||||
class:font-semibold={isActive("/logs", $currentRoute)}
|
class:font-semibold={isActive("/logs", $location)}
|
||||||
>
|
>
|
||||||
Logs
|
Logs
|
||||||
</a>
|
</a>
|
||||||
@@ -98,23 +96,3 @@
|
|||||||
<ConnectionStatus />
|
<ConnectionStatus />
|
||||||
</menu>
|
</menu>
|
||||||
</header>
|
</header>
|
||||||
|
|
||||||
<style>
|
|
||||||
.activity-link {
|
|
||||||
background: linear-gradient(90deg, #6366f1, #8b5cf6, #a855f7, #8b5cf6, #6366f1);
|
|
||||||
background-size: 200% 100%;
|
|
||||||
-webkit-background-clip: text;
|
|
||||||
background-clip: text;
|
|
||||||
-webkit-text-fill-color: transparent;
|
|
||||||
animation: gradient-shift 2s linear infinite;
|
|
||||||
}
|
|
||||||
|
|
||||||
@keyframes gradient-shift {
|
|
||||||
0% {
|
|
||||||
background-position: 0% 50%;
|
|
||||||
}
|
|
||||||
100% {
|
|
||||||
background-position: 200% 50%;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
</style>
|
|
||||||
|
|||||||
@@ -65,17 +65,10 @@
|
|||||||
});
|
});
|
||||||
|
|
||||||
let preElement: HTMLPreElement;
|
let preElement: HTMLPreElement;
|
||||||
let userScrolledUp = $state(false);
|
|
||||||
|
|
||||||
function handleScroll() {
|
// Auto scroll to bottom when logs change
|
||||||
if (!preElement) return;
|
|
||||||
const { scrollTop, scrollHeight, clientHeight } = preElement;
|
|
||||||
userScrolledUp = scrollHeight - scrollTop - clientHeight > 40;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Auto scroll to bottom when logs change, unless user has scrolled up
|
|
||||||
$effect(() => {
|
$effect(() => {
|
||||||
if (preElement && filteredLogs && !userScrolledUp) {
|
if (preElement && filteredLogs) {
|
||||||
preElement.scrollTop = preElement.scrollHeight;
|
preElement.scrollTop = preElement.scrollHeight;
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
@@ -89,7 +82,7 @@
|
|||||||
<div class="flex gap-2 items-center">
|
<div class="flex gap-2 items-center">
|
||||||
<button class="btn border-0" onclick={toggleFontSize} title="Change font size">
|
<button class="btn border-0" onclick={toggleFontSize} title="Change font size">
|
||||||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="currentColor" class="w-4 h-4">
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="currentColor" class="w-4 h-4">
|
||||||
<path d="M2 4v3h5v12h3V7h5V4H2zm19 5h-9v3h3v7h3v-7h3V9z"/>
|
<path fill-rule="evenodd" d="M10.5 3.75a6 6 0 0 0-5.98 6.496A5.25 5.25 0 0 0 6.75 20.25H18a4.5 4.5 0 0 0 2.206-8.423 3.75 3.75 0 0 0-4.133-4.303A6.001 6.001 0 0 0 10.5 3.75Zm2.25 6a.75.75 0 0 0-1.5 0v4.94l-1.72-1.72a.75.75 0 0 0-1.06 1.06l3 3a.75.75 0 0 0 1.06 0l3-3a.75.75 0 1 0-1.06-1.06l-1.72 1.72V9.75Z" clip-rule="evenodd" />
|
||||||
</svg>
|
</svg>
|
||||||
</button>
|
</button>
|
||||||
<button class="btn border-0" onclick={toggleWrapText} title="Toggle text wrap">
|
<button class="btn border-0" onclick={toggleWrapText} title="Toggle text wrap">
|
||||||
@@ -134,6 +127,6 @@
|
|||||||
{/if}
|
{/if}
|
||||||
</div>
|
</div>
|
||||||
<div class="rounded-lg bg-background font-mono text-sm flex-1 overflow-hidden">
|
<div class="rounded-lg bg-background font-mono text-sm flex-1 overflow-hidden">
|
||||||
<pre bind:this={preElement} onscroll={handleScroll} class="{textWrapClass} {fontSizeClass} h-full overflow-auto p-4">{filteredLogs}</pre>
|
<pre bind:this={preElement} class="{textWrapClass} {fontSizeClass} h-full overflow-auto p-4">{filteredLogs}</pre>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|||||||
@@ -165,9 +165,6 @@
|
|||||||
{#if model.description}
|
{#if model.description}
|
||||||
<p class={model.unlisted ? "text-opacity-70" : ""}><em>{model.description}</em></p>
|
<p class={model.unlisted ? "text-opacity-70" : ""}><em>{model.description}</em></p>
|
||||||
{/if}
|
{/if}
|
||||||
{#if model.aliases && model.aliases.length > 0}
|
|
||||||
<p class="text-xs text-txtsecondary">Aliases: {model.aliases.join(", ")}</p>
|
|
||||||
{/if}
|
|
||||||
</td>
|
</td>
|
||||||
<td class="w-12">
|
<td class="w-12">
|
||||||
{#if model.state === "stopped"}
|
{#if model.state === "stopped"}
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
<script lang="ts">
|
<script lang="ts">
|
||||||
import { inFlightRequests, metrics } from "../stores/api";
|
import { metrics } from "../stores/api";
|
||||||
import TokenHistogram from "./TokenHistogram.svelte";
|
import TokenHistogram from "./TokenHistogram.svelte";
|
||||||
|
|
||||||
interface HistogramData {
|
interface HistogramData {
|
||||||
@@ -15,14 +15,7 @@
|
|||||||
let stats = $derived.by(() => {
|
let stats = $derived.by(() => {
|
||||||
const totalRequests = $metrics.length;
|
const totalRequests = $metrics.length;
|
||||||
if (totalRequests === 0) {
|
if (totalRequests === 0) {
|
||||||
return {
|
return { totalRequests: 0, totalInputTokens: 0, totalOutputTokens: 0, tokenStats: { p99: "0", p95: "0", p50: "0" }, histogramData: null };
|
||||||
totalRequests: 0,
|
|
||||||
totalInputTokens: 0,
|
|
||||||
totalOutputTokens: 0,
|
|
||||||
inFlightRequests: $inFlightRequests,
|
|
||||||
tokenStats: { p99: "0", p95: "0", p50: "0" },
|
|
||||||
histogramData: null,
|
|
||||||
};
|
|
||||||
}
|
}
|
||||||
|
|
||||||
const totalInputTokens = $metrics.reduce((sum, m) => sum + m.input_tokens, 0);
|
const totalInputTokens = $metrics.reduce((sum, m) => sum + m.input_tokens, 0);
|
||||||
@@ -31,14 +24,7 @@
|
|||||||
// Calculate token statistics using output_tokens and duration_ms
|
// Calculate token statistics using output_tokens and duration_ms
|
||||||
const validMetrics = $metrics.filter((m) => m.duration_ms > 0 && m.output_tokens > 0);
|
const validMetrics = $metrics.filter((m) => m.duration_ms > 0 && m.output_tokens > 0);
|
||||||
if (validMetrics.length === 0) {
|
if (validMetrics.length === 0) {
|
||||||
return {
|
return { totalRequests, totalInputTokens, totalOutputTokens, tokenStats: { p99: "0", p95: "0", p50: "0" }, histogramData: null };
|
||||||
totalRequests,
|
|
||||||
totalInputTokens,
|
|
||||||
totalOutputTokens,
|
|
||||||
inFlightRequests: $inFlightRequests,
|
|
||||||
tokenStats: { p99: "0", p95: "0", p50: "0" },
|
|
||||||
histogramData: null,
|
|
||||||
};
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Calculate tokens/second for each valid metric
|
// Calculate tokens/second for each valid metric
|
||||||
@@ -77,7 +63,6 @@
|
|||||||
totalRequests,
|
totalRequests,
|
||||||
totalInputTokens,
|
totalInputTokens,
|
||||||
totalOutputTokens,
|
totalOutputTokens,
|
||||||
inFlightRequests: $inFlightRequests,
|
|
||||||
tokenStats: {
|
tokenStats: {
|
||||||
p99: p99.toFixed(2),
|
p99: p99.toFixed(2),
|
||||||
p95: p95.toFixed(2),
|
p95: p95.toFixed(2),
|
||||||
@@ -110,12 +95,7 @@
|
|||||||
|
|
||||||
<tbody class="bg-surface divide-y divide-card-border-inner">
|
<tbody class="bg-surface divide-y divide-card-border-inner">
|
||||||
<tr class="hover:bg-secondary">
|
<tr class="hover:bg-secondary">
|
||||||
<td class="px-4 py-4 text-sm font-semibold text-gray-900 dark:text-white">
|
<td class="px-4 py-4 text-sm font-semibold text-gray-900 dark:text-white">{stats.totalRequests}</td>
|
||||||
<div class="flex flex-col gap-1">
|
|
||||||
<span class="text-xs font-medium text-gray-500 dark:text-gray-400">Completed: {nf.format(stats.totalRequests)}</span>
|
|
||||||
<span class="text-xs font-medium text-gray-500 dark:text-gray-400">Waiting: {nf.format(stats.inFlightRequests)}</span>
|
|
||||||
</div>
|
|
||||||
</td>
|
|
||||||
|
|
||||||
<td class="px-4 py-4 text-sm text-gray-700 dark:text-gray-300 border-l border-gray-200 dark:border-white/10">
|
<td class="px-4 py-4 text-sm text-gray-700 dark:text-gray-300 border-l border-gray-200 dark:border-white/10">
|
||||||
<div class="flex items-center gap-2">
|
<div class="flex items-center gap-2">
|
||||||
|
|||||||
@@ -2,7 +2,6 @@
|
|||||||
import { models } from "../../stores/api";
|
import { models } from "../../stores/api";
|
||||||
import { persistentStore } from "../../stores/persistent";
|
import { persistentStore } from "../../stores/persistent";
|
||||||
import { transcribeAudio } from "../../lib/audioApi";
|
import { transcribeAudio } from "../../lib/audioApi";
|
||||||
import { playgroundStores } from "../../stores/playgroundActivity";
|
|
||||||
import ModelSelector from "./ModelSelector.svelte";
|
import ModelSelector from "./ModelSelector.svelte";
|
||||||
|
|
||||||
const selectedModelStore = persistentStore<string>("playground-audio-model", "");
|
const selectedModelStore = persistentStore<string>("playground-audio-model", "");
|
||||||
@@ -16,22 +15,18 @@
|
|||||||
let fileInput = $state<HTMLInputElement | null>(null);
|
let fileInput = $state<HTMLInputElement | null>(null);
|
||||||
let copied = $state(false);
|
let copied = $state(false);
|
||||||
|
|
||||||
const ACCEPTED_FORMATS = ['.mp3', '.wav', '.ogg'];
|
const ACCEPTED_FORMATS = ['.mp3', '.wav'];
|
||||||
const MAX_FILE_SIZE = 25 * 1024 * 1024; // 25MB
|
const MAX_FILE_SIZE = 25 * 1024 * 1024; // 25MB
|
||||||
|
|
||||||
let hasModels = $derived($models.some((m) => !m.unlisted));
|
let hasModels = $derived($models.some((m) => !m.unlisted));
|
||||||
|
|
||||||
let canTranscribe = $derived(selectedFile !== null && $selectedModelStore !== "" && !isTranscribing);
|
let canTranscribe = $derived(selectedFile !== null && $selectedModelStore !== "" && !isTranscribing);
|
||||||
|
|
||||||
$effect(() => {
|
|
||||||
playgroundStores.audioTranscribing.set(isTranscribing);
|
|
||||||
});
|
|
||||||
|
|
||||||
function validateFile(file: File): { valid: boolean; error?: string } {
|
function validateFile(file: File): { valid: boolean; error?: string } {
|
||||||
const ext = '.' + file.name.split('.').pop()?.toLowerCase();
|
const ext = '.' + file.name.split('.').pop()?.toLowerCase();
|
||||||
|
|
||||||
if (!ACCEPTED_FORMATS.includes(ext)) {
|
if (!ACCEPTED_FORMATS.includes(ext)) {
|
||||||
return { valid: false, error: 'Invalid file type. Accepted: MP3, WAV, OGG' };
|
return { valid: false, error: 'Invalid file type. Accepted: MP3, WAV' };
|
||||||
}
|
}
|
||||||
|
|
||||||
if (file.size > MAX_FILE_SIZE) {
|
if (file.size > MAX_FILE_SIZE) {
|
||||||
@@ -208,7 +203,7 @@
|
|||||||
<div>
|
<div>
|
||||||
<p class="mb-2">Drag and drop an audio file here</p>
|
<p class="mb-2">Drag and drop an audio file here</p>
|
||||||
<p class="text-sm">or use the Browse button below</p>
|
<p class="text-sm">or use the Browse button below</p>
|
||||||
<p class="text-xs mt-4">Accepted formats: MP3, WAV, OGG (max 25MB)</p>
|
<p class="text-xs mt-4">Accepted formats: MP3, WAV (max 25MB)</p>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
{/if}
|
{/if}
|
||||||
@@ -218,7 +213,7 @@
|
|||||||
<div class="shrink-0 flex gap-2">
|
<div class="shrink-0 flex gap-2">
|
||||||
<input
|
<input
|
||||||
type="file"
|
type="file"
|
||||||
accept=".mp3,.wav,.ogg"
|
accept=".mp3,.wav"
|
||||||
class="hidden"
|
class="hidden"
|
||||||
onchange={handleFileSelect}
|
onchange={handleFileSelect}
|
||||||
bind:this={fileInput}
|
bind:this={fileInput}
|
||||||
|
|||||||
@@ -2,7 +2,6 @@
|
|||||||
import { models } from "../../stores/api";
|
import { models } from "../../stores/api";
|
||||||
import { persistentStore } from "../../stores/persistent";
|
import { persistentStore } from "../../stores/persistent";
|
||||||
import { streamChatCompletion } from "../../lib/chatApi";
|
import { streamChatCompletion } from "../../lib/chatApi";
|
||||||
import { playgroundStores } from "../../stores/playgroundActivity";
|
|
||||||
import type { ChatMessage, ContentPart } from "../../lib/types";
|
import type { ChatMessage, ContentPart } from "../../lib/types";
|
||||||
import ChatMessageComponent from "./ChatMessage.svelte";
|
import ChatMessageComponent from "./ChatMessage.svelte";
|
||||||
import ModelSelector from "./ModelSelector.svelte";
|
import ModelSelector from "./ModelSelector.svelte";
|
||||||
@@ -12,16 +11,7 @@
|
|||||||
const systemPromptStore = persistentStore<string>("playground-system-prompt", "");
|
const systemPromptStore = persistentStore<string>("playground-system-prompt", "");
|
||||||
const temperatureStore = persistentStore<number>("playground-temperature", 0.7);
|
const temperatureStore = persistentStore<number>("playground-temperature", 0.7);
|
||||||
|
|
||||||
function loadMessages(): ChatMessage[] {
|
let messages = $state<ChatMessage[]>([]);
|
||||||
try {
|
|
||||||
const saved = localStorage.getItem("playground-messages");
|
|
||||||
return saved ? JSON.parse(saved) : [];
|
|
||||||
} catch {
|
|
||||||
return [];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
let messages = $state<ChatMessage[]>(loadMessages());
|
|
||||||
let userInput = $state("");
|
let userInput = $state("");
|
||||||
let isStreaming = $state(false);
|
let isStreaming = $state(false);
|
||||||
let isReasoning = $state(false);
|
let isReasoning = $state(false);
|
||||||
@@ -34,52 +24,21 @@
|
|||||||
let imageError = $state<string | null>(null);
|
let imageError = $state<string | null>(null);
|
||||||
|
|
||||||
let hasModels = $derived($models.some((m) => !m.unlisted));
|
let hasModels = $derived($models.some((m) => !m.unlisted));
|
||||||
let userScrolledUp = $state(false);
|
|
||||||
|
|
||||||
|
// Auto-scroll when messages change
|
||||||
$effect(() => {
|
$effect(() => {
|
||||||
playgroundStores.chatStreaming.set(isStreaming);
|
if (messages.length > 0 && messagesContainer) {
|
||||||
});
|
|
||||||
|
|
||||||
function handleMessagesScroll() {
|
|
||||||
if (!messagesContainer) return;
|
|
||||||
const { scrollTop, scrollHeight, clientHeight } = messagesContainer;
|
|
||||||
// Consider "at bottom" if within 40px of the bottom
|
|
||||||
userScrolledUp = scrollHeight - scrollTop - clientHeight > 40;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Auto-scroll when messages change — skip if user scrolled up
|
|
||||||
$effect(() => {
|
|
||||||
if (messages.length > 0 && messagesContainer && !userScrolledUp) {
|
|
||||||
messagesContainer.scrollTo({
|
messagesContainer.scrollTo({
|
||||||
top: messagesContainer.scrollHeight,
|
top: messagesContainer.scrollHeight,
|
||||||
behavior: isStreaming ? "instant" : "smooth",
|
behavior: "smooth",
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
// Persist messages to localStorage (throttled to once per 2s)
|
|
||||||
let lastSaveTime = 0;
|
|
||||||
$effect(() => {
|
|
||||||
const json = JSON.stringify(messages);
|
|
||||||
const elapsed = Date.now() - lastSaveTime;
|
|
||||||
const save = () => {
|
|
||||||
try { localStorage.setItem("playground-messages", json); } catch {}
|
|
||||||
lastSaveTime = Date.now();
|
|
||||||
};
|
|
||||||
if (elapsed >= 2000) {
|
|
||||||
save();
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
const timer = setTimeout(save, 2000 - elapsed);
|
|
||||||
return () => clearTimeout(timer);
|
|
||||||
});
|
|
||||||
|
|
||||||
async function sendMessage() {
|
async function sendMessage() {
|
||||||
const trimmedInput = userInput.trim();
|
const trimmedInput = userInput.trim();
|
||||||
if ((!trimmedInput && attachedImages.length === 0) || !$selectedModelStore || isStreaming) return;
|
if ((!trimmedInput && attachedImages.length === 0) || !$selectedModelStore || isStreaming) return;
|
||||||
|
|
||||||
userScrolledUp = false;
|
|
||||||
|
|
||||||
// Build message content (multimodal if images attached)
|
// Build message content (multimodal if images attached)
|
||||||
let content: string | ContentPart[];
|
let content: string | ContentPart[];
|
||||||
if (attachedImages.length > 0) {
|
if (attachedImages.length > 0) {
|
||||||
@@ -362,7 +321,6 @@
|
|||||||
<div
|
<div
|
||||||
class="flex-1 overflow-y-auto mb-4 px-2"
|
class="flex-1 overflow-y-auto mb-4 px-2"
|
||||||
bind:this={messagesContainer}
|
bind:this={messagesContainer}
|
||||||
onscroll={handleMessagesScroll}
|
|
||||||
>
|
>
|
||||||
{#if messages.length === 0}
|
{#if messages.length === 0}
|
||||||
<div class="h-full flex items-center justify-center text-txtsecondary">
|
<div class="h-full flex items-center justify-center text-txtsecondary">
|
||||||
|
|||||||
@@ -1,6 +1,5 @@
|
|||||||
<script lang="ts">
|
<script lang="ts">
|
||||||
import { renderMarkdown, escapeHtml, renderStreamingMarkdown, createStreamingCache } from "../../lib/markdown";
|
import { renderMarkdown, escapeHtml } from "../../lib/markdown";
|
||||||
import type { RenderedBlock } from "../../lib/markdown";
|
|
||||||
import { Copy, Check, Pencil, X, Save, RefreshCw, ChevronDown, ChevronRight, Brain, Code } from "lucide-svelte";
|
import { Copy, Check, Pencil, X, Save, RefreshCw, ChevronDown, ChevronRight, Brain, Code } from "lucide-svelte";
|
||||||
import { getTextContent, getImageUrls } from "../../lib/types";
|
import { getTextContent, getImageUrls } from "../../lib/types";
|
||||||
import type { ContentPart } from "../../lib/types";
|
import type { ContentPart } from "../../lib/types";
|
||||||
@@ -23,17 +22,11 @@
|
|||||||
let hasImages = $derived(imageUrls.length > 0);
|
let hasImages = $derived(imageUrls.length > 0);
|
||||||
let canEdit = $derived(onEdit !== undefined && !hasImages);
|
let canEdit = $derived(onEdit !== undefined && !hasImages);
|
||||||
|
|
||||||
let streamingCache = createStreamingCache();
|
let renderedContent = $derived(
|
||||||
let renderedParts = $derived.by(() => {
|
role === "assistant" && !isStreaming
|
||||||
if (role !== "assistant") {
|
? renderMarkdown(textContent)
|
||||||
return { blocks: [{ id: -1, html: escapeHtml(textContent).replace(/\n/g, '<br>') }] as RenderedBlock[], pendingHtml: "" };
|
: escapeHtml(textContent).replace(/\n/g, '<br>')
|
||||||
}
|
);
|
||||||
if (!isStreaming) {
|
|
||||||
streamingCache = createStreamingCache();
|
|
||||||
return { blocks: [{ id: -1, html: renderMarkdown(textContent) }] as RenderedBlock[], pendingHtml: "" };
|
|
||||||
}
|
|
||||||
return renderStreamingMarkdown(textContent, streamingCache);
|
|
||||||
});
|
|
||||||
let copied = $state(false);
|
let copied = $state(false);
|
||||||
let showRaw = $state(false);
|
let showRaw = $state(false);
|
||||||
let isEditing = $state(false);
|
let isEditing = $state(false);
|
||||||
@@ -116,54 +109,13 @@
|
|||||||
cancelEdit();
|
cancelEdit();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const COPY_SVG = `<svg xmlns="http://www.w3.org/2000/svg" width="14" height="14" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><rect width="14" height="14" x="8" y="8" rx="2" ry="2"/><path d="M4 16c-1.1 0-2-.9-2-2V4c0-1.1.9-2 2-2h10c1.1 0 2 .9 2 2"/></svg>`;
|
|
||||||
const CHECK_SVG = `<svg xmlns="http://www.w3.org/2000/svg" width="14" height="14" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M20 6 9 17l-5-5"/></svg>`;
|
|
||||||
|
|
||||||
function codeBlockCopy(node: HTMLElement) {
|
|
||||||
function attachButtons() {
|
|
||||||
node.querySelectorAll<HTMLPreElement>('pre:not([data-copy-btn])').forEach(pre => {
|
|
||||||
pre.setAttribute('data-copy-btn', 'true');
|
|
||||||
const btn = document.createElement('button');
|
|
||||||
btn.className = 'code-copy-btn';
|
|
||||||
btn.title = 'Copy code';
|
|
||||||
btn.innerHTML = COPY_SVG;
|
|
||||||
btn.addEventListener('click', async () => {
|
|
||||||
const text = pre.querySelector('code')?.textContent ?? pre.textContent ?? '';
|
|
||||||
try {
|
|
||||||
if (navigator.clipboard && window.isSecureContext) {
|
|
||||||
await navigator.clipboard.writeText(text);
|
|
||||||
} else {
|
|
||||||
const ta = document.createElement('textarea');
|
|
||||||
ta.value = text;
|
|
||||||
ta.style.cssText = 'position:fixed;left:-9999px';
|
|
||||||
document.body.appendChild(ta);
|
|
||||||
ta.select();
|
|
||||||
document.execCommand('copy');
|
|
||||||
document.body.removeChild(ta);
|
|
||||||
}
|
|
||||||
btn.innerHTML = CHECK_SVG;
|
|
||||||
btn.classList.add('copied');
|
|
||||||
setTimeout(() => { btn.innerHTML = COPY_SVG; btn.classList.remove('copied'); }, 2000);
|
|
||||||
} catch (e) {
|
|
||||||
console.error('copy failed', e);
|
|
||||||
}
|
|
||||||
});
|
|
||||||
pre.appendChild(btn);
|
|
||||||
});
|
|
||||||
}
|
|
||||||
attachButtons();
|
|
||||||
const mo = new MutationObserver(attachButtons);
|
|
||||||
mo.observe(node, { childList: true, subtree: true });
|
|
||||||
return { destroy: () => mo.disconnect() };
|
|
||||||
}
|
|
||||||
</script>
|
</script>
|
||||||
|
|
||||||
<div class="flex {role === 'user' ? 'justify-end' : 'justify-start'} mb-4">
|
<div class="flex {role === 'user' ? 'justify-end' : 'justify-start'} mb-4">
|
||||||
<div
|
<div
|
||||||
class="relative group rounded-lg px-4 py-2 {role === 'user'
|
class="relative group max-w-[85%] rounded-lg px-4 py-2 {role === 'user'
|
||||||
? 'max-w-[85%] bg-primary text-btn-primary-text'
|
? 'bg-primary text-btn-primary-text'
|
||||||
: 'w-full sm:w-4/5 bg-surface border border-gray-200 dark:border-white/10'}"
|
: 'bg-surface border border-gray-200 dark:border-white/10'}"
|
||||||
>
|
>
|
||||||
{#if role === "assistant"}
|
{#if role === "assistant"}
|
||||||
{#if reasoning_content || isReasoning}
|
{#if reasoning_content || isReasoning}
|
||||||
@@ -215,11 +167,8 @@
|
|||||||
{#if showRaw}
|
{#if showRaw}
|
||||||
<div class="whitespace-pre-wrap font-mono text-sm">{textContent}</div>
|
<div class="whitespace-pre-wrap font-mono text-sm">{textContent}</div>
|
||||||
{:else}
|
{:else}
|
||||||
<div class="prose prose-sm dark:prose-invert max-w-none" use:codeBlockCopy>
|
<div class="prose prose-sm dark:prose-invert max-w-none">
|
||||||
{#each renderedParts.blocks as block (block.id)}
|
{@html renderedContent}
|
||||||
{@html block.html}
|
|
||||||
{/each}
|
|
||||||
{@html renderedParts.pendingHtml}
|
|
||||||
{#if isStreaming && !isReasoning}
|
{#if isStreaming && !isReasoning}
|
||||||
<span class="inline-block w-2 h-4 bg-current animate-pulse ml-0.5"></span>
|
<span class="inline-block w-2 h-4 bg-current animate-pulse ml-0.5"></span>
|
||||||
{/if}
|
{/if}
|
||||||
@@ -340,42 +289,14 @@
|
|||||||
|
|
||||||
<style>
|
<style>
|
||||||
.prose :global(pre) {
|
.prose :global(pre) {
|
||||||
position: relative;
|
|
||||||
background-color: var(--color-surface);
|
background-color: var(--color-surface);
|
||||||
border: 1px solid var(--color-border, rgba(128, 128, 128, 0.2));
|
border: 1px solid var(--color-border, rgba(128, 128, 128, 0.2));
|
||||||
border-radius: 0.375rem;
|
border-radius: 0.375rem;
|
||||||
padding: 0.75rem;
|
padding: 0.75rem;
|
||||||
padding-right: 2.5rem;
|
|
||||||
overflow-x: auto;
|
overflow-x: auto;
|
||||||
margin: 0.5rem 0;
|
margin: 0.5rem 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
.prose :global(.code-copy-btn) {
|
|
||||||
position: absolute;
|
|
||||||
top: 0.375rem;
|
|
||||||
right: 0.375rem;
|
|
||||||
display: flex;
|
|
||||||
align-items: center;
|
|
||||||
justify-content: center;
|
|
||||||
padding: 0.25rem;
|
|
||||||
border-radius: 0.25rem;
|
|
||||||
border: 1px solid var(--color-border);
|
|
||||||
background: var(--color-surface);
|
|
||||||
color: var(--color-txtsecondary);
|
|
||||||
cursor: pointer;
|
|
||||||
transition: background-color 0.15s;
|
|
||||||
line-height: 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
.prose :global(.code-copy-btn:hover) {
|
|
||||||
background: var(--color-secondary);
|
|
||||||
}
|
|
||||||
|
|
||||||
.prose :global(.code-copy-btn.copied) {
|
|
||||||
color: var(--color-success);
|
|
||||||
opacity: 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
.prose :global(code) {
|
.prose :global(code) {
|
||||||
font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, monospace;
|
font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, monospace;
|
||||||
font-size: 0.875em;
|
font-size: 0.875em;
|
||||||
|
|||||||
@@ -2,90 +2,20 @@
|
|||||||
import { models } from "../../stores/api";
|
import { models } from "../../stores/api";
|
||||||
import { persistentStore } from "../../stores/persistent";
|
import { persistentStore } from "../../stores/persistent";
|
||||||
import { generateImage } from "../../lib/imageApi";
|
import { generateImage } from "../../lib/imageApi";
|
||||||
import { generateSdImage, fetchSdLoras } from "../../lib/sdApi";
|
|
||||||
import { playgroundStores } from "../../stores/playgroundActivity";
|
|
||||||
import ModelSelector from "./ModelSelector.svelte";
|
import ModelSelector from "./ModelSelector.svelte";
|
||||||
import ExpandableTextarea from "./ExpandableTextarea.svelte";
|
import ExpandableTextarea from "./ExpandableTextarea.svelte";
|
||||||
import type { ImageApiMode, SdApiLora, SdApiLoraRef } from "../../lib/types";
|
|
||||||
|
|
||||||
const selectedModelStore = persistentStore<string>("playground-image-model", "");
|
const selectedModelStore = persistentStore<string>("playground-image-model", "");
|
||||||
const selectedSizeStore = persistentStore<string>("playground-image-size", "1024x1024");
|
const selectedSizeStore = persistentStore<string>("playground-image-size", "1024x1024");
|
||||||
const apiModeStore = persistentStore<ImageApiMode>("playground-image-api-mode", "openai");
|
|
||||||
|
|
||||||
// SDAPI persistent settings
|
|
||||||
const sdNegativePromptStore = persistentStore<string>("playground-sdapi-negative-prompt", "");
|
|
||||||
const sdStepsStore = persistentStore<number>("playground-sdapi-steps", 20);
|
|
||||||
const sdCfgScaleStore = persistentStore<number>("playground-sdapi-cfg-scale", 7);
|
|
||||||
const sdSeedStore = persistentStore<number>("playground-sdapi-seed", -1);
|
|
||||||
const sdSamplerStore = persistentStore<string>("playground-sdapi-sampler", "");
|
|
||||||
const sdSchedulerStore = persistentStore<string>("playground-sdapi-scheduler", "");
|
|
||||||
const sdBatchSizeStore = persistentStore<number>("playground-sdapi-batch-size", 1);
|
|
||||||
|
|
||||||
let prompt = $state("");
|
let prompt = $state("");
|
||||||
let isGenerating = $state(false);
|
let isGenerating = $state(false);
|
||||||
let generatedImages = $state<string[]>([]);
|
let generatedImage = $state<string | null>(null);
|
||||||
let error = $state<string | null>(null);
|
let error = $state<string | null>(null);
|
||||||
let abortController = $state<AbortController | null>(null);
|
let abortController = $state<AbortController | null>(null);
|
||||||
let showFullscreen = $state(false);
|
let showFullscreen = $state(false);
|
||||||
let fullscreenIndex = $state(0);
|
|
||||||
let showSettings = $state(false);
|
|
||||||
|
|
||||||
// SDAPI lora state
|
|
||||||
let availableLoras = $state<SdApiLora[]>([]);
|
|
||||||
let selectedLoras = $state<SdApiLoraRef[]>([]);
|
|
||||||
let isLoadingLoras = $state(false);
|
|
||||||
let lorasLoaded = $state(false);
|
|
||||||
let loraError = $state<string | null>(null);
|
|
||||||
|
|
||||||
let hasModels = $derived($models.some((m) => !m.unlisted));
|
let hasModels = $derived($models.some((m) => !m.unlisted));
|
||||||
let isSdapi = $derived($apiModeStore === "sdapi");
|
|
||||||
|
|
||||||
$effect(() => {
|
|
||||||
playgroundStores.imageGenerating.set(isGenerating);
|
|
||||||
});
|
|
||||||
|
|
||||||
async function loadLoras() {
|
|
||||||
if (!$selectedModelStore || isLoadingLoras) return;
|
|
||||||
isLoadingLoras = true;
|
|
||||||
loraError = null;
|
|
||||||
try {
|
|
||||||
const loras = await fetchSdLoras($selectedModelStore);
|
|
||||||
availableLoras = loras;
|
|
||||||
lorasLoaded = true;
|
|
||||||
} catch (err) {
|
|
||||||
availableLoras = [];
|
|
||||||
loraError = err instanceof Error ? err.message : "Failed to load LoRAs";
|
|
||||||
lorasLoaded = false;
|
|
||||||
} finally {
|
|
||||||
isLoadingLoras = false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
function addLora(event: Event) {
|
|
||||||
const select = event.target as HTMLSelectElement;
|
|
||||||
const path = select.value;
|
|
||||||
if (!path) return;
|
|
||||||
|
|
||||||
const lora = availableLoras.find((l) => l.path === path);
|
|
||||||
if (lora && !selectedLoras.some((l) => l.path === path)) {
|
|
||||||
selectedLoras = [...selectedLoras, { path: lora.path, multiplier: 1.0 }];
|
|
||||||
}
|
|
||||||
select.value = "";
|
|
||||||
}
|
|
||||||
|
|
||||||
function removeLora(path: string) {
|
|
||||||
selectedLoras = selectedLoras.filter((l) => l.path !== path);
|
|
||||||
}
|
|
||||||
|
|
||||||
function updateLoraMultiplier(path: string, multiplier: number) {
|
|
||||||
selectedLoras = selectedLoras.map((l) =>
|
|
||||||
l.path === path ? { ...l, multiplier } : l
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
function getLoraName(path: string): string {
|
|
||||||
return availableLoras.find((l) => l.path === path)?.name ?? path;
|
|
||||||
}
|
|
||||||
|
|
||||||
async function generate() {
|
async function generate() {
|
||||||
const trimmedPrompt = prompt.trim();
|
const trimmedPrompt = prompt.trim();
|
||||||
@@ -96,44 +26,19 @@
|
|||||||
abortController = new AbortController();
|
abortController = new AbortController();
|
||||||
|
|
||||||
try {
|
try {
|
||||||
if (isSdapi) {
|
const response = await generateImage(
|
||||||
const [w, h] = $selectedSizeStore.split("x").map(Number);
|
$selectedModelStore,
|
||||||
const request = {
|
trimmedPrompt,
|
||||||
model: $selectedModelStore,
|
$selectedSizeStore,
|
||||||
prompt: trimmedPrompt,
|
abortController.signal
|
||||||
negative_prompt: $sdNegativePromptStore || undefined,
|
);
|
||||||
width: w,
|
|
||||||
height: h,
|
|
||||||
steps: $sdStepsStore,
|
|
||||||
cfg_scale: $sdCfgScaleStore,
|
|
||||||
seed: $sdSeedStore,
|
|
||||||
batch_size: $sdBatchSizeStore,
|
|
||||||
sampler_name: $sdSamplerStore || undefined,
|
|
||||||
scheduler: $sdSchedulerStore || undefined,
|
|
||||||
lora: selectedLoras.length > 0 ? selectedLoras : undefined,
|
|
||||||
};
|
|
||||||
|
|
||||||
const response = await generateSdImage(request, abortController.signal);
|
if (response.data && response.data.length > 0) {
|
||||||
if (response.images && response.images.length > 0) {
|
const imageData = response.data[0];
|
||||||
generatedImages = response.images.map(
|
if (imageData.b64_json) {
|
||||||
(img) => `data:image/png;base64,${img}`
|
generatedImage = `data:image/png;base64,${imageData.b64_json}`;
|
||||||
);
|
} else if (imageData.url) {
|
||||||
}
|
generatedImage = imageData.url;
|
||||||
} else {
|
|
||||||
const response = await generateImage(
|
|
||||||
$selectedModelStore,
|
|
||||||
trimmedPrompt,
|
|
||||||
$selectedSizeStore,
|
|
||||||
abortController.signal
|
|
||||||
);
|
|
||||||
|
|
||||||
if (response.data && response.data.length > 0) {
|
|
||||||
const imageData = response.data[0];
|
|
||||||
if (imageData.b64_json) {
|
|
||||||
generatedImages = [`data:image/png;base64,${imageData.b64_json}`];
|
|
||||||
} else if (imageData.url) {
|
|
||||||
generatedImages = [imageData.url];
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
@@ -153,29 +58,28 @@
|
|||||||
}
|
}
|
||||||
|
|
||||||
function clearImage() {
|
function clearImage() {
|
||||||
generatedImages = [];
|
generatedImage = null;
|
||||||
error = null;
|
error = null;
|
||||||
prompt = "";
|
prompt = "";
|
||||||
}
|
}
|
||||||
|
|
||||||
function downloadImage(index: number = 0) {
|
function downloadImage() {
|
||||||
const img = generatedImages[index];
|
if (!generatedImage) return;
|
||||||
if (!img) return;
|
|
||||||
|
|
||||||
const link = document.createElement("a");
|
const link = document.createElement("a");
|
||||||
link.href = img;
|
link.href = generatedImage;
|
||||||
link.download = `generated-image-${Date.now()}-${index}.png`;
|
link.download = `generated-image-${Date.now()}.png`;
|
||||||
document.body.appendChild(link);
|
document.body.appendChild(link);
|
||||||
link.click();
|
link.click();
|
||||||
document.body.removeChild(link);
|
document.body.removeChild(link);
|
||||||
}
|
}
|
||||||
|
|
||||||
function openFullscreen(index: number = 0) {
|
function openFullscreen() {
|
||||||
fullscreenIndex = index;
|
|
||||||
showFullscreen = true;
|
showFullscreen = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
function closeFullscreen(event?: MouseEvent) {
|
function closeFullscreen(event?: MouseEvent) {
|
||||||
|
// Only close if clicking the background, not the image
|
||||||
if (event && event.target !== event.currentTarget) {
|
if (event && event.target !== event.currentTarget) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@@ -191,19 +95,9 @@
|
|||||||
</script>
|
</script>
|
||||||
|
|
||||||
<div class="flex flex-col h-full">
|
<div class="flex flex-col h-full">
|
||||||
<!-- Model selector and mode toggle -->
|
<!-- Model selector -->
|
||||||
<div class="shrink-0 flex flex-wrap gap-2 mb-4">
|
<div class="shrink-0 flex flex-wrap gap-2 mb-4">
|
||||||
<ModelSelector bind:value={$selectedModelStore} placeholder="Select an image model..." disabled={isGenerating} />
|
<ModelSelector bind:value={$selectedModelStore} placeholder="Select an image model..." disabled={isGenerating} />
|
||||||
|
|
||||||
<select
|
|
||||||
class="px-3 py-2 rounded border border-gray-200 dark:border-white/10 bg-surface focus:outline-none focus:ring-2 focus:ring-primary"
|
|
||||||
bind:value={$apiModeStore}
|
|
||||||
disabled={isGenerating}
|
|
||||||
>
|
|
||||||
<option value="openai">OpenAI</option>
|
|
||||||
<option value="sdapi">SDAPI</option>
|
|
||||||
</select>
|
|
||||||
|
|
||||||
<select
|
<select
|
||||||
class="px-3 py-2 rounded border border-gray-200 dark:border-white/10 bg-surface focus:outline-none focus:ring-2 focus:ring-primary"
|
class="px-3 py-2 rounded border border-gray-200 dark:border-white/10 bg-surface focus:outline-none focus:ring-2 focus:ring-primary"
|
||||||
bind:value={$selectedSizeStore}
|
bind:value={$selectedSizeStore}
|
||||||
@@ -224,166 +118,8 @@
|
|||||||
<option value="1024x1792">1024x1792 (SDXL)</option>
|
<option value="1024x1792">1024x1792 (SDXL)</option>
|
||||||
</optgroup>
|
</optgroup>
|
||||||
</select>
|
</select>
|
||||||
|
|
||||||
{#if isSdapi}
|
|
||||||
<button
|
|
||||||
class="px-3 py-2 rounded border border-gray-200 dark:border-white/10 bg-surface hover:bg-secondary-hover transition-colors"
|
|
||||||
onclick={() => showSettings = !showSettings}
|
|
||||||
>
|
|
||||||
{showSettings ? "Hide Settings" : "Settings"}
|
|
||||||
</button>
|
|
||||||
{/if}
|
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<!-- SDAPI Settings Panel -->
|
|
||||||
{#if isSdapi && showSettings}
|
|
||||||
<div class="shrink-0 mb-4 p-4 rounded border border-gray-200 dark:border-white/10 bg-surface">
|
|
||||||
<div class="grid grid-cols-2 md:grid-cols-4 gap-3 mb-3">
|
|
||||||
<label class="flex flex-col gap-1">
|
|
||||||
<span class="text-xs text-txtsecondary">Steps</span>
|
|
||||||
<input
|
|
||||||
type="number"
|
|
||||||
class="px-2 py-1 rounded border border-gray-200 dark:border-white/10 bg-surface focus:outline-none focus:ring-2 focus:ring-primary"
|
|
||||||
bind:value={$sdStepsStore}
|
|
||||||
min="1"
|
|
||||||
max="150"
|
|
||||||
/>
|
|
||||||
</label>
|
|
||||||
<label class="flex flex-col gap-1">
|
|
||||||
<span class="text-xs text-txtsecondary">CFG Scale</span>
|
|
||||||
<input
|
|
||||||
type="number"
|
|
||||||
class="px-2 py-1 rounded border border-gray-200 dark:border-white/10 bg-surface focus:outline-none focus:ring-2 focus:ring-primary"
|
|
||||||
bind:value={$sdCfgScaleStore}
|
|
||||||
min="1"
|
|
||||||
max="30"
|
|
||||||
step="0.5"
|
|
||||||
/>
|
|
||||||
</label>
|
|
||||||
<label class="flex flex-col gap-1">
|
|
||||||
<span class="text-xs text-txtsecondary">Seed (-1 = random)</span>
|
|
||||||
<input
|
|
||||||
type="number"
|
|
||||||
class="px-2 py-1 rounded border border-gray-200 dark:border-white/10 bg-surface focus:outline-none focus:ring-2 focus:ring-primary"
|
|
||||||
bind:value={$sdSeedStore}
|
|
||||||
min="-1"
|
|
||||||
/>
|
|
||||||
</label>
|
|
||||||
<label class="flex flex-col gap-1">
|
|
||||||
<span class="text-xs text-txtsecondary">Batch Size</span>
|
|
||||||
<input
|
|
||||||
type="number"
|
|
||||||
class="px-2 py-1 rounded border border-gray-200 dark:border-white/10 bg-surface focus:outline-none focus:ring-2 focus:ring-primary"
|
|
||||||
bind:value={$sdBatchSizeStore}
|
|
||||||
min="1"
|
|
||||||
max="8"
|
|
||||||
/>
|
|
||||||
</label>
|
|
||||||
<label class="flex flex-col gap-1">
|
|
||||||
<span class="text-xs text-txtsecondary">Sampler</span>
|
|
||||||
<select
|
|
||||||
class="px-2 py-1 rounded border border-gray-200 dark:border-white/10 bg-surface focus:outline-none focus:ring-2 focus:ring-primary"
|
|
||||||
bind:value={$sdSamplerStore}
|
|
||||||
>
|
|
||||||
<option value="">Default</option>
|
|
||||||
<option value="euler_a">euler_a</option>
|
|
||||||
<option value="euler">euler</option>
|
|
||||||
<option value="heun">heun</option>
|
|
||||||
<option value="dpm2">dpm2</option>
|
|
||||||
<option value="dpmpp2s_a">dpmpp2s_a</option>
|
|
||||||
<option value="dpmpp2m">dpmpp2m</option>
|
|
||||||
<option value="dpmpp2mv2">dpmpp2mv2</option>
|
|
||||||
<option value="ipndm">ipndm</option>
|
|
||||||
<option value="ipndm_v">ipndm_v</option>
|
|
||||||
<option value="lcm">lcm</option>
|
|
||||||
<option value="ddim_trailing">ddim_trailing</option>
|
|
||||||
<option value="tcd">tcd</option>
|
|
||||||
</select>
|
|
||||||
</label>
|
|
||||||
<label class="flex flex-col gap-1">
|
|
||||||
<span class="text-xs text-txtsecondary">Scheduler</span>
|
|
||||||
<select
|
|
||||||
class="px-2 py-1 rounded border border-gray-200 dark:border-white/10 bg-surface focus:outline-none focus:ring-2 focus:ring-primary"
|
|
||||||
bind:value={$sdSchedulerStore}
|
|
||||||
>
|
|
||||||
<option value="">Auto for model</option>
|
|
||||||
<option value="discrete">discrete</option>
|
|
||||||
<option value="karras">karras</option>
|
|
||||||
<option value="exponential">exponential</option>
|
|
||||||
<option value="ays">ays</option>
|
|
||||||
<option value="gits">gits</option>
|
|
||||||
</select>
|
|
||||||
</label>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
<label class="flex flex-col gap-1 mb-3">
|
|
||||||
<span class="text-xs text-txtsecondary">Negative Prompt</span>
|
|
||||||
<textarea
|
|
||||||
class="px-2 py-1 rounded border border-gray-200 dark:border-white/10 bg-surface focus:outline-none focus:ring-2 focus:ring-primary resize-y text-sm"
|
|
||||||
bind:value={$sdNegativePromptStore}
|
|
||||||
rows="2"
|
|
||||||
placeholder="Elements to avoid..."
|
|
||||||
></textarea>
|
|
||||||
</label>
|
|
||||||
|
|
||||||
<!-- LoRA Selection -->
|
|
||||||
<div>
|
|
||||||
<span class="text-xs text-txtsecondary block mb-1">LoRAs</span>
|
|
||||||
<div class="flex items-center gap-2 mb-2">
|
|
||||||
<button
|
|
||||||
class="px-3 py-1.5 text-sm rounded border border-gray-200 dark:border-white/10 bg-surface hover:bg-secondary-hover transition-colors disabled:opacity-50"
|
|
||||||
onclick={loadLoras}
|
|
||||||
disabled={!$selectedModelStore || isLoadingLoras}
|
|
||||||
>
|
|
||||||
{isLoadingLoras ? "Loading..." : lorasLoaded ? "Reload LoRAs" : "Load LoRAs"}
|
|
||||||
</button>
|
|
||||||
{#if lorasLoaded && availableLoras.length > 0}
|
|
||||||
<select
|
|
||||||
class="flex-1 px-2 py-1.5 text-sm rounded border border-gray-200 dark:border-white/10 bg-surface focus:outline-none focus:ring-2 focus:ring-primary"
|
|
||||||
onchange={addLora}
|
|
||||||
>
|
|
||||||
<option value="">Add a LoRA...</option>
|
|
||||||
{#each availableLoras.filter((l) => !selectedLoras.some((s) => s.path === l.path)) as lora}
|
|
||||||
<option value={lora.path}>{lora.name}</option>
|
|
||||||
{/each}
|
|
||||||
</select>
|
|
||||||
{/if}
|
|
||||||
</div>
|
|
||||||
{#if loraError}
|
|
||||||
<p class="text-xs text-red-500 mb-1">{loraError}</p>
|
|
||||||
{/if}
|
|
||||||
{#if lorasLoaded && availableLoras.length === 0}
|
|
||||||
<p class="text-xs text-txtsecondary">No LoRAs available</p>
|
|
||||||
{/if}
|
|
||||||
{#if selectedLoras.length > 0}
|
|
||||||
<div class="flex flex-col gap-1.5">
|
|
||||||
{#each selectedLoras as lora}
|
|
||||||
<div class="flex items-center gap-2 text-sm">
|
|
||||||
<span class="flex-1 truncate">{getLoraName(lora.path)}</span>
|
|
||||||
<input
|
|
||||||
type="number"
|
|
||||||
class="w-20 px-1.5 py-1 text-xs rounded border border-gray-200 dark:border-white/10 bg-surface focus:outline-none focus:ring-1 focus:ring-primary"
|
|
||||||
value={lora.multiplier}
|
|
||||||
oninput={(e) => updateLoraMultiplier(lora.path, parseFloat((e.target as HTMLInputElement).value) || 1)}
|
|
||||||
min="0"
|
|
||||||
max="2"
|
|
||||||
step="0.1"
|
|
||||||
/>
|
|
||||||
<button
|
|
||||||
class="px-1.5 py-0.5 text-xs rounded border border-gray-200 dark:border-white/10 hover:bg-red-500 hover:text-white hover:border-red-500 transition-colors"
|
|
||||||
onclick={() => removeLora(lora.path)}
|
|
||||||
aria-label="Remove LoRA"
|
|
||||||
>
|
|
||||||
x
|
|
||||||
</button>
|
|
||||||
</div>
|
|
||||||
{/each}
|
|
||||||
</div>
|
|
||||||
{/if}
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
{/if}
|
|
||||||
|
|
||||||
<!-- Empty state for no models configured -->
|
<!-- Empty state for no models configured -->
|
||||||
{#if !hasModels}
|
{#if !hasModels}
|
||||||
<div class="flex-1 flex items-center justify-center text-txtsecondary">
|
<div class="flex-1 flex items-center justify-center text-txtsecondary">
|
||||||
@@ -402,50 +138,22 @@
|
|||||||
<p class="font-medium">Error</p>
|
<p class="font-medium">Error</p>
|
||||||
<p class="text-sm mt-1">{error}</p>
|
<p class="text-sm mt-1">{error}</p>
|
||||||
</div>
|
</div>
|
||||||
{:else if generatedImages.length > 1}
|
{:else if generatedImage}
|
||||||
<!-- Grid for multiple images (batch) -->
|
|
||||||
<div class="grid grid-cols-2 gap-2 p-2 w-full h-full overflow-auto">
|
|
||||||
{#each generatedImages as img, i}
|
|
||||||
<div class="relative flex items-center justify-center">
|
|
||||||
<button
|
|
||||||
class="p-0 border-0 bg-transparent cursor-pointer"
|
|
||||||
onclick={() => openFullscreen(i)}
|
|
||||||
aria-label="View fullscreen"
|
|
||||||
>
|
|
||||||
<img
|
|
||||||
src={img}
|
|
||||||
alt="AI generated content {i + 1}"
|
|
||||||
class="max-w-full max-h-full object-contain hover:opacity-90 transition-opacity"
|
|
||||||
/>
|
|
||||||
</button>
|
|
||||||
<button
|
|
||||||
class="absolute bottom-2 right-2 p-1.5 bg-black/60 hover:bg-black/80 text-white rounded-full transition-colors"
|
|
||||||
onclick={(e) => { e.stopPropagation(); downloadImage(i); }}
|
|
||||||
aria-label="Download image"
|
|
||||||
>
|
|
||||||
<svg class="w-4 h-4" fill="none" stroke="currentColor" viewBox="0 0 24 24">
|
|
||||||
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M4 16v1a3 3 0 003 3h10a3 3 0 003-3v-1m-4-4l-4 4m0 0l-4-4m4 4V4"></path>
|
|
||||||
</svg>
|
|
||||||
</button>
|
|
||||||
</div>
|
|
||||||
{/each}
|
|
||||||
</div>
|
|
||||||
{:else if generatedImages.length === 1}
|
|
||||||
<div class="relative max-w-full max-h-full flex items-center justify-center">
|
<div class="relative max-w-full max-h-full flex items-center justify-center">
|
||||||
<button
|
<button
|
||||||
class="p-0 border-0 bg-transparent cursor-pointer"
|
class="p-0 border-0 bg-transparent cursor-pointer"
|
||||||
onclick={() => openFullscreen(0)}
|
onclick={openFullscreen}
|
||||||
aria-label="View fullscreen"
|
aria-label="View fullscreen"
|
||||||
>
|
>
|
||||||
<img
|
<img
|
||||||
src={generatedImages[0]}
|
src={generatedImage}
|
||||||
alt="AI generated content"
|
alt="AI generated content"
|
||||||
class="max-w-full max-h-full object-contain hover:opacity-90 transition-opacity"
|
class="max-w-full max-h-full object-contain hover:opacity-90 transition-opacity"
|
||||||
/>
|
/>
|
||||||
</button>
|
</button>
|
||||||
<button
|
<button
|
||||||
class="absolute bottom-2 right-2 p-2 bg-black/60 hover:bg-black/80 text-white rounded-full transition-colors"
|
class="absolute bottom-2 right-2 p-2 bg-black/60 hover:bg-black/80 text-white rounded-full transition-colors"
|
||||||
onclick={(e) => { e.stopPropagation(); downloadImage(0); }}
|
onclick={(e) => { e.stopPropagation(); downloadImage(); }}
|
||||||
aria-label="Download image"
|
aria-label="Download image"
|
||||||
>
|
>
|
||||||
<svg class="w-5 h-5" fill="none" stroke="currentColor" viewBox="0 0 24 24">
|
<svg class="w-5 h-5" fill="none" stroke="currentColor" viewBox="0 0 24 24">
|
||||||
@@ -485,7 +193,7 @@
|
|||||||
<button
|
<button
|
||||||
class="btn flex-1 md:flex-none"
|
class="btn flex-1 md:flex-none"
|
||||||
onclick={clearImage}
|
onclick={clearImage}
|
||||||
disabled={generatedImages.length === 0 && !error && !prompt.trim()}
|
disabled={!generatedImage && !error && !prompt.trim()}
|
||||||
>
|
>
|
||||||
Clear
|
Clear
|
||||||
</button>
|
</button>
|
||||||
@@ -496,7 +204,7 @@
|
|||||||
</div>
|
</div>
|
||||||
|
|
||||||
<!-- Fullscreen dialog -->
|
<!-- Fullscreen dialog -->
|
||||||
{#if showFullscreen && generatedImages[fullscreenIndex]}
|
{#if showFullscreen && generatedImage}
|
||||||
<div
|
<div
|
||||||
class="fixed inset-0 bg-black/90 z-50 flex items-center justify-center p-4"
|
class="fixed inset-0 bg-black/90 z-50 flex items-center justify-center p-4"
|
||||||
onclick={(e) => closeFullscreen(e)}
|
onclick={(e) => closeFullscreen(e)}
|
||||||
@@ -513,7 +221,7 @@
|
|||||||
×
|
×
|
||||||
</button>
|
</button>
|
||||||
<img
|
<img
|
||||||
src={generatedImages[fullscreenIndex]}
|
src={generatedImage}
|
||||||
alt="AI generated content"
|
alt="AI generated content"
|
||||||
class="max-w-full max-h-full object-contain pointer-events-none"
|
class="max-w-full max-h-full object-contain pointer-events-none"
|
||||||
/>
|
/>
|
||||||
|
|||||||
@@ -25,11 +25,6 @@
|
|||||||
<optgroup label="Local">
|
<optgroup label="Local">
|
||||||
{#each grouped.local as model (model.id)}
|
{#each grouped.local as model (model.id)}
|
||||||
<option value={model.id}>{model.id}</option>
|
<option value={model.id}>{model.id}</option>
|
||||||
{#if model.aliases}
|
|
||||||
{#each model.aliases as alias (alias)}
|
|
||||||
<option value={alias}> ↳ {alias}</option>
|
|
||||||
{/each}
|
|
||||||
{/if}
|
|
||||||
{/each}
|
{/each}
|
||||||
</optgroup>
|
</optgroup>
|
||||||
{/if}
|
{/if}
|
||||||
|
|||||||
@@ -1,406 +0,0 @@
|
|||||||
<script lang="ts">
|
|
||||||
import { models } from "../../stores/api";
|
|
||||||
import { persistentStore } from "../../stores/persistent";
|
|
||||||
import { rerank } from "../../lib/rerankApi";
|
|
||||||
import { playgroundStores } from "../../stores/playgroundActivity";
|
|
||||||
import ModelSelector from "./ModelSelector.svelte";
|
|
||||||
|
|
||||||
type RerankRow = { doc: string; score: number | null };
|
|
||||||
type SortOrder = "none" | "asc" | "desc";
|
|
||||||
type EditorMode = "table" | "json";
|
|
||||||
|
|
||||||
const selectedModelStore = persistentStore<string>("playground-rerank-model", "");
|
|
||||||
|
|
||||||
const defaultQuery = "How do LLM's work?";
|
|
||||||
const defaultDocs = [
|
|
||||||
"Large language models (LLMs) use transformer architectures to predict the next token in a sequence based on massive amounts of text data.",
|
|
||||||
"LLMs are trained on diverse internet text, learning statistical patterns of language that allow them to generate coherent responses.",
|
|
||||||
"During training, LLMs minimize a loss function that measures the difference between predicted and actual tokens across billions of examples.",
|
|
||||||
"Attention mechanisms in transformers enable LLMs to weigh the importance of different words when generating output.",
|
|
||||||
"Fine\u2011tuning allows a pre\u2011trained LLM to adapt to a specific downstream task with a smaller dataset.",
|
|
||||||
"Neural networks consist of layers of interconnected neurons that adjust their weights during back\u2011propagation.",
|
|
||||||
"The history of the Roman Empire spanned over a thousand years.",
|
|
||||||
"Soccer is the most popular sport in many countries around the world.",
|
|
||||||
"Quantum computing uses qubits to perform calculations that are intractable for classical computers.",
|
|
||||||
];
|
|
||||||
|
|
||||||
let query = $state(defaultQuery);
|
|
||||||
let rows = $state<RerankRow[]>([
|
|
||||||
...defaultDocs.map((doc) => ({ doc, score: null })),
|
|
||||||
{ doc: "", score: null },
|
|
||||||
]);
|
|
||||||
let isLoading = $state(false);
|
|
||||||
let error = $state<string | null>(null);
|
|
||||||
let usage = $state<{ prompt_tokens: number; total_tokens: number } | null>(null);
|
|
||||||
let abortController: AbortController | null = null;
|
|
||||||
let sortOrder = $state<SortOrder>("desc");
|
|
||||||
let editorMode = $state<EditorMode>("table");
|
|
||||||
let jsonText = $state("");
|
|
||||||
let jsonError = $state<string | null>(null);
|
|
||||||
|
|
||||||
let hasModels = $derived($models.some((m) => !m.unlisted));
|
|
||||||
|
|
||||||
// Whether the Rerank action is currently allowed: a model must be selected, no
// request may be running, and the active editor must contain a non-blank query
// plus at least one non-blank string document.
let canSubmit = $derived.by(() => {
  if (!$selectedModelStore || isLoading) return false;

  if (editorMode !== "json") {
    return query.trim() !== "" && rows.some((r) => r.doc.trim() !== "");
  }

  // JSON mode: validate the raw text. Anything unparsable (or a `null` payload,
  // which throws on destructuring) simply disables the button.
  try {
    const parsed = JSON.parse(jsonText) as Record<string, unknown>;
    const { query: candidateQuery, documents } = parsed;
    if (typeof candidateQuery !== "string" || candidateQuery.trim() === "") return false;
    if (!Array.isArray(documents)) return false;
    return documents.some((d: unknown) => typeof d === "string" && d.trim() !== "");
  } catch {
    return false;
  }
});
|
|
||||||
|
|
||||||
// Rows paired with their index in rows[], ordered for display. Sorting only
// affects this derived list; rows[] itself keeps its original order. Rows
// without a score always sink to the bottom, whatever the direction.
let displayRows = $derived.by(() => {
  const indexed = rows.map((row, i) => ({ row, i }));
  if (sortOrder === "none") return indexed;

  const direction = sortOrder === "desc" ? -1 : 1;
  // `indexed` is a fresh array, so sorting it in place leaves rows[] untouched.
  return indexed.sort((a, b) => {
    const left = a.row.score;
    const right = b.row.score;
    if (left === null) return right === null ? 0 : 1;
    if (right === null) return -1;
    return direction * (left - right);
  });
});
|
|
||||||
|
|
||||||
// Table mode only: keep a blank row at the end so there is always somewhere to
// type the next document.
$effect(() => {
  if (editorMode !== "table") return;
  const lastRow = rows[rows.length - 1];
  if (lastRow?.doc.trim() !== "") {
    rows = [...rows, { doc: "", score: null }];
  }
});

// Mirror the local loading flag into the shared playground activity store.
$effect(() => {
  playgroundStores.rerankLoading.set(isLoading);
});
|
|
||||||
|
|
||||||
// Switches to the JSON editor, seeding it with the current query and every
// non-blank document from the table. No-op when already in JSON mode.
function switchToJson() {
  if (editorMode === "json") return;

  const documents: string[] = [];
  for (const { doc } of rows) {
    if (doc.trim() !== "") documents.push(doc);
  }

  jsonText = JSON.stringify({ query, documents }, null, 2);
  jsonError = null;
  editorMode = "json";
}
|
|
||||||
|
|
||||||
// Switches back to the table editor, loading query and documents from the JSON
// text. No-op when already in table mode.
//
// - Blank JSON text resets the table to an empty query and a single blank row.
// - Otherwise the text must parse to an object with a string "query" and an
//   array "documents"; on any failure the message is stored in jsonError and
//   the editor stays in JSON mode.
// - Non-string documents are kept as their JSON serialisation, so an object
//   entry shows up as `{"a":1}` rather than "[object Object]".
function switchToTable() {
  if (editorMode === "table") return;

  if (jsonText.trim() === "") {
    query = "";
    rows = [{ doc: "", score: null }];
    jsonError = null;
    editorMode = "table";
    return;
  }

  try {
    const parsed: unknown = JSON.parse(jsonText);
    if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) {
      throw new Error("Expected a JSON object");
    }

    const { query: nextQuery, documents } = parsed as Record<string, unknown>;
    if (typeof nextQuery !== "string") throw new Error('"query" must be a string');
    if (!Array.isArray(documents)) throw new Error('"documents" must be an array');

    const newRows: RerankRow[] = documents.map((d: unknown) => ({
      doc: typeof d === "string" ? d : JSON.stringify(d),
      score: null,
    }));
    // Always finish with a blank row for adding further documents.
    if (newRows.length === 0 || newRows[newRows.length - 1].doc.trim() !== "") {
      newRows.push({ doc: "", score: null });
    }

    query = nextQuery;
    rows = newRows;
    jsonError = null;
    editorMode = "table";
  } catch (err) {
    jsonError = err instanceof Error ? err.message : "Invalid JSON";
  }
}
|
|
||||||
|
|
||||||
// Advances the score sort through none -> desc -> asc -> none.
function cycleSortOrder() {
  switch (sortOrder) {
    case "none":
      sortOrder = "desc";
      break;
    case "desc":
      sortOrder = "asc";
      break;
    default:
      sortOrder = "none";
  }
}
|
|
||||||
|
|
||||||
// Suffix for the "Score" column header: an arrow for the active sort direction,
// or nothing when the rows are unsorted.
function sortIndicator(): string {
  switch (sortOrder) {
    case "desc":
      return " ↓";
    case "asc":
      return " ↑";
    default:
      return "";
  }
}
|
|
||||||
|
|
||||||
// Sends the query and all non-blank documents to the rerank endpoint and writes
// the returned relevance scores back onto the matching rows.
//
// In JSON mode the JSON text is first synced into the table (query + string
// documents) and the editor switches to table mode so results are visible.
// Entries of "documents" that are not non-blank strings are skipped, using the
// same test canSubmit applies, so a stray non-string entry no longer surfaces
// as a misleading "Invalid JSON" error.
//
// A cancelled request (AbortError) is ignored; any other failure is shown via
// `error`.
async function submit() {
  if (!canSubmit) return;

  let submitQuery: string;

  if (editorMode === "json") {
    try {
      const parsed = JSON.parse(jsonText) as Record<string, unknown>;
      const { query: parsedQuery, documents } = parsed;
      if (typeof parsedQuery !== "string" || !Array.isArray(documents)) {
        throw new Error("Unexpected request shape");
      }

      const docs = (documents as unknown[]).filter(
        (d): d is string => typeof d === "string" && d.trim() !== ""
      );
      const newRows: RerankRow[] = docs.map((d) => ({ doc: d, score: null }));
      newRows.push({ doc: "", score: null });

      submitQuery = parsedQuery;
      rows = newRows;
      query = submitQuery;
      editorMode = "table";
    } catch {
      error = "Invalid JSON — fix before submitting";
      return;
    }
  } else {
    submitQuery = query;
  }

  // Remember where each submitted document lives in rows[] so the response
  // (indexed by position in the submitted array) can be mapped back.
  const nonEmptyEntries = rows
    .map((r, i) => ({ originalIndex: i, doc: r.doc }))
    .filter((e) => e.doc.trim() !== "");

  isLoading = true;
  error = null;
  usage = null;

  // Clear previous scores
  rows = rows.map((r) => ({ ...r, score: null }));

  abortController = new AbortController();

  try {
    const response = await rerank(
      $selectedModelStore,
      submitQuery,
      nonEmptyEntries.map((e) => e.doc),
      abortController.signal
    );

    usage = response.usage;

    const updated = rows.map((r) => ({ ...r }));
    for (const result of response.results) {
      const entry = nonEmptyEntries[result.index];
      if (entry === undefined) continue;

      // rows[] may have changed while the request was in flight (a row can be
      // removed). Only score a row that still holds the text that was sent.
      const target = updated[entry.originalIndex];
      if (target !== undefined && target.doc === entry.doc) {
        target.score = result.relevance_score;
      }
    }
    rows = updated;
  } catch (err) {
    const cancelledByUser = err instanceof Error && err.name === "AbortError";
    if (!cancelledByUser) {
      error = err instanceof Error ? err.message : "An error occurred";
    }
  } finally {
    isLoading = false;
    abortController = null;
  }
}
|
|
||||||
|
|
||||||
// Aborts the in-flight rerank request, if there is one.
function cancel() {
  if (abortController) {
    abortController.abort();
  }
}
|
|
||||||
|
|
||||||
// Restores the default query and documents and wipes results, errors, sort
// order and JSON editor content.
function clear() {
  const seededRows: RerankRow[] = defaultDocs.map((doc) => ({ doc, score: null }));
  seededRows.push({ doc: "", score: null });

  query = defaultQuery;
  rows = seededRows;

  error = null;
  usage = null;

  sortOrder = "desc";

  jsonText = "";
  jsonError = null;
}
|
|
||||||
|
|
||||||
// Removes the row at `originalIndex` (its position in rows[], not in the sorted
// display list). The last remaining row is never removed.
//
// Deletion is also refused while a request is running: submit() maps results
// back by row index, so removing a row mid-flight would shift those indices.
function deleteRow(originalIndex: number) {
  if (isLoading || rows.length <= 1) return;
  rows = rows.filter((_, i) => i !== originalIndex);
}
|
|
||||||
|
|
||||||
// Replaces the document text of the row at `originalIndex` (position in
// rows[]), reassigning rows with fresh copies of every row.
function updateDoc(originalIndex: number, value: string) {
  const nextRows = Array.from(rows, (r) => ({ ...r }));
  const edited = nextRows[originalIndex];
  edited.doc = value;
  rows = nextRows;
}
|
|
||||||
|
|
||||||
// CSS classes for a score cell: secondary text when there is no score, green
// for scores above zero, red for everything else (zero and below).
function scoreColor(score: number | null): string {
  if (score === null) {
    return "text-txtsecondary";
  }
  return score > 0 ? "text-green-600 dark:text-green-400" : "text-red-500 dark:text-red-400";
}
|
|
||||||
|
|
||||||
// Text for a score cell: an em dash when there is no score, otherwise the score
// with three decimal places.
function formatScore(score: number | null): string {
  return score === null ? "—" : score.toFixed(3);
}
|
|
||||||
|
|
||||||
// Keyboard shortcut for the text inputs: Enter without Shift submits.
function handleKeyDown(e: KeyboardEvent) {
  const isPlainEnter = e.key === "Enter" && !e.shiftKey;
  if (!isPlainEnter) return;

  e.preventDefault();
  submit();
}
|
|
||||||
|
|
||||||
// True when the component is in the state clear() produces: default query,
// default documents plus one blank row, no scores, no JSON text, no error and
// no usage figures. Used to disable the Clear button.
let isCleared = $derived.by(() => {
  if (query !== defaultQuery) return false;

  const rowsArePristine = rows.every(
    (r, i) => r.score === null && r.doc === (defaultDocs[i] ?? "")
  );
  if (!rowsArePristine) return false;
  if (rows.length !== defaultDocs.length + 1) return false;

  if (jsonText.trim()) return false;
  return !error && !usage;
});
|
|
||||||
</script>
|
|
||||||
|
|
||||||
<div class="flex flex-col h-full">
|
|
||||||
<!-- Top bar: model selector + query input (table mode) + mode toggle -->
|
|
||||||
<div class="shrink-0 flex flex-wrap gap-2 mb-4">
|
|
||||||
<ModelSelector bind:value={$selectedModelStore} placeholder="Select a rerank model..." disabled={isLoading} />
|
|
||||||
{#if editorMode === "table"}
|
|
||||||
<input
|
|
||||||
type="text"
|
|
||||||
class="min-w-0 flex-1 basis-48 px-3 py-2 rounded border border-gray-200 dark:border-white/10 bg-surface focus:outline-none focus:ring-2 focus:ring-primary"
|
|
||||||
placeholder="Query..."
|
|
||||||
bind:value={query}
|
|
||||||
disabled={isLoading}
|
|
||||||
onkeydown={handleKeyDown}
|
|
||||||
/>
|
|
||||||
{/if}
|
|
||||||
<!-- Table / JSON toggle -->
|
|
||||||
<div class="flex rounded border border-gray-200 dark:border-white/10 overflow-hidden shrink-0">
|
|
||||||
<button
|
|
||||||
class="px-3 py-1.5 text-sm transition-colors {editorMode === 'table'
|
|
||||||
? 'bg-primary text-btn-primary-text'
|
|
||||||
: 'bg-surface hover:bg-secondary-hover'}"
|
|
||||||
onclick={switchToTable}
|
|
||||||
disabled={isLoading}
|
|
||||||
>
|
|
||||||
Table
|
|
||||||
</button>
|
|
||||||
<button
|
|
||||||
class="px-3 py-1.5 text-sm border-l border-gray-200 dark:border-white/10 transition-colors {editorMode === 'json'
|
|
||||||
? 'bg-primary text-btn-primary-text'
|
|
||||||
: 'bg-surface hover:bg-secondary-hover'}"
|
|
||||||
onclick={switchToJson}
|
|
||||||
disabled={isLoading}
|
|
||||||
>
|
|
||||||
JSON
|
|
||||||
</button>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
{#if !hasModels}
|
|
||||||
<div class="flex-1 flex items-center justify-center text-txtsecondary">
|
|
||||||
<p>No models configured. Add models to your configuration to use reranking.</p>
|
|
||||||
</div>
|
|
||||||
{:else if editorMode === "json"}
|
|
||||||
<!-- JSON editor -->
|
|
||||||
<div class="flex-1 flex flex-col min-h-0 mb-4">
|
|
||||||
<textarea
|
|
||||||
class="flex-1 w-full font-mono text-sm px-3 py-2 rounded border border-gray-200 dark:border-white/10 bg-surface focus:outline-none focus:ring-2 focus:ring-primary resize-none"
|
|
||||||
bind:value={jsonText}
|
|
||||||
disabled={isLoading}
|
|
||||||
placeholder={'{\n "query": "your search query",\n "documents": [\n "document one",\n "document two"\n ]\n}'}
|
|
||||||
spellcheck={false}
|
|
||||||
></textarea>
|
|
||||||
{#if jsonError}
|
|
||||||
<p class="mt-1 text-sm text-red-500">{jsonError}</p>
|
|
||||||
{/if}
|
|
||||||
</div>
|
|
||||||
{:else}
|
|
||||||
<!-- Document table -->
|
|
||||||
<div class="flex-1 overflow-y-auto mb-4 border border-gray-200 dark:border-white/10 rounded">
|
|
||||||
<table class="w-full border-collapse table-fixed">
|
|
||||||
<colgroup>
|
|
||||||
<col class="w-auto" />
|
|
||||||
<col style="width: 120px" />
|
|
||||||
<col style="width: 40px" />
|
|
||||||
</colgroup>
|
|
||||||
<thead class="sticky top-0 bg-surface border-b border-gray-200 dark:border-white/10">
|
|
||||||
<tr>
|
|
||||||
<th class="px-3 py-2 text-left text-sm font-medium text-txtsecondary">Document</th>
|
|
||||||
<th
|
|
||||||
class="px-3 py-2 text-right text-sm font-medium text-txtsecondary cursor-pointer select-none hover:text-txtprimary transition-colors"
|
|
||||||
onclick={cycleSortOrder}
|
|
||||||
>
|
|
||||||
Score{sortIndicator()}
|
|
||||||
</th>
|
|
||||||
<th class="px-2 py-2"></th>
|
|
||||||
</tr>
|
|
||||||
</thead>
|
|
||||||
<tbody>
|
|
||||||
{#each displayRows as { row, i } (i)}
|
|
||||||
<tr class="border-b border-gray-100 dark:border-white/5 last:border-0">
|
|
||||||
<td class="px-3 py-1.5">
|
|
||||||
<input
|
|
||||||
type="text"
|
|
||||||
class="w-full bg-transparent focus:outline-none focus:ring-1 focus:ring-primary rounded px-1 py-0.5"
|
|
||||||
placeholder={i === rows.length - 1 ? "Add document..." : "Document text..."}
|
|
||||||
value={row.doc}
|
|
||||||
oninput={(e) => updateDoc(i, (e.target as HTMLInputElement).value)}
|
|
||||||
disabled={isLoading}
|
|
||||||
onkeydown={handleKeyDown}
|
|
||||||
/>
|
|
||||||
</td>
|
|
||||||
<td class="px-3 py-1.5 text-right font-mono text-sm {scoreColor(row.score)}">
|
|
||||||
{#if isLoading && row.score === null && row.doc.trim() !== ""}
|
|
||||||
<span class="inline-block w-4 h-4 border-2 border-current border-t-transparent rounded-full animate-spin align-middle"></span>
|
|
||||||
{:else}
|
|
||||||
{formatScore(row.score)}
|
|
||||||
{/if}
|
|
||||||
</td>
|
|
||||||
<td class="px-2 py-1.5 text-center">
|
|
||||||
<button
|
|
||||||
class="w-7 h-7 flex items-center justify-center text-txtsecondary hover:text-red-500 transition-colors rounded disabled:opacity-30 disabled:cursor-not-allowed"
|
|
||||||
onclick={() => deleteRow(i)}
|
|
||||||
disabled={rows.length <= 1}
|
|
||||||
tabindex="-1"
|
|
||||||
aria-label="Remove row"
|
|
||||||
>
|
|
||||||
×
|
|
||||||
</button>
|
|
||||||
</td>
|
|
||||||
</tr>
|
|
||||||
{/each}
|
|
||||||
</tbody>
|
|
||||||
</table>
|
|
||||||
</div>
|
|
||||||
{/if}
|
|
||||||
|
|
||||||
<!-- Bottom toolbar -->
|
|
||||||
{#if hasModels}
|
|
||||||
<div class="shrink-0 flex flex-wrap items-center gap-2">
|
|
||||||
{#if isLoading}
|
|
||||||
<button class="btn bg-red-500 hover:bg-red-600 text-white" onclick={cancel}>
|
|
||||||
Cancel
|
|
||||||
</button>
|
|
||||||
{:else}
|
|
||||||
<button
|
|
||||||
class="btn bg-primary text-btn-primary-text hover:opacity-90"
|
|
||||||
onclick={submit}
|
|
||||||
disabled={!canSubmit}
|
|
||||||
>
|
|
||||||
Rerank
|
|
||||||
</button>
|
|
||||||
<button class="btn" onclick={clear} disabled={isCleared}>
|
|
||||||
Clear
|
|
||||||
</button>
|
|
||||||
{/if}
|
|
||||||
|
|
||||||
{#if error}
|
|
||||||
<span class="text-sm text-red-500 ml-2">{error}</span>
|
|
||||||
{:else if usage}
|
|
||||||
<span class="text-sm text-txtsecondary ml-2">{usage.total_tokens} tokens</span>
|
|
||||||
{/if}
|
|
||||||
</div>
|
|
||||||
{/if}
|
|
||||||
</div>
|
|
||||||
@@ -2,7 +2,6 @@
|
|||||||
import { models } from "../../stores/api";
|
import { models } from "../../stores/api";
|
||||||
import { persistentStore } from "../../stores/persistent";
|
import { persistentStore } from "../../stores/persistent";
|
||||||
import { generateSpeech } from "../../lib/speechApi";
|
import { generateSpeech } from "../../lib/speechApi";
|
||||||
import { playgroundStores } from "../../stores/playgroundActivity";
|
|
||||||
import ModelSelector from "./ModelSelector.svelte";
|
import ModelSelector from "./ModelSelector.svelte";
|
||||||
import ExpandableTextarea from "./ExpandableTextarea.svelte";
|
import ExpandableTextarea from "./ExpandableTextarea.svelte";
|
||||||
|
|
||||||
@@ -21,9 +20,11 @@
|
|||||||
let availableVoices = $state<string[]>(["coral", "alloy", "echo", "fable", "onyx", "nova", "shimmer"]);
|
let availableVoices = $state<string[]>(["coral", "alloy", "echo", "fable", "onyx", "nova", "shimmer"]);
|
||||||
let isLoadingVoices = $state(false);
|
let isLoadingVoices = $state(false);
|
||||||
|
|
||||||
|
// Default voices to fall back to if API call fails
|
||||||
const defaultVoices = ["coral", "alloy", "echo", "fable", "onyx", "nova", "shimmer"];
|
const defaultVoices = ["coral", "alloy", "echo", "fable", "onyx", "nova", "shimmer"];
|
||||||
const CACHE_KEY = "playground-speech-voices-cache";
|
const CACHE_KEY = "playground-speech-voices-cache";
|
||||||
|
|
||||||
|
// Load voices cache from localStorage
|
||||||
function getVoicesCache(): Record<string, string[]> {
|
function getVoicesCache(): Record<string, string[]> {
|
||||||
if (typeof window === "undefined") return {};
|
if (typeof window === "undefined") return {};
|
||||||
try {
|
try {
|
||||||
@@ -34,6 +35,7 @@
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Save voices cache to localStorage
|
||||||
function saveVoicesCache(cache: Record<string, string[]>) {
|
function saveVoicesCache(cache: Record<string, string[]>) {
|
||||||
if (typeof window === "undefined") return;
|
if (typeof window === "undefined") return;
|
||||||
try {
|
try {
|
||||||
@@ -45,12 +47,9 @@
|
|||||||
|
|
||||||
let hasModels = $derived($models.some((m) => !m.unlisted));
|
let hasModels = $derived($models.some((m) => !m.unlisted));
|
||||||
|
|
||||||
|
// Track if this is the initial page load to avoid fetching on refresh
|
||||||
let isInitialLoad = $state(true);
|
let isInitialLoad = $state(true);
|
||||||
|
|
||||||
$effect(() => {
|
|
||||||
playgroundStores.speechGenerating.set(isGenerating);
|
|
||||||
});
|
|
||||||
|
|
||||||
// On page load, restore cached voices for the selected model if available
|
// On page load, restore cached voices for the selected model if available
|
||||||
$effect(() => {
|
$effect(() => {
|
||||||
const model = $selectedModelStore;
|
const model = $selectedModelStore;
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
import { describe, it, expect } from "vitest";
|
import { describe, it, expect } from "vitest";
|
||||||
import { renderMarkdown, escapeHtml, splitCompleteBlocks, closePendingBlock, normalizeLatexDelimiters, renderStreamingMarkdown, createStreamingCache } from "./markdown";
|
import { renderMarkdown, escapeHtml } from "./markdown";
|
||||||
|
|
||||||
describe("renderMarkdown", () => {
|
describe("renderMarkdown", () => {
|
||||||
describe("basic markdown", () => {
|
describe("basic markdown", () => {
|
||||||
@@ -130,35 +130,6 @@ More text here.
|
|||||||
expect(result).toContain("katex");
|
expect(result).toContain("katex");
|
||||||
expect(result).toContain("sqrt");
|
expect(result).toContain("sqrt");
|
||||||
});
|
});
|
||||||
|
|
||||||
it("renders \\[...\\] display math", () => {
|
|
||||||
const result = renderMarkdown("\\[\nx^2 + y^2 = z^2\n\\]");
|
|
||||||
expect(result).toContain("katex");
|
|
||||||
});
|
|
||||||
|
|
||||||
it("renders \\(...\\) inline math", () => {
|
|
||||||
const result = renderMarkdown("The equation \\(E = mc^2\\) is famous.");
|
|
||||||
expect(result).toContain("katex");
|
|
||||||
});
|
|
||||||
});
|
|
||||||
|
|
||||||
describe("normalizeLatexDelimiters", () => {
|
|
||||||
it("converts \\[...\\] to $$...$$", () => {
|
|
||||||
expect(normalizeLatexDelimiters("\\[\nx^2\n\\]")).toBe("$$\nx^2\n$$");
|
|
||||||
});
|
|
||||||
|
|
||||||
it("converts \\(...\\) to $...$", () => {
|
|
||||||
expect(normalizeLatexDelimiters("\\(x^2\\)")).toBe("$x^2$");
|
|
||||||
});
|
|
||||||
|
|
||||||
it("leaves $$ and $ delimiters unchanged", () => {
|
|
||||||
const text = "$$x^2$$ and $y$";
|
|
||||||
expect(normalizeLatexDelimiters(text)).toBe(text);
|
|
||||||
});
|
|
||||||
|
|
||||||
it("handles multiple occurrences", () => {
|
|
||||||
expect(normalizeLatexDelimiters("\\(a\\) and \\(b\\)")).toBe("$a$ and $b$");
|
|
||||||
});
|
|
||||||
});
|
});
|
||||||
|
|
||||||
describe("escapeHtml", () => {
|
describe("escapeHtml", () => {
|
||||||
@@ -187,237 +158,3 @@ More text here.
|
|||||||
});
|
});
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
describe("splitCompleteBlocks", () => {
|
|
||||||
it("returns everything as pending when no blank line", () => {
|
|
||||||
const result = splitCompleteBlocks("Hello world");
|
|
||||||
expect(result.complete).toBe("");
|
|
||||||
expect(result.pending).toBe("Hello world");
|
|
||||||
});
|
|
||||||
|
|
||||||
it("returns empty for empty input", () => {
|
|
||||||
const result = splitCompleteBlocks("");
|
|
||||||
expect(result.complete).toBe("");
|
|
||||||
expect(result.pending).toBe("");
|
|
||||||
});
|
|
||||||
|
|
||||||
it("splits on blank line between paragraphs", () => {
|
|
||||||
const result = splitCompleteBlocks("First paragraph.\n\nSecond paragraph");
|
|
||||||
expect(result.complete).toBe("First paragraph.\n");
|
|
||||||
expect(result.pending).toBe("Second paragraph");
|
|
||||||
});
|
|
||||||
|
|
||||||
it("splits multiple paragraphs at last blank line", () => {
|
|
||||||
const result = splitCompleteBlocks("Para 1.\n\nPara 2.\n\nPara 3");
|
|
||||||
expect(result.complete).toBe("Para 1.\n\nPara 2.\n");
|
|
||||||
expect(result.pending).toBe("Para 3");
|
|
||||||
});
|
|
||||||
|
|
||||||
it("treats closed code fence as complete boundary", () => {
|
|
||||||
const text = "```js\nconst x = 1;\n```\nMore text";
|
|
||||||
const result = splitCompleteBlocks(text);
|
|
||||||
expect(result.complete).toBe("```js\nconst x = 1;\n```");
|
|
||||||
expect(result.pending).toBe("More text");
|
|
||||||
});
|
|
||||||
|
|
||||||
it("treats unclosed code fence as pending", () => {
|
|
||||||
const text = "Done paragraph.\n\n```js\nconst x = 1;";
|
|
||||||
const result = splitCompleteBlocks(text);
|
|
||||||
expect(result.complete).toBe("Done paragraph.\n");
|
|
||||||
expect(result.pending).toBe("```js\nconst x = 1;");
|
|
||||||
});
|
|
||||||
|
|
||||||
it("does not split on blank lines inside code fences", () => {
|
|
||||||
const text = "```\nline1\n\nline2\n```";
|
|
||||||
const result = splitCompleteBlocks(text);
|
|
||||||
expect(result.complete).toBe("```\nline1\n\nline2\n```");
|
|
||||||
expect(result.pending).toBe("");
|
|
||||||
});
|
|
||||||
|
|
||||||
it("handles tilde fences", () => {
|
|
||||||
const text = "~~~py\nprint('hi')\n~~~\nAfter";
|
|
||||||
const result = splitCompleteBlocks(text);
|
|
||||||
expect(result.complete).toBe("~~~py\nprint('hi')\n~~~");
|
|
||||||
expect(result.pending).toBe("After");
|
|
||||||
});
|
|
||||||
|
|
||||||
it("does not close backtick fence with tilde fence", () => {
|
|
||||||
const text = "```\ncode\n~~~\nstill code";
|
|
||||||
const result = splitCompleteBlocks(text);
|
|
||||||
// The ~~~ should not close a backtick fence, so everything from ``` onward is pending
|
|
||||||
expect(result.complete).toBe("");
|
|
||||||
expect(result.pending).toBe("```\ncode\n~~~\nstill code");
|
|
||||||
});
|
|
||||||
|
|
||||||
it("treats closed math block as complete boundary", () => {
|
|
||||||
const text = "$$\nx^2\n$$\nAfter";
|
|
||||||
const result = splitCompleteBlocks(text);
|
|
||||||
expect(result.complete).toBe("$$\nx^2\n$$");
|
|
||||||
expect(result.pending).toBe("After");
|
|
||||||
});
|
|
||||||
|
|
||||||
it("treats unclosed math block as pending", () => {
|
|
||||||
const text = "Before.\n\n$$\nx^2";
|
|
||||||
const result = splitCompleteBlocks(text);
|
|
||||||
expect(result.complete).toBe("Before.\n");
|
|
||||||
expect(result.pending).toBe("$$\nx^2");
|
|
||||||
});
|
|
||||||
|
|
||||||
it("treats closed \\[...\\] math block as complete boundary", () => {
|
|
||||||
const text = "\\[\nx^2\n\\]\nAfter";
|
|
||||||
const result = splitCompleteBlocks(text);
|
|
||||||
expect(result.complete).toBe("\\[\nx^2\n\\]");
|
|
||||||
expect(result.pending).toBe("After");
|
|
||||||
});
|
|
||||||
|
|
||||||
it("treats unclosed \\[ math block as pending", () => {
|
|
||||||
const text = "Before.\n\n\\[\nx^2";
|
|
||||||
const result = splitCompleteBlocks(text);
|
|
||||||
expect(result.complete).toBe("Before.\n");
|
|
||||||
expect(result.pending).toBe("\\[\nx^2");
|
|
||||||
});
|
|
||||||
|
|
||||||
it("handles trailing blank line making everything complete", () => {
|
|
||||||
const text = "Hello world.\n";
|
|
||||||
const result = splitCompleteBlocks(text);
|
|
||||||
// Last line is empty string after split, which is a blank line
|
|
||||||
expect(result.complete).toBe("Hello world.\n");
|
|
||||||
expect(result.pending).toBe("");
|
|
||||||
});
|
|
||||||
});
|
|
||||||
|
|
||||||
describe("closePendingBlock", () => {
|
|
||||||
it("returns empty string for empty input", () => {
|
|
||||||
expect(closePendingBlock("")).toBe("");
|
|
||||||
});
|
|
||||||
|
|
||||||
it("returns plain text unchanged", () => {
|
|
||||||
expect(closePendingBlock("Hello world")).toBe("Hello world");
|
|
||||||
});
|
|
||||||
|
|
||||||
it("closes an open backtick code fence", () => {
|
|
||||||
const result = closePendingBlock("```python\nprint('hi')");
|
|
||||||
expect(result).toBe("```python\nprint('hi')\n```");
|
|
||||||
});
|
|
||||||
|
|
||||||
it("closes an open tilde code fence", () => {
|
|
||||||
const result = closePendingBlock("~~~js\nconst x = 1;");
|
|
||||||
expect(result).toBe("~~~js\nconst x = 1;\n~~~");
|
|
||||||
});
|
|
||||||
|
|
||||||
it("does not modify already-closed code fence", () => {
|
|
||||||
const text = "```py\ncode\n```";
|
|
||||||
expect(closePendingBlock(text)).toBe(text);
|
|
||||||
});
|
|
||||||
|
|
||||||
it("closes an open math block", () => {
|
|
||||||
const result = closePendingBlock("$$\nx^2 + y^2");
|
|
||||||
expect(result).toBe("$$\nx^2 + y^2\n$$");
|
|
||||||
});
|
|
||||||
|
|
||||||
it("does not modify already-closed math block", () => {
|
|
||||||
const text = "$$\nx^2\n$$";
|
|
||||||
expect(closePendingBlock(text)).toBe(text);
|
|
||||||
});
|
|
||||||
|
|
||||||
it("closes an open \\[ math block with \\]", () => {
|
|
||||||
const result = closePendingBlock("\\[\nx^2 + y^2");
|
|
||||||
expect(result).toBe("\\[\nx^2 + y^2\n\\]");
|
|
||||||
});
|
|
||||||
|
|
||||||
it("does not modify already-closed \\[...\\] math block", () => {
|
|
||||||
const text = "\\[\nx^2\n\\]";
|
|
||||||
expect(closePendingBlock(text)).toBe(text);
|
|
||||||
});
|
|
||||||
|
|
||||||
it("closes code fence when preceded by regular text", () => {
|
|
||||||
const result = closePendingBlock("Some text\n```\ncode");
|
|
||||||
expect(result).toBe("Some text\n```\ncode\n```");
|
|
||||||
});
|
|
||||||
|
|
||||||
it("leaves headers unchanged", () => {
|
|
||||||
expect(closePendingBlock("## Hello")).toBe("## Hello");
|
|
||||||
});
|
|
||||||
|
|
||||||
it("leaves tables unchanged", () => {
|
|
||||||
const table = "| a | b |\n| --- | --- |\n| 1 | 2 |";
|
|
||||||
expect(closePendingBlock(table)).toBe(table);
|
|
||||||
});
|
|
||||||
|
|
||||||
it("leaves lists unchanged", () => {
|
|
||||||
expect(closePendingBlock("- item 1\n- item 2")).toBe("- item 1\n- item 2");
|
|
||||||
});
|
|
||||||
});
|
|
||||||
|
|
||||||
describe("renderStreamingMarkdown", () => {
|
|
||||||
it("renders complete blocks and pending as markdown", () => {
|
|
||||||
const cache = createStreamingCache();
|
|
||||||
const text = "# Hello\n\nWorld";
|
|
||||||
const { blocks, pendingHtml } = renderStreamingMarkdown(text, cache);
|
|
||||||
expect(blocks).toHaveLength(1);
|
|
||||||
expect(blocks[0].html).toContain("<h1>Hello</h1>");
|
|
||||||
expect(pendingHtml).toContain("World");
|
|
||||||
expect(pendingHtml).toContain("<p>");
|
|
||||||
});
|
|
||||||
|
|
||||||
it("preserves existing blocks when complete portion is unchanged", () => {
|
|
||||||
const cache = createStreamingCache();
|
|
||||||
renderStreamingMarkdown("# Hello\n\nWor", cache);
|
|
||||||
const firstBlocks = cache.blocks;
|
|
||||||
|
|
||||||
const { blocks } = renderStreamingMarkdown("# Hello\n\nWorld", cache);
|
|
||||||
// Same block array reference — nothing changed in the complete section
|
|
||||||
expect(blocks).toBe(firstBlocks);
|
|
||||||
expect(cache.completeKey).toBe("# Hello\n");
|
|
||||||
});
|
|
||||||
|
|
||||||
it("appends a new block when a new section completes", () => {
|
|
||||||
const cache = createStreamingCache();
|
|
||||||
renderStreamingMarkdown("# Hello\n\nParagraph", cache);
|
|
||||||
expect(cache.blocks).toHaveLength(1);
|
|
||||||
const firstBlock = cache.blocks[0];
|
|
||||||
|
|
||||||
renderStreamingMarkdown("# Hello\n\nParagraph.\n\nMore", cache);
|
|
||||||
expect(cache.blocks).toHaveLength(2);
|
|
||||||
// First block is preserved with the same id and html
|
|
||||||
expect(cache.blocks[0].id).toBe(firstBlock.id);
|
|
||||||
expect(cache.blocks[0].html).toBe(firstBlock.html);
|
|
||||||
// Second block contains the new paragraph
|
|
||||||
expect(cache.blocks[1].html).toContain("Paragraph.");
|
|
||||||
});
|
|
||||||
|
|
||||||
it("assigns unique stable ids to each block", () => {
|
|
||||||
const cache = createStreamingCache();
|
|
||||||
renderStreamingMarkdown("A.\n\nB.\n\nC", cache);
|
|
||||||
expect(cache.blocks).toHaveLength(1);
|
|
||||||
const id0 = cache.blocks[0].id;
|
|
||||||
|
|
||||||
renderStreamingMarkdown("A.\n\nB.\n\nC.\n\nD", cache);
|
|
||||||
expect(cache.blocks).toHaveLength(2);
|
|
||||||
expect(cache.blocks[0].id).toBe(id0);
|
|
||||||
expect(cache.blocks[1].id).toBe(id0 + 1);
|
|
||||||
});
|
|
||||||
|
|
||||||
it("renders pending code block with syntax highlighting", () => {
|
|
||||||
const cache = createStreamingCache();
|
|
||||||
const text = "Done.\n\n```python\nprint('hello')";
|
|
||||||
const { pendingHtml } = renderStreamingMarkdown(text, cache);
|
|
||||||
expect(pendingHtml).toContain("<code");
|
|
||||||
expect(pendingHtml).toContain("hljs");
|
|
||||||
});
|
|
||||||
|
|
||||||
it("renders pending table as markdown", () => {
|
|
||||||
const cache = createStreamingCache();
|
|
||||||
const text = "Done.\n\n| a | b |\n| --- | --- |\n| 1 | 2 |";
|
|
||||||
const { pendingHtml } = renderStreamingMarkdown(text, cache);
|
|
||||||
expect(pendingHtml).toContain("<table>");
|
|
||||||
expect(pendingHtml).toContain("<td>");
|
|
||||||
});
|
|
||||||
|
|
||||||
it("renders pending portion through markdown pipeline", () => {
|
|
||||||
const cache = createStreamingCache();
|
|
||||||
const text = "Done.\n\nSome **bold** text";
|
|
||||||
const { pendingHtml } = renderStreamingMarkdown(text, cache);
|
|
||||||
expect(pendingHtml).toContain("<strong>bold</strong>");
|
|
||||||
});
|
|
||||||
});
|
|
||||||
|
|||||||
@@ -69,189 +69,13 @@ const processor = unified()
|
|||||||
.use(rehypeHighlight)
|
.use(rehypeHighlight)
|
||||||
.use(rehypeStringify, { allowDangerousHtml: true });
|
.use(rehypeStringify, { allowDangerousHtml: true });
|
||||||
|
|
||||||
/**
 * Splits streaming markdown into a finished prefix and an unfinished tail.
 *
 * The text is scanned line by line. A line ends a complete section when it is
 * - a blank line outside any code fence or math block,
 * - the closing line of a fenced code block, or
 * - the closing line (`$$` or `\]`) of a display-math block.
 *
 * Everything up to and including the last such line is `complete`; the lines
 * after it are `pending`. The newline separating the two parts is dropped.
 *
 * A fence is closed only by a line consisting of the same fence character,
 * repeated at least as many times as in the opening fence. A four-backtick
 * fence is therefore not ended by a three-backtick line inside it.
 *
 * @param text Markdown received so far.
 * @returns The complete and pending portions; both empty for empty input.
 */
export function splitCompleteBlocks(text: string): { complete: string; pending: string } {
  if (!text) {
    return { complete: "", pending: "" };
  }

  // Hoisted so no RegExp is constructed per line.
  const fenceOpen = /^\s*(`{3,}|~{3,})/;
  const fenceOnly = /^\s*(`{3,}|~{3,})\s*$/;

  const lines = text.split("\n");
  let lastCompleteBoundary = -1; // index of the last line that ends a complete block
  let openFence = ""; // full opening fence run; "" while outside a fence
  let inMathBlock = false;

  for (const [i, rawLine] of lines.entries()) {
    const line = rawLine.trimEnd();

    if (openFence) {
      const run = fenceOnly.exec(line)?.[1];
      if (run !== undefined && run[0] === openFence[0] && run.length >= openFence.length) {
        openFence = "";
        lastCompleteBoundary = i;
      }
      continue;
    }

    if (inMathBlock) {
      if (line === "$$" || line === "\\]") {
        inMathBlock = false;
        lastCompleteBoundary = i;
      }
      continue;
    }

    // Not inside a fence, so any fence line here is an opening fence
    // (possibly followed by an info string such as a language name).
    const opened = fenceOpen.exec(line)?.[1];
    if (opened !== undefined) {
      openFence = opened;
      continue;
    }

    if (line === "$$" || line === "\\[") {
      inMathBlock = true;
      continue;
    }

    // Outside fences/math: a blank line marks a complete boundary.
    if (line === "") {
      lastCompleteBoundary = i;
    }
  }

  if (lastCompleteBoundary < 0) {
    return { complete: "", pending: text };
  }

  return {
    complete: lines.slice(0, lastCompleteBoundary + 1).join("\n"),
    pending: lines.slice(lastCompleteBoundary + 1).join("\n"),
  };
}
|
|
||||||
|
|
||||||
/**
 * Appends the closing delimiter for a block that is still open at the end of
 * `pending`, so partially streamed markdown can be rendered as if complete.
 *
 * The text is scanned line by line (trailing whitespace ignored) while tracking
 * two kinds of blocks:
 *  - fenced code blocks opened by a run of three or more backticks or tildes;
 *  - display-math blocks opened by a line that is exactly `$$` or `\[`.
 *
 * A fence is closed by a line holding only a run of the same character that is
 * at least as long as the opening run (the CommonMark rule). The closer that
 * gets appended repeats the full opening run, so a four-backtick fence is
 * closed with four backticks rather than three.
 *
 * @param pending - Trailing markdown that may end inside an unterminated block.
 * @returns `""` for empty input; `pending` followed by a newline and the
 *   matching closer when a fence or math block is still open; otherwise
 *   `pending` unchanged.
 */
export function closePendingBlock(pending: string): string {
  if (!pending) return "";

  // Compiled once up front instead of once per scanned line.
  const fenceOpener = /^\s*(`{3,}|~{3,})/;
  const fenceCloser = /^\s*(`{3,}|~{3,})\s*$/;

  let openFence = ""; // full opening run while inside a fence, "" otherwise
  let mathClose = ""; // closer to append while inside display math, "" otherwise

  for (const line of pending.split("\n")) {
    const trimmed = line.trimEnd();

    if (openFence) {
      const run = fenceCloser.exec(trimmed)?.[1];
      if (
        run !== undefined &&
        run.charAt(0) === openFence.charAt(0) &&
        run.length >= openFence.length
      ) {
        openFence = "";
      }
      continue;
    }

    if (mathClose) {
      // Either display-math closer ends the block, whichever opener started it.
      if (trimmed === "$$" || trimmed === "\\]") {
        mathClose = "";
      }
      continue;
    }

    const opened = fenceOpener.exec(trimmed)?.[1];
    if (opened !== undefined) {
      // Anything after the run (e.g. a language tag) is allowed on an opener.
      openFence = opened;
      continue;
    }

    if (trimmed === "$$") {
      mathClose = "$$";
    } else if (trimmed === "\\[") {
      mathClose = "\\]";
    }
  }

  if (openFence) return pending + "\n" + openFence;
  if (mathClose) return pending + "\n" + mathClose;
  return pending;
}
|
|
||||||
|
|
||||||
/** One chunk of already-rendered markdown held in a {@link StreamingCache}. */
export interface RenderedBlock {
  /** Identifier taken from the cache's `nextId` counter when the block is created. */
  id: number;
  /** HTML string produced by `renderMarkdown` for this chunk. */
  html: string;
}
|
|
||||||
|
|
||||||
/** Mutable state carried between successive `renderStreamingMarkdown` calls. */
export interface StreamingCache {
  /** Blocks rendered so far for the completed portion of the text. */
  blocks: RenderedBlock[];
  /** Next id to hand out; incremented every time a block is created. */
  nextId: number;
  /** The completed text that `blocks` was rendered from; `""` when nothing is cached. */
  completeKey: string;
}
|
|
||||||
|
|
||||||
/**
 * Builds a fresh, empty cache: no rendered blocks, ids starting at 0 and an
 * empty `completeKey`.
 *
 * @returns A new cache object; every call returns a distinct instance.
 */
export function createStreamingCache(): StreamingCache {
  const emptyCache: StreamingCache = {
    blocks: [],
    nextId: 0,
    completeKey: "",
  };
  return emptyCache;
}
|
|
||||||
|
|
||||||
/**
 * Renders streamed markdown incrementally, reusing HTML for text that is
 * already complete.
 *
 * `text` is split by `splitCompleteBlocks` into a completed part and a pending
 * tail. The completed part is compared with `cache.completeKey`:
 *  - unchanged: the cached blocks are reused as-is;
 *  - extended (starts with the previous non-empty key): only the added suffix
 *    is rendered and appended as one new block;
 *  - anything else: the whole completed part is re-rendered as a single block.
 * When there is no completed part, any cached blocks are dropped.
 *
 * The pending tail is closed with `closePendingBlock` and rendered on every call.
 *
 * @param text - Full markdown received so far.
 * @param cache - Cache to read and update in place (`blocks`, `nextId`, `completeKey`).
 * @returns The cache's current blocks plus the HTML of the pending tail
 *   (`""` when there is no pending text).
 */
export function renderStreamingMarkdown(
  text: string,
  cache: StreamingCache,
): { blocks: RenderedBlock[]; pendingHtml: string } {
  const split = splitCompleteBlocks(text);
  const stable = split.complete;
  const tail = split.pending;

  if (!stable) {
    // Nothing is complete any more: forget whatever was cached.
    if (cache.blocks.length > 0) {
      cache.blocks = [];
      cache.completeKey = "";
    }
  } else if (stable !== cache.completeKey) {
    const previous = cache.completeKey;
    const grew = previous.length > 0 && stable.startsWith(previous);

    if (grew) {
      // Completed text only gained a suffix: render just that suffix.
      const id = cache.nextId++;
      const html = renderMarkdown(stable.slice(previous.length));
      cache.blocks = cache.blocks.concat([{ id, html }]);
    } else {
      // Completed text diverged from the cached key: start over with one block.
      const id = cache.nextId++;
      const html = renderMarkdown(stable);
      cache.blocks = [{ id, html }];
    }

    cache.completeKey = stable;
  }

  const pendingHtml = tail ? renderMarkdown(closePendingBlock(tail)) : "";

  return { blocks: cache.blocks, pendingHtml };
}
|
|
||||||
|
|
||||||
// Opening fence: a run of 3+ backticks (with no further backtick on the line,
// which would make it an inline code span instead) or a run of 3+ tildes.
const LATEX_FENCE_OPEN = /^\s*(`{3,}(?=[^`]*$)|~{3,})/;
// Closing fence: a line holding nothing but a fence run.
const LATEX_FENCE_CLOSE = /^\s*(`{3,}|~{3,})\s*$/;
// Each pattern tries, in order: an inline code span (left untouched), a stray
// backtick run (left untouched, so a scan never restarts inside a run), and
// finally the LaTeX delimiter pair to rewrite. Code spans may not cross a
// blank line.
const LATEX_DISPLAY_OR_CODE =
  /(`+)(?:[^`]|[^`](?:(?!\n[ \t]*\n)[\s\S])*?[^`])\1(?!`)|`+|\\\[([\s\S]*?)\\\]/g;
const LATEX_INLINE_OR_CODE =
  /(`+)(?:[^`]|[^`](?:(?!\n[ \t]*\n)[\s\S])*?[^`])\1(?!`)|`+|\\\(([\s\S]*?)\\\)/g;

/** Rewrites LaTeX delimiters in text that contains no fenced code blocks. */
function convertLatexInProse(prose: string): string {
  // Replacer functions are used so `$` in the output is never read as a
  // replacement pattern. `inner` is undefined when a code alternative matched.
  return prose
    .replace(
      LATEX_DISPLAY_OR_CODE,
      (match: string, _ticks: string | undefined, inner: string | undefined) =>
        inner === undefined ? match : `$$${inner}$$`,
    )
    .replace(
      LATEX_INLINE_OR_CODE,
      (match: string, _ticks: string | undefined, inner: string | undefined) =>
        inner === undefined ? match : `$${inner}$`,
    );
}

/**
 * Converts LaTeX-style math delimiters to dollar delimiters:
 * `\[...\]` becomes `$$...$$` (may span multiple lines) and `\(...\)` becomes
 * `$...$`. Display math is rewritten first, then inline math.
 *
 * Fenced code blocks and inline code spans are copied through unchanged, so
 * code that merely contains `\[`, `\]`, `\(` or `\)` (regular expressions, for
 * example) is not corrupted. An unterminated fence protects everything up to
 * the end of the text.
 *
 * @param text - Markdown source.
 * @returns The markdown with math delimiters normalised outside of code.
 */
export function normalizeLatexDelimiters(text: string): string {
  const pieces: string[] = [];
  let proseLines: string[] = [];
  let openFence = ""; // opening fence run while inside a fenced block

  const flushProse = (): void => {
    if (proseLines.length === 0) return;
    pieces.push(convertLatexInProse(proseLines.join("\n")));
    proseLines = [];
  };

  for (const line of text.split("\n")) {
    if (openFence) {
      pieces.push(line);
      const run = LATEX_FENCE_CLOSE.exec(line)?.[1];
      if (
        run !== undefined &&
        run.charAt(0) === openFence.charAt(0) &&
        run.length >= openFence.length
      ) {
        openFence = "";
      }
      continue;
    }

    const opened = LATEX_FENCE_OPEN.exec(line)?.[1];
    if (opened !== undefined) {
      // Convert the prose gathered so far before entering the code block.
      flushProse();
      openFence = opened;
      pieces.push(line);
      continue;
    }

    proseLines.push(line);
  }

  flushProse();
  return pieces.join("\n");
}
|
|
||||||
|
|
||||||
export function renderMarkdown(content: string): string {
|
export function renderMarkdown(content: string): string {
|
||||||
if (!content) {
|
if (!content) {
|
||||||
return "";
|
return "";
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const result = processor.processSync(normalizeLatexDelimiters(content));
|
const result = processor.processSync(content);
|
||||||
return String(result);
|
return String(result);
|
||||||
} catch {
|
} catch {
|
||||||
// Fallback to escaped plain text if markdown parsing fails
|
// Fallback to escaped plain text if markdown parsing fails
|
||||||
|
|||||||
@@ -1,27 +0,0 @@
|
|||||||
/** One entry of the `results` array in a {@link RerankResponse}. */
export interface RerankResult {
  /** Presumably the position of the scored document in the request's `documents` array — TODO confirm against the server. */
  index: number;
  /** Score reported for that document. */
  relevance_score: number;
}
|
|
||||||
|
|
||||||
/** Shape that `rerank` declares for the JSON body returned by `POST /v1/rerank`. */
export interface RerankResponse {
  model: string;
  object: string;
  /** Token accounting reported with the response. */
  usage: {
    prompt_tokens: number;
    total_tokens: number;
  };
  /** Per-document results. */
  results: RerankResult[];
}
|
|
||||||
|
|
||||||
/**
 * Sends a rerank request to `POST /v1/rerank` with a JSON body of
 * `{ model, query, documents }`.
 *
 * @param model - Model name forwarded in the request body.
 * @param query - Query text forwarded in the request body.
 * @param documents - Documents forwarded in the request body.
 * @param signal - Abort signal passed to `fetch`.
 * @returns The parsed JSON response body.
 * @throws Error with message `"<status> <statusText>"` when the response is not OK.
 */
export async function rerank(
  model: string,
  query: string,
  documents: string[],
  signal: AbortSignal
): Promise<RerankResponse> {
  const payload = JSON.stringify({ model, query, documents });

  const res = await fetch("/v1/rerank", {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: payload,
    signal,
  });

  if (res.ok) {
    return res.json();
  }
  throw new Error(`${res.status} ${res.statusText}`);
}
|
|
||||||
@@ -1,39 +0,0 @@
|
|||||||
import type { SdApiTxt2ImgRequest, SdApiResponse, SdApiLora } from "./types";
|
|
||||||
|
|
||||||
/**
 * Posts `request`, serialised as JSON, to `/sdapi/v1/txt2img`.
 *
 * @param request - Request object sent as the JSON body.
 * @param signal - Optional abort signal passed to `fetch`.
 * @returns The parsed JSON response body.
 * @throws Error `"SDAPI error: <status> - <body text>"` when the response is not OK.
 */
export async function generateSdImage(
  request: SdApiTxt2ImgRequest,
  signal?: AbortSignal
): Promise<SdApiResponse> {
  const payload = JSON.stringify(request);

  const res = await fetch("/sdapi/v1/txt2img", {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: payload,
    signal,
  });

  if (res.ok) {
    return res.json();
  }

  // Include the response body text in the error message.
  const detail = await res.text();
  throw new Error(`SDAPI error: ${res.status} - ${detail}`);
}
|
|
||||||
|
|
||||||
/**
 * Requests `/sdapi/v1/loras` for `model`, passing the URL-encoded model name
 * as the `model` query parameter.
 *
 * @param model - Model name placed in the query string.
 * @param signal - Optional abort signal passed to `fetch`.
 * @returns The parsed JSON response body.
 * @throws Error `"SDAPI loras error: <status> - <body text>"` when the response is not OK.
 */
export async function fetchSdLoras(
  model: string,
  signal?: AbortSignal
): Promise<SdApiLora[]> {
  const url = `/sdapi/v1/loras?model=${encodeURIComponent(model)}`;
  const res = await fetch(url, { signal });

  if (res.ok) {
    return res.json();
  }

  const detail = await res.text();
  throw new Error(`SDAPI loras error: ${res.status} - ${detail}`);
}
|
|
||||||
@@ -9,7 +9,6 @@ export interface Model {
|
|||||||
description: string;
|
description: string;
|
||||||
unlisted: boolean;
|
unlisted: boolean;
|
||||||
peerID: string;
|
peerID: string;
|
||||||
aliases?: string[];
|
|
||||||
}
|
}
|
||||||
|
|
||||||
export interface Metrics {
|
export interface Metrics {
|
||||||
@@ -39,12 +38,8 @@ export interface LogData {
|
|||||||
data: string;
|
data: string;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Payload type that `"inflight"` API event data is parsed as. */
export interface InFlightStats {
  /** Value the event handler copies into the `inFlightRequests` store. */
  total: number;
}
|
|
||||||
|
|
||||||
export interface APIEventEnvelope {
|
export interface APIEventEnvelope {
|
||||||
type: "modelStatus" | "logData" | "metrics" | "inflight";
|
type: "modelStatus" | "logData" | "metrics";
|
||||||
data: string;
|
data: string;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -115,40 +110,6 @@ export interface ImageGenerationResponse {
|
|||||||
}>;
|
}>;
|
||||||
}
|
}
|
||||||
|
|
||||||
// SDAPI types (stable-diffusion.cpp)
|
|
||||||
/** String-literal union naming the two image API modes: `"openai"` and `"sdapi"`. */
export type ImageApiMode = "openai" | "sdapi";
|
|
||||||
|
|
||||||
/** Element type of the array that `fetchSdLoras` resolves to. */
export interface SdApiLora {
  name: string;
  /** Presumably the value to reference in `SdApiLoraRef.path` — TODO confirm. */
  path: string;
}
|
|
||||||
|
|
||||||
/** Element type of the optional `lora` list on `SdApiTxt2ImgRequest`. */
export interface SdApiLoraRef {
  path: string;
  /** Numeric weight sent with the reference; valid range is not visible here. */
  multiplier: number;
}
|
|
||||||
|
|
||||||
/**
 * Request object that `generateSdImage` serialises as the JSON body of
 * `POST /sdapi/v1/txt2img`. Only `prompt` is required.
 */
export interface SdApiTxt2ImgRequest {
  model?: string;
  prompt: string;
  negative_prompt?: string;

  // Image dimensions.
  width?: number;
  height?: number;

  // Numeric generation settings.
  steps?: number;
  cfg_scale?: number;
  seed?: number;
  batch_size?: number;

  // Named generation settings.
  sampler_name?: string;
  scheduler?: string;

  lora?: SdApiLoraRef[];
}
|
|
||||||
|
|
||||||
/** Shape that `generateSdImage` declares for the JSON body returned by `/sdapi/v1/txt2img`. */
export interface SdApiResponse {
  /** Image payloads as strings; presumably base64-encoded — TODO confirm. */
  images: string[];
  parameters: Record<string, unknown>;
  info: string;
}
|
|
||||||
|
|
||||||
export interface AudioTranscriptionRequest {
|
export interface AudioTranscriptionRequest {
|
||||||
file: File;
|
file: File;
|
||||||
model: string;
|
model: string;
|
||||||
|
|||||||
@@ -4,9 +4,8 @@
|
|||||||
import ImageInterface from "../components/playground/ImageInterface.svelte";
|
import ImageInterface from "../components/playground/ImageInterface.svelte";
|
||||||
import AudioInterface from "../components/playground/AudioInterface.svelte";
|
import AudioInterface from "../components/playground/AudioInterface.svelte";
|
||||||
import SpeechInterface from "../components/playground/SpeechInterface.svelte";
|
import SpeechInterface from "../components/playground/SpeechInterface.svelte";
|
||||||
import RerankInterface from "../components/playground/RerankInterface.svelte";
|
|
||||||
|
|
||||||
type Tab = "chat" | "images" | "speech" | "audio" | "rerank";
|
type Tab = "chat" | "images" | "speech" | "audio";
|
||||||
|
|
||||||
const selectedTabStore = persistentStore<Tab>("playground-selected-tab", "chat");
|
const selectedTabStore = persistentStore<Tab>("playground-selected-tab", "chat");
|
||||||
let mobileMenuOpen = $state(false);
|
let mobileMenuOpen = $state(false);
|
||||||
@@ -16,7 +15,6 @@
|
|||||||
{ id: "images", label: "Images" },
|
{ id: "images", label: "Images" },
|
||||||
{ id: "speech", label: "Speech" },
|
{ id: "speech", label: "Speech" },
|
||||||
{ id: "audio", label: "Transcription" },
|
{ id: "audio", label: "Transcription" },
|
||||||
{ id: "rerank", label: "Rerank" },
|
|
||||||
];
|
];
|
||||||
|
|
||||||
function selectTab(tab: Tab) {
|
function selectTab(tab: Tab) {
|
||||||
@@ -91,9 +89,6 @@
|
|||||||
<div class="h-full" class:tab-hidden={$selectedTabStore !== "audio"}>
|
<div class="h-full" class:tab-hidden={$selectedTabStore !== "audio"}>
|
||||||
<AudioInterface />
|
<AudioInterface />
|
||||||
</div>
|
</div>
|
||||||
<div class="h-full" class:tab-hidden={$selectedTabStore !== "rerank"}>
|
|
||||||
<RerankInterface />
|
|
||||||
</div>
|
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
|||||||
@@ -1 +0,0 @@
|
|||||||
<!-- empty: real Playground is always mounted in App.svelte -->
|
|
||||||
@@ -1,5 +1,5 @@
|
|||||||
import { writable } from "svelte/store";
|
import { writable } from "svelte/store";
|
||||||
import type { Model, Metrics, VersionInfo, LogData, APIEventEnvelope, ReqRespCapture, InFlightStats } from "../lib/types";
|
import type { Model, Metrics, VersionInfo, LogData, APIEventEnvelope, ReqRespCapture } from "../lib/types";
|
||||||
import { connectionState } from "./theme";
|
import { connectionState } from "./theme";
|
||||||
|
|
||||||
const LOG_LENGTH_LIMIT = 1024 * 100; /* 100KB of log data */
|
const LOG_LENGTH_LIMIT = 1024 * 100; /* 100KB of log data */
|
||||||
@@ -9,7 +9,6 @@ export const models = writable<Model[]>([]);
|
|||||||
export const proxyLogs = writable<string>("");
|
export const proxyLogs = writable<string>("");
|
||||||
export const upstreamLogs = writable<string>("");
|
export const upstreamLogs = writable<string>("");
|
||||||
export const metrics = writable<Metrics[]>([]);
|
export const metrics = writable<Metrics[]>([]);
|
||||||
export const inFlightRequests = writable<number>(0);
|
|
||||||
export const versionInfo = writable<VersionInfo>({
|
export const versionInfo = writable<VersionInfo>({
|
||||||
build_date: "unknown",
|
build_date: "unknown",
|
||||||
commit: "unknown",
|
commit: "unknown",
|
||||||
@@ -30,7 +29,6 @@ export function enableAPIEvents(enabled: boolean): void {
|
|||||||
apiEventSource?.close();
|
apiEventSource?.close();
|
||||||
apiEventSource = null;
|
apiEventSource = null;
|
||||||
metrics.set([]);
|
metrics.set([]);
|
||||||
inFlightRequests.set(0);
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -48,7 +46,6 @@ export function enableAPIEvents(enabled: boolean): void {
|
|||||||
proxyLogs.set("");
|
proxyLogs.set("");
|
||||||
upstreamLogs.set("");
|
upstreamLogs.set("");
|
||||||
metrics.set([]);
|
metrics.set([]);
|
||||||
inFlightRequests.set(0);
|
|
||||||
models.set([]);
|
models.set([]);
|
||||||
retryCount = 0;
|
retryCount = 0;
|
||||||
connectionState.set("connected");
|
connectionState.set("connected");
|
||||||
@@ -62,7 +59,7 @@ export function enableAPIEvents(enabled: boolean): void {
|
|||||||
const newModels = JSON.parse(message.data) as Model[];
|
const newModels = JSON.parse(message.data) as Model[];
|
||||||
// Sort models by name and id
|
// Sort models by name and id
|
||||||
newModels.sort((a, b) => {
|
newModels.sort((a, b) => {
|
||||||
return (a.name + a.id).localeCompare(b.name + b.id, undefined, { numeric : true} );
|
return (a.name + a.id).localeCompare(b.name + b.id);
|
||||||
});
|
});
|
||||||
models.set(newModels);
|
models.set(newModels);
|
||||||
break;
|
break;
|
||||||
@@ -86,11 +83,6 @@ export function enableAPIEvents(enabled: boolean): void {
|
|||||||
metrics.update((prevMetrics) => [...newMetrics, ...prevMetrics]);
|
metrics.update((prevMetrics) => [...newMetrics, ...prevMetrics]);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case "inflight": {
|
|
||||||
const stats = JSON.parse(message.data) as InFlightStats;
|
|
||||||
inFlightRequests.set(stats.total ?? 0);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
console.error(e.data, err);
|
console.error(e.data, err);
|
||||||
|
|||||||
@@ -1,20 +0,0 @@
|
|||||||
import { writable, derived } from "svelte/store";
|
|
||||||
|
|
||||||
/** One boolean "busy" flag store per playground tool, each initially `false`. */
export const playgroundStores = {
  chatStreaming: writable(false),
  imageGenerating: writable(false),
  speechGenerating: writable(false),
  audioTranscribing: writable(false),
  rerankLoading: writable(false),
};

/** Derived store that is `true` while at least one of the flag stores is `true`. */
export const playgroundActivity = derived(
  [
    playgroundStores.chatStreaming,
    playgroundStores.imageGenerating,
    playgroundStores.speechGenerating,
    playgroundStores.audioTranscribing,
    playgroundStores.rerankLoading,
  ],
  (flags) => flags.some(Boolean)
);
|
|
||||||
@@ -1,3 +0,0 @@
|
|||||||
import { writable } from "svelte/store";
|
|
||||||
|
|
||||||
/** Writable store holding a route string; its initial value is `"/"`. */
export const currentRoute = writable("/");
|
|
||||||
@@ -32,7 +32,6 @@ export default defineConfig({
|
|||||||
"/upstream": "http://localhost:8080",
|
"/upstream": "http://localhost:8080",
|
||||||
"/unload": "http://localhost:8080",
|
"/unload": "http://localhost:8080",
|
||||||
"/v1": "http://localhost:8080",
|
"/v1": "http://localhost:8080",
|
||||||
"/sdapi": "http://localhost:8080",
|
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
});
|
});
|
||||||
|
|||||||
Reference in New Issue
Block a user