Compare commits
15 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 02aee4e86d | |||
| f45896d395 | |||
| f7e46a359f | |||
| c260907415 | |||
| b83a5fa291 | |||
| 6e2ff28d59 | |||
| a8b81f2799 | |||
| f9ee7156dc | |||
| 2d00120781 | |||
| afc9aef058 | |||
| d7b390df74 | |||
| 5025c2f1f3 | |||
| e3a0b013c1 | |||
| f5763a94a0 | |||
| 8ada72eb57 |
@@ -0,0 +1,37 @@
|
|||||||
|
---
|
||||||
|
name: Bug Report
|
||||||
|
about: Something is not working as expected...
|
||||||
|
title: ''
|
||||||
|
labels: bug
|
||||||
|
assignees: ''
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
**Describe the bug**
|
||||||
|
A clear and concise description of what the bug is.
|
||||||
|
|
||||||
|
**Expected behaviour**
|
||||||
|
A clear and concise description of what you expected to happen.
|
||||||
|
|
||||||
|
**Operating system and version**
|
||||||
|
|
||||||
|
- OS: (linux, osx, windows, freebsd, etc)
|
||||||
|
- GPUs: (list architecture)
|
||||||
|
|
||||||
|
**My Configuration**
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
# copy / paste your configuration here
|
||||||
|
```
|
||||||
|
|
||||||
|
**Proxy Logs**
|
||||||
|
|
||||||
|
```
|
||||||
|
# copy / paste from /logs
|
||||||
|
```
|
||||||
|
|
||||||
|
**Upstream Logs**
|
||||||
|
|
||||||
|
```
|
||||||
|
# copy/paste from /logs
|
||||||
|
```
|
||||||
@@ -0,0 +1,50 @@
|
|||||||
|
name: Windows CI
|
||||||
|
|
||||||
|
on:
|
||||||
|
push:
|
||||||
|
branches: [ "main" ]
|
||||||
|
|
||||||
|
pull_request:
|
||||||
|
branches: [ "main" ]
|
||||||
|
|
||||||
|
# Allows manual triggering of the workflow
|
||||||
|
workflow_dispatch:
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
|
||||||
|
run-tests:
|
||||||
|
runs-on: windows-latest
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v4
|
||||||
|
|
||||||
|
- name: Set up Go
|
||||||
|
uses: actions/setup-go@v4
|
||||||
|
with:
|
||||||
|
go-version: '1.23'
|
||||||
|
|
||||||
|
# cache simple-responder to save the build time
|
||||||
|
- name: Restore Simple Responder
|
||||||
|
id: restore-simple-responder
|
||||||
|
uses: actions/cache/restore@v4
|
||||||
|
with:
|
||||||
|
path: ./build
|
||||||
|
key: ${{ runner.os }}-simple-responder-${{ hashFiles('misc/simple-responder/simple-responder.go') }}
|
||||||
|
|
||||||
|
# necessary for testing proxy/Process swapping
|
||||||
|
- name: Create simple-responder
|
||||||
|
if: steps.restore-simple-responder.outputs.cache-hit != 'true'
|
||||||
|
shell: bash
|
||||||
|
run: make simple-responder-windows
|
||||||
|
|
||||||
|
- name: Save Simple Responder
|
||||||
|
# nothing new to save ... skip this step
|
||||||
|
if: steps.restore-simple-responder.outputs.cache-hit != 'true'
|
||||||
|
id: save-simple-responder
|
||||||
|
uses: actions/cache/save@v4
|
||||||
|
with:
|
||||||
|
path: ./build
|
||||||
|
key: ${{ runner.os }}-simple-responder-${{ hashFiles('misc/simple-responder/simple-responder.go') }}
|
||||||
|
|
||||||
|
- name: Test all
|
||||||
|
shell: bash
|
||||||
|
run: make test-all
|
||||||
@@ -1,6 +1,4 @@
|
|||||||
# This workflow will build a golang project
|
name: Linux CI
|
||||||
|
|
||||||
name: CI
|
|
||||||
|
|
||||||
on:
|
on:
|
||||||
push:
|
push:
|
||||||
@@ -24,9 +22,26 @@ jobs:
|
|||||||
with:
|
with:
|
||||||
go-version: '1.23'
|
go-version: '1.23'
|
||||||
|
|
||||||
|
# cache simple-responder to save the build time
|
||||||
|
- name: Restore Simple Responder
|
||||||
|
id: restore-simple-responder
|
||||||
|
uses: actions/cache/restore@v4
|
||||||
|
with:
|
||||||
|
path: ./build
|
||||||
|
key: ${{ runner.os }}-simple-responder-${{ hashFiles('misc/simple-responder/simple-responder.go') }}
|
||||||
|
|
||||||
# necessary for testing proxy/Process swapping
|
# necessary for testing proxy/Process swapping
|
||||||
- name: Create simple-responder
|
- name: Create simple-responder
|
||||||
run: make simple-responder
|
run: make simple-responder
|
||||||
|
|
||||||
|
- name: Save Simple Responder
|
||||||
|
# nothing new to save ... skip this step
|
||||||
|
if: steps.restore-simple-responder.outputs.cache-hit != 'true'
|
||||||
|
id: save-simple-responder
|
||||||
|
uses: actions/cache/save@v4
|
||||||
|
with:
|
||||||
|
path: ./build
|
||||||
|
key: ${{ runner.os }}-simple-responder-${{ hashFiles('misc/simple-responder/simple-responder.go') }}
|
||||||
|
|
||||||
- name: Test all
|
- name: Test all
|
||||||
run: make test-all
|
run: make test-all
|
||||||
@@ -46,6 +46,10 @@ simple-responder:
|
|||||||
GOOS=darwin GOARCH=arm64 go build -o $(BUILD_DIR)/simple-responder_darwin_arm64 misc/simple-responder/simple-responder.go
|
GOOS=darwin GOARCH=arm64 go build -o $(BUILD_DIR)/simple-responder_darwin_arm64 misc/simple-responder/simple-responder.go
|
||||||
GOOS=linux GOARCH=amd64 go build -o $(BUILD_DIR)/simple-responder_linux_amd64 misc/simple-responder/simple-responder.go
|
GOOS=linux GOARCH=amd64 go build -o $(BUILD_DIR)/simple-responder_linux_amd64 misc/simple-responder/simple-responder.go
|
||||||
|
|
||||||
|
simple-responder-windows:
|
||||||
|
@echo "Building simple responder for windows"
|
||||||
|
GOOS=windows GOARCH=amd64 go build -o $(BUILD_DIR)/simple-responder.exe misc/simple-responder/simple-responder.go
|
||||||
|
|
||||||
# Ensure build directory exists
|
# Ensure build directory exists
|
||||||
$(BUILD_DIR):
|
$(BUILD_DIR):
|
||||||
mkdir -p $(BUILD_DIR)
|
mkdir -p $(BUILD_DIR)
|
||||||
|
|||||||
@@ -46,14 +46,14 @@ llama-swap's configuration is purposefully simple.
|
|||||||
models:
|
models:
|
||||||
"qwen2.5":
|
"qwen2.5":
|
||||||
proxy: "http://127.0.0.1:9999"
|
proxy: "http://127.0.0.1:9999"
|
||||||
cmd: >
|
cmd: |
|
||||||
/app/llama-server
|
/app/llama-server
|
||||||
-hf bartowski/Qwen2.5-0.5B-Instruct-GGUF:Q4_K_M
|
-hf bartowski/Qwen2.5-0.5B-Instruct-GGUF:Q4_K_M
|
||||||
--port 9999
|
--port 9999
|
||||||
|
|
||||||
"smollm2":
|
"smollm2":
|
||||||
proxy: "http://127.0.0.1:9999"
|
proxy: "http://127.0.0.1:9999"
|
||||||
cmd: >
|
cmd: |
|
||||||
/app/llama-server
|
/app/llama-server
|
||||||
-hf bartowski/SmolLM2-135M-Instruct-GGUF:Q4_K_M
|
-hf bartowski/SmolLM2-135M-Instruct-GGUF:Q4_K_M
|
||||||
--port 9999
|
--port 9999
|
||||||
@@ -82,7 +82,7 @@ startPort: 10001
|
|||||||
models:
|
models:
|
||||||
"llama":
|
"llama":
|
||||||
# multiline for readability
|
# multiline for readability
|
||||||
cmd: >
|
cmd: |
|
||||||
llama-server --port 8999
|
llama-server --port 8999
|
||||||
--model path/to/Qwen2.5-1.5B-Instruct-Q4_K_M.gguf
|
--model path/to/Qwen2.5-1.5B-Instruct-Q4_K_M.gguf
|
||||||
|
|
||||||
@@ -123,12 +123,18 @@ models:
|
|||||||
# Docker Support (v26.1.4+ required!)
|
# Docker Support (v26.1.4+ required!)
|
||||||
"docker-llama":
|
"docker-llama":
|
||||||
proxy: "http://127.0.0.1:${PORT}"
|
proxy: "http://127.0.0.1:${PORT}"
|
||||||
cmd: >
|
cmd: |
|
||||||
docker run --name dockertest
|
docker run --name dockertest
|
||||||
--init --rm -p ${PORT}:8080 -v /mnt/nvme/models:/models
|
--init --rm -p ${PORT}:8080 -v /mnt/nvme/models:/models
|
||||||
ghcr.io/ggerganov/llama.cpp:server
|
ghcr.io/ggerganov/llama.cpp:server
|
||||||
--model '/models/Qwen2.5-Coder-0.5B-Instruct-Q4_K_M.gguf'
|
--model '/models/Qwen2.5-Coder-0.5B-Instruct-Q4_K_M.gguf'
|
||||||
|
|
||||||
|
# use a custom command to stop the model when swapping. By default
|
||||||
|
# this is SIGTERM on POSIX systems, and taskkill on Windows systems.
|
||||||
|
# The ${PID} variable can be used in cmdStop; it will be automatically replaced
|
||||||
|
# with the PID of the running model
|
||||||
|
cmdStop: docker stop dockertest
|
||||||
|
|
||||||
# Groups provide advanced controls over model swapping behaviour. Using groups
|
# Groups provide advanced controls over model swapping behaviour. Using groups
|
||||||
# some models can be kept loaded indefinitely, while others are swapped out.
|
# some models can be kept loaded indefinitely, while others are swapped out.
|
||||||
#
|
#
|
||||||
@@ -247,11 +253,11 @@ Pre-built binaries are available for Linux, FreeBSD and Darwin (OSX). These are
|
|||||||
1. Create a configuration file, see [config.example.yaml](config.example.yaml)
|
1. Create a configuration file, see [config.example.yaml](config.example.yaml)
|
||||||
1. Download a [release](https://github.com/mostlygeek/llama-swap/releases) appropriate for your OS and architecture.
|
1. Download a [release](https://github.com/mostlygeek/llama-swap/releases) appropriate for your OS and architecture.
|
||||||
1. Run the binary with `llama-swap --config path/to/config.yaml`.
|
1. Run the binary with `llama-swap --config path/to/config.yaml`.
|
||||||
Available flags:
|
Available flags:
|
||||||
- `--config`: Path to the configuration file (default: `config.yaml`).
|
- `--config`: Path to the configuration file (default: `config.yaml`).
|
||||||
- `--listen`: Address and port to listen on (default: `:8080`).
|
- `--listen`: Address and port to listen on (default: `:8080`).
|
||||||
- `--version`: Show version information and exit.
|
- `--version`: Show version information and exit.
|
||||||
- `--watch-config`: Automatically reload the configuration file when it changes. This will wait for in-flight requests to complete then stop all running models (default: `false`).
|
- `--watch-config`: Automatically reload the configuration file when it changes. This will wait for in-flight requests to complete then stop all running models (default: `false`).
|
||||||
|
|
||||||
### Building from source
|
### Building from source
|
||||||
|
|
||||||
|
|||||||
+4
-4
@@ -15,7 +15,7 @@ groups:
|
|||||||
|
|
||||||
models:
|
models:
|
||||||
"llama":
|
"llama":
|
||||||
cmd: >
|
cmd: |
|
||||||
models/llama-server-osx
|
models/llama-server-osx
|
||||||
--port ${PORT}
|
--port ${PORT}
|
||||||
-m models/Llama-3.2-1B-Instruct-Q4_0.gguf
|
-m models/Llama-3.2-1B-Instruct-Q4_0.gguf
|
||||||
@@ -38,7 +38,7 @@ models:
|
|||||||
# Embedding example with Nomic
|
# Embedding example with Nomic
|
||||||
# https://huggingface.co/nomic-ai/nomic-embed-text-v1.5-GGUF
|
# https://huggingface.co/nomic-ai/nomic-embed-text-v1.5-GGUF
|
||||||
"nomic":
|
"nomic":
|
||||||
cmd: >
|
cmd: |
|
||||||
models/llama-server-osx --port ${PORT}
|
models/llama-server-osx --port ${PORT}
|
||||||
-m models/nomic-embed-text-v1.5.Q8_0.gguf
|
-m models/nomic-embed-text-v1.5.Q8_0.gguf
|
||||||
--ctx-size 8192
|
--ctx-size 8192
|
||||||
@@ -51,7 +51,7 @@ models:
|
|||||||
# Reranking example with bge-reranker
|
# Reranking example with bge-reranker
|
||||||
# https://huggingface.co/gpustack/bge-reranker-v2-m3-GGUF
|
# https://huggingface.co/gpustack/bge-reranker-v2-m3-GGUF
|
||||||
"bge-reranker":
|
"bge-reranker":
|
||||||
cmd: >
|
cmd: |
|
||||||
models/llama-server-osx --port ${PORT}
|
models/llama-server-osx --port ${PORT}
|
||||||
-m models/bge-reranker-v2-m3-Q4_K_M.gguf
|
-m models/bge-reranker-v2-m3-Q4_K_M.gguf
|
||||||
--ctx-size 8192
|
--ctx-size 8192
|
||||||
@@ -59,7 +59,7 @@ models:
|
|||||||
|
|
||||||
# Docker Support (v26.1.4+ required!)
|
# Docker Support (v26.1.4+ required!)
|
||||||
"dockertest":
|
"dockertest":
|
||||||
cmd: >
|
cmd: |
|
||||||
docker run --name dockertest
|
docker run --name dockertest
|
||||||
--init --rm -p ${PORT}:8080 -v /mnt/nvme/models:/models
|
--init --rm -p ${PORT}:8080 -v /mnt/nvme/models:/models
|
||||||
ghcr.io/ggerganov/llama.cpp:server
|
ghcr.io/ggerganov/llama.cpp:server
|
||||||
|
|||||||
@@ -5,7 +5,6 @@ go 1.23.0
|
|||||||
require (
|
require (
|
||||||
github.com/fsnotify/fsnotify v1.9.0
|
github.com/fsnotify/fsnotify v1.9.0
|
||||||
github.com/gin-gonic/gin v1.10.0
|
github.com/gin-gonic/gin v1.10.0
|
||||||
github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510
|
|
||||||
github.com/stretchr/testify v1.9.0
|
github.com/stretchr/testify v1.9.0
|
||||||
github.com/tidwall/gjson v1.18.0
|
github.com/tidwall/gjson v1.18.0
|
||||||
github.com/tidwall/sjson v1.2.5
|
github.com/tidwall/sjson v1.2.5
|
||||||
@@ -13,6 +12,7 @@ require (
|
|||||||
)
|
)
|
||||||
|
|
||||||
require (
|
require (
|
||||||
|
github.com/billziss-gh/golib v0.2.0 // indirect
|
||||||
github.com/bytedance/sonic v1.11.6 // indirect
|
github.com/bytedance/sonic v1.11.6 // indirect
|
||||||
github.com/bytedance/sonic/loader v0.1.1 // indirect
|
github.com/bytedance/sonic/loader v0.1.1 // indirect
|
||||||
github.com/cloudwego/base64x v0.1.4 // indirect
|
github.com/cloudwego/base64x v0.1.4 // indirect
|
||||||
|
|||||||
@@ -1,3 +1,5 @@
|
|||||||
|
github.com/billziss-gh/golib v0.2.0 h1:NyvcAQdfvM8xokKkKotiligKjKXzuQD4PPykg1nKc/8=
|
||||||
|
github.com/billziss-gh/golib v0.2.0/go.mod h1:mZpUYANXZkDKSnyYbX9gfnyxwe0ddRhUtfXcsD5r8dw=
|
||||||
github.com/bytedance/sonic v1.11.6 h1:oUp34TzMlL+OY1OUWxHqsdkgC/Zfc85zGqw9siXjrc0=
|
github.com/bytedance/sonic v1.11.6 h1:oUp34TzMlL+OY1OUWxHqsdkgC/Zfc85zGqw9siXjrc0=
|
||||||
github.com/bytedance/sonic v1.11.6/go.mod h1:LysEHSvpvDySVdC2f87zGWf6CIKJcAvqab1ZaiQtds4=
|
github.com/bytedance/sonic v1.11.6/go.mod h1:LysEHSvpvDySVdC2f87zGWf6CIKJcAvqab1ZaiQtds4=
|
||||||
github.com/bytedance/sonic/loader v0.1.1 h1:c+e5Pt1k/cy5wMveRDyk2X4B9hF4g7an8N3zCYjJFNM=
|
github.com/bytedance/sonic/loader v0.1.1 h1:c+e5Pt1k/cy5wMveRDyk2X4B9hF4g7an8N3zCYjJFNM=
|
||||||
|
|||||||
+27
-9
@@ -4,11 +4,12 @@ import (
|
|||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"os"
|
"os"
|
||||||
|
"runtime"
|
||||||
"sort"
|
"sort"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
"github.com/google/shlex"
|
"github.com/billziss-gh/golib/shlex"
|
||||||
"gopkg.in/yaml.v3"
|
"gopkg.in/yaml.v3"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -16,6 +17,7 @@ const DEFAULT_GROUP_ID = "(default)"
|
|||||||
|
|
||||||
type ModelConfig struct {
|
type ModelConfig struct {
|
||||||
Cmd string `yaml:"cmd"`
|
Cmd string `yaml:"cmd"`
|
||||||
|
CmdStop string `yaml:"cmdStop"`
|
||||||
Proxy string `yaml:"proxy"`
|
Proxy string `yaml:"proxy"`
|
||||||
Aliases []string `yaml:"aliases"`
|
Aliases []string `yaml:"aliases"`
|
||||||
Env []string `yaml:"env"`
|
Env []string `yaml:"env"`
|
||||||
@@ -134,7 +136,6 @@ func LoadConfigFromReader(r io.Reader) (Config, error) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// iterate over the models and replace any ${PORT} with the next available port
|
|
||||||
// Get and sort all model IDs first, makes testing more consistent
|
// Get and sort all model IDs first, makes testing more consistent
|
||||||
modelIds := make([]string, 0, len(config.Models))
|
modelIds := make([]string, 0, len(config.Models))
|
||||||
for modelId := range config.Models {
|
for modelId := range config.Models {
|
||||||
@@ -142,10 +143,10 @@ func LoadConfigFromReader(r io.Reader) (Config, error) {
|
|||||||
}
|
}
|
||||||
sort.Strings(modelIds) // This guarantees stable iteration order
|
sort.Strings(modelIds) // This guarantees stable iteration order
|
||||||
|
|
||||||
// iterate over the sorted models
|
|
||||||
nextPort := config.StartPort
|
nextPort := config.StartPort
|
||||||
for _, modelId := range modelIds {
|
for _, modelId := range modelIds {
|
||||||
modelConfig := config.Models[modelId]
|
modelConfig := config.Models[modelId]
|
||||||
|
// iterate over the models and replace any ${PORT} with the next available port
|
||||||
if strings.Contains(modelConfig.Cmd, "${PORT}") {
|
if strings.Contains(modelConfig.Cmd, "${PORT}") {
|
||||||
modelConfig.Cmd = strings.ReplaceAll(modelConfig.Cmd, "${PORT}", strconv.Itoa(nextPort))
|
modelConfig.Cmd = strings.ReplaceAll(modelConfig.Cmd, "${PORT}", strconv.Itoa(nextPort))
|
||||||
if modelConfig.Proxy == "" {
|
if modelConfig.Proxy == "" {
|
||||||
@@ -159,6 +160,7 @@ func LoadConfigFromReader(r io.Reader) (Config, error) {
|
|||||||
return Config{}, fmt.Errorf("model %s requires a proxy value when not using automatic ${PORT}", modelId)
|
return Config{}, fmt.Errorf("model %s requires a proxy value when not using automatic ${PORT}", modelId)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
config = AddDefaultGroupToConfig(config)
|
config = AddDefaultGroupToConfig(config)
|
||||||
// check that members are all unique in the groups
|
// check that members are all unique in the groups
|
||||||
memberUsage := make(map[string]string) // maps member to group it appears in
|
memberUsage := make(map[string]string) // maps member to group it appears in
|
||||||
@@ -228,14 +230,30 @@ func AddDefaultGroupToConfig(config Config) Config {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func SanitizeCommand(cmdStr string) ([]string, error) {
|
func SanitizeCommand(cmdStr string) ([]string, error) {
|
||||||
// Remove trailing backslashes
|
var cleanedLines []string
|
||||||
cmdStr = strings.ReplaceAll(cmdStr, "\\ \n", " ")
|
for _, line := range strings.Split(cmdStr, "\n") {
|
||||||
cmdStr = strings.ReplaceAll(cmdStr, "\\\n", " ")
|
trimmed := strings.TrimSpace(line)
|
||||||
|
// Skip comment lines
|
||||||
|
if strings.HasPrefix(trimmed, "#") {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
// Handle trailing backslashes by replacing with space
|
||||||
|
if strings.HasSuffix(trimmed, "\\") {
|
||||||
|
cleanedLines = append(cleanedLines, strings.TrimSuffix(trimmed, "\\")+" ")
|
||||||
|
} else {
|
||||||
|
cleanedLines = append(cleanedLines, line)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// put it back together
|
||||||
|
cmdStr = strings.Join(cleanedLines, "\n")
|
||||||
|
|
||||||
// Split the command into arguments
|
// Split the command into arguments
|
||||||
args, err := shlex.Split(cmdStr)
|
var args []string
|
||||||
if err != nil {
|
if runtime.GOOS == "windows" {
|
||||||
return nil, err
|
args = shlex.Windows.Split(cmdStr)
|
||||||
|
} else {
|
||||||
|
args = shlex.Posix.Split(cmdStr)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Ensure the command is not empty
|
// Ensure the command is not empty
|
||||||
|
|||||||
@@ -0,0 +1,42 @@
|
|||||||
|
//go:build !windows
|
||||||
|
|
||||||
|
package proxy
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestConfig_SanitizeCommand(t *testing.T) {
|
||||||
|
// Test a command with spaces and newlines
|
||||||
|
args, err := SanitizeCommand(`python model1.py \
|
||||||
|
-a "double quotes" \
|
||||||
|
--arg2 'single quotes'
|
||||||
|
-s
|
||||||
|
# comment 1
|
||||||
|
--arg3 123 \
|
||||||
|
|
||||||
|
# comment 2
|
||||||
|
--arg4 '"string in string"'
|
||||||
|
|
||||||
|
|
||||||
|
# this will get stripped out as well as the white space above
|
||||||
|
-c "'single quoted'"
|
||||||
|
`)
|
||||||
|
assert.NoError(t, err)
|
||||||
|
assert.Equal(t, []string{
|
||||||
|
"python", "model1.py",
|
||||||
|
"-a", "double quotes",
|
||||||
|
"--arg2", "single quotes",
|
||||||
|
"-s",
|
||||||
|
"--arg3", "123",
|
||||||
|
"--arg4", `"string in string"`,
|
||||||
|
"-c", `'single quoted'`,
|
||||||
|
}, args)
|
||||||
|
|
||||||
|
// Test an empty command
|
||||||
|
args, err = SanitizeCommand("")
|
||||||
|
assert.Error(t, err)
|
||||||
|
assert.Nil(t, args)
|
||||||
|
}
|
||||||
@@ -258,34 +258,6 @@ func TestConfig_FindConfig(t *testing.T) {
|
|||||||
assert.Equal(t, ModelConfig{}, modelConfig)
|
assert.Equal(t, ModelConfig{}, modelConfig)
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestConfig_SanitizeCommand(t *testing.T) {
|
|
||||||
|
|
||||||
// Test a command with spaces and newlines
|
|
||||||
args, err := SanitizeCommand(`python model1.py \
|
|
||||||
-a "double quotes" \
|
|
||||||
--arg2 'single quotes'
|
|
||||||
-s
|
|
||||||
--arg3 123 \
|
|
||||||
--arg4 '"string in string"'
|
|
||||||
-c "'single quoted'"
|
|
||||||
`)
|
|
||||||
assert.NoError(t, err)
|
|
||||||
assert.Equal(t, []string{
|
|
||||||
"python", "model1.py",
|
|
||||||
"-a", "double quotes",
|
|
||||||
"--arg2", "single quotes",
|
|
||||||
"-s",
|
|
||||||
"--arg3", "123",
|
|
||||||
"--arg4", `"string in string"`,
|
|
||||||
"-c", `'single quoted'`,
|
|
||||||
}, args)
|
|
||||||
|
|
||||||
// Test an empty command
|
|
||||||
args, err = SanitizeCommand("")
|
|
||||||
assert.Error(t, err)
|
|
||||||
assert.Nil(t, args)
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestConfig_AutomaticPortAssignments(t *testing.T) {
|
func TestConfig_AutomaticPortAssignments(t *testing.T) {
|
||||||
|
|
||||||
t.Run("Default Port Ranges", func(t *testing.T) {
|
t.Run("Default Port Ranges", func(t *testing.T) {
|
||||||
|
|||||||
@@ -0,0 +1,41 @@
|
|||||||
|
//go:build windows
|
||||||
|
|
||||||
|
package proxy
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestConfig_SanitizeCommand(t *testing.T) {
|
||||||
|
// does not support single quoted strings like in config_posix_test.go
|
||||||
|
args, err := SanitizeCommand(`python model1.py \
|
||||||
|
|
||||||
|
-a "double quotes" \
|
||||||
|
-s
|
||||||
|
--arg3 123 \
|
||||||
|
|
||||||
|
# comment 2
|
||||||
|
--arg4 '"string in string"'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# this will get stripped out as well as the white space above
|
||||||
|
-c "'single quoted'"
|
||||||
|
`)
|
||||||
|
assert.NoError(t, err)
|
||||||
|
assert.Equal(t, []string{
|
||||||
|
"python", "model1.py",
|
||||||
|
"-a", "double quotes",
|
||||||
|
"-s",
|
||||||
|
"--arg3", "123",
|
||||||
|
"--arg4", "'string in string'", // this is a little weird but the lexer says so...?
|
||||||
|
"-c", `'single quoted'`,
|
||||||
|
}, args)
|
||||||
|
|
||||||
|
// Test an empty command
|
||||||
|
args, err = SanitizeCommand("")
|
||||||
|
assert.Error(t, err)
|
||||||
|
assert.Nil(t, args)
|
||||||
|
}
|
||||||
@@ -45,7 +45,12 @@ func TestMain(m *testing.M) {
|
|||||||
func getSimpleResponderPath() string {
|
func getSimpleResponderPath() string {
|
||||||
goos := runtime.GOOS
|
goos := runtime.GOOS
|
||||||
goarch := runtime.GOARCH
|
goarch := runtime.GOARCH
|
||||||
return filepath.Join("..", "build", fmt.Sprintf("simple-responder_%s_%s", goos, goarch))
|
|
||||||
|
if goos == "windows" {
|
||||||
|
return filepath.Join("..", "build", "simple-responder.exe")
|
||||||
|
} else {
|
||||||
|
return filepath.Join("..", "build", fmt.Sprintf("simple-responder_%s_%s", goos, goarch))
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func getTestPort() int {
|
func getTestPort() int {
|
||||||
|
|||||||
+55
-10
@@ -8,6 +8,7 @@ import (
|
|||||||
"net/http"
|
"net/http"
|
||||||
"net/url"
|
"net/url"
|
||||||
"os/exec"
|
"os/exec"
|
||||||
|
"runtime"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
@@ -80,9 +81,8 @@ func NewProcess(ID string, healthCheckTimeout int, config ModelConfig, processLo
|
|||||||
concurrentLimit := 10
|
concurrentLimit := 10
|
||||||
if config.ConcurrencyLimit > 0 {
|
if config.ConcurrencyLimit > 0 {
|
||||||
concurrentLimit = config.ConcurrencyLimit
|
concurrentLimit = config.ConcurrencyLimit
|
||||||
} else {
|
|
||||||
proxyLogger.Debugf("Concurrency limit for model %s not set, defaulting to 10", ID)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return &Process{
|
return &Process{
|
||||||
ID: ID,
|
ID: ID,
|
||||||
config: config,
|
config: config,
|
||||||
@@ -148,7 +148,9 @@ func isValidTransition(from, to ProcessState) bool {
|
|||||||
return to == StateStopping
|
return to == StateStopping
|
||||||
case StateStopping:
|
case StateStopping:
|
||||||
return to == StateStopped || to == StateShutdown
|
return to == StateStopped || to == StateShutdown
|
||||||
case StateFailed, StateShutdown:
|
case StateFailed:
|
||||||
|
return to == StateStopping
|
||||||
|
case StateShutdown:
|
||||||
return false // No transitions allowed from these states
|
return false // No transitions allowed from these states
|
||||||
}
|
}
|
||||||
return false
|
return false
|
||||||
@@ -358,12 +360,19 @@ func (p *Process) StopImmediately() {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
p.proxyLogger.Debugf("<%s> Stopping process", p.ID)
|
p.proxyLogger.Debugf("<%s> Stopping process, current state: %s", p.ID, p.CurrentState())
|
||||||
|
currentState := p.CurrentState()
|
||||||
|
|
||||||
// calling Stop() when state is invalid is a no-op
|
if currentState == StateFailed {
|
||||||
if curState, err := p.swapState(StateReady, StateStopping); err != nil {
|
if curState, err := p.swapState(StateFailed, StateStopping); err != nil {
|
||||||
p.proxyLogger.Infof("<%s> Stop() Ready -> StateStopping err: %v, current state: %v", p.ID, err, curState)
|
p.proxyLogger.Infof("<%s> Stop() Failed -> StateStopping err: %v, current state: %v", p.ID, err, curState)
|
||||||
return
|
return
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if curState, err := p.swapState(StateReady, StateStopping); err != nil {
|
||||||
|
p.proxyLogger.Infof("<%s> Stop() Ready -> StateStopping err: %v, current state: %v", p.ID, err, curState)
|
||||||
|
return
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// stop the process with a graceful exit timeout
|
// stop the process with a graceful exit timeout
|
||||||
@@ -379,8 +388,14 @@ func (p *Process) StopImmediately() {
|
|||||||
// is in the state of starting, it will cancel it and shut it down. Once a process is in
|
// is in the state of starting, it will cancel it and shut it down. Once a process is in
|
||||||
// the StateShutdown state, it can not be started again.
|
// the StateShutdown state, it can not be started again.
|
||||||
func (p *Process) Shutdown() {
|
func (p *Process) Shutdown() {
|
||||||
|
if !isValidTransition(p.CurrentState(), StateStopping) {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
p.shutdownCancel()
|
p.shutdownCancel()
|
||||||
p.stopCommand(p.gracefulStopTimeout)
|
p.stopCommand(p.gracefulStopTimeout)
|
||||||
|
|
||||||
|
// just force it to this state since there is no recovery from shutdown
|
||||||
p.state = StateShutdown
|
p.state = StateShutdown
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -400,8 +415,38 @@ func (p *Process) stopCommand(sigtermTTL time.Duration) {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := p.terminateProcess(); err != nil {
|
// if err := p.terminateProcess(); err != nil {
|
||||||
p.proxyLogger.Debugf("<%s> Process already terminated: %v (normal during shutdown)", p.ID, err)
|
// p.proxyLogger.Debugf("<%s> Process already terminated: %v (normal during shutdown)", p.ID, err)
|
||||||
|
// }
|
||||||
|
// on Windows, default cmdStop to "taskkill /f /t /pid ${PID}" when none is configured
|
||||||
|
if runtime.GOOS == "windows" && strings.TrimSpace(p.config.CmdStop) == "" {
|
||||||
|
p.config.CmdStop = "taskkill /f /t /pid ${PID}"
|
||||||
|
}
|
||||||
|
|
||||||
|
if p.config.CmdStop != "" {
|
||||||
|
// replace ${PID} with the pid of the process
|
||||||
|
stopArgs, err := SanitizeCommand(strings.ReplaceAll(p.config.CmdStop, "${PID}", fmt.Sprintf("%d", p.cmd.Process.Pid)))
|
||||||
|
if err != nil {
|
||||||
|
p.proxyLogger.Errorf("<%s> Failed to sanitize stop command: %v", p.ID, err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
p.proxyLogger.Debugf("<%s> Executing stop command: %s", p.ID, strings.Join(stopArgs, " "))
|
||||||
|
|
||||||
|
stopCmd := exec.Command(stopArgs[0], stopArgs[1:]...)
|
||||||
|
stopCmd.Stdout = p.processLogger
|
||||||
|
stopCmd.Stderr = p.processLogger
|
||||||
|
stopCmd.Env = p.config.Env
|
||||||
|
|
||||||
|
if err := stopCmd.Run(); err != nil {
|
||||||
|
p.proxyLogger.Errorf("<%s> Failed to exec stop command: %v", p.ID, err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if err := p.cmd.Process.Signal(syscall.SIGTERM); err != nil {
|
||||||
|
p.proxyLogger.Errorf("<%s> Failed to send SIGTERM to process: %v", p.ID, err)
|
||||||
|
return
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
select {
|
select {
|
||||||
|
|||||||
@@ -1,9 +0,0 @@
|
|||||||
//go:build !windows
|
|
||||||
|
|
||||||
package proxy
|
|
||||||
|
|
||||||
import "syscall"
|
|
||||||
|
|
||||||
func (p *Process) terminateProcess() error {
|
|
||||||
return p.cmd.Process.Signal(syscall.SIGTERM)
|
|
||||||
}
|
|
||||||
@@ -1,14 +0,0 @@
|
|||||||
//go:build windows
|
|
||||||
|
|
||||||
package proxy
|
|
||||||
|
|
||||||
import (
|
|
||||||
"fmt"
|
|
||||||
"os/exec"
|
|
||||||
)
|
|
||||||
|
|
||||||
func (p *Process) terminateProcess() error {
|
|
||||||
pid := fmt.Sprintf("%d", p.cmd.Process.Pid)
|
|
||||||
cmd := exec.Command("taskkill", "/f", "/t", "/pid", pid)
|
|
||||||
return cmd.Run()
|
|
||||||
}
|
|
||||||
+26
-1
@@ -5,6 +5,7 @@ import (
|
|||||||
"net/http"
|
"net/http"
|
||||||
"net/http/httptest"
|
"net/http/httptest"
|
||||||
"os"
|
"os"
|
||||||
|
"runtime"
|
||||||
"sync"
|
"sync"
|
||||||
"testing"
|
"testing"
|
||||||
"time"
|
"time"
|
||||||
@@ -432,7 +433,12 @@ func TestProcess_ForceStopWithKill(t *testing.T) {
|
|||||||
|
|
||||||
// unexpected EOF because the kill happened, the "1" is sent before the kill
|
// unexpected EOF because the kill happened, the "1" is sent before the kill
|
||||||
// then the unexpected EOF is sent after the kill
|
// then the unexpected EOF is sent after the kill
|
||||||
assert.Equal(t, "1unexpected EOF\n", w.Body.String())
|
if runtime.GOOS == "windows" {
|
||||||
|
assert.Contains(t, w.Body.String(), "wsarecv: An existing connection was forcibly closed by the remote host")
|
||||||
|
} else {
|
||||||
|
assert.Contains(t, w.Body.String(), "unexpected EOF")
|
||||||
|
}
|
||||||
|
|
||||||
close(waitChan)
|
close(waitChan)
|
||||||
}()
|
}()
|
||||||
|
|
||||||
@@ -443,3 +449,22 @@ func TestProcess_ForceStopWithKill(t *testing.T) {
|
|||||||
// the request should have been interrupted by SIGKILL
|
// the request should have been interrupted by SIGKILL
|
||||||
<-waitChan
|
<-waitChan
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestProcess_StopCmd(t *testing.T) {
|
||||||
|
config := getTestSimpleResponderConfig("test_stop_cmd")
|
||||||
|
|
||||||
|
if runtime.GOOS == "windows" {
|
||||||
|
config.CmdStop = "taskkill /f /t /pid ${PID}"
|
||||||
|
} else {
|
||||||
|
config.CmdStop = "kill -TERM ${PID}"
|
||||||
|
}
|
||||||
|
|
||||||
|
process := NewProcess("testStopCmd", 2, config, debugLogger, debugLogger)
|
||||||
|
defer process.Stop()
|
||||||
|
|
||||||
|
err := process.start()
|
||||||
|
assert.Nil(t, err)
|
||||||
|
assert.Equal(t, process.CurrentState(), StateReady)
|
||||||
|
process.StopImmediately()
|
||||||
|
assert.Equal(t, process.CurrentState(), StateStopped)
|
||||||
|
}
|
||||||
|
|||||||
+30
-3
@@ -319,7 +319,7 @@ func (pm *ProxyManager) proxyToUpstream(c *gin.Context) {
|
|||||||
func (pm *ProxyManager) upstreamIndex(c *gin.Context) {
|
func (pm *ProxyManager) upstreamIndex(c *gin.Context) {
|
||||||
var html strings.Builder
|
var html strings.Builder
|
||||||
|
|
||||||
html.WriteString("<!doctype HTML>\n<html><body><h1>Available Models</h1><ul>")
|
html.WriteString("<!doctype HTML>\n<html><body><h1>Available Models</h1><a href=\"/unload\">Unload all models</a><ul>")
|
||||||
|
|
||||||
// Extract keys and sort them
|
// Extract keys and sort them
|
||||||
var modelIDs []string
|
var modelIDs []string
|
||||||
@@ -334,7 +334,33 @@ func (pm *ProxyManager) upstreamIndex(c *gin.Context) {
|
|||||||
|
|
||||||
// Iterate over sorted keys
|
// Iterate over sorted keys
|
||||||
for _, modelID := range modelIDs {
|
for _, modelID := range modelIDs {
|
||||||
html.WriteString(fmt.Sprintf("<li><a href=\"/upstream/%s\">%s</a></li>", modelID, modelID))
|
// Get process state
|
||||||
|
processGroup := pm.findGroupByModelName(modelID)
|
||||||
|
var state string
|
||||||
|
if processGroup != nil {
|
||||||
|
process := processGroup.processes[modelID]
|
||||||
|
if process != nil {
|
||||||
|
var stateStr string
|
||||||
|
switch process.CurrentState() {
|
||||||
|
case StateReady:
|
||||||
|
stateStr = "Ready"
|
||||||
|
case StateStarting:
|
||||||
|
stateStr = "Starting"
|
||||||
|
case StateStopping:
|
||||||
|
stateStr = "Stopping"
|
||||||
|
case StateFailed:
|
||||||
|
stateStr = "Failed"
|
||||||
|
case StateShutdown:
|
||||||
|
stateStr = "Shutdown"
|
||||||
|
case StateStopped:
|
||||||
|
stateStr = "Stopped"
|
||||||
|
default:
|
||||||
|
stateStr = "Unknown"
|
||||||
|
}
|
||||||
|
state = stateStr
|
||||||
|
}
|
||||||
|
}
|
||||||
|
html.WriteString(fmt.Sprintf("<li><a href=\"/upstream/%s\">%s</a> - %s</li>", modelID, modelID, state))
|
||||||
}
|
}
|
||||||
html.WriteString("</ul></body></html>")
|
html.WriteString("</ul></body></html>")
|
||||||
c.Header("Content-Type", "text/html")
|
c.Header("Content-Type", "text/html")
|
||||||
@@ -374,7 +400,8 @@ func (pm *ProxyManager) proxyOAIHandler(c *gin.Context) {
|
|||||||
|
|
||||||
// dechunk it as we already have all the body bytes see issue #11
|
// dechunk it as we already have all the body bytes see issue #11
|
||||||
c.Request.Header.Del("transfer-encoding")
|
c.Request.Header.Del("transfer-encoding")
|
||||||
c.Request.Header.Add("content-length", strconv.Itoa(len(bodyBytes)))
|
c.Request.Header.Set("content-length", strconv.Itoa(len(bodyBytes)))
|
||||||
|
c.Request.ContentLength = int64(len(bodyBytes))
|
||||||
|
|
||||||
if err := processGroup.ProxyRequest(realModelName, c.Writer, c.Request); err != nil {
|
if err := processGroup.ProxyRequest(realModelName, c.Writer, c.Request); err != nil {
|
||||||
pm.sendErrorResponse(c, http.StatusInternalServerError, fmt.Sprintf("error proxying request: %s", err.Error()))
|
pm.sendErrorResponse(c, http.StatusInternalServerError, fmt.Sprintf("error proxying request: %s", err.Error()))
|
||||||
|
|||||||
@@ -14,6 +14,7 @@ import (
|
|||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/stretchr/testify/assert"
|
"github.com/stretchr/testify/assert"
|
||||||
|
"github.com/tidwall/gjson"
|
||||||
)
|
)
|
||||||
|
|
||||||
func TestProxyManager_SwapProcessCorrectly(t *testing.T) {
|
func TestProxyManager_SwapProcessCorrectly(t *testing.T) {
|
||||||
@@ -448,7 +449,6 @@ func TestProxyManager_AudioTranscriptionHandler(t *testing.T) {
|
|||||||
// Test that useModelName in the configuration overrides the model name sent to upstream
|
// Test that useModelName in the configuration overrides the model name sent to upstream
|
||||||
func TestProxyManager_UseModelName(t *testing.T) {
|
func TestProxyManager_UseModelName(t *testing.T) {
|
||||||
upstreamModelName := "upstreamModel"
|
upstreamModelName := "upstreamModel"
|
||||||
|
|
||||||
modelConfig := getTestSimpleResponderConfig(upstreamModelName)
|
modelConfig := getTestSimpleResponderConfig(upstreamModelName)
|
||||||
modelConfig.UseModelName = upstreamModelName
|
modelConfig.UseModelName = upstreamModelName
|
||||||
|
|
||||||
@@ -473,6 +473,12 @@ func TestProxyManager_UseModelName(t *testing.T) {
|
|||||||
proxy.ServeHTTP(w, req)
|
proxy.ServeHTTP(w, req)
|
||||||
assert.Equal(t, http.StatusOK, w.Code)
|
assert.Equal(t, http.StatusOK, w.Code)
|
||||||
assert.Contains(t, w.Body.String(), upstreamModelName)
|
assert.Contains(t, w.Body.String(), upstreamModelName)
|
||||||
|
|
||||||
|
// make sure the content length was set correctly
|
||||||
|
// simple-responder will return the content length it got in the response
|
||||||
|
body := w.Body.Bytes()
|
||||||
|
contentLength := int(gjson.GetBytes(body, "h_content_length").Int())
|
||||||
|
assert.Equal(t, len(fmt.Sprintf(`{"model":"%s"}`, upstreamModelName)), contentLength)
|
||||||
})
|
})
|
||||||
|
|
||||||
t.Run("useModelName over rides requested model: /v1/audio/transcriptions", func(t *testing.T) {
|
t.Run("useModelName over rides requested model: /v1/audio/transcriptions", func(t *testing.T) {
|
||||||
|
|||||||
@@ -0,0 +1,189 @@
|
|||||||
|
#!/bin/sh
|
||||||
|
# This script installs llama-swap on Linux.
|
||||||
|
# It detects the current operating system architecture and installs the appropriate version of llama-swap.
|
||||||
|
|
||||||
|
set -eu
|
||||||
|
|
||||||
|
red="$( (/usr/bin/tput bold || :; /usr/bin/tput setaf 1 || :) 2>&-)"
|
||||||
|
plain="$( (/usr/bin/tput sgr0 || :) 2>&-)"
|
||||||
|
|
||||||
|
# Print a progress message, prefixed with ">>> ", on stderr.
status() {
    echo ">>> $*" >&2
}
|
||||||
|
# Print an error message and abort the script with exit status 1.
#
# The message is written to stderr (like status) so that it is still shown
# when stdout is redirected or captured by a command substitution.
# Uses the colour escape variables `red` and `plain`.
error() {
    echo "${red}ERROR:${plain} $*" >&2
    exit 1
}
|
||||||
|
# Print a non-fatal warning message.
#
# The message is written to stderr (like status) so that it is still shown
# when stdout is redirected or captured by a command substitution.
# Uses the colour escape variables `red` and `plain`.
warning() {
    echo "${red}WARNING:${plain} $*" >&2
}
|
||||||
|
|
||||||
|
# Succeed (exit status 0) when the command named by $1 can be resolved by the
# shell, fail otherwise. Prints nothing on stdout.
available() {
    # Quoted so the name is passed as a single word, without globbing.
    command -v "$1" >/dev/null
}
|
||||||
|
# Print, space separated on one line, the subset of the given tool names that
# are not available on this system. Prints an empty line when nothing is
# missing.
#
# Arguments: one tool name per argument.
require() {
    local MISSING=''
    # "$@" keeps every argument as one word (no word splitting or globbing).
    for TOOL in "$@"; do
        if ! available "$TOOL"; then
            MISSING="$MISSING $TOOL"
        fi
    done

    # Intentionally unquoted: word splitting drops the leading separator.
    echo $MISSING
}
|
||||||
|
|
||||||
|
# Privileged commands are prefixed with $SUDO: it stays empty when the script
# already runs as root and becomes "sudo" otherwise. Without root and without
# sudo the script cannot continue.
SUDO=
if [ "$(id -u)" -ne 0 ]; then
    available sudo || error "This script requires superuser permissions. Please re-run as root."
    SUDO="sudo"
fi
|
||||||
|
|
||||||
|
# Check for the external tools this script uses; when any are missing, list
# them one per line and stop with exit status 1.
NEEDS=$(require curl tee jq tar)
[ -z "$NEEDS" ] || {
    status "ERROR: The following tools are required but missing:"
    for NEED in $NEEDS; do
        echo " - $NEED"
    done
    exit 1
}
|
||||||
|
|
||||||
|
# Only Linux is supported.
if [ "$(uname -s)" != "Linux" ]; then
    error 'This script is intended to run on Linux only.'
fi

# Translate the machine hardware name into the architecture suffix used in
# the release asset name (linux_<ARCH>).
ARCH=$(uname -m)
case "$ARCH" in
    x86_64)
        ARCH="amd64"
        ;;
    aarch64 | arm64)
        ARCH="arm64"
        ;;
    *)
        error "Unsupported architecture: $ARCH"
        ;;
esac

# Inspect the kernel release string for WSL markers: a "...icrosoft...WSL2"
# release is flagged as WSL2, a release ending in "icrosoft" is rejected as
# WSL1, and anything else is left untouched.
IS_WSL2=false
KERN=$(uname -r)
case "$KERN" in
    *icrosoft*WSL2 | *icrosoft*wsl2)
        IS_WSL2=true
        ;;
    *icrosoft)
        error "Microsoft WSL1 is not currently supported. Please use WSL2 with 'wsl --set-version <distro> 2'"
        ;;
    *)
        ;;
esac
|
||||||
|
|
||||||
|
# Download the latest llama-swap release archive for this architecture from
# GitHub and extract the `llama-swap` binary into /usr/local/bin.
#
# Reads: ARCH (release architecture suffix), SUDO (privilege prefix).
# Sets:  ASSET_NAME, DL_URL.
# Aborts through error() when no release asset name contains "linux_$ARCH",
# which now also covers a failed request to the GitHub API.
download_binary() {
    ASSET_NAME="linux_$ARCH"

    # Fetch the latest release info and extract the matching asset URL.
    # -f makes curl fail on an HTTP error (for example API rate limiting)
    # instead of handing the error document to jq; -S keeps curl's own error
    # message visible even though -s silences the progress meter.
    DL_URL=$(curl -fsS "https://api.github.com/repos/mostlygeek/llama-swap/releases/latest" | \
        jq -r --arg name "$ASSET_NAME" \
        '.assets[] | select(.name | contains($name)) | .browser_download_url')

    # Check if a URL was successfully extracted
    if [ -z "$DL_URL" ]; then
        error "No matching asset found with name containing '$ASSET_NAME'."
    fi

    status "Downloading Linux $ARCH binary"
    # With -f a failed download produces no output, so tar fails on the empty
    # stream and the script stops instead of unpacking an HTML error page.
    curl -fsSL "$DL_URL" | $SUDO tar -xzf - -C /usr/local/bin llama-swap
}
|
||||||
|
download_binary
|
||||||
|
|
||||||
|
# Set up llama-swap as a systemd service.
#
# Steps:
#   1. create the llama-swap system user (home: /usr/share/llama-swap),
#   2. add it to the render, video and docker groups when they exist,
#   3. add the invoking user to the llama-swap group,
#   4. write a default config.yaml unless one already exists,
#   5. write /etc/systemd/system/llama-swap.service,
#   6. when systemd is running, enable the service and schedule a restart of
#      it for when this script exits.
#
# Reads: SUDO (privilege prefix), IS_WSL2. Uses status and warning.
configure_systemd() {
    if ! id llama-swap >/dev/null 2>&1; then
        status "Creating llama-swap user..."
        $SUDO useradd -r -s /bin/false -U -m -d /usr/share/llama-swap llama-swap
    fi

    # Supplementary groups are only added when they exist on this host.
    for GRP in render video docker; do
        if getent group "$GRP" >/dev/null 2>&1; then
            status "Adding llama-swap user to $GRP group..."
            $SUDO usermod -a -G "$GRP" llama-swap
        fi
    done

    status "Adding current user to llama-swap group..."
    $SUDO usermod -a -G llama-swap "$(whoami)"

    # The existence test runs with $SUDO because the service account's home
    # directory may not be readable by the invoking user; an unprivileged test
    # would then report "missing" and an existing config would be overwritten.
    if ! $SUDO test -f "/usr/share/llama-swap/config.yaml"; then
        status "Creating default config.yaml..."
        # Write the file with $SUDO and hand ownership to llama-swap afterwards.
        # "$SUDO -u llama-swap tee" cannot be used: when the script runs as
        # root SUDO is empty and the shell would try to execute "-u".
        cat <<EOF | $SUDO tee /usr/share/llama-swap/config.yaml >/dev/null
# default 15s likely to fail for default models due to downloading models
healthCheckTimeout: 60

models:
  "qwen2.5":
    cmd: |
      docker run
      --rm
      -p \${PORT}:8080
      --name qwen2.5
      ghcr.io/ggml-org/llama.cpp:server
      -hf bartowski/Qwen2.5-0.5B-Instruct-GGUF:Q4_K_M
    cmdStop: docker stop qwen2.5

  "smollm2":
    cmd: |
      docker run
      --rm
      -p \${PORT}:8080
      --name smollm2
      ghcr.io/ggml-org/llama.cpp:server
      -hf bartowski/SmolLM2-135M-Instruct-GGUF:Q4_K_M
    cmdStop: docker stop smollm2
EOF
        $SUDO chown llama-swap:llama-swap /usr/share/llama-swap/config.yaml
    fi

    status "Creating llama-swap systemd service..."
    cat <<EOF | $SUDO tee /etc/systemd/system/llama-swap.service >/dev/null
[Unit]
Description=llama-swap
After=network.target

[Service]
User=llama-swap
Group=llama-swap

# set this to match your environment
ExecStart=/usr/local/bin/llama-swap --config /usr/share/llama-swap/config.yaml --watch-config

Restart=on-failure
RestartSec=3
StartLimitBurst=3
StartLimitInterval=30

[Install]
WantedBy=multi-user.target
EOF

    # "|| true" keeps set -e from aborting when systemd reports a state with
    # a non-zero exit status; the printed state is what gets inspected.
    SYSTEMCTL_RUNNING="$(systemctl is-system-running || true)"
    case $SYSTEMCTL_RUNNING in
        running|degraded)
            status "Enabling and starting llama-swap service..."
            $SUDO systemctl daemon-reload
            $SUDO systemctl enable llama-swap

            # The (re)start is deferred to script exit via the EXIT trap.
            start_service() { $SUDO systemctl restart llama-swap; }
            trap start_service EXIT
            ;;
        *)
            warning "systemd is not running"
            if [ "$IS_WSL2" = true ]; then
                warning "see https://learn.microsoft.com/en-us/windows/wsl/systemd#how-to-enable-systemd to enable it"
            fi
            ;;
    esac
}
|
||||||
|
|
||||||
|
# The systemd service is only configured on hosts that provide systemctl.
if command -v systemctl >/dev/null; then
    configure_systemd
fi
|
||||||
|
|
||||||
|
# Print the closing messages: where the API listens, where the configuration
# file lives, and that the installation finished.
install_success() {
    local MSG
    for MSG in \
        'The llama-swap API is now available at 127.0.0.1:8080.' \
        'Customize the config file at /usr/share/llama-swap/config.yaml.' \
        'Install complete.'
    do
        status "$MSG"
    done
}
|
||||||
|
|
||||||
|
# WSL2 only supports GPUs via nvidia passthrough
# so check for nvidia-smi to determine if GPU is available
if [ "$IS_WSL2" = true ]; then
    if available nvidia-smi && [ -n "$(nvidia-smi | grep -o "CUDA Version: [0-9]*\.[0-9]*")" ]; then
        status "Nvidia GPU detected."
    fi
    # Report completion before the early exit; otherwise a WSL2 install
    # finishes without ever printing the success message.
    install_success
    exit 0
fi

install_success
|
||||||
@@ -0,0 +1,68 @@
|
|||||||
|
#!/bin/sh
|
||||||
|
# This script uninstalls llama-swap on Linux.
|
||||||
|
# It removes the binary, systemd service, config.yaml (optional), and llama-swap user and group.
|
||||||
|
|
||||||
|
set -eu
|
||||||
|
|
||||||
|
red="$( (/usr/bin/tput bold || :; /usr/bin/tput setaf 1 || :) 2>&-)"
|
||||||
|
plain="$( (/usr/bin/tput sgr0 || :) 2>&-)"
|
||||||
|
|
||||||
|
# Print a progress message, prefixed with ">>> ", on stderr.
status() {
    echo ">>> $*" >&2
}
|
||||||
|
# Print an error message and abort the script with exit status 1.
#
# The message is written to stderr (like status) so that it is still shown
# when stdout is redirected or captured by a command substitution.
# Uses the colour escape variables `red` and `plain`.
error() {
    echo "${red}ERROR:${plain} $*" >&2
    exit 1
}
|
||||||
|
# Print a non-fatal warning message.
#
# The message is written to stderr (like status) so that it is still shown
# when stdout is redirected or captured by a command substitution.
# Uses the colour escape variables `red` and `plain`.
warning() {
    echo "${red}WARNING:${plain} $*" >&2
}
|
||||||
|
|
||||||
|
# Succeed (exit status 0) when the command named by $1 can be resolved by the
# shell, fail otherwise. Prints nothing on stdout.
available() {
    # Quoted so the name is passed as a single word, without globbing.
    command -v "$1" >/dev/null
}
|
||||||
|
|
||||||
|
# Privileged commands are prefixed with $SUDO: it stays empty when the script
# already runs as root and becomes "sudo" otherwise. Without root and without
# sudo the script cannot continue.
SUDO=
if [ "$(id -u)" -ne 0 ]; then
    available sudo || error "This script requires superuser permissions. Please re-run as root."
    SUDO="sudo"
fi
|
||||||
|
|
||||||
|
# Stop and disable the llama-swap systemd service and remove its unit file.
#
# Every systemctl step is best-effort: a failure (for example the service was
# never enabled, or systemd is not running) is reported as a warning instead
# of aborting the uninstall under `set -e`, so the binary, configuration and
# user are still removed afterwards.
#
# Reads: SUDO (privilege prefix). Uses status and warning.
configure_systemd() {
    status "Stopping llama-swap service..."
    $SUDO systemctl stop llama-swap || warning "could not stop the llama-swap service"

    status "Disabling llama-swap service..."
    $SUDO systemctl disable llama-swap || warning "could not disable the llama-swap service"

    # Remove the unit file so no stale service definition is left behind.
    if [ -f /etc/systemd/system/llama-swap.service ]; then
        status "Removing llama-swap systemd unit..."
        $SUDO rm -f /etc/systemd/system/llama-swap.service
        $SUDO systemctl daemon-reload || warning "could not reload systemd"
    fi
}
|
||||||
|
# The systemd service is only touched on hosts that provide systemctl.
if command -v systemctl >/dev/null; then
    configure_systemd
fi
|
||||||
|
|
||||||
|
# Remove the llama-swap binary found on PATH.
# `command -v` resolves the path without relying on the non-POSIX `which`
# utility, and the quoted substitution keeps the path a single argument.
if available llama-swap; then
    status "Removing llama-swap binary..."
    $SUDO rm "$(command -v llama-swap)"
fi
|
||||||
|
|
||||||
|
# Ask whether the configuration directory should be deleted (default: no).
#
# The existence test runs with $SUDO because the service account's home
# directory may not be readable by the invoking user.
if $SUDO test -f "/usr/share/llama-swap/config.yaml"; then
    while true; do
        printf "Delete config.yaml (/usr/share/llama-swap/config.yaml)? [y/N] " >&2
        # -r keeps backslashes literal. At end of input `read` fails; treat
        # that as the default answer instead of letting `set -e` abort the
        # uninstall half-way through.
        answer=""
        read -r answer || true
        case "$answer" in
            [Yy]* )
                $SUDO rm -r /usr/share/llama-swap
                break
                ;;
            [Nn]* | "" )
                break
                ;;
            * )
                echo "Invalid input. Please enter y or n."
                ;;
        esac
    done
fi
|
||||||
|
|
||||||
|
# Delete the llama-swap account when it exists.
if id llama-swap >/dev/null 2>&1
then
    status "Removing llama-swap user..."
    $SUDO userdel llama-swap
fi

# Delete the llama-swap group when it (still) exists.
if getent group llama-swap >/dev/null 2>&1
then
    status "Removing llama-swap group..."
    $SUDO groupdel llama-swap
fi
|
||||||
Reference in New Issue
Block a user