small tweak to example config

Add ${MODEL_ID} macro (#226 )
The automatic ${MODEL_ID} macro includes the name of the model and can be used in Cmd and CmdStop.
2025-09-01 21:26:58 -07:00 · 2025-09-01 21:21:37 -07:00 · 2025-08-28 23:44:37 -07:00 · 2025-08-28 22:47:28 -07:00
4 changed files with 58 additions and 8 deletions
@@ -7,7 +7,7 @@

 llama-swap is a light weight, transparent proxy server that provides automatic model swapping to llama.cpp's server.

-Written in golang, it is very easy to install (single binary with no dependencies) and configure (single yaml file). To get started, download a pre-built binary or use the provided docker images.
+Written in golang, it is very easy to install (single binary with no dependencies) and configure (single yaml file). To get started, download a pre-built binary, use one of the provided docker images, or install it with Homebrew.

 ## Features:

@@ -207,4 +207,7 @@ For Python based inference servers like vllm or tabbyAPI it is recommended to ru

 ## Star History

+> [!NOTE]
+> ⭐️ Star this project to help others discover it! 
+
 [![Star History Chart](https://api.star-history.com/svg?repos=mostlygeek/llama-swap&type=Date)](https://www.star-history.com/#mostlygeek/llama-swap&Date)
@@ -49,8 +49,8 @@ macros:
 # - required
 # - each key is the model's ID, used in API requests
 # - model settings have default values that are used if they are not defined here
-# - below are examples of the various settings a model can have:
-# - available model settings: env, cmd, cmdStop, proxy, aliases, checkEndpoint, ttl, unlisted
+# - the model's ID is available as the ${MODEL_ID} macro; it can also be used inside the macros defined above
+# - below are examples of all the settings a model can have
 models:

  # keys are the model names used in API requests
@@ -148,12 +148,12 @@ models:
    cmd: llama-server --port ${PORT} -m Llama-3.2-1B-Instruct-Q4_K_M.gguf -ngl 0

  # Docker example:
-  # container run times like Docker and Podman can be used reliably with a
-  # a combination of cmd and cmdStop.
+  # container runtimes like Docker and Podman can be used reliably with
+  # a combination of cmd, cmdStop, and ${MODEL_ID}
  "docker-llama":
    proxy: "http://127.0.0.1:${PORT}"
    cmd: |
-      docker run --name dockertest
+      docker run --name ${MODEL_ID}
      --init --rm -p ${PORT}:8080 -v /mnt/nvme/models:/models
      ghcr.io/ggml-org/llama.cpp:server
      --model '/models/Qwen2.5-Coder-0.5B-Instruct-Q4_K_M.gguf'
@@ -167,7 +167,7 @@ models:
    # - on POSIX systems: a SIGTERM signal is sent
    # - on Windows, calls taskkill to stop the process
    # - processes have 5 seconds to shutdown until forceful termination is attempted
-    cmdStop: docker stop dockertest
+    cmdStop: docker stop ${MODEL_ID}

 # groups: a dictionary of group settings
 # - optional, default: empty dictionary
@@ -237,7 +237,7 @@ func LoadConfigFromReader(r io.Reader) (Config, error) {

 	- name must fit the regex ^[a-zA-Z0-9_-]+$
 	- names must be less than 64 characters (no reason, just cause)
-	- name can not be any reserved macros: PORT
+	- name can not be any reserved macros: PORT, MODEL_ID
 	- macro values must be less than 1024 characters
 	*/
 	macroNameRegex := regexp.MustCompile(`^[a-zA-Z0-9_-]+$`)
@@ -253,6 +253,7 @@ func LoadConfigFromReader(r io.Reader) (Config, error) {
 		}
 		switch macroName {
-		case "PORT":
+		case "PORT", "MODEL_ID":
+			// Go cases never fall through implicitly, so all reserved names must share this one clause.
 			return Config{}, fmt.Errorf("macro name '%s' is reserved and cannot be used", macroName)
 		}
 	}
@@ -296,6 +297,11 @@ func LoadConfigFromReader(r io.Reader) (Config, error) {
 			nextPort++
 		}

+		if strings.Contains(modelConfig.Cmd, "${MODEL_ID}") || strings.Contains(modelConfig.CmdStop, "${MODEL_ID}") {
+			modelConfig.Cmd = strings.ReplaceAll(modelConfig.Cmd, "${MODEL_ID}", modelId)
+			modelConfig.CmdStop = strings.ReplaceAll(modelConfig.CmdStop, "${MODEL_ID}", modelId)
+		}
+
 		// make sure there are no unknown macros that have not been replaced
 		macroPattern := regexp.MustCompile(`\$\{([a-zA-Z0-9_-]+)\}`)
 		fieldMap := map[string]string{
@@ -440,3 +440,44 @@ models:
 	expectedCmd := "/user/llama.cpp/build/bin/llama-server --port 9990 --model /path/to/model.gguf -ngl 99"
 	assert.Equal(t, expectedCmd, cmdStr, "Final command does not match expected structure")
 }
+
+func TestConfig_MacroModelId(t *testing.T) {
+	content := `
+startPort: 9000
+macros:
+  "docker-llama": docker run --name ${MODEL_ID} -p ${PORT}:8080 docker_img
+  "docker-stop": docker stop ${MODEL_ID}
+
+models:
+  model1:
+    cmd: /path/to/server -p ${PORT} -hf ${MODEL_ID}
+
+  model2:
+    cmd: ${docker-llama}
+    cmdStop: ${docker-stop}
+
+  author/model:F16:
+    cmd: /path/to/server -p ${PORT} -hf ${MODEL_ID}
+    cmdStop: stop
+`
+
+	config, err := LoadConfigFromReader(strings.NewReader(content))
+	assert.NoError(t, err)
+	sanitizedCmd, err := SanitizeCommand(config.Models["model1"].Cmd)
+	assert.NoError(t, err)
+	assert.Equal(t, "/path/to/server -p 9001 -hf model1", strings.Join(sanitizedCmd, " "))
+
+	assert.Equal(t, "docker stop ${MODEL_ID}", config.Macros["docker-stop"])
+
+	sanitizedCmd2, err := SanitizeCommand(config.Models["model2"].Cmd)
+	assert.NoError(t, err)
+	assert.Equal(t, "docker run --name model2 -p 9002:8080 docker_img", strings.Join(sanitizedCmd2, " "))
+
+	sanitizedCmdStop, err := SanitizeCommand(config.Models["model2"].CmdStop)
+	assert.NoError(t, err)
+	assert.Equal(t, "docker stop model2", strings.Join(sanitizedCmdStop, " "))
+
+	sanitizedCmd3, err := SanitizeCommand(config.Models["author/model:F16"].Cmd)
+	assert.NoError(t, err)
+	assert.Equal(t, "/path/to/server -p 9000 -hf author/model:F16", strings.Join(sanitizedCmd3, " "))
+}
Author	SHA1	Message	Date
Benson Wong	a533aec736	small tweak to example config	2025-09-01 21:26:58 -07:00
Brett Profitt	97b17fc47d	Add ${MODEL_ID} macro (#226 ) The automatic ${MODEL_ID} macro includes the name of the model and can be used in Cmd and CmdStop.	2025-09-01 21:21:37 -07:00
Benson Wong	2457840698	Update README.md [skip ci]	2025-08-28 23:44:37 -07:00
Benson Wong	7f55494151	Update README.md [skip ci]	2025-08-28 22:47:28 -07:00