config,proxy: add new configuration logToStdout (#432)

The new logToStdout option controls what is logged to stdout. The default has been changed to just the proxy logs, which contain swap and http request logs. There are four supported settings: none, proxy, upstream, both. The "both" setting is the legacy setting where everything was spewed to stdout.
2025-12-21 22:23:31 -08:00
parent e6a9e210ba
commit 565c44766d
6 changed files with 100 additions and 15 deletions
@@ -89,6 +89,9 @@ llama-swap supports many more features to customize how you want to manage your
 > This is a copy of `config.example.yaml`. Always check that for the most up to date examples.

 ```yaml
+# add this modeline for validation in vscode
+# yaml-language-server: $schema=https://raw.githubusercontent.com/mostlygeek/llama-swap/refs/heads/main/config-schema.json
+#
 # llama-swap YAML configuration example
 # -------------------------------------
 #
@@ -114,6 +117,24 @@ healthCheckTimeout: 500
 # - Valid log levels: debug, info, warn, error
 logLevel: info

+# logTimeFormat: enables and sets the logging timestamp format
+# - optional, default (disabled): ""
+# - Valid values: "", "ansic", "unixdate", "rubydate", "rfc822", "rfc822z",
+#   "rfc850", "rfc1123", "rfc1123z", "rfc3339", "rfc3339nano", "kitchen",
+#   "stamp", "stampmilli", "stampmicro", and "stampnano".
+# - For more info, read: https://pkg.go.dev/time#pkg-constants
+logTimeFormat: ""
+
+# logToStdout: controls what is logged to stdout
+# - optional, default: "proxy"
+# - valid values:
+#   - "proxy": logs generated by llama-swap when swapping models,
+#      handling requests, etc.
+#   - "upstream": a copy of the upstream process's stdout logs
+#   - "both": both the proxy and upstream logs interleaved together
+#   - "none": no logs are ever written to stdout
+logToStdout: "proxy"
+
 # metricsMaxInMemory: maximum number of metrics to keep in memory
 # - optional, default: 1000
 # - controls how many metrics are stored in memory before older ones are discarded
@@ -126,6 +147,20 @@ metricsMaxInMemory: 1000
 # - it is automatically incremented for every model that uses it
 startPort: 10001

+# sendLoadingState: inject loading status updates into the reasoning (thinking)
+# field
+# - optional, default: false
+# - when true, a stream of loading messages will be sent to the client in the
+#   reasoning field so chat UIs can show that loading is in progress.
+# - see #366 for more details
+sendLoadingState: true
+
+# includeAliasesInList: present aliases within the /v1/models OpenAI API listing
+# - optional, default: false
+# - when true, each model alias is also added to the API model listing as a
+#   copy of the original model's entry, with all fields duplicated except for
+#   Id, so chat UIs can use the alias as an equivalent of the original.
+includeAliasesInList: false
+
 # macros: a dictionary of string substitutions
 # - optional, default: empty dictionary
 # - macros are reusable snippets
@@ -274,6 +309,10 @@ models:
    # - recommended to be omitted and the default used
    concurrencyLimit: 0

+    # sendLoadingState: overrides the global sendLoadingState setting for this model
+    # - optional, default: undefined (use global setting)
+    sendLoadingState: false
+
  # Unlisted model example:
  "qwen-unlisted":
    # unlisted: boolean, true or false