From 565c44766db9cdcaa0f3e61d0a60c44884b95d62 Mon Sep 17 00:00:00 2001 From: Benson Wong Date: Sun, 21 Dec 2025 22:23:31 -0800 Subject: [PATCH] config,proxy: add new configuration logToStdout (#432) The new logToStdout option controls what is logged to stdout. The default has been changed to just the proxy logs, which contain swap and HTTP request logs. There are four supported settings: none, proxy, upstream, both. The "both" setting is the legacy setting where everything was spewed to stdout. --- config.example.yaml | 10 ++++++ docs/configuration.md | 39 ++++++++++++++++++++++ proxy/config/config.go | 14 ++++++++ proxy/config/config_posix_test.go | 1 + proxy/config/config_windows_test.go | 1 + proxy/proxymanager.go | 50 ++++++++++++++++++++--------- 6 files changed, 100 insertions(+), 15 deletions(-) diff --git a/config.example.yaml b/config.example.yaml index e6b8c9c2..923fb825 100644 --- a/config.example.yaml +++ b/config.example.yaml @@ -34,6 +34,16 @@ logLevel: info # - For more info, read: https://pkg.go.dev/time#pkg-constants logTimeFormat: "" +# logToStdout: controls what is logged to stdout +# - optional, default: "proxy" +# - valid values: +# - "proxy": logs generated by llama-swap when swapping models, +# handling requests, etc. +# - "upstream": a copy of an upstream process's stdout logs +# - "both": both the proxy and upstream logs interleaved together +# - "none": no logs are ever written to stdout +logToStdout: "proxy" + # metricsMaxInMemory: maximum number of metrics to keep in memory # - optional, default: 1000 # - controls how many metrics are stored in memory before older ones are discarded diff --git a/docs/configuration.md b/docs/configuration.md index c253d408..852a4a02 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -89,6 +89,9 @@ llama-swap supports many more features to customize how you want to manage your > This is a copy of `config.example.yaml`. Always check that for the most up to date examples. 
```yaml +# add this modeline for validation in vscode +# yaml-language-server: $schema=https://raw.githubusercontent.com/mostlygeek/llama-swap/refs/heads/main/config-schema.json +# # llama-swap YAML configuration example # ------------------------------------- # @@ -114,6 +117,24 @@ healthCheckTimeout: 500 # - Valid log levels: debug, info, warn, error logLevel: info +# logTimeFormat: enables and sets the logging timestamp format +# - optional, default (disabled): "" +# - Valid values: "", "ansic", "unixdate", "rubydate", "rfc822", "rfc822z", +# "rfc850", "rfc1123", "rfc1123z", "rfc3339", "rfc3339nano", "kitchen", +# "stamp", "stampmilli", "stampmicro", and "stampnano". +# - For more info, read: https://pkg.go.dev/time#pkg-constants +logTimeFormat: "" + +# logToStdout: controls what is logged to stdout +# - optional, default: "proxy" +# - valid values: +# - "proxy": logs generated by llama-swap when swapping models, +# handling requests, etc. +# - "upstream": a copy of an upstream process's stdout logs +# - "both": both the proxy and upstream logs interleaved together +# - "none": no logs are ever written to stdout +logToStdout: "proxy" + # metricsMaxInMemory: maximum number of metrics to keep in memory # - optional, default: 1000 # - controls how many metrics are stored in memory before older ones are discarded @@ -126,6 +147,20 @@ metricsMaxInMemory: 1000 # - it is automatically incremented for every model that uses it startPort: 10001 +# sendLoadingState: inject loading status updates into the reasoning (thinking) +# field +# - optional, default: false +# - when true, a stream of loading messages will be sent to the client in the +# reasoning field so chat UIs can show that loading is in progress. 
+# - see #366 for more details +sendLoadingState: true + +# includeAliasesInList: present aliases within the /v1/models OpenAI API listing +# - optional, default: false +# - when true, model aliases will be output to the API model listing duplicating +# all fields except for Id so chat UIs can use the alias equivalent to the original. +includeAliasesInList: false + # macros: a dictionary of string substitutions # - optional, default: empty dictionary # - macros are reusable snippets @@ -274,6 +309,10 @@ models: # - recommended to be omitted and the default used concurrencyLimit: 0 + # sendLoadingState: overrides the global sendLoadingState setting for this model + # - optional, default: undefined (use global setting) + sendLoadingState: false + # Unlisted model example: "qwen-unlisted": # unlisted: boolean, true or false diff --git a/proxy/config/config.go b/proxy/config/config.go index 0138e093..c812204d 100644 --- a/proxy/config/config.go +++ b/proxy/config/config.go @@ -15,6 +15,12 @@ import ( ) const DEFAULT_GROUP_ID = "(default)" +const ( + LogToStdoutProxy = "proxy" + LogToStdoutUpstream = "upstream" + LogToStdoutBoth = "both" + LogToStdoutNone = "none" +) type MacroEntry struct { Name string @@ -114,6 +120,7 @@ type Config struct { LogRequests bool `yaml:"logRequests"` LogLevel string `yaml:"logLevel"` LogTimeFormat string `yaml:"logTimeFormat"` + LogToStdout string `yaml:"logToStdout"` MetricsMaxInMemory int `yaml:"metricsMaxInMemory"` Models map[string]ModelConfig `yaml:"models"` /* key is model ID */ Profiles map[string][]string `yaml:"profiles"` @@ -177,6 +184,7 @@ func LoadConfigFromReader(r io.Reader) (Config, error) { StartPort: 5800, LogLevel: "info", LogTimeFormat: "", + LogToStdout: LogToStdoutProxy, MetricsMaxInMemory: 1000, } err = yaml.Unmarshal(data, &config) @@ -193,6 +201,12 @@ func LoadConfigFromReader(r io.Reader) (Config, error) { return Config{}, fmt.Errorf("startPort must be greater than 1") } + switch config.LogToStdout { + case 
LogToStdoutProxy, LogToStdoutUpstream, LogToStdoutBoth, LogToStdoutNone: + default: + return Config{}, fmt.Errorf("logToStdout must be one of: proxy, upstream, both, none") + } + // Populate the aliases map config.aliases = make(map[string]string) for modelName, modelConfig := range config.Models { diff --git a/proxy/config/config_posix_test.go b/proxy/config/config_posix_test.go index 8793319d..6a2b02f3 100644 --- a/proxy/config/config_posix_test.go +++ b/proxy/config/config_posix_test.go @@ -166,6 +166,7 @@ groups: expected := Config{ LogLevel: "info", LogTimeFormat: "", + LogToStdout: LogToStdoutProxy, StartPort: 5800, Macros: MacroList{ {"svr-path", "path/to/server"}, diff --git a/proxy/config/config_windows_test.go b/proxy/config/config_windows_test.go index 9e633a70..1b674f61 100644 --- a/proxy/config/config_windows_test.go +++ b/proxy/config/config_windows_test.go @@ -158,6 +158,7 @@ groups: expected := Config{ LogLevel: "info", LogTimeFormat: "", + LogToStdout: LogToStdoutProxy, StartPort: 5800, Macros: MacroList{ {"svr-path", "path/to/server"}, diff --git a/proxy/proxymanager.go b/proxy/proxymanager.go index b7e578df..2636f470 100644 --- a/proxy/proxymanager.go +++ b/proxy/proxymanager.go @@ -52,17 +52,37 @@ type ProxyManager struct { version string } -func New(config config.Config) *ProxyManager { +func New(proxyConfig config.Config) *ProxyManager { // set up loggers - stdoutLogger := NewLogMonitorWriter(os.Stdout) - upstreamLogger := NewLogMonitorWriter(stdoutLogger) - proxyLogger := NewLogMonitorWriter(stdoutLogger) - if config.LogRequests { + var muxLogger, upstreamLogger, proxyLogger *LogMonitor + switch proxyConfig.LogToStdout { + case config.LogToStdoutNone: + muxLogger = NewLogMonitorWriter(io.Discard) + upstreamLogger = NewLogMonitorWriter(io.Discard) + proxyLogger = NewLogMonitorWriter(io.Discard) + case config.LogToStdoutBoth: + muxLogger = NewLogMonitorWriter(os.Stdout) + upstreamLogger = NewLogMonitorWriter(muxLogger) + proxyLogger = 
NewLogMonitorWriter(muxLogger) + case config.LogToStdoutUpstream: + muxLogger = NewLogMonitorWriter(os.Stdout) + upstreamLogger = NewLogMonitorWriter(muxLogger) + proxyLogger = NewLogMonitorWriter(io.Discard) + default: + // same as config.LogToStdoutProxy + // helpful because some old tests create a config.Config directly and it + // may not have LogToStdout set explicitly + muxLogger = NewLogMonitorWriter(os.Stdout) + upstreamLogger = NewLogMonitorWriter(io.Discard) + proxyLogger = NewLogMonitorWriter(muxLogger) + } + + if proxyConfig.LogRequests { proxyLogger.Warn("LogRequests configuration is deprecated. Use logLevel instead.") } - switch strings.ToLower(strings.TrimSpace(config.LogLevel)) { + switch strings.ToLower(strings.TrimSpace(proxyConfig.LogLevel)) { case "debug": proxyLogger.SetLogLevel(LevelDebug) upstreamLogger.SetLogLevel(LevelDebug) @@ -99,7 +119,7 @@ func New(config config.Config) *ProxyManager { "stampnano": time.StampNano, } - if timeFormat, ok := timeFormats[strings.ToLower(strings.TrimSpace(config.LogTimeFormat))]; ok { + if timeFormat, ok := timeFormats[strings.ToLower(strings.TrimSpace(proxyConfig.LogTimeFormat))]; ok { proxyLogger.SetLogTimeFormat(timeFormat) upstreamLogger.SetLogTimeFormat(timeFormat) } @@ -107,18 +127,18 @@ func New(config config.Config) *ProxyManager { shutdownCtx, shutdownCancel := context.WithCancel(context.Background()) var maxMetrics int - if config.MetricsMaxInMemory <= 0 { + if proxyConfig.MetricsMaxInMemory <= 0 { maxMetrics = 1000 // Default fallback } else { - maxMetrics = config.MetricsMaxInMemory + maxMetrics = proxyConfig.MetricsMaxInMemory } pm := &ProxyManager{ - config: config, + config: proxyConfig, ginEngine: gin.New(), proxyLogger: proxyLogger, - muxLogger: stdoutLogger, + muxLogger: muxLogger, upstreamLogger: upstreamLogger, metricsMonitor: newMetricsMonitor(proxyLogger, maxMetrics), @@ -134,19 +154,19 @@ func New(config config.Config) *ProxyManager { } // create the process groups - for groupID := 
range config.Groups { - processGroup := NewProcessGroup(groupID, config, proxyLogger, upstreamLogger) + for groupID := range proxyConfig.Groups { + processGroup := NewProcessGroup(groupID, proxyConfig, proxyLogger, upstreamLogger) pm.processGroups[groupID] = processGroup } pm.setupGinEngine() // run any startup hooks - if len(config.Hooks.OnStartup.Preload) > 0 { + if len(proxyConfig.Hooks.OnStartup.Preload) > 0 { // do it in the background, don't block startup -- not sure if good idea yet go func() { discardWriter := &DiscardWriter{} - for _, realModelName := range config.Hooks.OnStartup.Preload { + for _, realModelName := range proxyConfig.Hooks.OnStartup.Preload { proxyLogger.Infof("Preloading model: %s", realModelName) processGroup, _, err := pm.swapProcessGroup(realModelName)