config,proxy: add new configuration logToStdout (#432)

The new logToStdout option controls what is logged to stdout. The
default has been changed to just the proxy logs, which contain swap and
http request logs.

There are four supported settings: none, proxy, upstream, both. The
"both" setting is the legacy setting where everything was spewed to
stdout.
This commit is contained in:
Benson Wong
2025-12-21 22:23:31 -08:00
committed by GitHub
parent e6a9e210ba
commit 565c44766d
6 changed files with 100 additions and 15 deletions
+10
View File
@@ -34,6 +34,16 @@ logLevel: info
# - For more info, read: https://pkg.go.dev/time#pkg-constants # - For more info, read: https://pkg.go.dev/time#pkg-constants
logTimeFormat: "" logTimeFormat: ""
# logToStdout: controls what is logged to stdout
# - optional, default: "proxy"
# - valid values:
# - "proxy": logs generated by llama-swap when swapping models,
# handling requests, etc.
# - "upstream": a copy of an upstream processes stdout logs
# - "both": both the proxy and upstream logs interleaved together
# - "none": no logs are ever written to stdout
logToStdout: "proxy"
# metricsMaxInMemory: maximum number of metrics to keep in memory # metricsMaxInMemory: maximum number of metrics to keep in memory
# - optional, default: 1000 # - optional, default: 1000
# - controls how many metrics are stored in memory before older ones are discarded # - controls how many metrics are stored in memory before older ones are discarded
+39
View File
@@ -89,6 +89,9 @@ llama-swap supports many more features to customize how you want to manage your
> This is a copy of `config.example.yaml`. Always check that for the most up to date examples. > This is a copy of `config.example.yaml`. Always check that for the most up to date examples.
```yaml ```yaml
# add this modeline for validation in vscode
# yaml-language-server: $schema=https://raw.githubusercontent.com/mostlygeek/llama-swap/refs/heads/main/config-schema.json
#
# llama-swap YAML configuration example # llama-swap YAML configuration example
# ------------------------------------- # -------------------------------------
# #
@@ -114,6 +117,24 @@ healthCheckTimeout: 500
# - Valid log levels: debug, info, warn, error # - Valid log levels: debug, info, warn, error
logLevel: info logLevel: info
# logTimeFormat: enables and sets the logging timestamp format
# - optional, default (disabled): ""
# - Valid values: "", "ansic", "unixdate", "rubydate", "rfc822", "rfc822z",
# "rfc850", "rfc1123", "rfc1123z", "rfc3339", "rfc3339nano", "kitchen",
# "stamp", "stampmilli", "stampmicro", and "stampnano".
# - For more info, read: https://pkg.go.dev/time#pkg-constants
logTimeFormat: ""
# logToStdout: controls what is logged to stdout
# - optional, default: "proxy"
# - valid values:
# - "proxy": logs generated by llama-swap when swapping models,
# handling requests, etc.
# - "upstream": a copy of an upstream processes stdout logs
# - "both": both the proxy and upstream logs interleaved together
# - "none": no logs are ever written to stdout
logToStdout: "proxy"
# metricsMaxInMemory: maximum number of metrics to keep in memory # metricsMaxInMemory: maximum number of metrics to keep in memory
# - optional, default: 1000 # - optional, default: 1000
# - controls how many metrics are stored in memory before older ones are discarded # - controls how many metrics are stored in memory before older ones are discarded
@@ -126,6 +147,20 @@ metricsMaxInMemory: 1000
# - it is automatically incremented for every model that uses it # - it is automatically incremented for every model that uses it
startPort: 10001 startPort: 10001
# sendLoadingState: inject loading status updates into the reasoning (thinking)
# field
# - optional, default: false
# - when true, a stream of loading messages will be sent to the client in the
# reasoning field so chat UIs can show that loading is in progress.
# - see #366 for more details
sendLoadingState: true
# includeAliasesInList: present aliases within the /v1/models OpenAI API listing
# - optional, default: false
# - when true, model aliases will be output to the API model listing duplicating
# all fields except for Id so chat UIs can use the alias equivalent to the original.
includeAliasesInList: false
# macros: a dictionary of string substitutions # macros: a dictionary of string substitutions
# - optional, default: empty dictionary # - optional, default: empty dictionary
# - macros are reusable snippets # - macros are reusable snippets
@@ -274,6 +309,10 @@ models:
# - recommended to be omitted and the default used # - recommended to be omitted and the default used
concurrencyLimit: 0 concurrencyLimit: 0
# sendLoadingState: overrides the global sendLoadingState setting for this model
# - optional, default: undefined (use global setting)
sendLoadingState: false
# Unlisted model example: # Unlisted model example:
"qwen-unlisted": "qwen-unlisted":
# unlisted: boolean, true or false # unlisted: boolean, true or false
+14
View File
@@ -15,6 +15,12 @@ import (
) )
const DEFAULT_GROUP_ID = "(default)" const DEFAULT_GROUP_ID = "(default)"
const (
LogToStdoutProxy = "proxy"
LogToStdoutUpstream = "upstream"
LogToStdoutBoth = "both"
LogToStdoutNone = "none"
)
type MacroEntry struct { type MacroEntry struct {
Name string Name string
@@ -114,6 +120,7 @@ type Config struct {
LogRequests bool `yaml:"logRequests"` LogRequests bool `yaml:"logRequests"`
LogLevel string `yaml:"logLevel"` LogLevel string `yaml:"logLevel"`
LogTimeFormat string `yaml:"logTimeFormat"` LogTimeFormat string `yaml:"logTimeFormat"`
LogToStdout string `yaml:"logToStdout"`
MetricsMaxInMemory int `yaml:"metricsMaxInMemory"` MetricsMaxInMemory int `yaml:"metricsMaxInMemory"`
Models map[string]ModelConfig `yaml:"models"` /* key is model ID */ Models map[string]ModelConfig `yaml:"models"` /* key is model ID */
Profiles map[string][]string `yaml:"profiles"` Profiles map[string][]string `yaml:"profiles"`
@@ -177,6 +184,7 @@ func LoadConfigFromReader(r io.Reader) (Config, error) {
StartPort: 5800, StartPort: 5800,
LogLevel: "info", LogLevel: "info",
LogTimeFormat: "", LogTimeFormat: "",
LogToStdout: LogToStdoutProxy,
MetricsMaxInMemory: 1000, MetricsMaxInMemory: 1000,
} }
err = yaml.Unmarshal(data, &config) err = yaml.Unmarshal(data, &config)
@@ -193,6 +201,12 @@ func LoadConfigFromReader(r io.Reader) (Config, error) {
return Config{}, fmt.Errorf("startPort must be greater than 1") return Config{}, fmt.Errorf("startPort must be greater than 1")
} }
switch config.LogToStdout {
case LogToStdoutProxy, LogToStdoutUpstream, LogToStdoutBoth, LogToStdoutNone:
default:
return Config{}, fmt.Errorf("logToStdout must be one of: proxy, upstream, both, none")
}
// Populate the aliases map // Populate the aliases map
config.aliases = make(map[string]string) config.aliases = make(map[string]string)
for modelName, modelConfig := range config.Models { for modelName, modelConfig := range config.Models {
+1
View File
@@ -166,6 +166,7 @@ groups:
expected := Config{ expected := Config{
LogLevel: "info", LogLevel: "info",
LogTimeFormat: "", LogTimeFormat: "",
LogToStdout: LogToStdoutProxy,
StartPort: 5800, StartPort: 5800,
Macros: MacroList{ Macros: MacroList{
{"svr-path", "path/to/server"}, {"svr-path", "path/to/server"},
+1
View File
@@ -158,6 +158,7 @@ groups:
expected := Config{ expected := Config{
LogLevel: "info", LogLevel: "info",
LogTimeFormat: "", LogTimeFormat: "",
LogToStdout: LogToStdoutProxy,
StartPort: 5800, StartPort: 5800,
Macros: MacroList{ Macros: MacroList{
{"svr-path", "path/to/server"}, {"svr-path", "path/to/server"},
+35 -15
View File
@@ -52,17 +52,37 @@ type ProxyManager struct {
version string version string
} }
func New(config config.Config) *ProxyManager { func New(proxyConfig config.Config) *ProxyManager {
// set up loggers // set up loggers
stdoutLogger := NewLogMonitorWriter(os.Stdout)
upstreamLogger := NewLogMonitorWriter(stdoutLogger)
proxyLogger := NewLogMonitorWriter(stdoutLogger)
if config.LogRequests { var muxLogger, upstreamLogger, proxyLogger *LogMonitor
switch proxyConfig.LogToStdout {
case config.LogToStdoutNone:
muxLogger = NewLogMonitorWriter(io.Discard)
upstreamLogger = NewLogMonitorWriter(io.Discard)
proxyLogger = NewLogMonitorWriter(io.Discard)
case config.LogToStdoutBoth:
muxLogger = NewLogMonitorWriter(os.Stdout)
upstreamLogger = NewLogMonitorWriter(muxLogger)
proxyLogger = NewLogMonitorWriter(muxLogger)
case config.LogToStdoutUpstream:
muxLogger = NewLogMonitorWriter(os.Stdout)
upstreamLogger = NewLogMonitorWriter(muxLogger)
proxyLogger = NewLogMonitorWriter(io.Discard)
default:
// same as config.LogToStdoutProxy
// helpful because some old tests create a config.Config directly and it
// may not have LogToStdout set explicitly
muxLogger = NewLogMonitorWriter(os.Stdout)
upstreamLogger = NewLogMonitorWriter(io.Discard)
proxyLogger = NewLogMonitorWriter(muxLogger)
}
if proxyConfig.LogRequests {
proxyLogger.Warn("LogRequests configuration is deprecated. Use logLevel instead.") proxyLogger.Warn("LogRequests configuration is deprecated. Use logLevel instead.")
} }
switch strings.ToLower(strings.TrimSpace(config.LogLevel)) { switch strings.ToLower(strings.TrimSpace(proxyConfig.LogLevel)) {
case "debug": case "debug":
proxyLogger.SetLogLevel(LevelDebug) proxyLogger.SetLogLevel(LevelDebug)
upstreamLogger.SetLogLevel(LevelDebug) upstreamLogger.SetLogLevel(LevelDebug)
@@ -99,7 +119,7 @@ func New(config config.Config) *ProxyManager {
"stampnano": time.StampNano, "stampnano": time.StampNano,
} }
if timeFormat, ok := timeFormats[strings.ToLower(strings.TrimSpace(config.LogTimeFormat))]; ok { if timeFormat, ok := timeFormats[strings.ToLower(strings.TrimSpace(proxyConfig.LogTimeFormat))]; ok {
proxyLogger.SetLogTimeFormat(timeFormat) proxyLogger.SetLogTimeFormat(timeFormat)
upstreamLogger.SetLogTimeFormat(timeFormat) upstreamLogger.SetLogTimeFormat(timeFormat)
} }
@@ -107,18 +127,18 @@ func New(config config.Config) *ProxyManager {
shutdownCtx, shutdownCancel := context.WithCancel(context.Background()) shutdownCtx, shutdownCancel := context.WithCancel(context.Background())
var maxMetrics int var maxMetrics int
if config.MetricsMaxInMemory <= 0 { if proxyConfig.MetricsMaxInMemory <= 0 {
maxMetrics = 1000 // Default fallback maxMetrics = 1000 // Default fallback
} else { } else {
maxMetrics = config.MetricsMaxInMemory maxMetrics = proxyConfig.MetricsMaxInMemory
} }
pm := &ProxyManager{ pm := &ProxyManager{
config: config, config: proxyConfig,
ginEngine: gin.New(), ginEngine: gin.New(),
proxyLogger: proxyLogger, proxyLogger: proxyLogger,
muxLogger: stdoutLogger, muxLogger: muxLogger,
upstreamLogger: upstreamLogger, upstreamLogger: upstreamLogger,
metricsMonitor: newMetricsMonitor(proxyLogger, maxMetrics), metricsMonitor: newMetricsMonitor(proxyLogger, maxMetrics),
@@ -134,19 +154,19 @@ func New(config config.Config) *ProxyManager {
} }
// create the process groups // create the process groups
for groupID := range config.Groups { for groupID := range proxyConfig.Groups {
processGroup := NewProcessGroup(groupID, config, proxyLogger, upstreamLogger) processGroup := NewProcessGroup(groupID, proxyConfig, proxyLogger, upstreamLogger)
pm.processGroups[groupID] = processGroup pm.processGroups[groupID] = processGroup
} }
pm.setupGinEngine() pm.setupGinEngine()
// run any startup hooks // run any startup hooks
if len(config.Hooks.OnStartup.Preload) > 0 { if len(proxyConfig.Hooks.OnStartup.Preload) > 0 {
// do it in the background, don't block startup -- not sure if good idea yet // do it in the background, don't block startup -- not sure if good idea yet
go func() { go func() {
discardWriter := &DiscardWriter{} discardWriter := &DiscardWriter{}
for _, realModelName := range config.Hooks.OnStartup.Preload { for _, realModelName := range proxyConfig.Hooks.OnStartup.Preload {
proxyLogger.Infof("Preloading model: %s", realModelName) proxyLogger.Infof("Preloading model: %s", realModelName)
processGroup, _, err := pm.swapProcessGroup(realModelName) processGroup, _, err := pm.swapProcessGroup(realModelName)