diff --git a/config.example.yaml b/config.example.yaml index e6b8c9c2..923fb825 100644 --- a/config.example.yaml +++ b/config.example.yaml @@ -34,6 +34,16 @@ logLevel: info # - For more info, read: https://pkg.go.dev/time#pkg-constants logTimeFormat: "" +# logToStdout: controls what is logged to stdout +# - optional, default: "proxy" +# - valid values: +# - "proxy": logs generated by llama-swap when swapping models, +# handling requests, etc. +# - "upstream": a copy of an upstream process's stdout logs +# - "both": both the proxy and upstream logs interleaved together +# - "none": no logs are ever written to stdout +logToStdout: "proxy" + # metricsMaxInMemory: maximum number of metrics to keep in memory # - optional, default: 1000 # - controls how many metrics are stored in memory before older ones are discarded diff --git a/docs/configuration.md b/docs/configuration.md index c253d408..852a4a02 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -89,6 +89,9 @@ llama-swap supports many more features to customize how you want to manage your > This is a copy of `config.example.yaml`. Always check that for the most up to date examples. ```yaml +# add this modeline for validation in vscode +# yaml-language-server: $schema=https://raw.githubusercontent.com/mostlygeek/llama-swap/refs/heads/main/config-schema.json +# # llama-swap YAML configuration example # ------------------------------------- # @@ -114,6 +117,24 @@ healthCheckTimeout: 500 # - Valid log levels: debug, info, warn, error logLevel: info +# logTimeFormat: enables and sets the logging timestamp format +# - optional, default (disabled): "" +# - Valid values: "", "ansic", "unixdate", "rubydate", "rfc822", "rfc822z", +# "rfc850", "rfc1123", "rfc1123z", "rfc3339", "rfc3339nano", "kitchen", +# "stamp", "stampmilli", "stampmicro", and "stampnano". 
+# - For more info, read: https://pkg.go.dev/time#pkg-constants +logTimeFormat: "" + +# logToStdout: controls what is logged to stdout +# - optional, default: "proxy" +# - valid values: +# - "proxy": logs generated by llama-swap when swapping models, +# handling requests, etc. +# - "upstream": a copy of an upstream process's stdout logs +# - "both": both the proxy and upstream logs interleaved together +# - "none": no logs are ever written to stdout +logToStdout: "proxy" + # metricsMaxInMemory: maximum number of metrics to keep in memory # - optional, default: 1000 # - controls how many metrics are stored in memory before older ones are discarded @@ -126,6 +147,20 @@ metricsMaxInMemory: 1000 # - it is automatically incremented for every model that uses it startPort: 10001 +# sendLoadingState: inject loading status updates into the reasoning (thinking) +# field +# - optional, default: false +# - when true, a stream of loading messages will be sent to the client in the +# reasoning field so chat UIs can show that loading is in progress. +# - see #366 for more details +sendLoadingState: true + +# includeAliasesInList: present aliases within the /v1/models OpenAI API listing +# - optional, default: false +# - when true, model aliases will be output to the API model listing duplicating +# all fields except for Id so chat UIs can use the alias equivalent to the original. 
+includeAliasesInList: false + # macros: a dictionary of string substitutions # - optional, default: empty dictionary # - macros are reusable snippets @@ -274,6 +309,10 @@ models: # - recommended to be omitted and the default used concurrencyLimit: 0 + # sendLoadingState: overrides the global sendLoadingState setting for this model + # - optional, default: undefined (use global setting) + sendLoadingState: false + # Unlisted model example: "qwen-unlisted": # unlisted: boolean, true or false diff --git a/proxy/config/config.go b/proxy/config/config.go index 0138e093..c812204d 100644 --- a/proxy/config/config.go +++ b/proxy/config/config.go @@ -15,6 +15,12 @@ import ( ) const DEFAULT_GROUP_ID = "(default)" +const ( + LogToStdoutProxy = "proxy" + LogToStdoutUpstream = "upstream" + LogToStdoutBoth = "both" + LogToStdoutNone = "none" +) type MacroEntry struct { Name string @@ -114,6 +120,7 @@ type Config struct { LogRequests bool `yaml:"logRequests"` LogLevel string `yaml:"logLevel"` LogTimeFormat string `yaml:"logTimeFormat"` + LogToStdout string `yaml:"logToStdout"` MetricsMaxInMemory int `yaml:"metricsMaxInMemory"` Models map[string]ModelConfig `yaml:"models"` /* key is model ID */ Profiles map[string][]string `yaml:"profiles"` @@ -177,6 +184,7 @@ func LoadConfigFromReader(r io.Reader) (Config, error) { StartPort: 5800, LogLevel: "info", LogTimeFormat: "", + LogToStdout: LogToStdoutProxy, MetricsMaxInMemory: 1000, } err = yaml.Unmarshal(data, &config) @@ -193,6 +201,12 @@ func LoadConfigFromReader(r io.Reader) (Config, error) { return Config{}, fmt.Errorf("startPort must be greater than 1") } + switch config.LogToStdout { + case LogToStdoutProxy, LogToStdoutUpstream, LogToStdoutBoth, LogToStdoutNone: + default: + return Config{}, fmt.Errorf("logToStdout must be one of: proxy, upstream, both, none") + } + // Populate the aliases map config.aliases = make(map[string]string) for modelName, modelConfig := range config.Models { diff --git 
a/proxy/config/config_posix_test.go b/proxy/config/config_posix_test.go index 8793319d..6a2b02f3 100644 --- a/proxy/config/config_posix_test.go +++ b/proxy/config/config_posix_test.go @@ -166,6 +166,7 @@ groups: expected := Config{ LogLevel: "info", LogTimeFormat: "", + LogToStdout: LogToStdoutProxy, StartPort: 5800, Macros: MacroList{ {"svr-path", "path/to/server"}, diff --git a/proxy/config/config_windows_test.go b/proxy/config/config_windows_test.go index 9e633a70..1b674f61 100644 --- a/proxy/config/config_windows_test.go +++ b/proxy/config/config_windows_test.go @@ -158,6 +158,7 @@ groups: expected := Config{ LogLevel: "info", LogTimeFormat: "", + LogToStdout: LogToStdoutProxy, StartPort: 5800, Macros: MacroList{ {"svr-path", "path/to/server"}, diff --git a/proxy/proxymanager.go b/proxy/proxymanager.go index b7e578df..2636f470 100644 --- a/proxy/proxymanager.go +++ b/proxy/proxymanager.go @@ -52,17 +52,37 @@ type ProxyManager struct { version string } -func New(config config.Config) *ProxyManager { +func New(proxyConfig config.Config) *ProxyManager { // set up loggers - stdoutLogger := NewLogMonitorWriter(os.Stdout) - upstreamLogger := NewLogMonitorWriter(stdoutLogger) - proxyLogger := NewLogMonitorWriter(stdoutLogger) - if config.LogRequests { + var muxLogger, upstreamLogger, proxyLogger *LogMonitor + switch proxyConfig.LogToStdout { + case config.LogToStdoutNone: + muxLogger = NewLogMonitorWriter(io.Discard) + upstreamLogger = NewLogMonitorWriter(io.Discard) + proxyLogger = NewLogMonitorWriter(io.Discard) + case config.LogToStdoutBoth: + muxLogger = NewLogMonitorWriter(os.Stdout) + upstreamLogger = NewLogMonitorWriter(muxLogger) + proxyLogger = NewLogMonitorWriter(muxLogger) + case config.LogToStdoutUpstream: + muxLogger = NewLogMonitorWriter(os.Stdout) + upstreamLogger = NewLogMonitorWriter(muxLogger) + proxyLogger = NewLogMonitorWriter(io.Discard) + default: + // same as config.LogToStdoutProxy + // helpful because some old tests create a config.Config 
directly and it + // may not have LogToStdout set explicitly + muxLogger = NewLogMonitorWriter(os.Stdout) + upstreamLogger = NewLogMonitorWriter(io.Discard) + proxyLogger = NewLogMonitorWriter(muxLogger) + } + + if proxyConfig.LogRequests { proxyLogger.Warn("LogRequests configuration is deprecated. Use logLevel instead.") } - switch strings.ToLower(strings.TrimSpace(config.LogLevel)) { + switch strings.ToLower(strings.TrimSpace(proxyConfig.LogLevel)) { case "debug": proxyLogger.SetLogLevel(LevelDebug) upstreamLogger.SetLogLevel(LevelDebug) @@ -99,7 +119,7 @@ func New(config config.Config) *ProxyManager { "stampnano": time.StampNano, } - if timeFormat, ok := timeFormats[strings.ToLower(strings.TrimSpace(config.LogTimeFormat))]; ok { + if timeFormat, ok := timeFormats[strings.ToLower(strings.TrimSpace(proxyConfig.LogTimeFormat))]; ok { proxyLogger.SetLogTimeFormat(timeFormat) upstreamLogger.SetLogTimeFormat(timeFormat) } @@ -107,18 +127,18 @@ func New(config config.Config) *ProxyManager { shutdownCtx, shutdownCancel := context.WithCancel(context.Background()) var maxMetrics int - if config.MetricsMaxInMemory <= 0 { + if proxyConfig.MetricsMaxInMemory <= 0 { maxMetrics = 1000 // Default fallback } else { - maxMetrics = config.MetricsMaxInMemory + maxMetrics = proxyConfig.MetricsMaxInMemory } pm := &ProxyManager{ - config: config, + config: proxyConfig, ginEngine: gin.New(), proxyLogger: proxyLogger, - muxLogger: stdoutLogger, + muxLogger: muxLogger, upstreamLogger: upstreamLogger, metricsMonitor: newMetricsMonitor(proxyLogger, maxMetrics), @@ -134,19 +154,19 @@ func New(config config.Config) *ProxyManager { } // create the process groups - for groupID := range config.Groups { - processGroup := NewProcessGroup(groupID, config, proxyLogger, upstreamLogger) + for groupID := range proxyConfig.Groups { + processGroup := NewProcessGroup(groupID, proxyConfig, proxyLogger, upstreamLogger) pm.processGroups[groupID] = processGroup } pm.setupGinEngine() // run any startup hooks 
- if len(config.Hooks.OnStartup.Preload) > 0 { + if len(proxyConfig.Hooks.OnStartup.Preload) > 0 { // do it in the background, don't block startup -- not sure if good idea yet go func() { discardWriter := &DiscardWriter{} - for _, realModelName := range config.Hooks.OnStartup.Preload { + for _, realModelName := range proxyConfig.Hooks.OnStartup.Preload { proxyLogger.Infof("Preloading model: %s", realModelName) processGroup, _, err := pm.swapProcessGroup(realModelName)