config,proxy: add new configuration logToStdout (#432)
The new logToStdout option controls what is logged to stdout. The default has been changed to just the proxy logs, which contain swap and http request logs. There are four supported settings: none, proxy, upstream, both. The "both" setting is the legacy setting where everything was spewed to stdout.
This commit is contained in:
@@ -34,6 +34,16 @@ logLevel: info
|
|||||||
# - For more info, read: https://pkg.go.dev/time#pkg-constants
|
# - For more info, read: https://pkg.go.dev/time#pkg-constants
|
||||||
logTimeFormat: ""
|
logTimeFormat: ""
|
||||||
|
|
||||||
|
# logToStdout: controls what is logged to stdout
|
||||||
|
# - optional, default: "proxy"
|
||||||
|
# - valid values:
|
||||||
|
# - "proxy": logs generated by llama-swap when swapping models,
|
||||||
|
# handling requests, etc.
|
||||||
|
# - "upstream": a copy of an upstream processes stdout logs
|
||||||
|
# - "both": both the proxy and upstream logs interleaved together
|
||||||
|
# - "none": no logs are ever written to stdout
|
||||||
|
logToStdout: "proxy"
|
||||||
|
|
||||||
# metricsMaxInMemory: maximum number of metrics to keep in memory
|
# metricsMaxInMemory: maximum number of metrics to keep in memory
|
||||||
# - optional, default: 1000
|
# - optional, default: 1000
|
||||||
# - controls how many metrics are stored in memory before older ones are discarded
|
# - controls how many metrics are stored in memory before older ones are discarded
|
||||||
|
|||||||
@@ -89,6 +89,9 @@ llama-swap supports many more features to customize how you want to manage your
|
|||||||
> This is a copy of `config.example.yaml`. Always check that for the most up to date examples.
|
> This is a copy of `config.example.yaml`. Always check that for the most up to date examples.
|
||||||
|
|
||||||
```yaml
|
```yaml
|
||||||
|
# add this modeline for validation in vscode
|
||||||
|
# yaml-language-server: $schema=https://raw.githubusercontent.com/mostlygeek/llama-swap/refs/heads/main/config-schema.json
|
||||||
|
#
|
||||||
# llama-swap YAML configuration example
|
# llama-swap YAML configuration example
|
||||||
# -------------------------------------
|
# -------------------------------------
|
||||||
#
|
#
|
||||||
@@ -114,6 +117,24 @@ healthCheckTimeout: 500
|
|||||||
# - Valid log levels: debug, info, warn, error
|
# - Valid log levels: debug, info, warn, error
|
||||||
logLevel: info
|
logLevel: info
|
||||||
|
|
||||||
|
# logTimeFormat: enables and sets the logging timestamp format
|
||||||
|
# - optional, default (disabled): ""
|
||||||
|
# - Valid values: "", "ansic", "unixdate", "rubydate", "rfc822", "rfc822z",
|
||||||
|
# "rfc850", "rfc1123", "rfc1123z", "rfc3339", "rfc3339nano", "kitchen",
|
||||||
|
# "stamp", "stampmilli", "stampmicro", and "stampnano".
|
||||||
|
# - For more info, read: https://pkg.go.dev/time#pkg-constants
|
||||||
|
logTimeFormat: ""
|
||||||
|
|
||||||
|
# logToStdout: controls what is logged to stdout
|
||||||
|
# - optional, default: "proxy"
|
||||||
|
# - valid values:
|
||||||
|
# - "proxy": logs generated by llama-swap when swapping models,
|
||||||
|
# handling requests, etc.
|
||||||
|
# - "upstream": a copy of an upstream processes stdout logs
|
||||||
|
# - "both": both the proxy and upstream logs interleaved together
|
||||||
|
# - "none": no logs are ever written to stdout
|
||||||
|
logToStdout: "proxy"
|
||||||
|
|
||||||
# metricsMaxInMemory: maximum number of metrics to keep in memory
|
# metricsMaxInMemory: maximum number of metrics to keep in memory
|
||||||
# - optional, default: 1000
|
# - optional, default: 1000
|
||||||
# - controls how many metrics are stored in memory before older ones are discarded
|
# - controls how many metrics are stored in memory before older ones are discarded
|
||||||
@@ -126,6 +147,20 @@ metricsMaxInMemory: 1000
|
|||||||
# - it is automatically incremented for every model that uses it
|
# - it is automatically incremented for every model that uses it
|
||||||
startPort: 10001
|
startPort: 10001
|
||||||
|
|
||||||
|
# sendLoadingState: inject loading status updates into the reasoning (thinking)
|
||||||
|
# field
|
||||||
|
# - optional, default: false
|
||||||
|
# - when true, a stream of loading messages will be sent to the client in the
|
||||||
|
# reasoning field so chat UIs can show that loading is in progress.
|
||||||
|
# - see #366 for more details
|
||||||
|
sendLoadingState: true
|
||||||
|
|
||||||
|
# includeAliasesInList: present aliases within the /v1/models OpenAI API listing
|
||||||
|
# - optional, default: false
|
||||||
|
# - when true, model aliases will be output to the API model listing duplicating
|
||||||
|
# all fields except for Id so chat UIs can use the alias equivalent to the original.
|
||||||
|
includeAliasesInList: false
|
||||||
|
|
||||||
# macros: a dictionary of string substitutions
|
# macros: a dictionary of string substitutions
|
||||||
# - optional, default: empty dictionary
|
# - optional, default: empty dictionary
|
||||||
# - macros are reusable snippets
|
# - macros are reusable snippets
|
||||||
@@ -274,6 +309,10 @@ models:
|
|||||||
# - recommended to be omitted and the default used
|
# - recommended to be omitted and the default used
|
||||||
concurrencyLimit: 0
|
concurrencyLimit: 0
|
||||||
|
|
||||||
|
# sendLoadingState: overrides the global sendLoadingState setting for this model
|
||||||
|
# - optional, default: undefined (use global setting)
|
||||||
|
sendLoadingState: false
|
||||||
|
|
||||||
# Unlisted model example:
|
# Unlisted model example:
|
||||||
"qwen-unlisted":
|
"qwen-unlisted":
|
||||||
# unlisted: boolean, true or false
|
# unlisted: boolean, true or false
|
||||||
|
|||||||
@@ -15,6 +15,12 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
const DEFAULT_GROUP_ID = "(default)"
|
const DEFAULT_GROUP_ID = "(default)"
|
||||||
|
const (
|
||||||
|
LogToStdoutProxy = "proxy"
|
||||||
|
LogToStdoutUpstream = "upstream"
|
||||||
|
LogToStdoutBoth = "both"
|
||||||
|
LogToStdoutNone = "none"
|
||||||
|
)
|
||||||
|
|
||||||
type MacroEntry struct {
|
type MacroEntry struct {
|
||||||
Name string
|
Name string
|
||||||
@@ -114,6 +120,7 @@ type Config struct {
|
|||||||
LogRequests bool `yaml:"logRequests"`
|
LogRequests bool `yaml:"logRequests"`
|
||||||
LogLevel string `yaml:"logLevel"`
|
LogLevel string `yaml:"logLevel"`
|
||||||
LogTimeFormat string `yaml:"logTimeFormat"`
|
LogTimeFormat string `yaml:"logTimeFormat"`
|
||||||
|
LogToStdout string `yaml:"logToStdout"`
|
||||||
MetricsMaxInMemory int `yaml:"metricsMaxInMemory"`
|
MetricsMaxInMemory int `yaml:"metricsMaxInMemory"`
|
||||||
Models map[string]ModelConfig `yaml:"models"` /* key is model ID */
|
Models map[string]ModelConfig `yaml:"models"` /* key is model ID */
|
||||||
Profiles map[string][]string `yaml:"profiles"`
|
Profiles map[string][]string `yaml:"profiles"`
|
||||||
@@ -177,6 +184,7 @@ func LoadConfigFromReader(r io.Reader) (Config, error) {
|
|||||||
StartPort: 5800,
|
StartPort: 5800,
|
||||||
LogLevel: "info",
|
LogLevel: "info",
|
||||||
LogTimeFormat: "",
|
LogTimeFormat: "",
|
||||||
|
LogToStdout: LogToStdoutProxy,
|
||||||
MetricsMaxInMemory: 1000,
|
MetricsMaxInMemory: 1000,
|
||||||
}
|
}
|
||||||
err = yaml.Unmarshal(data, &config)
|
err = yaml.Unmarshal(data, &config)
|
||||||
@@ -193,6 +201,12 @@ func LoadConfigFromReader(r io.Reader) (Config, error) {
|
|||||||
return Config{}, fmt.Errorf("startPort must be greater than 1")
|
return Config{}, fmt.Errorf("startPort must be greater than 1")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
switch config.LogToStdout {
|
||||||
|
case LogToStdoutProxy, LogToStdoutUpstream, LogToStdoutBoth, LogToStdoutNone:
|
||||||
|
default:
|
||||||
|
return Config{}, fmt.Errorf("logToStdout must be one of: proxy, upstream, both, none")
|
||||||
|
}
|
||||||
|
|
||||||
// Populate the aliases map
|
// Populate the aliases map
|
||||||
config.aliases = make(map[string]string)
|
config.aliases = make(map[string]string)
|
||||||
for modelName, modelConfig := range config.Models {
|
for modelName, modelConfig := range config.Models {
|
||||||
|
|||||||
@@ -166,6 +166,7 @@ groups:
|
|||||||
expected := Config{
|
expected := Config{
|
||||||
LogLevel: "info",
|
LogLevel: "info",
|
||||||
LogTimeFormat: "",
|
LogTimeFormat: "",
|
||||||
|
LogToStdout: LogToStdoutProxy,
|
||||||
StartPort: 5800,
|
StartPort: 5800,
|
||||||
Macros: MacroList{
|
Macros: MacroList{
|
||||||
{"svr-path", "path/to/server"},
|
{"svr-path", "path/to/server"},
|
||||||
|
|||||||
@@ -158,6 +158,7 @@ groups:
|
|||||||
expected := Config{
|
expected := Config{
|
||||||
LogLevel: "info",
|
LogLevel: "info",
|
||||||
LogTimeFormat: "",
|
LogTimeFormat: "",
|
||||||
|
LogToStdout: LogToStdoutProxy,
|
||||||
StartPort: 5800,
|
StartPort: 5800,
|
||||||
Macros: MacroList{
|
Macros: MacroList{
|
||||||
{"svr-path", "path/to/server"},
|
{"svr-path", "path/to/server"},
|
||||||
|
|||||||
+35
-15
@@ -52,17 +52,37 @@ type ProxyManager struct {
|
|||||||
version string
|
version string
|
||||||
}
|
}
|
||||||
|
|
||||||
func New(config config.Config) *ProxyManager {
|
func New(proxyConfig config.Config) *ProxyManager {
|
||||||
// set up loggers
|
// set up loggers
|
||||||
stdoutLogger := NewLogMonitorWriter(os.Stdout)
|
|
||||||
upstreamLogger := NewLogMonitorWriter(stdoutLogger)
|
|
||||||
proxyLogger := NewLogMonitorWriter(stdoutLogger)
|
|
||||||
|
|
||||||
if config.LogRequests {
|
var muxLogger, upstreamLogger, proxyLogger *LogMonitor
|
||||||
|
switch proxyConfig.LogToStdout {
|
||||||
|
case config.LogToStdoutNone:
|
||||||
|
muxLogger = NewLogMonitorWriter(io.Discard)
|
||||||
|
upstreamLogger = NewLogMonitorWriter(io.Discard)
|
||||||
|
proxyLogger = NewLogMonitorWriter(io.Discard)
|
||||||
|
case config.LogToStdoutBoth:
|
||||||
|
muxLogger = NewLogMonitorWriter(os.Stdout)
|
||||||
|
upstreamLogger = NewLogMonitorWriter(muxLogger)
|
||||||
|
proxyLogger = NewLogMonitorWriter(muxLogger)
|
||||||
|
case config.LogToStdoutUpstream:
|
||||||
|
muxLogger = NewLogMonitorWriter(os.Stdout)
|
||||||
|
upstreamLogger = NewLogMonitorWriter(muxLogger)
|
||||||
|
proxyLogger = NewLogMonitorWriter(io.Discard)
|
||||||
|
default:
|
||||||
|
// same as config.LogToStdoutProxy
|
||||||
|
// helpful because some old tests create a config.Config directly and it
|
||||||
|
// may not have LogToStdout set explicitly
|
||||||
|
muxLogger = NewLogMonitorWriter(os.Stdout)
|
||||||
|
upstreamLogger = NewLogMonitorWriter(io.Discard)
|
||||||
|
proxyLogger = NewLogMonitorWriter(muxLogger)
|
||||||
|
}
|
||||||
|
|
||||||
|
if proxyConfig.LogRequests {
|
||||||
proxyLogger.Warn("LogRequests configuration is deprecated. Use logLevel instead.")
|
proxyLogger.Warn("LogRequests configuration is deprecated. Use logLevel instead.")
|
||||||
}
|
}
|
||||||
|
|
||||||
switch strings.ToLower(strings.TrimSpace(config.LogLevel)) {
|
switch strings.ToLower(strings.TrimSpace(proxyConfig.LogLevel)) {
|
||||||
case "debug":
|
case "debug":
|
||||||
proxyLogger.SetLogLevel(LevelDebug)
|
proxyLogger.SetLogLevel(LevelDebug)
|
||||||
upstreamLogger.SetLogLevel(LevelDebug)
|
upstreamLogger.SetLogLevel(LevelDebug)
|
||||||
@@ -99,7 +119,7 @@ func New(config config.Config) *ProxyManager {
|
|||||||
"stampnano": time.StampNano,
|
"stampnano": time.StampNano,
|
||||||
}
|
}
|
||||||
|
|
||||||
if timeFormat, ok := timeFormats[strings.ToLower(strings.TrimSpace(config.LogTimeFormat))]; ok {
|
if timeFormat, ok := timeFormats[strings.ToLower(strings.TrimSpace(proxyConfig.LogTimeFormat))]; ok {
|
||||||
proxyLogger.SetLogTimeFormat(timeFormat)
|
proxyLogger.SetLogTimeFormat(timeFormat)
|
||||||
upstreamLogger.SetLogTimeFormat(timeFormat)
|
upstreamLogger.SetLogTimeFormat(timeFormat)
|
||||||
}
|
}
|
||||||
@@ -107,18 +127,18 @@ func New(config config.Config) *ProxyManager {
|
|||||||
shutdownCtx, shutdownCancel := context.WithCancel(context.Background())
|
shutdownCtx, shutdownCancel := context.WithCancel(context.Background())
|
||||||
|
|
||||||
var maxMetrics int
|
var maxMetrics int
|
||||||
if config.MetricsMaxInMemory <= 0 {
|
if proxyConfig.MetricsMaxInMemory <= 0 {
|
||||||
maxMetrics = 1000 // Default fallback
|
maxMetrics = 1000 // Default fallback
|
||||||
} else {
|
} else {
|
||||||
maxMetrics = config.MetricsMaxInMemory
|
maxMetrics = proxyConfig.MetricsMaxInMemory
|
||||||
}
|
}
|
||||||
|
|
||||||
pm := &ProxyManager{
|
pm := &ProxyManager{
|
||||||
config: config,
|
config: proxyConfig,
|
||||||
ginEngine: gin.New(),
|
ginEngine: gin.New(),
|
||||||
|
|
||||||
proxyLogger: proxyLogger,
|
proxyLogger: proxyLogger,
|
||||||
muxLogger: stdoutLogger,
|
muxLogger: muxLogger,
|
||||||
upstreamLogger: upstreamLogger,
|
upstreamLogger: upstreamLogger,
|
||||||
|
|
||||||
metricsMonitor: newMetricsMonitor(proxyLogger, maxMetrics),
|
metricsMonitor: newMetricsMonitor(proxyLogger, maxMetrics),
|
||||||
@@ -134,19 +154,19 @@ func New(config config.Config) *ProxyManager {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// create the process groups
|
// create the process groups
|
||||||
for groupID := range config.Groups {
|
for groupID := range proxyConfig.Groups {
|
||||||
processGroup := NewProcessGroup(groupID, config, proxyLogger, upstreamLogger)
|
processGroup := NewProcessGroup(groupID, proxyConfig, proxyLogger, upstreamLogger)
|
||||||
pm.processGroups[groupID] = processGroup
|
pm.processGroups[groupID] = processGroup
|
||||||
}
|
}
|
||||||
|
|
||||||
pm.setupGinEngine()
|
pm.setupGinEngine()
|
||||||
|
|
||||||
// run any startup hooks
|
// run any startup hooks
|
||||||
if len(config.Hooks.OnStartup.Preload) > 0 {
|
if len(proxyConfig.Hooks.OnStartup.Preload) > 0 {
|
||||||
// do it in the background, don't block startup -- not sure if good idea yet
|
// do it in the background, don't block startup -- not sure if good idea yet
|
||||||
go func() {
|
go func() {
|
||||||
discardWriter := &DiscardWriter{}
|
discardWriter := &DiscardWriter{}
|
||||||
for _, realModelName := range config.Hooks.OnStartup.Preload {
|
for _, realModelName := range proxyConfig.Hooks.OnStartup.Preload {
|
||||||
proxyLogger.Infof("Preloading model: %s", realModelName)
|
proxyLogger.Infof("Preloading model: %s", realModelName)
|
||||||
processGroup, _, err := pm.swapProcessGroup(realModelName)
|
processGroup, _, err := pm.swapProcessGroup(realModelName)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user