Compare commits

..

2 Commits

Author SHA1 Message Date
Benson Wong 565c44766d config,proxy: add new configuration logToStdout (#432)
The new logToStdout option controls what is logged to stdout. The
default has been changed to just the proxy logs, which contain swap and
http request logs.

There are four supported settings: none, proxy, upstream, both. The
"both" setting is the legacy setting where everything was spewed to
stdout.
2025-12-21 22:23:31 -08:00
Benson Wong e6a9e210ba proxy: fix path bug in /logs/stream/{model_id} (#431)
A {model_id} containing a forward slash trips up gin's path param
parsing. This updates /logs/stream to work like /upstream where the
model_id is built up in parts and searched for in the configuration.

Updates #421
2025-12-21 21:47:14 -08:00
8 changed files with 145 additions and 57 deletions
+10
View File
@@ -34,6 +34,16 @@ logLevel: info
# - For more info, read: https://pkg.go.dev/time#pkg-constants # - For more info, read: https://pkg.go.dev/time#pkg-constants
logTimeFormat: "" logTimeFormat: ""
# logToStdout: controls what is logged to stdout
# - optional, default: "proxy"
# - valid values:
# - "proxy": logs generated by llama-swap when swapping models,
# handling requests, etc.
# - "upstream": a copy of an upstream processes stdout logs
# - "both": both the proxy and upstream logs interleaved together
# - "none": no logs are ever written to stdout
logToStdout: "proxy"
# metricsMaxInMemory: maximum number of metrics to keep in memory # metricsMaxInMemory: maximum number of metrics to keep in memory
# - optional, default: 1000 # - optional, default: 1000
# - controls how many metrics are stored in memory before older ones are discarded # - controls how many metrics are stored in memory before older ones are discarded
+39
View File
@@ -89,6 +89,9 @@ llama-swap supports many more features to customize how you want to manage your
> This is a copy of `config.example.yaml`. Always check that for the most up to date examples. > This is a copy of `config.example.yaml`. Always check that for the most up to date examples.
```yaml ```yaml
# add this modeline for validation in vscode
# yaml-language-server: $schema=https://raw.githubusercontent.com/mostlygeek/llama-swap/refs/heads/main/config-schema.json
#
# llama-swap YAML configuration example # llama-swap YAML configuration example
# ------------------------------------- # -------------------------------------
# #
@@ -114,6 +117,24 @@ healthCheckTimeout: 500
# - Valid log levels: debug, info, warn, error # - Valid log levels: debug, info, warn, error
logLevel: info logLevel: info
# logTimeFormat: enables and sets the logging timestamp format
# - optional, default (disabled): ""
# - Valid values: "", "ansic", "unixdate", "rubydate", "rfc822", "rfc822z",
# "rfc850", "rfc1123", "rfc1123z", "rfc3339", "rfc3339nano", "kitchen",
# "stamp", "stampmilli", "stampmicro", and "stampnano".
# - For more info, read: https://pkg.go.dev/time#pkg-constants
logTimeFormat: ""
# logToStdout: controls what is logged to stdout
# - optional, default: "proxy"
# - valid values:
# - "proxy": logs generated by llama-swap when swapping models,
# handling requests, etc.
# - "upstream": a copy of an upstream processes stdout logs
# - "both": both the proxy and upstream logs interleaved together
# - "none": no logs are ever written to stdout
logToStdout: "proxy"
# metricsMaxInMemory: maximum number of metrics to keep in memory # metricsMaxInMemory: maximum number of metrics to keep in memory
# - optional, default: 1000 # - optional, default: 1000
# - controls how many metrics are stored in memory before older ones are discarded # - controls how many metrics are stored in memory before older ones are discarded
@@ -126,6 +147,20 @@ metricsMaxInMemory: 1000
# - it is automatically incremented for every model that uses it # - it is automatically incremented for every model that uses it
startPort: 10001 startPort: 10001
# sendLoadingState: inject loading status updates into the reasoning (thinking)
# field
# - optional, default: false
# - when true, a stream of loading messages will be sent to the client in the
# reasoning field so chat UIs can show that loading is in progress.
# - see #366 for more details
sendLoadingState: true
# includeAliasesInList: present aliases within the /v1/models OpenAI API listing
# - optional, default: false
# - when true, model aliases will be output to the API model listing duplicating
# all fields except for Id so chat UIs can use the alias equivalent to the original.
includeAliasesInList: false
# macros: a dictionary of string substitutions # macros: a dictionary of string substitutions
# - optional, default: empty dictionary # - optional, default: empty dictionary
# - macros are reusable snippets # - macros are reusable snippets
@@ -274,6 +309,10 @@ models:
# - recommended to be omitted and the default used # - recommended to be omitted and the default used
concurrencyLimit: 0 concurrencyLimit: 0
# sendLoadingState: overrides the global sendLoadingState setting for this model
# - optional, default: undefined (use global setting)
sendLoadingState: false
# Unlisted model example: # Unlisted model example:
"qwen-unlisted": "qwen-unlisted":
# unlisted: boolean, true or false # unlisted: boolean, true or false
+14
View File
@@ -15,6 +15,12 @@ import (
) )
const DEFAULT_GROUP_ID = "(default)" const DEFAULT_GROUP_ID = "(default)"
const (
LogToStdoutProxy = "proxy"
LogToStdoutUpstream = "upstream"
LogToStdoutBoth = "both"
LogToStdoutNone = "none"
)
type MacroEntry struct { type MacroEntry struct {
Name string Name string
@@ -114,6 +120,7 @@ type Config struct {
LogRequests bool `yaml:"logRequests"` LogRequests bool `yaml:"logRequests"`
LogLevel string `yaml:"logLevel"` LogLevel string `yaml:"logLevel"`
LogTimeFormat string `yaml:"logTimeFormat"` LogTimeFormat string `yaml:"logTimeFormat"`
LogToStdout string `yaml:"logToStdout"`
MetricsMaxInMemory int `yaml:"metricsMaxInMemory"` MetricsMaxInMemory int `yaml:"metricsMaxInMemory"`
Models map[string]ModelConfig `yaml:"models"` /* key is model ID */ Models map[string]ModelConfig `yaml:"models"` /* key is model ID */
Profiles map[string][]string `yaml:"profiles"` Profiles map[string][]string `yaml:"profiles"`
@@ -177,6 +184,7 @@ func LoadConfigFromReader(r io.Reader) (Config, error) {
StartPort: 5800, StartPort: 5800,
LogLevel: "info", LogLevel: "info",
LogTimeFormat: "", LogTimeFormat: "",
LogToStdout: LogToStdoutProxy,
MetricsMaxInMemory: 1000, MetricsMaxInMemory: 1000,
} }
err = yaml.Unmarshal(data, &config) err = yaml.Unmarshal(data, &config)
@@ -193,6 +201,12 @@ func LoadConfigFromReader(r io.Reader) (Config, error) {
return Config{}, fmt.Errorf("startPort must be greater than 1") return Config{}, fmt.Errorf("startPort must be greater than 1")
} }
switch config.LogToStdout {
case LogToStdoutProxy, LogToStdoutUpstream, LogToStdoutBoth, LogToStdoutNone:
default:
return Config{}, fmt.Errorf("logToStdout must be one of: proxy, upstream, both, none")
}
// Populate the aliases map // Populate the aliases map
config.aliases = make(map[string]string) config.aliases = make(map[string]string)
for modelName, modelConfig := range config.Models { for modelName, modelConfig := range config.Models {
+1
View File
@@ -166,6 +166,7 @@ groups:
expected := Config{ expected := Config{
LogLevel: "info", LogLevel: "info",
LogTimeFormat: "", LogTimeFormat: "",
LogToStdout: LogToStdoutProxy,
StartPort: 5800, StartPort: 5800,
Macros: MacroList{ Macros: MacroList{
{"svr-path", "path/to/server"}, {"svr-path", "path/to/server"},
+1
View File
@@ -158,6 +158,7 @@ groups:
expected := Config{ expected := Config{
LogLevel: "info", LogLevel: "info",
LogTimeFormat: "", LogTimeFormat: "",
LogToStdout: LogToStdoutProxy,
StartPort: 5800, StartPort: 5800,
Macros: MacroList{ Macros: MacroList{
{"svr-path", "path/to/server"}, {"svr-path", "path/to/server"},
+72 -52
View File
@@ -52,17 +52,37 @@ type ProxyManager struct {
version string version string
} }
func New(config config.Config) *ProxyManager { func New(proxyConfig config.Config) *ProxyManager {
// set up loggers // set up loggers
stdoutLogger := NewLogMonitorWriter(os.Stdout)
upstreamLogger := NewLogMonitorWriter(stdoutLogger)
proxyLogger := NewLogMonitorWriter(stdoutLogger)
if config.LogRequests { var muxLogger, upstreamLogger, proxyLogger *LogMonitor
switch proxyConfig.LogToStdout {
case config.LogToStdoutNone:
muxLogger = NewLogMonitorWriter(io.Discard)
upstreamLogger = NewLogMonitorWriter(io.Discard)
proxyLogger = NewLogMonitorWriter(io.Discard)
case config.LogToStdoutBoth:
muxLogger = NewLogMonitorWriter(os.Stdout)
upstreamLogger = NewLogMonitorWriter(muxLogger)
proxyLogger = NewLogMonitorWriter(muxLogger)
case config.LogToStdoutUpstream:
muxLogger = NewLogMonitorWriter(os.Stdout)
upstreamLogger = NewLogMonitorWriter(muxLogger)
proxyLogger = NewLogMonitorWriter(io.Discard)
default:
// same as config.LogToStdoutProxy
// helpful because some old tests create a config.Config directly and it
// may not have LogToStdout set explicitly
muxLogger = NewLogMonitorWriter(os.Stdout)
upstreamLogger = NewLogMonitorWriter(io.Discard)
proxyLogger = NewLogMonitorWriter(muxLogger)
}
if proxyConfig.LogRequests {
proxyLogger.Warn("LogRequests configuration is deprecated. Use logLevel instead.") proxyLogger.Warn("LogRequests configuration is deprecated. Use logLevel instead.")
} }
switch strings.ToLower(strings.TrimSpace(config.LogLevel)) { switch strings.ToLower(strings.TrimSpace(proxyConfig.LogLevel)) {
case "debug": case "debug":
proxyLogger.SetLogLevel(LevelDebug) proxyLogger.SetLogLevel(LevelDebug)
upstreamLogger.SetLogLevel(LevelDebug) upstreamLogger.SetLogLevel(LevelDebug)
@@ -99,7 +119,7 @@ func New(config config.Config) *ProxyManager {
"stampnano": time.StampNano, "stampnano": time.StampNano,
} }
if timeFormat, ok := timeFormats[strings.ToLower(strings.TrimSpace(config.LogTimeFormat))]; ok { if timeFormat, ok := timeFormats[strings.ToLower(strings.TrimSpace(proxyConfig.LogTimeFormat))]; ok {
proxyLogger.SetLogTimeFormat(timeFormat) proxyLogger.SetLogTimeFormat(timeFormat)
upstreamLogger.SetLogTimeFormat(timeFormat) upstreamLogger.SetLogTimeFormat(timeFormat)
} }
@@ -107,18 +127,18 @@ func New(config config.Config) *ProxyManager {
shutdownCtx, shutdownCancel := context.WithCancel(context.Background()) shutdownCtx, shutdownCancel := context.WithCancel(context.Background())
var maxMetrics int var maxMetrics int
if config.MetricsMaxInMemory <= 0 { if proxyConfig.MetricsMaxInMemory <= 0 {
maxMetrics = 1000 // Default fallback maxMetrics = 1000 // Default fallback
} else { } else {
maxMetrics = config.MetricsMaxInMemory maxMetrics = proxyConfig.MetricsMaxInMemory
} }
pm := &ProxyManager{ pm := &ProxyManager{
config: config, config: proxyConfig,
ginEngine: gin.New(), ginEngine: gin.New(),
proxyLogger: proxyLogger, proxyLogger: proxyLogger,
muxLogger: stdoutLogger, muxLogger: muxLogger,
upstreamLogger: upstreamLogger, upstreamLogger: upstreamLogger,
metricsMonitor: newMetricsMonitor(proxyLogger, maxMetrics), metricsMonitor: newMetricsMonitor(proxyLogger, maxMetrics),
@@ -134,19 +154,19 @@ func New(config config.Config) *ProxyManager {
} }
// create the process groups // create the process groups
for groupID := range config.Groups { for groupID := range proxyConfig.Groups {
processGroup := NewProcessGroup(groupID, config, proxyLogger, upstreamLogger) processGroup := NewProcessGroup(groupID, proxyConfig, proxyLogger, upstreamLogger)
pm.processGroups[groupID] = processGroup pm.processGroups[groupID] = processGroup
} }
pm.setupGinEngine() pm.setupGinEngine()
// run any startup hooks // run any startup hooks
if len(config.Hooks.OnStartup.Preload) > 0 { if len(proxyConfig.Hooks.OnStartup.Preload) > 0 {
// do it in the background, don't block startup -- not sure if good idea yet // do it in the background, don't block startup -- not sure if good idea yet
go func() { go func() {
discardWriter := &DiscardWriter{} discardWriter := &DiscardWriter{}
for _, realModelName := range config.Hooks.OnStartup.Preload { for _, realModelName := range proxyConfig.Hooks.OnStartup.Preload {
proxyLogger.Infof("Preloading model: %s", realModelName) proxyLogger.Infof("Preloading model: %s", realModelName)
processGroup, _, err := pm.swapProcessGroup(realModelName) processGroup, _, err := pm.swapProcessGroup(realModelName)
@@ -266,7 +286,7 @@ func (pm *ProxyManager) setupGinEngine() {
// in proxymanager_loghandlers.go // in proxymanager_loghandlers.go
pm.ginEngine.GET("/logs", pm.sendLogsHandlers) pm.ginEngine.GET("/logs", pm.sendLogsHandlers)
pm.ginEngine.GET("/logs/stream", pm.streamLogsHandler) pm.ginEngine.GET("/logs/stream", pm.streamLogsHandler)
pm.ginEngine.GET("/logs/stream/:logMonitorID", pm.streamLogsHandler) pm.ginEngine.GET("/logs/stream/*logMonitorID", pm.streamLogsHandler)
/** /**
* User Interface Endpoints * User Interface Endpoints
@@ -466,61 +486,61 @@ func (pm *ProxyManager) listModelsHandler(c *gin.Context) {
}) })
} }
func (pm *ProxyManager) proxyToUpstream(c *gin.Context) { // findModelInPath searches for a valid model name in a path with slashes.
upstreamPath := c.Param("upstreamPath") // It iteratively builds up path segments until it finds a matching model.
// Returns: (searchModelName, realModelName, remainingPath, found)
// split the upstream path by / and search for the model name // Example: "/author/model/endpoint" with model "author/model" -> ("author/model", "author/model", "/endpoint", true)
parts := strings.Split(strings.TrimSpace(upstreamPath), "/") func (pm *ProxyManager) findModelInPath(path string) (searchName string, realName string, remainingPath string, found bool) {
if len(parts) == 0 { parts := strings.Split(strings.TrimSpace(path), "/")
pm.sendErrorResponse(c, http.StatusBadRequest, "model id required in path")
return
}
modelFound := false
searchModelName := "" searchModelName := ""
var modelName, remainingPath string
for i, part := range parts { for i, part := range parts {
if parts[i] == "" { if part == "" {
continue continue
} }
if searchModelName == "" { if searchModelName == "" {
searchModelName = part searchModelName = part
} else { } else {
searchModelName = searchModelName + "/" + parts[i] searchModelName = searchModelName + "/" + part
} }
if real, ok := pm.config.RealModelName(searchModelName); ok { if real, ok := pm.config.RealModelName(searchModelName); ok {
modelName = real return searchModelName, real, "/" + strings.Join(parts[i+1:], "/"), true
remainingPath = "/" + strings.Join(parts[i+1:], "/")
modelFound = true
// Check if this is exactly a model name with no additional path
// and doesn't end with a trailing slash
if remainingPath == "/" && !strings.HasSuffix(upstreamPath, "/") {
// Build new URL with query parameters preserved
newPath := "/upstream/" + searchModelName + "/"
if c.Request.URL.RawQuery != "" {
newPath += "?" + c.Request.URL.RawQuery
}
// Use 308 for non-GET/HEAD requests to preserve method
if c.Request.Method == http.MethodGet || c.Request.Method == http.MethodHead {
c.Redirect(http.StatusMovedPermanently, newPath)
} else {
c.Redirect(http.StatusPermanentRedirect, newPath)
}
return
}
break
} }
} }
return "", "", "", false
}
func (pm *ProxyManager) proxyToUpstream(c *gin.Context) {
upstreamPath := c.Param("upstreamPath")
searchModelName, modelName, remainingPath, modelFound := pm.findModelInPath(upstreamPath)
if !modelFound { if !modelFound {
pm.sendErrorResponse(c, http.StatusBadRequest, "model id required in path") pm.sendErrorResponse(c, http.StatusBadRequest, "model id required in path")
return return
} }
// Check if this is exactly a model name with no additional path
// and doesn't end with a trailing slash
if remainingPath == "/" && !strings.HasSuffix(upstreamPath, "/") {
// Build new URL with query parameters preserved
newPath := "/upstream/" + searchModelName + "/"
if c.Request.URL.RawQuery != "" {
newPath += "?" + c.Request.URL.RawQuery
}
// Use 308 for non-GET/HEAD requests to preserve method
if c.Request.Method == http.MethodGet || c.Request.Method == http.MethodHead {
c.Redirect(http.StatusMovedPermanently, newPath)
} else {
c.Redirect(http.StatusPermanentRedirect, newPath)
}
return
}
processGroup, realModelName, err := pm.swapProcessGroup(modelName) processGroup, realModelName, err := pm.swapProcessGroup(modelName)
if err != nil { if err != nil {
pm.sendErrorResponse(c, http.StatusInternalServerError, fmt.Sprintf("error swapping process group: %s", err.Error())) pm.sendErrorResponse(c, http.StatusInternalServerError, fmt.Sprintf("error swapping process group: %s", err.Error()))
+5 -4
View File
@@ -31,7 +31,7 @@ func (pm *ProxyManager) streamLogsHandler(c *gin.Context) {
// prevent nginx from buffering streamed logs // prevent nginx from buffering streamed logs
c.Header("X-Accel-Buffering", "no") c.Header("X-Accel-Buffering", "no")
logMonitorId := c.Param("logMonitorID") logMonitorId := strings.TrimPrefix(c.Param("logMonitorID"), "/")
logger, err := pm.getLogger(logMonitorId) logger, err := pm.getLogger(logMonitorId)
if err != nil { if err != nil {
c.String(http.StatusBadRequest, err.Error()) c.String(http.StatusBadRequest, err.Error())
@@ -92,8 +92,9 @@ func (pm *ProxyManager) getLogger(logMonitorId string) (*LogMonitor, error) {
case "upstream": case "upstream":
return pm.upstreamLogger, nil return pm.upstreamLogger, nil
default: default:
// search for a models specific logger // search for a models specific logger using findModelInPath
if name, found := pm.config.RealModelName(logMonitorId); found { // to handle model names with slashes (e.g., "author/model")
if _, name, _, found := pm.findModelInPath("/" + logMonitorId); found {
for _, group := range pm.processGroups { for _, group := range pm.processGroups {
if process, found := group.GetMember(name); found { if process, found := group.GetMember(name); found {
return process.Logger(), nil return process.Logger(), nil
@@ -101,6 +102,6 @@ func (pm *ProxyManager) getLogger(logMonitorId string) (*LogMonitor, error) {
} }
} }
return nil, fmt.Errorf("invalid logger. Use 'proxy' or 'upstream'") return nil, fmt.Errorf("invalid logger. Use 'proxy', 'upstream' or a model's ID")
} }
} }
+3 -1
View File
@@ -1078,7 +1078,8 @@ func TestProxyManager_StreamingEndpointsReturnNoBufferingHeader(t *testing.T) {
config := config.AddDefaultGroupToConfig(config.Config{ config := config.AddDefaultGroupToConfig(config.Config{
HealthCheckTimeout: 15, HealthCheckTimeout: 15,
Models: map[string]config.ModelConfig{ Models: map[string]config.ModelConfig{
"model1": getTestSimpleResponderConfig("model1"), "model1": getTestSimpleResponderConfig("model1"),
"author/model": getTestSimpleResponderConfig("author/model"),
}, },
LogLevel: "error", LogLevel: "error",
}) })
@@ -1091,6 +1092,7 @@ func TestProxyManager_StreamingEndpointsReturnNoBufferingHeader(t *testing.T) {
"/logs/stream", "/logs/stream",
"/logs/stream/proxy", "/logs/stream/proxy",
"/logs/stream/upstream", "/logs/stream/upstream",
"/logs/stream/author/model",
} }
for _, endpoint := range endpoints { for _, endpoint := range endpoints {