Model capabilities 734 (#842)

internal/config,server: implement model capabilities

- define the capabilities of a model using a simple config block on the
model
- /v1/models renders capabilities in formats compatible with OpenRouter,
Hugging Face Chat, and Mistral, so a wider range of clients can consume them
- add support for capabilities in UI

Fixes #734
This commit is contained in:
Benson Wong
2026-06-13 23:23:19 -07:00
committed by GitHub
parent 62aea0e83d
commit 92b90447e8
16 changed files with 868 additions and 35 deletions
+4
View File
@@ -447,6 +447,10 @@ func LoadConfigFromReader(r io.Reader) (Config, error) {
}
}
if err = modelConfig.Capabilities.Validate(); err != nil {
return Config{}, fmt.Errorf("model %s: %w", modelId, err)
}
// Validate SetParamsByID keys and values
for key, paramMap := range modelConfig.Filters.SetParamsByID {
if matches := macroPatternRegex.FindAllStringSubmatch(key, -1); len(matches) > 0 {
+45
View File
@@ -2,6 +2,7 @@ package config
import (
"errors"
"fmt"
"runtime"
)
@@ -9,6 +10,47 @@ const (
MODEL_CONFIG_DEFAULT_TTL = -1
)
// validModalities is the set of modality names accepted in the "in" and
// "out" lists of a capabilities block.
var validModalities = map[string]struct{}{
	"text":  {},
	"audio": {},
	"image": {},
}

// ModelCapConfig describes the modalities and features a model supports.
// It is surfaced through /v1/models so clients can discover them. A block
// whose fields are all zero values is treated as not configured.
type ModelCapConfig struct {
	In       []string `yaml:"in"`
	Out      []string `yaml:"out"`
	Tools    bool     `yaml:"tools"`
	Reranker bool     `yaml:"reranker"`
	Context  int      `yaml:"context"`
}

// Empty reports whether every field still holds its zero value. An explicit
// "tools: false" or "reranker: false" is therefore indistinguishable from an
// omitted block.
func (c ModelCapConfig) Empty() bool {
	if c.Tools || c.Reranker {
		return false
	}
	if c.Context != 0 {
		return false
	}
	return len(c.In)+len(c.Out) == 0
}

// Validate rejects unknown modality names in In and Out and a negative
// Context. Checks run in that order and the first problem found is returned;
// a nil result means the block is acceptable.
func (c ModelCapConfig) Validate() error {
	for _, modality := range c.In {
		if _, known := validModalities[modality]; known {
			continue
		}
		return fmt.Errorf("capabilities.in: invalid modality %q, must be one of: text, audio, image", modality)
	}

	for _, modality := range c.Out {
		if _, known := validModalities[modality]; known {
			continue
		}
		return fmt.Errorf("capabilities.out: invalid modality %q, must be one of: text, audio, image", modality)
	}

	if c.Context >= 0 {
		return nil
	}
	return errors.New("capabilities.context: must be >= 0")
}
// TimeoutsConfig holds timeout settings for proxy connections
// 0 = no timeout
type TimeoutsConfig struct {
@@ -55,6 +97,9 @@ type ModelConfig struct {
// Timeout settings for proxy connections
Timeouts TimeoutsConfig `yaml:"timeouts"`
// Capabilities defines what modalities and features the model supports.
Capabilities ModelCapConfig `yaml:"capabilities"`
// Copy of HealthCheckTimeout from global config
HealthCheckTimeout int `yaml:"healthCheckTimeout"`
}
+165 -1
View File
@@ -152,7 +152,7 @@ models:
stop:
- "<|end|>"
- "<|stop|>"
`
`
config, err := LoadConfigFromReader(strings.NewReader(content))
assert.NoError(t, err)
@@ -170,3 +170,167 @@ models:
assert.Equal(t, 0.7, setParams["temperature"])
assert.Equal(t, 0.9, setParams["top_p"])
}
// TestConfig_ModelCapabilities loads YAML configs through LoadConfigFromReader
// and checks how the per-model "capabilities" block is parsed, including which
// inputs make ModelCapConfig.Empty() report true.
func TestConfig_ModelCapabilities(t *testing.T) {
// in, out, tools and context all set: each value must be parsed as written.
t.Run("all fields", func(t *testing.T) {
content := `
models:
model1:
cmd: path/to/cmd --port ${PORT}
capabilities:
in:
- text
- audio
- image
out:
- text
- audio
- image
tools: true
context: 32000
`
config, err := LoadConfigFromReader(strings.NewReader(content))
assert.NoError(t, err)
mc := config.Models["model1"]
assert.False(t, mc.Capabilities.Empty())
assert.Equal(t, []string{"text", "audio", "image"}, mc.Capabilities.In)
assert.Equal(t, []string{"text", "audio", "image"}, mc.Capabilities.Out)
assert.True(t, mc.Capabilities.Tools)
assert.Equal(t, 32000, mc.Capabilities.Context)
})
// Only tools and context set: the omitted in/out lists must stay nil.
t.Run("partial fields", func(t *testing.T) {
content := `
models:
model1:
cmd: path/to/cmd --port ${PORT}
capabilities:
tools: true
context: 8192
`
config, err := LoadConfigFromReader(strings.NewReader(content))
assert.NoError(t, err)
mc := config.Models["model1"]
assert.False(t, mc.Capabilities.Empty())
assert.Nil(t, mc.Capabilities.In)
assert.Nil(t, mc.Capabilities.Out)
assert.True(t, mc.Capabilities.Tools)
assert.Equal(t, 8192, mc.Capabilities.Context)
})
// No capabilities block at all: the config is reported as empty.
t.Run("not set", func(t *testing.T) {
content := `
models:
model1:
cmd: path/to/cmd --port ${PORT}
`
config, err := LoadConfigFromReader(strings.NewReader(content))
assert.NoError(t, err)
mc := config.Models["model1"]
assert.True(t, mc.Capabilities.Empty())
})
// An explicit "tools: false" is the zero value, so the block still counts
// as empty.
t.Run("tools false is empty", func(t *testing.T) {
content := `
models:
model1:
cmd: path/to/cmd --port ${PORT}
capabilities:
tools: false
`
config, err := LoadConfigFromReader(strings.NewReader(content))
assert.NoError(t, err)
mc := config.Models["model1"]
assert.True(t, mc.Capabilities.Empty())
})
// "reranker: true" alone is enough to make the block non-empty.
t.Run("reranker true is not empty", func(t *testing.T) {
content := `
models:
model1:
cmd: path/to/cmd --port ${PORT}
capabilities:
reranker: true
`
config, err := LoadConfigFromReader(strings.NewReader(content))
assert.NoError(t, err)
mc := config.Models["model1"]
assert.False(t, mc.Capabilities.Empty())
assert.True(t, mc.Capabilities.Reranker)
})
// An explicit "reranker: false" is the zero value, so the block still
// counts as empty.
t.Run("reranker false is empty", func(t *testing.T) {
content := `
models:
model1:
cmd: path/to/cmd --port ${PORT}
capabilities:
reranker: false
`
config, err := LoadConfigFromReader(strings.NewReader(content))
assert.NoError(t, err)
mc := config.Models["model1"]
assert.True(t, mc.Capabilities.Empty())
})
}
// TestConfig_ModelCapabilities_Validate exercises ModelCapConfig.Validate
// directly and then confirms that LoadConfigFromReader surfaces the same
// validation failure when a config is loaded.
func TestConfig_ModelCapabilities_Validate(t *testing.T) {
// Known modalities with a positive context pass validation.
t.Run("valid_modalities", func(t *testing.T) {
caps := ModelCapConfig{
In: []string{"text", "image"},
Out: []string{"text", "audio"},
Tools: true,
Context: 100000,
}
assert.NoError(t, caps.Validate())
})
// The zero value is valid.
t.Run("empty_is_valid", func(t *testing.T) {
caps := ModelCapConfig{}
assert.NoError(t, caps.Validate())
})
// An unknown input modality is rejected; the error names both the field
// and the offending value.
t.Run("invalid_in_modality", func(t *testing.T) {
caps := ModelCapConfig{In: []string{"video"}}
err := caps.Validate()
assert.Error(t, err)
assert.Contains(t, err.Error(), "capabilities.in")
assert.Contains(t, err.Error(), "video")
})
// Same check for the output list.
t.Run("invalid_out_modality", func(t *testing.T) {
caps := ModelCapConfig{Out: []string{"video"}}
err := caps.Validate()
assert.Error(t, err)
assert.Contains(t, err.Error(), "capabilities.out")
assert.Contains(t, err.Error(), "video")
})
// A negative context length is rejected.
t.Run("negative_context", func(t *testing.T) {
caps := ModelCapConfig{Context: -1}
err := caps.Validate()
assert.Error(t, err)
assert.Contains(t, err.Error(), "capabilities.context")
})
// Loading a config with an unknown modality fails, and the error mentions
// the bad value.
t.Run("rejects_invalid_at_load", func(t *testing.T) {
content := `
models:
model1:
cmd: path/to/cmd --port ${PORT}
capabilities:
in:
- text
- video
`
_, err := LoadConfigFromReader(strings.NewReader(content))
assert.Error(t, err)
assert.Contains(t, err.Error(), "video")
})
}
+120 -11
View File
@@ -17,13 +17,118 @@ const apiUnloadTimeout = 10 * time.Second
// modelRecord is one entry in the OpenAI-compatible /v1/models listing.
type modelRecord struct {
ID string `json:"id"`
Object string `json:"object"`
Created int64 `json:"created"`
OwnedBy string `json:"owned_by"`
Name string `json:"name,omitempty"`
Description string `json:"description,omitempty"`
Meta map[string]any `json:"meta,omitempty"`
ID string `json:"id"`
Object string `json:"object"`
Created int64 `json:"created"`
OwnedBy string `json:"owned_by"`
Name string `json:"name,omitempty"`
Description string `json:"description,omitempty"`
Architecture map[string]any `json:"architecture,omitempty"`
Capabilities map[string]any `json:"capabilities,omitempty"`
SupportedParameters []string `json:"supported_parameters,omitempty"`
ContextLength int `json:"context_length,omitempty"`
Meta map[string]any `json:"meta,omitempty"`
}
// cappedMetadataKeys is the set of top-level /v1/models field names owned by
// the capabilities renderer. When a model has a non-empty capabilities block,
// any of these keys found in the model's metadata block are dropped so the
// rendered values win. Models without capabilities keep their metadata
// untouched. All four keys are removed together, whether or not the renderer
// actually produced a value for each one.
var cappedMetadataKeys = map[string]struct{}{
"architecture": {},
"capabilities": {},
"supported_parameters": {},
"context_length": {},
}
// renderCapabilities converts a model's capabilities config into the extra
// /v1/models fields.
//
// Returns:
//   - arch: "input_modalities" / "output_modalities" when the matching list is
//     set, plus "modality" ("in1+in2->out1+out2") when both are set. The
//     modality values are caps.In / caps.Out themselves, not copies. nil when
//     neither list is set.
//   - capsMap: boolean feature flags derived from the modalities ("vision",
//     "audio_transcriptions", "audio_speech", "image_generation",
//     "image_to_image") and from Tools / Reranker ("function_calling",
//     "reranker"). nil when no flag applies.
//   - params: {"tools", "tool_choice"} when Tools is set, otherwise nil.
//   - ctxLen: caps.Context when positive, otherwise 0.
//
// All results are zero values when caps.Empty() is true.
func renderCapabilities(caps config.ModelCapConfig) (arch map[string]any, capsMap map[string]any, params []string, ctxLen int) {
	if caps.Empty() {
		return nil, nil, nil, 0
	}

	hasIn := len(caps.In) > 0
	hasOut := len(caps.Out) > 0

	if hasIn || hasOut {
		arch = make(map[string]any, 3)
		if hasIn {
			arch["input_modalities"] = caps.In
		}
		if hasOut {
			arch["output_modalities"] = caps.Out
		}
		if hasIn && hasOut {
			arch["modality"] = strings.Join(caps.In, "+") + "->" + strings.Join(caps.Out, "+")
		}
	}

	// setCap allocates capsMap on first use so that a model with no derived
	// feature flag (for example in: [text] only) yields nil rather than an
	// empty map.
	setCap := func(name string) {
		if capsMap == nil {
			capsMap = make(map[string]any)
		}
		capsMap[name] = true
	}

	// contains is false for an empty list, so no separate hasIn/hasOut guard
	// is needed for the modality-derived flags.
	if contains(caps.In, "image") {
		setCap("vision")
	}
	if contains(caps.In, "audio") && contains(caps.Out, "text") {
		setCap("audio_transcriptions")
	}
	if contains(caps.In, "text") && contains(caps.Out, "audio") {
		setCap("audio_speech")
	}
	if contains(caps.In, "text") && contains(caps.Out, "image") {
		setCap("image_generation")
	}
	if contains(caps.In, "image") && contains(caps.Out, "image") {
		setCap("image_to_image")
	}

	if caps.Tools {
		setCap("function_calling")
		params = []string{"tools", "tool_choice"}
	}
	if caps.Reranker {
		setCap("reranker")
	}

	if caps.Context > 0 {
		ctxLen = caps.Context
	}

	return arch, capsMap, params, ctxLen
}
// contains reports whether the string s occurs anywhere in ss. A nil or empty
// ss never contains anything.
func contains(ss []string, s string) bool {
	found := false
	for i := 0; i < len(ss) && !found; i++ {
		found = ss[i] == s
	}
	return found
}
// filterCappedMetadata returns a copy of md without the keys listed in
// cappedMetadataKeys. The input map is never modified. The result is nil when
// md is empty or when every key was filtered out.
func filterCappedMetadata(md map[string]any) map[string]any {
	var kept map[string]any
	for key, value := range md {
		if _, owned := cappedMetadataKeys[key]; owned {
			continue
		}
		// Allocate only once a surviving key is seen, so the "nothing left"
		// case naturally yields nil.
		if kept == nil {
			kept = make(map[string]any, len(md))
		}
		kept[key] = value
	}
	return kept
}
// handleListModels serves the OpenAI-compatible model listing: local models
@@ -32,7 +137,7 @@ func (s *Server) handleListModels(w http.ResponseWriter, r *http.Request) {
created := time.Now().Unix()
data := make([]modelRecord, 0, len(s.cfg.Models))
newRecord := func(id, name, description string, metadata map[string]any) modelRecord {
newRecord := func(id, name, description string, metadata map[string]any, caps config.ModelCapConfig) modelRecord {
rec := modelRecord{
ID: id,
Object: "model",
@@ -41,6 +146,10 @@ func (s *Server) handleListModels(w http.ResponseWriter, r *http.Request) {
Name: strings.TrimSpace(name),
Description: strings.TrimSpace(description),
}
rec.Architecture, rec.Capabilities, rec.SupportedParameters, rec.ContextLength = renderCapabilities(caps)
if !caps.Empty() {
metadata = filterCappedMetadata(metadata)
}
if len(metadata) > 0 {
rec.Meta = map[string]any{"llamaswap": metadata}
}
@@ -51,12 +160,12 @@ func (s *Server) handleListModels(w http.ResponseWriter, r *http.Request) {
if mc.Unlisted {
continue
}
data = append(data, newRecord(id, mc.Name, mc.Description, mc.Metadata))
data = append(data, newRecord(id, mc.Name, mc.Description, mc.Metadata, mc.Capabilities))
if s.cfg.IncludeAliasesInList {
for _, alias := range mc.Aliases {
if alias := strings.TrimSpace(alias); alias != "" {
data = append(data, newRecord(alias, mc.Name, mc.Description, mc.Metadata))
data = append(data, newRecord(alias, mc.Name, mc.Description, mc.Metadata, mc.Capabilities))
}
}
}
@@ -64,7 +173,7 @@ func (s *Server) handleListModels(w http.ResponseWriter, r *http.Request) {
for peerID, peer := range s.cfg.Peers {
for _, modelID := range peer.Models {
data = append(data, newRecord(modelID, peerID+": "+modelID, "", map[string]any{"peerID": peerID}))
data = append(data, newRecord(modelID, peerID+": "+modelID, "", map[string]any{"peerID": peerID}, config.ModelCapConfig{}))
}
}
+259
View File
@@ -157,3 +157,262 @@ func TestServer_Redirects(t *testing.T) {
}
}
}
// TestServer_HandleListModels_Capabilities checks how a model's capabilities
// config is rendered into the /v1/models response, and how the rendered fields
// interact with keys of the same name in the model's metadata block.
func TestServer_HandleListModels_Capabilities(t *testing.T) {
// newServer builds a test server whose config holds exactly one model, "m".
newServer := func(mc config.ModelConfig) *Server {
s := newTestServer(newStubRouter(nil, ""), newStubRouter(nil, ""))
s.cfg = config.Config{Models: map[string]config.ModelConfig{"m": mc}}
return s
}
// getModel issues GET /v1/models against s, decodes the JSON body, and
// returns the single record; it fails the test if the listing does not hold
// exactly one model.
getModel := func(t *testing.T, s *Server) modelRecord {
t.Helper()
w := httptest.NewRecorder()
s.ServeHTTP(w, httptest.NewRequest(http.MethodGet, "/v1/models", nil))
var resp struct {
Data []modelRecord `json:"data"`
}
if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
t.Fatalf("decode: %v", err)
}
if len(resp.Data) != 1 {
t.Fatalf("expected 1 model, got %d", len(resp.Data))
}
return resp.Data[0]
}
// Every capability field set: architecture, derived capability flags,
// supported_parameters and context_length must all be rendered.
t.Run("all_fields", func(t *testing.T) {
m := getModel(t, newServer(config.ModelConfig{
Capabilities: config.ModelCapConfig{
In: []string{"text", "image"},
Out: []string{"text", "audio"},
Tools: true,
Context: 100000,
},
}))
if m.Architecture == nil {
t.Fatal("architecture is nil")
}
if !anySliceStrEqual(m.Architecture["input_modalities"], []string{"text", "image"}) {
t.Errorf("input_modalities = %v", m.Architecture["input_modalities"])
}
if !anySliceStrEqual(m.Architecture["output_modalities"], []string{"text", "audio"}) {
t.Errorf("output_modalities = %v", m.Architecture["output_modalities"])
}
if m.Architecture["modality"] != "text+image->text+audio" {
t.Errorf("modality = %v", m.Architecture["modality"])
}
if m.Capabilities == nil || m.Capabilities["vision"] != true {
t.Errorf("vision = %v", m.Capabilities)
}
if m.Capabilities["audio_speech"] != true {
t.Errorf("audio_speech = %v", m.Capabilities["audio_speech"])
}
if m.Capabilities["function_calling"] != true {
t.Errorf("function_calling = %v", m.Capabilities["function_calling"])
}
if !stringSliceEqual(m.SupportedParameters, []string{"tools", "tool_choice"}) {
t.Errorf("supported_parameters = %v", m.SupportedParameters)
}
if m.ContextLength != 100000 {
t.Errorf("context_length = %d", m.ContextLength)
}
})
// Only input modalities: no output_modalities or combined modality string,
// but image input still yields the vision flag.
t.Run("in_only", func(t *testing.T) {
m := getModel(t, newServer(config.ModelConfig{
Capabilities: config.ModelCapConfig{In: []string{"text", "image"}},
}))
if m.Architecture == nil {
t.Fatal("architecture is nil")
}
if _, ok := m.Architecture["output_modalities"]; ok {
t.Error("should not have output_modalities")
}
if _, ok := m.Architecture["modality"]; ok {
t.Error("should not have modality")
}
if m.Capabilities == nil || m.Capabilities["vision"] != true {
t.Error("expected vision: true")
}
if m.SupportedParameters != nil {
t.Error("should not have supported_parameters")
}
if m.ContextLength != 0 {
t.Error("should not have context_length")
}
})
// Only output modalities: architecture is present, but no capability flag
// can be derived.
t.Run("out_only", func(t *testing.T) {
m := getModel(t, newServer(config.ModelConfig{
Capabilities: config.ModelCapConfig{Out: []string{"audio"}},
}))
if m.Architecture == nil {
t.Fatal("architecture is nil")
}
if _, ok := m.Architecture["input_modalities"]; ok {
t.Error("should not have input_modalities")
}
if len(m.Capabilities) > 0 {
t.Errorf("expected no capabilities, got %v", m.Capabilities)
}
})
// Tools alone: function_calling flag and the tool parameters, with no
// architecture block.
t.Run("tools", func(t *testing.T) {
m := getModel(t, newServer(config.ModelConfig{
Capabilities: config.ModelCapConfig{Tools: true},
}))
if m.Capabilities == nil || m.Capabilities["function_calling"] != true {
t.Error("expected function_calling: true")
}
if !stringSliceEqual(m.SupportedParameters, []string{"tools", "tool_choice"}) {
t.Errorf("supported_parameters = %v", m.SupportedParameters)
}
if m.Architecture != nil {
t.Error("should not have architecture")
}
})
// Reranker alone: reranker flag, with no architecture block.
t.Run("reranker", func(t *testing.T) {
m := getModel(t, newServer(config.ModelConfig{
Capabilities: config.ModelCapConfig{Reranker: true},
}))
if m.Capabilities == nil || m.Capabilities["reranker"] != true {
t.Error("expected reranker: true")
}
if m.Architecture != nil {
t.Error("should not have architecture")
}
})
// Context alone: context_length is rendered, with no architecture block.
t.Run("context", func(t *testing.T) {
m := getModel(t, newServer(config.ModelConfig{
Capabilities: config.ModelCapConfig{Context: 32768},
}))
if m.ContextLength != 32768 {
t.Errorf("context_length = %d", m.ContextLength)
}
if m.Architecture != nil {
t.Error("should not have architecture")
}
})
// audio in + text out derives audio_transcriptions.
t.Run("audio_transcriptions", func(t *testing.T) {
m := getModel(t, newServer(config.ModelConfig{
Capabilities: config.ModelCapConfig{In: []string{"audio"}, Out: []string{"text"}},
}))
if m.Capabilities == nil || m.Capabilities["audio_transcriptions"] != true {
t.Error("expected audio_transcriptions: true")
}
})
// text in + image out derives image_generation.
t.Run("image_generation", func(t *testing.T) {
m := getModel(t, newServer(config.ModelConfig{
Capabilities: config.ModelCapConfig{In: []string{"text"}, Out: []string{"image"}},
}))
if m.Capabilities == nil || m.Capabilities["image_generation"] != true {
t.Error("expected image_generation: true")
}
})
// image in + image out derives image_to_image.
t.Run("image_to_image", func(t *testing.T) {
m := getModel(t, newServer(config.ModelConfig{
Capabilities: config.ModelCapConfig{In: []string{"image"}, Out: []string{"image"}},
}))
if m.Capabilities == nil || m.Capabilities["image_to_image"] != true {
t.Error("expected image_to_image: true")
}
})
// No capabilities configured: none of the rendered fields may appear.
t.Run("empty_skip", func(t *testing.T) {
m := getModel(t, newServer(config.ModelConfig{}))
if m.Architecture != nil {
t.Error("should not have architecture")
}
if m.Capabilities != nil {
t.Error("should not have capabilities")
}
if m.SupportedParameters != nil {
t.Error("should not have supported_parameters")
}
if m.ContextLength != 0 {
t.Error("should not have context_length")
}
})
// With capabilities configured, renderer-owned keys are stripped from the
// metadata exposed under meta.llamaswap while unrelated keys survive.
t.Run("metadata_precedence", func(t *testing.T) {
m := getModel(t, newServer(config.ModelConfig{
Capabilities: config.ModelCapConfig{In: []string{"text"}},
Metadata: map[string]any{
"architecture": "should-be-dropped",
"custom_field": "should-remain",
"capabilities": "also-dropped",
"other_metadata": "also-remain",
},
}))
if m.Architecture == nil || m.Architecture["input_modalities"] == nil {
t.Fatal("architecture should be rendered, not from metadata")
}
if m.Meta == nil || m.Meta["llamaswap"] == nil {
t.Fatal("meta.llamaswap should exist")
}
meta := m.Meta["llamaswap"].(map[string]any)
if _, ok := meta["architecture"]; ok {
t.Error("architecture should be filtered from metadata")
}
if _, ok := meta["custom_field"]; !ok {
t.Error("custom_field should remain in metadata")
}
})
// Without capabilities, metadata passes through unfiltered, including keys
// that share a name with renderer-owned fields.
t.Run("metadata_passthrough_no_caps", func(t *testing.T) {
m := getModel(t, newServer(config.ModelConfig{
Metadata: map[string]any{
"architecture": "preserved",
"context_length": 4096,
"capabilities": "preserved",
"custom_field": "preserved",
},
}))
if m.Architecture != nil {
t.Error("should not have architecture when caps is empty")
}
if m.Meta == nil || m.Meta["llamaswap"] == nil {
t.Fatal("meta.llamaswap should exist")
}
meta := m.Meta["llamaswap"].(map[string]any)
if _, ok := meta["architecture"]; !ok {
t.Error("architecture should be preserved in metadata when caps is empty")
}
if _, ok := meta["context_length"]; !ok {
t.Error("context_length should be preserved in metadata when caps is empty")
}
})
}
// stringSliceEqual reports whether a and b have the same length and the same
// strings in the same order. A nil slice and an empty slice compare equal.
func stringSliceEqual(a, b []string) bool {
	n := len(a)
	if n != len(b) {
		return false
	}
	for idx := 0; idx < n; idx++ {
		if b[idx] != a[idx] {
			return false
		}
	}
	return true
}
// anySliceStrEqual reports whether v is a []any whose elements are all strings
// equal, position by position, to want. Any other dynamic type for v
// (including []string and nil) yields false. It exists because JSON arrays
// decoded into an any value arrive as []any.
func anySliceStrEqual(v any, want []string) bool {
	items, isSlice := v.([]any)
	if !isSlice || len(items) != len(want) {
		return false
	}
	for idx, expected := range want {
		got, isString := items[idx].(string)
		if !isString || got != expected {
			return false
		}
	}
	return true
}
+16 -13
View File
@@ -17,13 +17,14 @@ import (
// apiModel is one entry in the /api/events modelStatus payload.
type apiModel struct {
Id string `json:"id"`
Name string `json:"name"`
Description string `json:"description"`
State string `json:"state"`
Unlisted bool `json:"unlisted"`
PeerID string `json:"peerID"`
Aliases []string `json:"aliases,omitempty"`
Id string `json:"id"`
Name string `json:"name"`
Description string `json:"description"`
State string `json:"state"`
Unlisted bool `json:"unlisted"`
PeerID string `json:"peerID"`
Aliases []string `json:"aliases,omitempty"`
Capabilities map[string]any `json:"capabilities,omitempty"`
}
// modelStatus returns every configured model joined with its current process
@@ -44,13 +45,15 @@ func (s *Server) modelStatus() []apiModel {
if st, ok := running[id]; ok {
state = string(st)
}
_, capsMap, _, _ := renderCapabilities(mc.Capabilities)
models = append(models, apiModel{
Id: id,
Name: mc.Name,
Description: mc.Description,
State: state,
Unlisted: mc.Unlisted,
Aliases: mc.Aliases,
Id: id,
Name: mc.Name,
Description: mc.Description,
State: state,
Unlisted: mc.Unlisted,
Aliases: mc.Aliases,
Capabilities: capsMap,
})
}