diff --git a/README.md b/README.md index 348c222..4711563 100644 --- a/README.md +++ b/README.md @@ -66,7 +66,7 @@ majordomo failover chain / alias) is used verbatim. | **Ollama Cloud** (default) | `ollama-cloud` | `OLLAMA_API_KEY` / `OLLAMA_CLOUD_API_KEY` | ✅ in active use | | **Local Ollama** | `ollama` | none (`OLLAMA_HOST` or `GADFLY_BASE_URL` for a remote daemon) | ✅ tested | | **[foreman](https://gitea.stevedudenhoeffer.com/steve/foreman)** (native-Ollama queue daemon) | `foreman` + `GADFLY_BASE_URL`, or a `GADFLY_ENDPOINT_*` / `LLM_*` `foreman://` entry | optional bearer (via the endpoint/DSN) | ✅ native-Ollama path | -| **llamaswap** (llama-swap model router) | `llamaswap` + `GADFLY_BASE_URL`, or a `GADFLY_ENDPOINT_*` entry | optional bearer | ⚠️ wired, **untested** | +| **[llama-swap](https://github.com/mostlygeek/llama-swap)** (model-swapping proxy) | `llama-swap`/`llama-swaps` (un-hyphenated `llamaswap`/`llamaswaps` also accepted) + `GADFLY_BASE_URL` or a `GADFLY_ENDPOINT_*` entry, or an `LLM_*` `llama-swap://` / `llama-swaps://` DSN | optional bearer | ⚠️ wired, **untested** | | **OpenAI-compatible** (incl. local Ollama's `/v1`) | `openai` + `GADFLY_BASE_URL` | `OPENAI_API_KEY` (any non-empty for Ollama) | ✅ tested against Ollama | | **OpenAI** | `openai` | `OPENAI_API_KEY` | ⚠️ wired, **untested** | | **Anthropic** | `anthropic` | `ANTHROPIC_API_KEY` | ⚠️ wired, **untested** | diff --git a/cmd/gadfly/model.go b/cmd/gadfly/model.go index 05ba4d8..5376271 100644 --- a/cmd/gadfly/model.go +++ b/cmd/gadfly/model.go @@ -80,7 +80,11 @@ func resolveModel() (llm.Model, error) { opts = append(opts, ollama.WithToken(apiKey)) } return ollama.New(opts...).Model(model) - case "llamaswap": + case "llamaswap", "llamaswaps", "llama-swap", "llama-swaps": + // llama-swap (model-swapping proxy). Accept every spelling: hyphenated + // ("llama-swap"/"llama-swaps") mirrors majordomo's DSN schemes (http vs + // https), and the un-hyphenated forms are accepted too. With an explicit + // GADFLY_BASE_URL the scheme is whatever the URL says, so all behave the same. opts := []llamaswap.Option{llamaswap.WithBaseURL(baseURL)} if apiKey != "" { opts = append(opts, llamaswap.WithToken(apiKey)) @@ -104,7 +108,7 @@ func resolveModel() (llm.Model, error) { } return google.New(opts...).Model(model) default: - return nil, fmt.Errorf("GADFLY_BASE_URL is set but GADFLY_PROVIDER %q has no endpoint-override support (use openai/openai-compatible/ollama/llamaswap/foreman/anthropic/google, or unset GADFLY_BASE_URL to resolve via majordomo)", provider) + return nil, fmt.Errorf("GADFLY_BASE_URL is set but GADFLY_PROVIDER %q has no endpoint-override support (use openai/openai-compatible/ollama/llama-swap/foreman/anthropic/google, or unset GADFLY_BASE_URL to resolve via majordomo)", provider) } } @@ -163,7 +167,7 @@ func modelProvider() string { // plaintext local Ollama (or foreman queue) works: // GADFLY_ENDPOINT_BIGBOX="ollama|http://192.168.1.50:11434" // GADFLY_MODEL=bigbox/qwen2.5-coder:7b -// provider is one of ollama/llamaswap/foreman/openai/anthropic/google; "foreman" +// provider is one of ollama/llama-swap(s)/foreman/openai/anthropic/google; "foreman" // targets a foreman daemon (native Ollama on the wire): // GADFLY_ENDPOINT_M1="foreman|http://foreman-m1:8080|tok" // @@ -222,7 +226,7 @@ func endpointProvider(name, raw string) (llm.Provider, error) { opts = append(opts, ollama.WithToken(key)) } return ollama.New(opts...), nil - case "llamaswap": + case "llamaswap", "llamaswaps", "llama-swap", "llama-swaps": opts := []llamaswap.Option{llamaswap.WithName(name), llamaswap.WithBaseURL(baseURL)} if key != "" { opts = append(opts, llamaswap.WithToken(key)) @@ -252,6 +256,6 @@ func endpointProvider(name, raw string) (llm.Provider, error) { } return google.New(opts...), nil default: - return nil, fmt.Errorf("unknown provider %q (use ollama/llamaswap/foreman/openai/openai-compatible/anthropic/google)", provider) + return nil, fmt.Errorf("unknown provider %q (use ollama/llama-swap(s)/foreman/openai/openai-compatible/anthropic/google)", provider) } } diff --git a/cmd/gadfly/model_test.go b/cmd/gadfly/model_test.go index 6b36a5c..d77c68f 100644 --- a/cmd/gadfly/model_test.go +++ b/cmd/gadfly/model_test.go @@ -46,6 +46,19 @@ func TestEndpointProvider(t *testing.T) { t.Fatalf("unexpected error: %v", err) } }) + // All llama-swap spellings (hyphenated/TLS variants mirror majordomo's DSN + // schemes) must resolve to the llamaswap provider. + for _, name := range []string{"llama-swap", "llama-swaps", "llamaswaps"} { + t.Run(name+" alias", func(t *testing.T) { + p, err := endpointProvider("ls", name+"|https://swap.lan:8080|tok") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if p.Name() != "ls" { + t.Errorf("Name() = %q, want %q", p.Name(), "ls") + } + }) + } for _, bad := range []string{"", "ollama", "noprovider-no-pipe", "mystery|http://x"} { t.Run("rejects "+bad, func(t *testing.T) { if _, err := endpointProvider("n", bad); err == nil { diff --git a/go.mod b/go.mod index 21a33fc..5868b92 100644 --- a/go.mod +++ b/go.mod @@ -3,7 +3,7 @@ module gitea.stevedudenhoeffer.com/steve/gadfly go 1.26.2 require ( - gitea.stevedudenhoeffer.com/steve/majordomo v0.0.0-20260627201401-b2487a1a370c + gitea.stevedudenhoeffer.com/steve/majordomo v0.0.0-20260627225659-aa25b2c33462 gopkg.in/yaml.v3 v3.0.1 ) diff --git a/go.sum b/go.sum index 5d1be4c..b41b816 100644 --- a/go.sum +++ b/go.sum @@ -4,8 +4,8 @@ cloud.google.com/go/auth v0.20.0 h1:kXTssoVb4azsVDoUiF8KvxAqrsQcQtB53DcSgta74CA= cloud.google.com/go/auth v0.20.0/go.mod h1:942/yi/itH1SsmpyrbnTMDgGfdy2BUqIKyd0cyYLc5Q= cloud.google.com/go/compute/metadata v0.9.0 h1:pDUj4QMoPejqq20dK0Pg2N4yG9zIkYGdBtwLoEkH9Zs= cloud.google.com/go/compute/metadata v0.9.0/go.mod h1:E0bWwX5wTnLPedCKqk3pJmVgCBSM6qQI1yTBdEb3C10= -gitea.stevedudenhoeffer.com/steve/majordomo v0.0.0-20260627201401-b2487a1a370c h1:OZlUMUa4Z89ynOWuNzhOioTHSPcGvV3qBi9F2MTGWAE= -gitea.stevedudenhoeffer.com/steve/majordomo v0.0.0-20260627201401-b2487a1a370c/go.mod h1:UZLveG17SmENt4sne2RSLIbioix30RZbRIQUzBAnOyY= +gitea.stevedudenhoeffer.com/steve/majordomo v0.0.0-20260627225659-aa25b2c33462 h1:1crjE1YkWHLZ91tUDOxN/Y5cuOnJ56e0U9UADoFfEPY= +gitea.stevedudenhoeffer.com/steve/majordomo v0.0.0-20260627225659-aa25b2c33462/go.mod h1:UZLveG17SmENt4sne2RSLIbioix30RZbRIQUzBAnOyY= github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=