diff --git a/CLAUDE.md b/CLAUDE.md index 5ed4ecf..97733e7 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -35,7 +35,9 @@ majordomo Registry, Parse, env-DSN loading, chain executor, re-exports provider/anthropic/ Messages API client (+ Anthropic-compat targets) provider/ollama/ one native /api/chat client serving the ollama, ollama-cloud, and foreman built-ins via presets - provider/google/ Gemini on the official genai SDK (Phase 4) + provider/google/ Gemini on google.golang.org/genai (the one approved + dependency; lazy client, raw-JSON-schema tools, + ThinkingLevel reasoning, iter.Pull2 streaming) agent/ Agent run loop (Phase 5) skill/ Skill interface + composition (Phase 6) examples/ one runnable example per hard requirement (Phase 7-8) diff --git a/README.md b/README.md index cccdbbf..fa776d8 100644 --- a/README.md +++ b/README.md @@ -102,7 +102,7 @@ Chains are health-tracked per target: |----------|-----------|-------------|------------------| | OpenAI (+compatible) | `openai` | `OPENAI_API_KEY` | https://api.openai.com/v1 | | Anthropic (+compatible) | `anthropic` | `ANTHROPIC_API_KEY` | https://api.anthropic.com | -| Google (Gemini) | `google` | `GOOGLE_API_KEY` / `GEMINI_API_KEY` | Gen AI API *(pending)* | +| Google (Gemini) | `google` | `GOOGLE_API_KEY` / `GEMINI_API_KEY` | Gemini API (official SDK) | | Ollama Cloud | `ollama-cloud` | `OLLAMA_API_KEY` | https://ollama.com | | Ollama (local) | `ollama` | — | `OLLAMA_HOST` or http://localhost:11434 | | foreman | `foreman` | — (token via DSN) | requires an LLM_* DSN or `ollama.Foreman(url, token)` | @@ -214,7 +214,7 @@ skills = reusable instruction+tool bundles attachable to any agent. 
|----------------------|:---:|:---:|:---:|:---:|:---:|:---:|:---:| | OpenAI (+compatible) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | Anthropic (+compat) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | -| Google (Gemini) | ✅ | pending | pending | pending | pending | pending | ✅ | +| Google (Gemini) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | Ollama Cloud | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | Ollama (local) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | foreman | ✅ | ✅ | ✅¹ | ✅ | ✅ | ✅ | ✅ | diff --git a/builtin.go b/builtin.go index 0109097..ba6367d 100644 --- a/builtin.go +++ b/builtin.go @@ -1,19 +1,16 @@ package majordomo import ( - "context" - "fmt" "net/http" "gitea.stevedudenhoeffer.com/steve/majordomo/llm" "gitea.stevedudenhoeffer.com/steve/majordomo/provider/anthropic" + "gitea.stevedudenhoeffer.com/steve/majordomo/provider/google" "gitea.stevedudenhoeffer.com/steve/majordomo/provider/ollama" "gitea.stevedudenhoeffer.com/steve/majordomo/provider/openai" ) -// Built-in provider names. Real client implementations land per-phase -// (see progress.md); until a provider's phase ships, its registration is a -// stub that resolves (so specs parse and env DSNs load) but errors on use. +// Built-in provider names. const ( ProviderOpenAI = "openai" ProviderAnthropic = "anthropic" @@ -85,57 +82,21 @@ func registerBuiltins(r *Registry, httpClient *http.Client) { )...), nil } - // Google lands in its own phase; stub until then. - r.providers[ProviderGoogle] = &stubProvider{name: ProviderGoogle, kind: ProviderGoogle} - r.schemes[ProviderGoogle] = stubScheme(ProviderGoogle) - // "gemini" is an alternate scheme for the Google provider. - r.schemes["gemini"] = stubScheme(ProviderGoogle) -} - -func stubScheme(kind string) SchemeFactory { - return func(name string, dsn DSN) (llm.Provider, error) { - return &stubProvider{name: name, kind: kind, baseURL: dsn.BaseURL(), token: dsn.Token}, nil + // Google (Gemini) on the official SDK; "gemini" is an alternate scheme. 
+ googleOpts := func(extra ...google.Option) []google.Option { + if httpClient != nil { + extra = append(extra, google.WithHTTPClient(httpClient)) + } + return extra } -} - -// stubProvider stands in for a provider implementation that lands in a -// later phase. It resolves and carries its connection details (so Parse, -// chains, and env loading are fully functional) but errors on use. -type stubProvider struct { - name string - kind string - baseURL string - token string -} - -func (s *stubProvider) Name() string { return s.name } - -func (s *stubProvider) Model(id string, opts ...llm.ModelOption) (llm.Model, error) { - cfg := llm.ApplyModelOptions(opts) - return &stubModel{provider: s, id: id, cfg: cfg}, nil -} - -type stubModel struct { - provider *stubProvider - id string - cfg llm.ModelConfig -} - -func (m *stubModel) err() error { - return fmt.Errorf("majordomo: provider %q (%s) is not implemented yet", m.provider.name, m.provider.kind) -} - -func (m *stubModel) Generate(context.Context, llm.Request, ...llm.Option) (*llm.Response, error) { - return nil, m.err() -} - -func (m *stubModel) Stream(context.Context, llm.Request, ...llm.Option) (llm.Stream, error) { - return nil, m.err() -} - -func (m *stubModel) Capabilities() llm.Capabilities { - if m.cfg.Capabilities != nil { - return *m.cfg.Capabilities + r.providers[ProviderGoogle] = google.New(googleOpts()...) 
+ googleScheme := func(name string, dsn DSN) (llm.Provider, error) { + return google.New(googleOpts( + google.WithName(name), + google.WithBaseURL(dsn.BaseURL()), + google.WithAPIKey(dsn.Token), + )...), nil } - return llm.Capabilities{} + r.schemes[ProviderGoogle] = googleScheme + r.schemes["gemini"] = googleScheme } diff --git a/docs/adr/0011-google-provider.md b/docs/adr/0011-google-provider.md new file mode 100644 index 0000000..20c84ff --- /dev/null +++ b/docs/adr/0011-google-provider.md @@ -0,0 +1,46 @@ +# ADR-0011: Google provider on the official Gen AI SDK + +**Status:** Accepted — 2026-06-10 + +## Context + +ADR-0007 approves exactly one third-party dependency: Google's surface +(auth modes, API versions, endpoint shapes) moves too much to hand-roll +profitably. The deprecated `github.com/google/generative-ai-go` is not an +option; `google.golang.org/genai` is the current official SDK. + +## Decision + +- Build `provider/google` on `google.golang.org/genai` **v1.59.0**, + `BackendGeminiAPI`, API key from `GOOGLE_API_KEY` then `GEMINI_API_KEY` + (the SDK's own precedence). +- The SDK client is created **lazily on first request** and cached; + construction of the Provider never fails (per ADR-0005). A missing key + is a synthetic 401 `APIError`, so chains fail over past it. +- Mappings (recorded in the package doc): assistant role → `model`; + tool results → `FunctionResponse` parts in a user content + (`{"output": ...}` / `{"error": ...}` payloads); tool schemas via + `FunctionDeclaration.ParametersJsonSchema` (raw JSON schema — no lossy + conversion to genai.Schema); structured output via `ResponseJsonSchema` + + `application/json` MIME; `ToolChoice` → `FunctionCallingConfig` modes + (`required` → ANY, named → ANY + AllowedFunctionNames, `none` → tools + omitted); `ReasoningEffort` → `ThinkingConfig.ThinkingLevel` + (LOW/MEDIUM/HIGH); usage output = candidates + thoughts tokens; thought + parts are skipped in content. 
+- Streaming adapts the SDK's `iter.Seq2` to majordomo's pull-based + `Stream` with `iter.Pull2`; `Close` releases the iterator via its stop + function. Function calls arrive whole per chunk (no partial-args + assembly needed). +- Hermetic tests use the SDK's documented hooks: + `HTTPOptions.BaseURL` + `HTTPClient` pointed at `httptest` servers (the + same technique as the SDK's own test suite); streaming fixtures are SSE. +- Errors: `genai.APIError` (value type; `Code` = HTTP status) maps to + `llm.APIError{Status: Code, Code: Status}` so the standard classifier + applies. + +## Consequences + +- `go.mod` gains genai and its transitive tree (auth, grpc, protobuf) — + the one sanctioned dependency cost. +- Vertex AI backend is NOT wired (API-key Gemini only); adding it later is + an options-level change, not a redesign. diff --git a/docs/adr/README.md b/docs/adr/README.md index d7bf294..1320a97 100644 --- a/docs/adr/README.md +++ b/docs/adr/README.md @@ -14,3 +14,4 @@ One decision per file, append-only; supersede rather than rewrite. 
| [0008](0008-chain-semantics.md) | Failover-chain execution semantics | Accepted | | [0009](0009-multimodal-strategy.md) | Multimodal strategy — normalize per target, enforce at provider | Accepted | | [0010](0010-tools-structured-output-mapping.md) | Tools and structured output — canonical shape, native mappings | Accepted | +| [0011](0011-google-provider.md) | Google provider on the official Gen AI SDK | Accepted | diff --git a/go.mod b/go.mod index 9db200e..b696a5c 100644 --- a/go.mod +++ b/go.mod @@ -1,3 +1,24 @@ module gitea.stevedudenhoeffer.com/steve/majordomo go 1.26 + +require google.golang.org/genai v1.59.0 + +require ( + cloud.google.com/go v0.116.0 // indirect + cloud.google.com/go/auth v0.9.3 // indirect + cloud.google.com/go/compute/metadata v0.5.0 // indirect + github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect + github.com/google/go-cmp v0.6.0 // indirect + github.com/google/s2a-go v0.1.8 // indirect + github.com/googleapis/enterprise-certificate-proxy v0.3.4 // indirect + github.com/gorilla/websocket v1.5.3 // indirect + go.opencensus.io v0.24.0 // indirect + golang.org/x/crypto v0.36.0 // indirect + golang.org/x/net v0.38.0 // indirect + golang.org/x/sys v0.31.0 // indirect + golang.org/x/text v0.23.0 // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20240903143218-8af14fe29dc1 // indirect + google.golang.org/grpc v1.66.2 // indirect + google.golang.org/protobuf v1.34.2 // indirect +) diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..48e9cda --- /dev/null +++ b/go.sum @@ -0,0 +1,126 @@ +cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= +cloud.google.com/go v0.116.0 h1:B3fRrSDkLRt5qSHWe40ERJvhvnQwdZiHu0bJOpldweE= +cloud.google.com/go v0.116.0/go.mod h1:cEPSRWPzZEswwdr9BxE6ChEn01dWlTaF05LiC2Xs70U= +cloud.google.com/go/auth v0.9.3 h1:VOEUIAADkkLtyfr3BLa3R8Ed/j6w1jTBmARx+wb5w5U= +cloud.google.com/go/auth v0.9.3/go.mod h1:7z6VY+7h3KUdRov5F1i8NDP5ZzWKYmEPO842BgCsmTk= 
+cloud.google.com/go/compute/metadata v0.5.0 h1:Zr0eK8JbFv6+Wi4ilXAR8FJ3wyNdpxHKJNPos6LTZOY= +cloud.google.com/go/compute/metadata v0.5.0/go.mod h1:aHnloV2TPI38yx4s9+wAZhHykWvVCfu7hQbF+9CWoiY= +github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= +github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= +github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= +github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= +github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= +github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98= +github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= +github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= +github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= +github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da h1:oI5xCqsCo564l8iNU+DwB5epxmsaqB+rhGL0m5jtYqE= +github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= +github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= +github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= +github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= +github.com/golang/protobuf 
v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8= +github.com/golang/protobuf v1.4.0-rc.1.0.20200221234624-67d41d38c208/go.mod h1:xKAWHe0F5eneWXFV3EuXVDTCmh+JuBKY0li0aMyXATA= +github.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrUpVNzEA03Pprs= +github.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:WU3c8KckQ9AFe+yFwt9sWVRKCVIyN9cPHBJSNnbL67w= +github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0= +github.com/golang/protobuf v1.4.1/go.mod h1:U8fpvMrcmy5pZrNK1lt4xCsGvpyWQ/VVv6QDs8UjoX8= +github.com/golang/protobuf v1.4.3/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= +github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= +github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= +github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= +github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.3/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= +github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/s2a-go v0.1.8 h1:zZDs9gcbt9ZPLV0ndSyQk6Kacx2g/X+SKYovpnz3SMM= +github.com/google/s2a-go v0.1.8/go.mod h1:6iNWHTpQ+nfNRN5E00MSdfDwVesa8hhS32PhPO8deJA= +github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/googleapis/enterprise-certificate-proxy v0.3.4 h1:XYIDZApgAnrN1c855gTgghdIA6Stxb52D5RnLI1SLyw= +github.com/googleapis/enterprise-certificate-proxy v0.3.4/go.mod h1:YKe7cfqYXjKGpGvmSg28/fFvhNzinZQm8DGnaburhGA= +github.com/gorilla/websocket v1.5.3 h1:saDtZ6Pbx/0u+bgYQ3q96pZgCzfhKXGPqt7kZ72aNNg= +github.com/gorilla/websocket v1.5.3/go.mod 
h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= +github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= +github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= +github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= +go.opencensus.io v0.24.0 h1:y73uSU6J157QMP2kn2r30vwW1A2W2WFwSCGnAVxeaD0= +go.opencensus.io v0.24.0/go.mod h1:vNK8G9p7aAivkbmorf4v+7Hgx+Zs0yY+0fOtgBfjQKo= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= +golang.org/x/crypto v0.36.0 h1:AnAEvhDddvBdpY+uR+MyHmuZzzNqXSe/GvuDeob5L34= +golang.org/x/crypto v0.36.0/go.mod h1:Y4J0ReaxCR1IMaabaSMugxJES1EpwhBHhv2bDHklZvc= +golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= +golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= +golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU= +golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= +golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= 
+golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20201110031124-69a78807bb2b/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= +golang.org/x/net v0.38.0 h1:vRMAPTMaeGqVhG5QyLJHqNDwecKTomGeqbnfZyKlBI8= +golang.org/x/net v0.38.0/go.mod h1:ivrbrMbzFq5J41QOQh0siUuly180yBYtLp+CKbEaFx8= +golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= +golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.12.0 h1:MHc5BpPuC30uJk597Ri8TV3CNZcTLu6B6z4lJy+g6Jw= +golang.org/x/sync v0.12.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA= +golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.31.0 h1:ioabZlmFYtWhL+TRYpcnNlLwhyxaM9kWTDEmfnprqik= +golang.org/x/sys v0.31.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.23.0 
h1:D71I7dUrlY+VX0gQShAThNGHFxZ13dGLBHQLVl1mJlY= +golang.org/x/text v0.23.0/go.mod h1:/BLNzu4aZCJ1+kcD0DNRotWKage4q2rGVAg4o22unh4= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY= +golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= +golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= +golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= +google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= +google.golang.org/genai v1.59.0 h1:xp+ydkJFW8hO0hTUaAkr8TrLM9HFP3NYAwFhPd0nDqA= +google.golang.org/genai v1.59.0/go.mod h1:mDdPDFXo1Ats7f1WXVyZgWb/CkMzFWTWJruIMy7hGIU= +google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= +google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc= +google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo= +google.golang.org/genproto/googleapis/rpc v0.0.0-20240903143218-8af14fe29dc1 h1:pPJltXNxVzT4pK9yD8vR9X75DaWYYmLGMsEvBfFQZzQ= +google.golang.org/genproto/googleapis/rpc v0.0.0-20240903143218-8af14fe29dc1/go.mod h1:UqMtugtsSgubUsoxbuAoiCXvqvErP7Gf0so0mK9tHxU= +google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= +google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg= +google.golang.org/grpc v1.25.1/go.mod 
h1:c3i+UQWmh7LiEpx4sFZnkU36qjEYZ0imhYfXVyQciAY= +google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= +google.golang.org/grpc v1.33.2/go.mod h1:JMHMWHQWaTccqQQlmk3MJZS+GWXOdAesneDmEnv2fbc= +google.golang.org/grpc v1.66.2 h1:3QdXkuq3Bkh7w+ywLdLvM56cmGvQHUMZpiCzt6Rqaoo= +google.golang.org/grpc v1.66.2/go.mod h1:s3/l6xSSCURdVfAnL+TqCNMyTDAGN6+lZeVxnZR128Y= +google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= +google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0= +google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM= +google.golang.org/protobuf v1.20.1-0.20200309200217-e05f789c0967/go.mod h1:A+miEFZTKqfCUM6K7xSMQL9OKL/b6hQv+e19PK+JZNE= +google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzikPIcrTAo= +google.golang.org/protobuf v1.22.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= +google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= +google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= +google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c= +google.golang.org/protobuf v1.34.2 h1:6xV6lTsCfpGD21XK49h7MhtcApnLqkfYgPcdHftf6hg= +google.golang.org/protobuf v1.34.2/go.mod h1:qYOHts0dSfpeUzUFpOMr/WGzszTmLH+DiWniOlNbLDw= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= +honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod 
h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= diff --git a/progress.md b/progress.md index 982a020..6ddaec1 100644 --- a/progress.md +++ b/progress.md @@ -1,5 +1,21 @@ # progress +## 2026-06-10 — Phase 4: Google provider (official genai SDK) + +**Landed:** `provider/google` on google.golang.org/genai v1.59.0 (ADR-0011): +lazy cached client (construction never fails; missing key = synthetic 401 +so chains fail over), assistant→model role mapping, FunctionResponse tool +results with output/error payloads, ParametersJsonSchema raw-schema tools, +ResponseJsonSchema structured output, ToolChoice→FunctionCallingConfig, +ReasoningEffort→ThinkingConfig.ThinkingLevel, usage includes thought +tokens, iter.Pull2-adapted streaming, genai.APIError→llm.APIError mapping. +Hermetic tests via HTTPOptions.BaseURL + httptest (SSE fixtures for +streaming). Registry: google + gemini schemes wired to the real provider; +the last stub machinery deleted — all six built-ins are now real clients. +README matrix: Google row fully ✅. + +**Next:** Phase 5 — Agent run loop, Toolbox ergonomics, Generate[T]. + ## 2026-06-10 — Phase 3: REST providers (OpenAI, Anthropic, Ollama×3) + media **Landed:** diff --git a/provider/google/google.go b/provider/google/google.go new file mode 100644 index 0000000..2689fec --- /dev/null +++ b/provider/google/google.go @@ -0,0 +1,416 @@ +// Package google implements majordomo's provider contract for Google's +// Gemini models on the official Google Gen AI Go SDK +// (google.golang.org/genai, the approved third-party dependency per +// ADR-0007; the legacy github.com/google/generative-ai-go SDK is +// deprecated and not used). 
+//
+// Targeted SDK surface (verified against genai v1.59.0 source, June 2026):
+// Models.GenerateContent / GenerateContentStream (iter.Seq2), Content/Part
+// with InlineData blobs for images, FunctionDeclaration.ParametersJsonSchema
+// for raw JSON-schema tools, FunctionCall/FunctionResponse parts for the
+// tool loop, GenerateContentConfig.ResponseJsonSchema + JSON MIME for
+// structured output, ThinkingConfig.ThinkingLevel for reasoning effort, and
+// HTTPOptions.BaseURL + HTTPClient for hermetic tests.
+package google
+
+import (
+	"context"
+	"encoding/json"
+	"errors"
+	"fmt"
+	"net/http"
+	"os"
+	"strconv"
+	"strings"
+	"sync"
+
+	"google.golang.org/genai"
+
+	"gitea.stevedudenhoeffer.com/steve/majordomo/llm"
+)
+
+// defaultCapabilities reflects the published Gemini API limits (June 2026):
+// png/jpeg/webp/heic/heif input; inline payloads bounded by a 20MB total
+// request budget. MaxImagesPerReq is capped at a practical 100 (the
+// published 3,600-file limit assumes the Files API, which majordomo does
+// not use).
+//
+// NOTE(review): MaxImageBytes is 15 MiB rather than the full 20MB budget,
+// presumably to leave headroom for inline-encoding overhead and the rest of
+// the request body — confirm the intended margin.
+var defaultCapabilities = llm.Capabilities{
+	SupportsTools:      true,
+	SupportsStructured: true,
+	SupportsStreaming:  true,
+	MaxImagesPerReq:    100,
+	MaxImageBytes:      15 << 20,
+	AllowedImageMIME:   []string{"image/jpeg", "image/png", "image/webp", "image/heic", "image/heif"},
+}
+
+// Provider is a Gemini provider over the official SDK.
+type Provider struct {
+	name       string           // registry name; "google" unless overridden
+	apiKey     string           // Gemini API key; empty is rejected at request time
+	baseURL    string           // optional endpoint override; empty keeps the SDK default
+	httpClient *http.Client     // optional transport override; nil keeps the SDK default
+	caps       llm.Capabilities // provider-wide default capabilities for new models
+
+	// mu guards client, the SDK client that is built on first use and then
+	// reused for every later request.
+	mu     sync.Mutex
+	client *genai.Client
+}
+
+// Option configures the provider.
+// Options are applied in order by New, after its built-in defaults.
+type Option func(*Provider)
+
+// WithName overrides the registry name (default "google").
+func WithName(name string) Option { return func(p *Provider) { p.name = name } }
+
+// WithAPIKey sets the API key (default: GOOGLE_API_KEY, then
+// GEMINI_API_KEY, matching the SDK's own precedence).
+func WithAPIKey(key string) Option {
+	return func(p *Provider) {
+		// An empty key means "nothing supplied", not "clear the key".
+		// Callers that forward an optional credential (for example a DSN
+		// without a token) must not wipe the key New already resolved from
+		// the environment; an empty key is rejected at request time anyway.
+		if key != "" {
+			p.apiKey = key
+		}
+	}
+}
+
+// WithBaseURL overrides the API endpoint (tests, proxies). Trailing slashes
+// are trimmed; an empty value keeps the SDK's default endpoint.
+func WithBaseURL(u string) Option {
+	return func(p *Provider) { p.baseURL = strings.TrimRight(u, "/") }
+}
+
+// WithHTTPClient overrides the HTTP client handed to the SDK. A nil client
+// keeps the SDK's default.
+func WithHTTPClient(c *http.Client) Option { return func(p *Provider) { p.httpClient = c } }
+
+// WithDefaultCapabilities overrides the provider-wide default capabilities.
+// Models created without a per-model override inherit these.
+func WithDefaultCapabilities(caps llm.Capabilities) Option {
+	return func(p *Provider) { p.caps = caps }
+}
+
+// New creates the provider. Construction never fails: a missing key
+// surfaces as an auth error at request time (and chains can fail over).
+//
+// The API key is seeded from GOOGLE_API_KEY, falling back to
+// GEMINI_API_KEY. Options run afterwards, so a non-empty WithAPIKey wins
+// over the environment.
+func New(opts ...Option) *Provider {
+	p := &Provider{
+		name: "google",
+		caps: defaultCapabilities,
+	}
+	if key := os.Getenv("GOOGLE_API_KEY"); key != "" {
+		p.apiKey = key
+	} else if key := os.Getenv("GEMINI_API_KEY"); key != "" {
+		p.apiKey = key
+	}
+	for _, opt := range opts {
+		opt(p)
+	}
+	return p
+}
+
+// Name implements llm.Provider.
+func (p *Provider) Name() string { return p.name }
+
+// Model implements llm.Provider; the id passes through verbatim.
+// A per-model capabilities override in opts replaces the provider-wide
+// default wholesale rather than being merged with it. The returned error is
+// always nil.
+func (p *Provider) Model(id string, opts ...llm.ModelOption) (llm.Model, error) {
+	cfg := llm.ApplyModelOptions(opts)
+	caps := p.caps
+	if cfg.Capabilities != nil {
+		caps = *cfg.Capabilities
+	}
+	return &model{provider: p, id: id, caps: caps}, nil
+}
+
+// genaiClient builds (once) and returns the SDK client. The SDK's
+// NewClient does no network I/O for the API-key backend; failures here are
+// configuration errors, returned per call and retried on the next.
+func (p *Provider) genaiClient(ctx context.Context) (*genai.Client, error) {
+	p.mu.Lock()
+	defer p.mu.Unlock()
+
+	switch {
+	case p.client != nil:
+		// Already built by an earlier request; reuse it.
+		return p.client, nil
+	case p.apiKey == "":
+		// Reported as a 401-shaped API error so callers can classify it
+		// like any other authentication failure.
+		return nil, &llm.APIError{
+			Provider: p.name, Status: http.StatusUnauthorized,
+			Code:    "missing_api_key",
+			Message: "no API key configured (set GOOGLE_API_KEY/GEMINI_API_KEY or use WithAPIKey)",
+		}
+	}
+
+	conf := genai.ClientConfig{
+		Backend: genai.BackendGeminiAPI,
+		APIKey:  p.apiKey,
+	}
+	// Optional overrides are only applied when set, leaving the SDK's own
+	// defaults in place otherwise.
+	if p.httpClient != nil {
+		conf.HTTPClient = p.httpClient
+	}
+	if p.baseURL != "" {
+		conf.HTTPOptions.BaseURL = p.baseURL
+	}
+
+	created, err := genai.NewClient(ctx, &conf)
+	if err != nil {
+		// Nothing is cached on failure, so the next call tries again.
+		return nil, fmt.Errorf("google: create client: %w", err)
+	}
+	p.client = created
+	return created, nil
+}
+
+// model is one Gemini model bound to its owning Provider.
+type model struct {
+	provider *Provider        // owner; supplies the name and the SDK client
+	id       string           // model id, passed to the SDK verbatim
+	caps     llm.Capabilities // effective capabilities for this model
+}
+
+// Capabilities reports the capabilities resolved when the model was created.
+func (m *model) Capabilities() llm.Capabilities {
+	return m.caps
+}
+
+// qualified returns the "<provider name>/<model id>" label used in errors
+// and responses.
+func (m *model) qualified() string {
+	return m.provider.name + "/" + m.id
+}
+
+// enforceCapabilities is the provider backstop (ADR-0009); the media layer
+// normalizes before requests get here.
+func (m *model) enforceCapabilities(req llm.Request) error {
+	// Only image parts are policed per part; everything else passes through.
+	images := 0
+	for _, msg := range req.Messages {
+		for _, part := range msg.Parts {
+			img, isImage := part.(llm.ImagePart)
+			if !isImage {
+				continue
+			}
+			images++
+
+			// Per-image checks, first failure wins: image support at all,
+			// then MIME type, then payload size (a zero byte limit means
+			// "unlimited").
+			switch {
+			case !m.caps.SupportsImages():
+				return fmt.Errorf("%w: %s does not accept image input", llm.ErrUnsupported, m.qualified())
+			case !m.caps.MIMEAllowed(img.MIME):
+				return fmt.Errorf("%w: %s does not accept %s images", llm.ErrUnsupported, m.qualified(), img.MIME)
+			case m.caps.MaxImageBytes > 0 && len(img.Data) > m.caps.MaxImageBytes:
+				return fmt.Errorf("%w: image of %d bytes exceeds %s limit of %d",
+					llm.ErrUnsupported, len(img.Data), m.qualified(), m.caps.MaxImageBytes)
+			}
+		}
+	}
+
+	// Request-wide checks, evaluated in a fixed order: image count, tools,
+	// structured output.
+	switch limit := m.caps.MaxImagesPerReq; {
+	case limit > 0 && images > limit:
+		return fmt.Errorf("%w: %d images exceed %s limit of %d",
+			llm.ErrUnsupported, images, m.qualified(), limit)
+	case len(req.Tools) > 0 && !m.caps.SupportsTools:
+		return fmt.Errorf("%w: %s does not support tools", llm.ErrUnsupported, m.qualified())
+	case len(req.Schema) > 0 && !m.caps.SupportsStructured:
+		return fmt.Errorf("%w: %s does not support structured output", llm.ErrUnsupported, m.qualified())
+	}
+	return nil
+}
+
+// buildContents maps canonical messages onto SDK contents, and collects
+// the system prompt (Request.System + folded RoleSystem messages).
+func (m *model) buildContents(req llm.Request) (string, []*genai.Content, error) {
+	// System text is gathered separately: Request.System first, then any
+	// RoleSystem messages in conversation order.
+	var sys []string
+	if req.System != "" {
+		sys = append(sys, req.System)
+	}
+
+	var contents []*genai.Content
+	for _, msg := range req.Messages {
+		switch msg.Role {
+		case llm.RoleSystem:
+			if t := msg.Text(); t != "" {
+				sys = append(sys, t)
+			}
+		case llm.RoleTool:
+			// A tool message without results would become a Content with
+			// no parts; skip it, matching how part-less messages are
+			// dropped in the default branch below.
+			if len(msg.ToolResults) == 0 {
+				continue
+			}
+			parts := make([]*genai.Part, 0, len(msg.ToolResults))
+			for _, res := range msg.ToolResults {
+				// The payload key tells the model whether the tool
+				// succeeded ("output") or failed ("error").
+				key := "output"
+				if res.IsError {
+					key = "error"
+				}
+				parts = append(parts, &genai.Part{FunctionResponse: &genai.FunctionResponse{
+					ID: res.ID, Name: res.Name, Response: map[string]any{key: res.Content},
+				}})
+			}
+			// Tool results travel back to the model inside a user-role content.
+			contents = append(contents, &genai.Content{Role: genai.RoleUser, Parts: parts})
+		default:
+			// Assistant turns map to the SDK's "model" role; every other
+			// role is sent as "user".
+			role := genai.RoleUser
+			if msg.Role == llm.RoleAssistant {
+				role = genai.RoleModel
+			}
+			var parts []*genai.Part
+			for _, part := range msg.Parts {
+				// Part types other than text and image are not forwarded.
+				switch v := part.(type) {
+				case llm.TextPart:
+					parts = append(parts, genai.NewPartFromText(v.Text))
+				case llm.ImagePart:
+					parts = append(parts, genai.NewPartFromBytes(v.Data, v.MIME))
+				}
+			}
+			for _, tc := range msg.ToolCalls {
+				// Canonical arguments are raw JSON; the SDK wants a decoded
+				// object. Missing arguments become an empty object.
+				args := map[string]any{}
+				if len(tc.Arguments) > 0 {
+					if err := json.Unmarshal(tc.Arguments, &args); err != nil {
+						return "", nil, fmt.Errorf("google: tool call %q arguments: %w", tc.Name, err)
+					}
+				}
+				parts = append(parts, &genai.Part{FunctionCall: &genai.FunctionCall{
+					ID: tc.ID, Name: tc.Name, Args: args,
+				}})
+			}
+			if len(parts) == 0 {
+				continue
+			}
+			contents = append(contents, &genai.Content{Role: role, Parts: parts})
+		}
+	}
+	return strings.Join(sys, "\n\n"), contents, nil
+}
+
+// buildConfig maps request knobs onto the SDK config.
+func (m *model) buildConfig(req llm.Request, system string) (*genai.GenerateContentConfig, error) {
+	cfg := &genai.GenerateContentConfig{}
+	if system != "" {
+		cfg.SystemInstruction = genai.NewContentFromText(system, genai.RoleUser)
+	}
+	// Optional sampling knobs are copied into fresh float32 values because the
+	// SDK config holds *float32.
+	if req.Temperature != nil {
+		cfg.Temperature = new(float32)
+		*cfg.Temperature = float32(*req.Temperature)
+	}
+	if req.TopP != nil {
+		cfg.TopP = new(float32)
+		*cfg.TopP = float32(*req.TopP)
+	}
+	if req.MaxTokens > 0 {
+		cfg.MaxOutputTokens = int32(req.MaxTokens)
+	}
+	cfg.StopSequences = req.StopSequences
+
+	// A tool choice of "none" is honored by not declaring any tools at all.
+	if len(req.Tools) > 0 && req.ToolChoice != "none" {
+		decls := make([]*genai.FunctionDeclaration, 0, len(req.Tools))
+		for _, t := range req.Tools {
+			decl := &genai.FunctionDeclaration{Name: t.Name, Description: t.Description}
+			if len(t.Parameters) > 0 {
+				// Parameters are passed through as a decoded raw JSON schema.
+				var schema map[string]any
+				if err := json.Unmarshal(t.Parameters, &schema); err != nil {
+					return nil, fmt.Errorf("google: tool %q parameters: %w", t.Name, err)
+				}
+				decl.ParametersJsonSchema = schema
+			}
+			decls = append(decls, decl)
+		}
+		cfg.Tools = []*genai.Tool{{FunctionDeclarations: decls}}
+
+		switch req.ToolChoice {
+		case "", "auto":
+			// SDK default.
+		case "required":
+			cfg.ToolConfig = &genai.ToolConfig{FunctionCallingConfig: &genai.FunctionCallingConfig{
+				Mode: genai.FunctionCallingConfigModeAny,
+			}}
+		default:
+			// Any other value is treated as a function name: a call is forced
+			// and restricted to that one function.
+			cfg.ToolConfig = &genai.ToolConfig{FunctionCallingConfig: &genai.FunctionCallingConfig{
+				Mode: genai.FunctionCallingConfigModeAny, AllowedFunctionNames: []string{req.ToolChoice},
+			}}
+		}
+	}
+
+	// An output schema also switches the response MIME type to JSON.
+	if len(req.Schema) > 0 {
+		var schema map[string]any
+		if err := json.Unmarshal(req.Schema, &schema); err != nil {
+			return nil, fmt.Errorf("google: output schema: %w", err)
+		}
+		cfg.ResponseJsonSchema = schema
+		cfg.ResponseMIMEType = "application/json"
+	}
+
+	switch req.ReasoningEffort {
+	case "":
+		// Unset: ThinkingConfig stays nil.
+	case "low":
+		cfg.ThinkingConfig = &genai.ThinkingConfig{ThinkingLevel: genai.ThinkingLevelLow}
+	case "medium":
+		cfg.ThinkingConfig = &genai.ThinkingConfig{ThinkingLevel: genai.ThinkingLevelMedium}
+	case "high":
+		cfg.ThinkingConfig = &genai.ThinkingConfig{ThinkingLevel: genai.ThinkingLevelHigh}
+	default:
+		return nil, fmt.Errorf("google: invalid reasoning effort %q (want low/medium/high)", req.ReasoningEffort)
+	}
+
+	return cfg, nil
+}
+
+// mapError converts SDK errors into majordomo's classification shapes.
+// An SDK API error becomes an *llm.APIError carrying its numeric code, status
+// string and message; any other error is wrapped with the qualified model name.
+func (m *model) mapError(err error) error {
+	if apiErr, ok := errors.AsType[genai.APIError](err); ok {
+		return &llm.APIError{
+			Provider: m.provider.name, Model: m.id,
+			Status: apiErr.Code, Code: apiErr.Status, Message: apiErr.Message,
+		}
+	}
+	return fmt.Errorf("google %s: %w", m.qualified(), err)
+}
+
+// Generate implements llm.Model.
+// Options are applied to req first; capability, client, content and config
+// errors are returned as-is, while errors from the SDK call go through mapError.
+func (m *model) Generate(ctx context.Context, req llm.Request, opts ...llm.Option) (*llm.Response, error) {
+	req = req.Apply(opts...)
+	if err := m.enforceCapabilities(req); err != nil {
+		return nil, err
+	}
+	client, err := m.provider.genaiClient(ctx)
+	if err != nil {
+		return nil, err
+	}
+	system, contents, err := m.buildContents(req)
+	if err != nil {
+		return nil, err
+	}
+	cfg, err := m.buildConfig(req, system)
+	if err != nil {
+		return nil, err
+	}
+
+	resp, err := client.Models.GenerateContent(ctx, m.id, contents, cfg)
+	if err != nil {
+		return nil, m.mapError(err)
+	}
+	return m.toResponse(resp), nil
+}
+
+// toResponse converts an SDK response into the canonical shape.
+// Only the first candidate is read; a response without candidates yields
+// FinishOther with whatever usage was reported.
+func (m *model) toResponse(resp *genai.GenerateContentResponse) *llm.Response {
+	out := &llm.Response{Model: m.qualified(), Raw: resp}
+	if resp.UsageMetadata != nil {
+		// Thought tokens are counted as output alongside candidate tokens.
+		out.Usage = llm.Usage{
+			InputTokens:  int(resp.UsageMetadata.PromptTokenCount),
+			OutputTokens: int(resp.UsageMetadata.CandidatesTokenCount + resp.UsageMetadata.ThoughtsTokenCount),
+		}
+	}
+	if len(resp.Candidates) == 0 {
+		out.FinishReason = llm.FinishOther
+		return out
+	}
+	cand := resp.Candidates[0]
+	if cand.Content != nil {
+		for _, part := range cand.Content.Parts {
+			if part == nil {
+				continue
+			}
+			// Parts flagged as thoughts are not surfaced as response text.
+			if part.Text != "" && !part.Thought {
+				out.Parts = append(out.Parts, llm.Text(part.Text))
+			}
+			if fc := part.FunctionCall; fc != nil {
+				// Calls without an ID get a positional one (call_0, call_1, ...).
+				id := fc.ID
+				if id == "" {
+					id = "call_" + strconv.Itoa(len(out.ToolCalls))
+				}
+				// Missing or unencodable arguments are normalized to "{}".
+				args, err := json.Marshal(fc.Args)
+				if err != nil || len(fc.Args) == 0 {
+					args = json.RawMessage("{}")
+				}
+				out.ToolCalls = append(out.ToolCalls, llm.ToolCall{ID: id, Name: fc.Name, Arguments: args})
+			}
+		}
+	}
+	out.FinishReason = mapFinish(cand.FinishReason, len(out.ToolCalls) > 0)
+	return out
+}
+
+// mapFinish translates an SDK finish reason into the canonical one. The
+// presence of tool calls takes precedence over whatever reason was reported.
+func mapFinish(fr genai.FinishReason, hasToolCalls bool) llm.FinishReason {
+	if hasToolCalls {
+		return llm.FinishToolCalls
+	}
+	switch fr {
+	case genai.FinishReasonStop, genai.FinishReasonUnspecified, "":
+		return llm.FinishStop
+	case genai.FinishReasonMaxTokens:
+		return llm.FinishLength
+	case genai.FinishReasonSafety, genai.FinishReasonRecitation, genai.FinishReasonBlocklist,
+		genai.FinishReasonProhibitedContent, genai.FinishReasonSPII, genai.FinishReasonImageSafety:
+		return llm.FinishContentFilter
+	default:
+		return llm.FinishOther
+	}
+}
diff --git a/provider/google/google_test.go b/provider/google/google_test.go
new file mode 100644
index 0000000..75ac8f9
--- /dev/null
+++ b/provider/google/google_test.go
@@ -0,0 +1,476 @@
+package google
+
+import (
+	"context"
+	"encoding/json"
+	"errors"
+	"fmt"
+	"io"
+	"net/http"
+	"net/http/httptest"
+	"strings"
+	"testing"
+
+	"gitea.stevedudenhoeffer.com/steve/majordomo/llm"
+)
+
+type captured struct {
+	path  string
+	query string
+	body  map[string]any
+}
+
+// serve builds a provider pointed at an httptest server (the SDK's
+// documented hermetic hook: HTTPOptions.BaseURL + HTTPClient).
+func serve(t *testing.T, handler func(w http.ResponseWriter, r *http.Request)) (*Provider, *captured) {
+	t.Helper()
+	cap := &captured{}
+	ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		cap.path = r.URL.Path
+		cap.query = r.URL.RawQuery
+		raw, _ := io.ReadAll(r.Body)
+		_ = json.Unmarshal(raw, &cap.body)
+		handler(w, r)
+	}))
+	t.Cleanup(ts.Close)
+	return New(
+		WithAPIKey("test-key"),
+		WithBaseURL(ts.URL),
+		WithHTTPClient(ts.Client()),
+	), cap
+}
+
+func textResponse(text string) string {
+	return fmt.Sprintf(`{
+		"candidates":[{"content":{"role":"model","parts":[{"text":%q}]},"finishReason":"STOP"}],
+		"usageMetadata":{"promptTokenCount":7,"candidatesTokenCount":5,"thoughtsTokenCount":2}
+	}`, text)
+}
+
+func basicRequest() llm.Request {
+	return llm.Request{Messages: []llm.Message{llm.UserText("hi")}}
+}
+
+func TestGenerateRoundTrip(t *testing.T) {
+	p, cap := serve(t, func(w http.ResponseWriter, _ *http.Request) {
+		_, _ = io.WriteString(w, textResponse("hello from gemini"))
+	})
+
+	m, _ := p.Model("gemini-2.5-flash")
+	temp := 0.3
+	resp, err := m.Generate(context.Background(), llm.Request{
+		System:      "be terse",
+		Messages:    []llm.Message{llm.SystemText("extra"), llm.UserText("hi")},
+		Temperature: &temp,
+		MaxTokens:   128,
+	})
+	if err != nil {
+		t.Fatalf("Generate: %v", err)
+	}
+
+	if !strings.Contains(cap.path, "models/gemini-2.5-flash:generateContent") {
+		t.Errorf("path = %q", cap.path)
+	}
+	sys := cap.body["systemInstruction"].(map[string]any)
+	sysText := sys["parts"].([]any)[0].(map[string]any)["text"]
+	if sysText != "be terse\n\nextra" {
+		t.Errorf("system = %v", sysText)
+	}
+	genCfg := cap.body["generationConfig"].(map[string]any)
+	if genCfg["temperature"] != 0.3 || genCfg["maxOutputTokens"] != float64(128) {
+		t.Errorf("generationConfig = %v", genCfg)
+	}
+	contents := cap.body["contents"].([]any)
+	if len(contents) != 1 {
+		t.Fatalf("contents = %v (system must not appear)", contents)
+	}
+
+	if resp.Text() != "hello from gemini" {
+		t.Errorf("text = %q", resp.Text())
+	}
+	if resp.Usage.InputTokens != 7 || resp.Usage.OutputTokens != 7 {
+		t.Errorf("usage = %+v (output must include thoughts)", resp.Usage)
+	}
+	if resp.FinishReason != llm.FinishStop {
+		t.Errorf("finish = %v", resp.FinishReason)
+	}
+	if resp.Model != "google/gemini-2.5-flash" {
+		t.Errorf("model = %q", resp.Model)
+	}
+}
+
+func TestImageInlineData(t *testing.T) {
+	p, cap := serve(t, func(w http.ResponseWriter, _ *http.Request) {
+		_, _ = io.WriteString(w, textResponse("a png"))
+	})
+	m, _ := p.Model("gemini-2.5-flash")
+	_, err := m.Generate(context.Background(), llm.Request{
+		Messages: []llm.Message{llm.UserParts(llm.Text("see"), llm.Image("image/png", []byte{1, 2, 3}))},
+	})
+	if err != nil {
+		t.Fatalf("Generate: %v", err)
+	}
+	parts := cap.body["contents"].([]any)[0].(map[string]any)["parts"].([]any)
+	var foundBlob bool
+	for _, pt := range parts {
+		if blob, ok := pt.(map[string]any)["inlineData"].(map[string]any); ok {
+			foundBlob = true
+			if blob["mimeType"] != "image/png" || blob["data"] != "AQID" {
+				t.Errorf("blob = %v", blob)
+			}
+		}
+	}
+	if !foundBlob {
+		t.Error("no inlineData part sent")
+	}
+}
+
+func TestToolsAndFunctionCalls(t *testing.T) {
+	p, cap := serve(t, func(w http.ResponseWriter, _ *http.Request) {
+		_, _ = io.WriteString(w, `{
+			"candidates":[{"content":{"role":"model","parts":[
+				{"functionCall":{"name":"get_weather","args":{"city":"Tokyo"}}}
+			]},"finishReason":"STOP"}]
+		}`)
+	})
+	m, _ := p.Model("gemini-2.5-flash")
+	resp, err := m.Generate(context.Background(), basicRequest(), llm.WithTools(llm.Tool{
+		Name: "get_weather", Description: "weather",
+		Parameters: json.RawMessage(`{"type":"object","properties":{"city":{"type":"string"}}}`),
+	}))
+	if err != nil {
+		t.Fatalf("Generate: %v", err)
+	}
+
+	tools := cap.body["tools"].([]any)
+	decls := tools[0].(map[string]any)["functionDeclarations"].([]any)
+	decl := decls[0].(map[string]any)
+	if decl["name"] != "get_weather" {
+		t.Errorf("decl = %v", decl)
+	}
+	if _, ok := decl["parametersJsonSchema"].(map[string]any); !ok {
+		t.Errorf("parametersJsonSchema missing: %v", decl)
+	}
+
+	if len(resp.ToolCalls) != 1 {
+		t.Fatalf("tool calls = %+v", resp.ToolCalls)
+	}
+	tc := resp.ToolCalls[0]
+	if tc.Name != "get_weather" || tc.ID == "" {
+		t.Errorf("call = %+v (id synthesized)", tc)
+	}
+	var args struct {
+		City string `json:"city"`
+	}
+	if err := json.Unmarshal(tc.Arguments, &args); err != nil || args.City != "Tokyo" {
+		t.Errorf("args = %s", tc.Arguments)
+	}
+	if resp.FinishReason != llm.FinishToolCalls {
+		t.Errorf("finish = %v", resp.FinishReason)
+	}
+}
+
+func TestToolResultsAndHistory(t *testing.T) {
+	p, cap := serve(t, func(w http.ResponseWriter, _ *http.Request) {
+		_, _ = io.WriteString(w, textResponse("21C"))
+	})
+	m, _ := p.Model("gemini-2.5-flash")
+	_, err := m.Generate(context.Background(), llm.Request{
+		Messages: []llm.Message{
+			llm.UserText("weather?"),
+			{Role: llm.RoleAssistant, ToolCalls: []llm.ToolCall{
+				{ID: "c1", Name: "get_weather", Arguments: json.RawMessage(`{"city":"Tokyo"}`)},
+			}},
+			llm.ToolResultsMessage(
+				llm.ToolResult{ID: "c1", Name: "get_weather", Content: `{"temp":21}`},
+				llm.ToolResult{ID: "c2", Name: "broken", Content: "boom", IsError: true},
+			),
+		},
+	})
+	if err != nil {
+		t.Fatalf("Generate: %v", err)
+	}
+	contents := cap.body["contents"].([]any)
+	if len(contents) != 3 {
+		t.Fatalf("contents = %d, want 3", len(contents))
+	}
+	model := contents[1].(map[string]any)
+	if model["role"] != "model" {
+		t.Errorf("assistant role = %v", model["role"])
+	}
+	fc := model["parts"].([]any)[0].(map[string]any)["functionCall"].(map[string]any)
+	if fc["name"] != "get_weather" {
+		t.Errorf("functionCall = %v", fc)
+	}
+	results := contents[2].(map[string]any)
+	parts := results["parts"].([]any)
+	fr1 := parts[0].(map[string]any)["functionResponse"].(map[string]any)
+	if fr1["name"] != "get_weather" {
+		t.Errorf("functionResponse = %v", fr1)
+	}
+	if resp1 := fr1["response"].(map[string]any); resp1["output"] != `{"temp":21}` {
+		t.Errorf("response payload = %v", resp1)
+	}
+	fr2 := parts[1].(map[string]any)["functionResponse"].(map[string]any)
+	if resp2 := fr2["response"].(map[string]any); resp2["error"] != "boom" {
+		t.Errorf("error payload = %v", resp2)
+	}
+}
+
+func TestToolChoiceMapping(t *testing.T) {
+	for choice, want := range map[string]string{
+		"required":    "ANY",
+		"get_weather": "ANY",
+	} {
+		p, cap := serve(t, func(w http.ResponseWriter, _ *http.Request) {
+			_, _ = io.WriteString(w, textResponse("x"))
+		})
+		m, _ := p.Model("g")
+		_, err := m.Generate(context.Background(), basicRequest(),
+			llm.WithTools(llm.Tool{Name: "get_weather"}), llm.WithToolChoice(choice))
+		if err != nil {
+			t.Fatalf("Generate(%s): %v", choice, err)
+		}
+		tc := cap.body["toolConfig"].(map[string]any)["functionCallingConfig"].(map[string]any)
+		if tc["mode"] != want {
+			t.Errorf("choice %q → mode %v, want %v", choice, tc["mode"], want)
+		}
+		if choice == "get_weather" {
+			allowed := tc["allowedFunctionNames"].([]any)
+			if allowed[0] != "get_weather" {
+				t.Errorf("allowedFunctionNames = %v", allowed)
+			}
+		}
+	}
+
+	t.Run("none drops tools", func(t *testing.T) {
+		p, cap := serve(t, func(w http.ResponseWriter, _ *http.Request) {
+			_, _ = io.WriteString(w, textResponse("x"))
+		})
+		m, _ := p.Model("g")
+		if _, err := m.Generate(context.Background(), basicRequest(),
+			llm.WithTools(llm.Tool{Name: "t"}), llm.WithToolChoice("none")); err != nil {
+			t.Fatalf("Generate: %v", err)
+		}
+		if _, present := cap.body["tools"]; present {
+			t.Error("tool_choice none must omit tools")
+		}
+	})
+}
+
+func TestStructuredOutput(t *testing.T) {
+	p, cap := serve(t, func(w http.ResponseWriter, _ *http.Request) {
+		_, _ = io.WriteString(w, textResponse(`{"name":"Ada"}`))
+	})
+	m, _ := p.Model("g")
+	schema := json.RawMessage(`{"type":"object","properties":{"name":{"type":"string"}}}`)
+	resp, err := m.Generate(context.Background(), basicRequest(), llm.WithSchema(schema, "person"))
+	if err != nil {
+		t.Fatalf("Generate: %v", err)
+	}
+	genCfg := cap.body["generationConfig"].(map[string]any)
+	if genCfg["responseMimeType"] != "application/json" {
+		t.Errorf("responseMimeType = %v", genCfg["responseMimeType"])
+	}
+	if _, ok := genCfg["responseJsonSchema"].(map[string]any); !ok {
+		t.Errorf("responseJsonSchema = %v", genCfg["responseJsonSchema"])
+	}
+	if resp.Text() != `{"name":"Ada"}` {
+		t.Errorf("text = %q", resp.Text())
+	}
+}
+
+func TestReasoningEffortMapsToThinkingLevel(t *testing.T) {
+	p, cap := serve(t, func(w http.ResponseWriter, _ *http.Request) {
+		_, _ = io.WriteString(w, textResponse("x"))
+	})
+	m, _ := p.Model("g")
+	if _, err := m.Generate(context.Background(), basicRequest(), llm.WithReasoningEffort("high")); err != nil {
+		t.Fatalf("Generate: %v", err)
+	}
+	genCfg := cap.body["generationConfig"].(map[string]any)
+	thinking := genCfg["thinkingConfig"].(map[string]any)
+	if thinking["thinkingLevel"] != "HIGH" {
+		t.Errorf("thinkingConfig = %v", thinking)
+	}
+
+	if _, err := m.Generate(context.Background(), basicRequest(), llm.WithReasoningEffort("ultra")); err == nil {
+		t.Error("invalid effort should error")
+	}
+}
+
+func TestFinishReasonMapping(t *testing.T) {
+	for wire, want := range map[string]llm.FinishReason{
+		"STOP":                    llm.FinishStop,
+		"MAX_TOKENS":              llm.FinishLength,
+		"SAFETY":                  llm.FinishContentFilter,
+		"PROHIBITED_CONTENT":      llm.FinishContentFilter,
+		"MALFORMED_FUNCTION_CALL": llm.FinishOther,
+	} {
+		p, _ := serve(t, func(w http.ResponseWriter, _ *http.Request) {
+			fmt.Fprintf(w, `{"candidates":[{"content":{"role":"model","parts":[{"text":"x"}]},"finishReason":%q}]}`, wire)
+		})
+		m, _ := p.Model("g")
+		resp, err := m.Generate(context.Background(), basicRequest())
+		if err != nil {
+			t.Fatalf("Generate(%s): %v", wire, err)
+		}
+		if resp.FinishReason != want {
+			t.Errorf("finish %q = %v, want %v", wire, resp.FinishReason, want)
+		}
+	}
+}
+
+func TestAPIErrorMapping(t *testing.T) {
+	p, _ := serve(t, func(w http.ResponseWriter, _ *http.Request) {
+		w.WriteHeader(429)
+		_, _ = io.WriteString(w, `{"error":{"code":429,"message":"quota exhausted","status":"RESOURCE_EXHAUSTED"}}`)
+	})
+	m, _ := p.Model("g")
+	_, err := m.Generate(context.Background(), basicRequest())
+	var apiErr *llm.APIError
+	if !errors.As(err, &apiErr) {
+		t.Fatalf("error = %v (%T), want APIError", err, err)
+	}
+	if apiErr.Status != 429 || !strings.Contains(apiErr.Message, "quota") {
+		t.Errorf("apiErr = %+v", apiErr)
+	}
+	if llm.Classify(err) != llm.ClassTransient {
+		t.Error("429 must classify transient")
+	}
+}
+
+func TestMissingAPIKey(t *testing.T) {
+	t.Setenv("GOOGLE_API_KEY", "")
+	t.Setenv("GEMINI_API_KEY", "")
+	p := New(WithAPIKey(""))
+	m, _ := p.Model("g")
+	_, err := m.Generate(context.Background(), basicRequest())
+	var apiErr *llm.APIError
+	if !errors.As(err, &apiErr) || apiErr.Status != http.StatusUnauthorized {
+		t.Errorf("error = %v, want synthetic 401", err)
+	}
+}
+
+func TestEnvKeyPrecedence(t *testing.T) {
+	t.Setenv("GOOGLE_API_KEY", "g-key")
+	t.Setenv("GEMINI_API_KEY", "gem-key")
+	if p := New(); p.apiKey != "g-key" {
+		t.Errorf("apiKey = %q, want GOOGLE_API_KEY to win", p.apiKey)
+	}
+	t.Setenv("GOOGLE_API_KEY", "")
+	if p := New(); p.apiKey != "gem-key" {
+		t.Errorf("apiKey = %q, want GEMINI_API_KEY fallback", p.apiKey)
+	}
+}
+
+func TestCapabilityEnforcement(t *testing.T) {
+	p, _ := serve(t, func(w http.ResponseWriter, _ *http.Request) {
+		_, _ = io.WriteString(w, textResponse("x"))
+	})
+	m, _ := p.Model("g", llm.WithCapabilities(llm.Capabilities{MaxImagesPerReq: 1, AllowedImageMIME: []string{"image/png"}}))
+	_, err := m.Generate(context.Background(), llm.Request{Messages: []llm.Message{
+		llm.UserParts(llm.Image("image/png", []byte{1}), llm.Image("image/png", []byte{2})),
+	}})
+	if !errors.Is(err, llm.ErrUnsupported) {
+		t.Errorf("error = %v, want ErrUnsupported", err)
+	}
+}
+
+func TestStreaming(t *testing.T) {
+	p, cap := serve(t, func(w http.ResponseWriter, _ *http.Request) {
+		w.Header().Set("Content-Type", "text/event-stream")
+		_, _ = io.WriteString(w, `data: {"candidates":[{"content":{"role":"model","parts":[{"text":"Hel"}]}}]}
+
+data: {"candidates":[{"content":{"role":"model","parts":[{"text":"lo"}]}}]}
+
+data: {"candidates":[{"content":{"role":"model","parts":[{"functionCall":{"name":"ping","args":{}}}]},"finishReason":"STOP"}],"usageMetadata":{"promptTokenCount":3,"candidatesTokenCount":6}}
+
+`)
+	})
+
+	m, _ := p.Model("gemini-2.5-flash")
+	s, err := m.Stream(context.Background(), basicRequest())
+	if err != nil {
+		t.Fatalf("Stream: %v", err)
+	}
+	defer s.Close()
+
+	if !strings.Contains(cap.query+cap.path, "streamGenerateContent") {
+		t.Errorf("path = %q query = %q, want streaming endpoint", cap.path, cap.query)
+	}
+
+	var text strings.Builder
+	var calls []llm.ToolCall
+	var final *llm.Response
+	for {
+		ev, err := s.Next()
+		if errors.Is(err, io.EOF) {
+			break
+		}
+		if err != nil {
+			t.Fatalf("Next: %v", err)
+		}
+		text.WriteString(ev.TextDelta)
+		if ev.ToolCall != nil {
+			calls = append(calls, *ev.ToolCall)
+		}
+		if ev.Response != nil {
+			final = ev.Response
+		}
+	}
+	if text.String() != "Hello" {
+		t.Errorf("text = %q", text.String())
+	}
+	if len(calls) != 1 || calls[0].Name != "ping" {
+		t.Errorf("calls = %+v", calls)
+	}
+	if final == nil {
+		t.Fatal("no final event")
+	}
+	if final.Usage.InputTokens != 3 || final.Usage.OutputTokens != 6 {
+		t.Errorf("usage = %+v", final.Usage)
+	}
+	if final.FinishReason != llm.FinishToolCalls {
+		t.Errorf("finish = %v", final.FinishReason)
+	}
+}
+
+func TestStreamCloseEarly(t *testing.T) {
+	p, _ := serve(t, func(w http.ResponseWriter, _ *http.Request) {
+		w.Header().Set("Content-Type", "text/event-stream")
+		_, _ = io.WriteString(w, "data: {\"candidates\":[{\"content\":{\"role\":\"model\",\"parts\":[{\"text\":\"x\"}]}}]}\n\n")
+	})
+	m, _ := p.Model("g")
+	s, err := m.Stream(context.Background(), basicRequest())
+	if err != nil {
+		t.Fatalf("Stream: %v", err)
+	}
+	if err := s.Close(); err != nil {
+		t.Errorf("Close: %v", err)
+	}
+	_ = s.Close() // idempotent
+}
+
+// TestStreamErrorIsTerminal pins that a failed stream keeps reporting its
+// error instead of fabricating a final Response on the following Next.
+func TestStreamErrorIsTerminal(t *testing.T) {
+	p, _ := serve(t, func(w http.ResponseWriter, _ *http.Request) {
+		w.WriteHeader(429)
+		_, _ = io.WriteString(w, `{"error":{"code":429,"message":"quota exhausted","status":"RESOURCE_EXHAUSTED"}}`)
+	})
+	m, _ := p.Model("g")
+	s, err := m.Stream(context.Background(), basicRequest())
+	if err != nil {
+		t.Fatalf("Stream: %v", err)
+	}
+	defer s.Close()
+
+	for i := range 2 {
+		_, err := s.Next()
+		var apiErr *llm.APIError
+		if !errors.As(err, &apiErr) || apiErr.Status != 429 {
+			t.Fatalf("Next #%d: error = %v (%T), want APIError 429", i+1, err, err)
+		}
+	}
+}
diff --git a/provider/google/stream.go b/provider/google/stream.go
new file mode 100644
index 0000000..fe0c0a4
--- /dev/null
+++ b/provider/google/stream.go
@@ -0,0 +1,172 @@
+package google
+
+import (
+	"context"
+	"encoding/json"
+	"io"
+	"iter"
+	"strconv"
+	"sync"
+
+	"google.golang.org/genai"
+
+	"gitea.stevedudenhoeffer.com/steve/majordomo/llm"
+)
+
+// Stream implements llm.Model over the SDK's range-over-func stream
+// (iter.Seq2), adapted to majordomo's pull-based Stream via iter.Pull2.
+//
+// The SDK call runs under a context derived from ctx that Close cancels, so
+// closing the stream also abandons the in-flight request.
+func (m *model) Stream(ctx context.Context, req llm.Request, opts ...llm.Option) (llm.Stream, error) {
+	req = req.Apply(opts...)
+	if err := m.enforceCapabilities(req); err != nil {
+		return nil, err
+	}
+	client, err := m.provider.genaiClient(ctx)
+	if err != nil {
+		return nil, err
+	}
+	system, contents, err := m.buildContents(req)
+	if err != nil {
+		return nil, err
+	}
+	cfg, err := m.buildConfig(req, system)
+	if err != nil {
+		return nil, err
+	}
+
+	// Derived last: no error return follows, so cancel always reaches the stream.
+	ctx, cancel := context.WithCancel(ctx)
+	seq := client.Models.GenerateContentStream(ctx, m.id, contents, cfg)
+	next, stop := iter.Pull2(iter.Seq2[*genai.GenerateContentResponse, error](seq))
+	return &stream{model: m, next: next, stop: stop, cancel: cancel}, nil
+}
+
+// stream is the pull-based adapter returned by Stream. It accumulates text,
+// tool calls, usage and the finish reason across chunks so the last event can
+// carry a complete llm.Response.
+type stream struct {
+	model  *model
+	next   func() (*genai.GenerateContentResponse, error, bool)
+	stop   func()
+	cancel context.CancelFunc
+
+	// mu serializes next and stop (iter.Pull2 forbids calling them from
+	// several goroutines at once) and guards the fields below.
+	mu        sync.Mutex
+	closeOnce sync.Once
+	finished  bool
+	err       error // terminal error, returned again by every later Next
+	pending   []llm.StreamEvent
+	text      []byte
+	toolCalls []llm.ToolCall
+	usage     llm.Usage
+	finish    genai.FinishReason
+}
+
+// Next returns the next event: queued text deltas and tool calls first, then
+// a single event carrying the aggregated Response, then io.EOF. Once the SDK
+// reports an error, that same error is returned by every later call.
+func (s *stream) Next() (llm.StreamEvent, error) {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+
+	for {
+		if len(s.pending) > 0 {
+			ev := s.pending[0]
+			s.pending = s.pending[1:]
+			return ev, nil
+		}
+		if s.err != nil {
+			return llm.StreamEvent{}, s.err
+		}
+		if s.finished {
+			return llm.StreamEvent{}, io.EOF
+		}
+
+		chunk, err, ok := s.next()
+		if !ok {
+			s.queueFinal()
+			continue
+		}
+		if err != nil {
+			// Latch the failure. Pulling again would report the iterator as
+			// exhausted and queue a final Response for a stream that failed.
+			s.err = s.model.mapError(err)
+			return llm.StreamEvent{}, s.err
+		}
+
+		if chunk.UsageMetadata != nil {
+			s.usage = llm.Usage{
+				InputTokens:  int(chunk.UsageMetadata.PromptTokenCount),
+				OutputTokens: int(chunk.UsageMetadata.CandidatesTokenCount + chunk.UsageMetadata.ThoughtsTokenCount),
+			}
+		}
+		if len(chunk.Candidates) == 0 {
+			continue
+		}
+		cand := chunk.Candidates[0]
+		if cand.FinishReason != "" {
+			s.finish = cand.FinishReason
+		}
+		if cand.Content == nil {
+			continue
+		}
+		for _, part := range cand.Content.Parts {
+			if part == nil {
+				continue
+			}
+			if part.Text != "" && !part.Thought {
+				s.text = append(s.text, part.Text...)
+				s.pending = append(s.pending, llm.StreamEvent{TextDelta: part.Text})
+			}
+			// Function calls arrive whole per chunk in the Gemini stream.
+			if fc := part.FunctionCall; fc != nil {
+				id := fc.ID
+				if id == "" {
+					id = "call_" + strconv.Itoa(len(s.toolCalls))
+				}
+				args, err := json.Marshal(fc.Args)
+				if err != nil || len(fc.Args) == 0 {
+					args = json.RawMessage("{}")
+				}
+				call := llm.ToolCall{ID: id, Name: fc.Name, Arguments: args}
+				s.toolCalls = append(s.toolCalls, call)
+				s.pending = append(s.pending, llm.StreamEvent{ToolCall: &call})
+			}
+		}
+	}
+}
+
+// queueFinal queues the closing event with the aggregated Response and marks
+// the stream finished so the following Next returns io.EOF.
+func (s *stream) queueFinal() {
+	resp := &llm.Response{
+		Model:        s.model.qualified(),
+		Usage:        s.usage,
+		FinishReason: mapFinish(s.finish, len(s.toolCalls) > 0),
+	}
+	if len(s.text) > 0 {
+		resp.Parts = append(resp.Parts, llm.Text(string(s.text)))
+	}
+	if len(s.toolCalls) > 0 {
+		resp.ToolCalls = append([]llm.ToolCall(nil), s.toolCalls...)
+	}
+	s.pending = append(s.pending, llm.StreamEvent{Response: resp})
+	s.finished = true
+}
+
+// Close releases the stream and always returns nil; calls after the first are
+// no-ops. The context is cancelled before mu is taken so that a Next blocked
+// inside the SDK can unwind (assuming the SDK honors cancellation) and release
+// the lock; stop then runs without overlapping next.
+func (s *stream) Close() error {
+	s.closeOnce.Do(func() {
+		s.cancel()
+		s.mu.Lock()
+		defer s.mu.Unlock()
+		s.stop()
+	})
+	return nil
+}