0147a79d18
Phase 9a (ADR-0014): Registry.RegisterResolver for dynamic tiers; DefineTool[Args] typed tools; Usage cache/reasoning detail fields wired through anthropic/openai/google; WithPromptCaching (Anthropic cache_control); agent supervision hooks (WithMaxStepsFunc, WithSteer, WithCompactor, WithToolErrorLimits + ErrToolLoop); health Bench/Unbench/Snapshot; ChainConfig.Observer failover events. Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
419 lines
13 KiB
Go
419 lines
13 KiB
Go
// Package google implements majordomo's provider contract for Google's
|
|
// Gemini models on the official Google Gen AI Go SDK
|
|
// (google.golang.org/genai, the approved third-party dependency per
|
|
// ADR-0007; the legacy github.com/google/generative-ai-go SDK is
|
|
// deprecated and not used).
|
|
//
|
|
// Targeted SDK surface (verified against genai v1.59.0 source, June 2026):
|
|
// Models.GenerateContent / GenerateContentStream (iter.Seq2), Content/Part
|
|
// with InlineData blobs for images, FunctionDeclaration.ParametersJsonSchema
|
|
// for raw JSON-schema tools, FunctionCall/FunctionResponse parts for the
|
|
// tool loop, GenerateContentConfig.ResponseJsonSchema + JSON MIME for
|
|
// structured output, ThinkingConfig.ThinkingLevel for reasoning effort, and
|
|
// HTTPOptions.BaseURL + HTTPClient for hermetic tests.
|
|
package google
|
|
|
|
import (
|
|
"context"
|
|
"encoding/json"
|
|
"errors"
|
|
"fmt"
|
|
"net/http"
|
|
"os"
|
|
"strconv"
|
|
"strings"
|
|
"sync"
|
|
|
|
"google.golang.org/genai"
|
|
|
|
"gitea.stevedudenhoeffer.com/steve/majordomo/llm"
|
|
)
|
|
|
|
// defaultCapabilities reflects the published Gemini API limits (June 2026):
|
|
// png/jpeg/webp/heic/heif input; inline payloads bounded by a 20MB total
|
|
// request budget. MaxImagesPerReq is capped at a practical 100 (the
|
|
// published 3,600-file limit assumes the Files API, which majordomo does
|
|
// not use).
|
|
var defaultCapabilities = llm.Capabilities{
|
|
SupportsTools: true,
|
|
SupportsStructured: true,
|
|
SupportsStreaming: true,
|
|
MaxImagesPerReq: 100,
|
|
MaxImageBytes: 15 << 20,
|
|
AllowedImageMIME: []string{"image/jpeg", "image/png", "image/webp", "image/heic", "image/heif"},
|
|
}
|
|
|
|
// Provider is a Gemini provider over the official SDK.
|
|
type Provider struct {
|
|
name string
|
|
apiKey string
|
|
baseURL string
|
|
httpClient *http.Client
|
|
caps llm.Capabilities
|
|
|
|
mu sync.Mutex
|
|
client *genai.Client
|
|
}
|
|
|
|
// Option configures the provider.
|
|
type Option func(*Provider)
|
|
|
|
// WithName overrides the registry name (default "google").
|
|
func WithName(name string) Option { return func(p *Provider) { p.name = name } }
|
|
|
|
// WithAPIKey sets the API key (default: GOOGLE_API_KEY, then
|
|
// GEMINI_API_KEY, matching the SDK's own precedence).
|
|
func WithAPIKey(key string) Option { return func(p *Provider) { p.apiKey = key } }
|
|
|
|
// WithBaseURL overrides the API endpoint (tests, proxies).
|
|
func WithBaseURL(u string) Option {
|
|
return func(p *Provider) { p.baseURL = strings.TrimRight(u, "/") }
|
|
}
|
|
|
|
// WithHTTPClient overrides the HTTP client.
|
|
func WithHTTPClient(c *http.Client) Option { return func(p *Provider) { p.httpClient = c } }
|
|
|
|
// WithDefaultCapabilities overrides the provider-wide default capabilities.
|
|
func WithDefaultCapabilities(caps llm.Capabilities) Option {
|
|
return func(p *Provider) { p.caps = caps }
|
|
}
|
|
|
|
// New creates the provider. Construction never fails: a missing key
|
|
// surfaces as an auth error at request time (and chains can fail over).
|
|
func New(opts ...Option) *Provider {
|
|
p := &Provider{
|
|
name: "google",
|
|
caps: defaultCapabilities,
|
|
}
|
|
if key := os.Getenv("GOOGLE_API_KEY"); key != "" {
|
|
p.apiKey = key
|
|
} else if key := os.Getenv("GEMINI_API_KEY"); key != "" {
|
|
p.apiKey = key
|
|
}
|
|
for _, opt := range opts {
|
|
opt(p)
|
|
}
|
|
return p
|
|
}
|
|
|
|
// Name implements llm.Provider.
|
|
func (p *Provider) Name() string { return p.name }
|
|
|
|
// Model implements llm.Provider; the id passes through verbatim.
|
|
func (p *Provider) Model(id string, opts ...llm.ModelOption) (llm.Model, error) {
|
|
cfg := llm.ApplyModelOptions(opts)
|
|
caps := p.caps
|
|
if cfg.Capabilities != nil {
|
|
caps = *cfg.Capabilities
|
|
}
|
|
return &model{provider: p, id: id, caps: caps}, nil
|
|
}
|
|
|
|
// genaiClient builds (once) and returns the SDK client. The SDK's
|
|
// NewClient does no network I/O for the API-key backend; failures here are
|
|
// configuration errors, returned per call and retried on the next.
|
|
func (p *Provider) genaiClient(ctx context.Context) (*genai.Client, error) {
|
|
p.mu.Lock()
|
|
defer p.mu.Unlock()
|
|
if p.client != nil {
|
|
return p.client, nil
|
|
}
|
|
if p.apiKey == "" {
|
|
return nil, &llm.APIError{
|
|
Provider: p.name, Status: http.StatusUnauthorized,
|
|
Code: "missing_api_key",
|
|
Message: "no API key configured (set GOOGLE_API_KEY/GEMINI_API_KEY or use WithAPIKey)",
|
|
}
|
|
}
|
|
cc := &genai.ClientConfig{
|
|
APIKey: p.apiKey,
|
|
Backend: genai.BackendGeminiAPI,
|
|
}
|
|
if p.baseURL != "" {
|
|
cc.HTTPOptions = genai.HTTPOptions{BaseURL: p.baseURL}
|
|
}
|
|
if p.httpClient != nil {
|
|
cc.HTTPClient = p.httpClient
|
|
}
|
|
client, err := genai.NewClient(ctx, cc)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("google: create client: %w", err)
|
|
}
|
|
p.client = client
|
|
return client, nil
|
|
}
|
|
|
|
type model struct {
|
|
provider *Provider
|
|
id string
|
|
caps llm.Capabilities
|
|
}
|
|
|
|
// Capabilities returns the capability set this model was created with.
func (m *model) Capabilities() llm.Capabilities {
	return m.caps
}
|
|
|
|
// qualified returns the "<provider name>/<model id>" form used in
// responses and error messages.
func (m *model) qualified() string {
	prefix := m.provider.name
	return prefix + "/" + m.id
}
|
|
|
|
// enforceCapabilities is the provider backstop (ADR-0009); the media layer
|
|
// normalizes before requests get here.
|
|
func (m *model) enforceCapabilities(req llm.Request) error {
|
|
count := 0
|
|
for _, msg := range req.Messages {
|
|
for _, part := range msg.Parts {
|
|
img, ok := part.(llm.ImagePart)
|
|
if !ok {
|
|
continue
|
|
}
|
|
count++
|
|
if !m.caps.SupportsImages() {
|
|
return fmt.Errorf("%w: %s does not accept image input", llm.ErrUnsupported, m.qualified())
|
|
}
|
|
if !m.caps.MIMEAllowed(img.MIME) {
|
|
return fmt.Errorf("%w: %s does not accept %s images", llm.ErrUnsupported, m.qualified(), img.MIME)
|
|
}
|
|
if m.caps.MaxImageBytes > 0 && len(img.Data) > m.caps.MaxImageBytes {
|
|
return fmt.Errorf("%w: image of %d bytes exceeds %s limit of %d",
|
|
llm.ErrUnsupported, len(img.Data), m.qualified(), m.caps.MaxImageBytes)
|
|
}
|
|
}
|
|
}
|
|
if count > m.caps.MaxImagesPerReq && m.caps.MaxImagesPerReq > 0 {
|
|
return fmt.Errorf("%w: %d images exceed %s limit of %d",
|
|
llm.ErrUnsupported, count, m.qualified(), m.caps.MaxImagesPerReq)
|
|
}
|
|
if len(req.Tools) > 0 && !m.caps.SupportsTools {
|
|
return fmt.Errorf("%w: %s does not support tools", llm.ErrUnsupported, m.qualified())
|
|
}
|
|
if len(req.Schema) > 0 && !m.caps.SupportsStructured {
|
|
return fmt.Errorf("%w: %s does not support structured output", llm.ErrUnsupported, m.qualified())
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// buildContents maps canonical messages onto SDK contents, and collects
|
|
// the system prompt (Request.System + folded RoleSystem messages).
|
|
func (m *model) buildContents(req llm.Request) (string, []*genai.Content, error) {
|
|
var sys []string
|
|
if req.System != "" {
|
|
sys = append(sys, req.System)
|
|
}
|
|
|
|
var contents []*genai.Content
|
|
for _, msg := range req.Messages {
|
|
switch msg.Role {
|
|
case llm.RoleSystem:
|
|
if t := msg.Text(); t != "" {
|
|
sys = append(sys, t)
|
|
}
|
|
case llm.RoleTool:
|
|
parts := make([]*genai.Part, 0, len(msg.ToolResults))
|
|
for _, res := range msg.ToolResults {
|
|
payload := map[string]any{"output": res.Content}
|
|
if res.IsError {
|
|
payload = map[string]any{"error": res.Content}
|
|
}
|
|
parts = append(parts, &genai.Part{FunctionResponse: &genai.FunctionResponse{
|
|
ID: res.ID, Name: res.Name, Response: payload,
|
|
}})
|
|
}
|
|
contents = append(contents, &genai.Content{Role: genai.RoleUser, Parts: parts})
|
|
default:
|
|
role := genai.RoleUser
|
|
if msg.Role == llm.RoleAssistant {
|
|
role = genai.RoleModel
|
|
}
|
|
var parts []*genai.Part
|
|
for _, part := range msg.Parts {
|
|
switch v := part.(type) {
|
|
case llm.TextPart:
|
|
parts = append(parts, genai.NewPartFromText(v.Text))
|
|
case llm.ImagePart:
|
|
parts = append(parts, genai.NewPartFromBytes(v.Data, v.MIME))
|
|
}
|
|
}
|
|
for _, tc := range msg.ToolCalls {
|
|
args := map[string]any{}
|
|
if len(tc.Arguments) > 0 {
|
|
if err := json.Unmarshal(tc.Arguments, &args); err != nil {
|
|
return "", nil, fmt.Errorf("google: tool call %q arguments: %w", tc.Name, err)
|
|
}
|
|
}
|
|
parts = append(parts, &genai.Part{FunctionCall: &genai.FunctionCall{
|
|
ID: tc.ID, Name: tc.Name, Args: args,
|
|
}})
|
|
}
|
|
if len(parts) == 0 {
|
|
continue
|
|
}
|
|
contents = append(contents, &genai.Content{Role: role, Parts: parts})
|
|
}
|
|
}
|
|
return strings.Join(sys, "\n\n"), contents, nil
|
|
}
|
|
|
|
// buildConfig maps request knobs onto the SDK config.
|
|
func (m *model) buildConfig(req llm.Request, system string) (*genai.GenerateContentConfig, error) {
|
|
cfg := &genai.GenerateContentConfig{}
|
|
if system != "" {
|
|
cfg.SystemInstruction = genai.NewContentFromText(system, genai.RoleUser)
|
|
}
|
|
if req.Temperature != nil {
|
|
cfg.Temperature = new(float32)
|
|
*cfg.Temperature = float32(*req.Temperature)
|
|
}
|
|
if req.TopP != nil {
|
|
cfg.TopP = new(float32)
|
|
*cfg.TopP = float32(*req.TopP)
|
|
}
|
|
if req.MaxTokens > 0 {
|
|
cfg.MaxOutputTokens = int32(req.MaxTokens)
|
|
}
|
|
cfg.StopSequences = req.StopSequences
|
|
|
|
if len(req.Tools) > 0 && req.ToolChoice != "none" {
|
|
decls := make([]*genai.FunctionDeclaration, 0, len(req.Tools))
|
|
for _, t := range req.Tools {
|
|
decl := &genai.FunctionDeclaration{Name: t.Name, Description: t.Description}
|
|
if len(t.Parameters) > 0 {
|
|
var schema map[string]any
|
|
if err := json.Unmarshal(t.Parameters, &schema); err != nil {
|
|
return nil, fmt.Errorf("google: tool %q parameters: %w", t.Name, err)
|
|
}
|
|
decl.ParametersJsonSchema = schema
|
|
}
|
|
decls = append(decls, decl)
|
|
}
|
|
cfg.Tools = []*genai.Tool{{FunctionDeclarations: decls}}
|
|
|
|
switch req.ToolChoice {
|
|
case "", "auto":
|
|
// SDK default.
|
|
case "required":
|
|
cfg.ToolConfig = &genai.ToolConfig{FunctionCallingConfig: &genai.FunctionCallingConfig{
|
|
Mode: genai.FunctionCallingConfigModeAny,
|
|
}}
|
|
default:
|
|
cfg.ToolConfig = &genai.ToolConfig{FunctionCallingConfig: &genai.FunctionCallingConfig{
|
|
Mode: genai.FunctionCallingConfigModeAny, AllowedFunctionNames: []string{req.ToolChoice},
|
|
}}
|
|
}
|
|
}
|
|
|
|
if len(req.Schema) > 0 {
|
|
var schema map[string]any
|
|
if err := json.Unmarshal(req.Schema, &schema); err != nil {
|
|
return nil, fmt.Errorf("google: output schema: %w", err)
|
|
}
|
|
cfg.ResponseJsonSchema = schema
|
|
cfg.ResponseMIMEType = "application/json"
|
|
}
|
|
|
|
switch req.ReasoningEffort {
|
|
case "":
|
|
case "low":
|
|
cfg.ThinkingConfig = &genai.ThinkingConfig{ThinkingLevel: genai.ThinkingLevelLow}
|
|
case "medium":
|
|
cfg.ThinkingConfig = &genai.ThinkingConfig{ThinkingLevel: genai.ThinkingLevelMedium}
|
|
case "high":
|
|
cfg.ThinkingConfig = &genai.ThinkingConfig{ThinkingLevel: genai.ThinkingLevelHigh}
|
|
default:
|
|
return nil, fmt.Errorf("google: invalid reasoning effort %q (want low/medium/high)", req.ReasoningEffort)
|
|
}
|
|
|
|
return cfg, nil
|
|
}
|
|
|
|
// mapError converts SDK errors into majordomo's classification shapes.
|
|
func (m *model) mapError(err error) error {
|
|
if apiErr, ok := errors.AsType[genai.APIError](err); ok {
|
|
return &llm.APIError{
|
|
Provider: m.provider.name, Model: m.id,
|
|
Status: apiErr.Code, Code: apiErr.Status, Message: apiErr.Message,
|
|
}
|
|
}
|
|
return fmt.Errorf("google %s: %w", m.qualified(), err)
|
|
}
|
|
|
|
// Generate implements llm.Model.
|
|
func (m *model) Generate(ctx context.Context, req llm.Request, opts ...llm.Option) (*llm.Response, error) {
|
|
req = req.Apply(opts...)
|
|
if err := m.enforceCapabilities(req); err != nil {
|
|
return nil, err
|
|
}
|
|
client, err := m.provider.genaiClient(ctx)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
system, contents, err := m.buildContents(req)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
cfg, err := m.buildConfig(req, system)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
resp, err := client.Models.GenerateContent(ctx, m.id, contents, cfg)
|
|
if err != nil {
|
|
return nil, m.mapError(err)
|
|
}
|
|
return m.toResponse(resp), nil
|
|
}
|
|
|
|
// toResponse converts an SDK response into the canonical shape.
|
|
func (m *model) toResponse(resp *genai.GenerateContentResponse) *llm.Response {
|
|
out := &llm.Response{Model: m.qualified(), Raw: resp}
|
|
if resp.UsageMetadata != nil {
|
|
out.Usage = llm.Usage{
|
|
InputTokens: int(resp.UsageMetadata.PromptTokenCount),
|
|
OutputTokens: int(resp.UsageMetadata.CandidatesTokenCount + resp.UsageMetadata.ThoughtsTokenCount),
|
|
CacheReadTokens: int(resp.UsageMetadata.CachedContentTokenCount),
|
|
ReasoningTokens: int(resp.UsageMetadata.ThoughtsTokenCount),
|
|
}
|
|
}
|
|
if len(resp.Candidates) == 0 {
|
|
out.FinishReason = llm.FinishOther
|
|
return out
|
|
}
|
|
cand := resp.Candidates[0]
|
|
if cand.Content != nil {
|
|
for _, part := range cand.Content.Parts {
|
|
if part == nil {
|
|
continue
|
|
}
|
|
if part.Text != "" && !part.Thought {
|
|
out.Parts = append(out.Parts, llm.Text(part.Text))
|
|
}
|
|
if fc := part.FunctionCall; fc != nil {
|
|
id := fc.ID
|
|
if id == "" {
|
|
id = "call_" + strconv.Itoa(len(out.ToolCalls))
|
|
}
|
|
args, err := json.Marshal(fc.Args)
|
|
if err != nil || len(fc.Args) == 0 {
|
|
args = json.RawMessage("{}")
|
|
}
|
|
out.ToolCalls = append(out.ToolCalls, llm.ToolCall{ID: id, Name: fc.Name, Arguments: args})
|
|
}
|
|
}
|
|
}
|
|
out.FinishReason = mapFinish(cand.FinishReason, len(out.ToolCalls) > 0)
|
|
return out
|
|
}
|
|
|
|
func mapFinish(fr genai.FinishReason, hasToolCalls bool) llm.FinishReason {
|
|
if hasToolCalls {
|
|
return llm.FinishToolCalls
|
|
}
|
|
switch fr {
|
|
case genai.FinishReasonStop, genai.FinishReasonUnspecified, "":
|
|
return llm.FinishStop
|
|
case genai.FinishReasonMaxTokens:
|
|
return llm.FinishLength
|
|
case genai.FinishReasonSafety, genai.FinishReasonRecitation, genai.FinishReasonBlocklist,
|
|
genai.FinishReasonProhibitedContent, genai.FinishReasonSPII, genai.FinishReasonImageSafety:
|
|
return llm.FinishContentFilter
|
|
default:
|
|
return llm.FinishOther
|
|
}
|
|
}
|