// Package google implements majordomo's provider contract for Google's
// Gemini models on the official Google Gen AI Go SDK
// (google.golang.org/genai, the approved third-party dependency per
// ADR-0007; the legacy github.com/google/generative-ai-go SDK is
// deprecated and not used).
//
// Targeted SDK surface (verified against genai v1.59.0 source, June 2026):
// Models.GenerateContent / GenerateContentStream (iter.Seq2), Content/Part
// with InlineData blobs for images, FunctionDeclaration.ParametersJsonSchema
// for raw JSON-schema tools, FunctionCall/FunctionResponse parts for the
// tool loop, GenerateContentConfig.ResponseJsonSchema + JSON MIME for
// structured output, ThinkingConfig.ThinkingLevel for reasoning effort, and
// HTTPOptions.BaseURL + HTTPClient for hermetic tests.
package google

import (
	"context"
	"encoding/json"
	"errors"
	"fmt"
	"net/http"
	"os"
	"strconv"
	"strings"
	"sync"

	"google.golang.org/genai"

	"gitea.stevedudenhoeffer.com/steve/majordomo/llm"
)

// defaultCapabilities reflects the published Gemini API limits (June 2026):
// png/jpeg/webp/heic/heif input; inline payloads bounded by a 20MB total
// request budget. MaxImagesPerReq is capped at a practical 100 (the
// published 3,600-file limit assumes the Files API, which majordomo does
// not use).
//
// It is the starting value of Provider.caps; WithDefaultCapabilities
// replaces it per provider.
var defaultCapabilities = llm.Capabilities{
	SupportsTools:      true,
	SupportsStructured: true,
	SupportsStreaming:  true,
	MaxImagesPerReq:    100,
	// 15 MiB per image, below the 20MB request budget noted above.
	// NOTE(review): presumably headroom for base64 expansion of inline
	// data — confirm.
	MaxImageBytes:    15 << 20,
	AllowedImageMIME: []string{"image/jpeg", "image/png", "image/webp", "image/heic", "image/heif"},
}

// Provider is a Gemini provider over the official SDK.
//
// The SDK client is built on first use by genaiClient and then reused.
type Provider struct {
	name       string           // registry name returned by Name; "google" unless overridden
	apiKey     string           // API key from the environment or WithAPIKey; may be empty
	baseURL    string           // optional endpoint override; empty keeps the SDK default
	httpClient *http.Client     // optional HTTP client override; nil keeps the SDK default
	caps       llm.Capabilities // capabilities given to models that do not override them

	mu     sync.Mutex    // guards client
	client *genai.Client // lazily created SDK client; nil until the first successful build
}

// Option configures the provider.
type Option func(*Provider)

// WithName overrides the registry name (default "google").
func WithName(name string) Option {
	return func(p *Provider) {
		p.name = name
	}
}

// WithAPIKey supplies the API key explicitly. Without it, New falls back to
// GOOGLE_API_KEY and then GEMINI_API_KEY, matching the SDK's own precedence.
func WithAPIKey(key string) Option {
	return func(p *Provider) {
		p.apiKey = key
	}
}

// WithBaseURL points the provider at a different API endpoint (tests,
// proxies). Trailing slashes are stripped.
func WithBaseURL(u string) Option {
	trimmed := strings.TrimRight(u, "/")
	return func(p *Provider) {
		p.baseURL = trimmed
	}
}

// WithHTTPClient makes the SDK use c for its HTTP traffic.
func WithHTTPClient(c *http.Client) Option {
	return func(p *Provider) {
		p.httpClient = c
	}
}

// WithDefaultCapabilities replaces the provider-wide default capabilities.
func WithDefaultCapabilities(caps llm.Capabilities) Option {
	return func(p *Provider) {
		p.caps = caps
	}
}

// New builds a provider. It never fails: when no key is configured the
// problem is reported as an auth error at request time, so chains can fail
// over to another provider.
func New(opts ...Option) *Provider {
	p := &Provider{name: "google", caps: defaultCapabilities}

	// The first non-empty variable wins; options applied below may still
	// override it.
	for _, envVar := range [...]string{"GOOGLE_API_KEY", "GEMINI_API_KEY"} {
		if v := os.Getenv(envVar); v != "" {
			p.apiKey = v
			break
		}
	}

	for _, apply := range opts {
		apply(p)
	}
	return p
}

// Name implements llm.Provider and reports the registry name.
func (p *Provider) Name() string {
	return p.name
}

// Model implements llm.Provider. The id is passed through verbatim; a
// capabilities override in the model options replaces the provider default.
func (p *Provider) Model(id string, opts ...llm.ModelOption) (llm.Model, error) {
	settings := llm.ApplyModelOptions(opts)

	mdl := &model{provider: p, id: id, caps: p.caps}
	if override := settings.Capabilities; override != nil {
		mdl.caps = *override
	}
	return mdl, nil
}

// genaiClient builds (once) and returns the SDK client. The SDK's
// NewClient does no network I/O for the API-key backend; failures here are
// configuration errors, returned per call and retried on the next.
func (p *Provider) genaiClient(ctx context.Context) (*genai.Client, error) { p.mu.Lock() defer p.mu.Unlock() if p.client != nil { return p.client, nil } if p.apiKey == "" { return nil, &llm.APIError{ Provider: p.name, Status: http.StatusUnauthorized, Code: "missing_api_key", Message: "no API key configured (set GOOGLE_API_KEY/GEMINI_API_KEY or use WithAPIKey)", } } cc := &genai.ClientConfig{ APIKey: p.apiKey, Backend: genai.BackendGeminiAPI, } if p.baseURL != "" { cc.HTTPOptions = genai.HTTPOptions{BaseURL: p.baseURL} } if p.httpClient != nil { cc.HTTPClient = p.httpClient } client, err := genai.NewClient(ctx, cc) if err != nil { return nil, fmt.Errorf("google: create client: %w", err) } p.client = client return client, nil } type model struct { provider *Provider id string caps llm.Capabilities } func (m *model) Capabilities() llm.Capabilities { return m.caps } func (m *model) qualified() string { return m.provider.name + "/" + m.id } // enforceCapabilities is the provider backstop (ADR-0009); the media layer // normalizes before requests get here. 
func (m *model) enforceCapabilities(req llm.Request) error { count := 0 for _, msg := range req.Messages { for _, part := range msg.Parts { img, ok := part.(llm.ImagePart) if !ok { continue } count++ if !m.caps.SupportsImages() { return fmt.Errorf("%w: %s does not accept image input", llm.ErrUnsupported, m.qualified()) } if !m.caps.MIMEAllowed(img.MIME) { return fmt.Errorf("%w: %s does not accept %s images", llm.ErrUnsupported, m.qualified(), img.MIME) } if m.caps.MaxImageBytes > 0 && len(img.Data) > m.caps.MaxImageBytes { return fmt.Errorf("%w: image of %d bytes exceeds %s limit of %d", llm.ErrUnsupported, len(img.Data), m.qualified(), m.caps.MaxImageBytes) } } } if count > m.caps.MaxImagesPerReq && m.caps.MaxImagesPerReq > 0 { return fmt.Errorf("%w: %d images exceed %s limit of %d", llm.ErrUnsupported, count, m.qualified(), m.caps.MaxImagesPerReq) } if len(req.Tools) > 0 && !m.caps.SupportsTools { return fmt.Errorf("%w: %s does not support tools", llm.ErrUnsupported, m.qualified()) } if len(req.Schema) > 0 && !m.caps.SupportsStructured { return fmt.Errorf("%w: %s does not support structured output", llm.ErrUnsupported, m.qualified()) } return nil } // buildContents maps canonical messages onto SDK contents, and collects // the system prompt (Request.System + folded RoleSystem messages). 
func (m *model) buildContents(req llm.Request) (string, []*genai.Content, error) { var sys []string if req.System != "" { sys = append(sys, req.System) } var contents []*genai.Content for _, msg := range req.Messages { switch msg.Role { case llm.RoleSystem: if t := msg.Text(); t != "" { sys = append(sys, t) } case llm.RoleTool: parts := make([]*genai.Part, 0, len(msg.ToolResults)) for _, res := range msg.ToolResults { payload := map[string]any{"output": res.Content} if res.IsError { payload = map[string]any{"error": res.Content} } parts = append(parts, &genai.Part{FunctionResponse: &genai.FunctionResponse{ ID: res.ID, Name: res.Name, Response: payload, }}) } contents = append(contents, &genai.Content{Role: genai.RoleUser, Parts: parts}) default: role := genai.RoleUser if msg.Role == llm.RoleAssistant { role = genai.RoleModel } var parts []*genai.Part for _, part := range msg.Parts { switch v := part.(type) { case llm.TextPart: parts = append(parts, genai.NewPartFromText(v.Text)) case llm.ImagePart: parts = append(parts, genai.NewPartFromBytes(v.Data, v.MIME)) } } for _, tc := range msg.ToolCalls { args := map[string]any{} if len(tc.Arguments) > 0 { if err := json.Unmarshal(tc.Arguments, &args); err != nil { return "", nil, fmt.Errorf("google: tool call %q arguments: %w", tc.Name, err) } } parts = append(parts, &genai.Part{FunctionCall: &genai.FunctionCall{ ID: tc.ID, Name: tc.Name, Args: args, }}) } if len(parts) == 0 { continue } contents = append(contents, &genai.Content{Role: role, Parts: parts}) } } return strings.Join(sys, "\n\n"), contents, nil } // buildConfig maps request knobs onto the SDK config. 
func (m *model) buildConfig(req llm.Request, system string) (*genai.GenerateContentConfig, error) { cfg := &genai.GenerateContentConfig{} if system != "" { cfg.SystemInstruction = genai.NewContentFromText(system, genai.RoleUser) } if req.Temperature != nil { cfg.Temperature = new(float32) *cfg.Temperature = float32(*req.Temperature) } if req.TopP != nil { cfg.TopP = new(float32) *cfg.TopP = float32(*req.TopP) } if req.MaxTokens > 0 { cfg.MaxOutputTokens = int32(req.MaxTokens) } cfg.StopSequences = req.StopSequences if len(req.Tools) > 0 && req.ToolChoice != "none" { decls := make([]*genai.FunctionDeclaration, 0, len(req.Tools)) for _, t := range req.Tools { decl := &genai.FunctionDeclaration{Name: t.Name, Description: t.Description} if len(t.Parameters) > 0 { var schema map[string]any if err := json.Unmarshal(t.Parameters, &schema); err != nil { return nil, fmt.Errorf("google: tool %q parameters: %w", t.Name, err) } decl.ParametersJsonSchema = schema } decls = append(decls, decl) } cfg.Tools = []*genai.Tool{{FunctionDeclarations: decls}} switch req.ToolChoice { case "", "auto": // SDK default. 
case "required": cfg.ToolConfig = &genai.ToolConfig{FunctionCallingConfig: &genai.FunctionCallingConfig{ Mode: genai.FunctionCallingConfigModeAny, }} default: cfg.ToolConfig = &genai.ToolConfig{FunctionCallingConfig: &genai.FunctionCallingConfig{ Mode: genai.FunctionCallingConfigModeAny, AllowedFunctionNames: []string{req.ToolChoice}, }} } } if len(req.Schema) > 0 { var schema map[string]any if err := json.Unmarshal(req.Schema, &schema); err != nil { return nil, fmt.Errorf("google: output schema: %w", err) } cfg.ResponseJsonSchema = schema cfg.ResponseMIMEType = "application/json" } switch req.ReasoningEffort { case "": case "low": cfg.ThinkingConfig = &genai.ThinkingConfig{ThinkingLevel: genai.ThinkingLevelLow} case "medium": cfg.ThinkingConfig = &genai.ThinkingConfig{ThinkingLevel: genai.ThinkingLevelMedium} case "high": cfg.ThinkingConfig = &genai.ThinkingConfig{ThinkingLevel: genai.ThinkingLevelHigh} default: return nil, fmt.Errorf("google: invalid reasoning effort %q (want low/medium/high)", req.ReasoningEffort) } return cfg, nil } // mapError converts SDK errors into majordomo's classification shapes. func (m *model) mapError(err error) error { if apiErr, ok := errors.AsType[genai.APIError](err); ok { return &llm.APIError{ Provider: m.provider.name, Model: m.id, Status: apiErr.Code, Code: apiErr.Status, Message: apiErr.Message, } } return fmt.Errorf("google %s: %w", m.qualified(), err) } // Generate implements llm.Model. func (m *model) Generate(ctx context.Context, req llm.Request, opts ...llm.Option) (*llm.Response, error) { req = req.Apply(opts...) 
if err := m.enforceCapabilities(req); err != nil { return nil, err } client, err := m.provider.genaiClient(ctx) if err != nil { return nil, err } system, contents, err := m.buildContents(req) if err != nil { return nil, err } cfg, err := m.buildConfig(req, system) if err != nil { return nil, err } resp, err := client.Models.GenerateContent(ctx, m.id, contents, cfg) if err != nil { return nil, m.mapError(err) } return m.toResponse(resp), nil } // toResponse converts an SDK response into the canonical shape. func (m *model) toResponse(resp *genai.GenerateContentResponse) *llm.Response { out := &llm.Response{Model: m.qualified(), Raw: resp} if resp.UsageMetadata != nil { out.Usage = llm.Usage{ InputTokens: int(resp.UsageMetadata.PromptTokenCount), OutputTokens: int(resp.UsageMetadata.CandidatesTokenCount + resp.UsageMetadata.ThoughtsTokenCount), } } if len(resp.Candidates) == 0 { out.FinishReason = llm.FinishOther return out } cand := resp.Candidates[0] if cand.Content != nil { for _, part := range cand.Content.Parts { if part == nil { continue } if part.Text != "" && !part.Thought { out.Parts = append(out.Parts, llm.Text(part.Text)) } if fc := part.FunctionCall; fc != nil { id := fc.ID if id == "" { id = "call_" + strconv.Itoa(len(out.ToolCalls)) } args, err := json.Marshal(fc.Args) if err != nil || len(fc.Args) == 0 { args = json.RawMessage("{}") } out.ToolCalls = append(out.ToolCalls, llm.ToolCall{ID: id, Name: fc.Name, Arguments: args}) } } } out.FinishReason = mapFinish(cand.FinishReason, len(out.ToolCalls) > 0) return out } func mapFinish(fr genai.FinishReason, hasToolCalls bool) llm.FinishReason { if hasToolCalls { return llm.FinishToolCalls } switch fr { case genai.FinishReasonStop, genai.FinishReasonUnspecified, "": return llm.FinishStop case genai.FinishReasonMaxTokens: return llm.FinishLength case genai.FinishReasonSafety, genai.FinishReasonRecitation, genai.FinishReasonBlocklist, genai.FinishReasonProhibitedContent, genai.FinishReasonSPII, 
genai.FinishReasonImageSafety: return llm.FinishContentFilter default: return llm.FinishOther } }