package ollama import ( "bytes" "context" "encoding/base64" "encoding/json" "fmt" "io" "net/http" "strconv" "strings" "gitea.stevedudenhoeffer.com/steve/majordomo/llm" ) // ---- wire types (field names per ollama api/types.go) ---- type chatRequest struct { Model string `json:"model"` Messages []chatMessage `json:"messages"` Tools []toolDef `json:"tools,omitempty"` Format json.RawMessage `json:"format,omitempty"` Options map[string]any `json:"options,omitempty"` // Stream has no omitempty on purpose: the server default is true, so // Generate must send an explicit false. Stream bool `json:"stream"` // Think is bool-or-string on the wire ("low"/"medium"/"high" or a bool). Think json.RawMessage `json:"think,omitempty"` } type chatMessage struct { Role string `json:"role"` Content string `json:"content"` Images []string `json:"images,omitempty"` // raw base64, no data: prefix ToolCalls []toolCall `json:"tool_calls,omitempty"` ToolName string `json:"tool_name,omitempty"` // on role:"tool" results } type toolDef struct { Type string `json:"type"` Function toolDefFunc `json:"function"` } type toolDefFunc struct { Name string `json:"name"` Description string `json:"description,omitempty"` Parameters json.RawMessage `json:"parameters,omitempty"` } type toolCall struct { ID string `json:"id,omitempty"` Function toolCallFunc `json:"function"` } type toolCallFunc struct { Index int `json:"index,omitempty"` Name string `json:"name"` // Arguments is a JSON OBJECT on the wire (unlike OpenAI's string). Arguments json.RawMessage `json:"arguments"` } type chatResponse struct { Model string `json:"model"` Message respMessage `json:"message"` Done bool `json:"done"` DoneReason string `json:"done_reason"` PromptEvalCount int `json:"prompt_eval_count"` EvalCount int `json:"eval_count"` } type respMessage struct { Role string `json:"role"` Content string `json:"content"` Thinking string `json:"thinking"` ToolCalls []toolCall `json:"tool_calls"` } type errorBody struct { Error string `json:"error"` } // ---- model ---- type model struct { provider *Provider id string caps llm.Capabilities } func (m *model) Capabilities() llm.Capabilities { return m.caps } func (m *model) qualified() string { return m.provider.name + "/" + m.id } // enforceCapabilities is the backstop check (the media layer normalizes // before requests get here; see ADR-0009). func (m *model) enforceCapabilities(req llm.Request) error { count := 0 for _, msg := range req.Messages { for _, part := range msg.Parts { img, ok := part.(llm.ImagePart) if !ok { continue } count++ if !m.caps.SupportsImages() { return fmt.Errorf("%w: %s does not accept image input", llm.ErrUnsupported, m.qualified()) } if !m.caps.MIMEAllowed(img.MIME) { return fmt.Errorf("%w: %s does not accept %s images", llm.ErrUnsupported, m.qualified(), img.MIME) } if m.caps.MaxImageBytes > 0 && len(img.Data) > m.caps.MaxImageBytes { return fmt.Errorf("%w: image of %d bytes exceeds %s limit of %d", llm.ErrUnsupported, len(img.Data), m.qualified(), m.caps.MaxImageBytes) } } } if count > 0 && m.caps.MaxImagesPerReq > 0 && count > m.caps.MaxImagesPerReq { return fmt.Errorf("%w: %d images exceed %s limit of %d", llm.ErrUnsupported, count, m.qualified(), m.caps.MaxImagesPerReq) } return nil } // buildRequest maps the canonical request onto the wire shape. func (m *model) buildRequest(req llm.Request, stream bool) (*chatRequest, error) { out := &chatRequest{Model: m.id, Stream: stream} // System prompt: dedicated field first, then folded RoleSystem messages. var sys []string if req.System != "" { sys = append(sys, req.System) } for _, msg := range req.Messages { if msg.Role == llm.RoleSystem { if t := msg.Text(); t != "" { sys = append(sys, t) } } } if len(req.Schema) > 0 { // Belt and braces: local Ollama enforces the "format" schema by // constrained decoding, but Ollama Cloud ignores the field // (verified live 2026-06-10) — so the schema is also stated as an // explicit instruction. Harmless where format works, essential // where it doesn't. sys = append(sys, "Respond with a single JSON object that validates against this JSON Schema — no markdown, no code fences, no prose before or after the JSON:\n"+string(req.Schema)) } if len(sys) > 0 { out.Messages = append(out.Messages, chatMessage{ Role: "system", Content: strings.Join(sys, "\n\n"), }) } for _, msg := range req.Messages { switch msg.Role { case llm.RoleSystem: // Already folded above. case llm.RoleTool: for _, res := range msg.ToolResults { content := res.Content if res.IsError { content = "ERROR: " + content } out.Messages = append(out.Messages, chatMessage{ Role: "tool", Content: content, ToolName: res.Name, }) } default: cm := chatMessage{Role: string(msg.Role), Content: msg.Text()} for _, part := range msg.Parts { if img, ok := part.(llm.ImagePart); ok { cm.Images = append(cm.Images, base64.StdEncoding.EncodeToString(img.Data)) } } for _, tc := range msg.ToolCalls { args := tc.Arguments if len(args) == 0 { args = json.RawMessage("{}") } cm.ToolCalls = append(cm.ToolCalls, toolCall{ ID: tc.ID, Function: toolCallFunc{Name: tc.Name, Arguments: args}, }) } out.Messages = append(out.Messages, cm) } } // Tools. Ollama has no tool_choice: "none" maps to omitting the tools; // "required"/named choices have no wire equivalent and are best-effort // ignored (documented in the README support matrix). if req.ToolChoice != "none" { for _, t := range req.Tools { params := t.Parameters if len(params) == 0 { params = json.RawMessage(`{"type":"object","properties":{}}`) } out.Tools = append(out.Tools, toolDef{ Type: "function", Function: toolDefFunc{Name: t.Name, Description: t.Description, Parameters: params}, }) } } if len(req.Schema) > 0 { out.Format = req.Schema } opts := make(map[string]any) if req.Temperature != nil { opts["temperature"] = *req.Temperature } if req.TopP != nil { opts["top_p"] = *req.TopP } if req.MaxTokens > 0 { opts["num_predict"] = req.MaxTokens } if len(req.StopSequences) > 0 { opts["stop"] = req.StopSequences } if len(opts) > 0 { out.Options = opts } switch req.ReasoningEffort { case "": case "low", "medium", "high": out.Think = json.RawMessage(strconv.Quote(req.ReasoningEffort)) default: return nil, fmt.Errorf("ollama: invalid reasoning effort %q (want low/medium/high)", req.ReasoningEffort) } return out, nil } // do POSTs /api/chat and returns the response body on 2xx, or a classified // error. func (m *model) do(ctx context.Context, wireReq *chatRequest) (*http.Response, error) { p := m.provider if err := p.checkReady(); err != nil { return nil, err } body, err := json.Marshal(wireReq) if err != nil { return nil, fmt.Errorf("ollama: encode request: %w", err) } httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, p.baseURL+"/api/chat", bytes.NewReader(body)) if err != nil { return nil, fmt.Errorf("ollama: build request: %w", err) } httpReq.Header.Set("Content-Type", "application/json") if p.token != "" { httpReq.Header.Set("Authorization", "Bearer "+p.token) } resp, err := p.client.Do(httpReq) if err != nil { return nil, fmt.Errorf("ollama %s: do request: %w", m.qualified(), err) } if resp.StatusCode/100 != 2 { defer resp.Body.Close() raw, _ := io.ReadAll(io.LimitReader(resp.Body, 8<<10)) var eb errorBody _ = json.Unmarshal(raw, &eb) msg := eb.Error if msg == "" { msg = strings.TrimSpace(string(raw)) } return nil, &llm.APIError{ Provider: p.name, Model: m.id, Status: resp.StatusCode, Message: msg, } } return resp, nil } // Generate implements llm.Model. func (m *model) Generate(ctx context.Context, req llm.Request, opts ...llm.Option) (*llm.Response, error) { req = req.Apply(opts...) if err := m.enforceCapabilities(req); err != nil { return nil, err } wireReq, err := m.buildRequest(req, false) if err != nil { return nil, err } resp, err := m.do(ctx, wireReq) if err != nil { return nil, err } defer resp.Body.Close() var cr chatResponse if err := json.NewDecoder(resp.Body).Decode(&cr); err != nil { return nil, fmt.Errorf("ollama %s: decode response: %w", m.qualified(), err) } return m.toResponse(&cr), nil } // toResponse converts a final wire chunk into the canonical response. func (m *model) toResponse(cr *chatResponse) *llm.Response { out := &llm.Response{ Model: m.qualified(), Usage: llm.Usage{InputTokens: cr.PromptEvalCount, OutputTokens: cr.EvalCount}, Raw: cr, } if cr.Message.Content != "" { out.Parts = append(out.Parts, llm.Text(cr.Message.Content)) } out.ToolCalls = convertToolCalls(cr.Message.ToolCalls) out.FinishReason = finishReason(cr.DoneReason, len(out.ToolCalls) > 0) return out } // convertToolCalls maps wire tool calls, synthesizing ids where the model // omitted them (ids are optional in Ollama's shape but required by our // agent loop to match results to calls). func convertToolCalls(calls []toolCall) []llm.ToolCall { out := make([]llm.ToolCall, 0, len(calls)) for i, tc := range calls { id := tc.ID if id == "" { id = "call_" + strconv.Itoa(i) } args := tc.Function.Arguments if len(args) == 0 { args = json.RawMessage("{}") } out = append(out, llm.ToolCall{ID: id, Name: tc.Function.Name, Arguments: args}) } if len(out) == 0 { return nil } return out } func finishReason(doneReason string, hasToolCalls bool) llm.FinishReason { if hasToolCalls { return llm.FinishToolCalls } switch doneReason { case "stop", "": return llm.FinishStop case "length": return llm.FinishLength default: return llm.FinishOther } }