go-llm/v2/ollama/native.go

// Package ollama implements the go-llm v2 provider interface for Ollama,
// targeting Ollama's native /api/chat endpoint. Supports both local Ollama
// instances (no API key) and Ollama Cloud (https://ollama.com, requires an
// API key).
package ollama

import (
	"bytes"
	"context"
	"encoding/base64"
	"encoding/json"
	"fmt"
	"io"
	"net/http"
	"strings"
	"time"

	"gitea.stevedudenhoeffer.com/steve/go-llm/v2/provider"
)

// Default base URLs for the two supported Ollama deployments.
const (
	// DefaultLocalBaseURL is the base URL used for an Ollama instance running
	// on the local machine.
	DefaultLocalBaseURL = "http://localhost:11434"

	// DefaultCloudBaseURL is the base URL used for Ollama Cloud.
	DefaultCloudBaseURL = "https://ollama.com"
)

// Provider implements provider.Provider over Ollama's native /api/chat
// endpoint. An empty apiKey means local-mode (no Authorization header sent);
// a non-empty apiKey is sent as a Bearer token (cloud-mode).
//
// baseURL is the server root: trailing slashes are trimmed and "/api/chat"
// is appended when a request is built. client is the HTTP client used to
// send chat requests.
type Provider struct {
	apiKey  string
	baseURL string
	client  *http.Client
}

// newNative builds a native Ollama provider from an API key and a base URL,
// pairing them with a fresh zero-value http.Client. Both arguments are stored
// as given; an empty apiKey selects local-mode. Callers should normally go
// through the package-level New() constructor or the v2 llm.Ollama() /
// llm.OllamaCloud() helpers rather than calling this directly.
func newNative(apiKey, baseURL string) *Provider {
	p := new(Provider)
	p.apiKey = apiKey
	p.baseURL = baseURL
	p.client = &http.Client{}
	return p
}

// nativeChatRequest is the JSON body POSTed to /api/chat.
//
// Model, Messages, and Stream are always serialized. Tools, Think, and
// Options are tagged omitempty and are dropped from the payload when unset.
// buildChatRequest populates Options with whichever sampling settings the
// caller supplied (temperature, top_p, num_predict, stop).
type nativeChatRequest struct {
	Model    string              `json:"model"`
	Messages []nativeChatMessage `json:"messages"`
	Tools    []nativeToolDef     `json:"tools,omitempty"`
	Stream   bool                `json:"stream"`
	// Think is polymorphic — Ollama accepts true/false or "low"/"medium"/"high".
	// It is stored as raw JSON so either form can be emitted; see encodeThink.
	Think   json.RawMessage `json:"think,omitempty"`
	Options map[string]any  `json:"options,omitempty"`
}

// nativeChatMessage is one entry in the messages array on the wire. It also
// carries assistant tool calls and tool-role responses, and it is the shape
// of the "message" object inside nativeChatResponse.
//
// Role is always serialized; every other field is omitted when empty.
// Images holds base64-encoded image payloads (convertMessage fills it via
// imageToBase64). ToolCalls lists the tool invocations attached to the
// message, ToolCallID is copied from the provider message's ToolCallID, and
// Thinking is the text Complete surfaces as Response.Thinking.
type nativeChatMessage struct {
	Role       string           `json:"role"`
	Content    string           `json:"content,omitempty"`
	Images     []string         `json:"images,omitempty"`
	ToolCalls  []nativeToolCall `json:"tool_calls,omitempty"`
	ToolCallID string           `json:"tool_call_id,omitempty"`
	Thinking   string           `json:"thinking,omitempty"`
}

// nativeToolCall mirrors Ollama's tool-call wire shape: a function with name
// and JSON-encoded arguments. Ollama's spec doesn't require an id, but some
// builds and some streaming chunks include one — we accept it on both wire and
// internal sides.
//
// When ID is empty, toolCallID synthesizes an identifier from the call's
// index instead. ID is omitted from outgoing JSON when empty.
type nativeToolCall struct {
	ID       string             `json:"id,omitempty"`
	Function nativeFunctionCall `json:"function"`
}

// nativeFunctionCall is the "function" payload of a tool call.
//
// Index is a pointer so that an absent index can be told apart from index 0;
// toolCallID only uses it when it is non-nil. Arguments is kept as raw JSON
// because the value may be an object/array or a pre-stringified JSON string;
// rawMessageToArgString normalizes it for the provider package. All three
// fields are omitted from outgoing JSON when empty.
type nativeFunctionCall struct {
	Index     *int            `json:"index,omitempty"`
	Name      string          `json:"name,omitempty"`
	Arguments json.RawMessage `json:"arguments,omitempty"`
}

// nativeChatResponse is the JSON body returned from a non-streaming /api/chat
// call (and is also the per-line shape during streaming).
//
// Complete reads Message (content, thinking, tool calls) and the two token
// counters: PromptEvalCount becomes the input-token count and EvalCount the
// output-token count. Model, Done, DoneReason, and TotalDuration are decoded
// but not consulted by Complete.
type nativeChatResponse struct {
	Model           string            `json:"model,omitempty"`
	Message         nativeChatMessage `json:"message"`
	Done            bool              `json:"done"`
	DoneReason      string            `json:"done_reason,omitempty"`
	PromptEvalCount int               `json:"prompt_eval_count,omitempty"`
	EvalCount       int               `json:"eval_count,omitempty"`
	TotalDuration   int64             `json:"total_duration,omitempty"`
}

// nativeToolDef is the wire shape of a tool definition sent to Ollama.
//
// buildChatRequest always sets Type to "function" and fills Function from the
// corresponding tool in the provider request.
type nativeToolDef struct {
	Type     string            `json:"type"`
	Function nativeFunctionDef `json:"function"`
}

// nativeFunctionDef describes a callable tool inside a nativeToolDef.
//
// Name is always serialized; Description and Parameters are omitted when
// empty. Parameters carries the tool's schema exactly as supplied on the
// provider request (buildChatRequest assigns the tool's Schema to it).
type nativeFunctionDef struct {
	Name        string         `json:"name"`
	Description string         `json:"description,omitempty"`
	Parameters  map[string]any `json:"parameters,omitempty"`
}

// encodeThink translates a go-llm Reasoning string into the raw JSON value
// for Ollama's polymorphic `think` field:
//
//   - ""               -> nil, so the omitempty field is left out entirely
//   - "true" / "false" -> the bare JSON booleans true / false
//   - anything else    -> a JSON string (e.g. "low", "medium", "high")
func encodeThink(reasoning string) json.RawMessage {
	if reasoning == "" {
		return nil
	}
	if reasoning == "true" || reasoning == "false" {
		// The literal text already is the JSON boolean encoding.
		return json.RawMessage(reasoning)
	}
	// Marshalling a plain Go string cannot fail, so the error is dropped.
	quoted, _ := json.Marshal(reasoning)
	return quoted
}

// Complete runs a single non-streaming chat completion against /api/chat.
//
// The request is serialized with stream=false and sent through
// doChatRequest. Any status outside 2xx is reported as an error whose message
// includes the status code and the response body. On success the decoded
// message's content, thinking text, and tool calls are copied into the
// returned Response; Usage is attached only when the server reported at least
// one non-zero token counter.
func (p *Provider) Complete(ctx context.Context, req provider.Request) (provider.Response, error) {
	var none provider.Response

	payload, err := p.buildChatRequest(req, false)
	if err != nil {
		return none, err
	}

	httpResp, err := p.doChatRequest(ctx, payload)
	if err != nil {
		return none, err
	}
	defer httpResp.Body.Close()

	if status := httpResp.StatusCode; status < 200 || status >= 300 {
		// Best effort: a failed body read still yields the status code.
		detail, _ := io.ReadAll(httpResp.Body)
		return none, fmt.Errorf("ollama: HTTP %d: %s", status, string(detail))
	}

	var decoded nativeChatResponse
	if err := json.NewDecoder(httpResp.Body).Decode(&decoded); err != nil {
		return none, fmt.Errorf("ollama: decode response: %w", err)
	}

	reply := decoded.Message
	out := provider.Response{
		Text:     reply.Content,
		Thinking: reply.Thinking,
	}
	for pos, call := range reply.ToolCalls {
		converted := provider.ToolCall{
			ID:        toolCallID(call, pos),
			Name:      call.Function.Name,
			Arguments: rawMessageToArgString(call.Function.Arguments),
		}
		out.ToolCalls = append(out.ToolCalls, converted)
	}

	promptTokens, genTokens := decoded.PromptEvalCount, decoded.EvalCount
	if promptTokens > 0 || genTokens > 0 {
		out.Usage = &provider.Usage{
			InputTokens:  promptTokens,
			OutputTokens: genTokens,
			TotalTokens:  promptTokens + genTokens,
		}
	}
	return out, nil
}

// Stream is intended to perform a streaming chat completion via /api/chat
// with `stream: true`, parsing NDJSON line-by-line.
//
// It is not implemented yet: the method ignores ctx and req, sends nothing on
// events, and always returns a non-nil error.
func (p *Provider) Stream(ctx context.Context, req provider.Request, events chan<- provider.StreamEvent) error {
	return fmt.Errorf("ollama native provider: Stream not implemented")
}

// buildChatRequest serializes a provider.Request into the JSON body expected
// by /api/chat. The stream argument is copied into the body's "stream" flag.
//
// Messages are converted one by one with convertMessage (the first conversion
// error aborts the build), every tool is emitted as a "function" definition,
// and the sampling settings present on the request are mapped into "options":
// Temperature -> temperature, TopP -> top_p, MaxTokens -> num_predict,
// Stop -> stop. When none of them is set, "options" is left out.
func (p *Provider) buildChatRequest(req provider.Request, stream bool) ([]byte, error) {
	var wire nativeChatRequest
	wire.Model = req.Model
	wire.Stream = stream
	wire.Think = encodeThink(req.Reasoning)

	for _, src := range req.Messages {
		converted, err := convertMessage(src)
		if err != nil {
			return nil, err
		}
		wire.Messages = append(wire.Messages, converted)
	}

	for _, tool := range req.Tools {
		def := nativeToolDef{Type: "function"}
		def.Function.Name = tool.Name
		def.Function.Description = tool.Description
		def.Function.Parameters = tool.Schema
		wire.Tools = append(wire.Tools, def)
	}

	// Collect only the options that were actually provided; an empty set
	// leaves wire.Options nil so the field is omitted.
	opts := map[string]any{}
	if req.Temperature != nil {
		opts["temperature"] = *req.Temperature
	}
	if req.TopP != nil {
		opts["top_p"] = *req.TopP
	}
	if req.MaxTokens != nil {
		opts["num_predict"] = *req.MaxTokens
	}
	if len(req.Stop) > 0 {
		opts["stop"] = req.Stop
	}
	if len(opts) > 0 {
		wire.Options = opts
	}

	return json.Marshal(wire)
}

// doChatRequest sends body as a JSON POST to <baseURL>/api/chat (trailing
// slashes on the base URL are trimmed first) and hands back the raw HTTP
// response without inspecting its status. An Authorization: Bearer header is
// added only when the provider has an API key. On success the caller owns
// the response and must close its body.
func (p *Provider) doChatRequest(ctx context.Context, body []byte) (*http.Response, error) {
	endpoint := strings.TrimRight(p.baseURL, "/") + "/api/chat"

	httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, endpoint, bytes.NewReader(body))
	if err != nil {
		return nil, fmt.Errorf("ollama: build request: %w", err)
	}

	httpReq.Header.Set("Content-Type", "application/json")
	if key := p.apiKey; key != "" {
		httpReq.Header.Set("Authorization", "Bearer "+key)
	}

	httpResp, err := p.client.Do(httpReq)
	if err != nil {
		return nil, fmt.Errorf("ollama: HTTP request: %w", err)
	}
	return httpResp, nil
}

// convertMessage translates one provider.Message into its native wire form.
//
// Role, Content, and ToolCallID are copied across unchanged. Each image is
// resolved to base64 through imageToBase64; images that resolve to an empty
// string are skipped, and the first resolution error aborts the conversion.
// Tool calls are emitted in order with their position as the index, and
// blank argument strings are replaced by an empty JSON object.
func convertMessage(msg provider.Message) (nativeChatMessage, error) {
	var wire nativeChatMessage
	wire.Role = msg.Role
	wire.Content = msg.Content
	wire.ToolCallID = msg.ToolCallID

	for _, image := range msg.Images {
		encoded, err := imageToBase64(image)
		if err != nil {
			return nativeChatMessage{}, err
		}
		if encoded == "" {
			continue
		}
		wire.Images = append(wire.Images, encoded)
	}

	for pos, call := range msg.ToolCalls {
		args := json.RawMessage(`{}`)
		if text := strings.TrimSpace(call.Arguments); len(text) != 0 {
			args = json.RawMessage(text)
		}

		// Take a per-iteration copy so each call points at its own index;
		// the index lets streaming peers correlate deltas.
		position := pos

		var converted nativeToolCall
		converted.ID = call.ID
		converted.Function.Index = &position
		converted.Function.Name = call.Name
		converted.Function.Arguments = args
		wire.ToolCalls = append(wire.ToolCalls, converted)
	}

	return wire, nil
}

// imageToBase64 returns the base64-encoded payload of an image.
//
// Resolution order:
//   - img.Base64, when non-empty, is returned verbatim (no validation).
//   - Otherwise, an empty img.URL yields ("", nil) so the caller can skip
//     the image.
//   - Otherwise the URL is fetched over HTTP and the response body is
//     standard-base64 encoded.
//
// The fetch uses a dedicated client with an overall timeout rather than
// http.Get: http.DefaultClient has no timeout, and this function receives no
// context, so a stalled image host would otherwise block the caller
// indefinitely. A non-2xx status, a transport failure, or a body read
// failure is returned as an error that names the URL.
//
// NOTE(review): the body is still read in full with no size cap; callers
// that accept URLs from untrusted sources should validate them upstream.
func imageToBase64(img provider.Image) (string, error) {
	// fetchTimeout bounds the whole download: connecting, any redirects, and
	// reading the response body.
	const fetchTimeout = 60 * time.Second

	if img.Base64 != "" {
		return img.Base64, nil
	}
	if img.URL == "" {
		return "", nil
	}

	// A nil Transport means the client shares http.DefaultTransport, so
	// connection pooling is unaffected by creating the client per call.
	client := &http.Client{Timeout: fetchTimeout}
	resp, err := client.Get(img.URL)
	if err != nil {
		return "", fmt.Errorf("ollama: fetch image %q: %w", img.URL, err)
	}
	defer resp.Body.Close()

	if resp.StatusCode < 200 || resp.StatusCode >= 300 {
		return "", fmt.Errorf("ollama: fetch image %q: HTTP %d", img.URL, resp.StatusCode)
	}

	data, err := io.ReadAll(resp.Body)
	if err != nil {
		return "", fmt.Errorf("ollama: read image %q: %w", img.URL, err)
	}
	return base64.StdEncoding.EncodeToString(data), nil
}

// rawMessageToArgString converts a JSON-encoded arguments value into the
// string form the provider package uses for ToolCall.Arguments.
//
// Every "no arguments" spelling is normalized to "{}": a nil or
// whitespace-only value, JSON null, and a JSON string whose content is empty
// or blank. Object/array values pass through with surrounding whitespace
// trimmed. Bare JSON string values (some Ollama builds emit pre-stringified
// arguments) are unwrapped to their content; a string literal that fails to
// decode is returned as-is.
func rawMessageToArgString(raw json.RawMessage) string {
	const emptyArgs = "{}"

	trimmed := strings.TrimSpace(string(raw))
	// JSON null carries no arguments; returning "null" would hand callers a
	// non-object where they expect an argument object.
	if trimmed == "" || trimmed == "null" {
		return emptyArgs
	}
	if trimmed[0] != '"' {
		return trimmed
	}

	var unwrapped string
	if err := json.Unmarshal([]byte(trimmed), &unwrapped); err != nil {
		// Not a valid string literal; leave it for the caller to reject.
		return trimmed
	}
	// A pre-stringified value with nothing inside is also "no arguments";
	// an empty string would not even parse as JSON downstream.
	if strings.TrimSpace(unwrapped) == "" {
		return emptyArgs
	}
	return unwrapped
}

// toolCallID picks an identifier for a tool call. An id supplied on the wire
// is used as-is. Otherwise one is synthesized as "tc_<n>", where n is the
// call's own function index when present and the caller-supplied position
// (index) when not. Ollama's native API typically omits the id, so the
// synthesized form is the common case.
func toolCallID(tc nativeToolCall, index int) string {
	switch {
	case tc.ID != "":
		return tc.ID
	case tc.Function.Index != nil:
		// Prefer the index reported on the wire over the slice position.
		index = *tc.Function.Index
	}
	return fmt.Sprintf("tc_%d", index)
}