executus/model/call.go

package model

import (
	"context"
	"encoding/json"
	"errors"
	"fmt"
	"log/slog"
	"runtime/debug"
	"strings"
	"time"

	majordomo "gitea.stevedudenhoeffer.com/steve/majordomo"
	"gitea.stevedudenhoeffer.com/steve/majordomo/llm"
	"github.com/google/uuid"
)

// CallResult captures the result of a single tool call execution, as
// produced by CallAndExecute for each tool call in a model response.
type CallResult struct {
	Name      string // tool name taken from the model's tool call
	Arguments string // the call's arguments, converted to a string
	Result    string // content returned by the toolbox for this call
	Error     error  // non-nil when the toolbox flagged the result as an error
}

// instrumentedModel decorates a parsed model so every successful Generate
// records token usage to the usage sink automatically. This is the
// single usage chokepoint: ANY call through a model from
// ParseModelRequest / ParseModelForContext is accounted, whether it goes
// through the helpers in this file, the agent loop, or a direct
// model.Generate at a call site.
//
// IMPORTANT: do not call RecordUsage on responses from a parsed model —
// that would double-count. RecordUsage exists for models obtained outside
// this package.
type instrumentedModel struct {
	inner llm.Model // wrapped model that every method delegates to
}

// Generate delegates to the wrapped model and, when the call succeeds
// with a non-nil response, records its token usage via recordUsage. The
// response and error from the wrapped model are returned unchanged.
func (m *instrumentedModel) Generate(ctx context.Context, req llm.Request, opts ...llm.Option) (*llm.Response, error) {
	resp, err := m.inner.Generate(ctx, req, opts...)
	if err != nil || resp == nil {
		// Nothing to account for: failed call or no response object.
		return resp, err
	}
	recordUsage(ctx, resp)
	return resp, err
}

// Stream forwards to the wrapped model's Stream unchanged. No usage is
// recorded on this path; among this type's methods only Generate calls
// recordUsage.
func (m *instrumentedModel) Stream(ctx context.Context, req llm.Request, opts ...llm.Option) (llm.Stream, error) {
	return m.inner.Stream(ctx, req, opts...)
}

// Capabilities reports the wrapped model's capabilities unchanged.
func (m *instrumentedModel) Capabilities() llm.Capabilities { return m.inner.Capabilities() }

// CallAndExecute sends messages to the model with a toolbox, executes any
// tool calls, and returns the results. It performs a single round of
// generation + tool execution (no looping) — multi-step loops belong to
// the agent package.
//
// When toolbox is non-nil it is passed to the model ahead of the caller's
// opts. The returned string is the response text. The returned slice has
// one CallResult per tool call, in the order the model issued them; it is
// nil when the model requested no tools or no toolbox was supplied.
//
// A generation error is wrapped as "completion failed: ...". A model that
// returns a nil response without an error is reported the same way instead
// of panicking. Tool-level failures do not fail the call: they surface in
// CallResult.Error. A trace span is recorded on every path when tracing is
// active.
func CallAndExecute(ctx context.Context, model llm.Model, systemPrompt string, toolbox *llm.Toolbox, messages []llm.Message, opts ...llm.Option) ([]CallResult, string, error) {
	req := llm.Request{System: systemPrompt, Messages: messages}

	allOpts := make([]llm.Option, 0, len(opts)+1)
	if toolbox != nil {
		allOpts = append(allOpts, llm.WithToolbox(toolbox))
	}
	allOpts = append(allOpts, opts...)

	startTime := time.Now()
	resp, err := model.Generate(ctx, req, allOpts...)
	if err == nil && resp == nil {
		// Defensive: a (nil, nil) return would otherwise panic on
		// resp.ToolCalls below. Treat it like any other failed completion.
		err = errors.New("model returned nil response without an error")
	}
	if err != nil {
		recordSpanFromWrapper(ctx, systemPrompt, messages, toolbox, nil, nil, startTime, err)
		return nil, "", fmt.Errorf("completion failed: %w", err)
	}

	if len(resp.ToolCalls) == 0 || toolbox == nil {
		recordSpanFromWrapper(ctx, systemPrompt, messages, toolbox, resp, nil, startTime, nil)
		return nil, resp.Text(), nil
	}

	results := make([]CallResult, 0, len(resp.ToolCalls))
	for _, call := range resp.ToolCalls {
		tr := toolbox.Execute(ctx, call)
		cr := CallResult{
			Name:      call.Name,
			Arguments: string(call.Arguments),
			Result:    tr.Content,
		}
		if tr.IsError {
			// Keep the content in Result and mirror it as an error so
			// callers can branch on cr.Error != nil.
			cr.Error = errors.New(tr.Content)
		}
		results = append(results, cr)
	}

	recordSpanFromWrapper(ctx, systemPrompt, messages, toolbox, resp, results, startTime, nil)

	return results, resp.Text(), nil
}

// GenerateWith sends messages to the model with an optional system prompt and
// returns structured output parsed into T. T must be a struct. Uses
// majordomo's native structured output (response schema derived from T).
//
// The result and error from majordomo.Generate are returned unchanged.
// When tracing is active a span is written for the call, carrying token
// usage from the captured raw response and either the error string or the
// JSON encoding of the decoded result. Structured-output parse failures
// are additionally logged with slog.Warn, whether or not tracing is
// active.
func GenerateWith[T any](ctx context.Context, model llm.Model, systemPrompt string, messages []llm.Message, opts ...llm.Option) (T, error) {
	req := llm.Request{System: systemPrompt, Messages: messages}

	startTime := time.Now()

	// Capture the raw response so the trace span carries usage and the
	// concrete serving model even though majordomo.Generate only returns T.
	capture := &captureModel{inner: model}
	result, err := majordomo.Generate[T](ctx, capture, req, opts...)

	// Take one completion timestamp right after the call so DurationMs,
	// CompletedAt and CreatedAt agree with each other and exclude the
	// span bookkeeping below (same approach as recordSpanFromWrapper).
	completedAt := time.Now()

	resolvedModel := resolvedModelName(ctx, capture.resp)

	if !tracingEnabled(ctx) {
		if err != nil && isStructuredOutputParseError(err) {
			// Tracing disabled: slog.Warn is the only breadcrumb operators get.
			slog.Warn("llms.GenerateWith: structured-output parse failure (no trace span)",
				"model", resolvedModel,
				"err", err.Error(),
			)
		}
		return result, err
	}

	span := Span{
		SpanID:       uuid.New().String(),
		TraceID:      traceIDFromContext(ctx),
		Model:        resolvedModel,
		SystemPrompt: systemPrompt,
		Messages:     marshalMessages(messages),
		DurationMs:   completedAt.Sub(startTime).Milliseconds(),
		StartedAt:    startTime,
		CompletedAt:  completedAt,
		CreatedAt:    completedAt,
	}
	if capture.resp != nil {
		span.InputTokens = capture.resp.Usage.InputTokens
		span.OutputTokens = capture.resp.Usage.OutputTokens
	}

	if err != nil {
		span.Error = err.Error()
		// Structured-output failure: log loudly so operators can chase
		// down a regression (e.g. a model returning prose or fenced
		// JSON the decoder rejects) from the trace span alone. The
		// error string includes the failing field path on decode
		// errors.
		if isStructuredOutputParseError(err) {
			slog.Warn("llms.GenerateWith: structured-output parse failure",
				"model", resolvedModel,
				"span_id", span.SpanID,
				"trace_id", span.TraceID,
				"err", err.Error(),
			)
		}
	} else if b, marshalErr := json.Marshal(result); marshalErr == nil {
		// Best-effort: the span text is diagnostic only, so a result that
		// cannot be re-encoded leaves ResponseText empty rather than
		// failing the call.
		span.ResponseText = string(b)
	}

	traceSink.WriteSpan(span)

	return result, err
}

// captureModel records the last successful response so wrappers that
// only see the decoded result (majordomo.Generate) can still attribute
// usage and tracing.
type captureModel struct {
	inner llm.Model     // wrapped model that every method delegates to
	resp  *llm.Response // response from the most recent Generate that returned a nil error; nil until then
}

// Generate delegates to the wrapped model and, when the call returns no
// error, remembers the response in m.resp. A failed call leaves any
// previously captured response in place.
func (m *captureModel) Generate(ctx context.Context, req llm.Request, opts ...llm.Option) (*llm.Response, error) {
	resp, err := m.inner.Generate(ctx, req, opts...)
	if err != nil {
		return resp, err
	}
	m.resp = resp
	return resp, err
}

// Stream forwards to the wrapped model's Stream unchanged; nothing is
// captured on this path.
func (m *captureModel) Stream(ctx context.Context, req llm.Request, opts ...llm.Option) (llm.Stream, error) {
	return m.inner.Stream(ctx, req, opts...)
}

// Capabilities reports the wrapped model's capabilities unchanged.
func (m *captureModel) Capabilities() llm.Capabilities { return m.inner.Capabilities() }

// isStructuredOutputParseError reports whether err's message carries one
// of the markers of a structured-output failure from majordomo.Generate:
// the decode path ("decode structured response") or the empty-response
// path ("structured response from ... is empty"). A nil error is never a
// parse failure. It gates the loud slog.Warn so transport errors don't get
// tagged as parse failures.
func isStructuredOutputParseError(err error) bool {
	if err == nil {
		return false
	}
	msg := err.Error()
	// Substring match on the message text; the check is deliberately
	// string-based rather than type-based.
	for _, marker := range [...]string{
		"decode structured response",
		"structured response from",
	} {
		if strings.Contains(msg, marker) {
			return true
		}
	}
	return false
}

// SimpleCall sends a single user message to the model with an optional system
// prompt and returns the text response. No tools involved.
//
// A generation error is wrapped as "completion failed: ...". A model that
// returns a nil response without an error is reported the same way instead
// of panicking on resp.Text(). A trace span is recorded on both the success
// and failure paths when tracing is active.
func SimpleCall(ctx context.Context, model llm.Model, systemPrompt string, userMessage string, opts ...llm.Option) (string, error) {
	msgs := []llm.Message{llm.UserText(userMessage)}

	startTime := time.Now()
	resp, err := model.Generate(ctx, llm.Request{System: systemPrompt, Messages: msgs}, opts...)
	if err == nil && resp == nil {
		// Defensive: treat a (nil, nil) return as a failed completion so
		// the dereference below cannot panic.
		err = errors.New("model returned nil response without an error")
	}
	if err != nil {
		recordSpanFromWrapper(ctx, systemPrompt, msgs, nil, nil, nil, startTime, err)
		return "", fmt.Errorf("completion failed: %w", err)
	}

	recordSpanFromWrapper(ctx, systemPrompt, msgs, nil, resp, nil, startTime, nil)

	return resp.Text(), nil
}

// RecordUsage records LLM token usage from a successful Generate response.
// It is the exported entry point to recordUsage and takes the response by
// value.
//
// ONLY call this for models obtained outside this package: models returned
// by ParseModelRequest / ParseModelForContext record usage automatically on
// every Generate, and calling RecordUsage on their responses double-counts.
func RecordUsage(ctx context.Context, resp llm.Response) {
	recordUsage(ctx, &resp)
}

// RecordSpan records a trace span for a direct model.Generate() call.
// Call this from modules that invoke model.Generate() directly when they
// want the call traced (usage is already recorded automatically for
// parsed models).
//
// All arguments are forwarded unchanged to recordSpanFromWrapper, which
// writes nothing unless tracing is active for ctx. resp and callResults
// may be nil; callErr, when non-nil, is stored as the span's error text.
func RecordSpan(ctx context.Context, systemPrompt string, messages []llm.Message, toolbox *llm.Toolbox, resp *llm.Response, callResults []CallResult, startTime time.Time, callErr error) {
	recordSpanFromWrapper(ctx, systemPrompt, messages, toolbox, resp, callResults, startTime, callErr)
}

// recordUsage reports one response's token counts to the usage sink.
//
// It is a no-op when no sink is configured, when resp is nil, or when all
// four counters (input, output, cache read, cache write) are zero. The
// model name comes from resolvedModelName: the serving model named by the
// response when present, otherwise the context attribution set by
// ParseModelForContext. An empty or "unknown" model name is still
// recorded, but a warning is logged first so the missing attribution can
// be tracked down.
func recordUsage(ctx context.Context, resp *llm.Response) {
	if resp == nil || usageSink == nil {
		return
	}

	usage := resp.Usage
	hasTokens := usage.InputTokens != 0 ||
		usage.OutputTokens != 0 ||
		usage.CacheReadTokens != 0 ||
		usage.CacheWriteTokens != 0
	if !hasTokens {
		return
	}

	name := resolvedModelName(ctx, resp)
	switch name {
	case "", "unknown":
		// With no tool attribution either, include a stack trace: it is
		// the only way left to locate the call site.
		if tool := toolFromContext(ctx); tool != "unknown" {
			slog.Warn("model usage: recording with unknown model — caller should set model.WithModel or use model.ParseModelForContext",
				"tool", tool, "user", userFromContext(ctx))
		} else {
			slog.Warn("model usage: recording with both unknown model and tool",
				"user", userFromContext(ctx), "stack", string(debug.Stack()))
		}
	}

	usageSink.Record(ctx, name, usage.InputTokens, usage.OutputTokens, usage.CacheReadTokens, usage.CacheWriteTokens)
}

// resolvedModelName picks the name used for usage and trace attribution.
// When the response names its serving model, that name is used with
// everything up to and including the first "/" removed
// ("provider/model" → "model"). Otherwise the context's requested model is
// resolved through ResolveModelName.
func resolvedModelName(ctx context.Context, resp *llm.Response) string {
	if resp == nil || resp.Model == "" {
		return ResolveModelName(modelFromContext(ctx))
	}
	if _, rest, found := strings.Cut(resp.Model, "/"); found {
		return rest
	}
	return resp.Model
}

// tracingEnabled reports whether spans should be written for ctx: a trace
// sink must be configured and ctx must carry a non-empty trace ID.
func tracingEnabled(ctx context.Context) bool {
	// Short-circuit keeps the context lookup from running without a sink.
	return traceSink != nil && traceIDFromContext(ctx) != ""
}

// recordSpanFromWrapper writes one trace span describing a model call, or
// does nothing when tracing is not active for ctx.
//
// The span always carries the prompt, messages, tool definitions, and
// timing (startTime up to the moment this function runs). resp, when
// non-nil, contributes response text, input/output token counts, and any
// tool calls. callResults, when non-empty, contributes tool results.
// callErr, when non-nil, is stored as the span's error text.
func recordSpanFromWrapper(ctx context.Context, systemPrompt string, messages []llm.Message, toolbox *llm.Toolbox, resp *llm.Response, callResults []CallResult, startTime time.Time, callErr error) {
	if !tracingEnabled(ctx) {
		return
	}

	// One timestamp for duration, completion and creation keeps the three
	// fields mutually consistent.
	finishedAt := time.Now()

	var span Span
	span.SpanID = uuid.New().String()
	span.TraceID = traceIDFromContext(ctx)
	span.Model = resolvedModelName(ctx, resp)
	span.SystemPrompt = systemPrompt
	span.Messages = marshalMessages(messages)
	span.ToolDefinitions = marshalToolDefs(toolbox)
	span.DurationMs = finishedAt.Sub(startTime).Milliseconds()
	span.StartedAt = startTime
	span.CompletedAt = finishedAt
	span.CreatedAt = finishedAt

	if callErr != nil {
		span.Error = callErr.Error()
	}

	if resp != nil {
		span.ResponseText = resp.Text()
		span.InputTokens = resp.Usage.InputTokens
		span.OutputTokens = resp.Usage.OutputTokens
		if calls := resp.ToolCalls; len(calls) > 0 {
			span.ResponseToolCalls = marshalToolCalls(calls)
		}
	}

	if len(callResults) != 0 {
		span.ToolResults = marshalCallResults(callResults)
	}

	traceSink.WriteSpan(span)
}

// --- Serialization helpers ---

// jsonMessage is the JSON form of one llm.Message as produced by
// marshalMessages.
type jsonMessage struct {
	Role       string `json:"role"`                   // message role, converted to a string
	Text       string `json:"text,omitempty"`         // result of the message's Text()
	ToolCallID string `json:"tool_call_id,omitempty"` // ID of the message's first tool result, if any
	ImageCount int    `json:"image_count,omitempty"`  // number of llm.ImagePart parts in the message
}

// marshalMessages encodes msgs as a JSON array of jsonMessage objects.
// Each entry carries the role, the message text, a count of image parts
// (the image data itself is not included), and the ID of the first tool
// result when the message has any. A nil or empty input yields "[]".
func marshalMessages(msgs []llm.Message) string {
	encoded := make([]jsonMessage, len(msgs))
	for i, msg := range msgs {
		entry := &encoded[i]
		entry.Role = string(msg.Role)
		entry.Text = msg.Text()
		if len(msg.ToolResults) > 0 {
			// Only the first tool result's ID is kept.
			entry.ToolCallID = msg.ToolResults[0].ID
		}
		for _, part := range msg.Parts {
			if _, isImage := part.(llm.ImagePart); isImage {
				entry.ImageCount++
			}
		}
	}
	// Marshal error deliberately ignored: on failure the result is "".
	raw, _ := json.Marshal(encoded)
	return string(raw)
}

// jsonToolCall is the JSON form of one llm.ToolCall as produced by
// marshalToolCalls.
type jsonToolCall struct {
	ID        string `json:"id"`        // tool call ID
	Name      string `json:"name"`      // name of the tool being called
	Arguments string `json:"arguments"` // call arguments, converted to a string
}

// marshalToolCalls encodes calls as a JSON array of jsonToolCall objects,
// preserving order. Arguments are embedded as a JSON string value rather
// than as nested JSON. A nil or empty input yields "[]".
func marshalToolCalls(calls []llm.ToolCall) string {
	encoded := make([]jsonToolCall, len(calls))
	for i := range calls {
		call := calls[i]
		encoded[i] = jsonToolCall{
			ID:        call.ID,
			Name:      call.Name,
			Arguments: string(call.Arguments),
		}
	}
	// Marshal error deliberately ignored: on failure the result is "".
	raw, _ := json.Marshal(encoded)
	return string(raw)
}

// jsonCallResult is the JSON form of one CallResult as produced by
// marshalCallResults.
type jsonCallResult struct {
	Name      string `json:"name"`            // copied from CallResult.Name
	Arguments string `json:"arguments"`       // copied from CallResult.Arguments
	Result    string `json:"result"`          // copied from CallResult.Result
	Error     string `json:"error,omitempty"` // CallResult.Error's message; omitted when there was no error
}

// marshalCallResults encodes results as a JSON array of jsonCallResult
// objects, preserving order. A result's error, when set, is flattened to
// its message string. A nil or empty input yields "[]".
func marshalCallResults(results []CallResult) string {
	encoded := make([]jsonCallResult, len(results))
	for i, res := range results {
		var errText string
		if res.Error != nil {
			errText = res.Error.Error()
		}
		encoded[i] = jsonCallResult{
			Name:      res.Name,
			Arguments: res.Arguments,
			Result:    res.Result,
			Error:     errText,
		}
	}
	// Marshal error deliberately ignored: on failure the result is "".
	raw, _ := json.Marshal(encoded)
	return string(raw)
}

// jsonToolDef is the JSON form of one toolbox entry as produced by
// marshalToolDefs; only the name and description are kept.
type jsonToolDef struct {
	Name        string `json:"name"`        // tool name
	Description string `json:"description"` // tool description
}

// marshalToolDefs encodes the name and description of every tool in tb as
// a JSON array of jsonToolDef objects. It returns the empty string (not
// "[]") when tb is nil or contains no tools.
func marshalToolDefs(tb *llm.Toolbox) string {
	if tb == nil {
		return ""
	}

	tools := tb.Tools()
	count := len(tools)
	if count == 0 {
		return ""
	}

	defs := make([]jsonToolDef, 0, count)
	for _, tool := range tools {
		def := jsonToolDef{Name: tool.Name, Description: tool.Description}
		defs = append(defs, def)
	}

	// Marshal error deliberately ignored: on failure the result is "".
	raw, _ := json.Marshal(defs)
	return string(raw)
}