package model import ( "context" "encoding/json" "errors" "fmt" "log/slog" "runtime/debug" "strings" "time" majordomo "gitea.stevedudenhoeffer.com/steve/majordomo" "gitea.stevedudenhoeffer.com/steve/majordomo/llm" "github.com/google/uuid" ) // CallResult captures the result of a single tool call execution. type CallResult struct { Name string Arguments string Result string Error error } // instrumentedModel decorates a parsed model so every successful Generate // records token usage to the usage sink automatically. This is the // single usage chokepoint: ANY call through a model from // ParseModelRequest / ParseModelForContext is accounted, whether it goes // through the helpers in this file, the agent loop, or a direct // model.Generate at a call site. // // IMPORTANT: do not call RecordUsage on responses from a parsed model — // that would double-count. RecordUsage exists for models obtained outside // this package. type instrumentedModel struct { inner llm.Model } func (m *instrumentedModel) Generate(ctx context.Context, req llm.Request, opts ...llm.Option) (*llm.Response, error) { resp, err := m.inner.Generate(ctx, req, opts...) if err == nil && resp != nil { recordUsage(ctx, resp) } return resp, err } func (m *instrumentedModel) Stream(ctx context.Context, req llm.Request, opts ...llm.Option) (llm.Stream, error) { return m.inner.Stream(ctx, req, opts...) } func (m *instrumentedModel) Capabilities() llm.Capabilities { return m.inner.Capabilities() } // CallAndExecute sends messages to the model with a toolbox, executes any // tool calls, and returns the results. It performs a single round of // generation + tool execution (no looping) — multi-step loops belong to // the agent package. 
func CallAndExecute(ctx context.Context, model llm.Model, systemPrompt string, toolbox *llm.Toolbox, messages []llm.Message, opts ...llm.Option) ([]CallResult, string, error) { req := llm.Request{System: systemPrompt, Messages: messages} allOpts := make([]llm.Option, 0, len(opts)+1) if toolbox != nil { allOpts = append(allOpts, llm.WithToolbox(toolbox)) } allOpts = append(allOpts, opts...) startTime := time.Now() resp, err := model.Generate(ctx, req, allOpts...) if err != nil { recordSpanFromWrapper(ctx, systemPrompt, messages, toolbox, nil, nil, startTime, err) return nil, "", fmt.Errorf("completion failed: %w", err) } if len(resp.ToolCalls) == 0 || toolbox == nil { recordSpanFromWrapper(ctx, systemPrompt, messages, toolbox, resp, nil, startTime, nil) return nil, resp.Text(), nil } var results []CallResult for _, call := range resp.ToolCalls { tr := toolbox.Execute(ctx, call) cr := CallResult{ Name: call.Name, Arguments: string(call.Arguments), Result: tr.Content, } if tr.IsError { cr.Error = errors.New(tr.Content) } results = append(results, cr) } recordSpanFromWrapper(ctx, systemPrompt, messages, toolbox, resp, results, startTime, nil) return results, resp.Text(), nil } // GenerateWith sends messages to the model with an optional system prompt and // returns structured output parsed into T. T must be a struct. Uses // majordomo's native structured output (response schema derived from T). func GenerateWith[T any](ctx context.Context, model llm.Model, systemPrompt string, messages []llm.Message, opts ...llm.Option) (T, error) { req := llm.Request{System: systemPrompt, Messages: messages} startTime := time.Now() // Capture the raw response so the trace span carries usage and the // concrete serving model even though majordomo.Generate only returns T. capture := &captureModel{inner: model} result, err := majordomo.Generate[T](ctx, capture, req, opts...) 
resolvedModel := resolvedModelName(ctx, capture.resp) if tracingEnabled(ctx) { span := Span{ SpanID: uuid.New().String(), TraceID: traceIDFromContext(ctx), Model: resolvedModel, SystemPrompt: systemPrompt, Messages: marshalMessages(messages), DurationMs: time.Since(startTime).Milliseconds(), StartedAt: startTime, CompletedAt: time.Now(), CreatedAt: time.Now(), } if capture.resp != nil { span.InputTokens = capture.resp.Usage.InputTokens span.OutputTokens = capture.resp.Usage.OutputTokens } if err != nil { span.Error = err.Error() // Structured-output failure: log loudly so operators can chase // down a regression (e.g. a model returning prose or fenced // JSON the decoder rejects) from the trace span alone. The // error string includes the failing field path on decode // errors. if isStructuredOutputParseError(err) { slog.Warn("llms.GenerateWith: structured-output parse failure", "model", resolvedModel, "span_id", span.SpanID, "trace_id", span.TraceID, "err", err.Error(), ) } } else { b, _ := json.Marshal(result) span.ResponseText = string(b) } traceSink.WriteSpan(span) } else if err != nil && isStructuredOutputParseError(err) { // Tracing disabled: slog.Warn is the only breadcrumb operators get. slog.Warn("llms.GenerateWith: structured-output parse failure (no trace span)", "model", resolvedModel, "err", err.Error(), ) } return result, err } // captureModel records the last successful response so wrappers that // only see the decoded result (majordomo.Generate) can still attribute // usage and tracing. type captureModel struct { inner llm.Model resp *llm.Response } func (m *captureModel) Generate(ctx context.Context, req llm.Request, opts ...llm.Option) (*llm.Response, error) { resp, err := m.inner.Generate(ctx, req, opts...) if err == nil { m.resp = resp } return resp, err } func (m *captureModel) Stream(ctx context.Context, req llm.Request, opts ...llm.Option) (llm.Stream, error) { return m.inner.Stream(ctx, req, opts...) 
} func (m *captureModel) Capabilities() llm.Capabilities { return m.inner.Capabilities() } // isStructuredOutputParseError reports whether err looks like a // structured-output failure from majordomo.Generate — either the decode // path ("decode structured response") or the empty-response path // ("structured response from ... is empty"). Used to gate the loud // slog.Warn so transport errors don't get tagged as parse failures. func isStructuredOutputParseError(err error) bool { if err == nil { return false } s := err.Error() return strings.Contains(s, "decode structured response") || strings.Contains(s, "structured response from") } // SimpleCall sends a single user message to the model with an optional system // prompt and returns the text response. No tools involved. func SimpleCall(ctx context.Context, model llm.Model, systemPrompt string, userMessage string, opts ...llm.Option) (string, error) { msgs := []llm.Message{llm.UserText(userMessage)} startTime := time.Now() resp, err := model.Generate(ctx, llm.Request{System: systemPrompt, Messages: msgs}, opts...) if err != nil { recordSpanFromWrapper(ctx, systemPrompt, msgs, nil, nil, nil, startTime, err) return "", fmt.Errorf("completion failed: %w", err) } recordSpanFromWrapper(ctx, systemPrompt, msgs, nil, resp, nil, startTime, nil) return resp.Text(), nil } // RecordUsage records LLM token usage from a successful Generate response. // // ONLY call this for models obtained outside this package: models returned // by ParseModelRequest / ParseModelForContext record usage automatically on // every Generate, and calling RecordUsage on their responses double-counts. func RecordUsage(ctx context.Context, resp llm.Response) { recordUsage(ctx, &resp) } // RecordSpan records a trace span for a direct model.Generate() call. // Call this from modules that invoke model.Generate() directly when they // want the call traced (usage is already recorded automatically for // parsed models). 
func RecordSpan(ctx context.Context, systemPrompt string, messages []llm.Message, toolbox *llm.Toolbox, resp *llm.Response, callResults []CallResult, startTime time.Time, callErr error) {
	// Exported entry point; the shared wrapper does the work.
	recordSpanFromWrapper(ctx, systemPrompt, messages, toolbox, resp, callResults, startTime, callErr)
}

// recordUsage reports the token usage of one response to the usage sink.
// It does nothing when no sink is configured, when resp is nil, or when
// every token counter is zero. The model name comes from
// resolvedModelName: the response's own Model field when set (it names
// the chain element that actually served the request, which is more
// precise than the requested spec), otherwise the attribution stored in
// the context by ParseModelForContext. An unattributed model is still
// recorded, but a warning is logged first.
func recordUsage(ctx context.Context, resp *llm.Response) {
	if resp == nil || usageSink == nil {
		return
	}

	usage := resp.Usage
	hasTokens := usage.InputTokens != 0 ||
		usage.OutputTokens != 0 ||
		usage.CacheReadTokens != 0 ||
		usage.CacheWriteTokens != 0
	if !hasTokens {
		return
	}

	name := resolvedModelName(ctx, resp)
	if name == "" || name == "unknown" {
		switch tool := toolFromContext(ctx); tool {
		case "unknown":
			// Neither model nor tool is known: include a stack trace so the
			// call site can be found.
			slog.Warn("model usage: recording with both unknown model and tool",
				"user", userFromContext(ctx),
				"stack", string(debug.Stack()))
		default:
			slog.Warn("model usage: recording with unknown model — caller should set model.WithModel or use model.ParseModelForContext",
				"tool", tool,
				"user", userFromContext(ctx))
		}
	}

	usageSink.Record(ctx, name, usage.InputTokens, usage.OutputTokens, usage.CacheReadTokens, usage.CacheWriteTokens)
}

// resolvedModelName chooses the name used for usage and trace
// attribution. When the response names its serving model, that name is
// used with everything up to and including the first "/" removed
// ("provider/model" → "model"). Otherwise the model requested in the
// context is resolved through ResolveModelName.
func resolvedModelName(ctx context.Context, resp *llm.Response) string {
	if resp == nil || resp.Model == "" {
		return ResolveModelName(modelFromContext(ctx))
	}
	if _, bare, found := strings.Cut(resp.Model, "/"); found {
		return bare
	}
	return resp.Model
}

// tracingEnabled returns true if there's an active trace and tracing is enabled.
func tracingEnabled(ctx context.Context) bool { if traceSink == nil { return false } return traceIDFromContext(ctx) != "" } // recordSpanFromWrapper records a trace span if tracing is active. func recordSpanFromWrapper(ctx context.Context, systemPrompt string, messages []llm.Message, toolbox *llm.Toolbox, resp *llm.Response, callResults []CallResult, startTime time.Time, callErr error) { if !tracingEnabled(ctx) { return } now := time.Now() span := Span{ SpanID: uuid.New().String(), TraceID: traceIDFromContext(ctx), Model: resolvedModelName(ctx, resp), SystemPrompt: systemPrompt, Messages: marshalMessages(messages), ToolDefinitions: marshalToolDefs(toolbox), DurationMs: now.Sub(startTime).Milliseconds(), StartedAt: startTime, CompletedAt: now, CreatedAt: now, } if callErr != nil { span.Error = callErr.Error() } if resp != nil { span.ResponseText = resp.Text() span.InputTokens = resp.Usage.InputTokens span.OutputTokens = resp.Usage.OutputTokens if len(resp.ToolCalls) > 0 { span.ResponseToolCalls = marshalToolCalls(resp.ToolCalls) } } if len(callResults) > 0 { span.ToolResults = marshalCallResults(callResults) } traceSink.WriteSpan(span) } // --- Serialization helpers --- type jsonMessage struct { Role string `json:"role"` Text string `json:"text,omitempty"` ToolCallID string `json:"tool_call_id,omitempty"` ImageCount int `json:"image_count,omitempty"` } func marshalMessages(msgs []llm.Message) string { out := make([]jsonMessage, 0, len(msgs)) for _, m := range msgs { jm := jsonMessage{ Role: string(m.Role), Text: m.Text(), } for _, p := range m.Parts { if _, ok := p.(llm.ImagePart); ok { jm.ImageCount++ } } if len(m.ToolResults) > 0 { jm.ToolCallID = m.ToolResults[0].ID } out = append(out, jm) } b, _ := json.Marshal(out) return string(b) } type jsonToolCall struct { ID string `json:"id"` Name string `json:"name"` Arguments string `json:"arguments"` } func marshalToolCalls(calls []llm.ToolCall) string { out := make([]jsonToolCall, 0, len(calls)) for _, c := range 
calls { out = append(out, jsonToolCall{ ID: c.ID, Name: c.Name, Arguments: string(c.Arguments), }) } b, _ := json.Marshal(out) return string(b) } type jsonCallResult struct { Name string `json:"name"` Arguments string `json:"arguments"` Result string `json:"result"` Error string `json:"error,omitempty"` } func marshalCallResults(results []CallResult) string { out := make([]jsonCallResult, 0, len(results)) for _, r := range results { jr := jsonCallResult{ Name: r.Name, Arguments: r.Arguments, Result: r.Result, } if r.Error != nil { jr.Error = r.Error.Error() } out = append(out, jr) } b, _ := json.Marshal(out) return string(b) } type jsonToolDef struct { Name string `json:"name"` Description string `json:"description"` } func marshalToolDefs(tb *llm.Toolbox) string { if tb == nil { return "" } tools := tb.Tools() if len(tools) == 0 { return "" } out := make([]jsonToolDef, 0, len(tools)) for _, t := range tools { out = append(out, jsonToolDef{ Name: t.Name, Description: t.Description, }) } b, _ := json.Marshal(out) return string(b) }