From 130c2bdfab073239c4550a933cca23c927359ddb Mon Sep 17 00:00:00 2001 From: Steve Dudenhoeffer Date: Fri, 26 Jun 2026 20:36:32 -0400 Subject: [PATCH] P2: move compactor -> compact/ + step instrumentation -> run/steps.go - compact/compactor.go: the per-run stateful context compactor (token-threshold gate, fast-tier middle summarisation, fold memory) lifted from mort's skillexec/compactor.go. Self-contained; its only dependency is a ModelResolver func (model.ParseModelForContext satisfies it) + a token threshold. - run/steps.go: the step-emission/instrumentation (stepEmitter, tool->kind/ summary mapping with redaction, Result.Steps accumulation) from agentexec, repointed onto executus/tool. Both build green. With the run-loop mechanics, RunnableAgent DTO, run.Ports, compactor, and step instrumentation now all in place, the remaining P2 work is the run.Executor itself (wiring these + majordomo's agent loop), which makes executus runnable. Co-Authored-By: Claude Opus 4.8 (1M context) --- CLAUDE.md | 5 +- compact/compactor.go | 331 +++++++++++++++++++++++++++++++++++ run/steps.go | 407 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 741 insertions(+), 2 deletions(-) create mode 100644 compact/compactor.go create mode 100644 run/steps.go diff --git a/CLAUDE.md b/CLAUDE.md index 2c38b9d..06ee6ed 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -45,7 +45,8 @@ CORE (majordomo + stdlib): identity/ caller identity seams [P0 ✓] run/ run-loop mechanics + RunnableAgent DTO + [P2 wip] nil-safe run.Ports (Audit/Budget/Critic/ - Checkpointer/PaletteSource) defined; the + Checkpointer/PaletteSource) + step + instrumentation (steps.go) done; the agentexec+skillexec -> run.Executor MERGE (consuming Ports) is the remaining P2 work [P2] dispatchguard/ loop/depth/fan-out caps [P0 ✓] @@ -55,7 +56,7 @@ CORE (majordomo + stdlib): (convar->config.Source; UsageSink/TraceSink seams; GenerateWith[T] structured output — no separate structured/ pkg) llmmeta/ shared meta-LLM helper over model/ [P1 ✓] - compact/ context compactor (WithCompactor hook) [P2] + compact/ context compactor (WithCompactor hook) [P2 ✓] tools/{web,net,store,compose,meta,comms} generic tools [P3] BATTERIES (opt-in siblings, each nil-safe + a default): diff --git a/compact/compactor.go b/compact/compactor.go new file mode 100644 index 0000000..0749ada --- /dev/null +++ b/compact/compactor.go @@ -0,0 +1,331 @@ +// V15.2 context compactor (re-based on majordomo). +// +// Why: the agent loop accumulates tool results indefinitely. A +// research-heavy run with many web_search / read_page / http_get +// results easily crosses 200K tokens and trips the model's HTTP-400 +// "prompt too long" rejection mid-run (observed at 410K tokens on +// qwen3-coder:480b which has a 262K cap). majordomo's agent loop calls +// the compactor with the full message slice before every model call; +// the compactor returns a shorter slice that preserves the system +// prompt + recent messages, with the middle range replaced by a +// synthetic summary. +// +// Strategy (unchanged from the agentkit era): +// - Keep any leading system message verbatim. (Under majordomo the +// system prompt normally travels in Request.System, not in the +// message slice, so this is defensive.) +// - Keep the last KeepRecent messages verbatim. This ensures the +// agent has fresh tool state to act on; compacting too aggressively +// would strip the in-flight context it needs to make the next +// decision. +// - Compress the middle range via a single fast-tier LLM call that +// receives the middle messages as raw text and produces a paragraph +// summary (URLs visited, key findings, file_ids created, what the +// agent is trying to accomplish). +// - Replace the middle range with one synthetic user-role message +// containing the summary. (user-role chosen because tool-result-role +// would be ambiguous without a matching tool_call_id.) +// +// What moved in the majordomo conversion: +// - The token-threshold gate lives HERE now. agentkit estimated +// tokens and only invoked the compactor past a configured +// threshold; majordomo's hook fires before every model call, so +// the threshold check (estimateTokens vs the per-run threshold the +// executor computes from the model's context limit) is the +// compactor's first step. +// - The compactor is per-run STATEFUL: majordomo does not replace +// the loop's internal transcript with the compacted slice (the +// hook shapes only what is SENT), so without memory the middle +// would be re-summarised from scratch on every step past the +// threshold. The state remembers how far the transcript has been +// folded into the running summary and folds the previous summary +// into the next one instead of re-paying for it. +// +// Failure path: any error (LLM unavailable, malformed response, etc.) +// is returned to the agent loop, which treats compactor errors as +// non-fatal and sends the original slice — if the next model call hits +// the provider's limit, the existing HTTP-400 error path takes over. + +package compact + +import ( + "context" + "errors" + "fmt" + "strings" + "sync" + + "gitea.stevedudenhoeffer.com/steve/majordomo/llm" +) + +// ModelResolver resolves a tier/spec to a usable llm.Model (and an enriched +// context for usage attribution). model.ParseModelForContext satisfies it. +type ModelResolver func(ctx context.Context, tier string) (context.Context, llm.Model, error) + +// Compactor is the per-run compaction hook handed to the agent loop +// (matches the signature agent.WithCompactor expects). +type Compactor = func(ctx context.Context, msgs []llm.Message) ([]llm.Message, error) + +// CompactionEvent describes one fired compaction so the executor can +// log it to skill_run_logs ("compaction_fired"). +type CompactionEvent struct { + // MessagesBefore/After count the messages that would have been + // sent without/with this compaction. + MessagesBefore int + MessagesAfter int + // TokensBefore/After are the estimateTokens values for the same + // two slices. + TokensBefore int + TokensAfter int +} + +// CompactorFactory mints a fresh per-run Compactor bound to a token +// threshold. onFire (nil-safe) observes every compaction that actually +// fires. A non-positive threshold yields a pass-through compactor. +type CompactorFactory func(thresholdTokens int, onFire func(CompactionEvent)) Compactor + +// CompactorConfig controls the v15.2 compactor's behaviour. Construct +// in mort.go from convars + the executor's ModelResolver. +type CompactorConfig struct { + // Models resolves a model spec (typically "fast" or a specific tier) + // to an llm.Model. Required; nil disables compaction. + Models ModelResolver + + // SummarizerTier is the model tier used for the compression LLM call. + // Production default "fast"; an admin may set "haiku" or a specific + // model spec. Empty falls back to "fast". + SummarizerTier string + + // KeepRecent is the number of trailing messages preserved verbatim. + // Default 8. Lower values compact more aggressively (the next loop + // iteration sees less recent context); higher values keep more. + KeepRecent int + + // SummaryWordCap bounds the LLM-generated summary length. Default + // 200 words ≈ 800 chars ≈ 200 tokens — small enough that the + // compaction always shrinks the slice meaningfully. + SummaryWordCap int +} + +// NewCompactor returns a CompactorFactory implementing the middle-range +// summarisation strategy. nil cfg.Models returns a factory of no-op +// compactors that always return the input unchanged (degrades to +// v15.1 behaviour). +func NewCompactor(cfg CompactorConfig) CompactorFactory { + if cfg.Models == nil { + return func(int, func(CompactionEvent)) Compactor { + return func(_ context.Context, msgs []llm.Message) ([]llm.Message, error) { + return msgs, nil + } + } + } + if cfg.SummarizerTier == "" { + cfg.SummarizerTier = "fast" + } + if cfg.KeepRecent <= 0 { + cfg.KeepRecent = 8 + } + if cfg.SummaryWordCap <= 0 { + cfg.SummaryWordCap = 200 + } + return func(threshold int, onFire func(CompactionEvent)) Compactor { + st := &compactionState{} + return func(ctx context.Context, msgs []llm.Message) ([]llm.Message, error) { + return compactIfNeeded(ctx, cfg, st, threshold, onFire, msgs) + } + } +} + +// compactionState is the per-run fold memory: msgs[:prefixEnd] +// (excluding a leading system message) are represented by summaryText +// in the rendered slice. Guarded by mu for safety although the agent +// loop invokes the hook from a single goroutine. +type compactionState struct { + mu sync.Mutex + prefixEnd int + summaryText string +} + +// compactIfNeeded is the workhorse: render the transcript with any +// existing fold applied, check the threshold, and fold more of the +// middle into the summary when the rendered size still exceeds it. +func compactIfNeeded(ctx context.Context, cfg CompactorConfig, st *compactionState, + threshold int, onFire func(CompactionEvent), msgs []llm.Message) ([]llm.Message, error) { + st.mu.Lock() + defer st.mu.Unlock() + + rendered := renderCompacted(st, msgs) + if threshold <= 0 { + return rendered, nil + } + tokensBefore := estimateTokens(rendered) + if tokensBefore < threshold { + return rendered, nil + } + + // Determine the new middle range to fold: everything between what + // is already summarised (or the optional leading system message) + // and the KeepRecent tail. + startMiddle := st.prefixEnd + if startMiddle == 0 && len(msgs) > 0 && msgs[0].Role == llm.RoleSystem { + startMiddle = 1 + } + endMiddle := len(msgs) - cfg.KeepRecent + if endMiddle <= startMiddle { + // Nothing new to fold (the tail alone exceeds the threshold). + // Return the rendered slice; the model call may still succeed. + return rendered, nil + } + middle := msgs[startMiddle:endMiddle] + + summary, err := summariseMiddle(ctx, cfg, st.summaryText, middle) + if err != nil { + // Non-fatal upstream: the agent loop sends the original slice. + return rendered, fmt.Errorf("compactor: summarise middle: %w", err) + } + st.summaryText = summary + st.prefixEnd = endMiddle + + out := renderCompacted(st, msgs) + if onFire != nil { + onFire(CompactionEvent{ + MessagesBefore: len(rendered), + MessagesAfter: len(out), + TokensBefore: tokensBefore, + TokensAfter: estimateTokens(out), + }) + } + return out, nil +} + +// renderCompacted applies the fold state to msgs: [optional system] + +// [synthetic summary] + msgs[prefixEnd:]. With no fold yet, msgs is +// returned unchanged. +func renderCompacted(st *compactionState, msgs []llm.Message) []llm.Message { + if st.prefixEnd <= 0 || st.prefixEnd > len(msgs) { + return msgs + } + tail := msgs[st.prefixEnd:] + out := make([]llm.Message, 0, len(tail)+2) + if msgs[0].Role == llm.RoleSystem { + out = append(out, msgs[0]) + } + out = append(out, llm.UserText( + "[CONTEXT COMPACTED] The earlier portion of this conversation was summarised "+ + "to fit the model's context window. Summary:\n\n"+st.summaryText+ + "\n\nResume from the recent messages below.")) + out = append(out, tail...) + return out +} + +// summariseMiddle composes a "compress this transcript" prompt and +// fires one fast-tier LLM call. prevSummary (may be empty) is the +// running summary from earlier compactions; it is folded into the new +// summary so prior context is not lost. +func summariseMiddle(ctx context.Context, cfg CompactorConfig, prevSummary string, middle []llm.Message) (string, error) { + if len(middle) == 0 { + return "", errors.New("compactor: empty middle range") + } + modelCtx, model, err := cfg.Models(ctx, cfg.SummarizerTier) + if err != nil { + return "", fmt.Errorf("compactor: resolve summarizer model %q: %w", cfg.SummarizerTier, err) + } + if model == nil { + return "", errors.New("compactor: summarizer model resolved to nil") + } + if modelCtx != nil { + ctx = modelCtx + } + + var prior string + if strings.TrimSpace(prevSummary) != "" { + prior = "AN EARLIER PORTION WAS ALREADY SUMMARISED AS:\n" + prevSummary + + "\n\nFold that summary into your new one — its facts must survive.\n\n" + } + transcript := renderTranscript(middle) + prompt := fmt.Sprintf( + "You are compressing an in-flight agent's conversation transcript so the agent "+ + "can continue working without blowing its model context. The transcript below is "+ + "a sequence of tool calls and their results. Produce a single paragraph (under %d words) "+ + "that captures:\n"+ + " - WHAT the agent has been trying to accomplish.\n"+ + " - WHICH URLs were visited / fetched (list inline, comma-separated).\n"+ + " - KEY findings or decisions (factual results the agent will need later).\n"+ + " - ANY file_ids or KV keys the agent created — these are persistent state references the agent must keep.\n"+ + " - ANY errors or dead-ends that the agent should not re-try.\n"+ + "DO NOT include verbose HTTP headers, tool-call metadata, error stack traces, or repetitive content. "+ + "DO NOT add commentary or markdown headers. Output prose only.\n\n"+ + "%sTRANSCRIPT TO COMPRESS:\n%s", + cfg.SummaryWordCap, + prior, + transcript, + ) + + resp, err := model.Generate(ctx, llm.Request{Messages: []llm.Message{llm.UserText(prompt)}}) + if err != nil { + return "", fmt.Errorf("compactor: summarise LLM call: %w", err) + } + text := strings.TrimSpace(resp.Text()) + if text == "" { + return "", errors.New("compactor: summarizer returned empty text") + } + return text, nil +} + +// estimateTokens is the chars/4 heuristic over a message slice's text, +// tool calls, and tool results. Images count a flat ~1K tokens each. +// It intentionally matches the coarse estimator the old agentkit loop +// used — the 0.7 threshold ratio provides the safety margin. +func estimateTokens(msgs []llm.Message) int { + chars := 0 + for _, m := range msgs { + for _, p := range m.Parts { + switch v := p.(type) { + case llm.TextPart: + chars += len(v.Text) + case llm.ImagePart: + chars += 4096 + } + } + for _, tc := range m.ToolCalls { + chars += len(tc.Name) + len(tc.Arguments) + } + for _, tr := range m.ToolResults { + chars += len(tr.Content) + } + } + return chars / 4 +} + +// transcriptMessageCap bounds individual message bodies at ~2KB so a +// single ultra-long tool result can't dominate the prompt sent to the +// summarizer. +const transcriptMessageCap = 2048 + +// renderTranscript flattens a message slice to a plain-text transcript +// suitable for the summarisation prompt. Tool calls show name + args, +// tool results show name + body. Empty fields are skipped. +func renderTranscript(msgs []llm.Message) string { + var sb strings.Builder + for i, m := range msgs { + fmt.Fprintf(&sb, "---\n[%d] role=%s\n", i+1, m.Role) + if text := m.Text(); text != "" { + sb.WriteString(truncate(text, transcriptMessageCap)) + sb.WriteString("\n") + } + for _, tc := range m.ToolCalls { + fmt.Fprintf(&sb, "tool_call name=%s args=%s\n", tc.Name, truncate(string(tc.Arguments), transcriptMessageCap)) + } + for _, tr := range m.ToolResults { + fmt.Fprintf(&sb, "tool_result name=%s body=%s\n", tr.Name, truncate(tr.Content, transcriptMessageCap)) + } + } + return sb.String() +} + +func truncate(s string, n int) string { + if len(s) <= n { + return s + } + return s[:n] + "...(truncated)" +} diff --git a/run/steps.go b/run/steps.go new file mode 100644 index 0000000..670e09a --- /dev/null +++ b/run/steps.go @@ -0,0 +1,407 @@ +package run + +// steps.go — the per-run step emitter and the tool→step presentation +// mapping. This is the single place that turns the executor's two loop +// chokepoints (the pre-dispatch tool hook + the post-step observer in +// executor.go) into ordered tool.Step records: one per tool call, +// each with a stable id, an open-vocabulary kind, and a human +// present-tense summary that flips running→complete/error. +// +// One source feeds two consumers (mirroring the OnEvent/OnToolEvent/ +// PostRunResult pattern): the live tool.Invocation.OnStep callback +// (nil-safe) AND snapshot(), which the executor copies onto Result.Steps. +// Because the Result accumulation does not depend on OnStep being set, +// every surface — chat (JSON + SSE), Discord, cron, sub-agents — carries +// steps; OnStep is needed only for live streaming. + +import ( + "context" + "encoding/json" + "fmt" + "net/url" + "strings" + "time" + + "gitea.stevedudenhoeffer.com/steve/majordomo/llm" + + "gitea.stevedudenhoeffer.com/steve/executus/tool" +) + +// stepSummaryMaxLen caps the human summary length (section G size cap). +// Detail is unused in v1 (no live detail source while replies are +// generated blocking) so there is no Detail cap yet. +const stepSummaryMaxLen = 200 + +// stepEmitter accumulates ordered steps for one run and fires the live +// OnStep callback. +// +// Concurrency: touched ONLY from the agent-loop goroutine. Both call +// sites (the hookToolbox `before` closure and the stepObserver) run +// there; majordomo executes a step's tool calls sequentially, and +// sub-agents build their own Invocation so they never reach this +// emitter. Same single-goroutine contract as the audit Writer and the +// critic ProgressRecorder — no internal lock. +type stepEmitter struct { + onStep func(ctx context.Context, ev tool.StepEvent) + now func() time.Time + + seq int + steps []tool.Step // ordered; the snapshot for Result.Steps + byID map[string]int // step id -> index into steps + pending map[string][]string // correlation key -> queued running ids (FIFO) +} + +// newStepEmitter returns an emitter that forwards to onStep (nil-safe). +func newStepEmitter(onStep func(ctx context.Context, ev tool.StepEvent)) *stepEmitter { + return &stepEmitter{ + onStep: onStep, + now: time.Now, + byID: map[string]int{}, + pending: map[string][]string{}, + } +} + +// corrKey correlates a "start" (name + raw args, no call id available at +// the pre-dispatch hook) with its later "end" (the stepObserver has the +// full call incl. id + the same raw args). +func corrKey(name string, args json.RawMessage) string { + return name + "\x00" + string(args) +} + +// toolStart records + emits the "start" of a tool call. Called from the +// pre-dispatch hookToolbox closure, before the tool runs. +func (e *stepEmitter) toolStart(ctx context.Context, name string, args json.RawMessage) { + if e == nil { + return + } + step := e.newStep(name, args) + key := corrKey(name, args) + e.pending[key] = append(e.pending[key], step.ID) + e.fire(ctx, "start", step) +} + +// toolEnd records + emits the terminal "end" of a tool call. Called from +// the stepObserver for each completed tool call. If no matching start was +// seen (e.g. a tool with a nil handler the pre-dispatch hook skipped), a +// start is synthesized so the step still appears. +func (e *stepEmitter) toolEnd(ctx context.Context, call llm.ToolCall, result string, isError bool) { + if e == nil { + return + } + id := e.matchPending(call.Name, call.Arguments) + if id == "" { + id = e.newStep(call.Name, call.Arguments).ID + } + idx, ok := e.byID[id] + if !ok { + return + } + step := &e.steps[idx] + end := e.now() + step.EndedAt = &end + if isError { + step.Status = "error" + } else { + step.Status = "complete" + } + if s := summaryForEnd(call.Name, call.Arguments, result, isError); s != "" { + step.Summary = s + } + e.fire(ctx, "end", *step) +} + +// newStep mints + appends a running step and returns it (by value). +func (e *stepEmitter) newStep(name string, args json.RawMessage) tool.Step { + e.seq++ + step := tool.Step{ + ID: fmt.Sprintf("s%d", e.seq), + Kind: kindForTool(name), + Title: name, + Summary: summaryForStart(name, args), + Status: "running", + StartedAt: e.now(), + } + e.byID[step.ID] = len(e.steps) + e.steps = append(e.steps, step) + return step +} + +// matchPending pops the oldest running step id for (name, args). Falls +// back to the most recent still-running step of the same tool name when +// the args don't byte-match between start and end. Returns "" on no match. +func (e *stepEmitter) matchPending(name string, args json.RawMessage) string { + key := corrKey(name, args) + if q := e.pending[key]; len(q) > 0 { + id := q[0] + if len(q) == 1 { + delete(e.pending, key) + } else { + e.pending[key] = q[1:] + } + return id + } + for i := len(e.steps) - 1; i >= 0; i-- { + if e.steps[i].Title == name && e.steps[i].Status == "running" { + return e.steps[i].ID + } + } + return "" +} + +func (e *stepEmitter) fire(ctx context.Context, phase string, step tool.Step) { + if e.onStep == nil { + return + } + e.onStep(ctx, tool.StepEvent{Phase: phase, Step: step}) +} + +// snapshot returns a copy of the ordered, deduplicated step set for the +// run Result. A step still "running" at run end (e.g. the run was +// cancelled mid-tool-call) is reported as-is. +func (e *stepEmitter) snapshot() []tool.Step { + if e == nil || len(e.steps) == 0 { + return nil + } + out := make([]tool.Step, len(e.steps)) + copy(out, e.steps) + return out +} + +// kindForTool maps a tool name to an open-vocabulary step kind. Unknown +// tools fall back to "tool" — never an error, just a generic step (the +// client maps unknown kinds to a default icon). Loosely tracks the +// catalog in pkg/skilltools/CLAUDE.md. +func kindForTool(name string) string { + switch name { + case "web_search", "search_reddit", "wikipedia_summary": + return "search" + case "read_page", "read_pdf", "read_reddit", "read_video", "verify_url", + "summary_summarise", "summarize", "file_get_text", "file_get_metadata", + "http_get", "http_post", "http_get_stream", "http_stream_read": + return "read" + case "code_exec", "calculate": + return "code" + case "file_save", "file_get", "file_list", "file_delete", "file_search": + return "file" + case "kv_get", "kv_set", "kv_list", "kv_delete", + "remember", "recall", "chatbot_get_memories": + return "memory" + case "query", "query_research", "deepresearch", "animate", + "agent_invoke", "agent_invoke_parallel", "agent_spawn", + "agent_spawn_parallel", "skill_invoke", "skill_invoke_parallel": + return "delegate" + case "think": + return "thinking" + default: + switch { + case strings.HasPrefix(name, "image") || strings.Contains(name, "draw"): + return "image" + default: + return "tool" + } + } +} + +// summaryForStart builds the human present-tense running summary. It +// derives specifics from safe arg fields only; secret-bearing tools +// (mcp_call, email_send, http_*) are summarized without echoing args. +func summaryForStart(name string, args json.RawMessage) string { + var s string + switch name { + case "web_search": + if q := argString(args, "query", "q"); q != "" { + s = fmt.Sprintf("Searching the web for %q", q) + } else { + s = "Searching the web" + } + case "search_reddit": + if q := argString(args, "query", "q"); q != "" { + s = fmt.Sprintf("Searching Reddit for %q", q) + } else { + s = "Searching Reddit" + } + case "wikipedia_summary": + if q := argString(args, "query", "title"); q != "" { + s = fmt.Sprintf("Looking up %q on Wikipedia", q) + } else { + s = "Looking up Wikipedia" + } + case "read_page", "read_pdf", "read_reddit", "read_video", "verify_url": + if u := argString(args, "url", "post", "page"); u != "" { + s = "Reading " + hostOf(u) + } else { + s = "Reading a page" + } + case "http_get", "http_post", "http_get_stream": + // Show host only — a full URL can embed credentials/tokens. + if u := argString(args, "url"); u != "" { + s = "Fetching " + hostOf(u) + } else { + s = "Making an HTTP request" + } + case "summary_summarise", "summarize": + s = "Summarizing text" + case "translate": + if lang := argString(args, "target_lang", "target_language", "lang"); lang != "" { + s = "Translating to " + lang + } else { + s = "Translating text" + } + case "code_exec": + s = "Running code" + case "calculate": + if q := argString(args, "query", "expression", "expr"); q != "" { + s = "Calculating " + truncateStep(q, 60) + } else { + s = "Calculating" + } + case "remember": + // Never echo the stored value. + s = "Saving a memory" + case "recall", "chatbot_get_memories": + s = "Recalling memories" + case "kv_get", "kv_list": + s = "Reading saved data" + case "kv_set": + s = "Saving data" + case "kv_delete": + s = "Deleting saved data" + case "file_save": + if n := argString(args, "name", "filename"); n != "" { + s = "Saving file " + truncateStep(n, 60) + } else { + s = "Saving a file" + } + case "file_get", "file_get_text", "file_get_metadata": + s = "Reading a file" + case "file_list", "file_search": + s = "Listing files" + case "query", "query_research": + if q := argString(args, "query", "question", "prompt", "task"); q != "" { + s = "Researching " + truncateStep(q, 80) + } else { + s = "Researching" + } + case "deepresearch": + s = "Running deep research" + case "animate": + s = "Generating an animation" + case "agent_invoke", "agent_spawn": + if a := argString(args, "agent", "agent_name", "name"); a != "" { + s = "Delegating to " + a + } else { + s = "Delegating to a sub-agent" + } + case "agent_invoke_parallel", "agent_spawn_parallel": + s = "Delegating to sub-agents" + case "skill_invoke": + if sk := argString(args, "skill_name", "skill", "name"); sk != "" { + s = "Running skill " + sk + } else { + s = "Running a skill" + } + case "skill_invoke_parallel": + s = "Running skills in parallel" + case "think": + s = "Thinking" + case "mcp_call": + // Redact: MCP args frequently carry secrets. Name server/tool only. + srv, tl := argString(args, "server"), argString(args, "tool") + switch { + case srv != "" && tl != "": + s = fmt.Sprintf("Calling %s/%s", srv, tl) + case srv != "": + s = "Calling " + srv + default: + s = "Calling an MCP tool" + } + case "email_send": + // Redact recipients + body. + s = "Sending an email" + default: + s = "Using " + name + } + return truncateStep(s, stepSummaryMaxLen) +} + +// summaryForEnd optionally upgrades the summary to a cheap result phrase. +// Returns "" to keep the running summary (the caller then just flips the +// status). Never returns a phrase derived from raw result bytes. +func summaryForEnd(name string, _ json.RawMessage, result string, isError bool) string { + if isError { + return "" + } + switch name { + case "web_search", "search_reddit": + if n := countResults(result); n >= 0 { + return fmt.Sprintf("Found %d result%s", n, plural(n)) + } + } + return "" +} + +// argString pulls the first present non-empty string field from a tool's +// raw JSON args, trying keys in order. Returns "" when none parse. +func argString(args json.RawMessage, keys ...string) string { + if len(args) == 0 { + return "" + } + var m map[string]any + if err := json.Unmarshal(args, &m); err != nil { + return "" + } + for _, k := range keys { + if v, ok := m[k]; ok { + if s, ok := v.(string); ok && strings.TrimSpace(s) != "" { + return strings.TrimSpace(s) + } + } + } + return "" +} + +// countResults parses a v11-style {"results":[...]} envelope and returns +// the count, or -1 when the shape doesn't match. +func countResults(result string) int { + if strings.TrimSpace(result) == "" { + return -1 + } + var env struct { + Results []json.RawMessage `json:"results"` + } + if err := json.Unmarshal([]byte(result), &env); err != nil || env.Results == nil { + return -1 + } + return len(env.Results) +} + +// hostOf returns the bare host (no leading www.) of a URL, or a short +// form of the raw string when it doesn't parse as a URL. +func hostOf(raw string) string { + if u, err := url.Parse(raw); err == nil && u.Host != "" { + return strings.TrimPrefix(u.Host, "www.") + } + return truncateStep(raw, 60) +} + +// truncateStep rune-safely caps s to max, appending an ellipsis when cut. +func truncateStep(s string, max int) string { + if max <= 0 { + return "" + } + r := []rune(s) + if len(r) <= max { + return s + } + if max == 1 { + return string(r[:1]) + } + return string(r[:max-1]) + "…" +} + +func plural(n int) string { + if n == 1 { + return "" + } + return "s" +}