d0bd3ec3d9
executus CI / test (push) Has been cancelled
All 3 cloud models converged on a real access-control bug; fixed it + the other genuine findings (the false-positives were dropped): Security (HIGH — all 3 models): - create_file_url skipped ValidateScope: a same-skill caller could mint a PUBLIC url for a file scoped to another user/run. Now runs ValidateScope (admin-aware), skipped only for the descendant-grant case — mirroring the read tools. Other real fixes: - ValidateScope hard-coded `false` at every call site (admin branch dead) -> pass inv.CallerIsAdmin (the executor sets it via the host AdminPolicy; still false/fail-closed when no admin). Stale "no admin flag" comment corrected. - create_file_url: ExpiresInSeconds clamped BEFORE the *time.Second multiply (huge values overflowed to a negative duration that slipped under the cap, minting already-expired tokens); swallowed json.Marshal error now returned. - RegisterMeta: build the default budget WITH the configured MaxPerRun (was NewInMemorySearchBudget(nil) -> hardcoded 10, ignoring MetaDeps.MaxPerRun). - classify: all-zero scores no longer return a false-positive top-1 winner; coerceClassifyScore uses strconv.ParseFloat (rejects trailing garbage like "50extra" that fmt.Sscanf silently accepted). - file_delete: honor the descendant grant (parent can clean up a worker's artifacts) — was the lone cross-skill-reject-outright file tool. - meta tools: input caps truncate at a UTF-8 rune boundary (truncateUTF8), not mid-rune. - think: removed the dead `var _ = fmt.Errorf` import-keeper; file_save default aligned to 16 MiB (matched RegisterStore). Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
244 lines
9.3 KiB
Go
244 lines
9.3 KiB
Go
// Package tools — v12 summarize.
|
|
//
|
|
// One fast-tier LLM call: text in → concise text summary out. Either
|
|
// `text` or `file_id` (mutually exclusive) supplies the source. Per-run
|
|
// budget enforced via the existing v11 SearchBudget surface (kind=
|
|
// "summarize"); per-skill cost accounting via the meta-LLM helper's
|
|
// ledger (skill_llm_meta_calls).
|
|
//
|
|
// Why a dedicated tool (vs reusing summary_summarise): summary_
|
|
// summarise wraps the URL-summary pipeline used by /summary; it's
|
|
// over-coupled to a specific extraction flow. v12's summarize is the
|
|
// "given any text, give me a summary" primitive that downstream tools
|
|
// (read_page → summarize, extract → summarize) can compose freely.
|
|
//
|
|
// File-id input path: when the caller supplies file_id, we dereference
|
|
// via FileStorage. Cross-skill check rejects stolen IDs (matching
|
|
// file_get's pattern). Scope check denies user:bob's file from alice's
|
|
// invocation.
|
|
//
|
|
// Test: summarize_test.go covers happy path (mock helper), file_id
|
|
// input, oversize input truncation, budget exceeded, focus-arg
|
|
// pass-through, cross-skill file_id rejection, and the
|
|
// missing-both-args validation.
|
|
package tools
|
|
|
|
import (
|
|
"context"
|
|
"encoding/json"
|
|
"errors"
|
|
"fmt"
|
|
"strings"
|
|
|
|
"gitea.stevedudenhoeffer.com/steve/executus/llmmeta"
|
|
"gitea.stevedudenhoeffer.com/steve/executus/tool"
|
|
)
|
|
|
|
// Limits and defaults used by the summarize tool.
const (
	// summarizeMaxInputBytes is the hard ceiling on source text, in
	// bytes. Anything longer is cut down to this size and the result
	// is flagged `truncated=true` so the agent knows only a prefix was
	// summarised.
	summarizeMaxInputBytes = 32 * 1024

	// summarizeDefaultMaxWords is the max_words value applied when the
	// caller omits it (or passes a non-positive number). The effective
	// value is still subject to the configured word cap.
	summarizeDefaultMaxWords = 200

	// summarizeFallbackMaxWords is the word cap applied when no
	// SummarizeConfig is supplied.
	summarizeFallbackMaxWords = 1000

	// summarizeFallbackMaxPerRun is the per-run call cap applied when
	// no SummarizeConfig is supplied.
	summarizeFallbackMaxPerRun = 10
)
|
|
|
|
// SummarizeConfig is the small configuration surface the summarize
// tool consults while handling a call. Production wires a closure over
// the `skills.summarize.*` convars; passing nil makes the tool use the
// package-level fallback values instead.
type SummarizeConfig interface {
	// MaxPerRun reports how many summarize calls a single run may
	// make. It seeds the default in-memory budget when no budget is
	// injected.
	MaxPerRun(ctx context.Context) int

	// MaxWords reports the upper bound applied to a caller-requested
	// max_words.
	MaxWords(ctx context.Context) int
}
|
|
|
|
// summarizeArgs is the parameter struct exposed to the LLM.
//
// The source is given by exactly one of Text or FileID. Both exist
// because agents frequently hold large content (from read_page /
// read_pdf) as a stored file_id, per the v10 byte-vs-reference
// principle; making them inline it as a string would defeat that
// pattern. Text remains the easy route for short snippets.
// MaxWords and Focus are optional tuning knobs.
type summarizeArgs struct {
	Text     string `json:"text,omitempty" description:"The text to summarise. Either 'text' OR 'file_id' is required (not both). Capped at 32KB; longer inputs truncate with truncated=true in the result."`
	FileID   string `json:"file_id,omitempty" description:"Alternative to 'text': summarise the contents of a saved file (from read_page/read_pdf/file_save). Must belong to this skill."`
	MaxWords int    `json:"max_words,omitempty" description:"Maximum word count for the summary. Default 200, capped at skills.summarize.max_words (default 1000)."`
	Focus    string `json:"focus,omitempty" description:"Optional: what aspect to emphasise (e.g. 'security implications', 'cost analysis', 'main characters')."`
}
|
|
|
|
// summarizeResult is the JSON payload the summarize tool hands back to
// the agent. On success Summary, WordCount and ModelUsed are set (plus
// Truncated when the input was cut to the byte cap). On a handled
// failure Error carries a short message or error kind, and BudgetMsg
// adds a human-readable explanation when the per-run budget was hit.
type summarizeResult struct {
	Summary   string `json:"summary"`
	WordCount int    `json:"word_count"`
	ModelUsed string `json:"model_used"`
	Truncated bool   `json:"truncated,omitempty"`
	BudgetMsg string `json:"budget_message,omitempty"`
	Error     string `json:"error,omitempty"`
}
|
|
|
|
// NewSummarize constructs the summarize tool. helper / cfg / budget /
|
|
// fileStorage may all be nil; the handler surfaces clean errors at
|
|
// first call.
|
|
func NewSummarize(helper *llmmeta.Helper, cfg SummarizeConfig, budget SearchBudget, fileStorage FileStorage) tool.Tool {
|
|
return tool.NewGatedTool[summarizeArgs](
|
|
"summarize",
|
|
"Produce a concise summary of input text using a fast LLM. Pass either 'text' or 'file_id' (one of them is required). Optional 'focus' steers the summary; 'max_words' caps length (default 200). Counts against per-run and 7-day cost budgets.",
|
|
tool.Permission{
|
|
AuthoringRequirement: tool.RequirementAnyone,
|
|
OperatesOn: tool.ScopeCaller,
|
|
SafeForShare: true,
|
|
Categories: []string{"llm-meta", "cost-bearing"},
|
|
},
|
|
func(ctx context.Context, inv tool.Invocation, args summarizeArgs) (string, error) {
|
|
if helper == nil {
|
|
return "", fmt.Errorf("summarize: not configured")
|
|
}
|
|
text, truncated, err := loadSummarizeInput(ctx, inv, args, fileStorage)
|
|
if err != nil {
|
|
return marshalSummarizeResult(summarizeResult{Error: err.Error()}), nil
|
|
}
|
|
|
|
// Per-run budget BEFORE the LLM call so a runaway loop is
|
|
// bounded.
|
|
if budget == nil {
|
|
maxPerRun := summarizeFallbackMaxPerRun
|
|
if cfg != nil {
|
|
maxPerRun = cfg.MaxPerRun(ctx)
|
|
}
|
|
budget = NewInMemorySearchBudget(map[string]int{
|
|
"summarize": maxPerRun,
|
|
})
|
|
}
|
|
count, max, exceeded := budget.CheckAndIncrement(ctx, inv.RunID, "summarize")
|
|
if exceeded {
|
|
return marshalSummarizeResult(summarizeResult{
|
|
Error: "summarize_budget_exceeded",
|
|
BudgetMsg: fmt.Sprintf("per-run summarize budget exceeded (%d/%d). Work with the summaries you already have, or ask an admin to raise skills.summarize.max_per_run.", count, max),
|
|
}), nil
|
|
}
|
|
|
|
maxWords := args.MaxWords
|
|
if maxWords <= 0 {
|
|
maxWords = summarizeDefaultMaxWords
|
|
}
|
|
cap := summarizeFallbackMaxWords
|
|
if cfg != nil {
|
|
cap = cfg.MaxWords(ctx)
|
|
}
|
|
if maxWords > cap {
|
|
maxWords = cap
|
|
}
|
|
|
|
systemPrompt := "You produce concise, accurate summaries. Honor the requested word count. Do NOT invent facts."
|
|
userPrompt := buildSummarizePrompt(text, maxWords, args.Focus)
|
|
|
|
res, callErr := helper.Call(ctx, llmmeta.CallSpec{
|
|
Tier: "fast",
|
|
SystemPrompt: systemPrompt,
|
|
UserPrompt: userPrompt,
|
|
MaxOutputTokens: maxWords * 8, // ~8 tokens per word upper bound
|
|
ResponseFormat: "text",
|
|
ToolName: "summarize",
|
|
RunID: inv.RunID,
|
|
SkillID: inv.SkillID,
|
|
CallerID: inv.CallerID,
|
|
})
|
|
if callErr != nil {
|
|
return "", callErr
|
|
}
|
|
if !res.Success || res.Text == "" {
|
|
kind := res.ErrorKind
|
|
if kind == "" {
|
|
kind = "llm_unavailable"
|
|
}
|
|
return marshalSummarizeResult(summarizeResult{Error: kind}), nil
|
|
}
|
|
summary := strings.TrimSpace(res.Text)
|
|
return marshalSummarizeResult(summarizeResult{
|
|
Summary: summary,
|
|
WordCount: countWords(summary),
|
|
ModelUsed: res.ModelUsed,
|
|
Truncated: truncated,
|
|
}), nil
|
|
},
|
|
)
|
|
}
|
|
|
|
// loadSummarizeInput resolves the input text from either args.Text or
|
|
// args.FileID. Exactly one MUST be supplied; both empty AND both
|
|
// populated are rejected.
|
|
func loadSummarizeInput(ctx context.Context, inv tool.Invocation, args summarizeArgs, fileStorage FileStorage) (string, bool, error) {
|
|
hasText := strings.TrimSpace(args.Text) != ""
|
|
hasFile := strings.TrimSpace(args.FileID) != ""
|
|
if hasText == hasFile {
|
|
// Both empty OR both populated.
|
|
if !hasText {
|
|
return "", false, fmt.Errorf("summarize: one of 'text' or 'file_id' is required")
|
|
}
|
|
return "", false, fmt.Errorf("summarize: 'text' and 'file_id' are mutually exclusive — pass one")
|
|
}
|
|
if hasText {
|
|
return capInput(args.Text)
|
|
}
|
|
if fileStorage == nil {
|
|
return "", false, fmt.Errorf("summarize: file_id input requires file storage to be configured")
|
|
}
|
|
meta, content, err := fileStorage.FileGet(ctx, args.FileID)
|
|
if err != nil {
|
|
if errors.Is(err, ErrFileNotFound) {
|
|
return "", false, fmt.Errorf("summarize: file_id not found")
|
|
}
|
|
return "", false, fmt.Errorf("summarize: file fetch: %w", err)
|
|
}
|
|
if meta.SkillID != inv.SkillID {
|
|
return "", false, fmt.Errorf("summarize: file does not belong to this skill")
|
|
}
|
|
if err := ValidateScope(inv, meta.Scope, inv.CallerIsAdmin); err != nil {
|
|
return "", false, fmt.Errorf("summarize: %w", err)
|
|
}
|
|
return capInput(string(content))
|
|
}
|
|
|
|
// capInput truncates input to the hard byte cap, returning the
|
|
// (possibly truncated) text and a flag indicating truncation occurred.
|
|
func capInput(text string) (string, bool, error) {
|
|
if len(text) <= summarizeMaxInputBytes {
|
|
return text, false, nil
|
|
}
|
|
return truncateUTF8(text, summarizeMaxInputBytes), true, nil
|
|
}
|
|
|
|
// buildSummarizePrompt assembles the user message sent to the LLM: an
// instruction line stating the word limit, an optional "Emphasise"
// clause when focus is non-blank (focus is whitespace-trimmed first),
// then a blank line followed by the source text verbatim.
func buildSummarizePrompt(text string, maxWords int, focus string) string {
	instruction := fmt.Sprintf("Summarise the following text in at most %d words.", maxWords)
	if emphasis := strings.TrimSpace(focus); emphasis != "" {
		instruction += fmt.Sprintf(" Emphasise: %s.", emphasis)
	}
	return instruction + "\n\n" + text
}
|
|
|
|
// countWords gives an approximate word count by splitting on runs of
// whitespace. It feeds the word_count field of the response; small
// differences from the LLM's own notion of a word are acceptable.
func countWords(text string) int {
	words := strings.Fields(text)
	return len(words)
}
|
|
|
|
// marshalSummarizeResult serialises a summarizeResult to JSON.
|
|
func marshalSummarizeResult(r summarizeResult) string {
|
|
b, err := json.Marshal(r)
|
|
if err != nil {
|
|
return fmt.Sprintf(`{"error":"marshal_failed: %v"}`, err)
|
|
}
|
|
return string(b)
|
|
}
|