diff --git a/CLAUDE.md b/CLAUDE.md index 86a7be9..0c92127 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -58,7 +58,14 @@ CORE (majordomo + stdlib): structured output — no separate structured/ pkg) llmmeta/ shared meta-LLM helper over model/ [P1 ✓] compact/ context compactor (WithCompactor hook) [P2 ✓] - tools/{web,net,store,compose,meta,comms} generic tools [P3] + tools/ generic tool library: Register (think/now/ [P3 wip] + cite, zero-config) + RegisterMeta (classify/ + extract_entities/summarize) + RegisterStore + (kv_*/file_*, default static quota); seams in + research_providers.go/file_storage.go/ + kv_storage.go/quota_provider.go. End-to-end + "agent calls a tool" test green. Remaining: + web/net/compose groups + default backends [P3] BATTERIES (opt-in siblings, each nil-safe + a default): persona/ Agent noun + AgentStore seam + yml loader [P4] diff --git a/tools/cite.go b/tools/cite.go new file mode 100644 index 0000000..53e567e --- /dev/null +++ b/tools/cite.go @@ -0,0 +1,128 @@ +// Package tools — v11 cite. +// +// Anti-hallucination forcing function. The convention: agents call +// cite(claim, url) for every numbered reference in their final +// answer. The tool verifies the URL appears in the run's +// touched-URL set (populated by web_search results + +// read_page/read_pdf/read_video). If yes → write to +// skill_run_sources, return {ok: true}. If no → return +// {ok: false, reason: "url_not_in_run_history"} and DO NOT write. +// +// Skills authored without this discipline don't lose anything; +// skills WITH it produce more reliable citations. The webui +// renders the skill_run_sources rows as a Sources panel on the +// run trace page — invisible to skills that don't use cite(). +package tools + +import ( + "context" + "encoding/json" + "fmt" + "strings" + + "gitea.stevedudenhoeffer.com/steve/executus/tool" +) + +// citeParams is the LLM-facing param struct. 
+type citeParams struct { + Claim string `json:"claim" description:"The claim or fact you are asserting (e.g. 'Mort was published in 1987')."` + URL string `json:"url" description:"The URL that supports the claim. MUST be a URL the agent has previously read via read_page/read_pdf/read_video or seen as a web_search result."` +} + +// citeResponse is the JSON envelope returned to the agent. +// +// On success: ok=true, the skill_run_sources row was written. +// On failure: ok=false, reason=. +type citeResponse struct { + OK bool `json:"ok"` + Reason string `json:"reason,omitempty"` + Claim string `json:"claim,omitempty"` + URL string `json:"url,omitempty"` +} + +// NewCite constructs the v11 cite tool. cs may be nil — handler +// returns "not configured" at first call. +// +// The "anyone author / share-safe" permission shape matches every +// other v11 research-class tool. Skills that adopt cite() get the +// Sources panel automatically; skills that don't are unaffected. +func NewCite(cs CitationStorage) tool.Tool { + return tool.NewGatedTool[citeParams]( + "cite", + "Record a citation: a claim + the URL that supports it. The URL MUST be one the agent has actually fetched via read_page/read_pdf/read_video or seen as a web_search result — citing a URL the agent never visited is rejected with reason 'url_not_in_run_history'. 
Successful citations populate the run's Sources panel.", + tool.Permission{ + AuthoringRequirement: tool.RequirementAnyone, + OperatesOn: tool.ScopeGlobal, + SafeForShare: true, + Categories: []string{"citation"}, + }, + func(ctx context.Context, inv tool.Invocation, p citeParams) (string, error) { + if cs == nil { + return "", fmt.Errorf("cite: citation storage not configured") + } + claim := strings.TrimSpace(p.Claim) + urlStr := strings.TrimSpace(p.URL) + if claim == "" { + return marshalCite(citeResponse{ + OK: false, + Reason: "claim_empty", + }), nil + } + if urlStr == "" { + return marshalCite(citeResponse{ + OK: false, + Reason: "url_empty", + }), nil + } + if inv.RunID == "" { + // No run id → cite() can't verify history. Bail loud. + return marshalCite(citeResponse{ + OK: false, + Reason: "no_run_context", + Claim: claim, + URL: urlStr, + }), nil + } + + touched, err := cs.GetTouchedURLs(ctx, inv.RunID) + if err != nil { + return marshalCite(citeResponse{ + OK: false, + Reason: fmt.Sprintf("touched_lookup_failed: %v", err), + Claim: claim, + URL: urlStr, + }), nil + } + if _, ok := touched[urlStr]; !ok { + return marshalCite(citeResponse{ + OK: false, + Reason: "url_not_in_run_history", + Claim: claim, + URL: urlStr, + }), nil + } + + if err := cs.RecordCitation(ctx, inv.RunID, urlStr, claim); err != nil { + return marshalCite(citeResponse{ + OK: false, + Reason: fmt.Sprintf("record_failed: %v", err), + Claim: claim, + URL: urlStr, + }), nil + } + return marshalCite(citeResponse{ + OK: true, + Claim: claim, + URL: urlStr, + }), nil + }, + ) +} + +func marshalCite(r citeResponse) string { + b, err := json.Marshal(r) + if err != nil { + return fmt.Sprintf(`{"ok":false,"reason":"marshal_failed: %v"}`, err) + } + return string(b) +} diff --git a/tools/classify.go b/tools/classify.go new file mode 100644 index 0000000..95f1f3a --- /dev/null +++ b/tools/classify.go @@ -0,0 +1,327 @@ +// Package tools — v12 classify. 
+// +// Classification primitive: text + categories → labels + per-category +// scores. Single-label mode (default) returns the top-1 category; +// multi-label mode returns every category whose score crosses the +// threshold. +// +// Why a dedicated tool (vs reusing extract_entities for one-of-N +// classification): classification has a typed result (labels[] + +// scores{}) that downstream agents consume programmatically. Folding +// it into extract_entities would force every author to re-spec the +// scoring schema. +// +// Score normalisation: the LLM's reply is normalised so each score +// lands in [0, 1]. The single-label result returns scores for ALL +// categories so the author can read the distribution; multi-label +// returns labels[] of categories above 0.5. +// +// Test: classify_test.go covers single-label, multi-label, score +// normalisation, > 20 categories rejected, unknown category in the +// reply silently dropped. +package tools + +import ( + "context" + "encoding/json" + "fmt" + "strconv" + "strings" + + "gitea.stevedudenhoeffer.com/steve/executus/llmmeta" + "gitea.stevedudenhoeffer.com/steve/executus/tool" +) + +// classifyMaxInputBytes is the input cap. +const classifyMaxInputBytes = 16 * 1024 + +// classifyMaxCategories is the hard cap on category count. +const classifyMaxCategories = 20 + +// classifyMultiLabelThreshold is the score threshold above which a +// category appears in the labels[] array in multi-label mode. +const classifyMultiLabelThreshold = 0.5 + +// classifyFallbackMaxPerRun is the per-run cap when ClassifyConfig is +// nil. +const classifyFallbackMaxPerRun = 20 + +// ClassifyConfig is the narrow per-deployment config surface. +type ClassifyConfig interface { + MaxPerRun(ctx context.Context) int +} + +// classifyArgs is the LLM-facing param struct. +type classifyArgs struct { + Text string `json:"text" description:"The text to classify. Required. 
Capped at 16KB."` + Categories []string `json:"categories" description:"List of categories to score the text against. Required. Max 20."` + MultiLabel bool `json:"multi_label,omitempty" description:"When true, returns every category scoring above 0.5. Default false → single-label (top-1) result."` +} + +type classifyResult struct { + Labels []string `json:"labels,omitempty"` + Scores map[string]float64 `json:"scores,omitempty"` + ModelUsed string `json:"model_used,omitempty"` + RawReply string `json:"raw_reply,omitempty"` + Error string `json:"error,omitempty"` + BudgetMsg string `json:"budget_message,omitempty"` +} + +// NewClassify constructs the classify tool. +func NewClassify(helper *llmmeta.Helper, cfg ClassifyConfig, budget SearchBudget) tool.Tool { + return tool.NewGatedTool[classifyArgs]( + "classify", + "Classify text into one of N categories (or multiple via multi_label=true). Returns labels[] (top-1 by default) + scores{category: 0..1}. Counts against per-run and 7-day cost budgets.", + tool.Permission{ + AuthoringRequirement: tool.RequirementAnyone, + OperatesOn: tool.ScopeCaller, + SafeForShare: true, + Categories: []string{"llm-meta", "cost-bearing"}, + }, + func(ctx context.Context, inv tool.Invocation, args classifyArgs) (string, error) { + if helper == nil { + return "", fmt.Errorf("classify: not configured") + } + text := args.Text + if strings.TrimSpace(text) == "" { + return marshalClassifyResult(classifyResult{Error: "text is empty"}), nil + } + if len(args.Categories) == 0 { + return marshalClassifyResult(classifyResult{Error: "categories is empty"}), nil + } + if len(args.Categories) > classifyMaxCategories { + return marshalClassifyResult(classifyResult{ + Error: fmt.Sprintf("too many categories (%d > %d)", len(args.Categories), classifyMaxCategories), + }), nil + } + // Trim + dedupe categories so the LLM sees a clean + // schema. Order is preserved for the prompt; the result + // map is order-agnostic. 
+ categories := make([]string, 0, len(args.Categories)) + seen := make(map[string]bool, len(args.Categories)) + for _, c := range args.Categories { + c = strings.TrimSpace(c) + if c == "" || seen[c] { + continue + } + seen[c] = true + categories = append(categories, c) + } + if len(categories) == 0 { + return marshalClassifyResult(classifyResult{Error: "categories has no non-empty entries"}), nil + } + + if len(text) > classifyMaxInputBytes { + text = truncateUTF8(text, classifyMaxInputBytes) + } + + // Per-run budget gate. + if budget == nil { + maxPerRun := classifyFallbackMaxPerRun + if cfg != nil { + maxPerRun = cfg.MaxPerRun(ctx) + } + budget = NewInMemorySearchBudget(map[string]int{ + "classify": maxPerRun, + }) + } + count, max, exceeded := budget.CheckAndIncrement(ctx, inv.RunID, "classify") + if exceeded { + return marshalClassifyResult(classifyResult{ + Error: "classify_budget_exceeded", + BudgetMsg: fmt.Sprintf("per-run classify budget exceeded (%d/%d). Ask an admin to raise skills.classify.max_per_run.", count, max), + }), nil + } + + systemPrompt := "You classify text into a fixed set of categories. Return ONLY JSON. Score each category in [0,1] (1 = perfect fit). Sum of all scores does NOT need to be 1 — high overlap across categories is allowed." 
+ userPrompt := buildClassifyPrompt(text, categories, args.MultiLabel) + + res, callErr := helper.Call(ctx, llmmeta.CallSpec{ + Tier: "fast", + SystemPrompt: systemPrompt, + UserPrompt: userPrompt, + MaxOutputTokens: 2048, + ResponseFormat: "json", + RetryOnMalformedJSON: true, + ToolName: "classify", + RunID: inv.RunID, + SkillID: inv.SkillID, + CallerID: inv.CallerID, + }) + if callErr != nil { + return "", callErr + } + if !res.Success { + kind := res.ErrorKind + if kind == "" { + kind = "llm_unavailable" + } + return marshalClassifyResult(classifyResult{Error: kind}), nil + } + if res.ErrorKind == llmmeta.ErrorKindMalformedJSON || res.Parsed == nil { + return marshalClassifyResult(classifyResult{ + Error: "classification_failed", + RawReply: res.Text, + ModelUsed: res.ModelUsed, + }), nil + } + + parsedMap, ok := res.Parsed.(map[string]any) + if !ok { + return marshalClassifyResult(classifyResult{ + Error: "classification_failed_not_object", + RawReply: res.Text, + ModelUsed: res.ModelUsed, + }), nil + } + + scores := normaliseClassifyScores(parsedMap, categories) + labels := selectClassifyLabels(scores, categories, args.MultiLabel) + + return marshalClassifyResult(classifyResult{ + Labels: labels, + Scores: scores, + ModelUsed: res.ModelUsed, + }), nil + }, + ) +} + +// buildClassifyPrompt composes the user message. 
+func buildClassifyPrompt(text string, categories []string, multiLabel bool) string { + var sb strings.Builder + sb.WriteString("Classify the text below.\n\nCategories:\n") + for _, c := range categories { + sb.WriteString("- ") + sb.WriteString(c) + sb.WriteString("\n") + } + sb.WriteString("\nText:\n") + sb.WriteString(text) + sb.WriteString("\n\nReturn ONLY a JSON object: {\"scores\": {\"\": <0..1 float>, ...}}.") + if multiLabel { + sb.WriteString(" The same text may score high in MULTIPLE categories — score each independently.") + } else { + sb.WriteString(" Score each category; the highest-scoring one will be the chosen label.") + } + return sb.String() +} + +// normaliseClassifyScores extracts the scores map from the LLM's +// reply and clamps each value into [0, 1]. Categories absent from the +// reply default to 0. +// +// Why we accept either {"scores": {...}} or {...}: some models reply +// with the inner object directly, dropping the wrapping key. Both +// shapes are valid as long as the keys match the requested category +// names. +func normaliseClassifyScores(parsed map[string]any, categories []string) map[string]float64 { + scoresIn, ok := parsed["scores"].(map[string]any) + if !ok { + // Accept the bare-map shape too. + scoresIn = parsed + } + out := make(map[string]float64, len(categories)) + for _, c := range categories { + v, has := scoresIn[c] + if !has { + out[c] = 0 + continue + } + f, ok := coerceClassifyScore(v) + if !ok { + out[c] = 0 + continue + } + // Clamp into [0, 1]. + if f < 0 { + f = 0 + } + if f > 1 { + f = 1 + } + out[c] = f + } + return out +} + +// coerceClassifyScore reads a JSON value as a float in [0, 1]. Accepts +// floats, ints, and percent-strings ("85%" → 0.85). 
+func coerceClassifyScore(raw any) (float64, bool) { + switch v := raw.(type) { + case float64: + return v, true + case int: + return float64(v), true + case int64: + return float64(v), true + case string: + trimmed := strings.TrimSpace(v) + hasPct := strings.HasSuffix(trimmed, "%") + s := strings.TrimSuffix(trimmed, "%") + // strconv.ParseFloat (unlike fmt.Sscanf %f) rejects trailing garbage, + // so "50extra" / "0.5x" are refused instead of silently parsed as 50/0.5. + f, err := strconv.ParseFloat(strings.TrimSpace(s), 64) + if err == nil { + if hasPct { + f = f / 100.0 + } + return f, true + } + } + return 0, false +} + +// selectClassifyLabels picks the labels to surface. Single-label mode +// returns the highest-scoring category. Multi-label returns every +// category above the threshold (sorted by score desc for stable +// rendering). +func selectClassifyLabels(scores map[string]float64, categories []string, multiLabel bool) []string { + if multiLabel { + var labels []string + for _, c := range categories { + if scores[c] >= classifyMultiLabelThreshold { + labels = append(labels, c) + } + } + // Sort labels by score desc, then category-list order for ties. + sortClassifyLabelsByScore(labels, scores) + return labels + } + // Single-label: top-1. + bestCat := "" + bestScore := -1.0 + for _, c := range categories { + if scores[c] > bestScore { + bestScore = scores[c] + bestCat = c + } + } + // No category fit: an all-zero score set must not yield a false-positive + // top-1 (the first category trivially beats the -1.0 sentinel). Returning + // no label keeps "nothing matched" distinguishable from "category A won". + if bestCat == "" || bestScore <= 0 { + return nil + } + return []string{bestCat} +} + +// sortClassifyLabelsByScore sorts labels desc by score. Stable on +// ties (preserves category-list order). 
+func sortClassifyLabelsByScore(labels []string, scores map[string]float64) { + for i := 1; i < len(labels); i++ { + j := i + for j > 0 && scores[labels[j]] > scores[labels[j-1]] { + labels[j], labels[j-1] = labels[j-1], labels[j] + j-- + } + } +} + +func marshalClassifyResult(r classifyResult) string { + b, err := json.Marshal(r) + if err != nil { + return fmt.Sprintf(`{"error":"marshal_failed: %v"}`, err) + } + return string(b) +} diff --git a/tools/create_file_url.go b/tools/create_file_url.go new file mode 100644 index 0000000..3762f2b --- /dev/null +++ b/tools/create_file_url.go @@ -0,0 +1,222 @@ +// create_file_url mints a public-token URL (mort.sh/files/) +// that resolves to a saved file_id. Use it for artifacts that are too +// large for Discord (>25 MiB), need a stable link to share outside +// Discord, or where the recipient is not in mort's auth domain. +// +// Why a separate tool (vs always returning a URL from file_save): +// most files are private working state — only some need a public URL, +// and minting one is a deliberate act. Decoupling save from +// publication keeps the storage layer cheap (no token row per file) +// and the audit clean (you can grep skill_file_tokens for "who +// published what"). +// +// Cycle-break: this tool can't import pkg/logic/skills directly +// (pkg/logic/skills imports pkg/skilltools). The narrow interface +// FileTokenMinter is declared here; mort.go bridges to +// *skills.System.Storage() at wiring time. +package tools + +import ( + "context" + "crypto/rand" + "encoding/base64" + "encoding/json" + "errors" + "fmt" + "strings" + "time" + + "gitea.stevedudenhoeffer.com/steve/executus/tool" +) + +// FileToken is the wire-shape of the storage row that backs the +// public /files/ URL. Mirrors pkg/logic/skills.FileToken +// field-for-field; the adapter in mort.go is a struct copy. 
+// +// Why mirror (vs import skills.FileToken): same cycle constraint as +// FileDomainMeta / KVDomainEntry — the tool layer cannot import +// pkg/logic/skills. +type FileToken struct { + Token string + FileID string + SkillID string + CallerID string + CreatedAt time.Time + ExpiresAt *time.Time + MaxViews *int + Views int +} + +// FileTokenMinter is the narrow interface the create_file_url tool +// needs to persist a new token. Production wires to +// *skills.gormStorage via a thin adapter in mort.go. +type FileTokenMinter interface { + SaveFileToken(ctx context.Context, t FileToken) error +} + +// Caps for create_file_url. Public so tests can assert against them. +const ( + // DefaultFileURLExpiry is the default lifetime applied when the + // caller doesn't supply expires_in_seconds. + DefaultFileURLExpiry = 24 * time.Hour + // MaxFileURLExpiry is the per-tool hard cap. 30 days is generous + // enough for "share this report with someone" without becoming + // effectively-permanent. Operators can lower via the + // SkillFileURLConfigProvider; this is the floor below which the + // admin gate doesn't apply. + MaxFileURLExpiry = 30 * 24 * time.Hour + // MaxFileURLViews is the per-tool hard cap on max_views. 1000 is + // the largest value an LLM might plausibly set; anything beyond + // is "unlimited" semantically and the caller should leave the + // field absent. + MaxFileURLViews = 1000 +) + +type createFileURLArgs struct { + FileID string `json:"file_id" description:"file_id previously saved by this skill (from file_save, code_exec, etc)."` + ExpiresInSeconds int `json:"expires_in_seconds,omitempty" description:"How long the URL stays valid in seconds. Default 86400 (24h). Max 2592000 (30 days)."` + MaxViews int `json:"max_views,omitempty" description:"Optional cap on the number of times the URL can be fetched. Max 1000. 
Omit (or 0) for unlimited within the lifetime."` +} + +type createFileURLResult struct { + URL string `json:"url"` + Token string `json:"token"` + ExpiresAt string `json:"expires_at,omitempty"` // RFC3339 + MaxViews int `json:"max_views,omitempty"` + Note string `json:"note,omitempty"` +} + +// NewCreateFileURL constructs the create_file_url tool. nil minter → +// "not configured" at execute time; nil fileStorage same. baseURL is +// the public site (e.g. "https://mort.sh"); the path "/files/" +// is appended. +// +// Permission shape: anyone-authoring + caller-scope + share-safe + +// files/discord/composition. The "publishing" act is a tool call, +// not a save-time / share-time concern — every caller of a shared +// skill mints into their own audit trail. +func NewCreateFileURL(minter FileTokenMinter, fileStorage FileStorage, baseURL string) tool.Tool { + baseURL = strings.TrimRight(baseURL, "/") + return tool.NewGatedTool[createFileURLArgs]( + "create_file_url", + "Mint a public URL (mort.sh/files/) for a saved file_id. Use for files too large for Discord (>25 MiB) or when a stable link is preferred over an attachment. Default expiry 24h; max 30 days. Optional view-count cap (max 1000).", + tool.Permission{ + AuthoringRequirement: tool.RequirementAnyone, + OperatesOn: tool.ScopeCaller, + SafeForShare: true, + Categories: []string{"files", "discord"}, + }, + func(ctx context.Context, inv tool.Invocation, args createFileURLArgs) (string, error) { + if minter == nil || fileStorage == nil { + return "", fmt.Errorf("create_file_url: not configured") + } + if strings.TrimSpace(args.FileID) == "" { + return "", fmt.Errorf("create_file_url: file_id required") + } + + // Cross-skill rejection: the file MUST belong to the + // calling skill. Without this, a hostile skill could mint + // a URL for ANY file by file_id. 
+ meta, _, err := fileStorage.FileGet(ctx, args.FileID) + if err != nil { + if errors.Is(err, ErrFileNotFound) { + return "", fmt.Errorf("create_file_url: file_id %q not found", args.FileID) + } + return "", fmt.Errorf("create_file_url: %w", err) + } + grantedViaDescendant := false + if meta.SkillID != inv.SkillID { + if !descendantFileGrant(ctx, fileStorage, inv, meta.SkillID) { + return "", fmt.Errorf("create_file_url: file_id %q does not belong to this skill (cross-skill refs rejected)", args.FileID) + } + grantedViaDescendant = true + } + // Scope gate — this is a PUBLICATION primitive (it mints an + // unauthenticated link), so it must enforce the same per-user/per-run + // scope isolation the read tools do: a same-skill caller must not be + // able to publish a file scoped to another user/run. Skipped only for + // the descendant-grant case (the worker's file scope is the worker's + // run, not the caller's). + if !grantedViaDescendant { + if err := ValidateScope(inv, meta.Scope, inv.CallerIsAdmin); err != nil { + return "", fmt.Errorf("create_file_url: %w", err) + } + } + + // Resolve expiry. Clamp the caller's seconds BEFORE the multiply so a + // huge value can't overflow int64 nanoseconds into a negative + // duration that slips under the max-expiry cap (minting an + // already-expired token). + expiry := DefaultFileURLExpiry + if args.ExpiresInSeconds > 0 { + maxSecs := int(MaxFileURLExpiry / time.Second) + secs := args.ExpiresInSeconds + if secs > maxSecs { + secs = maxSecs + } + expiry = time.Duration(secs) * time.Second + } + if expiry > MaxFileURLExpiry { + expiry = MaxFileURLExpiry + } + expiresAt := time.Now().Add(expiry) + + // Resolve max_views. + var maxViews *int + if args.MaxViews > 0 { + mv := args.MaxViews + if mv > MaxFileURLViews { + mv = MaxFileURLViews + } + maxViews = &mv + } + + // Mint a 32-byte random token, base64url-encoded + // (padless). 43 chars long; the storage column is 64 so + // there's room to grow without a migration. 
+ token, err := mintFileURLToken() + if err != nil { + return "", fmt.Errorf("create_file_url: token generation: %w", err) + } + + // Persist. + if err := minter.SaveFileToken(ctx, FileToken{ + Token: token, + FileID: args.FileID, + SkillID: inv.SkillID, + CallerID: inv.CallerID, + ExpiresAt: &expiresAt, + MaxViews: maxViews, + }); err != nil { + return "", fmt.Errorf("create_file_url: save: %w", err) + } + + url := baseURL + "/files/" + token + res := createFileURLResult{ + URL: url, + Token: token, + ExpiresAt: expiresAt.UTC().Format(time.RFC3339), + Note: "URL is public — anyone with the link can fetch this file until it expires or the view cap is reached.", + } + if maxViews != nil { + res.MaxViews = *maxViews + } + b, err := json.Marshal(res) + if err != nil { + return "", fmt.Errorf("create_file_url: marshal: %w", err) + } + return string(b), nil + }, + ) +} + +// mintFileURLToken returns a 32-byte random token, base64url-encoded +// without padding. ~190 bits of entropy, well above the +// collision-resistance threshold for the 64-char storage column. +func mintFileURLToken() (string, error) { + var b [32]byte + if _, err := rand.Read(b[:]); err != nil { + return "", err + } + return base64.RawURLEncoding.EncodeToString(b[:]), nil +} diff --git a/tools/extract_entities.go b/tools/extract_entities.go new file mode 100644 index 0000000..bfc42ea --- /dev/null +++ b/tools/extract_entities.go @@ -0,0 +1,342 @@ +// Package tools — v12 extract_entities. +// +// Structured-output workhorse: text + field schema → typed JSON +// object. The author specifies which fields they want and what +// types; the tool builds an appropriate prompt, asks for JSON, and +// validates + coerces the response back into the requested types. +// +// Why a structured-output tool (vs forcing the agent to write its +// own prompt): every agentic skill that needs to "pull X, Y, Z out +// of unstructured text" otherwise re-invents the same prompt- +// engineering pattern. 
extract_entities centralises it so authors +// just describe the schema. +// +// Type coercion: an LLM responding with "42" when an int field was +// requested is normal noise. The tool coerces strings to +// int/float/bool when possible; coercion failures land the field in +// missing_fields rather than the entities map. +// +// Test: extract_entities_test.go covers happy path, missing optional +// field, missing required field surfaces in missing_fields, malformed +// JSON retry, second-attempt failure, type coercion (string→int, +// string→bool), unknown field type rejected at args validation. +package tools + +import ( + "context" + "encoding/json" + "fmt" + "strconv" + "strings" + + "gitea.stevedudenhoeffer.com/steve/executus/llmmeta" + "gitea.stevedudenhoeffer.com/steve/executus/tool" +) + +// extractEntitiesMaxInputBytes is the hard input cap. +const extractEntitiesMaxInputBytes = 32 * 1024 + +// extractEntitiesFallbackMaxPerRun is the per-run cap when +// ExtractEntitiesConfig is nil. +const extractEntitiesFallbackMaxPerRun = 10 + +// ExtractEntitiesConfig is the narrow per-deployment config surface +// extract_entities reads at execute time. +type ExtractEntitiesConfig interface { + MaxPerRun(ctx context.Context) int +} + +// extractField is one row in the schema the agent supplies. The four +// supported types match the JSON-shape primitives we can validate + +// coerce reliably. +// +// Why an enum-shaped Type field (vs free-form): we need to know how +// to validate the LLM's reply. Free-form ("integer", "Number", +// "boolean") would invite typos that silently miss the validation. +type extractField struct { + Name string `json:"name" description:"Field name to populate (e.g. 'author', 'year_published'). Becomes a key in the returned entities object."` + Description string `json:"description" description:"Short description of what to extract (e.g. 'the book author', 'the year the article was published'). 
Helps the model find the right value."` + Type string `json:"type" description:"One of: 'string', 'int', 'float', 'bool', 'list_of_strings'. Determines how the LLM's reply is validated and coerced."` + Required bool `json:"required,omitempty" description:"When true, a missing/uncoercible value lands in missing_fields rather than skipping silently."` +} + +// extractEntitiesArgs is the LLM-facing param struct. +type extractEntitiesArgs struct { + Text string `json:"text" description:"The text to extract from. Required. Capped at 32KB."` + Fields []extractField `json:"fields" description:"Schema describing what to extract. Each field has name, description, type, and optional required flag."` +} + +type extractEntitiesResult struct { + Entities map[string]any `json:"entities,omitempty"` + MissingFields []string `json:"missing_fields,omitempty"` + ModelUsed string `json:"model_used,omitempty"` + RawReply string `json:"raw_reply,omitempty"` + Error string `json:"error,omitempty"` + BudgetMsg string `json:"budget_message,omitempty"` +} + +// validExtractTypes is the closed set of Type strings the tool +// accepts. Anything else is rejected at args validation. +var validExtractTypes = map[string]bool{ + "string": true, + "int": true, + "float": true, + "bool": true, + "list_of_strings": true, +} + +// NewExtractEntities constructs the extract_entities tool. +func NewExtractEntities(helper *llmmeta.Helper, cfg ExtractEntitiesConfig, budget SearchBudget) tool.Tool { + return tool.NewGatedTool[extractEntitiesArgs]( + "extract_entities", + "Extract structured fields from unstructured text via a fast LLM. Caller supplies a schema (each field has name + description + type + required); tool returns an entities object with values matching the requested types. Types: string, int, float, bool, list_of_strings. 
Counts against per-run and 7-day cost budgets.", + tool.Permission{ + AuthoringRequirement: tool.RequirementAnyone, + OperatesOn: tool.ScopeCaller, + SafeForShare: true, + Categories: []string{"llm-meta", "cost-bearing"}, + }, + func(ctx context.Context, inv tool.Invocation, args extractEntitiesArgs) (string, error) { + if helper == nil { + return "", fmt.Errorf("extract_entities: not configured") + } + text := args.Text + if strings.TrimSpace(text) == "" { + return marshalExtractEntities(extractEntitiesResult{Error: "text is empty"}), nil + } + if len(args.Fields) == 0 { + return marshalExtractEntities(extractEntitiesResult{Error: "fields is empty"}), nil + } + // Validate each field's Type before paying for an LLM + // call. + for _, f := range args.Fields { + if strings.TrimSpace(f.Name) == "" { + return marshalExtractEntities(extractEntitiesResult{Error: "field with empty name"}), nil + } + if !validExtractTypes[strings.ToLower(strings.TrimSpace(f.Type))] { + return marshalExtractEntities(extractEntitiesResult{ + Error: fmt.Sprintf("field %q has unsupported type %q (allowed: string|int|float|bool|list_of_strings)", f.Name, f.Type), + }), nil + } + } + + if len(text) > extractEntitiesMaxInputBytes { + text = truncateUTF8(text, extractEntitiesMaxInputBytes) + } + + // Per-run budget gate. + if budget == nil { + maxPerRun := extractEntitiesFallbackMaxPerRun + if cfg != nil { + maxPerRun = cfg.MaxPerRun(ctx) + } + budget = NewInMemorySearchBudget(map[string]int{ + "extract_entities": maxPerRun, + }) + } + count, max, exceeded := budget.CheckAndIncrement(ctx, inv.RunID, "extract_entities") + if exceeded { + return marshalExtractEntities(extractEntitiesResult{ + Error: "extract_entities_budget_exceeded", + BudgetMsg: fmt.Sprintf("per-run extract_entities budget exceeded (%d/%d). Ask an admin to raise skills.extract_entities.max_per_run.", count, max), + }), nil + } + + systemPrompt := "You extract structured data from unstructured text. 
Return ONLY valid JSON with the requested keys. If a value is not present in the text, omit the key. Do NOT invent values." + userPrompt := buildExtractPrompt(text, args.Fields) + + res, callErr := helper.Call(ctx, llmmeta.CallSpec{ + Tier: "fast", + SystemPrompt: systemPrompt, + UserPrompt: userPrompt, + MaxOutputTokens: 4096, + ResponseFormat: "json", + RetryOnMalformedJSON: true, + ToolName: "extract_entities", + RunID: inv.RunID, + SkillID: inv.SkillID, + CallerID: inv.CallerID, + }) + if callErr != nil { + return "", callErr + } + if !res.Success { + kind := res.ErrorKind + if kind == "" { + kind = "llm_unavailable" + } + return marshalExtractEntities(extractEntitiesResult{Error: kind}), nil + } + + // Second-failure malformed JSON (success=true but parsed + // is nil and ErrorKind=malformed_json). Surface the raw + // reply so the agent can salvage. + if res.ErrorKind == llmmeta.ErrorKindMalformedJSON || res.Parsed == nil { + return marshalExtractEntities(extractEntitiesResult{ + Error: "extraction_failed", + RawReply: res.Text, + ModelUsed: res.ModelUsed, + }), nil + } + + parsedMap, ok := res.Parsed.(map[string]any) + if !ok { + return marshalExtractEntities(extractEntitiesResult{ + Error: "extraction_failed_not_object", + RawReply: res.Text, + ModelUsed: res.ModelUsed, + }), nil + } + + entities, missing := coerceExtractedEntities(parsedMap, args.Fields) + return marshalExtractEntities(extractEntitiesResult{ + Entities: entities, + MissingFields: missing, + ModelUsed: res.ModelUsed, + }), nil + }, + ) +} + +// buildExtractPrompt composes the user message describing the schema +// + source text. +func buildExtractPrompt(text string, fields []extractField) string { + var sb strings.Builder + sb.WriteString("Extract the following fields from the text below. 
Return a JSON object with the field names as keys.\n\nFields:\n") + for _, f := range fields { + fmt.Fprintf(&sb, "- %s (%s): %s", f.Name, f.Type, f.Description) + if f.Required { + sb.WriteString(" [required]") + } + sb.WriteString("\n") + } + sb.WriteString("\nText:\n") + sb.WriteString(text) + return sb.String() +} + +// coerceExtractedEntities walks the LLM's response, validating + (when +// possible) coercing each value to the requested type. Required fields +// missing or uncoercible land in missing[]; optional fields silently +// drop. +func coerceExtractedEntities(parsed map[string]any, fields []extractField) (map[string]any, []string) { + entities := make(map[string]any, len(fields)) + var missing []string + for _, f := range fields { + raw, present := parsed[f.Name] + if !present || raw == nil { + if f.Required { + missing = append(missing, f.Name) + } + continue + } + value, ok := coerceFieldValue(raw, f.Type) + if !ok { + if f.Required { + missing = append(missing, f.Name) + } + continue + } + entities[f.Name] = value + } + return entities, missing +} + +// coerceFieldValue attempts to convert raw to the requested type. +// Returns (value, true) on success or (nil, false) on failure. +// +// Why coerce (vs strict reject): LLMs frequently reply with strings +// that contain numbers ("42") or pseudo-booleans ("yes"). Strict +// rejection would force every author to clean the response themselves. +// Coercion is conservative — string "42" → int 42 succeeds; string +// "forty-two" → int 42 fails (the agent never asked for word-form +// parsing). 
+func coerceFieldValue(raw any, fieldType string) (any, bool) { + switch strings.ToLower(strings.TrimSpace(fieldType)) { + case "string": + switch v := raw.(type) { + case string: + return v, true + case float64: + return strconv.FormatFloat(v, 'f', -1, 64), true + case bool: + return strconv.FormatBool(v), true + } + return nil, false + + case "int": + switch v := raw.(type) { + case float64: + // JSON numbers are float64 by default. Range-check before converting: a float outside int64's range converts to an implementation-dependent value. + if v >= -(1<<63) && v < (1<<63) && v == float64(int64(v)) { + return int64(v), true + } + return nil, false + case string: + if n, err := strconv.ParseInt(strings.TrimSpace(v), 10, 64); err == nil { + return n, true + } + // Try float-string-with-zero-fractional ("42.0"); same int64 range guard as above (it also rejects "NaN"/"Inf"). + if f, err := strconv.ParseFloat(strings.TrimSpace(v), 64); err == nil && f >= -(1<<63) && f < (1<<63) && f == float64(int64(f)) { + return int64(f), true + } + } + return nil, false + + case "float": + switch v := raw.(type) { + case float64: + return v, true + case string: + if f, err := strconv.ParseFloat(strings.TrimSpace(v), 64); err == nil && f*0 == 0 { // f*0 == 0 only for finite f: ParseFloat accepts "NaN"/"Inf", which json.Marshal cannot encode + return f, true + } + } + return nil, false + + case "bool": + switch v := raw.(type) { + case bool: + return v, true + case string: + s := strings.ToLower(strings.TrimSpace(v)) + switch s { + case "true", "yes", "1", "y": + return true, true + case "false", "no", "0", "n": + return false, true + } + case float64: + return v != 0, true + } + return nil, false + + case "list_of_strings": + switch v := raw.(type) { + case []any: + out := make([]string, 0, len(v)) + for _, e := range v { + if s, ok := e.(string); ok { + out = append(out, s) + } else { + // Mixed-type lists fail the type contract. + return nil, false + } + } + return out, true + case string: + // Single-string can be lifted into a one-element list. 
+ return []string{v}, true + } + return nil, false + } + return nil, false +} + +func marshalExtractEntities(r extractEntitiesResult) string { + b, err := json.Marshal(r) + if err != nil { + return fmt.Sprintf(`{"error":"marshal_failed: %v"}`, err) + } + return string(b) +} diff --git a/tools/file_delete.go b/tools/file_delete.go new file mode 100644 index 0000000..4a7d45d --- /dev/null +++ b/tools/file_delete.go @@ -0,0 +1,79 @@ +// file_delete removes a saved file by its file_id. Decrements the +// underlying blob's refcount in storage; the blob row is removed when +// refcount hits zero. +// +// Why scope is checked POST-fetch (mirrors file_get): file_id is the +// only key the caller has; we must read the row to know the scope. +package tools + +import ( + "context" + "errors" + "fmt" + + "gitea.stevedudenhoeffer.com/steve/executus/tool" +) + +type fileDeleteArgs struct { + FileID string `json:"file_id" description:"Opaque file ID returned by file_save or file_list."` +} + +// NewFileDelete constructs the file_delete tool. storage nil → "not +// configured" at execute time. +func NewFileDelete(storage FileStorage) tool.Tool { + return tool.NewGatedTool[fileDeleteArgs]( + "file_delete", + "Remove a saved file by file_id. Returns 'ok' on success or 'not_found' if no file matched.", + tool.Permission{ + AuthoringRequirement: tool.RequirementAnyone, + OperatesOn: tool.ScopeCaller, + SafeForShare: true, + Categories: []string{"storage", "write"}, + }, + func(ctx context.Context, inv tool.Invocation, args fileDeleteArgs) (string, error) { + if storage == nil { + return "", fmt.Errorf("file_delete: not configured") + } + if args.FileID == "" { + return "", fmt.Errorf("file_delete: file_id required") + } + + // Fetch first so we can validate scope before deleting. The + // extra read is acceptable for a write path that's not in + // the hot loop, and it preserves the cross-skill / + // cross-user safety story. 
+ meta, _, err := storage.FileGet(ctx, args.FileID) + if err != nil { + if errors.Is(err, ErrFileNotFound) { + return "not_found", nil + } + return "", fmt.Errorf("file_delete: %w", err) + } + // Honor the descendant grant like the read tools do, so a parent + // orchestrator can clean up a worker's artifacts (gadfly flagged the + // asymmetry: delete previously rejected cross-skill outright). + grantedViaDescendant := false + if meta.SkillID != inv.SkillID { + if !descendantFileGrant(ctx, storage, inv, meta.SkillID) { + return "", fmt.Errorf("file_delete: file does not belong to this skill") + } + grantedViaDescendant = true + } + if !grantedViaDescendant { + if err := ValidateScope(inv, meta.Scope, inv.CallerIsAdmin); err != nil { + return "", fmt.Errorf("file_delete: %w", err) + } + } + + if err := storage.FileDelete(ctx, args.FileID); err != nil { + if errors.Is(err, ErrFileNotFound) { + // Race: row was deleted between FileGet and + // FileDelete. Surface as a clean miss. + return "not_found", nil + } + return "", fmt.Errorf("file_delete: %w", err) + } + return "ok", nil + }, + ) +} diff --git a/tools/file_descendant_grant.go b/tools/file_descendant_grant.go new file mode 100644 index 0000000..6c98203 --- /dev/null +++ b/tools/file_descendant_grant.go @@ -0,0 +1,58 @@ +// file_descendant_grant.go — the cross-skill file-access escape hatch +// for parent → spawned-worker handoff. +// +// The blanket rule everywhere in this package is "a file belongs to +// the skill that saved it; cross-skill refs are rejected". That rule +// breaks the agent_spawn flow: a worker saves a chart with file_save +// under ITS ephemeral ID, returns the file_id as text, and the parent +// (which orchestrated the whole thing) can't attach, read, or host it. +// Observed live on the second spawn test — the chart never reached +// Discord; general could only apologise with the file_id. 
+// +// The grant: a caller may access a file whose owning skill/agent +// PRODUCED A RUN THAT DESCENDS FROM THE CALLER'S CURRENT RUN. In other +// words: you may touch the artifacts of workers you (transitively) +// dispatched in this very tree — output you were already entitled to +// see as their tool results. You may NOT touch files from siblings, +// ancestors, other trees, or unrelated skills; those are still rejected. +// +// Why an optional interface upgrade (vs a new constructor dep on +// every file tool): six tools enforce the ownership rule, each with +// its own narrow storage interface — threading a new dep through all +// of them churns every signature and test fake. Instead, the +// production storage adapter (mort.go's skillsFileStorageAdapter, +// which backs ALL of those interfaces) additionally implements +// DescendantRunChecker; the tools type-assert at the rejection site. +// Fakes that don't implement it keep the strict behaviour — the grant +// is fail-closed everywhere. Same pattern as KVHistoryRecorder (v7). +package tools + +import ( + "context" + + "gitea.stevedudenhoeffer.com/steve/executus/tool" +) + +// DescendantRunChecker reports whether ownerSkillID (the file's owning +// skill or agent ID — e.g. a spawned worker's "eph-…" ID) produced a +// run that is a DESCENDANT of callerRunID. Production walks the audit +// parent_run_id chain; see mort_skills_storage_adapters.go. +type DescendantRunChecker interface { + IsDescendantProducer(ctx context.Context, ownerSkillID, callerRunID string) (bool, error) +} + +// descendantFileGrant is called at a cross-skill rejection site with +// the tool's storage dep. Returns true only when the dep implements +// DescendantRunChecker AND the owner's run descends from the caller's +// run. A checker error, an empty ownerSkillID, or an empty inv.RunID keeps the strict rejection. 
+func descendantFileGrant(ctx context.Context, dep any, inv tool.Invocation, ownerSkillID string) bool { + if ownerSkillID == "" || inv.RunID == "" { // nothing to check the run lineage against: fail closed + return false + } + checker, ok := dep.(DescendantRunChecker) // optional capability: only deps that implement the interface can grant + if !ok || checker == nil { + return false + } + granted, err := checker.IsDescendantProducer(ctx, ownerSkillID, inv.RunID) + return err == nil && granted // a checker error counts as "not granted" +} diff --git a/tools/file_get.go b/tools/file_get.go new file mode 100644 index 0000000..a097cf1 --- /dev/null +++ b/tools/file_get.go @@ -0,0 +1,103 @@ +// file_get fetches a previously-saved file by its opaque file_id and +// returns the metadata + base64-encoded bytes. +// +// Why scope is checked POST-fetch: file_id is the only key the caller +// knows; the scope (and therefore the authorisation envelope) is +// stored on the FileMeta row. We must read the row first to know which +// scope to validate. The trade-off is that file_id existence is +// observable (a foreign caller can probe IDs and learn that one +// exists), but the bytes themselves are still gated. file_id is a UUID, +// so the probe surface is impractical. +// +// Why base64 in the response: same reason as file_save — JSON can't +// carry arbitrary bytes natively. Callers that want a paste link or a +// direct download go through a separate path. +package tools + +import ( + "context" + "encoding/base64" + "encoding/json" + "errors" + "fmt" + "time" + + "gitea.stevedudenhoeffer.com/steve/executus/tool" +) + +type fileGetArgs struct { + FileID string `json:"file_id" description:"Opaque file ID returned by file_save or file_list."` +} + +type fileGetResult struct { + Name string `json:"name"` + ContentBase64 string `json:"content_base64"` + Mime string `json:"mime"` + SizeBytes int64 `json:"size_bytes"` + CreatedAt string `json:"created_at"` // RFC3339, formatted in UTC by the handler +} + +// NewFileGet constructs the file_get tool. storage nil → "not +// configured" at execute time. 
+func NewFileGet(storage FileStorage) tool.Tool { + return tool.NewGatedTool[fileGetArgs]( + "file_get", + "Fetch a saved file by its file_id. Returns name, base64 content, MIME, size, and created_at. The caller must have access to the file's scope (skill / own user: / own run:).", + tool.Permission{ + AuthoringRequirement: tool.RequirementAnyone, + OperatesOn: tool.ScopeCaller, + SafeForShare: true, + Categories: []string{"storage", "read"}, + }, + func(ctx context.Context, inv tool.Invocation, args fileGetArgs) (string, error) { + if storage == nil { + return "", fmt.Errorf("file_get: not configured") + } + if args.FileID == "" { + return "", fmt.Errorf("file_get: file_id required") + } + + meta, content, err := storage.FileGet(ctx, args.FileID) + if err != nil { + if errors.Is(err, ErrFileNotFound) { + return "", fmt.Errorf("file_get: not found") + } + return "", fmt.Errorf("file_get: %w", err) + } + + // Cross-skill access check: a file's SkillID must match the + // current invocation's SkillID. Without this, a caller + // could probe another skill's file_ids and read content. + // One exception — the descendant grant (see + // file_descendant_grant.go): workers this run dispatched. + grantedViaDescendant := false + if meta.SkillID != inv.SkillID { + if !descendantFileGrant(ctx, storage, inv, meta.SkillID) { + return "", fmt.Errorf("file_get: file does not belong to this skill") + } + grantedViaDescendant = true + } + + // Scope check: even within the same skill, the scope on the + // row gates access (e.g. user:bob's file is unreadable by + // alice). The descendant grant stands in for it — the file's + // scope is the WORKER's run, never the caller's, so a ValidateScope error is ignored when grantedViaDescendant is true. 
+ if err := ValidateScope(inv, meta.Scope, inv.CallerIsAdmin); err != nil && !grantedViaDescendant { + return "", fmt.Errorf("file_get: %w", err) + } + + res := fileGetResult{ + Name: meta.Name, + ContentBase64: base64.StdEncoding.EncodeToString(content), + Mime: meta.MimeType, + SizeBytes: meta.SizeBytes, + CreatedAt: meta.CreatedAt.UTC().Format(time.RFC3339), + } + b, err := json.Marshal(res) + if err != nil { + return "", fmt.Errorf("file_get: marshal: %w", err) + } + return string(b), nil + }, + ) +} diff --git a/tools/file_get_metadata.go b/tools/file_get_metadata.go new file mode 100644 index 0000000..481367c --- /dev/null +++ b/tools/file_get_metadata.go @@ -0,0 +1,91 @@ +// file_get_metadata returns metadata about a saved file (name, mime, +// size, created_at, scope) WITHOUT putting the bytes in the response (the handler still calls FileStorage.FileGet and discards the content it returns). This is the v10 +// agent-friendly companion to file_get — agents that just need to +// reason about a file's properties (size, type, name) should use +// file_get_metadata instead of pulling the full body into the context +// window. +// +// Why a separate tool (vs adding a flag to file_get): the byte-vs- +// reference principle is enforced statically — file_get_metadata's +// return shape simply does not carry bytes, so agents and tool +// authors can rely on the type signature. A flag-gated variant would +// invite "what does include_content=false mean" confusion. +package tools + +import ( + "context" + "encoding/json" + "errors" + "fmt" + "time" + + "gitea.stevedudenhoeffer.com/steve/executus/tool" +) + +type fileGetMetadataArgs struct { + FileID string `json:"file_id" description:"Opaque file ID returned by file_save or file_list."` +} + +type fileGetMetadataResult struct { + Name string `json:"name"` + Mime string `json:"mime"` + SizeBytes int64 `json:"size_bytes"` + CreatedAt string `json:"created_at"` // RFC3339 + Scope string `json:"scope"` +} + +// NewFileGetMetadata constructs the file_get_metadata tool. storage +// nil → "not configured" at execute time. 
+func NewFileGetMetadata(storage FileStorage) tool.Tool { + return tool.NewGatedTool[fileGetMetadataArgs]( + "file_get_metadata", + "Fetch metadata for a saved file by its file_id (name, mime, size_bytes, created_at, scope). Does NOT load the file bytes — use file_get_text for text content or send_attachments to ship binary content to Discord.", + tool.Permission{ + AuthoringRequirement: tool.RequirementAnyone, + OperatesOn: tool.ScopeCaller, + SafeForShare: true, + Categories: []string{"storage", "read"}, + }, + func(ctx context.Context, inv tool.Invocation, args fileGetMetadataArgs) (string, error) { + if storage == nil { + return "", fmt.Errorf("file_get_metadata: not configured") + } + if args.FileID == "" { + return "", fmt.Errorf("file_get_metadata: file_id required") + } + meta, _, err := storage.FileGet(ctx, args.FileID) // NOTE(review): FileGet's content return is discarded here; whether the storage layer still reads the blob is not visible from this file — confirm + if err != nil { + if errors.Is(err, ErrFileNotFound) { + return "", fmt.Errorf("file_get_metadata: not found") + } + return "", fmt.Errorf("file_get_metadata: %w", err) + } + // Descendant grant: see file_descendant_grant.go — covers + // the scope check too (the file's scope is the worker's run). 
+ grantedViaDescendant := false + if meta.SkillID != inv.SkillID { + if !descendantFileGrant(ctx, storage, inv, meta.SkillID) { + return "", fmt.Errorf("file_get_metadata: file does not belong to this skill") + } + grantedViaDescendant = true + } + if !grantedViaDescendant { + if err := ValidateScope(inv, meta.Scope, inv.CallerIsAdmin); err != nil { + return "", fmt.Errorf("file_get_metadata: %w", err) + } + } + res := fileGetMetadataResult{ + Name: meta.Name, + Mime: meta.MimeType, + SizeBytes: meta.SizeBytes, + CreatedAt: meta.CreatedAt.UTC().Format(time.RFC3339), + Scope: meta.Scope, + } + b, err := json.Marshal(res) + if err != nil { + return "", fmt.Errorf("file_get_metadata: marshal: %w", err) + } + return string(b), nil + }, + ) +} diff --git a/tools/file_get_text.go b/tools/file_get_text.go new file mode 100644 index 0000000..dc176ec --- /dev/null +++ b/tools/file_get_text.go @@ -0,0 +1,119 @@ +// file_get_text fetches a saved text file's content as plain text. +// Only succeeds for MIMEs accepted by isTextMime (text/* plus a few text-like application/* types); other MIMEs return an error so the +// agent knows to use a different path (file_get_metadata for +// reasoning, send_attachments for delivery). +// +// Why a 64 KiB cap: the v10 byte-vs-reference principle says inline +// text content stays under ~10KB ideally; we set the hard cap at 64 +// KiB to handle reasonable text artifacts (logs, configs, small +// reports) without blowing the agent's context. Files larger than +// the cap return an error pointing to send_attachments. +// +// Why a separate tool (vs file_get): file_get returns base64 + +// metadata regardless of MIME, which agents misuse to dump 10MB PDFs +// into the context window. file_get_text is the agent-friendly +// alternative that explicitly fails fast on binary content. 
+package tools + +import ( + "context" + "encoding/json" + "errors" + "fmt" + "strings" + "time" + + "gitea.stevedudenhoeffer.com/steve/executus/tool" +) + +const fileGetTextMaxBytes = 64 * 1024 + +type fileGetTextArgs struct { + FileID string `json:"file_id" description:"Opaque file ID returned by file_save or file_list."` +} + +type fileGetTextResult struct { + Text string `json:"text"` + Mime string `json:"mime"` + SizeBytes int64 `json:"size_bytes"` + CreatedAt string `json:"created_at"` // RFC3339 +} + +// NewFileGetText constructs the file_get_text tool. storage nil → +// "not configured" at execute time. +func NewFileGetText(storage FileStorage) tool.Tool { + return tool.NewGatedTool[fileGetTextArgs]( + "file_get_text", + "Fetch a saved text file's content (text/* MIMEs only, capped at 64KB). For binary content use file_get_metadata + send_attachments. Errors with 'not_text' for non-text MIMEs and 'too_large' for files > 64KB.", + tool.Permission{ + AuthoringRequirement: tool.RequirementAnyone, + OperatesOn: tool.ScopeCaller, + SafeForShare: true, + Categories: []string{"storage", "read"}, + }, + func(ctx context.Context, inv tool.Invocation, args fileGetTextArgs) (string, error) { + if storage == nil { + return "", fmt.Errorf("file_get_text: not configured") + } + if args.FileID == "" { + return "", fmt.Errorf("file_get_text: file_id required") + } + meta, content, err := storage.FileGet(ctx, args.FileID) + if err != nil { + if errors.Is(err, ErrFileNotFound) { + return "", fmt.Errorf("file_get_text: not found") + } + return "", fmt.Errorf("file_get_text: %w", err) + } + // Descendant grant: a worker this run (transitively) + // dispatched may have produced the file — its scope is the + // WORKER's run, so the grant also stands in for the scope + // check below. 
+ grantedViaDescendant := false + if meta.SkillID != inv.SkillID { + if !descendantFileGrant(ctx, storage, inv, meta.SkillID) { + return "", fmt.Errorf("file_get_text: file does not belong to this skill") + } + grantedViaDescendant = true + } + if !grantedViaDescendant { + if err := ValidateScope(inv, meta.Scope, inv.CallerIsAdmin); err != nil { + return "", fmt.Errorf("file_get_text: %w", err) + } + } + if !isTextMime(meta.MimeType) { + return "", fmt.Errorf("file_get_text: not_text: mime %q is not text/*", meta.MimeType) + } + if int64(len(content)) > fileGetTextMaxBytes { + return "", fmt.Errorf("file_get_text: too_large: %d bytes exceeds 64KB cap; use send_attachments to deliver this file to Discord", len(content)) + } + res := fileGetTextResult{ + Text: string(content), + Mime: meta.MimeType, + SizeBytes: meta.SizeBytes, + CreatedAt: meta.CreatedAt.UTC().Format(time.RFC3339), + } + b, err := json.Marshal(res) + if err != nil { + return "", fmt.Errorf("file_get_text: marshal: %w", err) + } + return string(b), nil + }, + ) +} + +// isTextMime reports whether the given MIME is text/* or one of a few text-like application/* types +// (JSON, XML, XHTML, JavaScript, YAML). The match is case-insensitive and ignores surrounding whitespace +// and any parameters after ';', so "application/json; charset=utf-8" is accepted. +func isTextMime(mime string) bool { + mime = strings.ToLower(strings.TrimSpace(strings.SplitN(mime, ";", 2)[0])) + if strings.HasPrefix(mime, "text/") { + return true + } + switch mime { + case "application/json", "application/xml", "application/xhtml+xml", + "application/javascript", "application/yaml", "application/x-yaml": + return true + } + return false +} diff --git a/tools/file_list.go b/tools/file_list.go new file mode 100644 index 0000000..2073420 --- /dev/null +++ b/tools/file_list.go @@ -0,0 +1,74 @@ +// file_list returns metadata for files in a scope. Blob bytes are NOT +// loaded — listing is a hot path that must stay light, and the LLM +// would burn tokens for no benefit. 
+package tools + +import ( + "context" + "encoding/json" + "fmt" + "strings" + "time" + + "gitea.stevedudenhoeffer.com/steve/executus/tool" +) + +type fileListArgs struct { + Scope string `json:"scope" description:"Storage scope: 'skill', 'user:', or 'run:'."` +} + +type fileListEntry struct { + FileID string `json:"file_id"` + Name string `json:"name"` + Mime string `json:"mime"` + SizeBytes int64 `json:"size_bytes"` + CreatedAt string `json:"created_at"` // RFC3339, formatted in UTC by the handler +} + +// NewFileList constructs the file_list tool. storage nil → "not +// configured" at execute time. +func NewFileList(storage FileStorage) tool.Tool { + return tool.NewGatedTool[fileListArgs]( + "file_list", + "List files in a scope. Returns a JSON array of {file_id, name, mime, size_bytes, created_at}. Does NOT include bytes — call file_get with a file_id to fetch content.", + tool.Permission{ + AuthoringRequirement: tool.RequirementAnyone, + OperatesOn: tool.ScopeCaller, + SafeForShare: true, + Categories: []string{"storage", "read"}, + }, + func(ctx context.Context, inv tool.Invocation, args fileListArgs) (string, error) { + if storage == nil { + return "", fmt.Errorf("file_list: not configured") + } + if err := ValidateScope(inv, args.Scope, inv.CallerIsAdmin); err != nil { + return "", fmt.Errorf("file_list: %w", err) + } + // root_run is a KV-only scope (v1) — see file_save's guard. 
+ if strings.HasPrefix(args.Scope, "root_run:") { + return "", fmt.Errorf("file_list: root_run scope is KV-only") + } + + rows, err := storage.FileList(ctx, inv.SkillID, args.Scope) // always listed under the invoking skill's ID + if err != nil { + return "", fmt.Errorf("file_list: %w", err) + } + + out := make([]fileListEntry, 0, len(rows)) // non-nil even when empty, so the result marshals as [] rather than null + for _, r := range rows { + out = append(out, fileListEntry{ + FileID: r.ID, + Name: r.Name, + Mime: r.MimeType, + SizeBytes: r.SizeBytes, + CreatedAt: r.CreatedAt.UTC().Format(time.RFC3339), + }) + } + b, err := json.Marshal(out) + if err != nil { + return "", fmt.Errorf("file_list: marshal: %w", err) + } + return string(b), nil + }, + ) +} diff --git a/tools/file_save.go b/tools/file_save.go new file mode 100644 index 0000000..1ebe747 --- /dev/null +++ b/tools/file_save.go @@ -0,0 +1,171 @@ +// file_save persists arbitrary bytes (base64-encoded by the caller) +// against a (scope, name) tuple within the calling skill's namespace. +// Returns the new file_id, the SHA256 content hash, and the size. +// +// Why base64 over raw bytes: the LLM's tool-call wire format is JSON, +// which can't carry arbitrary bytes natively. Base64 round-trips +// cleanly through the schema. +// +// Why hash + size in the response: agents commonly want to dedup +// across runs (same hash = same content) or build a manifest. Reporting +// these inline saves an immediate file_get round-trip just to compute +// them. +// +// Per-file cap: maxFileBytes (constructor arg) enforces an upper bound +// on individual file size. 0 or a negative value falls back to defaultFileMaxBytes (10 MB). +// +// Per-skill quota (sum across all files): the constructor's QuotaProvider +// arg drives the v4 Phase 4 enforcement. nil disables enforcement +// (useful for tests and admin-only deployments). 
The check is: +// +// used := storage.FileUsageBytes(skill) +// if used + len(new content) > filesMax → quota_exceeded +// +// Note we do NOT subtract a "prior" value here the way kv_set does: +// file_save always inserts a new file row (content-addressable dedup +// is at the blob layer, not the row layer), so every save is additive +// to FileUsageBytes. +package tools + +import ( + "context" + "crypto/sha256" + "encoding/base64" + "encoding/hex" + "encoding/json" + "fmt" + "net/http" + "strings" + "time" + + "github.com/google/uuid" + + "gitea.stevedudenhoeffer.com/steve/executus/tool" +) + +const defaultFileMaxBytes = 10 * 1024 * 1024 // 10 MiB; per-file cap applied when NewFileSave receives maxFileBytes <= 0 + +type fileSaveArgs struct { + Scope string `json:"scope" description:"Storage scope: 'skill' (shared across all callers of this skill), 'user:' (per-caller), or 'run:' (this run's scratchpad)."` + Name string `json:"name" description:"Filename including extension. Used for display only — the file is identified by an opaque file_id."` + ContentBase64 string `json:"content_base64" description:"Base64-encoded file content."` + Mime string `json:"mime,omitempty" description:"Optional MIME type. If omitted, detected from the first 512 bytes of content."` +} + +type fileSaveResult struct { + FileID string `json:"file_id"` + Hash string `json:"hash"` + SizeBytes int64 `json:"size_bytes"` +} + +// NewFileSave constructs the file_save tool. +// +// storage nil → "not configured" at execute time. +// maxFileBytes <= 0 falls back to defaultFileMaxBytes (10 MiB). +// quota nil → per-skill quota check skipped (per-file cap still applies). +// +// Permission: anyone may author; safe for share. Scope check at handler +// entry prevents cross-user writes; per-user buckets are isolated by +// inv.CallerID. 
+func NewFileSave(storage FileStorage, quota QuotaProvider, maxFileBytes int) tool.Tool { + if maxFileBytes <= 0 { + maxFileBytes = defaultFileMaxBytes + } + return tool.NewGatedTool[fileSaveArgs]( + "file_save", + "Save base64-encoded bytes against a (scope, name) tuple. Returns file_id (opaque), SHA256 hash, and size_bytes. Content is dedup'd by hash — multiple file_save calls with identical bytes share storage. NOTE: for files produced inside code_exec, do NOT hand-encode base64 here (it corrupts) — write them to /workspace/ in the code_exec call and use the files_out file_id it returns.", + tool.Permission{ + AuthoringRequirement: tool.RequirementAnyone, + OperatesOn: tool.ScopeCaller, + SafeForShare: true, + Categories: []string{"storage", "write"}, + }, + func(ctx context.Context, inv tool.Invocation, args fileSaveArgs) (string, error) { + if storage == nil { + return "", fmt.Errorf("file_save: not configured") + } + if err := ValidateScope(inv, args.Scope, inv.CallerIsAdmin); err != nil { + return "", fmt.Errorf("file_save: %w", err) + } + // root_run is a KV-only scope (v1): file storage partitions + // by the calling skill, so a root_run file would silently be + // invisible to siblings AND escape the run-scope sweeper. + // Reject loudly instead. + if strings.HasPrefix(args.Scope, "root_run:") { + return "", fmt.Errorf("file_save: root_run scope is KV-only; save under run: and share the file_id via kv_set in the root_run scope") + } + if args.Name == "" { + return "", fmt.Errorf("file_save: name required") + } + if args.ContentBase64 == "" { + return "", fmt.Errorf("file_save: content_base64 required") + } + + // Decode + cap. Decoding twice (once to count, once to + // store) would waste cycles; we decode once and check size + // after. 
+ content, err := base64.StdEncoding.DecodeString(args.ContentBase64) + if err != nil { + return "", fmt.Errorf("file_save: invalid base64: %w", err) + } + if len(content) > maxFileBytes { + return "", fmt.Errorf("file_save: file exceeds max %d bytes (got %d)", maxFileBytes, len(content)) + } + + // Per-skill quota gate (v4 Phase 4). Skipped when quota is nil + // (tests / admin opt-out) so the per-file cap above is the + // only line of defence in that mode. NOTE(review): the usage read below and the later FileSave are separate calls, so concurrent saves for one skill could each pass this check — confirm whether storage enforces the quota atomically. + if quota != nil { + _, filesMax, err := quota.EffectiveQuota(ctx, inv.SkillID) + if err != nil { + return "", fmt.Errorf("file_save: quota lookup: %w", err) + } + used, err := storage.FileUsageBytes(ctx, inv.SkillID) + if err != nil { + return "", fmt.Errorf("file_save: usage check: %w", err) + } + if used+int64(len(content)) > filesMax { + return "", fmt.Errorf("file_save: quota_exceeded — %d/%d bytes used; ask admin for higher quota", used, filesMax) + } + } + + // SHA256 for content-addressable dedup at the storage layer. + h := sha256.Sum256(content) + hashHex := hex.EncodeToString(h[:]) + + mime := args.Mime + if mime == "" { + // http.DetectContentType is documented to read at most + // the first 512 bytes; passing the full slice is fine. 
+ mime = http.DetectContentType(content) + } + + meta := FileDomainMeta{ + ID: uuid.NewString(), + SkillID: inv.SkillID, + Scope: args.Scope, + Name: args.Name, + ContentHash: hashHex, + MimeType: mime, + SizeBytes: int64(len(content)), + CreatedAt: time.Now(), + } + + fileID, err := storage.FileSave(ctx, meta, content) // the ID reported to the caller is the one storage returns, not necessarily meta.ID + if err != nil { + return "", fmt.Errorf("file_save: %w", err) + } + + res := fileSaveResult{ + FileID: fileID, + Hash: hashHex, + SizeBytes: int64(len(content)), + } + b, err := json.Marshal(res) + if err != nil { + return "", fmt.Errorf("file_save: marshal result: %w", err) + } + return string(b), nil + }, + ) +} diff --git a/tools/file_search.go b/tools/file_search.go new file mode 100644 index 0000000..b8b586a --- /dev/null +++ b/tools/file_search.go @@ -0,0 +1,131 @@ +// file_search runs a token-AND search over the invoking skill's file +// index (the handler always passes inv.SkillID as the skill filter). Returns up to N matches with +// {file_id, name, snippet, score}. +// +// Why admin-authoring only: a public skill could otherwise probe +// other skills' file content via cross-skill search. Restricting the +// tool's authoring requirement to admins blocks shared/public skills +// from depending on file_search at all (it never appears in their +// allowed-tool catalog at save time). Within a private skill, +// admin-authored or otherwise, scope is per-call: the handler always +// pins skill_id to inv.SkillID — no matter what the LLM-supplied scope +// arg says — so a non-admin caller invoking an admin-authored public +// skill cannot escape the skill's own bucket. +// +// Why use Storage's SearchFiles directly: token logic + scoring lives +// in the skills package. The handler is a thin transcoder. +package tools + +import ( + "context" + "encoding/json" + "fmt" + + "gitea.stevedudenhoeffer.com/steve/executus/tool" +) + +// FileSearcher is the narrow surface the file_search tool needs. +// Production wiring (mort.go) bridges *skills.System.Storage(). 
+// nil-safe: a nil FileSearcher surfaces "not configured" at the first +// call. +type FileSearcher interface { + SearchFiles(ctx context.Context, skillID, scope, query string, limit int) ([]FileSearchDomainHit, error) +} + +// FileSearchDomainHit mirrors skills.FileSearchHit (cycle-break domain +// shape). The production adapter is a struct copy. +type FileSearchDomainHit struct { + FileID string + SkillID string + Scope string + Name string + MimeType string + Snippet string + Score int +} + +type fileSearchArgs struct { + Query string `json:"query" description:"Free-text search query. Tokenised, lowercased, ANDed."` + Scope string `json:"scope,omitempty" description:"Optional storage scope to restrict the search ('skill', 'user:', 'run:'). Empty = all scopes within this skill."` + Limit int `json:"limit,omitempty" description:"Optional max hits to return (default 25, max 100)."` +} + +type fileSearchHit struct { // JSON projection of FileSearchDomainHit; SkillID and Scope are not exposed + FileID string `json:"file_id"` + Name string `json:"name"` + Mime string `json:"mime,omitempty"` + Snippet string `json:"snippet,omitempty"` + Score int `json:"score"` +} + +// NewFileSearch constructs the file_search tool. Authoring-required +// admin so non-admins can't include this tool in shared/public skills +// (the share-safety check rejects share+admin-only as private-only). +// +// Because the tool is admin-authoring AND share-safe, an admin can still +// author a public skill that uses it. That is the desired flow: admin +// curates the skill, but the privacy property still holds because the +// handler PINS skill_id to inv.SkillID. A non-admin caller of the +// public skill can ONLY search files within that skill's bucket, not +// cross-skill. +// +// Setting SafeForShare=false would force this tool to be private-only; +// that's needlessly restrictive. The privacy property comes from the +// per-call skill_id pin, not from share-time gating. 
+func NewFileSearch(searcher FileSearcher) tool.Tool { + return tool.NewGatedTool[fileSearchArgs]( + "file_search", + "Full-text search over this skill's saved files. Returns array of {file_id, name, snippet, score} ordered by score desc. Tokens are lowercased + ANDed. Admin-authored only — non-admin callers of an admin-authored public skill still see only that skill's files.", + tool.Permission{ + AuthoringRequirement: tool.RequirementAdmin, + OperatesOn: tool.ScopeCaller, + SafeForShare: true, + Categories: []string{"storage", "read"}, + }, + func(ctx context.Context, inv tool.Invocation, args fileSearchArgs) (string, error) { + if searcher == nil { + return "", fmt.Errorf("file_search: not configured") + } + if args.Query == "" { + return "", fmt.Errorf("file_search: query required") + } + limit := args.Limit + if limit <= 0 { + limit = 25 + } + if limit > 100 { + limit = 100 + } + scope := args.Scope + if scope != "" { + if err := ValidateScope(inv, scope, inv.CallerIsAdmin); err != nil { + return "", fmt.Errorf("file_search: %w", err) + } + } + + // Pin skill_id to the invoking skill — even if the LLM + // supplies a different value somewhere, the handler always + // scopes to inv.SkillID. This is the privacy guarantee + // referenced in the package doc. 
+ rows, err := searcher.SearchFiles(ctx, inv.SkillID, scope, args.Query, limit) + if err != nil { + return "", fmt.Errorf("file_search: %w", err) + } + out := make([]fileSearchHit, 0, len(rows)) + for _, r := range rows { + out = append(out, fileSearchHit{ + FileID: r.FileID, + Name: r.Name, + Mime: r.MimeType, + Snippet: r.Snippet, + Score: r.Score, + }) + } + b, err := json.Marshal(out) + if err != nil { + return "", fmt.Errorf("file_search: marshal: %w", err) + } + return string(b), nil + }, + ) +} diff --git a/tools/file_storage.go b/tools/file_storage.go new file mode 100644 index 0000000..2fea928 --- /dev/null +++ b/tools/file_storage.go @@ -0,0 +1,49 @@ +// file_storage.go declares the narrow FileStorage interface that the +// four v4 file tools (file_save, file_get, file_list, file_delete) +// need at execute time. +// +// Why a narrow interface (vs importing pkg/logic/skills directly): same +// cycle constraint as kv_storage.go — pkg/logic/skills imports +// pkg/skilltools, so we mirror the FileMeta shape here and let +// pkg/logic/mort.go adapt at wiring time. +// +// FileDomainMeta is field-for-field with skills.FileMeta; the production +// adapter is a struct copy. +package tools + +import ( + "context" + "errors" + "time" +) + +// FileStorage is the narrow surface file tools need from the skills +// package. Production wiring (mort.go) bridges *skills.System.Storage(). +// nil-safe: tools constructed against a nil FileStorage surface "not +// configured" at the first call. +type FileStorage interface { + FileSave(ctx context.Context, meta FileDomainMeta, content []byte) (string, error) + FileGet(ctx context.Context, fileID string) (*FileDomainMeta, []byte, error) + FileList(ctx context.Context, skillID, scope string) ([]FileDomainMeta, error) + FileDelete(ctx context.Context, fileID string) error + FileUsageBytes(ctx context.Context, skillID string) (int64, error) +} + +// FileDomainMeta mirrors skills.FileMeta. 
Field-for-field; the +// production adapter is a struct copy. +type FileDomainMeta struct { + ID string // UUID, the public file_id + SkillID string + Scope string + Name string + ContentHash string // SHA256 hex + MimeType string + SizeBytes int64 + CreatedAt time.Time +} + +// ErrFileNotFound mirrors skills.ErrFileNotFound. The production +// adapter returns this sentinel when wrapping a skills.ErrFileNotFound; +// tools detect it with errors.Is to surface a "not_found" string to the +// LLM rather than a generic error. +var ErrFileNotFound = errors.New("file: not found") diff --git a/tools/integration_test.go b/tools/integration_test.go new file mode 100644 index 0000000..d9cf39e --- /dev/null +++ b/tools/integration_test.go @@ -0,0 +1,73 @@ +package tools_test + +import ( + "context" + "encoding/json" + "testing" + + "gitea.stevedudenhoeffer.com/steve/majordomo/llm" + "gitea.stevedudenhoeffer.com/steve/majordomo/provider/fake" + + "gitea.stevedudenhoeffer.com/steve/executus/run" + "gitea.stevedudenhoeffer.com/steve/executus/tool" + "gitea.stevedudenhoeffer.com/steve/executus/tools" +) + +// TestExecutorRunsToolUsingAgent is the end-to-end proof that a host can +// register a generic tool and the executor runs an agent that CALLS it: the +// fake model emits a `think` tool call, the executor dispatches it through the +// registered tool, then the model finalises. Exercises the full tool-dispatch +// loop + step instrumentation. +func TestExecutorRunsToolUsingAgent(t *testing.T) { + reg := tool.NewRegistry() + if err := tools.Register(reg); err != nil { + t.Fatalf("register tools: %v", err) + } + + fp := fake.New("fake") + fp.Enqueue("test-model", + // Step 1: the model decides to call `think`. + fake.ReplyWith(llm.Response{ + ToolCalls: []llm.ToolCall{{ + ID: "call-1", + Name: "think", + Arguments: json.RawMessage(`{"thought":"plan: answer briefly"}`), + }}, + }), + // Step 2: with the tool result in hand, the model finalises. 
+ fake.Reply("all done"), + ) + m, err := fp.Model("test-model") + if err != nil { + t.Fatalf("fake model: %v", err) + } + + ex := run.New(run.Config{ + Registry: reg, + Models: func(ctx context.Context, _ string) (context.Context, llm.Model, error) { + return ctx, m, nil + }, + }) + + res := ex.Run(context.Background(), + run.RunnableAgent{Name: "thinker", ModelTier: "test-model", LowLevelTools: []string{"think"}}, + tool.Invocation{RunID: "run-tool-1", CallerID: "c"}, + "do the thing") + + if res.Err != nil { + t.Fatalf("run error: %v", res.Err) + } + if res.Output != "all done" { + t.Fatalf("output = %q, want %q", res.Output, "all done") + } + // The step instrumentation should have captured the think call. + var sawThink bool + for _, s := range res.Steps { + if s.Title == "think" { + sawThink = true + } + } + if !sawThink { + t.Errorf("expected a `think` step in Result.Steps, got %d steps: %+v", len(res.Steps), res.Steps) + } +} diff --git a/tools/kv_delete.go b/tools/kv_delete.go new file mode 100644 index 0000000..d44b00a --- /dev/null +++ b/tools/kv_delete.go @@ -0,0 +1,52 @@ +// kv_delete removes a single entry by (scope, key). Missing rows +// surface as the literal string "not_found" rather than an error so the +// LLM can reason "did this row exist?" without wrapping the call in +// error handling. +package tools + +import ( + "context" + "errors" + "fmt" + + "gitea.stevedudenhoeffer.com/steve/executus/tool" +) + +type kvDeleteArgs struct { + Scope string `json:"scope" description:"Storage scope: 'skill', 'user:', 'run:', or 'root_run:'."` + Key string `json:"key" description:"Key within the scope."` +} + +// NewKVDelete constructs the kv_delete tool. storage nil → "not +// configured" at execute time. +func NewKVDelete(storage KVStorage) tool.Tool { + return tool.NewGatedTool[kvDeleteArgs]( + "kv_delete", + "Remove an entry by (scope, key). 
Returns 'ok' on success or 'not_found' if no row matched.", + tool.Permission{ + AuthoringRequirement: tool.RequirementAnyone, + OperatesOn: tool.ScopeCaller, + SafeForShare: true, + Categories: []string{"storage", "write"}, + }, + func(ctx context.Context, inv tool.Invocation, args kvDeleteArgs) (string, error) { + if storage == nil { + return "", fmt.Errorf("kv_delete: not configured") + } + if err := ValidateScope(inv, args.Scope, inv.CallerIsAdmin); err != nil { + return "", fmt.Errorf("kv_delete: %w", err) + } + if args.Key == "" { + return "", fmt.Errorf("kv_delete: key required") + } + + if err := storage.KVDelete(ctx, kvPartition(inv, args.Scope), args.Scope, args.Key); err != nil { + if errors.Is(err, ErrKVNotFound) { + return "not_found", nil + } + return "", fmt.Errorf("kv_delete: %w", err) + } + return "ok", nil + }, + ) +} diff --git a/tools/kv_get.go b/tools/kv_get.go new file mode 100644 index 0000000..8e1fcac --- /dev/null +++ b/tools/kv_get.go @@ -0,0 +1,63 @@ +// kv_get is the v4 KV-storage read tool. It looks up a single value by +// (scope, key) within the calling skill's KV namespace and returns the +// stored JSON value, or `null` when no row matches. +// +// Why "null" on miss (vs an error): the LLM's most natural use is +// "fetch this if cached, otherwise compute and store". Miss-as-error +// would force the agent to wrap every call in error handling; miss-as- +// null collapses the happy path. +package tools + +import ( + "context" + "errors" + "fmt" + + "gitea.stevedudenhoeffer.com/steve/executus/tool" +) + +type kvGetArgs struct { + Scope string `json:"scope" description:"Storage scope: 'skill' (shared across all callers of this skill), 'user:' (per-caller), 'run:' (this run's scratchpad), or 'root_run:' (shared scratchpad of this whole dispatch tree — use to coordinate with parallel sibling workers)."` + Key string `json:"key" description:"Key within the scope."` +} + +// NewKVGet constructs the kv_get tool. 
storage may be nil — the tool +// then surfaces "not configured" at execute time instead of failing +// registration. +// +// Permission: anyone may author; safe for share. The scope check at +// handler entry makes share-safety meaningful — a shared skill cannot +// read another caller's `user:` bucket because ValidateScope +// rejects that. +func NewKVGet(storage KVStorage) tool.Tool { + return tool.NewGatedTool[kvGetArgs]( + "kv_get", + "Look up a value by key in this skill's storage. Returns the stored JSON value, or `null` if no row matches the (scope, key) tuple.", + tool.Permission{ + AuthoringRequirement: tool.RequirementAnyone, + OperatesOn: tool.ScopeCaller, + SafeForShare: true, + Categories: []string{"storage", "read"}, + }, + func(ctx context.Context, inv tool.Invocation, args kvGetArgs) (string, error) { + if storage == nil { + return "", fmt.Errorf("kv_get: not configured") + } + if err := ValidateScope(inv, args.Scope, inv.CallerIsAdmin); err != nil { + return "", fmt.Errorf("kv_get: %w", err) + } + if args.Key == "" { + return "", fmt.Errorf("kv_get: key required") + } + + entry, err := storage.KVGet(ctx, kvPartition(inv, args.Scope), args.Scope, args.Key) + if err != nil { + if errors.Is(err, ErrKVNotFound) { + return "null", nil + } + return "", fmt.Errorf("kv_get: %w", err) + } + return string(entry.Value), nil + }, + ) +} diff --git a/tools/kv_list.go b/tools/kv_list.go new file mode 100644 index 0000000..be82d71 --- /dev/null +++ b/tools/kv_list.go @@ -0,0 +1,88 @@ +// kv_list returns metadata (key, size, expiry) for entries within a +// scope, optionally filtered by key prefix. Values are NOT loaded — +// listing is a hot path that should stay light, and dumping every +// value byte into the LLM context would burn tokens for no benefit. 
+package tools + +import ( + "context" + "encoding/json" + "fmt" + "time" + + "gitea.stevedudenhoeffer.com/steve/executus/tool" +) + +const ( + kvListDefaultLimit = 100 + kvListMaxLimit = 1000 +) + +type kvListArgs struct { + Scope string `json:"scope" description:"Storage scope: 'skill', 'user:', 'run:', or 'root_run:'."` + Prefix string `json:"prefix,omitempty" description:"Optional key-prefix filter. Empty matches all keys in the scope."` + Limit int `json:"limit,omitempty" description:"Max entries to return. Default 100, hard cap 1000."` +} + +type kvListEntry struct { + Key string `json:"key"` + SizeBytes int `json:"size_bytes"` + // ExpiresAt is RFC3339 when set, "" otherwise. JSON serialised this + // way so the LLM can reason about it as a string field consistently + // (rather than null vs. missing key). + ExpiresAt string `json:"expires_at,omitempty"` +} + +// NewKVList constructs the kv_list tool. storage nil → "not configured" +// at execute time. +func NewKVList(storage KVStorage) tool.Tool { + return tool.NewGatedTool[kvListArgs]( + "kv_list", + "List keys + sizes + expiries in a scope (optionally filtered by key prefix). Returns a JSON array. 
Does NOT include values — call kv_get to fetch a specific value.", + tool.Permission{ + AuthoringRequirement: tool.RequirementAnyone, + OperatesOn: tool.ScopeCaller, + SafeForShare: true, + Categories: []string{"storage", "read"}, + }, + func(ctx context.Context, inv tool.Invocation, args kvListArgs) (string, error) { + if storage == nil { + return "", fmt.Errorf("kv_list: not configured") + } + if err := ValidateScope(inv, args.Scope, inv.CallerIsAdmin); err != nil { + return "", fmt.Errorf("kv_list: %w", err) + } + + limit := args.Limit + if limit <= 0 { + limit = kvListDefaultLimit + } + if limit > kvListMaxLimit { + limit = kvListMaxLimit + } + + rows, err := storage.KVList(ctx, kvPartition(inv, args.Scope), args.Scope, args.Prefix, limit) + if err != nil { + return "", fmt.Errorf("kv_list: %w", err) + } + + out := make([]kvListEntry, 0, len(rows)) + for _, r := range rows { + e := kvListEntry{ + Key: r.Key, + SizeBytes: len(r.Value), + } + if r.ExpiresAt != nil { + e.ExpiresAt = r.ExpiresAt.Format(time.RFC3339) + } + out = append(out, e) + } + + b, err := json.Marshal(out) + if err != nil { + return "", fmt.Errorf("kv_list: marshal: %w", err) + } + return string(b), nil + }, + ) +} diff --git a/tools/kv_set.go b/tools/kv_set.go new file mode 100644 index 0000000..43754b7 --- /dev/null +++ b/tools/kv_set.go @@ -0,0 +1,145 @@ +// kv_set is the v4 KV-storage write tool. It upserts (scope, key) → +// value within the calling skill's namespace, with optional TTL. +// +// Per-value cap: the constructor takes maxValueBytes (typically read +// from convar `skills.storage.kv_max_value_bytes`); 0 means use the +// 64 KiB default. +// +// Per-skill quota (sum across all rows): the constructor's QuotaProvider +// arg drives the v4 Phase 4 enforcement. nil disables enforcement +// (useful for tests and admin-only deployments). 
The check is: +// +// used := storage.KVUsageBytes(skill) +// delta := len(new value) - len(prior value if updating same key) +// if used + delta > kvMax → quota_exceeded +// +// We subtract the existing value's size on UPDATE so an in-place edit +// of a hot key never trips the cap unless the new value is larger. +package tools + +import ( + "context" + "encoding/json" + "errors" + "fmt" + "time" + + "gitea.stevedudenhoeffer.com/steve/executus/tool" +) + +const defaultKVMaxValueBytes = 65536 // 64 KiB + +type kvSetArgs struct { + Scope string `json:"scope" description:"Storage scope: 'skill', 'user:', 'run:', or 'root_run:' (shared across the whole dispatch tree)."` + Key string `json:"key" description:"Key within the scope."` + Value json.RawMessage `json:"value" description:"JSON value to store. Must parse as valid JSON (object, array, string, number, bool, or null)."` + TTLSeconds *int `json:"ttl_seconds,omitempty" description:"Optional TTL in seconds. The entry expires (and is lazy-purged on read) after this duration."` +} + +// NewKVSet constructs the kv_set tool. +// +// storage nil → "not configured" at execute time. +// maxValueBytes <= 0 falls back to defaultKVMaxValueBytes. +// quota nil → per-skill quota check is skipped (per-value cap still +// applies). +func NewKVSet(storage KVStorage, quota QuotaProvider, maxValueBytes int) tool.Tool { + if maxValueBytes <= 0 { + maxValueBytes = defaultKVMaxValueBytes + } + return tool.NewGatedTool[kvSetArgs]( + "kv_set", + "Set a value at the given scope+key. 
Optionally with a TTL after which the entry auto-expires.", + tool.Permission{ + AuthoringRequirement: tool.RequirementAnyone, + OperatesOn: tool.ScopeCaller, + SafeForShare: true, + Categories: []string{"storage", "write"}, + }, + func(ctx context.Context, inv tool.Invocation, args kvSetArgs) (string, error) { + if storage == nil { + return "", fmt.Errorf("kv_set: not configured") + } + if err := ValidateScope(inv, args.Scope, inv.CallerIsAdmin); err != nil { + return "", fmt.Errorf("kv_set: %w", err) + } + if args.Key == "" { + return "", fmt.Errorf("kv_set: key required") + } + if len(args.Value) == 0 { + return "", fmt.Errorf("kv_set: value required") + } + if len(args.Value) > maxValueBytes { + return "", fmt.Errorf("kv_set: value exceeds max %d bytes (got %d)", maxValueBytes, len(args.Value)) + } + + // Validate JSON. The storage layer treats the raw bytes as + // opaque, but the LLM contract says "value is a JSON value" + // — surfacing a parse error here gives a friendlier message + // than letting an invalid blob round-trip and confuse the + // reader on a future kv_get. + var probe any + if err := json.Unmarshal(args.Value, &probe); err != nil { + return "", fmt.Errorf("kv_set: value is not valid JSON: %w", err) + } + + partition := kvPartition(inv, args.Scope) + + // Per-skill quota gate (v4 Phase 4). Skipped when quota is nil + // (tests / admin opt-out) so the per-value cap above is the + // only line of defence in that mode. Also skipped for the + // shared root_run partition — per-skill quota attribution is + // meaningless across the sentinel; the per-value cap above + + // the run-scope sweeper bound that partition's growth. 
+ if quota != nil && partition == inv.SkillID { + kvMax, _, err := quota.EffectiveQuota(ctx, inv.SkillID) + if err != nil { + return "", fmt.Errorf("kv_set: quota lookup: %w", err) + } + used, err := storage.KVUsageBytes(ctx, inv.SkillID) + if err != nil { + return "", fmt.Errorf("kv_set: usage check: %w", err) + } + delta := int64(len(args.Value)) + // On UPDATE, subtract the prior value's size so an + // in-place edit of a hot key doesn't double-count. A + // brand-new key (KVGet returns ErrKVNotFound) leaves + // delta untouched. + if existing, getErr := storage.KVGet(ctx, inv.SkillID, args.Scope, args.Key); getErr == nil && existing != nil { + delta -= int64(len(existing.Value)) + } else if getErr != nil && !errors.Is(getErr, ErrKVNotFound) { + return "", fmt.Errorf("kv_set: pre-write lookup: %w", getErr) + } + if used+delta > kvMax { + return "", fmt.Errorf("kv_set: quota_exceeded — %d/%d bytes used; ask admin for higher quota", used, kvMax) + } + } + + now := time.Now() + entry := KVDomainEntry{ + SkillID: partition, + Scope: args.Scope, + Key: args.Key, + Value: args.Value, + CreatedAt: now, + UpdatedAt: now, + } + if args.TTLSeconds != nil && *args.TTLSeconds > 0 { + expires := now.Add(time.Duration(*args.TTLSeconds) * time.Second) + entry.ExpiresAt = &expires + } + + if err := storage.KVSet(ctx, entry); err != nil { + return "", fmt.Errorf("kv_set: %w", err) + } + // V7 versioned KV history (admin diagnostic). Best-effort — + // a failed history write must NOT shadow the successful + // kv_set return, so we ignore the error after logging. + // Production adapter satisfies KVHistoryRecorder; tests + // using a bare KVStorage skip this branch entirely. 
+ if h, ok := storage.(KVHistoryRecorder); ok && h != nil { + _ = h.RecordKVHistory(ctx, partition, args.Scope, args.Key, []byte(args.Value), inv.CallerID) + } + return "ok", nil + }, + ) +} diff --git a/tools/kv_storage.go b/tools/kv_storage.go new file mode 100644 index 0000000..3166d9b --- /dev/null +++ b/tools/kv_storage.go @@ -0,0 +1,89 @@ +// kv_storage.go declares the narrow KV-storage interface that the four +// KV tools (kv_get, kv_set, kv_list, kv_delete) need at execute time. +// +// Why a narrow interface (vs importing pkg/logic/skills directly): +// pkg/logic/skills imports pkg/skilltools (for Invocation + Tool), so +// importing skills back here would form an import cycle. Production +// wiring (pkg/logic/mort.go, deferred) will supply a concrete adapter +// that wraps `*skills.System.Storage()` and translates between +// skills.KVEntry and the local KVDomainEntry shape. +// +// Why a *separate* domain shape (KVDomainEntry) vs reusing skills.KVEntry: +// the cycle break has to be complete — even importing the type would +// pull skills into skilltools/tools' import graph. The two shapes mirror +// each other field-for-field; the adapter is a trivial struct copy. +// +// The same pattern is used by skill_invoke.go (SkillInvokerProvider). +package tools + +import ( + "context" + "encoding/json" + "errors" + "strings" + "time" + + "gitea.stevedudenhoeffer.com/steve/executus/tool" +) + +// kvPartition picks the skill_id partition for a KV operation. KV rows +// are keyed (skill_id, scope, key); for the shared `root_run:` +// scope, every run in a dispatch tree — including ephemeral workers +// with distinct agent IDs — must land in ONE partition or siblings +// could never see each other's writes. The sentinel +// tool.RootRunKVPartition is that shared partition; isolation +// between trees is preserved because the scope string embeds the root +// run id, which ValidateScope checks against inv.RootRunID. 
+func kvPartition(inv tool.Invocation, scope string) string { + if strings.HasPrefix(scope, "root_run:") { + return tool.RootRunKVPartition + } + return inv.SkillID +} + +// KVStorage is the narrow surface KV tools need from the skills package. +// nil-safe: tools constructed against a nil KVStorage surface a clean +// "not configured" error at the first call rather than crashing. +type KVStorage interface { + KVGet(ctx context.Context, skillID, scope, key string) (*KVDomainEntry, error) + KVSet(ctx context.Context, e KVDomainEntry) error + KVList(ctx context.Context, skillID, scope, prefix string, limit int) ([]KVDomainEntry, error) + KVDelete(ctx context.Context, skillID, scope, key string) error + KVUsageBytes(ctx context.Context, skillID string) (int64, error) +} + +// KVHistoryRecorder is the OPTIONAL post-write hook for the v7 +// versioned KV history. The kv_set tool checks for this interface via +// type assertion; production storage adapters that satisfy it write a +// history row AFTER a successful KVSet. +// +// Why optional (vs adding to KVStorage): existing test fakes don't +// need to grow a method. Production wires the real adapter which +// satisfies the interface; tests that don't care about history skip +// the implementation entirely. +// +// Why only on success: a failed KVSet leaves no skill_kv row to refer +// to; appending a history entry would create an orphan record of a +// change that didn't happen. +type KVHistoryRecorder interface { + RecordKVHistory(ctx context.Context, skillID, scope, key string, value []byte, changedBy string) error +} + +// KVDomainEntry mirrors skills.KVEntry without pulling in the cycle. +// Field-for-field with the skills package's KVEntry; the production +// adapter is a struct copy. 
+type KVDomainEntry struct { + SkillID string + Scope string // "skill" | "user:" | "run:" + Key string + Value json.RawMessage + ExpiresAt *time.Time + CreatedAt time.Time + UpdatedAt time.Time +} + +// ErrKVNotFound mirrors skills.ErrKVNotFound. The production adapter +// returns this sentinel when wrapping a skills.ErrKVNotFound; tools +// detect it with errors.Is to surface "not_found" to the LLM rather +// than a generic error. +var ErrKVNotFound = errors.New("kv: not found") diff --git a/tools/now.go b/tools/now.go new file mode 100644 index 0000000..051d73e --- /dev/null +++ b/tools/now.go @@ -0,0 +1,101 @@ +package tools + +import ( + "context" + "encoding/json" + "fmt" + "strings" + "time" + + "gitea.stevedudenhoeffer.com/steve/executus/tool" +) + +// nowParams is the LLM-facing param struct for current_time / now. +// +// Why optional `timezone`: most agent prompts know the user's local +// timezone (it's in the chatbot's system prompt) but the agent has +// no way to override on a per-call basis. An explicit arg lets a +// research skill ask "what time is it in NYC for the user reading +// this report?" without needing access to a member-config lookup +// tool. +type nowParams struct { + Timezone string `json:"timezone,omitempty" description:"Optional IANA timezone name (e.g. 'America/Chicago', 'Europe/London'). Defaults to the calling user's configured timezone, falling back to UTC."` +} + +// nowResponse is the JSON envelope returned to the agent. +// +// Why a structured shape: the v1 tool returned a markdown blob. +// Agents that needed just the year had to substring-parse, which +// fails on locale variations. JSON lets the agent pick the field +// it cares about. 
+type nowResponse struct { + NowISO string `json:"now_iso"` + NowHuman string `json:"now_human"` + Timezone string `json:"timezone"` + Weekday string `json:"weekday"` + Year int `json:"year"` + Month int `json:"month"` + Day int `json:"day"` + Hour int `json:"hour"` + Minute int `json:"minute"` + Second int `json:"second"` + Warning string `json:"warning,omitempty"` +} + +// NewNow constructs the v11 current_time / now tool. The provider +// supplies the calling member's configured timezone (per-user +// localisation). nil falls back to UTC. +// +// V11 keeps the registered tool name "now" for back-compat with the +// existing tool catalog tests AND adds the same tool surface under +// the agent-facing description "current time". The design spec +// called the tool "current_time" but the v1 registry already used +// "now" — switching the registry name would break stored skills' +// `tools` lists. Same name, expanded behaviour. +func NewNow(provider CurrentTimeProvider) tool.Tool { + return tool.NewGatedTool[nowParams]( + "now", + "Return the current time. Optional 'timezone' (IANA name e.g. 'America/Chicago'); defaults to the calling user's configured timezone or UTC. Returns ISO + human-readable formats plus structured year/month/day/weekday for time-relative reasoning. 
Use this BEFORE assuming a year — the agent's knowledge cut-off may differ from real time.", + tool.Permission{ + AuthoringRequirement: tool.RequirementAnyone, + OperatesOn: tool.ScopeGlobal, + SafeForShare: true, + Categories: []string{"utility"}, + }, + func(ctx context.Context, inv tool.Invocation, p nowParams) (string, error) { + tzName := strings.TrimSpace(p.Timezone) + warning := "" + if tzName == "" && provider != nil { + tzName = provider.UserTimezone(ctx, inv.CallerID) + } + if tzName == "" { + tzName = "UTC" + } + loc, err := time.LoadLocation(tzName) + if err != nil { + warning = fmt.Sprintf("unknown timezone %q; falling back to UTC", tzName) + tzName = "UTC" + loc = time.UTC + } + t := time.Now().In(loc) + out := nowResponse{ + NowISO: t.Format(time.RFC3339), + NowHuman: t.Format("Monday, January 2, 2006 at 3:04 PM MST"), + Timezone: tzName, + Weekday: t.Weekday().String(), + Year: t.Year(), + Month: int(t.Month()), + Day: t.Day(), + Hour: t.Hour(), + Minute: t.Minute(), + Second: t.Second(), + Warning: warning, + } + b, mErr := json.Marshal(out) + if mErr != nil { + return "", fmt.Errorf("now: marshal: %w", mErr) + } + return string(b), nil + }, + ) +} diff --git a/tools/quota_provider.go b/tools/quota_provider.go new file mode 100644 index 0000000..72a3879 --- /dev/null +++ b/tools/quota_provider.go @@ -0,0 +1,32 @@ +// quota_provider.go declares the narrow QuotaProvider interface used by +// kv_set and file_save to enforce per-skill byte quotas at write time. +// +// Why a narrow interface (vs importing pkg/logic/skills directly): same +// cycle constraint as kv_storage.go and file_storage.go — pkg/logic/skills +// already imports pkg/skilltools, so importing skills back here would +// form an import cycle. Production wiring (pkg/logic/mort.go) supplies +// *skills.System, which satisfies QuotaProvider via its EffectiveQuota +// method. 
+// +// Why a separate interface vs adding the method to KVStorage/FileStorage: +// quota resolution is a system-level policy (combining override + convar +// + default), not a pure storage read. Keeping it separate lets a tool +// constructor accept a nil QuotaProvider when an integrator wants to +// skip enforcement (e.g. an admin-only skill that bypasses caps). +package tools + +import "context" + +// QuotaProvider returns effective per-skill quotas for the storage +// tools' write-path enforcement. Production wires *skills.System, which +// satisfies this via its EffectiveQuota method. +// +// nil-safe: tools constructed against a nil QuotaProvider do NOT enforce +// per-skill quotas. That mode is useful for tests and for environments +// where quota enforcement is intentionally disabled. +type QuotaProvider interface { + // EffectiveQuota returns the effective KV and file byte caps for the + // skill. The two values resolve admin overrides + convar defaults + + // package constants in that order. + EffectiveQuota(ctx context.Context, skillID string) (kvMax, filesMax int64, err error) +} diff --git a/tools/research_defaults.go b/tools/research_defaults.go new file mode 100644 index 0000000..a53773f --- /dev/null +++ b/tools/research_defaults.go @@ -0,0 +1,97 @@ +package tools + +import ( + "context" + "sync" +) + +// DefaultResearchConfig returns a ResearchConfig pinned to the v11 +// design defaults. Production wiring overrides via a convar-aware +// adapter; tests use the defaults directly. 
+func DefaultResearchConfig() ResearchConfig { + return defaultResearchConfig{} +} + +type defaultResearchConfig struct{} + +func (defaultResearchConfig) MaxInlineBytes(_ context.Context) int { return 12 * 1024 } +func (defaultResearchConfig) PDFMaxPages(_ context.Context) int { return 50 } +func (defaultResearchConfig) WebSearchEnabled(_ context.Context) bool { return true } +func (defaultResearchConfig) WebSearchMaxPerRun(_ context.Context) int { return 10 } +func (defaultResearchConfig) ReadPageMaxPerRun(_ context.Context) int { return 10 } +func (defaultResearchConfig) VideoMaxPerRun(_ context.Context) int { return 5 } +func (defaultResearchConfig) VerifyURLMaxPerRun(_ context.Context) int { return 20 } +func (defaultResearchConfig) ReadPDFMaxPerRun(_ context.Context) int { return 5 } +func (defaultResearchConfig) HTTPGetMaxPerRun(_ context.Context) int { return 20 } +func (defaultResearchConfig) HTTPPostMaxPerRun(_ context.Context) int { return 20 } +func (defaultResearchConfig) WebSearchAugmentThreshold(_ context.Context) int { return 5 } + +// InMemorySearchBudget is the package-default SearchBudget — a +// simple per-(run,kind) counter held in a map. NOT +// production-correct because the map persists across the process +// lifetime; production wiring MUST plug a per-run reset. +// +// Why a default at all: tests want a working SearchBudget without +// rolling their own. Documenting the production-correctness gap +// here keeps the production adapter (in mort.go) honest. +type InMemorySearchBudget struct { + cap map[string]int // by kind; "" means "use Default" + + mu sync.Mutex + counts map[string]int // key = runID+"|"+kind +} + +// NewInMemorySearchBudget constructs a default SearchBudget. Pass a +// per-kind cap map (e.g. {"web_search": 10, "read_page": 10}); kinds +// missing from the map fall back to maxPerKindDefault. 
+func NewInMemorySearchBudget(caps map[string]int) *InMemorySearchBudget { + if caps == nil { + caps = map[string]int{} + } + return &InMemorySearchBudget{ + cap: caps, + counts: make(map[string]int), + } +} + +// CheckAndIncrement implements SearchBudget. Returns the count AFTER +// incrementing on success; the counter is NOT incremented when the +// call would exceed the cap (so a "search_budget_exceeded" rejection +// doesn't burn budget on retry). +func (b *InMemorySearchBudget) CheckAndIncrement(_ context.Context, runID, kind string) (int, int, bool) { + max := b.cap[kind] + if max <= 0 { + max = 10 // safe default + } + b.mu.Lock() + defer b.mu.Unlock() + key := runID + "|" + kind + cur := b.counts[key] + if cur >= max { + return cur, max, true + } + b.counts[key] = cur + 1 + return cur + 1, max, false +} + +// ResetRun is a test helper: clears the counters for a single run +// across all kinds. Production wiring uses its own per-run lifecycle +// (the executor's RunFinalizer interface). +func (b *InMemorySearchBudget) ResetRun(runID string) { + b.mu.Lock() + defer b.mu.Unlock() + prefix := runID + "|" + for k := range b.counts { + if len(k) > len(prefix) && k[:len(prefix)] == prefix { + delete(b.counts, k) + } + } +} + +// StaticTimeProvider is the package-default CurrentTimeProvider — +// returns "" for every member (the tool then falls back to UTC). +// Tests that need a specific timezone wire a one-line struct. +type StaticTimeProvider struct{} + +// UserTimezone implements CurrentTimeProvider with a flat fallback to "". +func (StaticTimeProvider) UserTimezone(_ context.Context, _ string) string { return "" } diff --git a/tools/research_providers.go b/tools/research_providers.go new file mode 100644 index 0000000..d8585d2 --- /dev/null +++ b/tools/research_providers.go @@ -0,0 +1,332 @@ +// Package tools — research provider plumbing for v11. 
+// +// This file declares the narrow interfaces v11's research tools +// (web_search, read_page, read_video, read_pdf, verify_url, etc.) need +// at execute time. Production wiring lives in pkg/logic/mort.go and +// closes over the searcher chain, the extractor / chromedp client, the +// PDF extractor, and the yt-dlp wrapper. +// +// Why narrow interfaces (vs importing pkg/logic/searcher / extractor +// directly): the same cycle-break pattern used by KVStorage, FileStorage, +// HTTPConfigProvider — keeps pkg/skilltools/tools free of the wiring +// layer so tests can stub each dependency. Each provider is nil-safe: +// the tool surfaces "not configured" at first call rather than failing +// at registration. +// +// Test: each tool under pkg/skilltools/tools/ wired against these +// interfaces has its own *_test.go using the in-package fakes in +// research_providers_fakes_test.go. +package tools + +import ( + "context" + "errors" + "time" +) + +// PageCache is the narrow surface read_page (and read_pdf) consult to +// avoid re-fetching the same URL within the cache's TTL. Production +// wiring bridges this interface to the legacy *cache.Cache held by +// pkg/logic/query.System so a `.query foo.com` and a +// `.skill query foo.com` for the same URL share one cache slot. +// +// Why a narrow interface (vs importing the cache package directly): +// same cycle-break pattern as KVStorage / FileStorage / CitationStorage +// — keeps pkg/skilltools/tools free of the wiring layer. The legacy +// cache slot key is `sha256(url)`; the production adapter is +// responsible for hashing so this interface stays clean (raw URL in/out) +// and skill-tool authors never need to know the slot shape. +// +// nil-safe: a tool constructed with a nil PageCache simply skips the +// cache layer (always treat Get as a miss; Set is a no-op). +// +// Test: tests pass a fake PageCache that records Get/Set calls and +// returns canned hits. 
See page_cache_test.go for the read_page hit / +// miss scenarios. +type PageCache interface { + // Get returns the cached body for urlStr and true on hit, or + // (nil, false) on miss. Implementations MUST treat any backing- + // store error as a miss (best-effort, never fail the caller). + Get(ctx context.Context, urlStr string) ([]byte, bool) + + // Set writes body under the slot for urlStr with the supplied TTL. + // Implementations MUST swallow backing-store errors (best-effort + // caching is correct: a write failure should not propagate to the + // agent loop). + Set(ctx context.Context, urlStr string, body []byte, ttl time.Duration) +} + +// PageCacheTTL is the default TTL applied by tools that consult a +// PageCache. Mirrors the legacy `query.pageCacheTTL` constant +// (1 hour) so a `.query`-warmed slot reads back from a `.skill query` +// (and vice versa) within the same window. +// +// Tools that want a different TTL pass an explicit value to +// PageCache.Set; this constant is the project default the v11 / v-research +// tools all use. +const PageCacheTTL = 1 * time.Hour + +// PageExtractor is the narrow surface read_page needs at execute +// time. The production adapter wraps mort's existing extractor +// (Ollama web_fetch first, chromedp fallback on JS-heavy pages). +// +// nil-safe: a tool constructed with a nil PageExtractor surfaces +// "not configured" at first call. +// +// Why: read_page used to be a thin io.ReadAll over the URL — it +// missed JS rendering, didn't honour the v6 page cache, and could +// not surface the underlying provider name. v11 routes through this +// interface so the production wiring (mort.go) can plug in the +// existing query-side extractor without exposing query.Agent. +type PageExtractor interface { + // ExtractPage fetches and extracts readable text from urlStr. 
+ // Returns the extracted body, a final URL (after any redirects + // the extractor followed), the provider name ("ollama" | + // "chromedp" | "ytdlp"), and an error. + // + // The returned body is the FULL extracted text — callers apply + // the v10 byte-vs-reference cap before surfacing to the agent. + // + // bypassCache=true skips any page cache and forces a fresh + // extraction. Default false. + ExtractPage(ctx context.Context, urlStr string, bypassCache bool) (text string, finalURL string, provider string, err error) +} + +// VideoTranscriber is the narrow surface read_video needs at +// execute time. Production wiring wraps internal/ytdlp. +// +// nil-safe: tool surfaces "not configured" at first call. +// +// Why a separate interface from PageExtractor: video is a different +// shape (transcript + metadata) and a different binary (yt-dlp). +// Keeping them distinct lets tests stub each independently. +type VideoTranscriber interface { + // ExtractVideoTranscript returns the transcript text and the + // best-effort metadata (title, duration in seconds, channel). + // Implementations MUST return a non-empty transcript or an + // error — empty-transcript success is interpreted by the tool + // as a "transcript_unavailable" failure. + ExtractVideoTranscript(ctx context.Context, urlStr string) (transcript string, meta VideoMeta, err error) +} + +// VideoMeta is best-effort metadata returned alongside a video +// transcript. Any field may be empty/zero if the implementation +// could not extract it. +type VideoMeta struct { + Title string + Channel string + DurationSeconds int +} + +// PDFFetcher is the narrow surface read_pdf needs at execute time. +// Production wiring uses an HTTP-aware fetcher that HEAD-validates +// content-type before downloading the body. +// +// nil-safe: tool surfaces "not configured" at first call. +// +// Why: a tool that just embedded PDF extraction would couple +// fetching + parsing. 
Splitting the fetch (allowlist + SSRF + +// HEAD check) from the extract (page-level parsing) keeps each +// step testable and lets the same fetcher serve verify_url one +// day if we want a PDF-aware fast path. +type PDFFetcher interface { + // FetchPDF downloads the PDF at urlStr (after HEAD-validating + // content-type) and returns the raw bytes plus the final URL. + // HEAD-validation rejects a URL whose Content-Type is not a + // PDF mime AND whose path does not end in .pdf. + FetchPDF(ctx context.Context, urlStr string) (body []byte, finalURL string, err error) +} + +// PDFExtractor parses PDF bytes into plain text + page count. +// Production wires internal.ExtractPDFText. +// +// Why split from PDFFetcher: tests want to vary the fetch (mock +// server returning bytes) without rebuilding the extractor. +type PDFExtractor interface { + // ExtractPDFText returns the concatenated plain-text content + // of the PDF along with the page count. The caller applies any + // per-page cap and the v10 byte-vs-reference cap on the result. + ExtractPDFText(ctx context.Context, body []byte, maxPages int) (text string, pageCount int, truncated bool, err error) +} + +// HEADChecker is the narrow surface verify_url needs at execute +// time. Production wiring uses the same SSRF-pinned transport as +// http_get so the security envelope is consistent. +// +// Why a separate interface (vs reusing HTTPConfigProvider+doHTTP): +// verify_url's contract is simpler — HEAD only, no body bytes +// returned, and the agent only cares about reachable / status / +// final URL / content-type. A bespoke surface lets the production +// adapter optimise for that path (no body buffer, no body close). +type HEADChecker interface { + // HEAD performs a HEAD request against urlStr (with SSRF + + // allowlist enforcement) and returns the final URL after any + // redirects, the HTTP status code, and the Content-Type header. 
+ // Returns reachable=false with a non-nil err for transport + // failures (DNS, TCP, allowlist rejection); reachable=true with + // any HTTP status (including 4xx/5xx) is the success shape — + // the agent decides whether the URL is "real". + HEAD(ctx context.Context, urlStr string) (finalURL string, status int, contentType string, reachable bool, err error) +} + +// CitationStorage is the narrow surface cite() needs at execute +// time. Production wires *skills.System.Storage(); tests stub. +// +// nil-safe: tool surfaces "not configured" at first call. +// +// Why a narrow interface (vs importing pkg/logic/skills): same +// cycle constraint as KVStorage / FileStorage. Production adapter +// in mort.go bridges to skills.Storage's RecordCitation / +// ListCitations methods AND a separate URL-history tracker. +// +// Two responsibilities, deliberately separate: +// +// 1. RecordCitation writes a row into skill_run_sources — this is +// the user-visible citations table for the Sources panel and +// CSV export. ONLY rows the agent successfully cited via +// cite() land here. +// 2. RecordURLTouch / GetTouchedURLs maintains a per-run set of +// URLs the agent has interacted with (web_search results, +// read_page input, read_pdf input, read_video input). cite() +// reads this set to reject claims for URLs the agent never +// touched. This set lives in a different table or scope from +// the citations table — it's working state, not a record. +type CitationStorage interface { + // RecordCitation appends one (run_id, url, claim, cited_at) + // row to the citations table (skill_run_sources). cited_at is + // set by the storage layer to time.Now() when zero. The caller + // has already verified the URL is in the touched-URL set + // (via GetTouchedURLs); this method is the persistence step. + RecordCitation(ctx context.Context, runID, url, claim string) error + + // RecordURLTouch records that the agent has interacted with + // `url` during `runID`. 
Called by web_search (per result), + // read_page, read_pdf, and read_video. Idempotent — repeat + // calls for the same (run_id, url) are no-ops at the storage + // layer. + RecordURLTouch(ctx context.Context, runID, url string) error + + // GetTouchedURLs returns the set of URLs the run has + // interacted with. Used by cite() to verify that a claim's + // URL is one the agent actually visited. Empty for a fresh + // run — cite() then rejects every claim with + // "url_not_in_run_history". + GetTouchedURLs(ctx context.Context, runID string) (map[string]struct{}, error) + + // ListCitations returns all citations recorded for the run, in + // insertion order. Powers the /skills/{id}/runs/{run_id} + // Sources panel. + ListCitations(ctx context.Context, runID string) ([]CitationRow, error) +} + +// CitationRow mirrors the skill_run_sources row shape. Fields +// match the spec: run_id is implicit in the query, url + claim are +// what the agent submitted, cited_at is the wall-clock timestamp +// at insert. +type CitationRow struct { + URL string + Claim string + CitedAt int64 // unix-seconds; storage adapter normalises from time.Time +} + +// CurrentTimeProvider exposes a "now" + per-user timezone lookup. +// Production wiring closes over the bot's member-config getter. +// +// nil-safe: a tool constructed with a nil provider falls back to +// server-time + UTC (current behaviour of NewNow before v11). +type CurrentTimeProvider interface { + // UserTimezone returns the IANA timezone name configured for + // the given Discord member ID, or "" when the member has no + // timezone configured. Empty fallback is "UTC". + UserTimezone(ctx context.Context, memberID string) string +} + +// SearchBudget is the narrow surface web_search reads at execute +// time to honour skills.web_search.max_per_run. +// +// Production wiring closes over a per-run counter held by the +// executor. 
nil-safe: tool falls back to a built-in package +// counter (process-wide, NOT per-run) — useful for tests but NOT +// production-correct because budget bleeds across runs. The +// production adapter MUST be wired. +type SearchBudget interface { + // CheckAndIncrement returns the current count AFTER incrementing + // for the given runID and kind, the configured max, and + // exceeded=true when the call would exceed the cap. The handler + // returns a clean "search_budget_exceeded" string on exceed (not + // an error, so the agent can react). + CheckAndIncrement(ctx context.Context, runID, kind string) (count, max int, exceeded bool) +} + +// ResearchConfig is the narrow surface that read_page / read_video / +// read_pdf / verify_url read at execute time for per-tool budget caps +// and inline-vs-file_id thresholds. Production wiring closes over +// the relevant convars. +// +// nil-safe: tools fall back to package defaults. +type ResearchConfig interface { + // MaxInlineBytes returns the cap above which extracted text is + // persisted as a file_id under run-scope (v10 byte-vs-reference + // principle). Default 12 KiB. + MaxInlineBytes(ctx context.Context) int + + // PDFMaxPages returns the cap on pages extracted from a PDF + // before truncation. Default 50. + PDFMaxPages(ctx context.Context) int + + // WebSearchEnabled is the master switch for web_search. + WebSearchEnabled(ctx context.Context) bool + + // WebSearchMaxPerRun is the per-run search cap. + WebSearchMaxPerRun(ctx context.Context) int + + // ReadPageMaxPerRun is the per-run page-read cap. + ReadPageMaxPerRun(ctx context.Context) int + + // VideoMaxPerRun is the per-run video-read cap. + VideoMaxPerRun(ctx context.Context) int + + // VerifyURLMaxPerRun is the per-run HEAD-check cap. + VerifyURLMaxPerRun(ctx context.Context) int + + // ReadPDFMaxPerRun is the per-run PDF-read cap. + ReadPDFMaxPerRun(ctx context.Context) int + + // HTTPGetMaxPerRun (v15.2) is the per-run http_get cap.
The agent + // otherwise can retry-storm through random URLs and bloat its own + // context with each tool result. Default 20. + HTTPGetMaxPerRun(ctx context.Context) int + + // HTTPPostMaxPerRun (v15.2) is the per-run http_post cap. Default 20. + HTTPPostMaxPerRun(ctx context.Context) int + + // WebSearchAugmentThreshold is the minimum number of primary + // (Ollama) results required to skip the secondary (DDG/Brave) + // search. When the primary backend returns fewer than this many + // results, the augmented searcher also queries the secondary and + // merges both result sets. Default 5. + WebSearchAugmentThreshold(ctx context.Context) int + + // ReplyChainDepthMax is unused here; placeholder shape for + // future per-tool caps. Kept off this interface — callers reach + // into the convar reader directly when they need it. +} + +// ErrPageExtractionFailed is the sentinel returned by a PageExtractor +// when both Ollama and chromedp paths produce empty content. +var ErrPageExtractionFailed = errors.New("page extraction failed: empty content") + +// ErrVideoTranscriptUnavailable is the sentinel returned by a +// VideoTranscriber when no captions / transcript could be obtained. +var ErrVideoTranscriptUnavailable = errors.New("video transcript unavailable") + +// ErrPDFNotPDF is the sentinel returned by a PDFFetcher when the +// HEAD response indicates a non-PDF content-type AND the URL path +// has no .pdf extension. Surfaces a clean "url_is_not_a_pdf" +// rejection rather than a generic transport error. +var ErrPDFNotPDF = errors.New("url does not serve a PDF") + +// ErrPDFEncrypted is returned by a PDFExtractor when the PDF refuses +// extraction because it is password-protected. Surfaces a clean +// "pdf_encrypted" rejection. 
+var ErrPDFEncrypted = errors.New("pdf is encrypted") diff --git a/tools/scope_validate.go b/tools/scope_validate.go new file mode 100644 index 0000000..02779b1 --- /dev/null +++ b/tools/scope_validate.go @@ -0,0 +1,113 @@ +// scope_validate.go centralises the storage-scope authorisation check +// shared by every v4 KV and file tool. It enforces: +// +// - "skill" — always allowed (the skill's shared, cross-caller area). +// - "user:<id>" — allowed if <id> matches inv.CallerID (or admin). +// - "user:<other>" — allowed only for admin callers. +// - "run:<id>" — allowed if <id> matches inv.RunID (or admin). +// - "run:<other>" — allowed only for admin callers. +// - "root_run:<id>" — allowed if <id> matches inv.RootRunID (or admin): +// the dispatch tree's SHARED scratchpad, readable +// and writable by every run under one root +// (parallel sibling workers coordinate here). +// - any other shape — rejected with a descriptive error. +// +// Why a single helper (vs inline checks in each tool): the parsing rules +// must match exactly across kv_get/set/list/delete and file_save/get/ +// list/delete. Centralising them means one place to fix when the +// vocabulary evolves and one place for the test matrix. +// +// Why the isAdmin parameter: the executor sets inv.CallerIsAdmin via the +// host AdminPolicy and tools pass it through. Keeping it an explicit +// parameter (rather than reading the Invocation here) lets tests +// exercise the admin paths directly. +package tools + +import ( + "fmt" + "strings" + + "gitea.stevedudenhoeffer.com/steve/executus/tool" +) + +// ValidateScope rejects scope strings the caller is not authorised to +// access. See file-level doc for the exact ruleset. +// +// Why isAdmin is parameterised: tests pass true to verify admin paths; +// production tools pass inv.CallerIsAdmin (as summarize does), which +// the executor sets via the host AdminPolicy.
When isAdmin is false the +// caller is limited to its own scopes, plus the parent-run allowances +// in the run: branch. +func ValidateScope(inv tool.Invocation, scope string, isAdmin bool) error { + if scope == "skill" { + return nil + } + if rest, ok := strings.CutPrefix(scope, "user:"); ok { + if rest == "" { + return fmt.Errorf("scope: empty user id after 'user:'") + } + if rest == inv.CallerID { + return nil + } + if isAdmin { + return nil + } + return fmt.Errorf("scope %q: cannot access another user's storage", scope) + } + if rest, ok := strings.CutPrefix(scope, "root_run:"); ok { + if rest == "" { + return fmt.Errorf("scope: empty run id after 'root_run:'") + } + // The dispatch tree's shared scratchpad. Every run in one tree + // carries the same RootRunID (stamped by both executors from the + // dispatchguard chain), so siblings spawned in parallel — even + // ephemeral workers with distinct agent IDs — validate against + // the same scope string. Storage-side, root_run scopes live in + // the shared RootRunKVPartition; this check is the isolation + // boundary between trees. + if rest == inv.RootRunID && inv.RootRunID != "" { + return nil + } + if isAdmin { + return nil + } + return fmt.Errorf("scope %q: cannot access another dispatch tree's storage", scope) + } + if rest, ok := strings.CutPrefix(scope, "run:"); ok { + if rest == "" { + return fmt.Errorf("scope: empty run id after 'run:'") + } + if rest == inv.RunID { + return nil + } + // V10: when this run is a reply continuation, the agent may + // access the PARENT run's scope. The parent's run-scope KV is + // the natural carrier for "ask user a question, save state, + // resume on reply" — without this access, every continuation + // would have to re-derive state from parent_output alone. + // Note: the parent's run-scope is subject to the v4 + // auto-purge (24h after parent finished). Long-delayed replies + // will see an empty scope.
+ if inv.Continuation != nil && rest == inv.Continuation.ParentRunID { + return nil + } + // V14: when this run is invoked via skill_invoke / + // skill_invoke_parallel from a parent skill, the agent may + // access the PARENT run's scope. This is the natural carrier + // for the "scout fans out, parent reads consolidated state" + // pattern that deepresearch uses — research-scout writes its + // touched-URL list under run: and the parent + // reads it back during the investigate phase. Without this + // access, every parent/child handoff would have to be + // serialised through tool-result strings. + if inv.ParentRunID != "" && rest == inv.ParentRunID { + return nil + } + if isAdmin { + return nil + } + return fmt.Errorf("scope %q: cannot access another run's storage", scope) + } + return fmt.Errorf("scope %q: unknown shape; expected 'skill', 'user:', 'run:', or 'root_run:'", scope) +} diff --git a/tools/store.go b/tools/store.go new file mode 100644 index 0000000..0ead05b --- /dev/null +++ b/tools/store.go @@ -0,0 +1,77 @@ +package tools + +import ( + "context" + "errors" + + "gitea.stevedudenhoeffer.com/steve/executus/tool" +) + +// StoreDeps wires the persistent-memory tools (kv_* and file_*). A host +// supplies its KV and/or File backends; the kv group registers only when KV is +// set and the file group only when Files is set, so a host can take just one. +// Everything else has a sensible default: +// +// - Quota defaults to a generous static cap (a host that meters per-skill +// storage supplies its own QuotaProvider). +// - FileSearch / Minter+BaseURL are optional — file_search and +// create_file_url register only when wired. +// - MaxValueBytes / MaxFileBytes default when non-positive. 
+type StoreDeps struct { + KV KVStorage + Files FileStorage + Quota QuotaProvider + FileSearch FileSearcher + Minter FileTokenMinter + BaseURL string + + MaxValueBytes int // kv_set per-value cap; default 256 KiB + MaxFileBytes int // file_save per-file cap; default 16 MiB +} + +// RegisterStore registers the kv_* tools (when KV is set) and the file_* tools +// (when Files is set). At least one of KV/Files is required. +func RegisterStore(reg tool.Registry, d StoreDeps) error { + if d.KV == nil && d.Files == nil { + return errors.New("tools: RegisterStore needs at least KV or Files") + } + if d.Quota == nil { + d.Quota = staticQuota{kvMax: 64 << 20, filesMax: 1 << 30} + } + if d.MaxValueBytes <= 0 { + d.MaxValueBytes = 256 << 10 + } + if d.MaxFileBytes <= 0 { + d.MaxFileBytes = 16 << 20 + } + + var ts []tool.Tool + if d.KV != nil { + ts = append(ts, + NewKVGet(d.KV), NewKVSet(d.KV, d.Quota, d.MaxValueBytes), + NewKVList(d.KV), NewKVDelete(d.KV), + ) + } + if d.Files != nil { + ts = append(ts, + NewFileSave(d.Files, d.Quota, d.MaxFileBytes), + NewFileGet(d.Files), NewFileGetText(d.Files), NewFileGetMetadata(d.Files), + NewFileList(d.Files), NewFileDelete(d.Files), + ) + if d.FileSearch != nil { + ts = append(ts, NewFileSearch(d.FileSearch)) + } + if d.Minter != nil && d.BaseURL != "" { + ts = append(ts, NewCreateFileURL(d.Minter, d.Files, d.BaseURL)) + } + } + return registerAll(reg, ts...) +} + +// staticQuota is the default QuotaProvider: a fixed KV/file byte cap for every +// skill. A host that needs per-skill metering supplies its own. +type staticQuota struct{ kvMax, filesMax int64 } + +func (q staticQuota) EffectiveQuota(context.Context, string) (kvMax, filesMax int64, err error) { + return q.kvMax, q.filesMax, nil +} diff --git a/tools/summarize.go b/tools/summarize.go new file mode 100644 index 0000000..dc8783c --- /dev/null +++ b/tools/summarize.go @@ -0,0 +1,243 @@ +// Package tools — v12 summarize. 
+// +// One fast-tier LLM call: text in → concise text summary out. Either +// `text` or `file_id` (mutually exclusive) supplies the source. Per-run +// budget enforced via the existing v11 SearchBudget surface (kind= +// "summarize"); per-skill cost accounting via the meta-LLM helper's +// ledger (skill_llm_meta_calls). +// +// Why a dedicated tool (vs reusing summary_summarise): summary_ +// summarise wraps the URL-summary pipeline used by /summary; it's +// over-coupled to a specific extraction flow. v12's summarize is the +// "given any text, give me a summary" primitive that downstream tools +// (read_page → summarize, extract → summarize) can compose freely. +// +// File-id input path: when the caller supplies file_id, we dereference +// via FileStorage. Cross-skill check rejects stolen IDs (matching +// file_get's pattern). Scope check denies user:bob's file from alice's +// invocation. +// +// Test: summarize_test.go covers happy path (mock helper), file_id +// input, oversize input truncation, budget exceeded, focus-arg +// pass-through, cross-skill file_id rejection, and the +// missing-both-args validation. +package tools + +import ( + "context" + "encoding/json" + "errors" + "fmt" + "strings" + + "gitea.stevedudenhoeffer.com/steve/executus/llmmeta" + "gitea.stevedudenhoeffer.com/steve/executus/tool" +) + +// summarizeMaxInputBytes is the hard input cap. Inputs longer than +// this are truncated with a `truncated=true` flag in the response so +// the agent knows the summary covers a prefix. +const summarizeMaxInputBytes = 32 * 1024 + +// summarizeDefaultMaxWords is the default max_words when the caller +// doesn't supply one. Capped further by skills.summarize.max_words. +const summarizeDefaultMaxWords = 200 + +// summarizeFallbackMaxWords is the cap used when SummarizeConfig is nil. +const summarizeFallbackMaxWords = 1000 + +// summarizeFallbackMaxPerRun is the per-run cap used when SummarizeConfig +// is nil. 
+const summarizeFallbackMaxPerRun = 10 + +// SummarizeConfig is the narrow per-run + per-deployment config surface +// summarize reads at execute time. Production wires a closure over the +// `skills.summarize.*` convars; nil falls back to package defaults. +type SummarizeConfig interface { + MaxPerRun(ctx context.Context) int + MaxWords(ctx context.Context) int +} + +// summarizeArgs is the LLM-facing param struct. +// +// Why two source fields (text + file_id) with exactly-one validation: +// the agent often produces large content via read_page / read_pdf and +// stores it as a file_id (per the v10 byte-vs-reference principle); +// forcing it to round-trip through a string would defeat the file_id +// pattern. Inline `text` is the simpler path for short snippets. +type summarizeArgs struct { + Text string `json:"text,omitempty" description:"The text to summarise. Either 'text' OR 'file_id' is required (not both). Capped at 32KB; longer inputs truncate with truncated=true in the result."` + FileID string `json:"file_id,omitempty" description:"Alternative to 'text': summarise the contents of a saved file (from read_page/read_pdf/file_save). Must belong to this skill."` + MaxWords int `json:"max_words,omitempty" description:"Maximum word count for the summary. Default 200, capped at skills.summarize.max_words (default 1000)."` + Focus string `json:"focus,omitempty" description:"Optional: what aspect to emphasise (e.g. 'security implications', 'cost analysis', 'main characters')."` +} + +type summarizeResult struct { + Summary string `json:"summary"` + WordCount int `json:"word_count"` + ModelUsed string `json:"model_used"` + Truncated bool `json:"truncated,omitempty"` + BudgetMsg string `json:"budget_message,omitempty"` + Error string `json:"error,omitempty"` +} + +// NewSummarize constructs the summarize tool. helper / cfg / budget / +// fileStorage may all be nil; the handler surfaces clean errors at +// first call. 
+func NewSummarize(helper *llmmeta.Helper, cfg SummarizeConfig, budget SearchBudget, fileStorage FileStorage) tool.Tool { + return tool.NewGatedTool[summarizeArgs]( + "summarize", + "Produce a concise summary of input text using a fast LLM. Pass either 'text' or 'file_id' (one of them is required). Optional 'focus' steers the summary; 'max_words' caps length (default 200). Counts against per-run and 7-day cost budgets.", + tool.Permission{ + AuthoringRequirement: tool.RequirementAnyone, + OperatesOn: tool.ScopeCaller, + SafeForShare: true, + Categories: []string{"llm-meta", "cost-bearing"}, + }, + func(ctx context.Context, inv tool.Invocation, args summarizeArgs) (string, error) { + if helper == nil { + return "", fmt.Errorf("summarize: not configured") + } + text, truncated, err := loadSummarizeInput(ctx, inv, args, fileStorage) + if err != nil { + return marshalSummarizeResult(summarizeResult{Error: err.Error()}), nil + } + + // Per-run budget BEFORE the LLM call so a runaway loop is + // bounded. + if budget == nil { + maxPerRun := summarizeFallbackMaxPerRun + if cfg != nil { + maxPerRun = cfg.MaxPerRun(ctx) + } + budget = NewInMemorySearchBudget(map[string]int{ + "summarize": maxPerRun, + }) + } + count, max, exceeded := budget.CheckAndIncrement(ctx, inv.RunID, "summarize") + if exceeded { + return marshalSummarizeResult(summarizeResult{ + Error: "summarize_budget_exceeded", + BudgetMsg: fmt.Sprintf("per-run summarize budget exceeded (%d/%d). Work with the summaries you already have, or ask an admin to raise skills.summarize.max_per_run.", count, max), + }), nil + } + + maxWords := args.MaxWords + if maxWords <= 0 { + maxWords = summarizeDefaultMaxWords + } + cap := summarizeFallbackMaxWords + if cfg != nil { + cap = cfg.MaxWords(ctx) + } + if maxWords > cap { + maxWords = cap + } + + systemPrompt := "You produce concise, accurate summaries. Honor the requested word count. Do NOT invent facts." 
+ userPrompt := buildSummarizePrompt(text, maxWords, args.Focus) + + res, callErr := helper.Call(ctx, llmmeta.CallSpec{ + Tier: "fast", + SystemPrompt: systemPrompt, + UserPrompt: userPrompt, + MaxOutputTokens: maxWords * 8, // ~8 tokens per word upper bound + ResponseFormat: "text", + ToolName: "summarize", + RunID: inv.RunID, + SkillID: inv.SkillID, + CallerID: inv.CallerID, + }) + if callErr != nil { + return "", callErr + } + if !res.Success || res.Text == "" { + kind := res.ErrorKind + if kind == "" { + kind = "llm_unavailable" + } + return marshalSummarizeResult(summarizeResult{Error: kind}), nil + } + summary := strings.TrimSpace(res.Text) + return marshalSummarizeResult(summarizeResult{ + Summary: summary, + WordCount: countWords(summary), + ModelUsed: res.ModelUsed, + Truncated: truncated, + }), nil + }, + ) +} + +// loadSummarizeInput resolves the input text from either args.Text or +// args.FileID. Exactly one MUST be supplied; both empty AND both +// populated are rejected. +func loadSummarizeInput(ctx context.Context, inv tool.Invocation, args summarizeArgs, fileStorage FileStorage) (string, bool, error) { + hasText := strings.TrimSpace(args.Text) != "" + hasFile := strings.TrimSpace(args.FileID) != "" + if hasText == hasFile { + // Both empty OR both populated. 
+ if !hasText { + return "", false, fmt.Errorf("summarize: one of 'text' or 'file_id' is required") + } + return "", false, fmt.Errorf("summarize: 'text' and 'file_id' are mutually exclusive — pass one") + } + if hasText { + return capInput(args.Text) + } + if fileStorage == nil { + return "", false, fmt.Errorf("summarize: file_id input requires file storage to be configured") + } + meta, content, err := fileStorage.FileGet(ctx, args.FileID) + if err != nil { + if errors.Is(err, ErrFileNotFound) { + return "", false, fmt.Errorf("summarize: file_id not found") + } + return "", false, fmt.Errorf("summarize: file fetch: %w", err) + } + if meta.SkillID != inv.SkillID { + return "", false, fmt.Errorf("summarize: file does not belong to this skill") + } + if err := ValidateScope(inv, meta.Scope, inv.CallerIsAdmin); err != nil { + return "", false, fmt.Errorf("summarize: %w", err) + } + return capInput(string(content)) +} + +// capInput truncates input to the hard byte cap, returning the +// (possibly truncated) text and a flag indicating truncation occurred. +func capInput(text string) (string, bool, error) { + if len(text) <= summarizeMaxInputBytes { + return text, false, nil + } + return truncateUTF8(text, summarizeMaxInputBytes), true, nil +} + +// buildSummarizePrompt composes the user message handed to the LLM. +func buildSummarizePrompt(text string, maxWords int, focus string) string { + var sb strings.Builder + fmt.Fprintf(&sb, "Summarise the following text in at most %d words.", maxWords) + if focus = strings.TrimSpace(focus); focus != "" { + fmt.Fprintf(&sb, " Emphasise: %s.", focus) + } + sb.WriteString("\n\n") + sb.WriteString(text) + return sb.String() +} + +// countWords returns a rough word count via whitespace splitting. +// Good enough for the response's word_count column; the agent might +// see slight discrepancies vs the LLM's internal counter, which is +// acceptable. 
+func countWords(text string) int { + return len(strings.Fields(text)) +} + +// marshalSummarizeResult serialises a summarizeResult to JSON. +func marshalSummarizeResult(r summarizeResult) string { + b, err := json.Marshal(r) + if err != nil { + return fmt.Sprintf(`{"error":"marshal_failed: %v"}`, err) + } + return string(b) +} diff --git a/tools/think.go b/tools/think.go new file mode 100644 index 0000000..7af45c7 --- /dev/null +++ b/tools/think.go @@ -0,0 +1,70 @@ +// Package tools — v11 think. +// +// Pure prompt-engineering tool: the agent's "thought" is recorded +// to skill_run_logs (via the audit hook the gated wrapper applies +// transparently) but produces no side effect. The literature on +// agent design notes that giving an agent an explicit `think` tool +// keeps it on plan better than giving it nothing — without one, +// agents tend to either skip planning OR babble into the final +// output. With one, planning lands in tool calls and the final +// output stays clean. +// +// V11 deliberately rejects empty thoughts. An agent that learns +// "calling think with empty args is free" will spam it; a +// rejection forces the call to actually carry reasoning. +package tools + +import ( + "context" + "strings" + + "gitea.stevedudenhoeffer.com/steve/executus/tool" +) + +type thinkParams struct { + Thought string `json:"thought" description:"Your reasoning. May be a plan, a working hypothesis, an analysis of a tool result, or anything else you'd note in a private scratchpad. Empty input is rejected — make this load-bearing."` +} + +// thinkResponse is intentionally minimal. The agent doesn't need +// machine-readable output; the value is the audit trail + the +// implicit "now you've planned, what's next" prompting the call +// gives the agent loop. +type thinkResponse struct { + OK bool `json:"ok"` + Error string `json:"error,omitempty"` +} + +// NewThink constructs the v11 think tool. No deps — the audit +// hook wrapper handles persistence transparently. 
+func NewThink() tool.Tool { + return tool.NewGatedTool[thinkParams]( + "think", + "Record a thought / plan / working hypothesis. The thought is logged to the run trace but does NOT affect any external state. Use to slow down before a tricky tool call, sketch a multi-step plan, or summarise findings before continuing. Empty thoughts are rejected.", + tool.Permission{ + AuthoringRequirement: tool.RequirementAnyone, + OperatesOn: tool.ScopeGlobal, + SafeForShare: true, + Categories: []string{"utility"}, + }, + func(_ context.Context, _ tool.Invocation, p thinkParams) (string, error) { + if strings.TrimSpace(p.Thought) == "" { + // Returns ok:false in a structured envelope rather + // than an error so the agent loop continues with a + // recoverable signal. + return `{"ok":false,"error":"empty_thought"}`, nil + } + // Successful think emits a flat JSON. The audit hook + // (auto-injected by NewGatedTool) writes the args + result + // pair so the trace UI shows the thought verbatim. + return `{"ok":true}`, nil + }, + ) +} + +// Note: returning a hand-rolled JSON literal instead of a marshaller +// keeps think the cheapest possible tool — no heap allocation, no +// json.Marshal call, no goroutine-local buffer churn. The two output +// shapes are static. If a future field is added to thinkResponse, +// switch back to json.Marshal — but until then, the literal is the +// idiom that matches the tool's "do nothing" intent. +var _ = thinkResponse{} // declared so vet doesn't flag the unused struct diff --git a/tools/tools.go b/tools/tools.go new file mode 100644 index 0000000..f3a06e7 --- /dev/null +++ b/tools/tools.go @@ -0,0 +1,96 @@ +// Package tools is executus's library of generic, host-agnostic agent tools. +// +// A host registers the tools it wants against a tool.Registry, then runs an +// agent whose RunnableAgent.LowLevelTools name them. 
Tools split two ways: +// +// - Always-available, zero-configuration tools register via Register (think, +// now, cite) — all nil-safe, so a light host (gadfly) calls Register and is +// immediately useful. +// - Backed tools take a nil-safe Deps describing their host backend and +// register via grouped registrars (RegisterMeta, and RegisterWeb/Store/… +// as they land). Each Deps ships sensible defaults so "some setup" is small. +// +// A host adds its own domain tools against the SAME registry. +package tools + +import ( + "context" + "errors" + + "gitea.stevedudenhoeffer.com/steve/executus/llmmeta" + "gitea.stevedudenhoeffer.com/steve/executus/tool" +) + +// Register adds the always-available, zero-configuration generic tools: +// +// - think — record a thought to the run trace (no external effect) +// - now — current time (UTC unless a CurrentTimeProvider is wired) +// - cite — record a source citation (inert unless a CitationStorage is wired) +// +// All are nil-safe. Returns the first registration error. +func Register(reg tool.Registry) error { + return registerAll(reg, + NewThink(), + NewNow(nil), + NewCite(nil), + ) +} + +// MetaDeps wires the LLM-backed meta tools (classify, extract_entities, +// summarize). Helper is required. Budget defaults to an in-memory per-run cap; +// Files is optional (summarize's file_id input is inert without it); MaxPerRun +// and MaxWords default when non-positive. +type MetaDeps struct { + Helper *llmmeta.Helper + Budget SearchBudget + Files FileStorage + MaxPerRun int // per-run cap for each meta tool; default 10 + MaxWords int // summarize length cap; default 200 +} + +// RegisterMeta adds classify, extract_entities, and summarize. It requires a +// configured llmmeta.Helper (the fast-tier meta-LLM caller); everything else +// defaults. 
+func RegisterMeta(reg tool.Registry, d MetaDeps) error { + if d.Helper == nil { + return errors.New("tools: MetaDeps.Helper is required for the meta tools") + } + if d.MaxPerRun <= 0 { + d.MaxPerRun = 10 + } + if d.MaxWords <= 0 { + d.MaxWords = 200 + } + if d.Budget == nil { + // Build the default budget WITH the configured per-run cap so + // MetaDeps.MaxPerRun is honored — an empty caps map would fall back to + // the budget's hardcoded default and silently ignore MaxPerRun. + d.Budget = NewInMemorySearchBudget(map[string]int{ + "classify": d.MaxPerRun, + "extract_entities": d.MaxPerRun, + "summarize": d.MaxPerRun, + }) + } + cfg := fixedMetaConfig{maxPerRun: d.MaxPerRun, maxWords: d.MaxWords} + return registerAll(reg, + NewClassify(d.Helper, cfg, d.Budget), + NewExtractEntities(d.Helper, cfg, d.Budget), + NewSummarize(d.Helper, cfg, d.Budget, d.Files), + ) +} + +func registerAll(reg tool.Registry, ts ...tool.Tool) error { + for _, t := range ts { + if err := reg.Register(t); err != nil { + return err + } + } + return nil +} + +// fixedMetaConfig satisfies ClassifyConfig / ExtractEntitiesConfig / +// SummarizeConfig with static caps read from MetaDeps. +type fixedMetaConfig struct{ maxPerRun, maxWords int } + +func (c fixedMetaConfig) MaxPerRun(context.Context) int { return c.maxPerRun } +func (c fixedMetaConfig) MaxWords(context.Context) int { return c.maxWords } diff --git a/tools/truncate.go b/tools/truncate.go new file mode 100644 index 0000000..dd2429b --- /dev/null +++ b/tools/truncate.go @@ -0,0 +1,18 @@ +package tools + +import "unicode/utf8" + +// truncateUTF8 returns s truncated to at most maxBytes, backing off to the last +// complete UTF-8 rune boundary so a multibyte rune (CJK, emoji, …) is never +// split — a byte-boundary cut would hand the LLM invalid UTF-8 / replacement +// chars. Used by the meta tools' input caps. 
// A non-positive maxBytes yields "". Only the rune straddling the cut is
// dropped: bytes that were already invalid UTF-8 in s are passed through
// unchanged rather than causing the rest of the kept prefix to be discarded.
func truncateUTF8(s string, maxBytes int) string {
	if maxBytes <= 0 {
		// Guard the slice expression below: a negative bound would panic.
		return ""
	}
	if len(s) <= maxBytes {
		return s
	}
	// s[cut] is the first byte that would be dropped. If it is a continuation
	// byte, the cut lands inside a rune, so step back to that rune's lead byte.
	// A rune has at most utf8.UTFMax-1 continuation bytes, which bounds the
	// walk and keeps the function O(1) beyond the length check — no repeated
	// re-validation of the whole prefix.
	cut := maxBytes
	for back := 0; back < utf8.UTFMax-1 && cut > 0 && !utf8.RuneStart(s[cut]); back++ {
		cut--
	}
	return s[:cut]
}