P3: meta + primitive tool group (think/now/cite + classify/extract/summarize)

Grow executus/tools into a real generic tool library:

- Register(reg): the always-available, zero-config tools — think, now (UTC
  unless a CurrentTimeProvider is wired), cite (inert unless a CitationStorage
  is wired). All nil-safe; a light host calls Register and is useful.
- RegisterMeta(reg, MetaDeps): the LLM-backed meta tools — classify,
  extract_entities, summarize — over the llmmeta helper. Budget defaults to the
  shipped in-memory per-run cap; Files optional; caps default.
- Seams moved (interface/type-only, no host coupling): research_providers.go
  (CurrentTimeProvider/CitationStorage/SearchBudget/PageExtractor/PDFFetcher/…)
  and file_storage.go (FileStorage + FileDomainMeta). Plus the in-memory budget
  default (research_defaults.go) and scope_validate.go.

calculate deferred (drags github.com/Krognol/go-wolfram + a module-path replace
— not worth it in the lean core for one tool). Core go.sum still free of
gorm/redis/discordgo/sqlite/wolfram.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-06-26 21:00:45 -04:00
parent df95425bb5
commit 1e201550b3
11 changed files with 1802 additions and 17 deletions
+128
View File
@@ -0,0 +1,128 @@
// Package tools — v11 cite.
//
// Anti-hallucination forcing function. The convention: agents call
// cite(claim, url) for every numbered reference in their final
// answer. The tool verifies the URL appears in the run's
// touched-URL set (populated by web_search results +
// read_page/read_pdf/read_video). If yes → write to
// skill_run_sources, return {ok: true}. If no → return
// {ok: false, reason: "url_not_in_run_history"} and DO NOT write.
//
// Skills authored without this discipline don't lose anything;
// skills WITH it produce more reliable citations. The webui
// renders the skill_run_sources rows as a Sources panel on the
// run trace page — invisible to skills that don't use cite().
package tools
import (
"context"
"encoding/json"
"fmt"
"strings"
"gitea.stevedudenhoeffer.com/steve/executus/tool"
)
// citeParams is the LLM-facing param struct.
type citeParams struct {
Claim string `json:"claim" description:"The claim or fact you are asserting (e.g. 'Mort was published in 1987')."`
URL string `json:"url" description:"The URL that supports the claim. MUST be a URL the agent has previously read via read_page/read_pdf/read_video or seen as a web_search result."`
}
// citeResponse is the JSON envelope returned to the agent.
//
// On success: ok=true, the skill_run_sources row was written.
// On failure: ok=false, reason=<one of the documented sentinels>.
type citeResponse struct {
OK bool `json:"ok"`
Reason string `json:"reason,omitempty"`
Claim string `json:"claim,omitempty"`
URL string `json:"url,omitempty"`
}
// NewCite constructs the v11 cite tool. cs may be nil — handler
// returns "not configured" at first call.
//
// The "anyone author / share-safe" permission shape matches every
// other v11 research-class tool. Skills that adopt cite() get the
// Sources panel automatically; skills that don't are unaffected.
func NewCite(cs CitationStorage) tool.Tool {
return tool.NewGatedTool[citeParams](
"cite",
"Record a citation: a claim + the URL that supports it. The URL MUST be one the agent has actually fetched via read_page/read_pdf/read_video or seen as a web_search result — citing a URL the agent never visited is rejected with reason 'url_not_in_run_history'. Successful citations populate the run's Sources panel.",
tool.Permission{
AuthoringRequirement: tool.RequirementAnyone,
OperatesOn: tool.ScopeGlobal,
SafeForShare: true,
Categories: []string{"citation"},
},
func(ctx context.Context, inv tool.Invocation, p citeParams) (string, error) {
if cs == nil {
return "", fmt.Errorf("cite: citation storage not configured")
}
claim := strings.TrimSpace(p.Claim)
urlStr := strings.TrimSpace(p.URL)
if claim == "" {
return marshalCite(citeResponse{
OK: false,
Reason: "claim_empty",
}), nil
}
if urlStr == "" {
return marshalCite(citeResponse{
OK: false,
Reason: "url_empty",
}), nil
}
if inv.RunID == "" {
// No run id → cite() can't verify history. Bail loud.
return marshalCite(citeResponse{
OK: false,
Reason: "no_run_context",
Claim: claim,
URL: urlStr,
}), nil
}
touched, err := cs.GetTouchedURLs(ctx, inv.RunID)
if err != nil {
return marshalCite(citeResponse{
OK: false,
Reason: fmt.Sprintf("touched_lookup_failed: %v", err),
Claim: claim,
URL: urlStr,
}), nil
}
if _, ok := touched[urlStr]; !ok {
return marshalCite(citeResponse{
OK: false,
Reason: "url_not_in_run_history",
Claim: claim,
URL: urlStr,
}), nil
}
if err := cs.RecordCitation(ctx, inv.RunID, urlStr, claim); err != nil {
return marshalCite(citeResponse{
OK: false,
Reason: fmt.Sprintf("record_failed: %v", err),
Claim: claim,
URL: urlStr,
}), nil
}
return marshalCite(citeResponse{
OK: true,
Claim: claim,
URL: urlStr,
}), nil
},
)
}
func marshalCite(r citeResponse) string {
b, err := json.Marshal(r)
if err != nil {
return fmt.Sprintf(`{"ok":false,"reason":"marshal_failed: %v"}`, err)
}
return string(b)
}