// Package tools — research provider plumbing for v11.
//
// This file declares the narrow interfaces v11's research tools
// (web_search, read_page, read_video, read_pdf, verify_url, etc.) need
// at execute time. Production wiring lives in pkg/logic/mort.go and
// closes over the searcher chain, the extractor / chromedp client, the
// PDF extractor, and the yt-dlp wrapper.
//
// Why narrow interfaces (vs importing pkg/logic/searcher / extractor
// directly): the same cycle-break pattern used by KVStorage, FileStorage,
// HTTPConfigProvider — keeps pkg/skilltools/tools free of the wiring
// layer so tests can stub each dependency. Each provider is nil-safe:
// the tool surfaces "not configured" at first call rather than failing
// at registration.
//
// Test: each tool under pkg/skilltools/tools/ wired against these
// interfaces has its own *_test.go using the in-package fakes in
// research_providers_fakes_test.go.
package tools

import (
	"context"
	"errors"
	"time"
)

// PageCache is the narrow surface read_page (and read_pdf) consult to
// avoid re-fetching the same URL within the cache's TTL. Production
// wiring bridges this interface to the legacy *cache.Cache held by
// pkg/logic/query.System so a `.query foo.com` and a
// `.skill query foo.com` for the same URL share one cache slot.
//
// Why a narrow interface (vs importing the cache package directly):
// same cycle-break pattern as KVStorage / FileStorage / CitationStorage
// — keeps pkg/skilltools/tools free of the wiring layer. The legacy
// cache slot key is `sha256(url)`; the production adapter is
// responsible for hashing so this interface stays clean (raw URL in/out)
// and skill-tool authors never need to know the slot shape.
//
// nil-safe: a tool constructed with a nil PageCache simply skips the
// cache layer (always treat Get as a miss; Set is a no-op).
//
// Test: tests pass a fake PageCache that records Get/Set calls and
// returns canned hits.
// See page_cache_test.go for the read_page hit /
// miss scenarios.
type PageCache interface {
	// Get returns the cached body for urlStr and true on hit, or
	// (nil, false) on miss. Implementations MUST treat any backing-
	// store error as a miss (best-effort, never fail the caller).
	Get(ctx context.Context, urlStr string) ([]byte, bool)

	// Set writes body under the slot for urlStr with the supplied TTL.
	// Implementations MUST swallow backing-store errors (best-effort
	// caching is correct: a write failure should not propagate to the
	// agent loop).
	Set(ctx context.Context, urlStr string, body []byte, ttl time.Duration)
}

// PageCacheTTL is the default TTL applied by tools that consult a
// PageCache. Mirrors the legacy `query.pageCacheTTL` constant
// (1 hour) so a `.query`-warmed slot reads back from a `.skill query`
// (and vice versa) within the same window.
//
// Tools that want a different TTL pass an explicit value to
// PageCache.Set; this constant is the project default the v11 / v-research
// tools all use.
const PageCacheTTL = 1 * time.Hour

// PageExtractor is the narrow surface read_page needs at execute
// time. The production adapter wraps mort's existing extractor
// (Ollama web_fetch first, chromedp fallback on JS-heavy pages).
//
// nil-safe: a tool constructed with a nil PageExtractor surfaces
// "not configured" at first call.
//
// Why: read_page used to be a thin io.ReadAll over the URL — it
// missed JS rendering, didn't honour the v6 page cache, and could
// not surface the underlying provider name. v11 routes through this
// interface so the production wiring (mort.go) can plug in the
// existing query-side extractor without exposing query.Agent.
type PageExtractor interface {
	// ExtractPage fetches and extracts readable text from urlStr.
	// Returns the extracted body, the final URL (after any redirects
	// the extractor followed), the provider name ("ollama" |
	// "chromedp" | "ytdlp"), and an error.
	//
	// The returned body is the FULL extracted text — callers apply
	// the v10 byte-vs-reference cap before surfacing to the agent.
	//
	// bypassCache=true skips any page cache and forces a fresh
	// extraction; callers pass false for the default (cached) path.
	//
	// ErrPageExtractionFailed is the sentinel for "every extraction
	// path produced empty content".
	ExtractPage(ctx context.Context, urlStr string, bypassCache bool) (text string, finalURL string, provider string, err error)
}

// VideoTranscriber is the narrow surface read_video needs at
// execute time. Production wiring wraps internal/ytdlp.
//
// nil-safe: tool surfaces "not configured" at first call.
//
// Why a separate interface from PageExtractor: video is a different
// shape (transcript + metadata) and a different binary (yt-dlp).
// Keeping them distinct lets tests stub each independently.
type VideoTranscriber interface {
	// ExtractVideoTranscript returns the transcript text and the
	// best-effort metadata (title, duration in seconds, channel).
	// Implementations MUST return a non-empty transcript or an
	// error — empty-transcript success is interpreted by the tool
	// as a "transcript_unavailable" failure.
	//
	// ErrVideoTranscriptUnavailable is the sentinel for "no captions /
	// transcript could be obtained".
	ExtractVideoTranscript(ctx context.Context, urlStr string) (transcript string, meta VideoMeta, err error)
}

// VideoMeta is best-effort metadata returned alongside a video
// transcript. Any field may be empty/zero if the implementation
// could not extract it.
type VideoMeta struct {
	Title           string
	Channel         string
	DurationSeconds int
}

// PDFFetcher is the narrow surface read_pdf needs at execute time.
// Production wiring uses an HTTP-aware fetcher that HEAD-validates
// content-type before downloading the body.
//
// nil-safe: tool surfaces "not configured" at first call.
//
// Why: a tool that just embedded PDF extraction would couple
// fetching + parsing. Splitting the fetch (allowlist + SSRF +
// HEAD check) from the extract (page-level parsing) keeps each
// step testable and lets the same fetcher serve verify_url one
// day if we want a PDF-aware fast path.
type PDFFetcher interface {
	// FetchPDF downloads the PDF at urlStr (after HEAD-validating
	// content-type) and returns the raw bytes plus the final URL.
	// HEAD-validation rejects a URL whose Content-Type is not a
	// PDF mime AND whose path does not end in .pdf; that rejection
	// is reported as ErrPDFNotPDF.
	FetchPDF(ctx context.Context, urlStr string) (body []byte, finalURL string, err error)
}

// PDFExtractor parses PDF bytes into plain text + page count.
// Production wires internal.ExtractPDFText.
//
// Why split from PDFFetcher: tests want to vary the fetch (mock
// server returning bytes) without rebuilding the extractor.
type PDFExtractor interface {
	// ExtractPDFText returns the concatenated plain-text content
	// of the PDF along with the page count. The caller applies any
	// per-page cap and the v10 byte-vs-reference cap on the result.
	//
	// maxPages is the page budget for extraction (see
	// ResearchConfig.PDFMaxPages).
	// NOTE(review): truncated presumably reports that the maxPages
	// cap cut extraction short — confirm against the production
	// extractor.
	//
	// ErrPDFEncrypted is the sentinel for a password-protected PDF.
	ExtractPDFText(ctx context.Context, body []byte, maxPages int) (text string, pageCount int, truncated bool, err error)
}

// HEADChecker is the narrow surface verify_url needs at execute
// time. Production wiring uses the same SSRF-pinned transport as
// http_get so the security envelope is consistent.
//
// Why a separate interface (vs reusing HTTPConfigProvider+doHTTP):
// verify_url's contract is simpler — HEAD only, no body bytes
// returned, and the agent only cares about reachable / status /
// final URL / content-type. A bespoke surface lets the production
// adapter optimise for that path (no body buffer, no body close).
type HEADChecker interface {
	// HEAD performs a HEAD request against urlStr (with SSRF +
	// allowlist enforcement) and returns the final URL after any
	// redirects, the HTTP status code, and the Content-Type header.
	// Returns reachable=false with a non-nil err for transport
	// failures (DNS, TCP, allowlist rejection); reachable=true with
	// any HTTP status (including 4xx/5xx) is the success shape —
	// the agent decides whether the URL is "real".
	HEAD(ctx context.Context, urlStr string) (finalURL string, status int, contentType string, reachable bool, err error)
}

// CitationStorage is the narrow surface cite() needs at execute
// time. Production wires *skills.System.Storage(); tests stub.
//
// nil-safe: tool surfaces "not configured" at first call.
//
// Why a narrow interface (vs importing pkg/logic/skills): same
// cycle constraint as KVStorage / FileStorage. Production adapter
// in mort.go bridges to skills.Storage's RecordCitation /
// ListCitations methods AND a separate URL-history tracker.
//
// Two responsibilities, deliberately separate:
//
//  1. RecordCitation writes a row into skill_run_sources — this is
//     the user-visible citations table for the Sources panel and
//     CSV export. ONLY rows the agent successfully cited via
//     cite() land here.
//  2. RecordURLTouch / GetTouchedURLs maintains a per-run set of
//     URLs the agent has interacted with (web_search results,
//     read_page input, read_pdf input, read_video input). cite()
//     reads this set to reject claims for URLs the agent never
//     touched. This set lives in a different table or scope from
//     the citations table — it's working state, not a record.
type CitationStorage interface {
	// RecordCitation appends one (run_id, url, claim, cited_at)
	// row to the citations table (skill_run_sources). cited_at is
	// not a parameter: the storage layer stamps it with time.Now().
	// The caller has already verified the URL is in the touched-URL
	// set (via GetTouchedURLs); this method is the persistence step.
	RecordCitation(ctx context.Context, runID, url, claim string) error

	// RecordURLTouch records that the agent has interacted with
	// `url` during `runID`. Called by web_search (per result),
	// read_page, read_pdf, and read_video. Idempotent — repeat
	// calls for the same (run_id, url) are no-ops at the storage
	// layer.
	RecordURLTouch(ctx context.Context, runID, url string) error

	// GetTouchedURLs returns the set of URLs the run has
	// interacted with. Used by cite() to verify that a claim's
	// URL is one the agent actually visited. Empty for a fresh
	// run — cite() then rejects every claim with
	// "url_not_in_run_history".
	GetTouchedURLs(ctx context.Context, runID string) (map[string]struct{}, error)

	// ListCitations returns all citations recorded for the run, in
	// insertion order. Powers the /skills/{id}/runs/{run_id}
	// Sources panel.
	ListCitations(ctx context.Context, runID string) ([]CitationRow, error)
}

// CitationRow mirrors the skill_run_sources row shape. Fields
// match the spec: run_id is implicit in the query, url + claim are
// what the agent submitted, cited_at is the wall-clock timestamp
// at insert.
type CitationRow struct {
	URL     string
	Claim   string
	CitedAt int64 // unix-seconds; storage adapter normalises from time.Time
}

// CurrentTimeProvider exposes the per-user timezone lookup the
// current-time tool needs. Production wiring closes over the bot's
// member-config getter.
//
// The interface declares no clock method: only the timezone lookup
// is pluggable here.
//
// nil-safe: a tool constructed with a nil provider falls back to
// server-time + UTC (current behaviour of NewNow before v11).
type CurrentTimeProvider interface {
	// UserTimezone returns the IANA timezone name configured for
	// the given Discord member ID, or "" when the member has no
	// timezone configured. Callers treat "" as "UTC".
	UserTimezone(ctx context.Context, memberID string) string
}

// SearchBudget is the narrow surface web_search reads at execute
// time to honour skills.web_search.max_per_run.
//
// Production wiring closes over a per-run counter held by the
// executor. nil-safe: tool falls back to a built-in package
// counter (process-wide, NOT per-run) — useful for tests but NOT
// production-correct because budget bleeds across runs. The
// production adapter MUST be wired.
type SearchBudget interface {
	// CheckAndIncrement increments the counter for runID and returns
	// the count AFTER incrementing, the configured max (limit), and
	// exceeded=true when the call would exceed the cap. Exceeding is
	// reported through the bool, not an error: the handler returns a
	// clean "search_budget_exceeded" string on exceed so the agent
	// can react.
	//
	// NOTE(review): kind presumably names the budgeted tool
	// (web_search, read_page, ...) — confirm against the production
	// adapter.
	//
	// The second result is named limit rather than max so it does
	// not shadow the predeclared max builtin; result names are not
	// part of the method's type, so implementers are unaffected.
	CheckAndIncrement(ctx context.Context, runID, kind string) (count, limit int, exceeded bool)
}

// ResearchConfig is the narrow surface that read_page / read_video /
// read_pdf / verify_url read at execute time for per-tool budget caps
// and inline-vs-file_id thresholds. Production wiring closes over
// the relevant convars.
//
// nil-safe: tools fall back to package defaults.
type ResearchConfig interface {
	// MaxInlineBytes returns the cap above which extracted text is
	// persisted as a file_id under run-scope (v10 byte-vs-reference
	// principle). Default 12 KiB.
	MaxInlineBytes(ctx context.Context) int

	// PDFMaxPages returns the cap on pages extracted from a PDF
	// before truncation. Default 50.
	PDFMaxPages(ctx context.Context) int

	// WebSearchEnabled is the master switch for web_search.
	WebSearchEnabled(ctx context.Context) bool

	// WebSearchMaxPerRun is the per-run search cap.
	WebSearchMaxPerRun(ctx context.Context) int

	// ReadPageMaxPerRun is the per-run page-read cap.
	ReadPageMaxPerRun(ctx context.Context) int

	// VideoMaxPerRun is the per-run video-read cap.
	VideoMaxPerRun(ctx context.Context) int

	// VerifyURLMaxPerRun is the per-run HEAD-check cap.
	VerifyURLMaxPerRun(ctx context.Context) int

	// ReadPDFMaxPerRun is the per-run PDF-read cap.
	ReadPDFMaxPerRun(ctx context.Context) int

	// HTTPGetMaxPerRun (v15.2) is the per-run http_get cap. The agent
	// otherwise can retry-storm through random URLs and bloat its own
	// context with each tool result. Default 20.
	HTTPGetMaxPerRun(ctx context.Context) int

	// HTTPPostMaxPerRun (v15.2) is the per-run http_post cap. Default 20.
	HTTPPostMaxPerRun(ctx context.Context) int

	// WebSearchAugmentThreshold is the minimum number of primary
	// (Ollama) results required to skip the secondary (DDG/Brave)
	// search. When the primary backend returns fewer than this many
	// results, the augmented searcher also queries the secondary and
	// merges both result sets. Default 5.
	WebSearchAugmentThreshold(ctx context.Context) int

	// NOTE: ReplyChainDepthMax is deliberately NOT a method here.
	// It is unused by these tools and was only a placeholder shape
	// for future per-tool caps; callers reach into the convar reader
	// directly when they need it.
}

// Sentinel errors returned by the research providers. Callers match
// them with errors.Is to map a failure onto a clean, agent-facing
// rejection string instead of a generic transport error.
var (
	// ErrPageExtractionFailed is the sentinel returned by a PageExtractor
	// when both Ollama and chromedp paths produce empty content.
	ErrPageExtractionFailed = errors.New("page extraction failed: empty content")

	// ErrVideoTranscriptUnavailable is the sentinel returned by a
	// VideoTranscriber when no captions / transcript could be obtained.
	ErrVideoTranscriptUnavailable = errors.New("video transcript unavailable")

	// ErrPDFNotPDF is the sentinel returned by a PDFFetcher when the
	// HEAD response indicates a non-PDF content-type AND the URL path
	// has no .pdf extension. Surfaces a clean "url_is_not_a_pdf"
	// rejection rather than a generic transport error.
	ErrPDFNotPDF = errors.New("url does not serve a PDF")

	// ErrPDFEncrypted is returned by a PDFExtractor when the PDF refuses
	// extraction because it is password-protected. Surfaces a clean
	// "pdf_encrypted" rejection.
	ErrPDFEncrypted = errors.New("pdf is encrypted")
)