Files
executus/tools/file_get_text.go
steve d0bd3ec3d9
executus CI / test (push) Has been cancelled
fix: address verified gadfly P3 review (3-cloud fleet)
All 3 cloud models converged on a real access-control bug; fixed it + the
other genuine findings (the false-positives were dropped):

Security (HIGH — all 3 models):
- create_file_url skipped ValidateScope: a same-skill caller could mint a
  PUBLIC url for a file scoped to another user/run. Now runs ValidateScope
  (admin-aware), skipped only for the descendant-grant case — mirroring the
  read tools.

Other real fixes:
- ValidateScope hard-coded `false` at every call site (admin branch dead) ->
  pass inv.CallerIsAdmin (the executor sets it via the host AdminPolicy; still
  false/fail-closed when no admin). Stale "no admin flag" comment corrected.
- create_file_url: ExpiresInSeconds clamped BEFORE the *time.Second multiply
  (huge values overflowed to a negative duration that slipped under the cap,
  minting already-expired tokens); swallowed json.Marshal error now returned.
- RegisterMeta: build the default budget WITH the configured MaxPerRun (was
  NewInMemorySearchBudget(nil) -> hardcoded 10, ignoring MetaDeps.MaxPerRun).
- classify: all-zero scores no longer return a false-positive top-1 winner;
  coerceClassifyScore uses strconv.ParseFloat (rejects trailing garbage like
  "50extra" that fmt.Sscanf silently accepted).
- file_delete: honor the descendant grant (parent can clean up a worker's
  artifacts) — was the lone cross-skill-reject-outright file tool.
- meta tools: input caps truncate at a UTF-8 rune boundary (truncateUTF8), not
  mid-rune.
- think: removed the dead `var _ = fmt.Errorf` import-keeper; file_save default
  aligned to 16 MiB (matched RegisterStore).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-27 00:11:54 -04:00

120 lines
4.2 KiB
Go

// file_get_text fetches a saved text file's content as plain text.
// Only succeeds for text/* MIMEs and a few conventionally-textual
// application/* types (JSON, XML, XHTML, JavaScript, YAML); any other
// MIME returns an error so the agent knows to use a different path
// (file_get_metadata for reasoning, send_attachments for delivery).
//
// Why a 64 KiB cap: the v10 byte-vs-reference principle says inline
// text content stays under ~10KB ideally; we set the hard cap at 64
// KiB to handle reasonable text artifacts (logs, configs, small
// reports) without blowing the agent's context. Files larger than
// the cap return an error pointing to send_attachments.
//
// Why a separate tool (vs file_get): file_get returns base64 +
// metadata regardless of MIME, which agents misuse to dump 10MB PDFs
// into the context window. file_get_text is the agent-friendly
// alternative that explicitly fails fast on binary content.
package tools
import (
"context"
"encoding/json"
"errors"
"fmt"
"strings"
"time"
"gitea.stevedudenhoeffer.com/steve/executus/tool"
)
// fileGetTextMaxBytes is the largest content size, in bytes, that
// file_get_text will return inline (64 KiB). Larger files are rejected
// with a too_large error.
const fileGetTextMaxBytes = 64 << 10
// fileGetTextArgs is the argument payload accepted by the file_get_text
// tool.
type fileGetTextArgs struct {
	// FileID identifies the stored file to read. An empty value is
	// rejected by the tool handler.
	FileID string `json:"file_id" description:"Opaque file ID returned by file_save or file_list."`
}
// fileGetTextResult is the success payload of file_get_text. The handler
// JSON-encodes it and returns the encoding as the tool's output string.
type fileGetTextResult struct {
	// Text is the file content converted to a string.
	Text string `json:"text"`

	// Mime is the MIME type stored with the file.
	Mime string `json:"mime"`

	// SizeBytes is the size recorded in the file's metadata.
	SizeBytes int64 `json:"size_bytes"`

	// CreatedAt is the file's creation time in UTC, formatted as RFC3339.
	CreatedAt string `json:"created_at"`
}
// NewFileGetText builds the file_get_text tool on top of storage. A nil
// storage is accepted at construction time; every invocation then fails
// with a "not configured" error.
//
// Each invocation, in order: requires a non-empty file_id, loads the file
// from storage, checks that the caller may read it, rejects MIME types
// that isTextMime does not accept ("not_text"), rejects content larger
// than fileGetTextMaxBytes ("too_large"), and finally returns a
// JSON-encoded fileGetTextResult.
func NewFileGetText(storage FileStorage) tool.Tool {
	permission := tool.Permission{
		AuthoringRequirement: tool.RequirementAnyone,
		OperatesOn:           tool.ScopeCaller,
		SafeForShare:         true,
		Categories:           []string{"storage", "read"},
	}

	handler := func(ctx context.Context, inv tool.Invocation, args fileGetTextArgs) (string, error) {
		switch {
		case storage == nil:
			return "", fmt.Errorf("file_get_text: not configured")
		case args.FileID == "":
			return "", fmt.Errorf("file_get_text: file_id required")
		}

		meta, content, err := storage.FileGet(ctx, args.FileID)
		switch {
		case err == nil:
			// Loaded; continue to the access checks.
		case errors.Is(err, ErrFileNotFound):
			return "", fmt.Errorf("file_get_text: not found")
		default:
			return "", fmt.Errorf("file_get_text: %w", err)
		}

		// Access control. A file owned by the calling skill must pass
		// the scope check. A file owned by another skill is readable
		// only through the descendant grant (a worker this run
		// transitively dispatched may have produced it); that file's
		// scope is the WORKER's run, so the grant stands in for the
		// scope check and ValidateScope is not consulted.
		if meta.SkillID == inv.SkillID {
			if scopeErr := ValidateScope(inv, meta.Scope, inv.CallerIsAdmin); scopeErr != nil {
				return "", fmt.Errorf("file_get_text: %w", scopeErr)
			}
		} else if !descendantFileGrant(ctx, storage, inv, meta.SkillID) {
			return "", fmt.Errorf("file_get_text: file does not belong to this skill")
		}

		if !isTextMime(meta.MimeType) {
			return "", fmt.Errorf("file_get_text: not_text: mime %q is not text/*", meta.MimeType)
		}
		if size := len(content); int64(size) > fileGetTextMaxBytes {
			return "", fmt.Errorf("file_get_text: too_large: %d bytes exceeds 64KB cap; use send_attachments to deliver this file to Discord", size)
		}

		payload, err := json.Marshal(fileGetTextResult{
			Text:      string(content),
			Mime:      meta.MimeType,
			SizeBytes: meta.SizeBytes,
			CreatedAt: meta.CreatedAt.UTC().Format(time.RFC3339),
		})
		if err != nil {
			return "", fmt.Errorf("file_get_text: marshal: %w", err)
		}
		return string(payload), nil
	}

	return tool.NewGatedTool[fileGetTextArgs](
		"file_get_text",
		"Fetch a saved text file's content (text/* MIMEs only, capped at 64KB). For binary content use file_get_metadata + send_attachments. Errors with 'not_text' for non-text MIMEs and 'too_large' for files > 64KB.",
		permission,
		handler,
	)
}
// isTextMime reports whether mime names a media type whose content can be
// returned inline as text. Any text/* type qualifies, as do a few
// application/* types that are conventionally text: JSON, XML, XHTML,
// JavaScript and YAML.
//
// Matching is case-insensitive and ignores surrounding whitespace.
// Media-type parameters (everything from the first ';', for example
// "; charset=utf-8") are dropped before matching, so
// "application/json; charset=utf-8" is recognised just like
// "application/json". An empty string returns false.
func isTextMime(mime string) bool {
	// Keep only the type/subtype; parameters never decide text-ness, and
	// leaving them attached would defeat the exact-match switch below.
	essence, _, _ := strings.Cut(mime, ";")
	essence = strings.ToLower(strings.TrimSpace(essence))

	if strings.HasPrefix(essence, "text/") {
		return true
	}
	switch essence {
	case "application/json", "application/xml", "application/xhtml+xml",
		"application/javascript", "application/yaml", "application/x-yaml":
		return true
	}
	return false
}