Files
executus/tools/file_save.go
T
steve d0bd3ec3d9
executus CI / test (push) Has been cancelled
fix: address verified gadfly P3 review (3-cloud fleet)
All 3 cloud models converged on a real access-control bug; fixed it + the
other genuine findings (the false-positives were dropped):

Security (HIGH — all 3 models):
- create_file_url skipped ValidateScope: a same-skill caller could mint a
  PUBLIC url for a file scoped to another user/run. Now runs ValidateScope
  (admin-aware), skipped only for the descendant-grant case — mirroring the
  read tools.

Other real fixes:
- ValidateScope hard-coded `false` at every call site (admin branch dead) ->
  pass inv.CallerIsAdmin (the executor sets it via the host AdminPolicy; still
  false/fail-closed when no admin). Stale "no admin flag" comment corrected.
- create_file_url: ExpiresInSeconds clamped BEFORE the *time.Second multiply
  (huge values overflowed to a negative duration that slipped under the cap,
  minting already-expired tokens); swallowed json.Marshal error now returned.
- RegisterMeta: build the default budget WITH the configured MaxPerRun (was
  NewInMemorySearchBudget(nil) -> hardcoded 10, ignoring MetaDeps.MaxPerRun).
- classify: all-zero scores no longer return a false-positive top-1 winner;
  coerceClassifyScore uses strconv.ParseFloat (rejects trailing garbage like
  "50extra" that fmt.Sscanf silently accepted).
- file_delete: honor the descendant grant (parent can clean up a worker's
  artifacts) — was the lone cross-skill-reject-outright file tool.
- meta tools: input caps truncate at a UTF-8 rune boundary (truncateUTF8), not
  mid-rune.
- think: removed the dead `var _ = fmt.Errorf` import-keeper; file_save default
  aligned to 16 MiB (matched RegisterStore).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-27 00:11:54 -04:00

172 lines
6.4 KiB
Go

// file_save persists arbitrary bytes (base64-encoded by the caller)
// against a (scope, name) tuple within the calling skill's namespace.
// Returns the new file_id, the SHA256 content hash, and the size.
//
// Why base64 over raw bytes: the LLM's tool-call wire format is JSON,
// which can't carry arbitrary bytes natively. Base64 round-trips
// cleanly through the schema.
//
// Why hash + size in the response: agents commonly want to dedup
// across runs (same hash = same content) or build a manifest. Reporting
// these inline saves an immediate file_get round-trip just to compute
// them.
//
// Per-file cap: maxFileBytes (constructor arg) enforces an upper bound
// on individual file size. A value <= 0 falls back to defaultFileMaxBytes
// (16 MiB).
//
// Per-skill quota (sum across all files): the constructor's QuotaProvider
// arg drives the v4 Phase 4 enforcement. nil disables enforcement
// (useful for tests and admin-only deployments). The check is:
//
//	used := storage.FileUsageBytes(skill)
//	if used + len(new content) > filesMax → quota_exceeded
//
// Note we do NOT subtract a "prior" value here the way kv_set does:
// file_save always inserts a new file row (content-addressable dedup
// is at the blob layer, not the row layer), so every save is additive
// to FileUsageBytes.
package tools
import (
"context"
"crypto/sha256"
"encoding/base64"
"encoding/hex"
"encoding/json"
"fmt"
"net/http"
"strings"
"time"
"github.com/google/uuid"
"gitea.stevedudenhoeffer.com/steve/executus/tool"
)
// defaultFileMaxBytes is the per-file size cap, in bytes, that NewFileSave
// substitutes when its maxFileBytes argument is <= 0.
const defaultFileMaxBytes = 16 * 1024 * 1024 // 16 MiB
// fileSaveArgs is the JSON argument payload decoded for a file_save call.
// Each field carries a `description` tag holding its help text; Mime is the
// only field marked omitempty on the wire.
type fileSaveArgs struct {
	// Scope names the storage scope the file is saved under.
	Scope string `json:"scope" description:"Storage scope: 'skill' (shared across all callers of this skill), 'user:<your_id>' (per-caller), or 'run:<run_id>' (this run's scratchpad)."`

	// Name is the display filename; it must be non-empty.
	Name string `json:"name" description:"Filename including extension. Used for display only — the file is identified by an opaque file_id."`

	// ContentBase64 is the file content in standard base64; it must be
	// non-empty.
	ContentBase64 string `json:"content_base64" description:"Base64-encoded file content."`

	// Mime optionally overrides content-type detection.
	Mime string `json:"mime,omitempty" description:"Optional MIME type. If omitted, detected from the first 512 bytes of content."`
}
// fileSaveResult is the JSON object file_save returns on success.
type fileSaveResult struct {
	// FileID is the identifier returned by the storage layer for the save.
	FileID string `json:"file_id"`

	// Hash is the hex-encoded SHA-256 of the decoded content.
	Hash string `json:"hash"`

	// SizeBytes is the length of the decoded content in bytes.
	SizeBytes int64 `json:"size_bytes"`
}
// NewFileSave constructs the file_save tool.
//
// Parameters:
//   - storage: backing file store. nil is accepted here; the handler then
//     fails every call with "not configured".
//   - quota: per-skill quota source. nil skips the per-skill quota check
//     (the per-file cap still applies).
//   - maxFileBytes: cap on the decoded size of a single file. Values <= 0
//     fall back to defaultFileMaxBytes (16 MiB).
//
// Permission: anyone may author; safe for share. Scope check at handler
// entry prevents cross-user writes; per-user buckets are isolated by
// inv.CallerID.
//
// On success the handler returns a JSON-encoded fileSaveResult (file_id,
// hex SHA-256 hash, size_bytes). Every failure is returned as an error
// prefixed with "file_save: ".
func NewFileSave(storage FileStorage, quota QuotaProvider, maxFileBytes int) tool.Tool {
	if maxFileBytes <= 0 {
		maxFileBytes = defaultFileMaxBytes
	}
	return tool.NewGatedTool[fileSaveArgs](
		"file_save",
		"Save base64-encoded bytes against a (scope, name) tuple. Returns file_id (opaque), SHA256 hash, and size_bytes. Content is dedup'd by hash — multiple file_save calls with identical bytes share storage. NOTE: for files produced inside code_exec, do NOT hand-encode base64 here (it corrupts) — write them to /workspace/ in the code_exec call and use the files_out file_id it returns.",
		tool.Permission{
			AuthoringRequirement: tool.RequirementAnyone,
			OperatesOn:           tool.ScopeCaller,
			SafeForShare:         true,
			Categories:           []string{"storage", "write"},
		},
		func(ctx context.Context, inv tool.Invocation, args fileSaveArgs) (string, error) {
			if storage == nil {
				return "", fmt.Errorf("file_save: not configured")
			}
			// Scope gate runs before any other argument is inspected; the
			// admin flag comes from the invocation, not a constant.
			if err := ValidateScope(inv, args.Scope, inv.CallerIsAdmin); err != nil {
				return "", fmt.Errorf("file_save: %w", err)
			}
			// root_run is a KV-only scope (v1): file storage partitions
			// by the calling skill, so a root_run file would silently be
			// invisible to siblings AND escape the run-scope sweeper.
			// Reject loudly instead.
			if strings.HasPrefix(args.Scope, "root_run:") {
				return "", fmt.Errorf("file_save: root_run scope is KV-only; save under run:<run_id> and share the file_id via kv_set in the root_run scope")
			}
			if args.Name == "" {
				return "", fmt.Errorf("file_save: name required")
			}
			if args.ContentBase64 == "" {
				return "", fmt.Errorf("file_save: content_base64 required")
			}

			// Decode + cap. Decoding twice (once to count, once to
			// store) would waste cycles; we decode once and check size
			// after.
			content, err := base64.StdEncoding.DecodeString(args.ContentBase64)
			if err != nil {
				return "", fmt.Errorf("file_save: invalid base64: %w", err)
			}
			if len(content) > maxFileBytes {
				return "", fmt.Errorf("file_save: file exceeds max %d bytes (got %d)", maxFileBytes, len(content))
			}
			// The decoded size feeds the quota check, the stored metadata
			// and the response, so compute it once.
			size := int64(len(content))

			// Per-skill quota gate (v4 Phase 4). Skipped when quota is nil
			// (tests / admin opt-out) so the per-file cap above is the
			// only line of defence in that mode.
			if quota != nil {
				_, filesMax, err := quota.EffectiveQuota(ctx, inv.SkillID)
				if err != nil {
					return "", fmt.Errorf("file_save: quota lookup: %w", err)
				}
				used, err := storage.FileUsageBytes(ctx, inv.SkillID)
				if err != nil {
					return "", fmt.Errorf("file_save: usage check: %w", err)
				}
				// Every save is additive: no prior value is subtracted.
				if used+size > filesMax {
					return "", fmt.Errorf("file_save: quota_exceeded — %d/%d bytes used; ask admin for higher quota", used, filesMax)
				}
			}

			// SHA256 for content-addressable dedup at the storage layer.
			h := sha256.Sum256(content)
			hashHex := hex.EncodeToString(h[:])

			mime := args.Mime
			if mime == "" {
				// http.DetectContentType is documented to read at most
				// the first 512 bytes; passing the full slice is fine.
				mime = http.DetectContentType(content)
			}

			meta := FileDomainMeta{
				ID:          uuid.NewString(),
				SkillID:     inv.SkillID,
				Scope:       args.Scope,
				Name:        args.Name,
				ContentHash: hashHex,
				MimeType:    mime,
				SizeBytes:   size,
				CreatedAt:   time.Now(),
			}
			// Report the ID the storage layer hands back rather than
			// meta.ID.
			fileID, err := storage.FileSave(ctx, meta, content)
			if err != nil {
				return "", fmt.Errorf("file_save: %w", err)
			}

			res := fileSaveResult{
				FileID:    fileID,
				Hash:      hashHex,
				SizeBytes: size,
			}
			b, err := json.Marshal(res)
			if err != nil {
				return "", fmt.Errorf("file_save: marshal result: %w", err)
			}
			return string(b), nil
		},
	)
}