Files
executus/tools/file_save.go
T
steve ac961e1539
executus CI / test (pull_request) Successful in 58s
Adversarial Review (Gadfly) / review (pull_request) Successful in 10m10s
P3: store group — kv_* + file_* tools (agent memory)
RegisterStore(reg, StoreDeps) registers the persistent-memory tools over the
host's KV and/or File backends:
- kv_get/set/list/delete (KVStorage seam)
- file_save/get/get_text/get_metadata/list/delete (FileStorage seam), plus
  file_search (FileSearcher) and create_file_url (FileTokenMinter) when wired.

Near-zero-config: Quota defaults to a generous static cap (staticQuota), the
per-value/per-file caps default, and the kv vs file groups register
independently (a host can take just one). Seams moved clean (interface-only):
kv_storage.go, quota_provider.go, file_descendant_grant.go. The default
in-memory KV/File backends come with contrib/store at P4.

Core go.sum still free of gorm/redis/discordgo/sqlite.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-26 22:06:46 -04:00

172 lines
6.4 KiB
Go

// file_save persists arbitrary bytes (base64-encoded by the caller)
// against a (scope, name) tuple within the calling skill's namespace.
// Returns the new file_id, the SHA256 content hash, and the size.
//
// Why base64 over raw bytes: the LLM's tool-call wire format is JSON,
// which can't carry arbitrary bytes natively. Base64 round-trips
// cleanly through the schema.
//
// Why hash + size in the response: agents commonly want to dedup
// across runs (same hash = same content) or build a manifest. Reporting
// these inline saves an immediate file_get round-trip just to compute
// them.
//
// Per-file cap: maxFileBytes (constructor arg) enforces an upper bound
// on individual file size. 0 falls back to defaultFileMaxBytes (10 MiB).
//
// Per-skill quota (sum across all files): the constructor's QuotaProvider
// arg drives the v4 Phase 4 enforcement. nil disables enforcement
// (useful for tests and admin-only deployments). The check is:
//
// used := storage.FileUsageBytes(skill)
// if used + len(new content) > filesMax → quota_exceeded
//
// Note we do NOT subtract a "prior" value here the way kv_set does:
// file_save always inserts a new file row (content-addressable dedup
// is at the blob layer, not the row layer), so every save is additive
// to FileUsageBytes.
package tools
import (
"context"
"crypto/sha256"
"encoding/base64"
"encoding/hex"
"encoding/json"
"fmt"
"net/http"
"strings"
"time"
"github.com/google/uuid"
"gitea.stevedudenhoeffer.com/steve/executus/tool"
)
// defaultFileMaxBytes is the per-file size cap, in bytes, that NewFileSave
// applies when its maxFileBytes argument is zero or negative.
const defaultFileMaxBytes = 10 * 1024 * 1024 // 10 MiB
// fileSaveArgs is the argument payload the file_save handler receives.
// The json tags fix the wire names of each field; the description tags
// hold the human-readable help text for each field (presumably surfaced
// in the tool schema by tool.NewGatedTool — confirm against that package).
type fileSaveArgs struct {
// Scope is checked with ValidateScope by the handler; scopes that start
// with "root_run:" are rejected for files.
Scope string `json:"scope" description:"Storage scope: 'skill' (shared across all callers of this skill), 'user:<your_id>' (per-caller), or 'run:<run_id>' (this run's scratchpad)."`
// Name must be non-empty; it is stored as-is in the file metadata.
Name string `json:"name" description:"Filename including extension. Used for display only — the file is identified by an opaque file_id."`
// ContentBase64 must be non-empty and decode with base64.StdEncoding.
ContentBase64 string `json:"content_base64" description:"Base64-encoded file content."`
// Mime, when empty, is filled in by the handler via http.DetectContentType.
Mime string `json:"mime,omitempty" description:"Optional MIME type. If omitted, detected from the first 512 bytes of content."`
}
// fileSaveResult is the object the file_save handler marshals to JSON and
// returns on success.
type fileSaveResult struct {
// FileID is the identifier returned by the storage backend's FileSave call.
FileID string `json:"file_id"`
// Hash is the hex-encoded SHA-256 digest of the decoded content.
Hash string `json:"hash"`
// SizeBytes is the length of the decoded content in bytes.
SizeBytes int64 `json:"size_bytes"`
}
// NewFileSave builds the file_save tool: it decodes a base64 payload and
// hands the bytes to the file storage backend under the given scope and name.
//
// Arguments:
//   - storage: backend that receives the bytes. A nil storage still yields
//     a tool, but every invocation fails with "not configured".
//   - quota: source of the per-skill file byte budget. When nil, the
//     aggregate quota check is skipped and only the per-file cap applies.
//   - maxFileBytes: per-file size cap in bytes; values <= 0 select
//     defaultFileMaxBytes (10 MiB).
//
// Permission: registered with RequirementAnyone, ScopeCaller and
// SafeForShare. ValidateScope runs at handler entry, before any decoding or
// storage access; that check is what this tool relies on to keep callers out
// of each other's scopes.
//
// On success the handler returns a JSON object with file_id, hash (the
// hex-encoded SHA-256 of the decoded bytes) and size_bytes.
func NewFileSave(storage FileStorage, quota QuotaProvider, maxFileBytes int) tool.Tool {
	limit := maxFileBytes
	if limit <= 0 {
		limit = defaultFileMaxBytes
	}

	perm := tool.Permission{
		AuthoringRequirement: tool.RequirementAnyone,
		OperatesOn:           tool.ScopeCaller,
		SafeForShare:         true,
		Categories:           []string{"storage", "write"},
	}

	handler := func(ctx context.Context, inv tool.Invocation, args fileSaveArgs) (string, error) {
		if storage == nil {
			return "", fmt.Errorf("file_save: not configured")
		}
		if scopeErr := ValidateScope(inv, args.Scope, false); scopeErr != nil {
			return "", fmt.Errorf("file_save: %w", scopeErr)
		}

		// Argument guards, evaluated top to bottom so the first failing
		// one decides the error the caller sees.
		switch {
		case strings.HasPrefix(args.Scope, "root_run:"):
			// root_run is KV-only (v1). File storage partitions by the
			// calling skill, so a root_run file would be invisible to
			// sibling skills and would also escape the run-scope
			// sweeper. Fail loudly rather than store it.
			return "", fmt.Errorf("file_save: root_run scope is KV-only; save under run:<run_id> and share the file_id via kv_set in the root_run scope")
		case args.Name == "":
			return "", fmt.Errorf("file_save: name required")
		case args.ContentBase64 == "":
			return "", fmt.Errorf("file_save: content_base64 required")
		}

		// Decode exactly once and apply the per-file cap to the decoded
		// length; a separate counting pass would only repeat the work.
		raw, decodeErr := base64.StdEncoding.DecodeString(args.ContentBase64)
		if decodeErr != nil {
			return "", fmt.Errorf("file_save: invalid base64: %w", decodeErr)
		}
		if len(raw) > limit {
			return "", fmt.Errorf("file_save: file exceeds max %d bytes (got %d)", limit, len(raw))
		}
		size := int64(len(raw))

		// Per-skill quota gate (v4 Phase 4). With a nil quota provider
		// (tests / admin opt-out) the per-file cap above is the only
		// limit enforced.
		if quota != nil {
			_, budget, quotaErr := quota.EffectiveQuota(ctx, inv.SkillID)
			if quotaErr != nil {
				return "", fmt.Errorf("file_save: quota lookup: %w", quotaErr)
			}
			inUse, usageErr := storage.FileUsageBytes(ctx, inv.SkillID)
			if usageErr != nil {
				return "", fmt.Errorf("file_save: usage check: %w", usageErr)
			}
			// Every save is counted in full: nothing is subtracted for
			// an earlier file with the same name or the same bytes.
			if inUse+size > budget {
				return "", fmt.Errorf("file_save: quota_exceeded — %d/%d bytes used; ask admin for higher quota", inUse, budget)
			}
		}

		// Caller-supplied MIME wins; otherwise sniff it. DetectContentType
		// is documented to look at no more than the first 512 bytes, so
		// handing it the whole slice is fine.
		contentType := args.Mime
		if contentType == "" {
			contentType = http.DetectContentType(raw)
		}

		// SHA-256 digest, used for content-addressable dedup at the
		// storage layer and echoed back to the caller.
		sum := sha256.Sum256(raw)
		digest := hex.EncodeToString(sum[:])

		record := FileDomainMeta{
			ID:          uuid.NewString(),
			SkillID:     inv.SkillID,
			Scope:       args.Scope,
			Name:        args.Name,
			ContentHash: digest,
			MimeType:    contentType,
			SizeBytes:   size,
			CreatedAt:   time.Now(),
		}
		savedID, saveErr := storage.FileSave(ctx, record, raw)
		if saveErr != nil {
			return "", fmt.Errorf("file_save: %w", saveErr)
		}

		// Report the ID the backend returned, not the one generated above.
		payload, marshalErr := json.Marshal(fileSaveResult{
			FileID:    savedID,
			Hash:      digest,
			SizeBytes: size,
		})
		if marshalErr != nil {
			return "", fmt.Errorf("file_save: marshal result: %w", marshalErr)
		}
		return string(payload), nil
	}

	return tool.NewGatedTool[fileSaveArgs](
		"file_save",
		"Save base64-encoded bytes against a (scope, name) tuple. Returns file_id (opaque), SHA256 hash, and size_bytes. Content is dedup'd by hash — multiple file_save calls with identical bytes share storage. NOTE: for files produced inside code_exec, do NOT hand-encode base64 here (it corrupts) — write them to /workspace/ in the code_exec call and use the files_out file_id it returns.",
		perm,
		handler,
	)
}