// file_save persists arbitrary bytes (base64-encoded by the caller) // against a (scope, name) tuple within the calling skill's namespace. // Returns the new file_id, the SHA256 content hash, and the size. // // Why base64 over raw bytes: the LLM's tool-call wire format is JSON, // which can't carry arbitrary bytes natively. Base64 round-trips // cleanly through the schema. // // Why hash + size in the response: agents commonly want to dedup // across runs (same hash = same content) or build a manifest. Reporting // these inline saves an immediate file_get round-trip just to compute // them. // // Per-file cap: maxFileBytes (constructor arg) enforces an upper bound // on individual file size. 0 falls back to defaultFileMaxBytes (10 MB). // // Per-skill quota (sum across all files): the constructor's QuotaProvider // arg drives the v4 Phase 4 enforcement. nil disables enforcement // (useful for tests and admin-only deployments). The check is: // // used := storage.FileUsageBytes(skill) // if used + len(new content) > filesMax → quota_exceeded // // Note we do NOT subtract a "prior" value here the way kv_set does: // file_save always inserts a new file row (content-addressable dedup // is at the blob layer, not the row layer), so every save is additive // to FileUsageBytes. package tools import ( "context" "crypto/sha256" "encoding/base64" "encoding/hex" "encoding/json" "fmt" "net/http" "strings" "time" "github.com/google/uuid" "gitea.stevedudenhoeffer.com/steve/executus/tool" ) const defaultFileMaxBytes = 16 * 1024 * 1024 // 10 MiB type fileSaveArgs struct { Scope string `json:"scope" description:"Storage scope: 'skill' (shared across all callers of this skill), 'user:' (per-caller), or 'run:' (this run's scratchpad)."` Name string `json:"name" description:"Filename including extension. Used for display only — the file is identified by an opaque file_id."` ContentBase64 string `json:"content_base64" description:"Base64-encoded file content."` Mime string `json:"mime,omitempty" description:"Optional MIME type. If omitted, detected from the first 512 bytes of content."` } type fileSaveResult struct { FileID string `json:"file_id"` Hash string `json:"hash"` SizeBytes int64 `json:"size_bytes"` } // NewFileSave constructs the file_save tool. // // storage nil → "not configured" at execute time. // maxFileBytes <= 0 falls back to defaultFileMaxBytes (10 MiB). // quota nil → per-skill quota check skipped (per-file cap still applies). // // Permission: anyone may author; safe for share. Scope check at handler // entry prevents cross-user writes; per-user buckets are isolated by // inv.CallerID. func NewFileSave(storage FileStorage, quota QuotaProvider, maxFileBytes int) tool.Tool { if maxFileBytes <= 0 { maxFileBytes = defaultFileMaxBytes } return tool.NewGatedTool[fileSaveArgs]( "file_save", "Save base64-encoded bytes against a (scope, name) tuple. Returns file_id (opaque), SHA256 hash, and size_bytes. Content is dedup'd by hash — multiple file_save calls with identical bytes share storage. NOTE: for files produced inside code_exec, do NOT hand-encode base64 here (it corrupts) — write them to /workspace/ in the code_exec call and use the files_out file_id it returns.", tool.Permission{ AuthoringRequirement: tool.RequirementAnyone, OperatesOn: tool.ScopeCaller, SafeForShare: true, Categories: []string{"storage", "write"}, }, func(ctx context.Context, inv tool.Invocation, args fileSaveArgs) (string, error) { if storage == nil { return "", fmt.Errorf("file_save: not configured") } if err := ValidateScope(inv, args.Scope, inv.CallerIsAdmin); err != nil { return "", fmt.Errorf("file_save: %w", err) } // root_run is a KV-only scope (v1): file storage partitions // by the calling skill, so a root_run file would silently be // invisible to siblings AND escape the run-scope sweeper. // Reject loudly instead. if strings.HasPrefix(args.Scope, "root_run:") { return "", fmt.Errorf("file_save: root_run scope is KV-only; save under run: and share the file_id via kv_set in the root_run scope") } if args.Name == "" { return "", fmt.Errorf("file_save: name required") } if args.ContentBase64 == "" { return "", fmt.Errorf("file_save: content_base64 required") } // Decode + cap. Decoding twice (once to count, once to // store) would waste cycles; we decode once and check size // after. content, err := base64.StdEncoding.DecodeString(args.ContentBase64) if err != nil { return "", fmt.Errorf("file_save: invalid base64: %w", err) } if len(content) > maxFileBytes { return "", fmt.Errorf("file_save: file exceeds max %d bytes (got %d)", maxFileBytes, len(content)) } // Per-skill quota gate (v4 Phase 4). Skipped when quota is nil // (tests / admin opt-out) so the per-file cap above is the // only line of defence in that mode. if quota != nil { _, filesMax, err := quota.EffectiveQuota(ctx, inv.SkillID) if err != nil { return "", fmt.Errorf("file_save: quota lookup: %w", err) } used, err := storage.FileUsageBytes(ctx, inv.SkillID) if err != nil { return "", fmt.Errorf("file_save: usage check: %w", err) } if used+int64(len(content)) > filesMax { return "", fmt.Errorf("file_save: quota_exceeded — %d/%d bytes used; ask admin for higher quota", used, filesMax) } } // SHA256 for content-addressable dedup at the storage layer. h := sha256.Sum256(content) hashHex := hex.EncodeToString(h[:]) mime := args.Mime if mime == "" { // http.DetectContentType is documented to read at most // the first 512 bytes; passing the full slice is fine. mime = http.DetectContentType(content) } meta := FileDomainMeta{ ID: uuid.NewString(), SkillID: inv.SkillID, Scope: args.Scope, Name: args.Name, ContentHash: hashHex, MimeType: mime, SizeBytes: int64(len(content)), CreatedAt: time.Now(), } fileID, err := storage.FileSave(ctx, meta, content) if err != nil { return "", fmt.Errorf("file_save: %w", err) } res := fileSaveResult{ FileID: fileID, Hash: hashHex, SizeBytes: int64(len(content)), } b, err := json.Marshal(res) if err != nil { return "", fmt.Errorf("file_save: marshal result: %w", err) } return string(b), nil }, ) }