ee6e9ef9f8
executus CI / test (pull_request) Successful in 59s
All 3 cloud models converged on a real access-control bug; fixed it + the other genuine findings (the false-positives were dropped): Security (HIGH — all 3 models): - create_file_url skipped ValidateScope: a same-skill caller could mint a PUBLIC url for a file scoped to another user/run. Now runs ValidateScope (admin-aware), skipped only for the descendant-grant case — mirroring the read tools. Other real fixes: - ValidateScope hard-coded `false` at every call site (admin branch dead) -> pass inv.CallerIsAdmin (the executor sets it via the host AdminPolicy; still false/fail-closed when no admin). Stale "no admin flag" comment corrected. - create_file_url: ExpiresInSeconds clamped BEFORE the *time.Second multiply (huge values overflowed to a negative duration that slipped under the cap, minting already-expired tokens); swallowed json.Marshal error now returned. - RegisterMeta: build the default budget WITH the configured MaxPerRun (was NewInMemorySearchBudget(nil) -> hardcoded 10, ignoring MetaDeps.MaxPerRun). - classify: all-zero scores no longer return a false-positive top-1 winner; coerceClassifyScore uses strconv.ParseFloat (rejects trailing garbage like "50extra" that fmt.Sscanf silently accepted). - file_delete: honor the descendant grant (parent can clean up a worker's artifacts) — was the lone cross-skill-reject-outright file tool. - meta tools: input caps truncate at a UTF-8 rune boundary (truncateUTF8), not mid-rune. - think: removed the dead `var _ = fmt.Errorf` import-keeper; file_save default aligned to 16 MiB (matched RegisterStore). Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
172 lines
6.4 KiB
Go
172 lines
6.4 KiB
Go
// file_save persists arbitrary bytes (base64-encoded by the caller)
|
|
// against a (scope, name) tuple within the calling skill's namespace.
|
|
// Returns the new file_id, the SHA256 content hash, and the size.
|
|
//
|
|
// Why base64 over raw bytes: the LLM's tool-call wire format is JSON,
|
|
// which can't carry arbitrary bytes natively. Base64 round-trips
|
|
// cleanly through the schema.
|
|
//
|
|
// Why hash + size in the response: agents commonly want to dedup
|
|
// across runs (same hash = same content) or build a manifest. Reporting
|
|
// these inline saves an immediate file_get round-trip just to compute
|
|
// them.
|
|
//
|
|
// Per-file cap: maxFileBytes (constructor arg) enforces an upper bound
|
|
// on individual file size. A value <= 0 falls back to defaultFileMaxBytes (16 MiB).
|
|
//
|
|
// Per-skill quota (sum across all files): the constructor's QuotaProvider
|
|
// arg drives the v4 Phase 4 enforcement. nil disables enforcement
|
|
// (useful for tests and admin-only deployments). The check is:
|
|
//
|
|
// used := storage.FileUsageBytes(skill)
|
|
// if used + len(new content) > filesMax → quota_exceeded
|
|
//
|
|
// Note we do NOT subtract a "prior" value here the way kv_set does:
|
|
// file_save always inserts a new file row (content-addressable dedup
|
|
// is at the blob layer, not the row layer), so every save is additive
|
|
// to FileUsageBytes.
|
|
package tools
|
|
|
|
import (
|
|
"context"
|
|
"crypto/sha256"
|
|
"encoding/base64"
|
|
"encoding/hex"
|
|
"encoding/json"
|
|
"fmt"
|
|
"net/http"
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/google/uuid"
|
|
|
|
"gitea.stevedudenhoeffer.com/steve/executus/tool"
|
|
)
|
|
|
|
// defaultFileMaxBytes is the per-file size cap NewFileSave applies when its
// maxFileBytes argument is <= 0.
const defaultFileMaxBytes = 16 * 1024 * 1024 // 16 MiB
|
|
|
|
// fileSaveArgs is the argument payload accepted by the file_save handler.
// The description tags are runtime strings carried on the struct for whoever
// reads them via reflection; they must stay in sync with handler behaviour.
type fileSaveArgs struct {
	// Scope is the storage scope the file is written under. The handler
	// passes it to ValidateScope and rejects any "root_run:" prefix.
	Scope string `json:"scope" description:"Storage scope: 'skill' (shared across all callers of this skill), 'user:<your_id>' (per-caller), or 'run:<run_id>' (this run's scratchpad)."`

	// Name is the display filename; the handler requires it to be non-empty.
	Name string `json:"name" description:"Filename including extension. Used for display only — the file is identified by an opaque file_id."`

	// ContentBase64 is the file content, decoded by the handler with
	// base64.StdEncoding; the handler requires it to be non-empty.
	ContentBase64 string `json:"content_base64" description:"Base64-encoded file content."`

	// Mime is optional. When empty the handler derives a type from the
	// decoded content via http.DetectContentType.
	Mime string `json:"mime,omitempty" description:"Optional MIME type. If omitted, detected from the first 512 bytes of content."`
}
|
|
|
|
// fileSaveResult is the JSON document the file_save handler returns on
// success.
type fileSaveResult struct {
	// FileID is the identifier returned by the storage layer's FileSave call.
	FileID string `json:"file_id"`

	// Hash is the hex-encoded SHA256 digest of the decoded content.
	Hash string `json:"hash"`

	// SizeBytes is the length of the decoded content in bytes.
	SizeBytes int64 `json:"size_bytes"`
}
|
|
|
|
// NewFileSave constructs the file_save tool.
|
|
//
|
|
// storage nil → "not configured" at execute time.
|
|
// maxFileBytes <= 0 falls back to defaultFileMaxBytes (10 MiB).
|
|
// quota nil → per-skill quota check skipped (per-file cap still applies).
|
|
//
|
|
// Permission: anyone may author; safe for share. Scope check at handler
|
|
// entry prevents cross-user writes; per-user buckets are isolated by
|
|
// inv.CallerID.
|
|
func NewFileSave(storage FileStorage, quota QuotaProvider, maxFileBytes int) tool.Tool {
|
|
if maxFileBytes <= 0 {
|
|
maxFileBytes = defaultFileMaxBytes
|
|
}
|
|
return tool.NewGatedTool[fileSaveArgs](
|
|
"file_save",
|
|
"Save base64-encoded bytes against a (scope, name) tuple. Returns file_id (opaque), SHA256 hash, and size_bytes. Content is dedup'd by hash — multiple file_save calls with identical bytes share storage. NOTE: for files produced inside code_exec, do NOT hand-encode base64 here (it corrupts) — write them to /workspace/ in the code_exec call and use the files_out file_id it returns.",
|
|
tool.Permission{
|
|
AuthoringRequirement: tool.RequirementAnyone,
|
|
OperatesOn: tool.ScopeCaller,
|
|
SafeForShare: true,
|
|
Categories: []string{"storage", "write"},
|
|
},
|
|
func(ctx context.Context, inv tool.Invocation, args fileSaveArgs) (string, error) {
|
|
if storage == nil {
|
|
return "", fmt.Errorf("file_save: not configured")
|
|
}
|
|
if err := ValidateScope(inv, args.Scope, inv.CallerIsAdmin); err != nil {
|
|
return "", fmt.Errorf("file_save: %w", err)
|
|
}
|
|
// root_run is a KV-only scope (v1): file storage partitions
|
|
// by the calling skill, so a root_run file would silently be
|
|
// invisible to siblings AND escape the run-scope sweeper.
|
|
// Reject loudly instead.
|
|
if strings.HasPrefix(args.Scope, "root_run:") {
|
|
return "", fmt.Errorf("file_save: root_run scope is KV-only; save under run:<run_id> and share the file_id via kv_set in the root_run scope")
|
|
}
|
|
if args.Name == "" {
|
|
return "", fmt.Errorf("file_save: name required")
|
|
}
|
|
if args.ContentBase64 == "" {
|
|
return "", fmt.Errorf("file_save: content_base64 required")
|
|
}
|
|
|
|
// Decode + cap. Decoding twice (once to count, once to
|
|
// store) would waste cycles; we decode once and check size
|
|
// after.
|
|
content, err := base64.StdEncoding.DecodeString(args.ContentBase64)
|
|
if err != nil {
|
|
return "", fmt.Errorf("file_save: invalid base64: %w", err)
|
|
}
|
|
if len(content) > maxFileBytes {
|
|
return "", fmt.Errorf("file_save: file exceeds max %d bytes (got %d)", maxFileBytes, len(content))
|
|
}
|
|
|
|
// Per-skill quota gate (v4 Phase 4). Skipped when quota is nil
|
|
// (tests / admin opt-out) so the per-file cap above is the
|
|
// only line of defence in that mode.
|
|
if quota != nil {
|
|
_, filesMax, err := quota.EffectiveQuota(ctx, inv.SkillID)
|
|
if err != nil {
|
|
return "", fmt.Errorf("file_save: quota lookup: %w", err)
|
|
}
|
|
used, err := storage.FileUsageBytes(ctx, inv.SkillID)
|
|
if err != nil {
|
|
return "", fmt.Errorf("file_save: usage check: %w", err)
|
|
}
|
|
if used+int64(len(content)) > filesMax {
|
|
return "", fmt.Errorf("file_save: quota_exceeded — %d/%d bytes used; ask admin for higher quota", used, filesMax)
|
|
}
|
|
}
|
|
|
|
// SHA256 for content-addressable dedup at the storage layer.
|
|
h := sha256.Sum256(content)
|
|
hashHex := hex.EncodeToString(h[:])
|
|
|
|
mime := args.Mime
|
|
if mime == "" {
|
|
// http.DetectContentType is documented to read at most
|
|
// the first 512 bytes; passing the full slice is fine.
|
|
mime = http.DetectContentType(content)
|
|
}
|
|
|
|
meta := FileDomainMeta{
|
|
ID: uuid.NewString(),
|
|
SkillID: inv.SkillID,
|
|
Scope: args.Scope,
|
|
Name: args.Name,
|
|
ContentHash: hashHex,
|
|
MimeType: mime,
|
|
SizeBytes: int64(len(content)),
|
|
CreatedAt: time.Now(),
|
|
}
|
|
|
|
fileID, err := storage.FileSave(ctx, meta, content)
|
|
if err != nil {
|
|
return "", fmt.Errorf("file_save: %w", err)
|
|
}
|
|
|
|
res := fileSaveResult{
|
|
FileID: fileID,
|
|
Hash: hashHex,
|
|
SizeBytes: int64(len(content)),
|
|
}
|
|
b, err := json.Marshal(res)
|
|
if err != nil {
|
|
return "", fmt.Errorf("file_save: marshal result: %w", err)
|
|
}
|
|
return string(b), nil
|
|
},
|
|
)
|
|
}
|