Files
executus/skillpack/pack.go
steve 9bb5d143f7
executus CI / test (pull_request) Successful in 3m30s
fix(skillpack): address review — symlink read, git arg-injection, dup-subscribe, nil panics
Real issues from the PR review:
- security: readTree now skips symlinks (a pack with SKILL.md -> /etc/passwd
  or scripts/x -> ../../.ssh/id_rsa could read host files); covers file and
  dir symlinks, incl. within a git subpath
- security: GitSource rejects url/ref beginning with '-' (git arg injection)
  and clones with '--' separator; --filter=blob:none (blobless partial clone)
  instead of full-history clone
- correctness: Subscribe no longer swallows a non-ErrNotFound store error from
  GetByName (would create a duplicate subscription); handles *GitSource as well
  as GitSource in the URL/subpath extraction
- correctness: pinTo no longer renames a subscription, so Apply can't silently
  collide two subscriptions when an upstream pack changes its name
- validation: isKebab rejects leading/trailing/consecutive hyphens; BOM-
  prefixed SKILL.md now parses (matches the doc comment)
- robustness: Catalog/Activate/renderPackBody/Stage guard nil/malformed packs
- test cleanup: Syncer.Store field renamed Cache (collided with the Store
  interface); test NewID returns distinct ids
- tests: symlink-skip, BOM, strict-kebab, nil-pack-safety

Deferred (advisory perf, documented): PackCache stores raw trees so activation
re-parses; CheckAll is serial. Both fine at expected scale.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-07-04 20:41:44 -04:00

140 lines
4.1 KiB
Go

package skillpack
import (
"crypto/sha256"
"encoding/hex"
"fmt"
"io/fs"
"os"
"path"
"path/filepath"
"sort"
"strings"
)
// Tree is a pack's file set: relative slash-separated path -> file bytes,
// including the SKILL.md itself. It is self-contained (no live filesystem
// handle) so it can be cached, digested, and staged without worrying about the
// lifetime of a clone or temp dir.
type Tree map[string][]byte
// Digest is the content address of the tree: a SHA-256 over every file's path
// and bytes, order-independent. Two trees with identical contents produce the
// same digest regardless of how they were fetched — this is the pin identity
// and the change-detection signal (a git SHA is provenance, but the digest is
// what says "the bytes an agent runs changed").
func (t Tree) Digest() string {
paths := t.Paths()
h := sha256.New()
for _, p := range paths {
fh := sha256.Sum256(t[p])
// path \x00 filehash \n — the NUL prevents path/content boundary games.
fmt.Fprintf(h, "%s\x00%s\n", p, hex.EncodeToString(fh[:]))
}
return hex.EncodeToString(h.Sum(nil))
}
// Paths returns the tree's file paths, sorted.
func (t Tree) Paths() []string {
out := make([]string, 0, len(t))
for p := range t {
out = append(out, p)
}
sort.Strings(out)
return out
}
// WriteTo materializes the tree under dir (creating it and any parents). It is
// how a host stages a pack's files for a sandbox; the host owns mount/read-only
// policy. Paths are cleaned and constrained to dir — a tree entry that escapes
// (via .. or an absolute path) is rejected rather than written outside dir.
func (t Tree) WriteTo(dir string) error {
for _, p := range t.Paths() {
dest := filepath.Join(dir, filepath.FromSlash(p))
if !within(dir, dest) {
return fmt.Errorf("skillpack: refusing to stage %q outside %q", p, dir)
}
if err := os.MkdirAll(filepath.Dir(dest), 0o755); err != nil {
return err
}
if err := os.WriteFile(dest, t[p], 0o644); err != nil {
return err
}
}
return nil
}
// Pack is a fetched, parsed pack: its manifest, its file tree, the tree's
// content digest, and the non-manifest ("bundled") file paths a host can stage.
type Pack struct {
Manifest *Manifest
Tree Tree
Digest string
// Bundled is every tree path except the SKILL.md, sorted — the scripts and
// reference files skill_use points the model at.
Bundled []string
}
// LoadPack parses a fetched Tree into a Pack: it requires a root SKILL.md,
// parses+validates it, computes the digest, and lists the bundled files.
func LoadPack(t Tree) (*Pack, error) {
raw, ok := t[ManifestName]
if !ok {
return nil, ErrNoManifest
}
m, err := ParseManifest(raw)
if err != nil {
return nil, err
}
bundled := make([]string, 0, len(t))
for _, p := range t.Paths() {
if p != ManifestName {
bundled = append(bundled, p)
}
}
return &Pack{Manifest: m, Tree: t, Digest: t.Digest(), Bundled: bundled}, nil
}
// readTree reads an entire fs.FS (rooted at ".") into a Tree, skipping
// directories. It is the shared reader for DirSource and GitSource, so both
// produce identical self-contained trees.
func readTree(fsys fs.FS) (Tree, error) {
t := Tree{}
err := fs.WalkDir(fsys, ".", func(p string, d fs.DirEntry, err error) error {
if err != nil {
return err
}
if d.IsDir() {
return nil
}
// Skip symlinks. A pack must be self-contained; os.DirFS + ReadFile
// follows symlinks, so a malicious pack with `SKILL.md -> /etc/passwd`
// or `scripts/x -> ../../.ssh/id_rsa` would otherwise read host files
// into the tree. WalkDir yields a symlink-to-dir as a non-dir entry
// carrying ModeSymlink, so this one check covers file and dir symlinks.
if d.Type()&fs.ModeSymlink != 0 {
return nil
}
b, err := fs.ReadFile(fsys, p)
if err != nil {
return err
}
t[path.Clean(p)] = b
return nil
})
if err != nil {
return nil, err
}
return t, nil
}
// within reports whether dest is inside dir (defense against path traversal in
// a staged tree).
func within(dir, dest string) bool {
rel, err := filepath.Rel(dir, dest)
if err != nil {
return false
}
return rel != ".." && !strings.HasPrefix(rel, ".."+string(filepath.Separator))
}