Files
executus/skillpack/source.go
steve 9bb5d143f7
executus CI / test (pull_request) Successful in 3m30s
fix(skillpack): address review — symlink read, git arg-injection, dup-subscribe, nil panics
Real issues from the PR review:
- security: readTree now skips symlinks (a pack with SKILL.md -> /etc/passwd
  or scripts/x -> ../../.ssh/id_rsa could read host files); covers file and
  dir symlinks, incl. within a git subpath
- security: GitSource rejects url/ref beginning with '-' (git arg injection)
  and clones with '--' separator; --filter=blob:none (blobless partial clone)
  instead of full-history clone
- correctness: Subscribe no longer swallows a non-ErrNotFound store error from
  GetByName (would create a duplicate subscription); handles *GitSource as well
  as GitSource in the URL/subpath extraction
- correctness: pinTo no longer renames a subscription, so Apply can't silently
  collide two subscriptions when an upstream pack changes its name
- validation: isKebab rejects leading/trailing/consecutive hyphens; BOM-
  prefixed SKILL.md now parses (matches the doc comment)
- robustness: Catalog/Activate/renderPackBody/Stage guard nil/malformed packs
- test cleanup: Syncer.Store field renamed Cache (collided with the Store
  interface); test NewID returns distinct ids
- tests: symlink-skip, BOM, strict-kebab, nil-pack-safety

Deferred (advisory perf, documented): PackCache stores raw trees so activation
re-parses; CheckAll is serial. Both fine at expected scale.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-07-04 20:41:44 -04:00

150 lines
4.9 KiB
Go

package skillpack
import (
"context"
"fmt"
"os"
"os/exec"
"path"
"path/filepath"
"strings"
)
// Source is where a pack's files come from. Fetch retrieves the tree at ref and
// returns it together with the source's own resolved ref (a git commit SHA, or
// the content digest for a plain directory) — provenance a host can show and
// pin against. ref semantics are source-specific and may be empty ("the
// default": a dir's current contents, a repo's default branch).
type Source interface {
Fetch(ctx context.Context, ref string) (Tree, string, error)
// Kind is a short stable tag ("dir", "git") for persistence + display.
Kind() string
// String is a human-readable identifier (path or URL[/subpath]).
String() string
}
// DirSource reads a pack from a local directory. ref is ignored (a directory
// has no versions); the resolved ref is the content digest. Useful for
// first-party/builtin packs shipped on disk and for tests.
type DirSource struct {
Path string
}
func (d DirSource) Kind() string { return "dir" }
func (d DirSource) String() string { return d.Path }
func (d DirSource) Fetch(_ context.Context, _ string) (Tree, string, error) {
info, err := os.Stat(d.Path)
if err != nil {
return nil, "", fmt.Errorf("skillpack: dir source %q: %w", d.Path, err)
}
if !info.IsDir() {
return nil, "", fmt.Errorf("skillpack: dir source %q is not a directory", d.Path)
}
t, err := readTree(os.DirFS(d.Path))
if err != nil {
return nil, "", err
}
return t, t.Digest(), nil
}
// GitSource fetches a pack from a git repository, optionally from a Subpath
// within it (for repos that publish several packs). ref is any git commit-ish
// (branch, tag, or SHA); empty means the default branch. The resolved ref is
// the checked-out commit SHA.
//
// Fetch clones into a temp dir, reads the subpath tree into memory, and removes
// the clone before returning — the returned Tree is self-contained, so there is
// no clone lifetime to manage and nothing left on disk. Git runs via the system
// `git`; GitRunner is overridable for tests.
type GitSource struct {
URL string
Subpath string
// GitRunner runs a git command in dir and returns combined output. Nil uses
// the system git.
GitRunner func(ctx context.Context, dir string, args ...string) ([]byte, error)
}
func (g GitSource) Kind() string { return "git" }
func (g GitSource) String() string {
if g.Subpath != "" {
return g.URL + "//" + g.Subpath
}
return g.URL
}
func (g GitSource) run(ctx context.Context, dir string, args ...string) ([]byte, error) {
if g.GitRunner != nil {
return g.GitRunner(ctx, dir, args...)
}
cmd := exec.CommandContext(ctx, "git", args...)
cmd.Dir = dir
out, err := cmd.CombinedOutput()
if err != nil {
return out, fmt.Errorf("skillpack: git %s: %w: %s", strings.Join(args, " "), err, strings.TrimSpace(string(out)))
}
return out, nil
}
func (g GitSource) Fetch(ctx context.Context, ref string) (Tree, string, error) {
// Argument-injection guard: a URL or ref beginning with "-" would be parsed
// by git as an option (e.g. --upload-pack=…), not a value. Reject it rather
// than rely solely on the "--" separator, which checkout does not honor for
// a rev. Hosts should also allow-list sources, but this is defense-in-depth
// for a library.
if strings.HasPrefix(g.URL, "-") {
return nil, "", fmt.Errorf("skillpack: git url must not start with '-': %q", g.URL)
}
if strings.HasPrefix(ref, "-") {
return nil, "", fmt.Errorf("skillpack: git ref must not start with '-': %q", ref)
}
tmp, err := os.MkdirTemp("", "skillpack-git-*")
if err != nil {
return nil, "", err
}
defer os.RemoveAll(tmp)
// --filter=blob:none: a blobless partial clone gets the ref graph cheaply
// and fetches only the blobs the checkout needs — much less than the full
// history, while still supporting an arbitrary commit-ish ref. "--" ends
// option parsing before the URL.
if _, err := g.run(ctx, "", "clone", "--quiet", "--filter=blob:none", "--", g.URL, tmp); err != nil {
return nil, "", err
}
if ref != "" {
if _, err := g.run(ctx, tmp, "checkout", "--quiet", "--detach", ref); err != nil {
return nil, "", err
}
}
shaOut, err := g.run(ctx, tmp, "rev-parse", "HEAD")
if err != nil {
return nil, "", err
}
sha := strings.TrimSpace(string(shaOut))
root := tmp
if g.Subpath != "" {
clean := path.Clean("/" + g.Subpath) // normalize, strip leading ../
root = filepath.Join(tmp, filepath.FromSlash(strings.TrimPrefix(clean, "/")))
if !within(tmp, root) {
return nil, "", fmt.Errorf("skillpack: subpath %q escapes the repo", g.Subpath)
}
if info, err := os.Stat(root); err != nil || !info.IsDir() {
return nil, "", fmt.Errorf("skillpack: subpath %q not found in %s", g.Subpath, g.URL)
}
}
t, err := readTree(os.DirFS(root))
if err != nil {
return nil, "", err
}
// Drop a nested .git if the subpath was the repo root.
for p := range t {
if p == ".git" || strings.HasPrefix(p, ".git/") {
delete(t, p)
}
}
return t, sha, nil
}