diff --git a/docs/sandbox-setup.md b/docs/sandbox-setup.md new file mode 100644 index 0000000..5214c88 --- /dev/null +++ b/docs/sandbox-setup.md @@ -0,0 +1,575 @@ +# Sandbox Setup & Hardening Guide + +Complete guide for setting up a Proxmox VE host to run isolated LXC sandbox containers for the go-llm sandbox package. + +## Table of Contents + +1. [Prerequisites](#1-prerequisites) +2. [Proxmox Host Preparation](#2-proxmox-host-preparation) +3. [Network Setup](#3-network-setup) +4. [LXC Template Creation](#4-lxc-template-creation) +5. [SSH Key Setup](#5-ssh-key-setup) +6. [Configuration](#6-configuration) +7. [Hardening Checklist](#7-hardening-checklist) +8. [Monitoring & Maintenance](#8-monitoring--maintenance) +9. [Troubleshooting](#9-troubleshooting) + +--- + +## 1. Prerequisites + +### Hardware/VM Requirements + +| Resource | Minimum | Recommended | +|----------|---------|-------------| +| CPU | 4 cores | 8+ cores | +| RAM | 8 GB | 16+ GB | +| Storage | 100 GB SSD | 250+ GB SSD | +| Network | 1 NIC | 2 NICs (mgmt + sandbox) | + +### Software + +- Proxmox VE 8.x ([installation guide](https://pve.proxmox.com/wiki/Installation)) +- During install, configure the management interface on `vmbr0` + +--- + +## 2. Proxmox Host Preparation + +### Create Resource Pool + +Scope sandbox containers to a dedicated resource pool to limit API token access: + +```bash +pvesh create /pools --poolid sandbox-pool +``` + +### Create API User and Token + +```bash +# Create dedicated user +pveum useradd mort-sandbox@pve + +# Create role with minimum required permissions +pveum roleadd SandboxAdmin -privs "VM.Allocate,VM.Clone,VM.Audit,VM.PowerMgmt,VM.Console,Datastore.AllocateSpace,Datastore.Audit" + +# Grant role on the sandbox pool only +pveum aclmod /pool/sandbox-pool -user mort-sandbox@pve -role SandboxAdmin + +# Grant access to the template storage +pveum aclmod /storage/local -user mort-sandbox@pve -role PVEDatastoreUser + +# Create API token (privsep=0 means token inherits user's permissions) +pveum user token add mort-sandbox@pve sandbox-token --privsep=0 +``` + +Save the output — it contains the token secret: +``` +┌──────────┬──────────────────────────────────────────┐ +│ key │ value │ +╞══════════╪══════════════════════════════════════════╡ +│ full-tokenid │ mort-sandbox@pve!sandbox-token │ +│ value │ xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx │ +└──────────┴──────────────────────────────────────────┘ +``` + +Store the secret securely (environment variable, secret manager, etc.). Never commit it to source control. + +--- + +## 3. 
Network Setup

### 3.1 Create Isolated Bridge

Add to `/etc/network/interfaces` on the Proxmox host:

```
auto vmbr1
iface vmbr1 inet static
    address 10.99.0.1/16
    bridge-ports none
    bridge-stp off
    bridge-fd 0
    post-up echo 1 > /proc/sys/net/ipv4/ip_forward
    # NAT for optional internet access (controlled per-container by nftables)
    post-up nft add table nat 2>/dev/null; true
    post-up nft add chain nat postrouting { type nat hook postrouting priority 100 \; } 2>/dev/null; true
    post-up nft add rule nat postrouting oifname "vmbr0" ip saddr 10.99.0.0/16 masquerade 2>/dev/null; true
```

Apply the configuration:
```bash
ifreload -a
```

### 3.2 Install and Configure DHCP

```bash
apt-get install -y dnsmasq
```

Create `/etc/dnsmasq.d/sandbox.conf`:
```
interface=vmbr1
bind-interfaces
dhcp-range=10.99.1.1,10.99.254.254,255.255.0.0,1h
dhcp-option=option:router,10.99.0.1
dhcp-option=option:dns-server,1.1.1.1,8.8.8.8
```

Restart dnsmasq:
```bash
systemctl restart dnsmasq
systemctl enable dnsmasq
```

### 3.3 Configure nftables Firewall

Create `/etc/nftables.conf`:

```nft
#!/usr/sbin/nft -f

flush ruleset

table inet sandbox {
    # Dynamic set of container IPs allowed internet access.
    # Populated/cleared by the sandbox manager via the Proxmox API.
    set internet_allowed {
        type ipv4_addr
    }

    chain forward {
        type filter hook forward priority 0; policy drop;

        # Allow established/related connections
        ct state established,related accept

        # Allow inter-bridge traffic (host ↔ containers via vmbr1)
        iifname "vmbr1" oifname "vmbr1" accept

        # Allow DNS for all containers (needed for apt)
        ip saddr 10.99.0.0/16 udp dport 53 accept
        ip saddr 10.99.0.0/16 tcp dport 53 accept

        # Allow HTTP/HTTPS only for containers in the internet_allowed set
        ip saddr @internet_allowed tcp dport { 80, 443 } accept

        # Rate limit: max 50 new connections per second from the sandbox subnet
        # (one aggregate limit for the whole subnet, not a per-container meter)
        ip saddr 10.99.0.0/16 ct state new limit rate over 50/second drop

        # Block everything else from containers
        ip saddr 10.99.0.0/16 drop

        # Allow host → containers (for SSH from the application)
        ip daddr 10.99.0.0/16 accept
    }

    chain input {
        type filter hook input priority 0; policy accept;

        # Block containers from accessing Proxmox management ports
        # (containers may only reach the host on TCP port 22)
        iifname "vmbr1" ip daddr 10.99.0.1 tcp dport != 22 drop
    }
}

# NAT table for optional internet access
table nat {
    chain postrouting {
        type nat hook postrouting priority 100;
        oifname "vmbr0" ip saddr 10.99.0.0/16 masquerade
    }
}
```

Apply and persist:
```bash
nft -f /etc/nftables.conf
systemctl enable nftables
```

Verify:
```bash
nft list ruleset
```

### 3.4 Test Network Isolation

From a test container on `vmbr1`:
```bash
# Should work: DNS resolution
dig google.com

# Should be blocked: HTTP (not in internet_allowed set)
curl -s --connect-timeout 5 https://google.com && echo "FAIL: should be blocked" || echo "OK: blocked"

# Should be blocked: access to LAN
ping -c 1 -W 2 192.168.1.1 && echo "FAIL: LAN reachable" || echo "OK: LAN blocked"

# Should be blocked: access to Proxmox management
curl -s --connect-timeout 5 https://10.99.0.1:8006 && echo "FAIL: Proxmox reachable" || echo "OK: Proxmox blocked"
```

---

## 4. 
LXC Template Creation

### 4.1 Download Base Image

```bash
pveam update
pveam download local ubuntu-24.04-standard_24.04-1_amd64.tar.zst
```

### 4.2 Create Template Container

```bash
pct create 9000 local:vztmpl/ubuntu-24.04-standard_24.04-1_amd64.tar.zst \
    --hostname sandbox-template \
    --memory 1024 \
    --swap 0 \
    --cores 1 \
    --rootfs local-lvm:8 \
    --net0 name=eth0,bridge=vmbr1,ip=dhcp \
    --unprivileged 1 \
    --features nesting=0 \
    --ostype ubuntu \
    --ssh-public-keys /root/.ssh/mort_sandbox.pub \
    --pool sandbox-pool \
    --start 0
```

### 4.3 Install Base Packages

```bash
pct start 9000
pct exec 9000 -- bash -c '
    apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        python3 python3-pip python3-venv \
        nodejs npm \
        git curl wget jq \
        vim nano \
        htop tree \
        ca-certificates \
        openssh-server \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*
'
```

### 4.4 Create Sandbox User

```bash
pct exec 9000 -- bash -c '
    # Create unprivileged sandbox user with sudo
    useradd -m -s /bin/bash -G sudo sandbox
    echo "sandbox ALL=(ALL) NOPASSWD:ALL" > /etc/sudoers.d/sandbox

    # Set up SSH access
    mkdir -p /home/sandbox/.ssh
    cp /root/.ssh/authorized_keys /home/sandbox/.ssh/
    chown -R sandbox:sandbox /home/sandbox/.ssh
    chmod 700 /home/sandbox/.ssh
    chmod 600 /home/sandbox/.ssh/authorized_keys

    # Create uploads directory
    mkdir -p /home/sandbox/uploads
    chown sandbox:sandbox /home/sandbox/uploads

    # Enable SSH
    systemctl enable ssh
'
```

### 4.5 Security Hardening

```bash
pct exec 9000 -- bash -c '
    # Process limits (prevent fork bombs)
    echo "* soft nproc 256" >> /etc/security/limits.conf
    echo "* hard nproc 512" >> /etc/security/limits.conf

    # Disable core dumps
    echo "* hard core 0" >> /etc/security/limits.conf

    # Disable unnecessary services
    systemctl disable systemd-resolved 2>/dev/null || true
    systemctl disable snapd 2>/dev/null || true
'
```

### 4.6 Convert to Template

```bash
pct stop 9000
pct template 9000
```

### 4.7 Verify Template

Clone and test manually:
```bash
pct clone 9000 9999 --hostname test-sandbox --full
pct start 9999
# Wait for DHCP, then SSH in (the IP appears in /var/lib/misc/dnsmasq.leases)
ssh sandbox@<container-ip>
# Run some commands, verify packages installed
sudo apt-get update
python3 --version
node --version
# Clean up
exit
pct stop 9999
pct destroy 9999
```

---

## 5. SSH Key Setup

### 5.1 Generate Key Pair

```bash
ssh-keygen -t ed25519 -f /etc/mort/sandbox_key -N "" -C "mort-sandbox"
```

### 5.2 Install Public Key in Template

This was done in step 4.2 with `--ssh-public-keys`. If you need to update it:

```bash
pct start 9000  # fails if 9000 is already a template; use the clone-and-replace workflow from Template Updates instead
# Copy key
cat /etc/mort/sandbox_key.pub | pct exec 9000 -- tee /home/sandbox/.ssh/authorized_keys
pct exec 9000 -- chown sandbox:sandbox /home/sandbox/.ssh/authorized_keys
pct exec 9000 -- chmod 600 /home/sandbox/.ssh/authorized_keys
pct stop 9000
pct template 9000
```

### 5.3 Set Permissions

```bash
chmod 600 /etc/mort/sandbox_key
chmod 644 /etc/mort/sandbox_key.pub
# If running as a specific user:
chown mort:mort /etc/mort/sandbox_key /etc/mort/sandbox_key.pub
```

---

## 6. 
Configuration

### Go Configuration

```go
signer, _ := sandbox.LoadSSHKey("/etc/mort/sandbox_key")

mgr, _ := sandbox.NewManager(sandbox.Config{
    Proxmox: sandbox.ProxmoxConfig{
        BaseURL:            "https://proxmox.local:8006",
        TokenID:            "mort-sandbox@pve!sandbox-token",
        Secret:             os.Getenv("SANDBOX_PROXMOX_SECRET"),
        Node:               "pve",
        TemplateID:         9000,
        Pool:               "sandbox-pool",
        Bridge:             "vmbr1",
        InsecureSkipVerify: true, // Only for self-signed certs
    },
    SSH: sandbox.SSHConfig{
        Signer:         signer,
        User:           "sandbox",        // default
        ConnectTimeout: 10 * time.Second, // default
        CommandTimeout: 60 * time.Second, // default
    },
    Defaults: sandbox.ContainerConfig{
        CPUs:     1,
        MemoryMB: 1024,
        DiskGB:   8,
    },
})
```

### Environment Variables

| Variable | Description |
|----------|-------------|
| `SANDBOX_PROXMOX_SECRET` | Proxmox API token secret |
| `SANDBOX_SSH_KEY_PATH` | Path to SSH private key (alternative to config) |

---

## 7. Hardening Checklist

Run through this checklist after setup:

### Container Isolation
- [ ] Containers are unprivileged (verify UID mapping in `/etc/pve/lxc/<vmid>.conf`)
- [ ] Nesting is disabled (`features: nesting=0`)
- [ ] Swap is disabled on containers (`swap: 0`)
- [ ] Resource pool scoping: API token can only touch `sandbox-pool`

### Network Isolation
- [ ] `vmbr1` has no physical ports (`bridge-ports none`)
- [ ] nftables rules loaded: `nft list ruleset` shows sandbox table
- [ ] nftables persists across reboots: `systemctl is-enabled nftables`
- [ ] Default-deny outbound for containers
- [ ] DNS (port 53) allowed for all containers
- [ ] HTTP/HTTPS only for containers in `internet_allowed` set
- [ ] Rate limiting active (50 conn/sec)
- [ ] Containers cannot reach Proxmox management (port 8006 blocked)

### Security Profiles
- [ ] AppArmor profile active: `lxc-container-default-cgns`
- [ ] Process limits in `/etc/security/limits.conf` (nproc 256/512)
- [ ] Core dumps disabled
- [ ] Capability drops verified in container config

### Functional Tests
- [ ] **Fork bomb test**: run `:(){ :|:& };:` in container → PID limit fires, container survives
- [ ] **OOM test**: allocate >1GB memory → container OOM-killed, host unaffected
- [ ] **Network scan test**: `nmap` from container → blocked by nftables
- [ ] **Container escape test**: attempt to mount host filesystem → denied
- [ ] **LAN access test**: ping LAN hosts → blocked
- [ ] **Cross-container test**: ping other sandbox containers → blocked
- [ ] **Internet access test**: HTTP without being in `internet_allowed` → blocked
- [ ] **Internet access test**: add to `internet_allowed` → HTTP works
- [ ] **Cleanup test**: destroy container → verify no orphan volumes

### Operational Tests
- [ ] Clone template → container starts → SSH connects → commands work
- [ ] File upload/download via SFTP works
- [ ] Container destroy removes all resources
- [ ] Orphan cleanup: kill application mid-session, restart, verify cleanup

---

## 8. Monitoring & Maintenance

### Log Rotation

Sandbox session logs should be rotated to prevent disk exhaustion. 
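The logrotate policy below assumes logs land under `/var/log/sandbox/`. A minimal sketch of pointing the application's `log/slog` logger at such a file (the path and the JSON handler are illustrative assumptions, not something the sandbox package requires):

```go
package main

import (
	"log"
	"log/slog"
	"os"
)

func main() {
	// Append to a file in the directory that logrotate manages below.
	f, err := os.OpenFile("/var/log/sandbox/sessions.log",
		os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0o644)
	if err != nil {
		log.Fatal(err)
	}
	defer f.Close()

	// JSON output keeps one entry per line, which rotates cleanly.
	logger := slog.New(slog.NewJSONHandler(f, nil))
	logger.Info("sandbox created", "vmid", 201, "ip", "10.99.1.5")
}
```
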
If using slog to a file:

```
# /etc/logrotate.d/sandbox
/var/log/sandbox/*.log {
    daily
    rotate 14
    compress
    delaycompress
    missingok
    notifempty
}
```

### Storage Cleanup

Verify destroyed containers don't leave orphan volumes:
```bash
# List all LVM volumes in the sandbox storage
lvs | grep sandbox

# Compare with running containers
pct list | grep sandbox
```

### Template Updates

Periodically update the template with the latest packages:

```bash
# There is no direct un-template operation; clone the template, update the
# clone, then replace the old template with it
pct clone 9000 9001 --hostname template-update --full
pct start 9001
pct exec 9001 -- bash -c 'apt-get update && apt-get upgrade -y && apt-get clean'
pct stop 9001

# Destroy old template and create new one
pct destroy 9000
# Rename 9001 → 9000 (or update your config to use the new ID)
pct template 9001
```

### Proxmox Host Updates

```bash
apt-get update && apt-get dist-upgrade -y
# Reboot if kernel was updated
# Verify nftables rules are still loaded after reboot
nft list ruleset
```

---

## 9. Troubleshooting

### Container won't start

```bash
# Run the start command and read the task error
pct start <vmid>
# If error, check:
journalctl -u pve-container@<vmid> -n 50

# Common issues:
# - Storage full: check `df -h` and `lvs`
# - UID mapping issues: verify /etc/subuid and /etc/subgid
```

### SSH connection refused

```bash
# Verify container is running
pct status <vmid>

# Check if SSH is running inside container
pct exec <vmid> -- systemctl status ssh

# Verify IP assignment
pct exec <vmid> -- ip addr show eth0

# Check DHCP leases
cat /var/lib/misc/dnsmasq.leases
```

### Container has no internet (when it should)

```bash
# Verify container IP is in the internet_allowed set
nft list set inet sandbox internet_allowed

# Manually add for testing
nft add element inet sandbox internet_allowed { 10.99.1.5 }

# Verify NAT is working
nft list table nat

# Check if IP forwarding is enabled
cat /proc/sys/net/ipv4/ip_forward  # Should be 1
```

### nftables rules lost after reboot

```bash
# Verify nftables is enabled
systemctl is-enabled nftables

# If rules are missing, reload
nft -f /etc/nftables.conf

# Make sure the config file is correct
nft -c -f /etc/nftables.conf  # Check syntax without applying
```

### Orphaned containers

```bash
# List all containers in the sandbox pool
pvesh get /pools/sandbox-pool --output-format json | jq '.members[] | select(.type == "lxc")'

# Destroy orphans manually
pct stop <vmid> && pct destroy <vmid> --force --purge
```
diff --git a/v2/CLAUDE.md b/v2/CLAUDE.md
index ff2d203..dad6c90 100644
--- a/v2/CLAUDE.md
+++ b/v2/CLAUDE.md
@@ -18,6 +18,7 @@
 - `provider/` — Provider interface that backends implement
 - `openai/`, `anthropic/`, `google/` — Provider implementations
 - `tools/` — Ready-to-use sample tools (WebSearch, Browser, Exec, ReadFile, WriteFile, HTTP)
+- `sandbox/` — Isolated Linux container environments via Proxmox LXC + SSH
 - `internal/schema/` — JSON Schema generation from Go structs
 - `internal/imageutil/` — Image compression utilities
diff --git a/v2/go.mod b/v2/go.mod
index 5b9c582..d57588e 100644
--- a/v2/go.mod
+++ b/v2/go.mod
@@ -8,6 +8,8 @@ require (
 	github.com/liushuangls/go-anthropic/v2 v2.17.0
 	github.com/modelcontextprotocol/go-sdk v1.2.0
 	github.com/openai/openai-go v1.12.0
+	github.com/pkg/sftp v1.13.10
+	golang.org/x/crypto v0.41.0
 	golang.org/x/image v0.35.0
 	google.golang.org/genai v1.45.0
 )
 
 require (
@@ -22,16 +24,16 @@ 
github.com/google/s2a-go v0.1.8 // indirect github.com/googleapis/enterprise-certificate-proxy v0.3.4 // indirect github.com/gorilla/websocket v1.5.3 // indirect + github.com/kr/fs v0.1.0 // indirect github.com/tidwall/gjson v1.14.4 // indirect github.com/tidwall/match v1.1.1 // indirect github.com/tidwall/pretty v1.2.1 // indirect github.com/tidwall/sjson v1.2.5 // indirect github.com/yosida95/uritemplate/v3 v3.0.2 // indirect go.opencensus.io v0.24.0 // indirect - golang.org/x/crypto v0.36.0 // indirect - golang.org/x/net v0.38.0 // indirect + golang.org/x/net v0.42.0 // indirect golang.org/x/oauth2 v0.30.0 // indirect - golang.org/x/sys v0.31.0 // indirect + golang.org/x/sys v0.35.0 // indirect golang.org/x/text v0.33.0 // indirect google.golang.org/genproto/googleapis/rpc v0.0.0-20240903143218-8af14fe29dc1 // indirect google.golang.org/grpc v1.66.2 // indirect diff --git a/v2/go.sum b/v2/go.sum index 2dc7df9..bd64989 100644 --- a/v2/go.sum +++ b/v2/go.sum @@ -49,12 +49,16 @@ github.com/googleapis/enterprise-certificate-proxy v0.3.4 h1:XYIDZApgAnrN1c855gT github.com/googleapis/enterprise-certificate-proxy v0.3.4/go.mod h1:YKe7cfqYXjKGpGvmSg28/fFvhNzinZQm8DGnaburhGA= github.com/gorilla/websocket v1.5.3 h1:saDtZ6Pbx/0u+bgYQ3q96pZgCzfhKXGPqt7kZ72aNNg= github.com/gorilla/websocket v1.5.3/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE= +github.com/kr/fs v0.1.0 h1:Jskdu9ieNAYnjxsi0LbQp1ulIKZV1LAFgK1tWhpZgl8= +github.com/kr/fs v0.1.0/go.mod h1:FFnZGqtBN9Gxj7eW1uZ42v5BccTP0vu6NEaFoC2HwRg= github.com/liushuangls/go-anthropic/v2 v2.17.0 h1:iBA6h7aghi1q86owEQ95XE2R2MF/0dQ7bCxtwTxOg4c= github.com/liushuangls/go-anthropic/v2 v2.17.0/go.mod h1:a550cJXPoTG2FL3DvfKG2zzD5O2vjgvo4tHtoGPzFLU= github.com/modelcontextprotocol/go-sdk v1.2.0 h1:Y23co09300CEk8iZ/tMxIX1dVmKZkzoSBZOpJwUnc/s= github.com/modelcontextprotocol/go-sdk v1.2.0/go.mod h1:6fM3LCm3yV7pAs8isnKLn07oKtB0MP9LHd3DfAcKw10= github.com/openai/openai-go v1.12.0 h1:NBQCnXzqOTv5wsgNC36PrFEiskGfO5wccfCWDo9S1U0= github.com/openai/openai-go v1.12.0/go.mod h1:g461MYGXEXBVdV5SaR/5tNzNbSfwTBBefwc+LlDCK0Y= +github.com/pkg/sftp v1.13.10 h1:+5FbKNTe5Z9aspU88DPIKJ9z2KZoaGCu6Sr6kKR/5mU= +github.com/pkg/sftp v1.13.10/go.mod h1:bJ1a7uDhrX/4OII+agvy28lzRvQrmIQuaHrcI1HbeGA= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= @@ -82,8 +86,8 @@ go.opencensus.io v0.24.0 h1:y73uSU6J157QMP2kn2r30vwW1A2W2WFwSCGnAVxeaD0= go.opencensus.io v0.24.0/go.mod h1:vNK8G9p7aAivkbmorf4v+7Hgx+Zs0yY+0fOtgBfjQKo= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= -golang.org/x/crypto v0.36.0 h1:AnAEvhDddvBdpY+uR+MyHmuZzzNqXSe/GvuDeob5L34= -golang.org/x/crypto v0.36.0/go.mod h1:Y4J0ReaxCR1IMaabaSMugxJES1EpwhBHhv2bDHklZvc= +golang.org/x/crypto v0.41.0 h1:WKYxWedPGCTVVl5+WHSSrOBT0O8lx32+zxmHxijgXp4= +golang.org/x/crypto v0.41.0/go.mod h1:pO5AFd7FA68rFak7rOAGVuygIISepHftHnr8dr6+sUc= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/image v0.35.0 h1:LKjiHdgMtO8z7Fh18nGY6KDcoEtVfsgLDPeLyguqb7I= golang.org/x/image v0.35.0/go.mod h1:MwPLTVgvxSASsxdLzKrl8BRFuyqMyGhLwmC+TO1Sybk= @@ -96,8 +100,8 @@ 
golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73r golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20201110031124-69a78807bb2b/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= -golang.org/x/net v0.38.0 h1:vRMAPTMaeGqVhG5QyLJHqNDwecKTomGeqbnfZyKlBI8= -golang.org/x/net v0.38.0/go.mod h1:ivrbrMbzFq5J41QOQh0siUuly180yBYtLp+CKbEaFx8= +golang.org/x/net v0.42.0 h1:jzkYrhi3YQWD6MLBJcsklgQsoAcw89EcZbJw8Z614hs= +golang.org/x/net v0.42.0/go.mod h1:FF1RA5d3u7nAYA4z2TkclSCKh68eSXtiFwcWQpPXdt8= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.30.0 h1:dnDm7JmhM45NNpd8FDDeLhK6FwqbOf4MLCM9zb1BOHI= golang.org/x/oauth2 v0.30.0/go.mod h1:B++QgG3ZKulg6sRPGD/mqlHQs5rB3Ml9erfeDY7xKlU= @@ -110,8 +114,10 @@ golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5h golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.31.0 h1:ioabZlmFYtWhL+TRYpcnNlLwhyxaM9kWTDEmfnprqik= -golang.org/x/sys v0.31.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= +golang.org/x/sys v0.35.0 h1:vz1N37gP5bs89s7He8XuIYXpyY0+QlsKmzipCbUtyxI= +golang.org/x/sys v0.35.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= +golang.org/x/term v0.34.0 h1:O/2T7POpk0ZZ7MAzMeWFSg6S5IpWd/RXDlM9hgM3DR4= +golang.org/x/term v0.34.0/go.mod h1:5jC53AEywhIVebHgPVeg0mj8OD3VO9OzclacVrqpaAw= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.33.0 h1:B3njUFyqtHDUI5jMn1YIr5B0IE2U0qck04r6d4KPAxE= diff --git a/v2/sandbox/doc.go b/v2/sandbox/doc.go new file mode 100644 index 0000000..d86d20e --- /dev/null +++ b/v2/sandbox/doc.go @@ -0,0 +1,78 @@ +// Package sandbox provides isolated Linux container environments for LLM agents. +// +// It manages the full lifecycle of Proxmox LXC containers — cloning from a template, +// starting, connecting via SSH, executing commands, transferring files, and destroying +// the container when done. Each sandbox is an ephemeral, unprivileged container on an +// isolated network bridge with no LAN access. +// +// # Architecture +// +// The package has three layers: +// +// - ProxmoxClient: thin REST client for the Proxmox VE API (container CRUD, IP discovery) +// - SSHExecutor: persistent SSH/SFTP connection for command execution and file transfer +// - Manager/Sandbox: high-level orchestrator that ties Proxmox + SSH together +// +// # Usage +// +// // Load SSH key for container access. +// signer, err := sandbox.LoadSSHKey("/etc/mort/sandbox_key") +// if err != nil { +// log.Fatal(err) +// } +// +// // Create a manager. 
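+//	// (Values below are illustrative; docs/sandbox-setup.md describes the
+//	// matching Proxmox-side setup.)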
+// mgr, err := sandbox.NewManager(sandbox.Config{ +// Proxmox: sandbox.ProxmoxConfig{ +// BaseURL: "https://proxmox.local:8006", +// TokenID: "mort-sandbox@pve!sandbox-token", +// Secret: os.Getenv("SANDBOX_PROXMOX_SECRET"), +// Node: "pve", +// TemplateID: 9000, +// Pool: "sandbox-pool", +// Bridge: "vmbr1", +// }, +// SSH: sandbox.SSHConfig{ +// Signer: signer, +// }, +// }) +// if err != nil { +// log.Fatal(err) +// } +// +// // Create a sandbox. +// ctx := context.Background() +// sb, err := mgr.Create(ctx, +// sandbox.WithHostname("user-abc"), +// sandbox.WithInternet(true), +// ) +// if err != nil { +// log.Fatal(err) +// } +// defer sb.Destroy(ctx) +// +// // Execute commands. +// result, err := sb.Exec(ctx, "apt-get update && apt-get install -y nginx") +// if err != nil { +// log.Fatal(err) +// } +// fmt.Printf("exit %d: %s\n", result.ExitCode, result.Output) +// +// // Write files. +// err = sb.WriteFile(ctx, "/var/www/html/index.html", "
<h1>Hello</h1>
") +// +// // Read files. +// content, err := sb.ReadFile(ctx, "/etc/nginx/nginx.conf") +// +// # Security +// +// Sandboxes are secured through defense in depth: +// - Unprivileged LXC containers (UID mapping to high host UIDs) +// - Isolated network bridge with nftables default-deny outbound +// - Per-container opt-in internet access (HTTP/HTTPS only) +// - Resource limits: CPU, memory, disk, PID count +// - AppArmor confinement (lxc-container-default-cgns) +// - Capability dropping (sys_admin, sys_rawio, sys_ptrace, etc.) +// +// See docs/sandbox-setup.md for the complete Proxmox setup and hardening guide. +package sandbox diff --git a/v2/sandbox/proxmox.go b/v2/sandbox/proxmox.go new file mode 100644 index 0000000..3a5f0a2 --- /dev/null +++ b/v2/sandbox/proxmox.go @@ -0,0 +1,410 @@ +package sandbox + +import ( + "context" + "crypto/tls" + "encoding/json" + "fmt" + "io" + "net/http" + "net/url" + "strings" + "time" +) + +// ProxmoxConfig holds configuration for connecting to a Proxmox VE host. +type ProxmoxConfig struct { + // BaseURL is the Proxmox API base URL (e.g., "https://proxmox.local:8006"). + BaseURL string + + // TokenID is the API token identifier (e.g., "mort-sandbox@pve!sandbox-token"). + TokenID string + + // Secret is the API token secret. + Secret string + + // Node is the Proxmox node name (e.g., "pve"). + Node string + + // TemplateID is the LXC template container ID to clone from (e.g., 9000). + TemplateID int + + // Pool is the Proxmox resource pool for sandbox containers (e.g., "sandbox-pool"). + Pool string + + // Bridge is the network bridge for containers (e.g., "vmbr1"). + Bridge string + + // InsecureSkipVerify disables TLS certificate verification. + // Use only for self-signed Proxmox certificates. + InsecureSkipVerify bool +} + +// ContainerStatus represents the current state of a Proxmox LXC container. +type ContainerStatus struct { + Status string `json:"status"` // "running", "stopped", etc. + CPU float64 `json:"cpu"` // CPU usage (0.0–1.0) + Mem int64 `json:"mem"` // Current memory usage in bytes + MaxMem int64 `json:"maxmem"` // Maximum memory in bytes + Disk int64 `json:"disk"` // Current disk usage in bytes + MaxDisk int64 `json:"maxdisk"` // Maximum disk in bytes + NetIn int64 `json:"netin"` // Network bytes received + NetOut int64 `json:"netout"` // Network bytes sent + Uptime int64 `json:"uptime"` // Uptime in seconds +} + +// ContainerConfig holds settings for creating a new container. +type ContainerConfig struct { + // Hostname for the container. + Hostname string + + // CPUs is the number of CPU cores (default 1). + CPUs int + + // MemoryMB is the memory limit in megabytes (default 1024). + MemoryMB int + + // DiskGB is the root filesystem size in gigabytes (default 8). + DiskGB int + + // SSHPublicKey is an optional SSH public key to inject. + SSHPublicKey string +} + +// ProxmoxClient is a thin REST API client for Proxmox VE container lifecycle management. +type ProxmoxClient struct { + config ProxmoxConfig + http *http.Client +} + +// NewProxmoxClient creates a new Proxmox API client. +func NewProxmoxClient(config ProxmoxConfig) *ProxmoxClient { + transport := &http.Transport{ + TLSClientConfig: &tls.Config{ + InsecureSkipVerify: config.InsecureSkipVerify, + }, + } + return &ProxmoxClient{ + config: config, + http: &http.Client{ + Transport: transport, + Timeout: 30 * time.Second, + }, + } +} + +// NextAvailableID queries Proxmox for the next free VMID. 
+func (p *ProxmoxClient) NextAvailableID(ctx context.Context) (int, error) { + var result int + err := p.get(ctx, "/api2/json/cluster/nextid", &result) + if err != nil { + return 0, fmt.Errorf("get next VMID: %w", err) + } + return result, nil +} + +// CloneTemplate clones the configured template into a new container with the given VMID. +func (p *ProxmoxClient) CloneTemplate(ctx context.Context, newID int, cfg ContainerConfig) error { + path := fmt.Sprintf("/api2/json/nodes/%s/lxc/%d/clone", p.config.Node, p.config.TemplateID) + + hostname := cfg.Hostname + if hostname == "" { + hostname = fmt.Sprintf("sandbox-%d", newID) + } + + params := url.Values{ + "newid": {fmt.Sprintf("%d", newID)}, + "hostname": {hostname}, + "full": {"1"}, + } + if p.config.Pool != "" { + params.Set("pool", p.config.Pool) + } + + taskID, err := p.post(ctx, path, params) + if err != nil { + return fmt.Errorf("clone template %d → %d: %w", p.config.TemplateID, newID, err) + } + + return p.waitForTask(ctx, taskID) +} + +// ConfigureContainer sets CPU, memory, and network on an existing container. +func (p *ProxmoxClient) ConfigureContainer(ctx context.Context, id int, cfg ContainerConfig) error { + path := fmt.Sprintf("/api2/json/nodes/%s/lxc/%d/config", p.config.Node, id) + + cpus := cfg.CPUs + if cpus <= 0 { + cpus = 1 + } + mem := cfg.MemoryMB + if mem <= 0 { + mem = 1024 + } + + params := url.Values{ + "cores": {fmt.Sprintf("%d", cpus)}, + "memory": {fmt.Sprintf("%d", mem)}, + "swap": {"0"}, + "net0": {fmt.Sprintf("name=eth0,bridge=%s,ip=dhcp", p.config.Bridge)}, + } + + _, err := p.put(ctx, path, params) + if err != nil { + return fmt.Errorf("configure container %d: %w", id, err) + } + return nil +} + +// StartContainer starts a stopped container. +func (p *ProxmoxClient) StartContainer(ctx context.Context, id int) error { + path := fmt.Sprintf("/api2/json/nodes/%s/lxc/%d/status/start", p.config.Node, id) + taskID, err := p.post(ctx, path, nil) + if err != nil { + return fmt.Errorf("start container %d: %w", id, err) + } + return p.waitForTask(ctx, taskID) +} + +// StopContainer stops a running container. +func (p *ProxmoxClient) StopContainer(ctx context.Context, id int) error { + path := fmt.Sprintf("/api2/json/nodes/%s/lxc/%d/status/stop", p.config.Node, id) + taskID, err := p.post(ctx, path, nil) + if err != nil { + return fmt.Errorf("stop container %d: %w", id, err) + } + return p.waitForTask(ctx, taskID) +} + +// DestroyContainer stops (if running) and permanently deletes a container. +func (p *ProxmoxClient) DestroyContainer(ctx context.Context, id int) error { + // Try to stop first; ignore errors (might already be stopped). + status, err := p.GetContainerStatus(ctx, id) + if err != nil { + return fmt.Errorf("get status before destroy: %w", err) + } + if status.Status == "running" { + _ = p.StopContainer(ctx, id) + } + + path := fmt.Sprintf("/api2/json/nodes/%s/lxc/%d", p.config.Node, id) + params := url.Values{"force": {"1"}, "purge": {"1"}} + taskID, err := p.delete(ctx, path, params) + if err != nil { + return fmt.Errorf("destroy container %d: %w", id, err) + } + return p.waitForTask(ctx, taskID) +} + +// GetContainerStatus returns the current status and resource usage of a container. 
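+// Memory and disk figures are reported in bytes and uptime in seconds,
+// mirroring the /status/current endpoint.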
+func (p *ProxmoxClient) GetContainerStatus(ctx context.Context, id int) (ContainerStatus, error) { + path := fmt.Sprintf("/api2/json/nodes/%s/lxc/%d/status/current", p.config.Node, id) + var status ContainerStatus + if err := p.get(ctx, path, &status); err != nil { + return ContainerStatus{}, fmt.Errorf("get container %d status: %w", id, err) + } + return status, nil +} + +// GetContainerIP discovers the container's IP address by querying its network interfaces. +// It polls until an IP is found or the context is cancelled. +func (p *ProxmoxClient) GetContainerIP(ctx context.Context, id int) (string, error) { + path := fmt.Sprintf("/api2/json/nodes/%s/lxc/%d/interfaces", p.config.Node, id) + + ticker := time.NewTicker(2 * time.Second) + defer ticker.Stop() + + for { + var ifaces []struct { + Name string `json:"name"` + HWAddr string `json:"hwaddr"` + Inet string `json:"inet"` + Inet6 string `json:"inet6"` + } + + if err := p.get(ctx, path, &ifaces); err == nil { + for _, iface := range ifaces { + if iface.Name == "lo" || iface.Inet == "" { + continue + } + // Inet is in CIDR format (e.g., "10.99.1.5/16") + ip := iface.Inet + if idx := strings.IndexByte(ip, '/'); idx > 0 { + ip = ip[:idx] + } + return ip, nil + } + } + + select { + case <-ctx.Done(): + return "", fmt.Errorf("get container %d IP: %w", id, ctx.Err()) + case <-ticker.C: + } + } +} + +// EnableInternet adds a container IP to the nftables internet_allowed set, +// granting outbound HTTP/HTTPS access. +func (p *ProxmoxClient) EnableInternet(ctx context.Context, containerIP string) error { + return p.execOnHost(ctx, fmt.Sprintf("nft add element inet sandbox internet_allowed { %s }", containerIP)) +} + +// DisableInternet removes a container IP from the nftables internet_allowed set, +// revoking outbound HTTP/HTTPS access. +func (p *ProxmoxClient) DisableInternet(ctx context.Context, containerIP string) error { + return p.execOnHost(ctx, fmt.Sprintf("nft delete element inet sandbox internet_allowed { %s }", containerIP)) +} + +// execOnHost runs a command on the Proxmox host via the API's node exec endpoint. +func (p *ProxmoxClient) execOnHost(ctx context.Context, command string) error { + path := fmt.Sprintf("/api2/json/nodes/%s/execute", p.config.Node) + params := url.Values{"commands": {command}} + _, err := p.post(ctx, path, params) + if err != nil { + return fmt.Errorf("exec on host: %w", err) + } + return nil +} + +// --- HTTP helpers --- + +// proxmoxResponse is the standard envelope for all Proxmox API responses. 
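+// Every endpoint wraps its payload as {"data": ...}; this client treats any
+// non-2xx HTTP status as an error before decoding the envelope.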
+type proxmoxResponse struct { + Data json.RawMessage `json:"data"` +} + +func (p *ProxmoxClient) doRequest(ctx context.Context, method, path string, body io.Reader) (*http.Response, error) { + u := strings.TrimRight(p.config.BaseURL, "/") + path + + req, err := http.NewRequestWithContext(ctx, method, u, body) + if err != nil { + return nil, err + } + req.Header.Set("Authorization", fmt.Sprintf("PVEAPIToken=%s=%s", p.config.TokenID, p.config.Secret)) + if body != nil { + req.Header.Set("Content-Type", "application/x-www-form-urlencoded") + } + + resp, err := p.http.Do(req) + if err != nil { + return nil, err + } + return resp, nil +} + +func (p *ProxmoxClient) get(ctx context.Context, path string, result any) error { + resp, err := p.doRequest(ctx, http.MethodGet, path, nil) + if err != nil { + return err + } + defer resp.Body.Close() + return p.parseResponse(resp, result) +} + +func (p *ProxmoxClient) post(ctx context.Context, path string, params url.Values) (string, error) { + var body io.Reader + if params != nil { + body = strings.NewReader(params.Encode()) + } + resp, err := p.doRequest(ctx, http.MethodPost, path, body) + if err != nil { + return "", err + } + defer resp.Body.Close() + + var taskID string + if err := p.parseResponse(resp, &taskID); err != nil { + return "", err + } + return taskID, nil +} + +func (p *ProxmoxClient) put(ctx context.Context, path string, params url.Values) (string, error) { + var body io.Reader + if params != nil { + body = strings.NewReader(params.Encode()) + } + resp, err := p.doRequest(ctx, http.MethodPut, path, body) + if err != nil { + return "", err + } + defer resp.Body.Close() + + var result string + if err := p.parseResponse(resp, &result); err != nil { + return "", err + } + return result, nil +} + +func (p *ProxmoxClient) delete(ctx context.Context, path string, params url.Values) (string, error) { + path = path + "?" + params.Encode() + resp, err := p.doRequest(ctx, http.MethodDelete, path, nil) + if err != nil { + return "", err + } + defer resp.Body.Close() + + var taskID string + if err := p.parseResponse(resp, &taskID); err != nil { + return "", err + } + return taskID, nil +} + +func (p *ProxmoxClient) parseResponse(resp *http.Response, result any) error { + if resp.StatusCode < 200 || resp.StatusCode >= 300 { + bodyBytes, _ := io.ReadAll(resp.Body) + return fmt.Errorf("proxmox API error (HTTP %d): %s", resp.StatusCode, string(bodyBytes)) + } + + var envelope proxmoxResponse + if err := json.NewDecoder(resp.Body).Decode(&envelope); err != nil { + return fmt.Errorf("decode response: %w", err) + } + + if result == nil { + return nil + } + + if err := json.Unmarshal(envelope.Data, result); err != nil { + return fmt.Errorf("unmarshal data: %w", err) + } + return nil +} + +// waitForTask polls a Proxmox task until it completes or the context is cancelled. +func (p *ProxmoxClient) waitForTask(ctx context.Context, taskID string) error { + if taskID == "" { + return nil + } + + path := fmt.Sprintf("/api2/json/nodes/%s/tasks/%s/status", p.config.Node, url.PathEscape(taskID)) + ticker := time.NewTicker(1 * time.Second) + defer ticker.Stop() + + for { + var status struct { + Status string `json:"status"` // "running", "stopped", etc. 
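+			// exitstatus is "OK" on success; any other non-empty value is an error message.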
+ ExitCode string `json:"exitstatus"` + } + + if err := p.get(ctx, path, &status); err != nil { + return fmt.Errorf("poll task %s: %w", taskID, err) + } + + if status.Status != "running" { + if status.ExitCode != "OK" && status.ExitCode != "" { + return fmt.Errorf("task %s failed: %s", taskID, status.ExitCode) + } + return nil + } + + select { + case <-ctx.Done(): + return fmt.Errorf("wait for task %s: %w", taskID, ctx.Err()) + case <-ticker.C: + } + } +} diff --git a/v2/sandbox/sandbox.go b/v2/sandbox/sandbox.go new file mode 100644 index 0000000..9a971c1 --- /dev/null +++ b/v2/sandbox/sandbox.go @@ -0,0 +1,310 @@ +package sandbox + +import ( + "context" + "fmt" + "io" + "os" + "strings" + "time" + + "golang.org/x/crypto/ssh" +) + +// Config holds all configuration for creating sandboxes. +type Config struct { + Proxmox ProxmoxConfig + SSH SSHConfig + Defaults ContainerConfig +} + +// Option configures a Sandbox before creation. +type Option func(*createOpts) + +type createOpts struct { + hostname string + cpus int + memoryMB int + diskGB int + internet bool +} + +// WithHostname sets the container hostname. +func WithHostname(name string) Option { + return func(o *createOpts) { o.hostname = name } +} + +// WithCPUs sets the number of CPU cores for the container. +func WithCPUs(n int) Option { + return func(o *createOpts) { o.cpus = n } +} + +// WithMemoryMB sets the memory limit in megabytes. +func WithMemoryMB(mb int) Option { + return func(o *createOpts) { o.memoryMB = mb } +} + +// WithDiskGB sets the root filesystem size in gigabytes. +func WithDiskGB(gb int) Option { + return func(o *createOpts) { o.diskGB = gb } +} + +// WithInternet enables outbound HTTP/HTTPS access on creation. +func WithInternet(enabled bool) Option { + return func(o *createOpts) { o.internet = enabled } +} + +// Sandbox represents an isolated Linux container environment with SSH access. +// It wraps a Proxmox LXC container and provides command execution and file operations. +type Sandbox struct { + // ID is the Proxmox VMID of this container. + ID int + + // IP is the container's IP address on the isolated bridge. + IP string + + // Internet indicates whether outbound HTTP/HTTPS is enabled. + Internet bool + + proxmox *ProxmoxClient + ssh *SSHExecutor +} + +// Manager creates and manages sandbox instances. +type Manager struct { + proxmox *ProxmoxClient + sshKey ssh.Signer + defaults ContainerConfig + sshCfg SSHConfig +} + +// NewManager creates a new sandbox manager from the given configuration. +func NewManager(cfg Config) (*Manager, error) { + if cfg.SSH.Signer == nil { + return nil, fmt.Errorf("SSH signer is required") + } + + return &Manager{ + proxmox: NewProxmoxClient(cfg.Proxmox), + sshKey: cfg.SSH.Signer, + defaults: cfg.Defaults, + sshCfg: cfg.SSH, + }, nil +} + +// Create provisions a new sandbox container: clones the template, starts it, +// waits for SSH, and optionally enables internet access. +// The returned Sandbox must be destroyed with Destroy when no longer needed. +func (m *Manager) Create(ctx context.Context, opts ...Option) (*Sandbox, error) { + o := &createOpts{ + hostname: m.defaults.Hostname, + cpus: m.defaults.CPUs, + memoryMB: m.defaults.MemoryMB, + diskGB: m.defaults.DiskGB, + } + for _, opt := range opts { + opt(o) + } + + // Apply defaults for zero values. + if o.cpus <= 0 { + o.cpus = 1 + } + if o.memoryMB <= 0 { + o.memoryMB = 1024 + } + if o.diskGB <= 0 { + o.diskGB = 8 + } + + // Get next VMID. 
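+	// Two managers creating containers concurrently can be handed the same
+	// VMID here; the losing clone fails below and Create returns the error.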
+ vmid, err := m.proxmox.NextAvailableID(ctx) + if err != nil { + return nil, fmt.Errorf("get next VMID: %w", err) + } + + containerCfg := ContainerConfig{ + Hostname: o.hostname, + CPUs: o.cpus, + MemoryMB: o.memoryMB, + DiskGB: o.diskGB, + } + + // Clone template. + if err := m.proxmox.CloneTemplate(ctx, vmid, containerCfg); err != nil { + return nil, fmt.Errorf("clone template: %w", err) + } + + // Configure container resources. + if err := m.proxmox.ConfigureContainer(ctx, vmid, containerCfg); err != nil { + // Clean up the cloned container on failure. + _ = m.proxmox.DestroyContainer(ctx, vmid) + return nil, fmt.Errorf("configure container: %w", err) + } + + // Start container. + if err := m.proxmox.StartContainer(ctx, vmid); err != nil { + _ = m.proxmox.DestroyContainer(ctx, vmid) + return nil, fmt.Errorf("start container: %w", err) + } + + // Discover IP address (with timeout). + ipCtx, ipCancel := context.WithTimeout(ctx, 30*time.Second) + defer ipCancel() + + ip, err := m.proxmox.GetContainerIP(ipCtx, vmid) + if err != nil { + _ = m.proxmox.DestroyContainer(ctx, vmid) + return nil, fmt.Errorf("discover IP: %w", err) + } + + // Connect SSH (with timeout). + sshExec := NewSSHExecutor(ip, m.sshCfg) + + sshCtx, sshCancel := context.WithTimeout(ctx, 30*time.Second) + defer sshCancel() + + if err := sshExec.Connect(sshCtx); err != nil { + _ = m.proxmox.DestroyContainer(ctx, vmid) + return nil, fmt.Errorf("ssh connect: %w", err) + } + + sb := &Sandbox{ + ID: vmid, + IP: ip, + proxmox: m.proxmox, + ssh: sshExec, + } + + // Enable internet if requested. + if o.internet { + if err := sb.SetInternet(ctx, true); err != nil { + sb.Destroy(ctx) + return nil, fmt.Errorf("enable internet: %w", err) + } + } + + return sb, nil +} + +// Attach reconnects to an existing sandbox container by VMID. +// This is useful for recovering sessions after a restart. +func (m *Manager) Attach(ctx context.Context, vmid int) (*Sandbox, error) { + status, err := m.proxmox.GetContainerStatus(ctx, vmid) + if err != nil { + return nil, fmt.Errorf("get container status: %w", err) + } + if status.Status != "running" { + return nil, fmt.Errorf("container %d is not running (status: %s)", vmid, status.Status) + } + + ip, err := m.proxmox.GetContainerIP(ctx, vmid) + if err != nil { + return nil, fmt.Errorf("get container IP: %w", err) + } + + sshExec := NewSSHExecutor(ip, m.sshCfg) + if err := sshExec.Connect(ctx); err != nil { + return nil, fmt.Errorf("ssh connect: %w", err) + } + + return &Sandbox{ + ID: vmid, + IP: ip, + proxmox: m.proxmox, + ssh: sshExec, + }, nil +} + +// Exec runs a shell command in the sandbox and returns the result. +func (s *Sandbox) Exec(ctx context.Context, command string) (ExecResult, error) { + return s.ssh.Exec(ctx, command) +} + +// WriteFile creates or overwrites a file in the sandbox. +func (s *Sandbox) WriteFile(ctx context.Context, path, content string) error { + return s.ssh.Upload(ctx, strings.NewReader(content), path, 0644) +} + +// ReadFile reads a file from the sandbox and returns its contents. +func (s *Sandbox) ReadFile(ctx context.Context, path string) (string, error) { + rc, err := s.ssh.Download(ctx, path) + if err != nil { + return "", err + } + defer rc.Close() + + data, err := io.ReadAll(rc) + if err != nil { + return "", fmt.Errorf("read file %s: %w", path, err) + } + return string(data), nil +} + +// Upload copies data from an io.Reader to a file in the sandbox. 
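+// The permission bits in mode are applied once the write completes.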
+func (s *Sandbox) Upload(ctx context.Context, reader io.Reader, remotePath string, mode os.FileMode) error { + return s.ssh.Upload(ctx, reader, remotePath, mode) +} + +// Download returns an io.ReadCloser for a file in the sandbox. +// The caller must close the returned reader. +func (s *Sandbox) Download(ctx context.Context, remotePath string) (io.ReadCloser, error) { + return s.ssh.Download(ctx, remotePath) +} + +// SetInternet enables or disables outbound HTTP/HTTPS access for the sandbox. +func (s *Sandbox) SetInternet(ctx context.Context, enabled bool) error { + if enabled { + if err := s.proxmox.EnableInternet(ctx, s.IP); err != nil { + return err + } + } else { + if err := s.proxmox.DisableInternet(ctx, s.IP); err != nil { + return err + } + } + s.Internet = enabled + return nil +} + +// Status returns the current resource usage of the sandbox container. +func (s *Sandbox) Status(ctx context.Context) (ContainerStatus, error) { + return s.proxmox.GetContainerStatus(ctx, s.ID) +} + +// IsConnected returns true if the SSH connection to the sandbox is active. +func (s *Sandbox) IsConnected() bool { + return s.ssh.IsConnected() +} + +// Destroy stops the container, removes internet access, closes SSH connections, +// and permanently deletes the container from Proxmox. +func (s *Sandbox) Destroy(ctx context.Context) error { + var errs []error + + // Remove internet access first (ignore errors — container is being destroyed). + if s.Internet { + _ = s.proxmox.DisableInternet(ctx, s.IP) + } + + // Close SSH connections. + if err := s.ssh.Close(); err != nil { + errs = append(errs, fmt.Errorf("close ssh: %w", err)) + } + + // Destroy the container. + if err := s.proxmox.DestroyContainer(ctx, s.ID); err != nil { + errs = append(errs, fmt.Errorf("destroy container: %w", err)) + } + + if len(errs) > 0 { + return fmt.Errorf("destroy sandbox %d: %v", s.ID, errs) + } + return nil +} + +// DestroyByID destroys a container by VMID without requiring an active SSH connection. +// This is useful for cleaning up orphaned containers after a restart. +func (m *Manager) DestroyByID(ctx context.Context, vmid int) error { + return m.proxmox.DestroyContainer(ctx, vmid) +} diff --git a/v2/sandbox/sandbox_test.go b/v2/sandbox/sandbox_test.go new file mode 100644 index 0000000..10afced --- /dev/null +++ b/v2/sandbox/sandbox_test.go @@ -0,0 +1,614 @@ +package sandbox + +import ( + "context" + "crypto/rand" + "crypto/rsa" + "encoding/json" + "fmt" + "net/http" + "net/http/httptest" + "strings" + "testing" + + "golang.org/x/crypto/ssh" +) + +// --- Proxmox API mock server --- + +type mockProxmoxHandler struct { + containers map[int]ContainerStatus + nextID int + tasks map[string]string // taskID → exitstatus +} + +func newMockProxmoxHandler() *mockProxmoxHandler { + return &mockProxmoxHandler{ + containers: make(map[int]ContainerStatus), + nextID: 200, + tasks: make(map[string]string), + } +} + +func (m *mockProxmoxHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { + path := r.URL.Path + + // Verify auth header is present. 
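+	// Real Proxmox expects "PVEAPIToken=<tokenid>=<secret>"; the mock only
+	// checks the scheme prefix, not the credential values.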
+ auth := r.Header.Get("Authorization") + if !strings.HasPrefix(auth, "PVEAPIToken=") { + http.Error(w, "unauthorized", http.StatusUnauthorized) + return + } + + w.Header().Set("Content-Type", "application/json") + + switch { + case path == "/api2/json/cluster/nextid" && r.Method == http.MethodGet: + m.handleNextID(w) + + case strings.HasSuffix(path, "/clone") && r.Method == http.MethodPost: + m.handleClone(w, r) + + case strings.HasSuffix(path, "/config") && r.Method == http.MethodPut: + m.handleConfig(w) + + case strings.HasSuffix(path, "/status/start") && r.Method == http.MethodPost: + m.handleStart(w, r) + + case strings.HasSuffix(path, "/status/stop") && r.Method == http.MethodPost: + m.handleStop(w, r) + + case strings.HasSuffix(path, "/status/current") && r.Method == http.MethodGet: + m.handleStatusCurrent(w, r) + + case strings.HasSuffix(path, "/interfaces") && r.Method == http.MethodGet: + m.handleInterfaces(w, r) + + case strings.Contains(path, "/tasks/") && strings.HasSuffix(path, "/status"): + m.handleTaskStatus(w, r) + + case r.Method == http.MethodDelete && strings.Contains(path, "/lxc/"): + m.handleDelete(w, r) + + case strings.HasSuffix(path, "/execute") && r.Method == http.MethodPost: + m.handleExecute(w) + + default: + http.Error(w, fmt.Sprintf("unhandled: %s %s", r.Method, path), http.StatusNotFound) + } +} + +func (m *mockProxmoxHandler) handleNextID(w http.ResponseWriter) { + id := m.nextID + m.nextID++ + json.NewEncoder(w).Encode(map[string]any{"data": id}) +} + +func (m *mockProxmoxHandler) handleClone(w http.ResponseWriter, r *http.Request) { + r.ParseForm() + taskID := "UPID:pve:clone-task" + m.tasks[taskID] = "OK" + json.NewEncoder(w).Encode(map[string]any{"data": taskID}) +} + +func (m *mockProxmoxHandler) handleConfig(w http.ResponseWriter) { + json.NewEncoder(w).Encode(map[string]any{"data": nil}) +} + +func (m *mockProxmoxHandler) handleStart(w http.ResponseWriter, r *http.Request) { + // Extract VMID from path. 
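+	// Paths look like /api2/json/nodes/pve/lxc/200/status/start, so the VMID
+	// is the segment immediately after "lxc".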
+ parts := strings.Split(r.URL.Path, "/") + for i, p := range parts { + if p == "lxc" && i+1 < len(parts) { + var vmid int + fmt.Sscanf(parts[i+1], "%d", &vmid) + m.containers[vmid] = ContainerStatus{Status: "running"} + break + } + } + taskID := "UPID:pve:start-task" + m.tasks[taskID] = "OK" + json.NewEncoder(w).Encode(map[string]any{"data": taskID}) +} + +func (m *mockProxmoxHandler) handleStop(w http.ResponseWriter, r *http.Request) { + parts := strings.Split(r.URL.Path, "/") + for i, p := range parts { + if p == "lxc" && i+1 < len(parts) { + var vmid int + fmt.Sscanf(parts[i+1], "%d", &vmid) + m.containers[vmid] = ContainerStatus{Status: "stopped"} + break + } + } + taskID := "UPID:pve:stop-task" + m.tasks[taskID] = "OK" + json.NewEncoder(w).Encode(map[string]any{"data": taskID}) +} + +func (m *mockProxmoxHandler) handleStatusCurrent(w http.ResponseWriter, r *http.Request) { + parts := strings.Split(r.URL.Path, "/") + for i, p := range parts { + if p == "lxc" && i+1 < len(parts) { + var vmid int + fmt.Sscanf(parts[i+1], "%d", &vmid) + status, ok := m.containers[vmid] + if !ok { + status = ContainerStatus{Status: "stopped"} + } + json.NewEncoder(w).Encode(map[string]any{"data": status}) + return + } + } + http.Error(w, "not found", http.StatusNotFound) +} + +func (m *mockProxmoxHandler) handleInterfaces(w http.ResponseWriter, r *http.Request) { + ifaces := []map[string]string{ + {"name": "lo", "inet": "127.0.0.1/8"}, + {"name": "eth0", "inet": "10.99.1.5/16", "hwaddr": "AA:BB:CC:DD:EE:FF"}, + } + json.NewEncoder(w).Encode(map[string]any{"data": ifaces}) +} + +func (m *mockProxmoxHandler) handleTaskStatus(w http.ResponseWriter, r *http.Request) { + json.NewEncoder(w).Encode(map[string]any{ + "data": map[string]any{ + "status": "stopped", + "exitstatus": "OK", + }, + }) +} + +func (m *mockProxmoxHandler) handleDelete(w http.ResponseWriter, r *http.Request) { + parts := strings.Split(r.URL.Path, "/") + for i, p := range parts { + if p == "lxc" && i+1 < len(parts) { + var vmid int + fmt.Sscanf(parts[i+1], "%d", &vmid) + delete(m.containers, vmid) + break + } + } + taskID := "UPID:pve:delete-task" + m.tasks[taskID] = "OK" + json.NewEncoder(w).Encode(map[string]any{"data": taskID}) +} + +func (m *mockProxmoxHandler) handleExecute(w http.ResponseWriter) { + json.NewEncoder(w).Encode(map[string]any{"data": ""}) +} + +// --- Test helpers --- + +func newTestProxmoxClient(t *testing.T, handler *mockProxmoxHandler) (*ProxmoxClient, *httptest.Server) { + t.Helper() + server := httptest.NewTLSServer(handler) + client := NewProxmoxClient(ProxmoxConfig{ + BaseURL: server.URL, + TokenID: "test@pve!test-token", + Secret: "test-secret", + Node: "pve", + TemplateID: 9000, + Pool: "sandbox-pool", + Bridge: "vmbr1", + InsecureSkipVerify: true, + }) + // Use the test server's TLS client. 
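+	// (server.Client() is pre-configured to trust the httptest server's
+	// self-signed certificate.)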
+ client.http = server.Client() + return client, server +} + +func generateTestSigner(t *testing.T) ssh.Signer { + t.Helper() + key, err := rsa.GenerateKey(rand.Reader, 2048) + if err != nil { + t.Fatalf("generate RSA key: %v", err) + } + signer, err := ssh.NewSignerFromKey(key) + if err != nil { + t.Fatalf("create signer: %v", err) + } + return signer +} + +// --- Proxmox client tests --- + +func TestProxmoxNextAvailableID(t *testing.T) { + handler := newMockProxmoxHandler() + client, server := newTestProxmoxClient(t, handler) + defer server.Close() + + id, err := client.NextAvailableID(context.Background()) + if err != nil { + t.Fatalf("NextAvailableID: %v", err) + } + if id != 200 { + t.Errorf("expected VMID 200, got %d", id) + } + + // Second call should return 201. + id2, err := client.NextAvailableID(context.Background()) + if err != nil { + t.Fatalf("NextAvailableID (2nd): %v", err) + } + if id2 != 201 { + t.Errorf("expected VMID 201, got %d", id2) + } +} + +func TestProxmoxCloneTemplate(t *testing.T) { + handler := newMockProxmoxHandler() + client, server := newTestProxmoxClient(t, handler) + defer server.Close() + + err := client.CloneTemplate(context.Background(), 200, ContainerConfig{ + Hostname: "test-sandbox", + }) + if err != nil { + t.Fatalf("CloneTemplate: %v", err) + } +} + +func TestProxmoxContainerLifecycle(t *testing.T) { + handler := newMockProxmoxHandler() + client, server := newTestProxmoxClient(t, handler) + defer server.Close() + + ctx := context.Background() + + // Start container. + if err := client.StartContainer(ctx, 200); err != nil { + t.Fatalf("StartContainer: %v", err) + } + + // Get status — should be running. + status, err := client.GetContainerStatus(ctx, 200) + if err != nil { + t.Fatalf("GetContainerStatus: %v", err) + } + if status.Status != "running" { + t.Errorf("expected status 'running', got %q", status.Status) + } + + // Stop container. + if err := client.StopContainer(ctx, 200); err != nil { + t.Fatalf("StopContainer: %v", err) + } + + // Get status — should be stopped. + status, err = client.GetContainerStatus(ctx, 200) + if err != nil { + t.Fatalf("GetContainerStatus: %v", err) + } + if status.Status != "stopped" { + t.Errorf("expected status 'stopped', got %q", status.Status) + } +} + +func TestProxmoxGetContainerIP(t *testing.T) { + handler := newMockProxmoxHandler() + client, server := newTestProxmoxClient(t, handler) + defer server.Close() + + ip, err := client.GetContainerIP(context.Background(), 200) + if err != nil { + t.Fatalf("GetContainerIP: %v", err) + } + if ip != "10.99.1.5" { + t.Errorf("expected IP 10.99.1.5, got %q", ip) + } +} + +func TestProxmoxDestroyContainer(t *testing.T) { + handler := newMockProxmoxHandler() + client, server := newTestProxmoxClient(t, handler) + defer server.Close() + + ctx := context.Background() + + // Start it first so it has a status. + if err := client.StartContainer(ctx, 200); err != nil { + t.Fatalf("StartContainer: %v", err) + } + + // Destroy it. + if err := client.DestroyContainer(ctx, 200); err != nil { + t.Fatalf("DestroyContainer: %v", err) + } + + // Container should be gone from the handler's map. 
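+	// (handleDelete removes the VMID from m.containers, so its presence here
+	// would mean the DELETE request never reached the mock.)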
+ if _, exists := handler.containers[200]; exists { + t.Error("container 200 should have been deleted") + } +} + +func TestProxmoxConfigureContainer(t *testing.T) { + handler := newMockProxmoxHandler() + client, server := newTestProxmoxClient(t, handler) + defer server.Close() + + err := client.ConfigureContainer(context.Background(), 200, ContainerConfig{ + CPUs: 2, + MemoryMB: 2048, + }) + if err != nil { + t.Fatalf("ConfigureContainer: %v", err) + } +} + +func TestProxmoxEnableDisableInternet(t *testing.T) { + handler := newMockProxmoxHandler() + client, server := newTestProxmoxClient(t, handler) + defer server.Close() + + ctx := context.Background() + + if err := client.EnableInternet(ctx, "10.99.1.5"); err != nil { + t.Fatalf("EnableInternet: %v", err) + } + + if err := client.DisableInternet(ctx, "10.99.1.5"); err != nil { + t.Fatalf("DisableInternet: %v", err) + } +} + +func TestProxmoxAuthRequired(t *testing.T) { + // Mock that rejects requests without a valid token. + handler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + auth := r.Header.Get("Authorization") + if auth != "PVEAPIToken=valid@pve!tok=secret123" { + http.Error(w, "unauthorized", http.StatusUnauthorized) + return + } + w.Header().Set("Content-Type", "application/json") + json.NewEncoder(w).Encode(map[string]any{"data": 200}) + }) + server := httptest.NewTLSServer(handler) + defer server.Close() + + // Client with wrong credentials should fail. + client := NewProxmoxClient(ProxmoxConfig{ + BaseURL: server.URL, + TokenID: "wrong@pve!tok", + Secret: "wrong", + Node: "pve", + InsecureSkipVerify: true, + }) + client.http = server.Client() + + _, err := client.NextAvailableID(context.Background()) + if err == nil { + t.Fatal("expected error with wrong auth, got nil") + } + if !strings.Contains(err.Error(), "401") { + t.Errorf("expected 401 error, got: %v", err) + } + + // Client with correct credentials should succeed. + client2 := NewProxmoxClient(ProxmoxConfig{ + BaseURL: server.URL, + TokenID: "valid@pve!tok", + Secret: "secret123", + Node: "pve", + InsecureSkipVerify: true, + }) + client2.http = server.Client() + + id, err := client2.NextAvailableID(context.Background()) + if err != nil { + t.Fatalf("expected success with correct auth, got: %v", err) + } + if id != 200 { + t.Errorf("expected VMID 200, got %d", id) + } +} + +func TestProxmoxContextCancellation(t *testing.T) { + handler := newMockProxmoxHandler() + client, server := newTestProxmoxClient(t, handler) + defer server.Close() + + ctx, cancel := context.WithCancel(context.Background()) + cancel() // Cancel immediately. 
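+	// The cancelled context makes the HTTP round trip in doRequest fail
+	// before any network I/O completes.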
+ + _, err := client.NextAvailableID(ctx) + if err == nil { + t.Fatal("expected error with cancelled context, got nil") + } +} + +// --- SSH executor tests (unit tests without real SSH) --- + +func TestSSHExecutorDefaults(t *testing.T) { + signer := generateTestSigner(t) + exec := NewSSHExecutor("10.99.1.5", SSHConfig{Signer: signer}) + + if exec.config.User != "sandbox" { + t.Errorf("expected default user 'sandbox', got %q", exec.config.User) + } + if exec.config.ConnectTimeout != 10e9 { + t.Errorf("expected default connect timeout 10s, got %v", exec.config.ConnectTimeout) + } + if exec.config.CommandTimeout != 60e9 { + t.Errorf("expected default command timeout 60s, got %v", exec.config.CommandTimeout) + } +} + +func TestSSHExecutorNotConnected(t *testing.T) { + signer := generateTestSigner(t) + exec := NewSSHExecutor("10.99.1.5", SSHConfig{Signer: signer}) + + _, err := exec.Exec(context.Background(), "echo hello") + if err == nil { + t.Fatal("expected error when not connected, got nil") + } + if !strings.Contains(err.Error(), "not connected") { + t.Errorf("expected 'not connected' error, got: %v", err) + } +} + +func TestSSHExecutorUploadNotConnected(t *testing.T) { + signer := generateTestSigner(t) + exec := NewSSHExecutor("10.99.1.5", SSHConfig{Signer: signer}) + + err := exec.Upload(context.Background(), strings.NewReader("test"), "/tmp/test", 0644) + if err == nil { + t.Fatal("expected error when not connected, got nil") + } +} + +func TestSSHExecutorDownloadNotConnected(t *testing.T) { + signer := generateTestSigner(t) + exec := NewSSHExecutor("10.99.1.5", SSHConfig{Signer: signer}) + + _, err := exec.Download(context.Background(), "/tmp/test") + if err == nil { + t.Fatal("expected error when not connected, got nil") + } +} + +func TestSSHExecutorIsConnected(t *testing.T) { + signer := generateTestSigner(t) + exec := NewSSHExecutor("10.99.1.5", SSHConfig{Signer: signer}) + + if exec.IsConnected() { + t.Error("should not be connected initially") + } +} + +func TestSSHExecutorCloseIdempotent(t *testing.T) { + signer := generateTestSigner(t) + exec := NewSSHExecutor("10.99.1.5", SSHConfig{Signer: signer}) + + // Close without connecting should not error. + if err := exec.Close(); err != nil { + t.Errorf("Close on unconnected executor: %v", err) + } +} + +// --- LoadSSHKey / ParseSSHKey tests --- + +func TestLoadSSHKeyNotFound(t *testing.T) { + _, err := LoadSSHKey("/nonexistent/path/to/key") + if err == nil { + t.Fatal("expected error for nonexistent key, got nil") + } +} + +func TestParseSSHKeyInvalid(t *testing.T) { + _, err := ParseSSHKey([]byte("not a valid PEM key")) + if err == nil { + t.Fatal("expected error for invalid key, got nil") + } +} + +// --- Sandbox / Manager tests (using mock Proxmox, no real SSH) --- + +func TestManagerRequiresSigner(t *testing.T) { + _, err := NewManager(Config{}) + if err == nil { + t.Fatal("expected error when no SSH signer provided") + } + if !strings.Contains(err.Error(), "SSH signer") { + t.Errorf("expected SSH signer error, got: %v", err) + } +} + +func TestSandboxDestroyClosesConnections(t *testing.T) { + handler := newMockProxmoxHandler() + _, server := newTestProxmoxClient(t, handler) + defer server.Close() + + signer := generateTestSigner(t) + + proxmoxClient := NewProxmoxClient(ProxmoxConfig{ + BaseURL: server.URL, + TokenID: "test@pve!test-token", + Secret: "test-secret", + Node: "pve", + TemplateID: 9000, + InsecureSkipVerify: true, + }) + proxmoxClient.http = server.Client() + + // Start a container so destroy can check its status. 
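+	// (DestroyContainer queries GetContainerStatus first and stops the
+	// container if it is still running.)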
+	ctx := context.Background()
+	if err := proxmoxClient.StartContainer(ctx, 200); err != nil {
+		t.Fatalf("start: %v", err)
+	}
+
+	sshExec := NewSSHExecutor("10.99.1.5", SSHConfig{Signer: signer})
+
+	sb := &Sandbox{
+		ID:       200,
+		IP:       "10.99.1.5",
+		Internet: false,
+		proxmox:  proxmoxClient,
+		ssh:      sshExec,
+	}
+
+	// Destroy should succeed even with unconnected SSH (no SFTP/SSH to close).
+	if err := sb.Destroy(ctx); err != nil {
+		t.Fatalf("Destroy: %v", err)
+	}
+}
+
+func TestSandboxWriteFileAndReadFileRequireConnection(t *testing.T) {
+	signer := generateTestSigner(t)
+	sshExec := NewSSHExecutor("10.99.1.5", SSHConfig{Signer: signer})
+
+	sb := &Sandbox{ssh: sshExec}
+
+	err := sb.WriteFile(context.Background(), "/tmp/test.txt", "hello")
+	if err == nil {
+		t.Fatal("expected error when SSH not connected")
+	}
+
+	_, err = sb.ReadFile(context.Background(), "/tmp/test.txt")
+	if err == nil {
+		t.Fatal("expected error when SSH not connected")
+	}
+}
+
+func TestContainerConfigDefaults(t *testing.T) {
+	// A zero-value createOpts starts empty; functional options fill it in.
+	o := &createOpts{}
+
+	if o.cpus != 0 {
+		t.Errorf("expected zero cpus, got %d", o.cpus)
+	}
+
+	// Apply options.
+	WithCPUs(2)(o)
+	WithMemoryMB(2048)(o)
+	WithDiskGB(16)(o)
+	WithHostname("test")(o)
+	WithInternet(true)(o)
+
+	if o.cpus != 2 {
+		t.Errorf("expected cpus=2, got %d", o.cpus)
+	}
+	if o.memoryMB != 2048 {
+		t.Errorf("expected memoryMB=2048, got %d", o.memoryMB)
+	}
+	if o.diskGB != 16 {
+		t.Errorf("expected diskGB=16, got %d", o.diskGB)
+	}
+	if o.hostname != "test" {
+		t.Errorf("expected hostname='test', got %q", o.hostname)
+	}
+	if !o.internet {
+		t.Error("expected internet=true")
+	}
+}
+
+func TestExecResultFields(t *testing.T) {
+	r := ExecResult{Output: "hello\n", ExitCode: 0}
+	if r.Output != "hello\n" {
+		t.Errorf("unexpected output: %q", r.Output)
+	}
+	if r.ExitCode != 0 {
+		t.Errorf("unexpected exit code: %d", r.ExitCode)
+	}
+}
diff --git a/v2/sandbox/ssh.go b/v2/sandbox/ssh.go
new file mode 100644
index 0000000..9ba30d3
--- /dev/null
+++ b/v2/sandbox/ssh.go
@@ -0,0 +1,253 @@
+package sandbox
+
+import (
+	"bytes"
+	"context"
+	"fmt"
+	"io"
+	"net"
+	"os"
+	"sync"
+	"time"
+
+	"github.com/pkg/sftp"
+	"golang.org/x/crypto/ssh"
+)
+
+// SSHConfig holds configuration for SSH connections to sandbox containers.
+type SSHConfig struct {
+	// User is the SSH username (default "sandbox").
+	User string
+
+	// Signer is the SSH private key signer for authentication.
+	Signer ssh.Signer
+
+	// ConnectTimeout is the maximum time to wait for an SSH connection (default 10s).
+	ConnectTimeout time.Duration
+
+	// CommandTimeout is the default maximum time for a single command execution (default 60s).
+	CommandTimeout time.Duration
+}
+
+// SSHExecutor manages SSH and SFTP connections to a sandbox container.
+type SSHExecutor struct {
+	host   string
+	config SSHConfig
+
+	mu         sync.Mutex
+	sshClient  *ssh.Client
+	sftpClient *sftp.Client
+}
+
+// NewSSHExecutor creates a new SSH executor for the given host.
+func NewSSHExecutor(host string, config SSHConfig) *SSHExecutor {
+	if config.User == "" {
+		config.User = "sandbox"
+	}
+	if config.ConnectTimeout <= 0 {
+		config.ConnectTimeout = 10 * time.Second
+	}
+	if config.CommandTimeout <= 0 {
+		config.CommandTimeout = 60 * time.Second
+	}
+	return &SSHExecutor{
+		host:   host,
+		config: config,
+	}
+}
+
+// Connect establishes SSH and SFTP connections to the container.
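+// SFTP runs as a subsystem over the same SSH transport, so both share one TCP connection.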
+// It polls until the connection succeeds or the context is cancelled,
+// which is useful when waiting for a freshly started container to boot.
+func (s *SSHExecutor) Connect(ctx context.Context) error {
+	sshConfig := &ssh.ClientConfig{
+		User: s.config.User,
+		Auth: []ssh.AuthMethod{
+			ssh.PublicKeys(s.config.Signer),
+		},
+		// Host key verification is intentionally skipped: sandbox containers
+		// are ephemeral clones reached only over the isolated bridge, so
+		// there is no stable key to pin.
+		HostKeyCallback: ssh.InsecureIgnoreHostKey(),
+		Timeout:         s.config.ConnectTimeout,
+	}
+
+	addr := net.JoinHostPort(s.host, "22")
+
+	ticker := time.NewTicker(2 * time.Second)
+	defer ticker.Stop()
+
+	var lastErr error
+	for {
+		client, err := ssh.Dial("tcp", addr, sshConfig)
+		if err == nil {
+			sftpClient, err := sftp.NewClient(client)
+			if err != nil {
+				client.Close()
+				return fmt.Errorf("create SFTP client: %w", err)
+			}
+
+			s.mu.Lock()
+			s.sshClient = client
+			s.sftpClient = sftpClient
+			s.mu.Unlock()
+			return nil
+		}
+
+		lastErr = err
+		select {
+		case <-ctx.Done():
+			return fmt.Errorf("ssh connect to %s: %w (last error: %v)", addr, ctx.Err(), lastErr)
+		case <-ticker.C:
+		}
+	}
+}
+
+// ExecResult contains the output and exit status of a command execution.
+type ExecResult struct {
+	Output   string
+	ExitCode int
+}
+
+// Exec runs a shell command on the container and returns the combined stdout/stderr
+// output and exit code. If the caller's context carries no deadline, the
+// configured CommandTimeout is applied.
+func (s *SSHExecutor) Exec(ctx context.Context, command string) (ExecResult, error) {
+	s.mu.Lock()
+	client := s.sshClient
+	s.mu.Unlock()
+
+	if client == nil {
+		return ExecResult{}, fmt.Errorf("ssh not connected")
+	}
+
+	// Enforce the default command timeout unless the caller already set a deadline.
+	if _, hasDeadline := ctx.Deadline(); !hasDeadline {
+		var cancel context.CancelFunc
+		ctx, cancel = context.WithTimeout(ctx, s.config.CommandTimeout)
+		defer cancel()
+	}
+
+	session, err := client.NewSession()
+	if err != nil {
+		return ExecResult{}, fmt.Errorf("create session: %w", err)
+	}
+	defer session.Close()
+
+	var buf bytes.Buffer
+	session.Stdout = &buf
+	session.Stderr = &buf
+
+	done := make(chan error, 1)
+	go func() {
+		done <- session.Run(command)
+	}()
+
+	select {
+	case <-ctx.Done():
+		// Best effort: some SSH servers ignore signals, but the deferred
+		// session.Close tears the channel down regardless.
+		_ = session.Signal(ssh.SIGKILL)
+		return ExecResult{}, fmt.Errorf("exec cancelled or timed out: %w", ctx.Err())
+	case err := <-done:
+		output := buf.String()
+		if err != nil {
+			if exitErr, ok := err.(*ssh.ExitError); ok {
+				return ExecResult{
+					Output:   output,
+					ExitCode: exitErr.ExitStatus(),
+				}, nil
+			}
+			return ExecResult{Output: output}, fmt.Errorf("exec: %w", err)
+		}
+		return ExecResult{Output: output, ExitCode: 0}, nil
+	}
+}
+
+// Upload writes data from an io.Reader to a file on the container.
+// The sftp package does not accept a context; ctx is kept for API symmetry.
+func (s *SSHExecutor) Upload(ctx context.Context, reader io.Reader, remotePath string, mode os.FileMode) error {
+	s.mu.Lock()
+	client := s.sftpClient
+	s.mu.Unlock()
+
+	if client == nil {
+		return fmt.Errorf("sftp not connected")
+	}
+
+	f, err := client.OpenFile(remotePath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC)
+	if err != nil {
+		return fmt.Errorf("open remote file %s: %w", remotePath, err)
+	}
+	defer f.Close()
+
+	if _, err := io.Copy(f, reader); err != nil {
+		return fmt.Errorf("write to %s: %w", remotePath, err)
+	}
+
+	if err := client.Chmod(remotePath, mode); err != nil {
+		return fmt.Errorf("chmod %s: %w", remotePath, err)
+	}
+
+	return nil
+}
+
+// Download reads a file from the container and returns its contents as an io.ReadCloser.
+// The caller must close the returned reader.
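+// Returning the underlying *sftp.File streams the data instead of buffering it in memory.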
+func (s *SSHExecutor) Download(ctx context.Context, remotePath string) (io.ReadCloser, error) { + s.mu.Lock() + client := s.sftpClient + s.mu.Unlock() + + if client == nil { + return nil, fmt.Errorf("sftp not connected") + } + + f, err := client.Open(remotePath) + if err != nil { + return nil, fmt.Errorf("open remote file %s: %w", remotePath, err) + } + + return f, nil +} + +// Close tears down both SFTP and SSH connections. +func (s *SSHExecutor) Close() error { + s.mu.Lock() + defer s.mu.Unlock() + + var errs []error + if s.sftpClient != nil { + if err := s.sftpClient.Close(); err != nil { + errs = append(errs, fmt.Errorf("close SFTP: %w", err)) + } + s.sftpClient = nil + } + if s.sshClient != nil { + if err := s.sshClient.Close(); err != nil { + errs = append(errs, fmt.Errorf("close SSH: %w", err)) + } + s.sshClient = nil + } + + if len(errs) > 0 { + return fmt.Errorf("close ssh executor: %v", errs) + } + return nil +} + +// IsConnected returns true if the SSH connection is established. +func (s *SSHExecutor) IsConnected() bool { + s.mu.Lock() + defer s.mu.Unlock() + return s.sshClient != nil +} + +// LoadSSHKey reads a PEM-encoded private key file and returns an ssh.Signer. +func LoadSSHKey(path string) (ssh.Signer, error) { + keyData, err := os.ReadFile(path) + if err != nil { + return nil, fmt.Errorf("read SSH key %s: %w", path, err) + } + signer, err := ssh.ParsePrivateKey(keyData) + if err != nil { + return nil, fmt.Errorf("parse SSH key: %w", err) + } + return signer, nil +} + +// ParseSSHKey parses a PEM-encoded private key from bytes and returns an ssh.Signer. +func ParseSSHKey(pemBytes []byte) (ssh.Signer, error) { + signer, err := ssh.ParsePrivateKey(pemBytes) + if err != nil { + return nil, fmt.Errorf("parse SSH key: %w", err) + } + return signer, nil +}
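+
+// Typical usage (an illustrative sketch, not a prescribed flow): connect to a
+// booted container, run a command, and tear down. The host address and key
+// path below are placeholders.
+//
+//	signer, err := LoadSSHKey("/root/.ssh/sandbox_ed25519")
+//	if err != nil {
+//		log.Fatal(err)
+//	}
+//	exec := NewSSHExecutor("10.99.1.5", SSHConfig{Signer: signer})
+//	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
+//	defer cancel()
+//	if err := exec.Connect(ctx); err != nil { // polls until sshd answers
+//		log.Fatal(err)
+//	}
+//	defer exec.Close()
+//	res, err := exec.Exec(ctx, "uname -a")
+//	if err != nil {
+//		log.Fatal(err)
+//	}
+//	fmt.Printf("exit=%d\n%s", res.ExitCode, res.Output)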