Provides a complete lifecycle manager for ephemeral sandbox environments: - ProxmoxClient: thin REST wrapper for container CRUD, IP discovery, internet toggle - SSHExecutor: persistent SSH/SFTP for command execution and file transfer - Manager/Sandbox: high-level orchestrator tying Proxmox + SSH together - 22 unit tests with mock Proxmox HTTP server - Proxmox setup & hardening guide (docs/sandbox-setup.md) Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
411 lines
12 KiB
Go
411 lines
12 KiB
Go
package sandbox
|
||
|
||
import (
	"context"
	"crypto/tls"
	"encoding/json"
	"fmt"
	"io"
	"net"
	"net/http"
	"net/url"
	"strconv"
	"strings"
	"time"
)
|
||
|
||
// ProxmoxConfig holds configuration for connecting to a Proxmox VE host.
type ProxmoxConfig struct {
	// BaseURL is the Proxmox API base URL (e.g., "https://proxmox.local:8006").
	// A trailing slash, if present, is stripped before requests are built.
	BaseURL string

	// TokenID is the API token identifier (e.g., "mort-sandbox@pve!sandbox-token").
	TokenID string

	// Secret is the API token secret. Sent on every request in the
	// Authorization header as "PVEAPIToken=<TokenID>=<Secret>".
	Secret string

	// Node is the Proxmox node name (e.g., "pve").
	Node string

	// TemplateID is the LXC template container ID to clone from (e.g., 9000).
	TemplateID int

	// Pool is the Proxmox resource pool for sandbox containers
	// (e.g., "sandbox-pool"). Optional; empty means no pool assignment.
	Pool string

	// Bridge is the network bridge for containers (e.g., "vmbr1").
	Bridge string

	// InsecureSkipVerify disables TLS certificate verification.
	// Use only for self-signed Proxmox certificates.
	InsecureSkipVerify bool
}
|
||
|
||
// ContainerStatus represents the current state of a Proxmox LXC container,
// as reported by the /status/current endpoint.
type ContainerStatus struct {
	Status  string  `json:"status"`  // "running", "stopped", etc.
	CPU     float64 `json:"cpu"`     // CPU usage (0.0–1.0)
	Mem     int64   `json:"mem"`     // Current memory usage in bytes
	MaxMem  int64   `json:"maxmem"`  // Maximum memory in bytes
	Disk    int64   `json:"disk"`    // Current disk usage in bytes
	MaxDisk int64   `json:"maxdisk"` // Maximum disk in bytes
	NetIn   int64   `json:"netin"`   // Network bytes received
	NetOut  int64   `json:"netout"`  // Network bytes sent
	Uptime  int64   `json:"uptime"`  // Uptime in seconds
}
|
||
|
||
// ContainerConfig holds settings for creating a new container.
//
// NOTE(review): in this file only Hostname, CPUs, and MemoryMB are consumed
// (by CloneTemplate and ConfigureContainer); DiskGB and SSHPublicKey appear
// to be applied elsewhere — confirm against the rest of the package.
type ContainerConfig struct {
	// Hostname for the container. If empty, a name of the form
	// "sandbox-<vmid>" is generated at clone time.
	Hostname string

	// CPUs is the number of CPU cores (default 1).
	CPUs int

	// MemoryMB is the memory limit in megabytes (default 1024).
	MemoryMB int

	// DiskGB is the root filesystem size in gigabytes (default 8).
	DiskGB int

	// SSHPublicKey is an optional SSH public key to inject.
	SSHPublicKey string
}
|
||
|
||
// ProxmoxClient is a thin REST API client for Proxmox VE container lifecycle management.
type ProxmoxClient struct {
	config ProxmoxConfig // connection settings and cluster defaults
	http   *http.Client  // shared HTTP client (30s timeout, optional insecure TLS)
}
|
||
|
||
// NewProxmoxClient creates a new Proxmox API client.
|
||
func NewProxmoxClient(config ProxmoxConfig) *ProxmoxClient {
|
||
transport := &http.Transport{
|
||
TLSClientConfig: &tls.Config{
|
||
InsecureSkipVerify: config.InsecureSkipVerify,
|
||
},
|
||
}
|
||
return &ProxmoxClient{
|
||
config: config,
|
||
http: &http.Client{
|
||
Transport: transport,
|
||
Timeout: 30 * time.Second,
|
||
},
|
||
}
|
||
}
|
||
|
||
// NextAvailableID queries Proxmox for the next free VMID.
|
||
func (p *ProxmoxClient) NextAvailableID(ctx context.Context) (int, error) {
|
||
var result int
|
||
err := p.get(ctx, "/api2/json/cluster/nextid", &result)
|
||
if err != nil {
|
||
return 0, fmt.Errorf("get next VMID: %w", err)
|
||
}
|
||
return result, nil
|
||
}
|
||
|
||
// CloneTemplate clones the configured template into a new container with the given VMID.
|
||
func (p *ProxmoxClient) CloneTemplate(ctx context.Context, newID int, cfg ContainerConfig) error {
|
||
path := fmt.Sprintf("/api2/json/nodes/%s/lxc/%d/clone", p.config.Node, p.config.TemplateID)
|
||
|
||
hostname := cfg.Hostname
|
||
if hostname == "" {
|
||
hostname = fmt.Sprintf("sandbox-%d", newID)
|
||
}
|
||
|
||
params := url.Values{
|
||
"newid": {fmt.Sprintf("%d", newID)},
|
||
"hostname": {hostname},
|
||
"full": {"1"},
|
||
}
|
||
if p.config.Pool != "" {
|
||
params.Set("pool", p.config.Pool)
|
||
}
|
||
|
||
taskID, err := p.post(ctx, path, params)
|
||
if err != nil {
|
||
return fmt.Errorf("clone template %d → %d: %w", p.config.TemplateID, newID, err)
|
||
}
|
||
|
||
return p.waitForTask(ctx, taskID)
|
||
}
|
||
|
||
// ConfigureContainer sets CPU, memory, and network on an existing container.
|
||
func (p *ProxmoxClient) ConfigureContainer(ctx context.Context, id int, cfg ContainerConfig) error {
|
||
path := fmt.Sprintf("/api2/json/nodes/%s/lxc/%d/config", p.config.Node, id)
|
||
|
||
cpus := cfg.CPUs
|
||
if cpus <= 0 {
|
||
cpus = 1
|
||
}
|
||
mem := cfg.MemoryMB
|
||
if mem <= 0 {
|
||
mem = 1024
|
||
}
|
||
|
||
params := url.Values{
|
||
"cores": {fmt.Sprintf("%d", cpus)},
|
||
"memory": {fmt.Sprintf("%d", mem)},
|
||
"swap": {"0"},
|
||
"net0": {fmt.Sprintf("name=eth0,bridge=%s,ip=dhcp", p.config.Bridge)},
|
||
}
|
||
|
||
_, err := p.put(ctx, path, params)
|
||
if err != nil {
|
||
return fmt.Errorf("configure container %d: %w", id, err)
|
||
}
|
||
return nil
|
||
}
|
||
|
||
// StartContainer starts a stopped container.
|
||
func (p *ProxmoxClient) StartContainer(ctx context.Context, id int) error {
|
||
path := fmt.Sprintf("/api2/json/nodes/%s/lxc/%d/status/start", p.config.Node, id)
|
||
taskID, err := p.post(ctx, path, nil)
|
||
if err != nil {
|
||
return fmt.Errorf("start container %d: %w", id, err)
|
||
}
|
||
return p.waitForTask(ctx, taskID)
|
||
}
|
||
|
||
// StopContainer stops a running container.
|
||
func (p *ProxmoxClient) StopContainer(ctx context.Context, id int) error {
|
||
path := fmt.Sprintf("/api2/json/nodes/%s/lxc/%d/status/stop", p.config.Node, id)
|
||
taskID, err := p.post(ctx, path, nil)
|
||
if err != nil {
|
||
return fmt.Errorf("stop container %d: %w", id, err)
|
||
}
|
||
return p.waitForTask(ctx, taskID)
|
||
}
|
||
|
||
// DestroyContainer stops (if running) and permanently deletes a container.
|
||
func (p *ProxmoxClient) DestroyContainer(ctx context.Context, id int) error {
|
||
// Try to stop first; ignore errors (might already be stopped).
|
||
status, err := p.GetContainerStatus(ctx, id)
|
||
if err != nil {
|
||
return fmt.Errorf("get status before destroy: %w", err)
|
||
}
|
||
if status.Status == "running" {
|
||
_ = p.StopContainer(ctx, id)
|
||
}
|
||
|
||
path := fmt.Sprintf("/api2/json/nodes/%s/lxc/%d", p.config.Node, id)
|
||
params := url.Values{"force": {"1"}, "purge": {"1"}}
|
||
taskID, err := p.delete(ctx, path, params)
|
||
if err != nil {
|
||
return fmt.Errorf("destroy container %d: %w", id, err)
|
||
}
|
||
return p.waitForTask(ctx, taskID)
|
||
}
|
||
|
||
// GetContainerStatus returns the current status and resource usage of a container.
|
||
func (p *ProxmoxClient) GetContainerStatus(ctx context.Context, id int) (ContainerStatus, error) {
|
||
path := fmt.Sprintf("/api2/json/nodes/%s/lxc/%d/status/current", p.config.Node, id)
|
||
var status ContainerStatus
|
||
if err := p.get(ctx, path, &status); err != nil {
|
||
return ContainerStatus{}, fmt.Errorf("get container %d status: %w", id, err)
|
||
}
|
||
return status, nil
|
||
}
|
||
|
||
// GetContainerIP discovers the container's IP address by querying its network interfaces.
|
||
// It polls until an IP is found or the context is cancelled.
|
||
func (p *ProxmoxClient) GetContainerIP(ctx context.Context, id int) (string, error) {
|
||
path := fmt.Sprintf("/api2/json/nodes/%s/lxc/%d/interfaces", p.config.Node, id)
|
||
|
||
ticker := time.NewTicker(2 * time.Second)
|
||
defer ticker.Stop()
|
||
|
||
for {
|
||
var ifaces []struct {
|
||
Name string `json:"name"`
|
||
HWAddr string `json:"hwaddr"`
|
||
Inet string `json:"inet"`
|
||
Inet6 string `json:"inet6"`
|
||
}
|
||
|
||
if err := p.get(ctx, path, &ifaces); err == nil {
|
||
for _, iface := range ifaces {
|
||
if iface.Name == "lo" || iface.Inet == "" {
|
||
continue
|
||
}
|
||
// Inet is in CIDR format (e.g., "10.99.1.5/16")
|
||
ip := iface.Inet
|
||
if idx := strings.IndexByte(ip, '/'); idx > 0 {
|
||
ip = ip[:idx]
|
||
}
|
||
return ip, nil
|
||
}
|
||
}
|
||
|
||
select {
|
||
case <-ctx.Done():
|
||
return "", fmt.Errorf("get container %d IP: %w", id, ctx.Err())
|
||
case <-ticker.C:
|
||
}
|
||
}
|
||
}
|
||
|
||
// EnableInternet adds a container IP to the nftables internet_allowed set,
|
||
// granting outbound HTTP/HTTPS access.
|
||
func (p *ProxmoxClient) EnableInternet(ctx context.Context, containerIP string) error {
|
||
return p.execOnHost(ctx, fmt.Sprintf("nft add element inet sandbox internet_allowed { %s }", containerIP))
|
||
}
|
||
|
||
// DisableInternet removes a container IP from the nftables internet_allowed set,
|
||
// revoking outbound HTTP/HTTPS access.
|
||
func (p *ProxmoxClient) DisableInternet(ctx context.Context, containerIP string) error {
|
||
return p.execOnHost(ctx, fmt.Sprintf("nft delete element inet sandbox internet_allowed { %s }", containerIP))
|
||
}
|
||
|
||
// execOnHost runs a command on the Proxmox host via the API's node exec endpoint.
//
// NOTE(review): the Proxmox API documents /nodes/{node}/execute as taking a
// JSON-encoded list of API requests in "commands", not a raw shell command
// string — confirm against a live Proxmox host that this invocation shape
// actually runs the given shell command.
//
// Only transport/API errors are surfaced; the command's own exit status on
// the host is not inspected here.
func (p *ProxmoxClient) execOnHost(ctx context.Context, command string) error {
	path := fmt.Sprintf("/api2/json/nodes/%s/execute", p.config.Node)
	params := url.Values{"commands": {command}}
	_, err := p.post(ctx, path, params)
	if err != nil {
		return fmt.Errorf("exec on host: %w", err)
	}
	return nil
}
|
||
|
||
// --- HTTP helpers ---
|
||
|
||
// proxmoxResponse is the standard envelope for all Proxmox API responses:
// the payload of interest always lives under the top-level "data" key.
// Data is kept raw so callers can decode it into the appropriate type.
type proxmoxResponse struct {
	Data json.RawMessage `json:"data"`
}
|
||
|
||
func (p *ProxmoxClient) doRequest(ctx context.Context, method, path string, body io.Reader) (*http.Response, error) {
|
||
u := strings.TrimRight(p.config.BaseURL, "/") + path
|
||
|
||
req, err := http.NewRequestWithContext(ctx, method, u, body)
|
||
if err != nil {
|
||
return nil, err
|
||
}
|
||
req.Header.Set("Authorization", fmt.Sprintf("PVEAPIToken=%s=%s", p.config.TokenID, p.config.Secret))
|
||
if body != nil {
|
||
req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
|
||
}
|
||
|
||
resp, err := p.http.Do(req)
|
||
if err != nil {
|
||
return nil, err
|
||
}
|
||
return resp, nil
|
||
}
|
||
|
||
func (p *ProxmoxClient) get(ctx context.Context, path string, result any) error {
|
||
resp, err := p.doRequest(ctx, http.MethodGet, path, nil)
|
||
if err != nil {
|
||
return err
|
||
}
|
||
defer resp.Body.Close()
|
||
return p.parseResponse(resp, result)
|
||
}
|
||
|
||
func (p *ProxmoxClient) post(ctx context.Context, path string, params url.Values) (string, error) {
|
||
var body io.Reader
|
||
if params != nil {
|
||
body = strings.NewReader(params.Encode())
|
||
}
|
||
resp, err := p.doRequest(ctx, http.MethodPost, path, body)
|
||
if err != nil {
|
||
return "", err
|
||
}
|
||
defer resp.Body.Close()
|
||
|
||
var taskID string
|
||
if err := p.parseResponse(resp, &taskID); err != nil {
|
||
return "", err
|
||
}
|
||
return taskID, nil
|
||
}
|
||
|
||
func (p *ProxmoxClient) put(ctx context.Context, path string, params url.Values) (string, error) {
|
||
var body io.Reader
|
||
if params != nil {
|
||
body = strings.NewReader(params.Encode())
|
||
}
|
||
resp, err := p.doRequest(ctx, http.MethodPut, path, body)
|
||
if err != nil {
|
||
return "", err
|
||
}
|
||
defer resp.Body.Close()
|
||
|
||
var result string
|
||
if err := p.parseResponse(resp, &result); err != nil {
|
||
return "", err
|
||
}
|
||
return result, nil
|
||
}
|
||
|
||
func (p *ProxmoxClient) delete(ctx context.Context, path string, params url.Values) (string, error) {
|
||
path = path + "?" + params.Encode()
|
||
resp, err := p.doRequest(ctx, http.MethodDelete, path, nil)
|
||
if err != nil {
|
||
return "", err
|
||
}
|
||
defer resp.Body.Close()
|
||
|
||
var taskID string
|
||
if err := p.parseResponse(resp, &taskID); err != nil {
|
||
return "", err
|
||
}
|
||
return taskID, nil
|
||
}
|
||
|
||
func (p *ProxmoxClient) parseResponse(resp *http.Response, result any) error {
|
||
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
|
||
bodyBytes, _ := io.ReadAll(resp.Body)
|
||
return fmt.Errorf("proxmox API error (HTTP %d): %s", resp.StatusCode, string(bodyBytes))
|
||
}
|
||
|
||
var envelope proxmoxResponse
|
||
if err := json.NewDecoder(resp.Body).Decode(&envelope); err != nil {
|
||
return fmt.Errorf("decode response: %w", err)
|
||
}
|
||
|
||
if result == nil {
|
||
return nil
|
||
}
|
||
|
||
if err := json.Unmarshal(envelope.Data, result); err != nil {
|
||
return fmt.Errorf("unmarshal data: %w", err)
|
||
}
|
||
return nil
|
||
}
|
||
|
||
// waitForTask polls a Proxmox task until it completes or the context is cancelled.
|
||
func (p *ProxmoxClient) waitForTask(ctx context.Context, taskID string) error {
|
||
if taskID == "" {
|
||
return nil
|
||
}
|
||
|
||
path := fmt.Sprintf("/api2/json/nodes/%s/tasks/%s/status", p.config.Node, url.PathEscape(taskID))
|
||
ticker := time.NewTicker(1 * time.Second)
|
||
defer ticker.Stop()
|
||
|
||
for {
|
||
var status struct {
|
||
Status string `json:"status"` // "running", "stopped", etc.
|
||
ExitCode string `json:"exitstatus"`
|
||
}
|
||
|
||
if err := p.get(ctx, path, &status); err != nil {
|
||
return fmt.Errorf("poll task %s: %w", taskID, err)
|
||
}
|
||
|
||
if status.Status != "running" {
|
||
if status.ExitCode != "OK" && status.ExitCode != "" {
|
||
return fmt.Errorf("task %s failed: %s", taskID, status.ExitCode)
|
||
}
|
||
return nil
|
||
}
|
||
|
||
select {
|
||
case <-ctx.Done():
|
||
return fmt.Errorf("wait for task %s: %w", taskID, ctx.Err())
|
||
case <-ticker.C:
|
||
}
|
||
}
|
||
}
|