Files
go-llm/v2/sandbox/proxmox.go
Steve Dudenhoeffer 23c9068022
All checks were successful
CI / V2 Module (push) Successful in 11m46s
CI / Root Module (push) Successful in 11m50s
CI / Lint (push) Successful in 9m28s
Add sandbox package for isolated Linux containers via Proxmox LXC
Provides a complete lifecycle manager for ephemeral sandbox environments:
- ProxmoxClient: thin REST wrapper for container CRUD, IP discovery, internet toggle
- SSHExecutor: persistent SSH/SFTP for command execution and file transfer
- Manager/Sandbox: high-level orchestrator tying Proxmox + SSH together
- 22 unit tests with mock Proxmox HTTP server
- Proxmox setup & hardening guide (docs/sandbox-setup.md)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-08 00:47:45 -05:00

411 lines
12 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
package sandbox
import (
	"context"
	"crypto/tls"
	"encoding/json"
	"fmt"
	"io"
	"net"
	"net/http"
	"net/url"
	"strings"
	"time"
)
// ProxmoxConfig describes how to reach a Proxmox VE host and which node,
// template, pool and bridge the sandbox containers should use.
type ProxmoxConfig struct {
	// BaseURL is the root of the Proxmox API, for example
	// "https://proxmox.local:8006". API paths are appended to it.
	BaseURL string

	// TokenID names the API token, for example
	// "mort-sandbox@pve!sandbox-token".
	TokenID string

	// Secret is the secret half of the API token.
	Secret string

	// Node is the name of the Proxmox node to operate on, for example "pve".
	Node string

	// TemplateID is the VMID of the LXC template that new containers are
	// cloned from, for example 9000.
	TemplateID int

	// Pool is the resource pool that cloned containers are placed in, for
	// example "sandbox-pool". An empty value omits the pool from the clone
	// request.
	Pool string

	// Bridge is the network bridge attached to each container's eth0, for
	// example "vmbr1".
	Bridge string

	// InsecureSkipVerify turns off TLS certificate verification.
	// Intended only for Proxmox hosts with self-signed certificates.
	InsecureSkipVerify bool
}
// ContainerStatus is the state and resource usage of a Proxmox LXC container
// as decoded from the container's status/current API response.
type ContainerStatus struct {
	Status  string  `json:"status"`  // Lifecycle state, e.g. "running" or "stopped".
	CPU     float64 `json:"cpu"`     // CPU usage as a fraction (0.0-1.0).
	Mem     int64   `json:"mem"`     // Memory currently in use, in bytes.
	MaxMem  int64   `json:"maxmem"`  // Memory limit, in bytes.
	Disk    int64   `json:"disk"`    // Disk space currently in use, in bytes.
	MaxDisk int64   `json:"maxdisk"` // Disk size limit, in bytes.
	NetIn   int64   `json:"netin"`   // Bytes received over the network.
	NetOut  int64   `json:"netout"`  // Bytes sent over the network.
	Uptime  int64   `json:"uptime"`  // Uptime, in seconds.
}
// ContainerConfig carries the per-container settings used when a sandbox
// container is cloned and configured.
type ContainerConfig struct {
	// Hostname is the container's hostname. When empty, CloneTemplate
	// derives one from the new VMID.
	Hostname string

	// CPUs is the number of CPU cores. Values <= 0 fall back to 1 in
	// ConfigureContainer.
	CPUs int

	// MemoryMB is the memory limit in megabytes. Values <= 0 fall back to
	// 1024 in ConfigureContainer.
	MemoryMB int

	// DiskGB is the root filesystem size in gigabytes (documented default 8).
	// NOTE(review): no code in this file reads this field; presumably it is
	// applied elsewhere — confirm.
	DiskGB int

	// SSHPublicKey is an optional SSH public key to inject.
	// NOTE(review): no code in this file reads this field; presumably it is
	// applied elsewhere — confirm.
	SSHPublicKey string
}
// ProxmoxClient is a small REST client for managing the lifecycle of LXC
// containers on a Proxmox VE node. Construct it with NewProxmoxClient.
type ProxmoxClient struct {
	// config holds the connection and placement settings for every request.
	config ProxmoxConfig
	// http is the HTTP client all API calls are sent through.
	http *http.Client
}
// NewProxmoxClient builds a ProxmoxClient for the given configuration.
//
// The client gets its own http.Client with a 30-second request timeout and a
// dedicated transport. TLS certificate verification is skipped only when
// config.InsecureSkipVerify is true.
func NewProxmoxClient(config ProxmoxConfig) *ProxmoxClient {
	tlsCfg := &tls.Config{InsecureSkipVerify: config.InsecureSkipVerify}
	httpClient := &http.Client{
		Transport: &http.Transport{TLSClientConfig: tlsCfg},
		Timeout:   30 * time.Second,
	}
	return &ProxmoxClient{config: config, http: httpClient}
}
// NextAvailableID asks the Proxmox cluster for the next free VMID.
//
// The payload is decoded through json.Number, which accepts the ID both as a
// JSON number (100) and as a quoted numeric string ("100").
// NOTE(review): Proxmox is commonly reported to return this value as a quoted
// string — confirm against the target PVE version.
//
// An error is returned when the request fails or when the payload is missing
// or is not an integer; a VMID of 0 is never reported as success.
func (p *ProxmoxClient) NextAvailableID(ctx context.Context) (int, error) {
	var raw json.Number
	if err := p.get(ctx, "/api2/json/cluster/nextid", &raw); err != nil {
		return 0, fmt.Errorf("get next VMID: %w", err)
	}
	// A null or absent payload leaves raw empty; Int64 rejects that, so the
	// caller never receives a bogus zero ID.
	id, err := raw.Int64()
	if err != nil {
		return 0, fmt.Errorf("get next VMID: parse %q: %w", raw.String(), err)
	}
	return int(id), nil
}
// CloneTemplate makes a full clone of the configured template container under
// the VMID newID and blocks until the clone task has finished.
//
// The hostname comes from cfg.Hostname, or "sandbox-<newID>" when that is
// empty. The clone is placed in the configured pool when one is set.
func (p *ProxmoxClient) CloneTemplate(ctx context.Context, newID int, cfg ContainerConfig) error {
	name := cfg.Hostname
	if name == "" {
		name = fmt.Sprintf("sandbox-%d", newID)
	}

	form := url.Values{}
	form.Set("newid", fmt.Sprintf("%d", newID))
	form.Set("hostname", name)
	form.Set("full", "1")
	if pool := p.config.Pool; pool != "" {
		form.Set("pool", pool)
	}

	endpoint := fmt.Sprintf("/api2/json/nodes/%s/lxc/%d/clone", p.config.Node, p.config.TemplateID)
	upid, err := p.post(ctx, endpoint, form)
	if err != nil {
		return fmt.Errorf("clone template %d → %d: %w", p.config.TemplateID, newID, err)
	}
	return p.waitForTask(ctx, upid)
}
// ConfigureContainer applies CPU, memory and network settings to the existing
// container with the given id.
//
// Non-positive cfg.CPUs and cfg.MemoryMB fall back to 1 core and 1024 MB.
// Swap is always set to 0, and eth0 is attached to the configured bridge with
// DHCP addressing. Other ContainerConfig fields are not sent by this call.
func (p *ProxmoxClient) ConfigureContainer(ctx context.Context, id int, cfg ContainerConfig) error {
	cores, memoryMB := cfg.CPUs, cfg.MemoryMB
	if cores < 1 {
		cores = 1
	}
	if memoryMB < 1 {
		memoryMB = 1024
	}

	form := url.Values{}
	form.Set("cores", fmt.Sprintf("%d", cores))
	form.Set("memory", fmt.Sprintf("%d", memoryMB))
	form.Set("swap", "0")
	form.Set("net0", fmt.Sprintf("name=eth0,bridge=%s,ip=dhcp", p.config.Bridge))

	endpoint := fmt.Sprintf("/api2/json/nodes/%s/lxc/%d/config", p.config.Node, id)
	if _, err := p.put(ctx, endpoint, form); err != nil {
		return fmt.Errorf("configure container %d: %w", id, err)
	}
	return nil
}
// StartContainer requests a start of the container with the given id and
// blocks until the resulting Proxmox task has finished.
func (p *ProxmoxClient) StartContainer(ctx context.Context, id int) error {
	endpoint := fmt.Sprintf("/api2/json/nodes/%s/lxc/%d/status/start", p.config.Node, id)

	upid, startErr := p.post(ctx, endpoint, nil)
	if startErr != nil {
		return fmt.Errorf("start container %d: %w", id, startErr)
	}
	return p.waitForTask(ctx, upid)
}
// StopContainer requests a stop of the container with the given id and blocks
// until the resulting Proxmox task has finished.
func (p *ProxmoxClient) StopContainer(ctx context.Context, id int) error {
	endpoint := fmt.Sprintf("/api2/json/nodes/%s/lxc/%d/status/stop", p.config.Node, id)

	upid, stopErr := p.post(ctx, endpoint, nil)
	if stopErr != nil {
		return fmt.Errorf("stop container %d: %w", id, stopErr)
	}
	return p.waitForTask(ctx, upid)
}
// DestroyContainer permanently removes the container with the given id,
// stopping it first when it is reported as running.
//
// A failure to read the current status aborts the call. A failure to stop the
// container does not: the delete is still issued with force=1 and purge=1.
// The call blocks until the delete task has finished.
func (p *ProxmoxClient) DestroyContainer(ctx context.Context, id int) error {
	current, err := p.GetContainerStatus(ctx, id)
	if err != nil {
		return fmt.Errorf("get status before destroy: %w", err)
	}
	if current.Status == "running" {
		// Best effort only: the stop error is deliberately dropped because
		// the forced delete below is attempted regardless.
		_ = p.StopContainer(ctx, id)
	}

	query := url.Values{}
	query.Set("force", "1")
	query.Set("purge", "1")

	endpoint := fmt.Sprintf("/api2/json/nodes/%s/lxc/%d", p.config.Node, id)
	upid, err := p.delete(ctx, endpoint, query)
	if err != nil {
		return fmt.Errorf("destroy container %d: %w", id, err)
	}
	return p.waitForTask(ctx, upid)
}
// GetContainerStatus fetches the current state and resource usage of the
// container with the given id. On failure it returns a zero ContainerStatus
// together with the wrapped error.
func (p *ProxmoxClient) GetContainerStatus(ctx context.Context, id int) (ContainerStatus, error) {
	var current ContainerStatus

	endpoint := fmt.Sprintf("/api2/json/nodes/%s/lxc/%d/status/current", p.config.Node, id)
	err := p.get(ctx, endpoint, &current)
	if err != nil {
		return ContainerStatus{}, fmt.Errorf("get container %d status: %w", id, err)
	}
	return current, nil
}
// GetContainerIP polls the container's network interfaces until one of them
// reports an IPv4 address, and returns that address without its CIDR suffix.
//
// The interface list is queried immediately and then every two seconds.
// Failed queries are treated as "not ready yet" and retried. The only way the
// call fails is ctx being cancelled or expiring, in which case ctx.Err() is
// returned wrapped.
func (p *ProxmoxClient) GetContainerIP(ctx context.Context, id int) (string, error) {
	type netInterface struct {
		Name   string `json:"name"`
		HWAddr string `json:"hwaddr"`
		Inet   string `json:"inet"`
		Inet6  string `json:"inet6"`
	}

	endpoint := fmt.Sprintf("/api2/json/nodes/%s/lxc/%d/interfaces", p.config.Node, id)

	poll := time.NewTicker(2 * time.Second)
	defer poll.Stop()

	for {
		// A fresh slice per attempt so stale entries never leak between polls.
		var found []netInterface
		if err := p.get(ctx, endpoint, &found); err == nil {
			for i := range found {
				addr := found[i].Inet
				// Skip loopback and interfaces that have no IPv4 address yet.
				if found[i].Name == "lo" || addr == "" {
					continue
				}
				// Addresses arrive in CIDR form (e.g. "10.99.1.5/16");
				// strip the prefix length when present.
				if slash := strings.IndexByte(addr, '/'); slash > 0 {
					return addr[:slash], nil
				}
				return addr, nil
			}
		}

		select {
		case <-poll.C:
			continue
		case <-ctx.Done():
			return "", fmt.Errorf("get container %d IP: %w", id, ctx.Err())
		}
	}
}
// EnableInternet adds containerIP to the nftables internet_allowed set,
// granting outbound HTTP/HTTPS access.
//
// containerIP must be a literal IPv4 or IPv6 address. Anything else is
// rejected before a command is built, because the value is interpolated into
// a command line destined for the Proxmox host.
func (p *ProxmoxClient) EnableInternet(ctx context.Context, containerIP string) error {
	cmd, err := nftInternetSetCommand("add", containerIP)
	if err != nil {
		return fmt.Errorf("enable internet: %w", err)
	}
	return p.execOnHost(ctx, cmd)
}

// DisableInternet removes containerIP from the nftables internet_allowed set,
// revoking outbound HTTP/HTTPS access.
//
// containerIP must be a literal IPv4 or IPv6 address. Anything else is
// rejected before a command is built, because the value is interpolated into
// a command line destined for the Proxmox host.
func (p *ProxmoxClient) DisableInternet(ctx context.Context, containerIP string) error {
	cmd, err := nftInternetSetCommand("delete", containerIP)
	if err != nil {
		return fmt.Errorf("disable internet: %w", err)
	}
	return p.execOnHost(ctx, cmd)
}

// nftInternetSetCommand builds the nft command that applies action ("add" or
// "delete") to the internet_allowed set for containerIP. It returns an error
// when containerIP does not parse as an IP address, so shell metacharacters
// can never reach the command line.
func nftInternetSetCommand(action, containerIP string) (string, error) {
	ip := net.ParseIP(strings.TrimSpace(containerIP))
	if ip == nil {
		return "", fmt.Errorf("invalid container IP %q", containerIP)
	}
	// Use the parsed address's canonical text form, not the caller's string.
	return fmt.Sprintf("nft %s element inet sandbox internet_allowed { %s }", action, ip.String()), nil
}
// execOnHost sends command to the node's /execute API endpoint as the
// "commands" form field and discards the response payload.
//
// NOTE(review): the Proxmox VE API reference describes the "commands"
// parameter of this endpoint as a JSON-encoded array of API calls rather than
// a shell command line — confirm that the target host accepts a raw command
// string here.
func (p *ProxmoxClient) execOnHost(ctx context.Context, command string) error {
	form := url.Values{}
	form.Set("commands", command)

	endpoint := fmt.Sprintf("/api2/json/nodes/%s/execute", p.config.Node)
	if _, err := p.post(ctx, endpoint, form); err != nil {
		return fmt.Errorf("exec on host: %w", err)
	}
	return nil
}
// --- HTTP helpers ---
// proxmoxResponse is the envelope that Proxmox API responses are decoded
// into. The payload sits under the "data" key and is kept raw so each caller
// can unmarshal it into its own result type.
type proxmoxResponse struct {
	Data json.RawMessage `json:"data"`
}
// doRequest sends one authenticated request to the Proxmox API.
//
// path is appended to the configured BaseURL after trailing slashes are
// trimmed from the base. Every request carries the PVEAPIToken Authorization
// header. A form-urlencoded Content-Type is added only when body is non-nil.
// On success the caller owns the returned response and must close its body.
func (p *ProxmoxClient) doRequest(ctx context.Context, method, path string, body io.Reader) (*http.Response, error) {
	target := strings.TrimRight(p.config.BaseURL, "/") + path

	req, err := http.NewRequestWithContext(ctx, method, target, body)
	if err != nil {
		return nil, err
	}

	token := fmt.Sprintf("PVEAPIToken=%s=%s", p.config.TokenID, p.config.Secret)
	req.Header.Set("Authorization", token)
	if body != nil {
		req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
	}

	resp, err := p.http.Do(req)
	if err != nil {
		// Always hand back a nil response alongside an error.
		return nil, err
	}
	return resp, nil
}
// get issues a GET for path and decodes the "data" payload of the response
// into result. result may be nil when the payload is of no interest.
func (p *ProxmoxClient) get(ctx context.Context, path string, result any) error {
	resp, reqErr := p.doRequest(ctx, http.MethodGet, path, nil)
	if reqErr != nil {
		return reqErr
	}
	defer resp.Body.Close()

	return p.parseResponse(resp, result)
}
// post issues a POST for path with params sent as a form-encoded body and
// returns the string found in the response's "data" payload, which callers
// treat as a task ID. No request body is sent when params is nil.
func (p *ProxmoxClient) post(ctx context.Context, path string, params url.Values) (string, error) {
	// form stays a nil interface when there are no params, so doRequest
	// neither sends a body nor sets a Content-Type.
	var form io.Reader
	if params != nil {
		form = strings.NewReader(params.Encode())
	}

	resp, err := p.doRequest(ctx, http.MethodPost, path, form)
	if err != nil {
		return "", err
	}
	defer resp.Body.Close()

	var upid string
	if err = p.parseResponse(resp, &upid); err != nil {
		return "", err
	}
	return upid, nil
}
// put issues a PUT for path with params sent as a form-encoded body and
// returns the string found in the response's "data" payload. No request body
// is sent when params is nil.
func (p *ProxmoxClient) put(ctx context.Context, path string, params url.Values) (string, error) {
	// form stays a nil interface when there are no params, so doRequest
	// neither sends a body nor sets a Content-Type.
	var form io.Reader
	if params != nil {
		form = strings.NewReader(params.Encode())
	}

	resp, err := p.doRequest(ctx, http.MethodPut, path, form)
	if err != nil {
		return "", err
	}
	defer resp.Body.Close()

	var data string
	if err = p.parseResponse(resp, &data); err != nil {
		return "", err
	}
	return data, nil
}
// delete issues a DELETE for path with params sent as the URL query string
// and returns the string found in the response's "data" payload, which
// callers treat as a task ID.
//
// When params is nil or empty no query string is appended, so the request URL
// never ends in a dangling "?".
func (p *ProxmoxClient) delete(ctx context.Context, path string, params url.Values) (string, error) {
	// Encode is safe on a nil url.Values and yields "" for an empty set.
	if query := params.Encode(); query != "" {
		path += "?" + query
	}

	resp, err := p.doRequest(ctx, http.MethodDelete, path, nil)
	if err != nil {
		return "", err
	}
	defer resp.Body.Close()

	var taskID string
	if err := p.parseResponse(resp, &taskID); err != nil {
		return "", err
	}
	return taskID, nil
}
// parseResponse validates the HTTP status of resp and decodes the "data"
// payload of the Proxmox envelope into result.
//
// A non-2xx status produces an error carrying the status code and at most
// 64 KiB of the response body. For 2xx responses the body must be a JSON
// envelope. When result is nil, or the envelope has no "data" key, result is
// left untouched and nil is returned, matching how an explicit JSON null
// payload is handled. The caller remains responsible for closing resp.Body.
func (p *ProxmoxClient) parseResponse(resp *http.Response, result any) error {
	// Cap on how much of an error body is copied into the error message, so a
	// misbehaving server cannot make the client buffer an unbounded payload.
	const maxErrorBody = 64 << 10

	if resp.StatusCode < 200 || resp.StatusCode >= 300 {
		// Read errors are ignored on purpose: whatever was read is still
		// useful context for the status-code error.
		bodyBytes, _ := io.ReadAll(io.LimitReader(resp.Body, maxErrorBody))
		return fmt.Errorf("proxmox API error (HTTP %d): %s", resp.StatusCode, string(bodyBytes))
	}

	var envelope proxmoxResponse
	if err := json.NewDecoder(resp.Body).Decode(&envelope); err != nil {
		return fmt.Errorf("decode response: %w", err)
	}

	// An absent "data" key leaves envelope.Data empty; unmarshalling that
	// would fail with "unexpected end of JSON input", so treat it as no data.
	if result == nil || len(envelope.Data) == 0 {
		return nil
	}
	if err := json.Unmarshal(envelope.Data, result); err != nil {
		return fmt.Errorf("unmarshal data: %w", err)
	}
	return nil
}
// waitForTask blocks until the Proxmox task taskID is no longer reported as
// running, or until ctx is cancelled.
//
// An empty taskID means there is nothing to wait for and returns nil at once.
// The task status is queried immediately and then once per second. Any
// polling error aborts the wait. A finished task counts as successful when
// its exit status is "OK" or empty; any other exit status is returned as an
// error.
func (p *ProxmoxClient) waitForTask(ctx context.Context, taskID string) error {
	if taskID == "" {
		return nil
	}

	type taskState struct {
		Status   string `json:"status"` // "running", "stopped", etc.
		ExitCode string `json:"exitstatus"`
	}

	endpoint := fmt.Sprintf("/api2/json/nodes/%s/tasks/%s/status", p.config.Node, url.PathEscape(taskID))

	poll := time.NewTicker(1 * time.Second)
	defer poll.Stop()

	for {
		var state taskState
		if err := p.get(ctx, endpoint, &state); err != nil {
			return fmt.Errorf("poll task %s: %w", taskID, err)
		}

		if state.Status != "running" {
			switch state.ExitCode {
			case "", "OK":
				return nil
			default:
				return fmt.Errorf("task %s failed: %s", taskID, state.ExitCode)
			}
		}

		select {
		case <-poll.C:
			continue
		case <-ctx.Done():
			return fmt.Errorf("wait for task %s: %w", taskID, ctx.Err())
		}
	}
}