// executus/run/ports.go

package run

import (
	"context"
	"errors"
	"time"

	"gitea.stevedudenhoeffer.com/steve/majordomo/llm"

	"gitea.stevedudenhoeffer.com/steve/executus/deliver"
)

// ErrCriticKill is the cancellation cause the executor stamps on a run the
// critic kills, so a critic kill surfaces as a distinct "killed" status (vs a
// backstop "timeout" or a caller "cancelled"). A host CriticHandle signals a
// kill via KillCause(); the executor wraps that reason with this sentinel.
//
// NOTE(review): the wrapping itself happens in the executor, not in this
// file; callers presumably match it with errors.Is rather than == — confirm
// against the executor's cancellation path.
var ErrCriticKill = errors.New("run: critic killed the run")

// Ports are the host seams the run executor consumes. Every field is nil-safe:
// a light host passes the zero Ports and gets a bounded, in-memory run with no
// persistence, audit, budget, critic, delegation, or delivery — which is
// exactly a gadfly swarm task. A heavy host (mort) wires each one to a battery.
//
// This struct IS the inversion: in mort, agentexec imports agents /
// agentcritic / skillaudit and skillexec imports skills / paste directly; here
// the kernel depends only on these interfaces, and the batteries implement
// them. The mort_*_adapters.go wall becomes the set of impls.
//
// Each field comment below ends with what a nil value means for that port.
type Ports struct {
	// Audit records the run trace (start, per-step/per-tool events, final
	// stats). nil = no audit.
	Audit Audit
	// Budget gates and meters per-caller resource use. nil = unbounded.
	Budget Budget
	// Critic optionally monitors a long run for hangs/runaways. nil = none.
	Critic Critic
	// Checkpointer is the factory that mints a per-run Checkpointer for durable
	// recovery (it decides per run whether the run is durable). nil = no
	// checkpointing (a run interrupted by shutdown is simply lost).
	Checkpointer CheckpointerFactory
	// Palette resolves SkillPalette / SubAgentPalette entries into delegation
	// tools (skill__<name> / agent__<name>). nil = those entries are inert.
	Palette PaletteSource
	// Delivery is where the run's output + artifacts go. nil = the caller
	// reads the Result in-process (the light-host default).
	Delivery deliver.Delivery
	// InputFiles persists non-image input attachments (audio, PDF, binary)
	// carried on Invocation.InputFiles into a host file store under run scope,
	// returning file_ids the agent can hand to a worker tool. nil = input files
	// are silently ignored (the run still proceeds, text-only). The bytes are
	// never inlined into the model context — the LLM can't read raw audio/binary.
	InputFiles InputFileStager
	// SkillPacks activates a RunnableAgent.SkillPacks (SKILL.md subscriptions)
	// for the run: it folds a catalog into the system prompt and adds a skill_use
	// loader tool. nil = SkillPacks are inert. The executus/skillpack battery
	// ships a default impl (skillpack.Activator).
	SkillPacks SkillPackActivator
}

// SkillPackActivator resolves an agent's subscribed skill-pack names for a run
// into system-prompt instructions (a catalog of what's available on demand) and
// the tools that back them (a single skill_use loader). It receives the run +
// subject ids so the impl can scope any per-run file staging. It returns "" +
// nil when nothing resolves; activation errors are non-fatal to the run. Defined
// here (the consumer) so the battery satisfies it structurally without importing
// run — the same inversion as the other ports.
type SkillPackActivator interface {
	// ActivateSkillPacks resolves names for the run identified by runID and
	// subjectID. instructions is the text to fold into the system prompt and
	// tools are the tools backing it; both are empty when nothing resolves.
	// A non-nil err is non-fatal to the run.
	ActivateSkillPacks(ctx context.Context, names []string, runID, subjectID string) (instructions string, tools []llm.Tool, err error)
}

// InputFileStager persists a single non-image input attachment into a host file
// store under run scope and returns a file_id the run can reference. It is the
// seam mort's skill FileStorage (and any host blob store) implements so the
// kernel can stage Invocation.InputFiles without importing a storage layer.
type InputFileStager interface {
	// StageInputFile stores one attachment (its name, mime type and raw
	// content) for the run identified by runID and returns the file_id it was
	// stored under.
	// NOTE(review): agentID presumably carries the same id as
	// RunInfo.SubjectID — confirm against the executor's call site.
	StageInputFile(ctx context.Context, runID, agentID, name, mime string, content []byte) (fileID string, err error)
}

// RunInfo is the start-of-run attribution handed to a recorder or critic. It is
// the host-neutral rename of mort's SkillRun start fields.
type RunInfo struct {
	RunID string

	// SubjectID is the agent/skill id being run (audit "skill_id").
	SubjectID string

	Name      string
	CallerID  string
	ChannelID string

	// GuildID is the originating guild/server id (empty for DMs/triggers).
	GuildID string

	ParentRunID string

	// ModelTier is the run's resolved base tier (for checkpoint re-dispatch).
	ModelTier string

	Inputs    map[string]any
	StartedAt time.Time

	// MaxIterations is the run's base tool-dispatch step ceiling. It gives a
	// critic the baseline to raise the ceiling from (see CriticHandle.MaxSteps).
	MaxIterations int
}

// RunStats carries a finished run's summary — the value handed to
// RunRecorder.Close. It mirrors mort's skillaudit/skillexec RunStats.
type RunStats struct {
	// Status is the terminal status label:
	// ok | error | timeout | budget_exceeded | cancelled | dry_run.
	// NOTE(review): the ErrCriticKill doc also describes a distinct "killed"
	// status; confirm whether it belongs in this list.
	Status string

	Output, Error string

	ToolCalls      int
	RuntimeSeconds float64

	InputTokens, OutputTokens, ThinkingTokens int64
}

// --- Audit ---

// Audit begins recording a run. StartRun returns a per-run RunRecorder (or nil
// to skip recording this run). The audit battery wires its Storage behind this.
type Audit interface {
	// StartRun opens the record for the run described by info. A nil
	// RunRecorder means this run is not recorded, so callers must nil-check
	// the result before use.
	StartRun(ctx context.Context, info RunInfo) RunRecorder
}

// RunRecorder records the events of one in-flight run and its final stats. It
// satisfies RunTally so the kernel can surface live token/tool counts to the
// self-status tool. Mirrors mort's skillaudit.Writer.
//
// RunTally is embedded here; its declaration is outside this part of the file.
type RunRecorder interface {
	RunTally
	// OnStep records one completed agent-loop iteration's model response.
	OnStep(iter int, resp *llm.Response)
	// OnTool records one executed tool call + its result.
	OnTool(call llm.ToolCall, result string)
	// LogEvent / LogError append structured events to the run log.
	LogEvent(eventType string, payload map[string]any)
	LogError(msg string)
	// Close writes the terminal roll-up. Detaches from the caller's context
	// internally so a cancelled run still records — i.e. implementations are
	// expected not to abort the write merely because ctx is already done.
	Close(ctx context.Context, stats RunStats)
}

// --- Budget ---

// Budget gates and meters per-caller resource use. Mirrors mort's
// skillexec.BudgetTracker.
type Budget interface {
	// Check reports whether the caller has remaining budget (nil = allowed).
	// NOTE(review): a non-nil error presumably maps to the "budget_exceeded"
	// run status — confirm in the executor.
	Check(ctx context.Context, callerID string) error
	// Commit records that the caller spent runtimeSeconds on this run. It has
	// no return value, so an implementation cannot report a failure to the
	// caller through this method.
	Commit(ctx context.Context, callerID string, runtimeSeconds float64)
}

// --- Critic ---

// Critic optionally monitors a long-running run (the two-tier soft/hard
// timeout). Monitor returns a handle the executor feeds progress into and
// queries for steer/deadline decisions; a nil handle means "not monitored".
//
// The exact wiring (how the handle's Steer/Deadline bind into majordomo's
// agent.WithSteer / agent.WithMaxStepsFunc / run-context cancellation) is
// finalized in the executor; this is the seam the agentcritic battery adapts.
type Critic interface {
	// Monitor starts monitoring the run described by info and returns its
	// handle, or nil to leave the run unmonitored.
	// NOTE(review): softTimeout is presumably the soft tier of the two-tier
	// timeout (the hard tier being CriticHandle.Deadline) — confirm against
	// the agentcritic battery.
	Monitor(ctx context.Context, info RunInfo, softTimeout time.Duration) CriticHandle
}

// CriticHandle is the executor's live link to a run's critic.
//
// Concurrency: the executor calls RecordStep/RecordToolStart/Steer from the run
// goroutine while a separate watch goroutine polls Deadline() and the run's end
// calls Stop() — so implementations MUST be safe for concurrent use across these
// methods (the critic battery's handle guards its state with a mutex).
//
// NOTE(review): MaxSteps and KillCause are not named in the concurrency note
// above; since they read the same critic state, implementations should
// presumably treat them as concurrently callable too — confirm in the executor.
type CriticHandle interface {
	// RecordStep / RecordToolStart keep the critic's activity clock fresh so a
	// healthy-but-slow run is not mistaken for a hang. RecordStep also carries the
	// completed step's model response (nil-safe) so the critic's Trace can show
	// what the agent actually produced, not just an iteration count.
	RecordStep(iter int, resp *llm.Response)
	RecordToolStart(name, args string)
	// Steer returns any messages the critic wants injected into the loop (a
	// nudge), drained before each step — matches majordomo agent.WithSteer.
	Steer() []llm.Message
	// Deadline returns the current hard-kill deadline (the critic may extend
	// it); the executor binds the run context to it. Zero = no hard deadline.
	Deadline() time.Time
	// MaxSteps returns the current tool-dispatch step ceiling, polled by the
	// executor each step (via majordomo WithMaxStepsFunc) so a critic can raise a
	// healthy-but-long run's iteration budget mid-flight. Return <= 0 to defer to
	// the run's base MaxIterations.
	MaxSteps() int
	// KillCause returns a non-nil reason iff the critic has decided to KILL this
	// run (as opposed to letting the hard-deadline backstop expire). The executor
	// reads it when the deadline passes: non-nil → cancel the run with
	// ErrCriticKill (status "killed"); nil → the backstop expired naturally
	// (status "timeout"). Hosts that never distinguish the two may return nil.
	KillCause() error
	// Stop ends monitoring when the run finishes.
	Stop()
}

// --- Checkpointer ---

// CheckpointerFactory decides, per run, whether the run is durable and (if so)
// mints the per-run Checkpointer that records its progress. It returns (nil, nil)
// for a non-durable run (the common short-run case — no checkpointing overhead).
// A storage error should be logged and degraded to (nil, nil) so a failing
// checkpoint store never fails the run. Mirrors mort's
// agentexec.CheckpointerFactory.
type CheckpointerFactory interface {
	// Begin is called with the run's start info. A nil Checkpointer with a nil
	// error means "this run is not checkpointed", so callers must nil-check
	// the result before calling Save/Complete/Fail.
	Begin(ctx context.Context, info RunInfo) (Checkpointer, error)
}

// Checkpointer persists a run's resumable progress for durable recovery.
// Mirrors mort's agentexec.RunCheckpointer.
//
// Lifecycle as described by the method comments: Save repeatedly while the run
// progresses, then Complete on success or Fail on terminal failure. Neither is
// called for a run interrupted by shutdown, so its checkpoint survives.
type Checkpointer interface {
	// Save persists the run's current resumable progress (throttled).
	Save(ctx context.Context, st RunCheckpointState) error
	// Complete clears the checkpoint on success.
	Complete(ctx context.Context) error
	// Fail clears the checkpoint on terminal failure. A run interrupted by
	// shutdown is left untouched so boot recovery picks it up.
	Fail(ctx context.Context, err error) error
}

// RunCheckpointState is the resumable snapshot a Checkpointer persists.
// Single-loop runs populate Messages; multi-phase runs populate
// CompletedPhases instead. The field comments describe them as mutually
// exclusive.
type RunCheckpointState struct {
	// Messages is the running transcript of a SINGLE-LOOP run (grows each step;
	// resumed via WithHistory). nil for multi-phase runs — phase recovery is
	// boundary-granular (see CompletedPhases), not mid-phase transcript.
	Messages  []llm.Message
	Iteration int // NOTE(review): presumably the loop iteration reached at snapshot time — confirm
	// CompletedPhases is set only for multi-phase runs: the outputs of phases
	// already finished, in phase order, so a resumed run skips them and re-runs
	// the interrupted phase from its start. nil for single-loop runs.
	CompletedPhases []PhaseOutput
}

// PhaseOutput pairs a finished pipeline phase's name with the text it
// produced. Checkpoints store these so a resumed multi-phase run can skip the
// phases that already completed.
type PhaseOutput struct {
	Name, Output string
}

// --- PaletteSource ---

// PaletteSource resolves a RunnableAgent's SkillPalette / SubAgentPalette names
// into delegation tools and invokes them. Mirrors mort's
// SkillInvokerForPalette + AgentInvokerForPalette. nil Palette => palette
// entries are inert ("not configured" at first call).
type PaletteSource interface {
	// ResolveSkill maps a palette skill name, on behalf of callerID, to the
	// skill's id.
	ResolveSkill(ctx context.Context, callerID, name string) (skillID string, err error)
	// InvokeSkill runs the named skill with inputs and returns its output, the
	// id of the run it created, and that run's status.
	// NOTE(review): parentRunID presumably links the child run to the
	// delegating run (cf. RunInfo.ParentRunID) — confirm in the impl.
	InvokeSkill(ctx context.Context, callerID, channelID, name string,
		inputs map[string]any, parentRunID string) (output, runID, status string, err error)

	// ResolveAgent maps a palette sub-agent name, on behalf of callerID, to
	// the agent's id.
	ResolveAgent(ctx context.Context, callerID, name string) (agentID string, err error)
	// InvokeAgent runs the named sub-agent on prompt and returns its output,
	// the id of the run it created, and that run's status.
	// NOTE(review): the semantics of modelTierOverride, promptPrepend,
	// toolsSubset and onEvent are not visible here; presumably they override
	// the tier, prefix the prompt, restrict the tool set, and report progress
	// events respectively, and onEvent may be nil — confirm against the
	// implementing battery.
	InvokeAgent(ctx context.Context, callerID, channelID, name string,
		prompt, parentRunID, modelTierOverride, promptPrepend string,
		toolsSubset []string,
		onEvent func(ctx context.Context, event, emoji string)) (output, runID, status string, err error)
}