// executus/skill/validate.go

package skill

import (
	"fmt"
	"strings"
	"time"
	"unicode/utf8"

	"gitea.stevedudenhoeffer.com/steve/executus/model"
)

// ChannelFilterChecker is the subset of ChannelFilterRegistry used by
// Validate to check that a skill references a registered channel filter.
//
// Why: kept narrow so tests can pass a tiny stub; full registry is
// declared in channel_filters.go.
type ChannelFilterChecker interface {
	// Has reports whether a channel filter with the given name is
	// registered. Validate calls it with the skill's
	// ChatbotChannelFilter and rejects the skill when it returns false.
	Has(name string) bool
}

// ModelTierChecker reports whether the given model tier or
// "provider/model" spec is recognised. Validate uses this to reject
// typos at save time.
//
// Why: tiers come from llms.tier.* convars (fast/standard/thinking by
// default) but admins may add custom tiers; explicit "provider/model"
// is also valid. Validate accepts anything non-empty matching either
// pattern — finer correctness is the LLM call's job.
type ModelTierChecker interface {
	// IsValid reports whether spec is an acceptable value for a skill's
	// ModelTier. Validate returns an "unknown model tier" error when
	// this is false.
	IsValid(spec string) bool
}

// defaultModelTierChecker is the ModelTierChecker that Validate falls
// back to when ValidateOpts.ModelTier is nil. It is intentionally
// permissive; callers (typically tests) that want stricter acceptance
// supply their own checker through that field.
type defaultModelTierChecker struct{}

// IsValid reports whether spec looks like a usable model selector.
//
// Accepted forms, checked in order:
//   - a name for which model.IsTierName returns true;
//   - such a name followed by ':' and any suffix (e.g. "thinking:high");
//   - any string containing '/', i.e. "provider/model" with or without
//     a ":reasoning" suffix.
//
// The empty string is always rejected.
func (defaultModelTierChecker) IsValid(spec string) bool {
	switch {
	case spec == "":
		return false
	case model.IsTierName(spec):
		// Bare tier alias.
		return true
	}

	// Tier alias with a ":suffix". The colon must not be the first byte,
	// so the prefix handed to IsTierName is never empty.
	if colon := strings.IndexByte(spec, ':'); colon > 0 && model.IsTierName(spec[:colon]) {
		return true
	}

	// Everything else only has to look like "provider/model"; whether
	// the provider and model actually exist is not checked here.
	return strings.Contains(spec, "/")
}

// ValidateOpts customises what Validate accepts. All fields are optional;
// nil checkers fall back to permissive defaults (a nil Filters skips the
// channel-filter registry lookup, a nil ModelTier selects
// defaultModelTierChecker).
//
// Why: Validate is called from save paths (which know the registries) and
// from tests (which want to control acceptance). Bundling the deps here
// keeps the Skill API stable.
type ValidateOpts struct {
	// Filters is consulted when the skill declares a chatbot channel
	// filter. nil → channel-filter validity is not checked (use only in
	// tests).
	Filters ChannelFilterChecker
	// ModelTier checks the ModelTier spec. nil → defaultModelTierChecker.
	ModelTier ModelTierChecker
	// MinIntervalMinutes is the floor on the smallest gap between
	// consecutive fires of a skill's cron schedule. Zero → use the
	// package default (defaultMinScheduleIntervalMinutes). Tests pass an
	// explicit value to exercise the boundary.
	MinIntervalMinutes int

	// AuthorIsAdmin tells Validate the author has admin privileges.
	// When true, Validate skips both the absolute ceilings
	// (maxIterationsLimit, maxRuntime) and the tier caps, so the skill's
	// budgets are only checked for being non-negative and, for a
	// non-zero MaxRuntime, at least minRuntime.
	// SaveUserSkill passes this from s.admin.IsAdmin(sk.AuthoredBy).
	// Builtin loader sets this true to bypass the per-skill flag check
	// (builtins are trusted infrastructure).
	AuthorIsAdmin bool

	// DefaultMaxIterations / DefaultMaxToolCalls / DefaultMaxRuntimeSecs
	// override the package-default tier-1 caps. Zero → fall back to the
	// package constants of the same name. Production wiring populates
	// these from convars (skills.default_max_iterations etc.) so admins
	// can adjust the default tier without a redeploy.
	DefaultMaxIterations  int
	DefaultMaxToolCalls   int
	DefaultMaxRuntimeSecs int

	// ExtendedMaxIterations / ExtendedMaxToolCalls / ExtendedMaxRuntimeSecs
	// override the package-default tier-2 caps (the ceilings applied to a
	// non-admin author's skill when ExtendedBounds=true). Zero → fall
	// back to the package constants of the same name.
	ExtendedMaxIterations  int
	ExtendedMaxToolCalls   int
	ExtendedMaxRuntimeSecs int
}

// Tiered cap defaults. The DEFAULT tier is what a non-admin author sees
// without an explicit grant; the EXTENDED tier is what admin-granted
// skills (ExtendedBounds=true) may use. Values are tuned in the v3 spec
// "Governance: tiered resource caps" section.
//
// The package's existing absolute ceilings (maxIterationsLimit=50 and
// maxRuntime=10m) act as outer sanity bounds; the tier caps are the
// active gate at save time. Extended caps respect the absolute
// ceilings naturally (50 iter, 600s = 10min runtime).
const (
	// Default tier — non-admin authors of skills without ExtendedBounds.
	DefaultMaxIterations  = 12
	DefaultMaxToolCalls   = 30
	DefaultMaxRuntimeSecs = 60

	// Extended tier — skills with ExtendedBounds=true. Validate does not
	// apply any tier cap when ValidateOpts.AuthorIsAdmin is set.
	ExtendedMaxIterations  = 50
	ExtendedMaxToolCalls   = 150
	ExtendedMaxRuntimeSecs = 600 // 10m

	// Absolute bounds used by Validate:
	//   - maxIterationsLimit: largest MaxIterations a non-admin may save.
	//   - minRuntime: smallest non-zero MaxRuntime accepted from anyone.
	//   - maxRuntime: largest MaxRuntime a non-admin may save.
	//   - defaultMinScheduleIntervalMinutes: schedule fire-gap floor used
	//     when ValidateOpts.MinIntervalMinutes is zero.
	maxIterationsLimit                = 50
	minRuntime                        = time.Second
	maxRuntime                        = 10 * time.Minute
	defaultMinScheduleIntervalMinutes = 30

	// MaxTagsPerSkill caps the number of organisation tags any single
	// skill may carry. Generous compared to typical taxonomies (GitHub
	// allows ~10 topics/repo). The cap exists to prevent the list
	// page's chip rendering from becoming unmanageable.
	MaxTagsPerSkill = 16

	// MaxTagLength is the per-tag character ceiling. Long enough for
	// hyphenated phrases ("retro-gaming") but short enough that the
	// list-page tag dropdown stays readable.
	MaxTagLength = 32
)

// Validate enforces the skill spec invariants documented in the design
// spec ("Skill domain model" section). It is called at save time; the
// builtin loader skips authoring/share-safety checks but still runs
// Validate, so all callers can rely on a saved skill being well-formed.
//
// Why: spec rules are easy to violate by hand and silently break
// downstream (e.g. an unknown channel filter never exposes the skill to
// the chatbot). Every rule fails loudly here.
//
// What: returns the first error found; callers may surface it directly to
// users. Checks run in this order: name, system prompt, model tier,
// schedule, budgets (sign, absolute ceilings, tier caps), output target,
// input schema, tools, tags, visibility, chatbot exposure. opts may be
// the zero value, in which case channel-filter validation is skipped
// (tests). Non-positive numeric overrides in opts (MinIntervalMinutes and
// the Default*/Extended* caps) are treated as "unset" and replaced by the
// package defaults, so a bad config value can neither disable the
// schedule floor nor produce a negative cap.
//
// Side effect: once the tag checks pass, s.Tags is replaced by its
// normalised form (trimmed, lowercased, blanks and duplicates removed),
// even if a later check then fails.
//
// Test: each rejection branch has a dedicated unit test in
// validate_test.go.
func (s *Skill) Validate(opts ValidateOpts) error {
	if s == nil {
		return fmt.Errorf("skill is nil")
	}
	if strings.TrimSpace(s.Name) == "" {
		return fmt.Errorf("skill name is required")
	}
	if strings.TrimSpace(s.SystemPrompt) == "" {
		return fmt.Errorf("skill system prompt is required")
	}

	// orDefault resolves an optional numeric override: anything that is
	// not strictly positive means "use the package default".
	orDefault := func(override, def int) int {
		if override <= 0 {
			return def
		}
		return override
	}

	// ModelTier
	tierCheck := opts.ModelTier
	if tierCheck == nil {
		tierCheck = defaultModelTierChecker{}
	}
	if !tierCheck.IsValid(s.ModelTier) {
		return fmt.Errorf("unknown model tier %q (expected a tier alias or provider/model)", s.ModelTier)
	}

	// Schedule — empty means on-demand only. A non-empty value must be
	// a valid cron expression (or one of the "daily" / "weekly"
	// shorthands) AND have a smallest fire-gap >= the configured
	// min-interval floor. Both checks share the package-level
	// ParseSchedule helper so the scheduler runner uses the same parser.
	if expr := strings.TrimSpace(s.Schedule); expr != "" {
		sched, err := ParseSchedule(expr)
		if err != nil {
			return fmt.Errorf("schedule: %w", err)
		}
		minMinutes := orDefault(opts.MinIntervalMinutes, defaultMinScheduleIntervalMinutes)
		floor := time.Duration(minMinutes) * time.Minute
		if interval := ScheduleMinInterval(sched); interval < floor {
			return fmt.Errorf(
				"schedule %q runs more often than the minimum (every %s, floor is %s)",
				expr, interval.Round(time.Second), floor)
		}
	}

	// Iteration / call / runtime budgets. Zero is allowed — the executor
	// substitutes a convar-backed default. Negative is always wrong.
	// The absolute ceilings (maxIterationsLimit=50, maxRuntime=10m) are
	// outer sanity bounds; the tier caps below are the active gate.
	//
	// Why admin bypass on the outer ceilings: builtins are trusted
	// infrastructure (per the v2 "Builtin loader must bypass save-time
	// gates" lesson). The builtin loader passes AuthorIsAdmin=true so
	// trusted skills like `deepresearch` (max_iterations=100,
	// max_runtime=45m) and `research` (max_runtime=15m) can validate
	// without re-tuning the package-wide outer ceiling for everyone.
	// Non-admin authors still hit the original ceilings AND the
	// tier-based cap (default 12 iter / 60s runtime, extended 50 iter /
	// 600s runtime) — both layers stay intact for the untrusted path.
	if s.MaxIterations < 0 {
		return fmt.Errorf("max_iterations must be >= 0, got %d", s.MaxIterations)
	}
	if !opts.AuthorIsAdmin && s.MaxIterations > maxIterationsLimit {
		return fmt.Errorf("max_iterations must be 0..%d, got %d", maxIterationsLimit, s.MaxIterations)
	}
	if s.MaxToolCalls < 0 {
		return fmt.Errorf("max_tool_calls must be >= 0, got %d", s.MaxToolCalls)
	}
	if s.MaxRuntime < 0 {
		return fmt.Errorf("max_runtime must be 0 or positive, got %s", s.MaxRuntime)
	}
	if s.MaxRuntime > 0 && s.MaxRuntime < minRuntime {
		return fmt.Errorf("max_runtime must be 0 or >= %s, got %s", minRuntime, s.MaxRuntime)
	}
	if !opts.AuthorIsAdmin && s.MaxRuntime > maxRuntime {
		return fmt.Errorf("max_runtime must be 0 or in [%s..%s], got %s", minRuntime, maxRuntime, s.MaxRuntime)
	}

	// Tiered caps. Admin authors (and builtins, whose loader passes
	// AuthorIsAdmin=true) are trusted infrastructure and are not subject
	// to any tier cap, so the tier is only resolved for everyone else:
	// ExtendedBounds=true (admin-granted) selects the extended tier,
	// anything else saturates at the default tier.
	if !opts.AuthorIsAdmin {
		maxIter := orDefault(opts.DefaultMaxIterations, DefaultMaxIterations)
		maxCalls := orDefault(opts.DefaultMaxToolCalls, DefaultMaxToolCalls)
		maxRuntimeSecs := orDefault(opts.DefaultMaxRuntimeSecs, DefaultMaxRuntimeSecs)
		tier := "default"
		hint := "; ask an admin to grant extended_bounds for higher"
		if s.ExtendedBounds {
			maxIter = orDefault(opts.ExtendedMaxIterations, ExtendedMaxIterations)
			maxCalls = orDefault(opts.ExtendedMaxToolCalls, ExtendedMaxToolCalls)
			maxRuntimeSecs = orDefault(opts.ExtendedMaxRuntimeSecs, ExtendedMaxRuntimeSecs)
			tier = "extended"
			hint = "" // already at the highest tier — no upgrade path
		}

		if s.MaxIterations > maxIter {
			return fmt.Errorf("max_iterations %d exceeds %s cap (%d)%s",
				s.MaxIterations, tier, maxIter, hint)
		}
		if s.MaxToolCalls > maxCalls {
			return fmt.Errorf("max_tool_calls %d exceeds %s cap (%d)%s",
				s.MaxToolCalls, tier, maxCalls, hint)
		}
		// The cap is always positive here, so a zero MaxRuntime
		// ("use the executor default") can never trip this check.
		if s.MaxRuntime > time.Duration(maxRuntimeSecs)*time.Second {
			return fmt.Errorf("max_runtime %s exceeds %s cap (%ds)%s",
				s.MaxRuntime, tier, maxRuntimeSecs, hint)
		}
	}

	// Output target
	if !IsKnownOutputKind(s.OutputTarget.Kind) {
		return fmt.Errorf("unknown output_target.kind %q", s.OutputTarget.Kind)
	}

	// Input schema: every parameter needs a name and a known type, and
	// names must be unique.
	seenInput := make(map[string]struct{}, len(s.InputSchema))
	for i, p := range s.InputSchema {
		if strings.TrimSpace(p.Name) == "" {
			return fmt.Errorf("input_schema[%d]: Name is required", i)
		}
		if !IsKnownInputType(p.Type) {
			return fmt.Errorf("input_schema[%d] (%q): unknown type %q", i, p.Name, p.Type)
		}
		if _, dup := seenInput[p.Name]; dup {
			return fmt.Errorf("input_schema: duplicate parameter name %q", p.Name)
		}
		seenInput[p.Name] = struct{}{}
	}

	// Tools: no blank entries, no duplicates.
	seenTool := make(map[string]struct{}, len(s.Tools))
	for _, t := range s.Tools {
		if strings.TrimSpace(t) == "" {
			return fmt.Errorf("tools: empty tool name")
		}
		if _, dup := seenTool[t]; dup {
			return fmt.Errorf("tools: duplicate tool name %q", t)
		}
		seenTool[t] = struct{}{}
	}

	// Tags — normalise + bounds-check. The caller may pass user input
	// directly; we trim, lowercase, dedup, and bound count + per-tag
	// length. Replacing s.Tags with the normalised slice is intentional
	// so callers don't need a separate normalise pass.
	//
	// Why caps (16 tags / 32 chars): both are generous for human-
	// curated organisation labels (compare to GitHub's 10 topics/repo
	// + ~50 chars). The aim is rejecting accidental data dumps and
	// keeping the list-page chip rendering manageable, not strict
	// taxonomy enforcement.
	if len(s.Tags) > MaxTagsPerSkill {
		return fmt.Errorf("tags: too many (max %d, got %d)", MaxTagsPerSkill, len(s.Tags))
	}
	if len(s.Tags) > 0 {
		seenTag := make(map[string]struct{}, len(s.Tags))
		out := make([]string, 0, len(s.Tags))
		for _, raw := range s.Tags {
			t := strings.ToLower(strings.TrimSpace(raw))
			if t == "" {
				continue
			}
			// Count runes, not bytes: the limit is expressed in
			// characters, and a byte count would reject short
			// non-ASCII tags well before they reach MaxTagLength.
			if utf8.RuneCountInString(t) > MaxTagLength {
				return fmt.Errorf("tags: %q exceeds %d chars", t, MaxTagLength)
			}
			if _, dup := seenTag[t]; dup {
				continue
			}
			seenTag[t] = struct{}{}
			out = append(out, t)
		}
		s.Tags = out
	}

	// Visibility
	if !IsKnownVisibility(s.Visibility) {
		return fmt.Errorf("unknown visibility %q", s.Visibility)
	}
	if s.Visibility == VisibilityShared && len(s.SharedWith) == 0 {
		return fmt.Errorf("visibility=shared requires non-empty shared_with")
	}

	// Chatbot exposure: all three descriptive fields are mandatory, and
	// the channel filter must be registered when a registry is supplied.
	if s.ExposeAsChatbotTool {
		if strings.TrimSpace(s.ChatbotToolName) == "" {
			return fmt.Errorf("expose_as_chatbot_tool=true requires chatbot_tool_name")
		}
		if strings.TrimSpace(s.ChatbotToolDescription) == "" {
			return fmt.Errorf("expose_as_chatbot_tool=true requires chatbot_tool_description")
		}
		if strings.TrimSpace(s.ChatbotChannelFilter) == "" {
			return fmt.Errorf("expose_as_chatbot_tool=true requires chatbot_channel_filter")
		}
		if opts.Filters != nil && !opts.Filters.Has(s.ChatbotChannelFilter) {
			return fmt.Errorf("unknown chatbot_channel_filter %q (not registered)", s.ChatbotChannelFilter)
		}
	}

	return nil
}