Files
executus/skill/validate.go
T
steve c8559676ed
executus CI / test (push) Has been cancelled
P4b: skill noun + contrib/store (SQLite for budget/persona/skill/audit)
Merges the skill half of the persona/skill pair plus the second nested module.
(Squashed onto main from phase-4b-skill; the audit/budget/persona batteries it
was stacked on already landed via the P4 merge.)

- skill/: clean-redesign Skill noun + LEAN SkillStore (lifecycle/versions/
  schedule only) + ToRunnable + Memory default.
- contrib/store/: separate go.mod carrying modernc.org/sqlite, so the driver
  never enters the core go.sum. db.Budget()/Personas()/Skills()/Audit() back
  all four store seams (JSON-blob + indexed columns; round-trip tested).
  Includes the verified gadfly #5 fixes (AppendVersion tx+UNIQUE+error,
  Mark*ScheduledRun atomic json_set, busy_timeout, NaN guard).
- CI: builds + tests the nested module and asserts it owns the sqlite driver.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-27 00:15:00 -04:00

375 lines
13 KiB
Go

package skill
import (
"fmt"
"strings"
"time"
"gitea.stevedudenhoeffer.com/steve/executus/model"
)
// ChannelFilterChecker is the slice of ChannelFilterRegistry that Validate
// needs: a membership test for a channel-filter name referenced by a skill.
//
// Why: a one-method interface lets tests hand in a tiny stub; the full
// registry is declared in channel_filters.go.
type ChannelFilterChecker interface {
	// Has reports whether a channel filter is registered under name.
	Has(name string) bool
}
// ModelTierChecker decides whether a model tier name or an explicit
// "provider/model" spec is recognised. Validate consults it so that typos
// are rejected at save time.
//
// Why: tiers come from llms.tier.* convars (fast/standard/thinking by
// default), admins may add custom tiers, and an explicit "provider/model"
// is valid too. Validate therefore accepts any non-empty spec matching
// either pattern — finer correctness is left to the LLM call.
type ModelTierChecker interface {
	// IsValid reports whether spec is an acceptable model tier or model spec.
	IsValid(spec string) bool
}
// defaultModelTierChecker is the permissive ModelTierChecker used when
// ValidateOpts.ModelTier is nil. It accepts every registered tier name
// (per model.IsTierName) and anything that looks like "provider/model",
// i.e. any spec containing a "/". Tests can swap in a stricter checker
// through ValidateOpts.ModelTier.
type defaultModelTierChecker struct{}

// IsValid reports whether spec is acceptable. In order, it accepts:
//   - a registered tier name ("thinking"),
//   - a registered tier name followed by ":" and a reasoning suffix
//     ("thinking:high"),
//   - any spec containing "/" ("provider/model", "provider/model:reasoning").
//
// The empty string is always rejected.
func (defaultModelTierChecker) IsValid(spec string) bool {
	switch {
	case spec == "":
		return false
	case model.IsTierName(spec):
		return true
	}
	// Tier-with-reasoning: everything before the first ':' must itself be a
	// tier name. A leading ':' (empty prefix) never qualifies.
	if colon := strings.IndexByte(spec, ':'); colon > 0 && model.IsTierName(spec[:colon]) {
		return true
	}
	// Explicit provider/model form: a slash anywhere is enough.
	return strings.IndexByte(spec, '/') >= 0
}
// ValidateOpts tunes what Validate accepts. Every field is optional: nil
// checkers and zero numeric values fall back to permissive or package
// defaults.
//
// Why: Validate is called from save paths (which know the registries) and
// from tests (which want to control acceptance). Carrying the dependencies
// in one struct keeps the Skill API stable.
type ValidateOpts struct {
	// Filters is consulted when the skill declares a chatbot channel
	// filter. nil → the filter name is not checked against any registry
	// (intended for tests only).
	Filters ChannelFilterChecker

	// ModelTier validates the skill's ModelTier spec.
	// nil → defaultModelTierChecker.
	ModelTier ModelTierChecker

	// MinIntervalMinutes is the floor on the smallest gap between two
	// consecutive fires of the skill's cron schedule. Zero → the package
	// default (defaultMinScheduleIntervalMinutes). Tests pass an explicit
	// value to exercise the boundary.
	MinIntervalMinutes int

	// AuthorIsAdmin tells Validate that the author has admin privileges
	// and may save with extended-tier bounds without ExtendedBounds=true.
	// SaveUserSkill passes this from s.admin.IsAdmin(sk.AuthoredBy). The
	// builtin loader sets it to true to bypass the per-skill flag check
	// (builtins are trusted infrastructure).
	AuthorIsAdmin bool

	// Default* override the package-default tier-1 caps. Zero → the
	// matching Default* constant. Production wiring populates these from
	// convars (skills.default_max_iterations etc.) so admins can adjust
	// the default tier without a redeploy.
	DefaultMaxIterations, DefaultMaxToolCalls, DefaultMaxRuntimeSecs int

	// Extended* override the package-default tier-2 caps, i.e. the
	// ceilings allowed when ExtendedBounds=true OR AuthorIsAdmin=true.
	// Zero → the matching Extended* constant.
	ExtendedMaxIterations, ExtendedMaxToolCalls, ExtendedMaxRuntimeSecs int
}
// Tiered cap defaults. The DEFAULT tier applies to a non-admin author
// without an explicit grant; the EXTENDED tier applies to admin authors
// and admin-granted skills. Values are tuned in the v3 spec section
// "Governance: tiered resource caps".
//
// The package's absolute ceilings (maxIterationsLimit=50 and
// maxRuntime=10m) are outer sanity bounds; the tier caps are the active
// gate at save time. The extended caps sit within those absolute ceilings
// (50 iterations, 600s = 10min runtime).
const (
	// Default tier — non-admin authors of skills without ExtendedBounds.
	DefaultMaxIterations  = 12
	DefaultMaxToolCalls   = 30
	DefaultMaxRuntimeSecs = 60

	// Extended tier — admin authors OR ExtendedBounds=true.
	ExtendedMaxIterations  = 50
	ExtendedMaxToolCalls   = 150
	ExtendedMaxRuntimeSecs = 10 * 60 // ten minutes, expressed in seconds

	// Absolute bounds on the iteration count and on a non-zero runtime.
	maxIterationsLimit = 50
	minRuntime         = time.Second
	maxRuntime         = 10 * time.Minute

	// defaultMinScheduleIntervalMinutes is the schedule fire-gap floor,
	// in minutes, used when ValidateOpts.MinIntervalMinutes is zero.
	defaultMinScheduleIntervalMinutes = 30

	// MaxTagsPerSkill caps how many organisation tags a single skill may
	// carry. It is generous next to typical taxonomies (GitHub allows
	// ~10 topics/repo); the cap exists so the list page's chip rendering
	// stays manageable.
	MaxTagsPerSkill = 16

	// MaxTagLength is the per-tag character ceiling: long enough for
	// hyphenated phrases ("retro-gaming"), short enough that the
	// list-page tag dropdown stays readable.
	MaxTagLength = 32
)
// Validate enforces the skill spec invariants documented in the design
// spec ("Skill domain model" section). It is called at save time; the
// builtin loader skips authoring/share-safety checks but still runs
// Validate, so all callers can rely on a saved skill being well-formed.
//
// Why: spec rules are easy to violate by hand and silently break
// downstream (e.g. an unknown channel filter never exposes the skill to
// the chatbot). Every rule fails loudly here.
//
// What: returns the first error found, or nil when the skill is
// well-formed; callers may surface the error directly to users. opts may
// be the zero value, in which case channel-filter registry validation is
// skipped (tests) and every numeric override falls back to its package
// default. Non-positive numeric overrides are treated the same as zero,
// so a misconfigured negative value can neither disable the schedule
// floor nor reject every skill.
//
// Side effect: s.Tags is normalised in place (trimmed, lower-cased, blank
// and duplicate entries dropped) once the tag checks pass; this happens
// even if a later check (visibility, chatbot exposure) then fails.
//
// Test: each rejection branch has a dedicated unit test in
// validate_test.go.
func (s *Skill) Validate(opts ValidateOpts) error {
	if s == nil {
		return fmt.Errorf("skill is nil")
	}
	if strings.TrimSpace(s.Name) == "" {
		return fmt.Errorf("skill name is required")
	}
	if strings.TrimSpace(s.SystemPrompt) == "" {
		return fmt.Errorf("skill system prompt is required")
	}

	// orDefault resolves an optional numeric override: any non-positive
	// value means "not configured" and yields the package default.
	orDefault := func(override, fallback int) int {
		if override <= 0 {
			return fallback
		}
		return override
	}

	// ModelTier
	tierCheck := opts.ModelTier
	if tierCheck == nil {
		tierCheck = defaultModelTierChecker{}
	}
	if !tierCheck.IsValid(s.ModelTier) {
		return fmt.Errorf("unknown model tier %q (expected a tier alias or provider/model)", s.ModelTier)
	}

	// Schedule — empty means on-demand only. A non-empty value must be
	// a valid cron expression (or one of the "daily" / "weekly"
	// shorthands) AND have a smallest fire-gap >= the configured
	// min-interval floor. Both checks share the package-level
	// ParseSchedule helper so the scheduler runner uses the same parser.
	if expr := strings.TrimSpace(s.Schedule); expr != "" {
		sched, err := ParseSchedule(expr)
		if err != nil {
			return fmt.Errorf("schedule: %w", err)
		}
		minMinutes := orDefault(opts.MinIntervalMinutes, defaultMinScheduleIntervalMinutes)
		floor := time.Duration(minMinutes) * time.Minute
		if interval := ScheduleMinInterval(sched); interval < floor {
			return fmt.Errorf(
				"schedule %q runs more often than the minimum (every %s, floor is %s)",
				expr, interval.Round(time.Second), floor)
		}
	}

	// Iteration / call / runtime budgets. Zero is allowed — the executor
	// substitutes a convar-backed default. Negative is always wrong.
	// The absolute ceilings (maxIterationsLimit=50, maxRuntime=10m) are
	// outer sanity bounds; the tier caps below are the active gate.
	//
	// Why admin bypass on the outer ceilings: builtins are trusted
	// infrastructure (per the v2 "Builtin loader must bypass save-time
	// gates" lesson). The builtin loader passes AuthorIsAdmin=true so
	// trusted skills like `deepresearch` (max_iterations=100,
	// max_runtime=45m) and `research` (max_runtime=15m) can validate
	// without re-tuning the package-wide ceiling for everyone.
	// Non-admin authors still hit the original ceilings AND the
	// tier-based cap (default 12 iter / 60s runtime, extended 50 iter /
	// 600s runtime) — both layers stay intact for the untrusted path.
	if s.MaxIterations < 0 {
		return fmt.Errorf("max_iterations must be >= 0, got %d", s.MaxIterations)
	}
	if !opts.AuthorIsAdmin && s.MaxIterations > maxIterationsLimit {
		return fmt.Errorf("max_iterations must be 0..%d, got %d", maxIterationsLimit, s.MaxIterations)
	}
	if s.MaxToolCalls < 0 {
		return fmt.Errorf("max_tool_calls must be >= 0, got %d", s.MaxToolCalls)
	}
	if s.MaxRuntime < 0 {
		return fmt.Errorf("max_runtime must be 0 or positive, got %s", s.MaxRuntime)
	}
	if s.MaxRuntime > 0 && s.MaxRuntime < minRuntime {
		return fmt.Errorf("max_runtime must be 0 or >= %s, got %s", minRuntime, s.MaxRuntime)
	}
	if !opts.AuthorIsAdmin && s.MaxRuntime > maxRuntime {
		return fmt.Errorf("max_runtime must be 0 or in [%s..%s], got %s", minRuntime, maxRuntime, s.MaxRuntime)
	}

	// Tiered caps. Admin authors (and builtins, whose loader passes
	// AuthorIsAdmin=true) are trusted infrastructure and skip the tier
	// gate entirely, so the caps are only resolved for non-admin authors:
	// a skill with ExtendedBounds=true (admin-granted) is held to the
	// extended tier, everything else to the default tier.
	if !opts.AuthorIsAdmin {
		maxIter := orDefault(opts.DefaultMaxIterations, DefaultMaxIterations)
		maxCalls := orDefault(opts.DefaultMaxToolCalls, DefaultMaxToolCalls)
		maxRuntimeSecs := orDefault(opts.DefaultMaxRuntimeSecs, DefaultMaxRuntimeSecs)
		tier := "default"
		hint := "; ask an admin to grant extended_bounds for higher"
		if s.ExtendedBounds {
			maxIter = orDefault(opts.ExtendedMaxIterations, ExtendedMaxIterations)
			maxCalls = orDefault(opts.ExtendedMaxToolCalls, ExtendedMaxToolCalls)
			maxRuntimeSecs = orDefault(opts.ExtendedMaxRuntimeSecs, ExtendedMaxRuntimeSecs)
			tier = "extended"
			hint = "" // already at the highest tier — no upgrade path
		}

		if s.MaxIterations > maxIter {
			return fmt.Errorf("max_iterations %d exceeds %s cap (%d)%s",
				s.MaxIterations, tier, maxIter, hint)
		}
		if s.MaxToolCalls > maxCalls {
			return fmt.Errorf("max_tool_calls %d exceeds %s cap (%d)%s",
				s.MaxToolCalls, tier, maxCalls, hint)
		}
		// The cap is always positive here, so a zero ("use the default")
		// runtime can never trip this check.
		if s.MaxRuntime > time.Duration(maxRuntimeSecs)*time.Second {
			return fmt.Errorf("max_runtime %s exceeds %s cap (%ds)%s",
				s.MaxRuntime, tier, maxRuntimeSecs, hint)
		}
	}

	// Output target
	if !IsKnownOutputKind(s.OutputTarget.Kind) {
		return fmt.Errorf("unknown output_target.kind %q", s.OutputTarget.Kind)
	}

	// Input schema: every parameter needs a name and a known type, and
	// names must be unique.
	seenInput := make(map[string]struct{}, len(s.InputSchema))
	for i, p := range s.InputSchema {
		if strings.TrimSpace(p.Name) == "" {
			return fmt.Errorf("input_schema[%d]: Name is required", i)
		}
		if !IsKnownInputType(p.Type) {
			return fmt.Errorf("input_schema[%d] (%q): unknown type %q", i, p.Name, p.Type)
		}
		if _, dup := seenInput[p.Name]; dup {
			return fmt.Errorf("input_schema: duplicate parameter name %q", p.Name)
		}
		seenInput[p.Name] = struct{}{}
	}

	// Tools: non-blank and unique.
	seenTool := make(map[string]struct{}, len(s.Tools))
	for _, t := range s.Tools {
		if strings.TrimSpace(t) == "" {
			return fmt.Errorf("tools: empty tool name")
		}
		if _, dup := seenTool[t]; dup {
			return fmt.Errorf("tools: duplicate tool name %q", t)
		}
		seenTool[t] = struct{}{}
	}

	// Tags — normalise + bounds-check. The caller may pass user input
	// directly; we trim, lowercase, dedup, and bound count + per-tag
	// length. Replacing s.Tags is intentional so callers don't need a
	// separate normalise pass.
	//
	// Why caps (16 tags / 32 chars): both are generous for human-
	// curated organisation labels (compare to GitHub's 10 topics/repo
	// + ~50 chars). The aim is rejecting accidental data dumps and
	// keeping the list-page chip rendering manageable, not strict
	// taxonomy enforcement.
	if len(s.Tags) > MaxTagsPerSkill {
		return fmt.Errorf("tags: too many (max %d, got %d)", MaxTagsPerSkill, len(s.Tags))
	}
	if len(s.Tags) > 0 {
		seenTag := make(map[string]struct{}, len(s.Tags))
		out := make([]string, 0, len(s.Tags))
		for _, raw := range s.Tags {
			t := strings.ToLower(strings.TrimSpace(raw))
			if t == "" {
				continue
			}
			// MaxTagLength is a character ceiling, so count runes rather
			// than bytes; otherwise multi-byte tags are rejected early.
			if len([]rune(t)) > MaxTagLength {
				return fmt.Errorf("tags: %q exceeds %d chars", t, MaxTagLength)
			}
			if _, dup := seenTag[t]; dup {
				continue
			}
			seenTag[t] = struct{}{}
			out = append(out, t)
		}
		s.Tags = out
	}

	// Visibility
	if !IsKnownVisibility(s.Visibility) {
		return fmt.Errorf("unknown visibility %q", s.Visibility)
	}
	if s.Visibility == VisibilityShared && len(s.SharedWith) == 0 {
		return fmt.Errorf("visibility=shared requires non-empty shared_with")
	}

	// Chatbot exposure: all three chatbot fields are mandatory once the
	// skill is exposed, and the filter must be registered when a registry
	// was supplied.
	if s.ExposeAsChatbotTool {
		if strings.TrimSpace(s.ChatbotToolName) == "" {
			return fmt.Errorf("expose_as_chatbot_tool=true requires chatbot_tool_name")
		}
		if strings.TrimSpace(s.ChatbotToolDescription) == "" {
			return fmt.Errorf("expose_as_chatbot_tool=true requires chatbot_tool_description")
		}
		if strings.TrimSpace(s.ChatbotChannelFilter) == "" {
			return fmt.Errorf("expose_as_chatbot_tool=true requires chatbot_channel_filter")
		}
		if opts.Filters != nil && !opts.Filters.Has(s.ChatbotChannelFilter) {
			return fmt.Errorf("unknown chatbot_channel_filter %q (not registered)", s.ChatbotChannelFilter)
		}
	}
	return nil
}