fix: address verified gadfly P3 review (3-cloud fleet)

All 3 cloud models converged on a real access-control bug; fixed it plus the other genuine findings (the false positives were dropped).

Security (HIGH — all 3 models):
- create_file_url skipped ValidateScope: a same-skill caller could mint a PUBLIC URL for a file scoped to another user/run. It now runs ValidateScope (admin-aware), skipped only for the descendant-grant case — mirroring the read tools.

Other real fixes:
- ValidateScope hard-coded `false` at every call site (admin branch dead) -> pass inv.CallerIsAdmin (the executor sets it via the host AdminPolicy; still false/fail-closed when no admin). Stale "no admin flag" comment corrected.
- create_file_url: ExpiresInSeconds is now clamped BEFORE the *time.Second multiply (huge values overflowed to a negative duration that slipped under the cap, minting already-expired tokens); the swallowed json.Marshal error is now returned.
- RegisterMeta: build the default budget WITH the configured MaxPerRun (was NewInMemorySearchBudget(nil) -> hardcoded 10, ignoring MetaDeps.MaxPerRun).
- classify: all-zero scores no longer return a false-positive top-1 winner; coerceClassifyScore uses strconv.ParseFloat (rejects trailing garbage like "50extra" that fmt.Sscanf silently accepted).
- file_delete: honor the descendant grant (parent can clean up a worker's artifacts) — it was the lone file tool that rejected cross-skill access outright.
- meta tools: input caps truncate at a UTF-8 rune boundary (truncateUTF8), not mid-rune.
- think: removed the dead `var _ = fmt.Errorf` import-keeper; file_save default aligned to 16 MiB (matched RegisterStore).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-26 22:31:59 -04:00
parent 78e6858751
commit d0bd3ec3d9
19 changed files with 100 additions and 34 deletions
@@ -25,6 +25,7 @@ import (
 	"context"
 	"encoding/json"
 	"fmt"
+	"strconv"
 	"strings"

 	"gitea.stevedudenhoeffer.com/steve/executus/llmmeta"
@@ -111,7 +112,7 @@ func NewClassify(helper *llmmeta.Helper, cfg ClassifyConfig, budget SearchBudget
 			}

 			if len(text) > classifyMaxInputBytes {
-				text = text[:classifyMaxInputBytes]
+				text = truncateUTF8(text, classifyMaxInputBytes)
 			}

 			// Per-run budget gate.
@@ -255,10 +256,14 @@ func coerceClassifyScore(raw any) (float64, bool) {
 	case int64:
 		return float64(v), true
 	case string:
-		s := strings.TrimSuffix(strings.TrimSpace(v), "%")
-		var f float64
-		if _, err := fmt.Sscanf(s, "%f", &f); err == nil {
-			if strings.HasSuffix(strings.TrimSpace(v), "%") {
+		trimmed := strings.TrimSpace(v)
+		hasPct := strings.HasSuffix(trimmed, "%")
+		s := strings.TrimSuffix(trimmed, "%")
+		// strconv.ParseFloat (unlike fmt.Sscanf %f) rejects trailing garbage,
+		// so "50extra" / "0.5x" are refused instead of silently parsed as 50/0.5.
+		f, err := strconv.ParseFloat(strings.TrimSpace(s), 64)
+		if err == nil {
+			if hasPct {
 				f = f / 100.0
 			}
 			return f, true
@@ -292,7 +297,10 @@ func selectClassifyLabels(scores map[string]float64, categories []string, multiL
 			bestCat = c
 		}
 	}
-	if bestCat == "" {
+	// No category fit: an all-zero score set must not yield a false-positive
+	// top-1 (the first category trivially beats the -1.0 sentinel). Returning
+	// no label keeps "nothing matched" distinguishable from "category A won".
+	if bestCat == "" || bestScore <= 0 {
 		return nil
 	}
 	return []string{bestCat}