P3: meta + primitive tool group (think/now/cite + classify/extract/summarize)

Grow executus/tools into a real generic tool library:

- Register(reg): the always-available, zero-config tools — think, now (UTC unless a CurrentTimeProvider is wired), cite (inert unless a CitationStorage is wired). All nil-safe; a light host calls Register and is useful.
- RegisterMeta(reg, MetaDeps): the LLM-backed meta tools — classify, extract_entities, summarize — over the llmmeta helper. Budget defaults to the shipped in-memory per-run cap; Files optional; caps default.
- Seams moved (interface/type-only, no host coupling): research_providers.go (CurrentTimeProvider/CitationStorage/SearchBudget/PageExtractor/PDFFetcher/…) and file_storage.go (FileStorage + FileDomainMeta). Plus the in-memory budget default (research_defaults.go) and scope_validate.go.

calculate deferred (drags github.com/Krognol/go-wolfram + a module-path replace — not worth it in the lean core for one tool). Core go.sum still free of gorm/redis/discordgo/sqlite/wolfram.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-26 21:00:45 -04:00
parent df95425bb5
commit 1e201550b3
11 changed files with 1802 additions and 17 deletions
@@ -0,0 +1,342 @@
+// Package tools — v12 extract_entities.
+//
+// Structured-output workhorse: text + field schema → typed JSON
+// object. The author specifies which fields they want and what
+// types; the tool builds an appropriate prompt, asks for JSON, and
+// validates + coerces the response back into the requested types.
+//
+// Why a structured-output tool (vs forcing the agent to write its
+// own prompt): every agentic skill that needs to "pull X, Y, Z out
+// of unstructured text" otherwise re-invents the same prompt-
+// engineering pattern. extract_entities centralises it so authors
+// just describe the schema.
+//
+// Type coercion: an LLM responding with "42" when an int field was
+// requested is normal noise. The tool coerces strings to
+// int/float/bool when possible; a value that cannot be coerced is
+// left out of the entities map and, when the field is required,
+// reported in missing_fields.
+//
+// Test: extract_entities_test.go covers happy path, missing optional
+// field, missing required field surfaces in missing_fields, malformed
+// JSON retry, second-attempt failure, type coercion (string→int,
+// string→bool), unknown field type rejected at args validation.
+package tools
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"math"
+	"strconv"
+	"strings"
+	"sync"
+	"unicode/utf8"
+
+	"gitea.stevedudenhoeffer.com/steve/executus/llmmeta"
+	"gitea.stevedudenhoeffer.com/steve/executus/tool"
+)
+
+// Limits applied by the extract_entities tool.
+const (
+	// extractEntitiesMaxInputBytes is the largest input, in bytes,
+	// that is forwarded to the model; longer text is cut down to it.
+	extractEntitiesMaxInputBytes = 32 << 10
+
+	// extractEntitiesFallbackMaxPerRun is the per-run call cap used
+	// for the default budget when no ExtractEntitiesConfig is given.
+	extractEntitiesFallbackMaxPerRun = 10
+)
+
+// ExtractEntitiesConfig is the narrow per-deployment config surface
+// extract_entities reads at execute time.
+//
+// NewExtractEntities consults it only when it was given no
+// SearchBudget: the value returned by MaxPerRun becomes the
+// "extract_entities" cap of the in-memory budget it builds. A nil
+// config falls back to extractEntitiesFallbackMaxPerRun.
+type ExtractEntitiesConfig interface {
+	// MaxPerRun returns the per-run cap on extract_entities calls.
+	MaxPerRun(ctx context.Context) int
+}
+
+// extractField is one row in the schema the agent supplies. The five
+// supported types (string, int, float, bool, list_of_strings) are the
+// JSON-shaped values we can validate + coerce reliably.
+//
+// Why an enum-shaped Type field (vs free-form): we need to know how
+// to validate the LLM's reply. Free-form ("integer", "Number",
+// "boolean") would invite typos that silently miss the validation.
+//
+// Name is the key looked up in the model's reply and used in the
+// returned entities object. Type is matched case-insensitively with
+// surrounding whitespace ignored. Required controls whether an absent
+// or uncoercible value is reported in missing_fields or dropped.
+type extractField struct {
+	Name        string `json:"name" description:"Field name to populate (e.g. 'author', 'year_published'). Becomes a key in the returned entities object."`
+	Description string `json:"description" description:"Short description of what to extract (e.g. 'the book author', 'the year the article was published'). Helps the model find the right value."`
+	Type        string `json:"type" description:"One of: 'string', 'int', 'float', 'bool', 'list_of_strings'. Determines how the LLM's reply is validated and coerced."`
+	Required    bool   `json:"required,omitempty" description:"When true, a missing/uncoercible value lands in missing_fields rather than skipping silently."`
+}
+
+// extractEntitiesArgs is the LLM-facing param struct.
+//
+// Text is the source material; the handler rejects it when it is
+// blank and truncates it to extractEntitiesMaxInputBytes. Fields is
+// the extraction schema; the handler rejects an empty list, a field
+// with a blank name, or a field whose type is not in
+// validExtractTypes before any LLM call is made.
+type extractEntitiesArgs struct {
+	Text   string         `json:"text" description:"The text to extract from. Required. Capped at 32KB."`
+	Fields []extractField `json:"fields" description:"Schema describing what to extract. Each field has name, description, type, and optional required flag."`
+}
+
+// extractEntitiesResult is the JSON document the tool returns to the
+// agent. Every field is omitted when empty.
+//
+//   - Entities: successfully coerced values keyed by field name.
+//   - MissingFields: required fields that were absent, null, or could
+//     not be coerced to the requested type.
+//   - ModelUsed: the model name reported by the llmmeta helper.
+//   - RawReply: the model's raw text, included only when the reply
+//     could not be used as a JSON object.
+//   - Error: a short machine-readable failure code or message.
+//   - BudgetMsg: a human-readable explanation set alongside Error
+//     when the per-run budget is exceeded.
+type extractEntitiesResult struct {
+	Entities      map[string]any `json:"entities,omitempty"`
+	MissingFields []string       `json:"missing_fields,omitempty"`
+	ModelUsed     string         `json:"model_used,omitempty"`
+	RawReply      string         `json:"raw_reply,omitempty"`
+	Error         string         `json:"error,omitempty"`
+	BudgetMsg     string         `json:"budget_message,omitempty"`
+}
+
+// validExtractTypes enumerates every Type string a schema field may
+// carry. Lookups are done on the lower-cased, trimmed type; a miss
+// causes the request to be rejected during argument validation.
+var validExtractTypes = map[string]bool{
+	"bool":            true,
+	"float":           true,
+	"int":             true,
+	"list_of_strings": true,
+	"string":          true,
+}
+
+// NewExtractEntities constructs the extract_entities tool.
+func NewExtractEntities(helper *llmmeta.Helper, cfg ExtractEntitiesConfig, budget SearchBudget) tool.Tool {
+	return tool.NewGatedTool[extractEntitiesArgs](
+		"extract_entities",
+		"Extract structured fields from unstructured text via a fast LLM. Caller supplies a schema (each field has name + description + type + required); tool returns an entities object with values matching the requested types. Types: string, int, float, bool, list_of_strings. Counts against per-run and 7-day cost budgets.",
+		tool.Permission{
+			AuthoringRequirement: tool.RequirementAnyone,
+			OperatesOn:           tool.ScopeCaller,
+			SafeForShare:         true,
+			Categories:           []string{"llm-meta", "cost-bearing"},
+		},
+		func(ctx context.Context, inv tool.Invocation, args extractEntitiesArgs) (string, error) {
+			if helper == nil {
+				return "", fmt.Errorf("extract_entities: not configured")
+			}
+			text := args.Text
+			if strings.TrimSpace(text) == "" {
+				return marshalExtractEntities(extractEntitiesResult{Error: "text is empty"}), nil
+			}
+			if len(args.Fields) == 0 {
+				return marshalExtractEntities(extractEntitiesResult{Error: "fields is empty"}), nil
+			}
+			// Validate each field's Type before paying for an LLM
+			// call.
+			for _, f := range args.Fields {
+				if strings.TrimSpace(f.Name) == "" {
+					return marshalExtractEntities(extractEntitiesResult{Error: "field with empty name"}), nil
+				}
+				if !validExtractTypes[strings.ToLower(strings.TrimSpace(f.Type))] {
+					return marshalExtractEntities(extractEntitiesResult{
+						Error: fmt.Sprintf("field %q has unsupported type %q (allowed: string|int|float|bool|list_of_strings)", f.Name, f.Type),
+					}), nil
+				}
+			}
+
+			if len(text) > extractEntitiesMaxInputBytes {
+				text = text[:extractEntitiesMaxInputBytes]
+			}
+
+			// Per-run budget gate.
+			if budget == nil {
+				maxPerRun := extractEntitiesFallbackMaxPerRun
+				if cfg != nil {
+					maxPerRun = cfg.MaxPerRun(ctx)
+				}
+				budget = NewInMemorySearchBudget(map[string]int{
+					"extract_entities": maxPerRun,
+				})
+			}
+			count, max, exceeded := budget.CheckAndIncrement(ctx, inv.RunID, "extract_entities")
+			if exceeded {
+				return marshalExtractEntities(extractEntitiesResult{
+					Error:     "extract_entities_budget_exceeded",
+					BudgetMsg: fmt.Sprintf("per-run extract_entities budget exceeded (%d/%d). Ask an admin to raise skills.extract_entities.max_per_run.", count, max),
+				}), nil
+			}
+
+			systemPrompt := "You extract structured data from unstructured text. Return ONLY valid JSON with the requested keys. If a value is not present in the text, omit the key. Do NOT invent values."
+			userPrompt := buildExtractPrompt(text, args.Fields)
+
+			res, callErr := helper.Call(ctx, llmmeta.CallSpec{
+				Tier:                 "fast",
+				SystemPrompt:         systemPrompt,
+				UserPrompt:           userPrompt,
+				MaxOutputTokens:      4096,
+				ResponseFormat:       "json",
+				RetryOnMalformedJSON: true,
+				ToolName:             "extract_entities",
+				RunID:                inv.RunID,
+				SkillID:              inv.SkillID,
+				CallerID:             inv.CallerID,
+			})
+			if callErr != nil {
+				return "", callErr
+			}
+			if !res.Success {
+				kind := res.ErrorKind
+				if kind == "" {
+					kind = "llm_unavailable"
+				}
+				return marshalExtractEntities(extractEntitiesResult{Error: kind}), nil
+			}
+
+			// Second-failure malformed JSON (success=true but parsed
+			// is nil and ErrorKind=malformed_json). Surface the raw
+			// reply so the agent can salvage.
+			if res.ErrorKind == llmmeta.ErrorKindMalformedJSON || res.Parsed == nil {
+				return marshalExtractEntities(extractEntitiesResult{
+					Error:     "extraction_failed",
+					RawReply:  res.Text,
+					ModelUsed: res.ModelUsed,
+				}), nil
+			}
+
+			parsedMap, ok := res.Parsed.(map[string]any)
+			if !ok {
+				return marshalExtractEntities(extractEntitiesResult{
+					Error:     "extraction_failed_not_object",
+					RawReply:  res.Text,
+					ModelUsed: res.ModelUsed,
+				}), nil
+			}
+
+			entities, missing := coerceExtractedEntities(parsedMap, args.Fields)
+			return marshalExtractEntities(extractEntitiesResult{
+				Entities:      entities,
+				MissingFields: missing,
+				ModelUsed:     res.ModelUsed,
+			}), nil
+		},
+	)
+}
+
+// buildExtractPrompt renders the user message sent to the model: a
+// fixed instruction, one "- name (type): description" bullet per
+// schema field (suffixed with " [required]" when the field is
+// required), and finally the source text under a "Text:" heading.
+func buildExtractPrompt(text string, fields []extractField) string {
+	var prompt strings.Builder
+	prompt.WriteString("Extract the following fields from the text below. Return a JSON object with the field names as keys.\n\nFields:\n")
+	for i := range fields {
+		spec := fields[i]
+		marker := ""
+		if spec.Required {
+			marker = " [required]"
+		}
+		fmt.Fprintf(&prompt, "- %s (%s): %s%s\n", spec.Name, spec.Type, spec.Description, marker)
+	}
+	prompt.WriteString("\nText:\n" + text)
+	return prompt.String()
+}
+
+// coerceExtractedEntities matches the model's reply against the
+// schema. For each field, a present, non-null value that coerces to
+// the requested type is stored under the field name. Anything else
+// (absent, null, or uncoercible) is recorded in the second return
+// value when the field is required and silently skipped otherwise.
+// Keys in parsed that the schema does not mention are ignored.
+func coerceExtractedEntities(parsed map[string]any, fields []extractField) (map[string]any, []string) {
+	var unmet []string
+	out := make(map[string]any, len(fields))
+	for i := range fields {
+		spec := fields[i]
+		if got, found := parsed[spec.Name]; found && got != nil {
+			if coerced, good := coerceFieldValue(got, spec.Type); good {
+				out[spec.Name] = coerced
+				continue
+			}
+		}
+		// Reached only when no usable value was produced.
+		if spec.Required {
+			unmet = append(unmet, spec.Name)
+		}
+	}
+	return out, unmet
+}
+
+// coerceFieldValue attempts to convert raw to the requested type.
+// Returns (value, true) on success or (nil, false) on failure.
+//
+// fieldType is matched case-insensitively with surrounding whitespace
+// ignored; an unknown type always fails. On success the dynamic type
+// of the value is string, int64, float64, bool or []string.
+//
+// Why coerce (vs strict reject): LLMs frequently reply with strings
+// that contain numbers ("42") or pseudo-booleans ("yes"). Strict
+// rejection would force every author to clean the response themselves.
+// Coercion is conservative — string "42" → int 42 succeeds; string
+// "forty-two" → int 42 fails (the agent never asked for word-form
+// parsing).
+//
+// Non-finite floats (NaN, ±Inf — which strconv.ParseFloat accepts as
+// text) are rejected: encoding/json cannot marshal them, so letting
+// one through would make the whole result fail to serialise.
+func coerceFieldValue(raw any, fieldType string) (any, bool) {
+	switch strings.ToLower(strings.TrimSpace(fieldType)) {
+	case "string":
+		switch v := raw.(type) {
+		case string:
+			return v, true
+		case float64:
+			return strconv.FormatFloat(v, 'f', -1, 64), true
+		case bool:
+			return strconv.FormatBool(v), true
+		}
+		return nil, false
+
+	case "int":
+		switch v := raw.(type) {
+		case float64:
+			// JSON numbers are float64 by default.
+			if n, ok := coerceWholeFloatToInt64(v); ok {
+				return n, true
+			}
+		case string:
+			s := strings.TrimSpace(v)
+			if n, err := strconv.ParseInt(s, 10, 64); err == nil {
+				return n, true
+			}
+			// Try float-string-with-zero-fractional ("42.0").
+			if f, err := strconv.ParseFloat(s, 64); err == nil {
+				if n, ok := coerceWholeFloatToInt64(f); ok {
+					return n, true
+				}
+			}
+		}
+		return nil, false
+
+	case "float":
+		switch v := raw.(type) {
+		case float64:
+			if !math.IsNaN(v) && !math.IsInf(v, 0) {
+				return v, true
+			}
+		case string:
+			f, err := strconv.ParseFloat(strings.TrimSpace(v), 64)
+			if err == nil && !math.IsNaN(f) && !math.IsInf(f, 0) {
+				return f, true
+			}
+		}
+		return nil, false
+
+	case "bool":
+		switch v := raw.(type) {
+		case bool:
+			return v, true
+		case string:
+			switch strings.ToLower(strings.TrimSpace(v)) {
+			case "true", "yes", "1", "y":
+				return true, true
+			case "false", "no", "0", "n":
+				return false, true
+			}
+		case float64:
+			return v != 0, true
+		}
+		return nil, false
+
+	case "list_of_strings":
+		switch v := raw.(type) {
+		case []any:
+			out := make([]string, 0, len(v))
+			for _, e := range v {
+				s, ok := e.(string)
+				if !ok {
+					// Mixed-type lists fail the type contract.
+					return nil, false
+				}
+				out = append(out, s)
+			}
+			return out, true
+		case string:
+			// Single-string can be lifted into a one-element list.
+			return []string{v}, true
+		}
+		return nil, false
+	}
+	return nil, false
+}
+
+// coerceWholeFloatToInt64 converts f to int64 only when f is a whole
+// number inside the int64 range. The range is checked first because
+// converting an out-of-range float64 to int64 yields an
+// implementation-dependent value in Go.
+func coerceWholeFloatToInt64(f float64) (int64, bool) {
+	// The upper bound is exclusive: 2^63 is the first float64 above
+	// math.MaxInt64. NaN fails the f == math.Trunc(f) test.
+	const lo, hi = -(1 << 63), 1 << 63
+	if f < lo || f >= hi || f != math.Trunc(f) {
+		return 0, false
+	}
+	return int64(f), true
+}
+
+// marshalExtractEntities serialises r to the JSON string handed back
+// to the agent. If encoding fails, it returns a JSON object with a
+// single "error" key of the form "marshal_failed: <cause>". The
+// fallback is itself produced by the JSON encoder so the cause text
+// is always correctly escaped.
+func marshalExtractEntities(r extractEntitiesResult) string {
+	b, err := json.Marshal(r)
+	if err == nil {
+		return string(b)
+	}
+	fallback, ferr := json.Marshal(map[string]string{
+		"error": "marshal_failed: " + err.Error(),
+	})
+	if ferr != nil {
+		// Encoding a map of plain strings should not fail; keep a
+		// constant, valid document as the last resort.
+		return `{"error":"marshal_failed"}`
+	}
+	return string(fallback)
+}