Files
executus/tools/extract_entities.go
T
steve 1e201550b3 P3: meta + primitive tool group (think/now/cite + classify/extract/summarize)
Grow executus/tools into a real generic tool library:

- Register(reg): the always-available, zero-config tools — think, now (UTC
  unless a CurrentTimeProvider is wired), cite (inert unless a CitationStorage
  is wired). All nil-safe; a light host calls Register and is useful.
- RegisterMeta(reg, MetaDeps): the LLM-backed meta tools — classify,
  extract_entities, summarize — over the llmmeta helper. Budget defaults to the
  shipped in-memory per-run cap; Files optional; caps default.
- Seams moved (interface/type-only, no host coupling): research_providers.go
  (CurrentTimeProvider/CitationStorage/SearchBudget/PageExtractor/PDFFetcher/…)
  and file_storage.go (FileStorage + FileDomainMeta). Plus the in-memory budget
  default (research_defaults.go) and scope_validate.go.

calculate deferred (drags github.com/Krognol/go-wolfram + a module-path replace
— not worth it in the lean core for one tool). Core go.sum still free of
gorm/redis/discordgo/sqlite/wolfram.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-27 00:11:54 -04:00

343 lines
12 KiB
Go

// Package tools — v12 extract_entities.
//
// Structured-output workhorse: text + field schema → typed JSON
// object. The author specifies which fields they want and what
// types; the tool builds an appropriate prompt, asks for JSON, and
// validates + coerces the response back into the requested types.
//
// Why a structured-output tool (vs forcing the agent to write its
// own prompt): every agentic skill that needs to "pull X, Y, Z out
// of unstructured text" otherwise re-invents the same prompt-
// engineering pattern. extract_entities centralises it so authors
// just describe the schema.
//
// Type coercion: an LLM responding with "42" when an int field was
// requested is normal noise. The tool coerces strings to
// int/float/bool when possible. A coercion failure on a required field
// lands the field in missing_fields rather than the entities map; a
// coercion failure on an optional field drops the field silently.
//
// Test: extract_entities_test.go covers happy path, missing optional
// field, missing required field surfaces in missing_fields, malformed
// JSON retry, second-attempt failure, type coercion (string→int,
// string→bool), unknown field type rejected at args validation.
package tools
import (
"context"
"encoding/json"
"fmt"
"strconv"
"strings"
"gitea.stevedudenhoeffer.com/steve/executus/llmmeta"
"gitea.stevedudenhoeffer.com/steve/executus/tool"
)
// Limits applied by extract_entities.
const (
	// extractEntitiesMaxInputBytes is the hard cap, in bytes, on the
	// input text forwarded to the LLM (32 KiB).
	extractEntitiesMaxInputBytes = 32 << 10

	// extractEntitiesFallbackMaxPerRun is the per-run call cap used
	// when no ExtractEntitiesConfig is supplied (cfg is nil).
	extractEntitiesFallbackMaxPerRun = 10
)
// ExtractEntitiesConfig is the narrow per-deployment config surface
// extract_entities reads at execute time.
//
// It is consulted only when the tool was constructed without a
// SearchBudget: in that case MaxPerRun seeds the fallback in-memory
// budget. A nil config falls back to extractEntitiesFallbackMaxPerRun.
type ExtractEntitiesConfig interface {
// MaxPerRun returns the maximum number of extract_entities calls
// allowed within a single run.
MaxPerRun(ctx context.Context) int
}
// extractField is one row in the schema the agent supplies. The five
// supported types ('string', 'int', 'float', 'bool',
// 'list_of_strings') match the JSON-shape primitives we can validate +
// coerce reliably.
//
// Why an enum-shaped Type field (vs free-form): we need to know how
// to validate the LLM's reply. Free-form ("integer", "Number",
// "boolean") would invite typos that silently miss the validation.
type extractField struct {
// Name is the key looked up in the LLM's JSON reply and used in the
// returned entities object.
Name string `json:"name" description:"Field name to populate (e.g. 'author', 'year_published'). Becomes a key in the returned entities object."`
// Description is passed through to the prompt as a hint for the model.
Description string `json:"description" description:"Short description of what to extract (e.g. 'the book author', 'the year the article was published'). Helps the model find the right value."`
// Type selects the validation/coercion rule; it is matched
// case-insensitively after trimming surrounding whitespace.
Type string `json:"type" description:"One of: 'string', 'int', 'float', 'bool', 'list_of_strings'. Determines how the LLM's reply is validated and coerced."`
// Required controls whether an absent or uncoercible value is
// reported in missing_fields (true) or dropped silently (false).
Required bool `json:"required,omitempty" description:"When true, a missing/uncoercible value lands in missing_fields rather than skipping silently."`
}
// extractEntitiesArgs is the LLM-facing param struct.
//
// Both members are mandatory: the tool answers with an error result
// when Text is blank or Fields is empty, before any LLM call is made.
type extractEntitiesArgs struct {
// Text is the source material; anything beyond
// extractEntitiesMaxInputBytes is cut off before prompting.
Text string `json:"text" description:"The text to extract from. Required. Capped at 32KB."`
// Fields is the extraction schema, one extractField per requested value.
Fields []extractField `json:"fields" description:"Schema describing what to extract. Each field has name, description, type, and optional required flag."`
}
// extractEntitiesResult is the JSON payload the tool returns to the
// agent. Every member is omitted from the output when empty, so a
// reply carries only the parts relevant to its outcome.
type extractEntitiesResult struct {
// Entities maps field name to the coerced value for every field that
// was present in the reply and matched its requested type.
Entities map[string]any `json:"entities,omitempty"`
// MissingFields lists required fields that were absent, null, or
// could not be coerced.
MissingFields []string `json:"missing_fields,omitempty"`
// ModelUsed echoes the model identifier reported by the LLM helper.
ModelUsed string `json:"model_used,omitempty"`
// RawReply carries the model's unparsed text when the reply could
// not be used as a JSON object, so the agent can salvage it.
RawReply string `json:"raw_reply,omitempty"`
// Error is a short failure description or code (validation message,
// LLM error kind, "extraction_failed", budget code, ...).
Error string `json:"error,omitempty"`
// BudgetMsg is the human-readable explanation sent alongside a
// budget-exceeded Error.
BudgetMsg string `json:"budget_message,omitempty"`
}
// validExtractTypes is the closed set of Type strings the tool
// accepts. Anything else is rejected at args validation.
//
// Keys are lower-case; callers normalise the agent-supplied Type
// (trim + lower-case) before the lookup. A missing key yields false.
var validExtractTypes = map[string]bool{
"string": true,
"int": true,
"float": true,
"bool": true,
"list_of_strings": true,
}
// NewExtractEntities constructs the extract_entities tool.
//
// Parameters:
//   - helper: performs the LLM call. When nil, every invocation fails
//     with a "not configured" error.
//   - cfg: optional; consulted only when budget is nil, to size the
//     fallback in-memory per-run budget. A nil cfg uses
//     extractEntitiesFallbackMaxPerRun.
//   - budget: optional per-run call budget. When nil, an in-memory
//     budget is created on first use.
//
// Each invocation validates the arguments (non-blank text, non-empty
// schema, known field types) before charging the budget, so malformed
// calls never cost an LLM request. Expected failures (bad args, budget
// exceeded, unusable model reply) are reported as a JSON result with
// the error field set and a nil Go error; only a missing helper or a
// transport-level error from the helper is returned as a Go error.
func NewExtractEntities(helper *llmmeta.Helper, cfg ExtractEntitiesConfig, budget SearchBudget) tool.Tool {
	return tool.NewGatedTool[extractEntitiesArgs](
		"extract_entities",
		"Extract structured fields from unstructured text via a fast LLM. Caller supplies a schema (each field has name + description + type + required); tool returns an entities object with values matching the requested types. Types: string, int, float, bool, list_of_strings. Counts against per-run and 7-day cost budgets.",
		tool.Permission{
			AuthoringRequirement: tool.RequirementAnyone,
			OperatesOn:           tool.ScopeCaller,
			SafeForShare:         true,
			Categories:           []string{"llm-meta", "cost-bearing"},
		},
		func(ctx context.Context, inv tool.Invocation, args extractEntitiesArgs) (string, error) {
			if helper == nil {
				return "", fmt.Errorf("extract_entities: not configured")
			}
			text := args.Text
			if strings.TrimSpace(text) == "" {
				return marshalExtractEntities(extractEntitiesResult{Error: "text is empty"}), nil
			}
			if len(args.Fields) == 0 {
				return marshalExtractEntities(extractEntitiesResult{Error: "fields is empty"}), nil
			}
			// Validate each field's Type before paying for an LLM
			// call.
			for _, f := range args.Fields {
				if strings.TrimSpace(f.Name) == "" {
					return marshalExtractEntities(extractEntitiesResult{Error: "field with empty name"}), nil
				}
				if !validExtractTypes[strings.ToLower(strings.TrimSpace(f.Type))] {
					return marshalExtractEntities(extractEntitiesResult{
						Error: fmt.Sprintf("field %q has unsupported type %q (allowed: string|int|float|bool|list_of_strings)", f.Name, f.Type),
					}), nil
				}
			}
			if len(text) > extractEntitiesMaxInputBytes {
				// String slicing is byte-indexed, so cutting exactly at
				// the cap can split a multi-byte UTF-8 sequence and hand
				// the model invalid text. While the first dropped byte
				// is a continuation byte (10xxxxxx) the cut is inside a
				// rune, so step back to that rune's start. A rune has at
				// most three continuation bytes, which bounds the walk
				// even on input that is not valid UTF-8.
				cut := extractEntitiesMaxInputBytes
				for back := 0; back < 3 && cut > 0 && text[cut]&0xC0 == 0x80; back++ {
					cut--
				}
				text = text[:cut]
			}
			// Per-run budget gate.
			//
			// NOTE(review): the fallback is stored in the captured
			// budget variable, so it is built once (on the first
			// invocation) and reused afterwards. The write is not
			// synchronized — confirm that invocations of one tool
			// instance never run concurrently.
			if budget == nil {
				maxPerRun := extractEntitiesFallbackMaxPerRun
				if cfg != nil {
					maxPerRun = cfg.MaxPerRun(ctx)
				}
				budget = NewInMemorySearchBudget(map[string]int{
					"extract_entities": maxPerRun,
				})
			}
			// "limit" rather than "max": avoids shadowing the builtin.
			count, limit, exceeded := budget.CheckAndIncrement(ctx, inv.RunID, "extract_entities")
			if exceeded {
				return marshalExtractEntities(extractEntitiesResult{
					Error:     "extract_entities_budget_exceeded",
					BudgetMsg: fmt.Sprintf("per-run extract_entities budget exceeded (%d/%d). Ask an admin to raise skills.extract_entities.max_per_run.", count, limit),
				}), nil
			}
			systemPrompt := "You extract structured data from unstructured text. Return ONLY valid JSON with the requested keys. If a value is not present in the text, omit the key. Do NOT invent values."
			userPrompt := buildExtractPrompt(text, args.Fields)
			res, callErr := helper.Call(ctx, llmmeta.CallSpec{
				Tier:                 "fast",
				SystemPrompt:         systemPrompt,
				UserPrompt:           userPrompt,
				MaxOutputTokens:      4096,
				ResponseFormat:       "json",
				RetryOnMalformedJSON: true,
				ToolName:             "extract_entities",
				RunID:                inv.RunID,
				SkillID:              inv.SkillID,
				CallerID:             inv.CallerID,
			})
			if callErr != nil {
				return "", callErr
			}
			if !res.Success {
				kind := res.ErrorKind
				if kind == "" {
					kind = "llm_unavailable"
				}
				return marshalExtractEntities(extractEntitiesResult{Error: kind}), nil
			}
			// Second-failure malformed JSON (success=true but parsed
			// is nil and ErrorKind=malformed_json). Surface the raw
			// reply so the agent can salvage.
			if res.ErrorKind == llmmeta.ErrorKindMalformedJSON || res.Parsed == nil {
				return marshalExtractEntities(extractEntitiesResult{
					Error:     "extraction_failed",
					RawReply:  res.Text,
					ModelUsed: res.ModelUsed,
				}), nil
			}
			// Valid JSON that is not an object (array, scalar) cannot
			// be matched against the schema.
			parsedMap, ok := res.Parsed.(map[string]any)
			if !ok {
				return marshalExtractEntities(extractEntitiesResult{
					Error:     "extraction_failed_not_object",
					RawReply:  res.Text,
					ModelUsed: res.ModelUsed,
				}), nil
			}
			entities, missing := coerceExtractedEntities(parsedMap, args.Fields)
			return marshalExtractEntities(extractEntitiesResult{
				Entities:      entities,
				MissingFields: missing,
				ModelUsed:     res.ModelUsed,
			}), nil
		},
	)
}
// buildExtractPrompt renders the user message sent to the model: a
// fixed instruction header, one bullet per schema field in the form
// "- name (type): description" (suffixed with " [required]" for
// required fields), and finally the source text under a "Text:"
// heading.
func buildExtractPrompt(text string, fields []extractField) string {
	var prompt strings.Builder
	prompt.WriteString("Extract the following fields from the text below. Return a JSON object with the field names as keys.\n\nFields:\n")
	for i := range fields {
		spec := fields[i]
		bullet := fmt.Sprintf("- %s (%s): %s", spec.Name, spec.Type, spec.Description)
		if spec.Required {
			bullet += " [required]"
		}
		prompt.WriteString(bullet)
		prompt.WriteString("\n")
	}
	prompt.WriteString("\nText:\n")
	prompt.WriteString(text)
	return prompt.String()
}
// coerceExtractedEntities matches the model's parsed reply against the
// requested schema. For each field, a value that is present, non-null
// and coercible to the field's type is stored under the field name in
// the returned map. Anything else counts as unmet: required fields are
// appended to the returned slice (in schema order), optional fields are
// simply left out.
func coerceExtractedEntities(parsed map[string]any, fields []extractField) (map[string]any, []string) {
	var (
		accepted = make(map[string]any, len(fields))
		unmet    []string
	)
	for i := range fields {
		field := &fields[i]
		if raw, found := parsed[field.Name]; found && raw != nil {
			if coerced, good := coerceFieldValue(raw, field.Type); good {
				accepted[field.Name] = coerced
				continue
			}
		}
		// Absent, null, or uncoercible: only required fields are reported.
		if field.Required {
			unmet = append(unmet, field.Name)
		}
	}
	return accepted, unmet
}
// coerceFieldValue attempts to convert raw (a value decoded from the
// model's JSON reply) to the requested type.
// Returns (value, true) on success or (nil, false) on failure.
//
// fieldType is matched case-insensitively after trimming whitespace.
// Result types on success:
//   - "string"          → string (numbers and bools are formatted)
//   - "int"             → int64 (whole numbers only, within int64 range)
//   - "float"           → float64 (finite values only)
//   - "bool"            → bool (also true/yes/1/y, false/no/0/n, or a number)
//   - "list_of_strings" → []string (a lone string becomes a one-element list)
//
// Why coerce (vs strict reject): LLMs frequently reply with strings
// that contain numbers ("42") or pseudo-booleans ("yes"). Strict
// rejection would force every author to clean the response themselves.
// Coercion is conservative — string "42" → int 42 succeeds; string
// "forty-two" → int 42 fails (the agent never asked for word-form
// parsing).
//
// Why non-finite floats are rejected: strconv.ParseFloat accepts
// "NaN"/"Inf"/"Infinity", but encoding/json cannot marshal those
// values, so letting one through would make the whole tool result fail
// to serialise.
func coerceFieldValue(raw any, fieldType string) (any, bool) {
	switch strings.ToLower(strings.TrimSpace(fieldType)) {
	case "string":
		switch v := raw.(type) {
		case string:
			return v, true
		case float64:
			return strconv.FormatFloat(v, 'f', -1, 64), true
		case bool:
			return strconv.FormatBool(v), true
		}
		return nil, false
	case "int":
		switch v := raw.(type) {
		case float64:
			// JSON numbers are float64 by default.
			if n, ok := extractWholeFloatToInt64(v); ok {
				return n, true
			}
		case string:
			s := strings.TrimSpace(v)
			if n, err := strconv.ParseInt(s, 10, 64); err == nil {
				return n, true
			}
			// Try float-string-with-zero-fractional ("42.0").
			if f, err := strconv.ParseFloat(s, 64); err == nil {
				if n, ok := extractWholeFloatToInt64(f); ok {
					return n, true
				}
			}
		}
		return nil, false
	case "float":
		switch v := raw.(type) {
		case float64:
			if extractIsFiniteFloat(v) {
				return v, true
			}
		case string:
			if f, err := strconv.ParseFloat(strings.TrimSpace(v), 64); err == nil && extractIsFiniteFloat(f) {
				return f, true
			}
		}
		return nil, false
	case "bool":
		switch v := raw.(type) {
		case bool:
			return v, true
		case string:
			switch strings.ToLower(strings.TrimSpace(v)) {
			case "true", "yes", "1", "y":
				return true, true
			case "false", "no", "0", "n":
				return false, true
			}
		case float64:
			return v != 0, true
		}
		return nil, false
	case "list_of_strings":
		switch v := raw.(type) {
		case []any:
			out := make([]string, 0, len(v))
			for _, e := range v {
				s, ok := e.(string)
				if !ok {
					// Mixed-type lists fail the type contract.
					return nil, false
				}
				out = append(out, s)
			}
			return out, true
		case string:
			// Single-string can be lifted into a one-element list.
			return []string{v}, true
		}
		return nil, false
	}
	return nil, false
}

// extractWholeFloatToInt64 converts f to int64 when f is a whole
// number inside the int64 range. The range is checked first because
// converting an out-of-range (or NaN) float to an integer is
// implementation-dependent in Go, which would make the round-trip
// comparison platform-specific.
func extractWholeFloatToInt64(f float64) (int64, bool) {
	const bound = 1 << 63 // 2^63, exactly representable as float64
	// Written as a negated conjunction so NaN (for which every
	// comparison is false) is rejected as well.
	if !(f >= -bound && f < bound) {
		return 0, false
	}
	n := int64(f)
	if float64(n) != f {
		return 0, false // has a fractional part
	}
	return n, true
}

// extractIsFiniteFloat reports whether f is neither NaN nor ±Inf.
func extractIsFiniteFloat(f float64) bool {
	const maxFinite = 0x1.fffffffffffffp1023 // largest finite float64
	// NaN fails both comparisons; ±Inf falls outside the bounds.
	return f >= -maxFinite && f <= maxFinite
}
// marshalExtractEntities serialises r to the JSON string handed back
// to the agent. It never fails: if r cannot be marshalled, it returns
// a JSON object whose "error" member is "marshal_failed: <cause>".
//
// The fallback is itself produced by the JSON encoder rather than by
// string formatting, so a cause containing quotes, backslashes or
// control characters is escaped and the output is always valid JSON.
func marshalExtractEntities(r extractEntitiesResult) string {
	b, err := json.Marshal(r)
	if err == nil {
		return string(b)
	}
	fallback, fallbackErr := json.Marshal(map[string]string{
		"error": "marshal_failed: " + err.Error(),
	})
	if fallbackErr != nil {
		// Not expected for a map of strings; keep the reply well-formed anyway.
		return `{"error":"marshal_failed"}`
	}
	return string(fallback)
}