// Package tools — v12 extract_entities. // // Structured-output workhorse: text + field schema → typed JSON // object. The author specifies which fields they want and what // types; the tool builds an appropriate prompt, asks for JSON, and // validates + coerces the response back into the requested types. // // Why a structured-output tool (vs forcing the agent to write its // own prompt): every agentic skill that needs to "pull X, Y, Z out // of unstructured text" otherwise re-invents the same prompt- // engineering pattern. extract_entities centralises it so authors // just describe the schema. // // Type coercion: an LLM responding with "42" when an int field was // requested is normal noise. The tool coerces strings to // int/float/bool when possible; coercion failures land the field in // missing_fields rather than the entities map. // // Test: extract_entities_test.go covers happy path, missing optional // field, missing required field surfaces in missing_fields, malformed // JSON retry, second-attempt failure, type coercion (string→int, // string→bool), unknown field type rejected at args validation. package tools import ( "context" "encoding/json" "fmt" "strconv" "strings" "gitea.stevedudenhoeffer.com/steve/executus/llmmeta" "gitea.stevedudenhoeffer.com/steve/executus/tool" ) // extractEntitiesMaxInputBytes is the hard input cap. const extractEntitiesMaxInputBytes = 32 * 1024 // extractEntitiesFallbackMaxPerRun is the per-run cap when // ExtractEntitiesConfig is nil. const extractEntitiesFallbackMaxPerRun = 10 // ExtractEntitiesConfig is the narrow per-deployment config surface // extract_entities reads at execute time. type ExtractEntitiesConfig interface { MaxPerRun(ctx context.Context) int } // extractField is one row in the schema the agent supplies. The four // supported types match the JSON-shape primitives we can validate + // coerce reliably. // // Why an enum-shaped Type field (vs free-form): we need to know how // to validate the LLM's reply. Free-form ("integer", "Number", // "boolean") would invite typos that silently miss the validation. type extractField struct { Name string `json:"name" description:"Field name to populate (e.g. 'author', 'year_published'). Becomes a key in the returned entities object."` Description string `json:"description" description:"Short description of what to extract (e.g. 'the book author', 'the year the article was published'). Helps the model find the right value."` Type string `json:"type" description:"One of: 'string', 'int', 'float', 'bool', 'list_of_strings'. Determines how the LLM's reply is validated and coerced."` Required bool `json:"required,omitempty" description:"When true, a missing/uncoercible value lands in missing_fields rather than skipping silently."` } // extractEntitiesArgs is the LLM-facing param struct. type extractEntitiesArgs struct { Text string `json:"text" description:"The text to extract from. Required. Capped at 32KB."` Fields []extractField `json:"fields" description:"Schema describing what to extract. Each field has name, description, type, and optional required flag."` } type extractEntitiesResult struct { Entities map[string]any `json:"entities,omitempty"` MissingFields []string `json:"missing_fields,omitempty"` ModelUsed string `json:"model_used,omitempty"` RawReply string `json:"raw_reply,omitempty"` Error string `json:"error,omitempty"` BudgetMsg string `json:"budget_message,omitempty"` } // validExtractTypes is the closed set of Type strings the tool // accepts. Anything else is rejected at args validation. var validExtractTypes = map[string]bool{ "string": true, "int": true, "float": true, "bool": true, "list_of_strings": true, } // NewExtractEntities constructs the extract_entities tool. func NewExtractEntities(helper *llmmeta.Helper, cfg ExtractEntitiesConfig, budget SearchBudget) tool.Tool { return tool.NewGatedTool[extractEntitiesArgs]( "extract_entities", "Extract structured fields from unstructured text via a fast LLM. Caller supplies a schema (each field has name + description + type + required); tool returns an entities object with values matching the requested types. Types: string, int, float, bool, list_of_strings. Counts against per-run and 7-day cost budgets.", tool.Permission{ AuthoringRequirement: tool.RequirementAnyone, OperatesOn: tool.ScopeCaller, SafeForShare: true, Categories: []string{"llm-meta", "cost-bearing"}, }, func(ctx context.Context, inv tool.Invocation, args extractEntitiesArgs) (string, error) { if helper == nil { return "", fmt.Errorf("extract_entities: not configured") } text := args.Text if strings.TrimSpace(text) == "" { return marshalExtractEntities(extractEntitiesResult{Error: "text is empty"}), nil } if len(args.Fields) == 0 { return marshalExtractEntities(extractEntitiesResult{Error: "fields is empty"}), nil } // Validate each field's Type before paying for an LLM // call. for _, f := range args.Fields { if strings.TrimSpace(f.Name) == "" { return marshalExtractEntities(extractEntitiesResult{Error: "field with empty name"}), nil } if !validExtractTypes[strings.ToLower(strings.TrimSpace(f.Type))] { return marshalExtractEntities(extractEntitiesResult{ Error: fmt.Sprintf("field %q has unsupported type %q (allowed: string|int|float|bool|list_of_strings)", f.Name, f.Type), }), nil } } if len(text) > extractEntitiesMaxInputBytes { text = truncateUTF8(text, extractEntitiesMaxInputBytes) } // Per-run budget gate. if budget == nil { maxPerRun := extractEntitiesFallbackMaxPerRun if cfg != nil { maxPerRun = cfg.MaxPerRun(ctx) } budget = NewInMemorySearchBudget(map[string]int{ "extract_entities": maxPerRun, }) } count, max, exceeded := budget.CheckAndIncrement(ctx, inv.RunID, "extract_entities") if exceeded { return marshalExtractEntities(extractEntitiesResult{ Error: "extract_entities_budget_exceeded", BudgetMsg: fmt.Sprintf("per-run extract_entities budget exceeded (%d/%d). Ask an admin to raise skills.extract_entities.max_per_run.", count, max), }), nil } systemPrompt := "You extract structured data from unstructured text. Return ONLY valid JSON with the requested keys. If a value is not present in the text, omit the key. Do NOT invent values." userPrompt := buildExtractPrompt(text, args.Fields) res, callErr := helper.Call(ctx, llmmeta.CallSpec{ Tier: "fast", SystemPrompt: systemPrompt, UserPrompt: userPrompt, MaxOutputTokens: 4096, ResponseFormat: "json", RetryOnMalformedJSON: true, ToolName: "extract_entities", RunID: inv.RunID, SkillID: inv.SkillID, CallerID: inv.CallerID, }) if callErr != nil { return "", callErr } if !res.Success { kind := res.ErrorKind if kind == "" { kind = "llm_unavailable" } return marshalExtractEntities(extractEntitiesResult{Error: kind}), nil } // Second-failure malformed JSON (success=true but parsed // is nil and ErrorKind=malformed_json). Surface the raw // reply so the agent can salvage. if res.ErrorKind == llmmeta.ErrorKindMalformedJSON || res.Parsed == nil { return marshalExtractEntities(extractEntitiesResult{ Error: "extraction_failed", RawReply: res.Text, ModelUsed: res.ModelUsed, }), nil } parsedMap, ok := res.Parsed.(map[string]any) if !ok { return marshalExtractEntities(extractEntitiesResult{ Error: "extraction_failed_not_object", RawReply: res.Text, ModelUsed: res.ModelUsed, }), nil } entities, missing := coerceExtractedEntities(parsedMap, args.Fields) return marshalExtractEntities(extractEntitiesResult{ Entities: entities, MissingFields: missing, ModelUsed: res.ModelUsed, }), nil }, ) } // buildExtractPrompt composes the user message describing the schema // + source text. func buildExtractPrompt(text string, fields []extractField) string { var sb strings.Builder sb.WriteString("Extract the following fields from the text below. Return a JSON object with the field names as keys.\n\nFields:\n") for _, f := range fields { fmt.Fprintf(&sb, "- %s (%s): %s", f.Name, f.Type, f.Description) if f.Required { sb.WriteString(" [required]") } sb.WriteString("\n") } sb.WriteString("\nText:\n") sb.WriteString(text) return sb.String() } // coerceExtractedEntities walks the LLM's response, validating + (when // possible) coercing each value to the requested type. Required fields // missing or uncoercible land in missing[]; optional fields silently // drop. func coerceExtractedEntities(parsed map[string]any, fields []extractField) (map[string]any, []string) { entities := make(map[string]any, len(fields)) var missing []string for _, f := range fields { raw, present := parsed[f.Name] if !present || raw == nil { if f.Required { missing = append(missing, f.Name) } continue } value, ok := coerceFieldValue(raw, f.Type) if !ok { if f.Required { missing = append(missing, f.Name) } continue } entities[f.Name] = value } return entities, missing } // coerceFieldValue attempts to convert raw to the requested type. // Returns (value, true) on success or (nil, false) on failure. // // Why coerce (vs strict reject): LLMs frequently reply with strings // that contain numbers ("42") or pseudo-booleans ("yes"). Strict // rejection would force every author to clean the response themselves. // Coercion is conservative — string "42" → int 42 succeeds; string // "forty-two" → int 42 fails (the agent never asked for word-form // parsing). func coerceFieldValue(raw any, fieldType string) (any, bool) { switch strings.ToLower(strings.TrimSpace(fieldType)) { case "string": switch v := raw.(type) { case string: return v, true case float64: return strconv.FormatFloat(v, 'f', -1, 64), true case bool: return strconv.FormatBool(v), true } return nil, false case "int": switch v := raw.(type) { case float64: // JSON numbers are float64 by default. if v == float64(int64(v)) { return int64(v), true } return nil, false case string: if n, err := strconv.ParseInt(strings.TrimSpace(v), 10, 64); err == nil { return n, true } // Try float-string-with-zero-fractional ("42.0"). if f, err := strconv.ParseFloat(strings.TrimSpace(v), 64); err == nil && f == float64(int64(f)) { return int64(f), true } } return nil, false case "float": switch v := raw.(type) { case float64: return v, true case string: if f, err := strconv.ParseFloat(strings.TrimSpace(v), 64); err == nil { return f, true } } return nil, false case "bool": switch v := raw.(type) { case bool: return v, true case string: s := strings.ToLower(strings.TrimSpace(v)) switch s { case "true", "yes", "1", "y": return true, true case "false", "no", "0", "n": return false, true } case float64: return v != 0, true } return nil, false case "list_of_strings": switch v := raw.(type) { case []any: out := make([]string, 0, len(v)) for _, e := range v { if s, ok := e.(string); ok { out = append(out, s) } else { // Mixed-type lists fail the type contract. return nil, false } } return out, true case string: // Single-string can be lifted into a one-element list. return []string{v}, true } return nil, false } return nil, false } func marshalExtractEntities(r extractEntitiesResult) string { b, err := json.Marshal(r) if err != nil { return fmt.Sprintf(`{"error":"marshal_failed: %v"}`, err) } return string(b) }