1fd7109a42
The agent loop took the final answer only from the terminal (no-tool-call)
turn. Models that "front-load" their answer into an earlier turn that also
calls a tool — then close with a trivial pointer like "(Already answered
above.)" — had their real answer discarded and the pointer delivered. This
recurs across several open-weight models (glm-5.2, etc.); well-behaved models
(Claude/GPT) defer their answer to the terminal turn and are unaffected.
finalOutput() now falls back to the last substantive assistant content in the
transcript when the terminal text is weak (empty, or a short back-reference).
The predicate is narrow and back-reference-gated so short-but-correct answers
("42", "It's down, restarting now.") are never overridden; recovery only picks
a prior turn that reads like a real answer, not a preamble. Zero extra model
calls. Terminal-answer behavior for normal runs is unchanged.
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
99 lines
3.7 KiB
Go
99 lines
3.7 KiB
Go
package agent
|
|
|
|
import (
|
|
"regexp"
|
|
"strings"
|
|
|
|
"gitea.stevedudenhoeffer.com/steve/majordomo/llm"
|
|
)
|
|
|
|
// finalOutput selects the user-facing answer when the loop reaches a clean
|
|
// terminal turn (one with no tool calls).
|
|
//
|
|
// Normally that terminal turn's text IS the answer: well-behaved models defer
|
|
// their answer to the final, tool-free turn. But some models — notably several
|
|
// open-weight ones — "front-load" their full answer into an earlier turn that
|
|
// ALSO calls a tool (e.g. answer text alongside a citation call), then close
|
|
// with a trivial pointer such as "(Already answered above.)". Returning only
|
|
// the terminal text would discard the real answer, which is still present
|
|
// earlier in the transcript. When the terminal text is weak (empty, or a short
|
|
// back-reference) fall back to the last substantive assistant content in msgs.
|
|
//
|
|
// msgs must already include the terminal assistant message as its last element
|
|
// (the loop appends it before calling this); terminal is that message's text.
|
|
func finalOutput(msgs []llm.Message, terminal string) string {
|
|
if !isWeakFinal(terminal) {
|
|
return terminal
|
|
}
|
|
if rec, ok := lastSubstantiveAssistantText(msgs, terminal); ok {
|
|
return rec
|
|
}
|
|
return terminal
|
|
}
|
|
|
|
// backRefRe matches a terminal turn that merely points back to an earlier
// message instead of stating the answer ("(Already answered above.)",
// "see above", "as I said", ...). Matching is case-insensitive and may occur
// anywhere in the text.
//
// The alternation is wrapped in \b word boundaries so each phrase only matches
// as whole words. Without them "as said" matches inside "has said" and
// "as noted" inside "was noted", which would flag a short, genuine answer such
// as "The vendor has said the fix ships Friday." as a back-reference.
var backRefRe = regexp.MustCompile(`(?i)\b(already answered|see above|as (i )?(said|mentioned|stated|noted)|answered (that )?above|per my (previous|earlier))\b`)
|
|
|
|
// preambleRe matches intent-announcing prefixes ("Let me search...", "I'll
// check...") so a preamble is never mistaken for the answer during recovery.
// It is anchored to the start of the text and is case-insensitive.
//
// The bare-word prefixes carry a trailing \b so they only match whole words:
// otherwise "i'?ll" matches "Illinois ...", "on it" matches "On iteration ...",
// and "let'?s" matches "Letsencrypt ...", misclassifying real answers as
// preambles. The alternatives that already end in punctuation or a space
// (first/sure/okay) are delimited by that character and need no boundary.
// Both the ASCII and the typographic apostrophe are accepted in "I'll" and
// "let's", since either may appear in model output.
var preambleRe = regexp.MustCompile(`(?i)^((let me|let['’]?s|i['’]?ll|i will|on it|checking)\b|first[, ]|sure[,. ]|okay[,. ])`)
|
|
|
|
// Thresholds for the weak-terminal check and for answer recovery. Callers
// compare them against len() of trimmed text.
const (
	// weakFinalMaxChars is the longest a terminal turn may be and still count
	// as a back-reference closer. The intent is that a genuine final answer
	// which only happens to contain "as I said" mid-sentence exceeds this
	// length and is therefore not treated as weak.
	weakFinalMaxChars = 120

	// recoverMinChars is the length at or above which an earlier assistant
	// turn is accepted as a real answer no matter how it opens.
	recoverMinChars = 200

	// recoverFloorChars and recoverRatio govern the borderline band below
	// recoverMinChars: a shorter earlier turn must reach the floor, be at
	// least recoverRatio times the length of the (very short) terminal text,
	// and not look like a preamble.
	recoverFloorChars = 80
	recoverRatio      = 3
)
|
|
|
|
// isWeakFinal reports whether a terminal turn's text fails to stand on its own
|
|
// as the answer: empty/whitespace, or a short pure back-reference.
|
|
func isWeakFinal(s string) bool {
|
|
t := strings.TrimSpace(s)
|
|
if t == "" {
|
|
return true
|
|
}
|
|
return len(t) <= weakFinalMaxChars && backRefRe.MatchString(t)
|
|
}
|
|
|
|
// lastSubstantiveAssistantText scans msgs newest→oldest (skipping the terminal
|
|
// turn and empty tool-only turns) for the most recent assistant turn whose text
|
|
// reads like a real answer. Returns ("", false) when nothing qualifies.
|
|
func lastSubstantiveAssistantText(msgs []llm.Message, terminal string) (string, bool) {
|
|
tt := strings.TrimSpace(terminal)
|
|
for i := len(msgs) - 1; i >= 0; i-- {
|
|
m := msgs[i]
|
|
if m.Role != llm.RoleAssistant {
|
|
continue
|
|
}
|
|
txt := strings.TrimSpace(m.Text())
|
|
if txt == "" || txt == tt {
|
|
continue // the terminal turn itself, or an empty tool-only turn
|
|
}
|
|
if isSubstantiveAnswer(txt, tt) {
|
|
return txt, true
|
|
}
|
|
}
|
|
return "", false
|
|
}
|
|
|
|
// isSubstantiveAnswer reports whether txt (a prior assistant turn) reads like a
|
|
// real answer rather than a preamble, relative to the terminal text.
|
|
func isSubstantiveAnswer(txt, terminal string) bool {
|
|
if len(txt) >= recoverMinChars {
|
|
return true
|
|
}
|
|
return len(txt) >= recoverFloorChars &&
|
|
len(txt) >= recoverRatio*len(terminal) &&
|
|
!preambleRe.MatchString(txt)
|
|
}
|