From bd24b0a423f3f98cfdeb25ddadf7c0bbfae60cd0 Mon Sep 17 00:00:00 2001 From: Steve Dudenhoeffer Date: Sat, 27 Jun 2026 18:39:00 -0400 Subject: [PATCH] fix: fold in PR #5 review findings (thinking-env test + keep-list) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The swarm reviewed PR #5 (8 reviewers; the telemetry fix from #4 is now live, so only 13 findings vs 43 on the comparably-clean #4 — the fix works). Folded in the two warranted ones: - engine: keep MAX_THINKING_TOKENS in claudeEnv() so a globally-set value reaches the CLI too (not just the per-spec :max append). (minimax) - test: TestRunPassInjectsThinkingTokens verifies runPass actually puts MAX_THINKING_TOKENS in the subprocess env (31999 for :max, unset for a plain spec) — the parse was tested, the injection wasn't. (minimax) The MAX_THINKING_TOKENS-is-unverified concern (minimax, qwen) is the same caveat already documented; left as-is. gofmt/vet/test -race green. Co-Authored-By: Claude Opus 4.8 (1M context) --- cmd/gadfly/engine.go | 2 +- cmd/gadfly/engine_test.go | 27 +++++++++++++++++++++++++++ 2 files changed, 28 insertions(+), 1 deletion(-) diff --git a/cmd/gadfly/engine.go b/cmd/gadfly/engine.go index 4c6d8b0..7fe4c09 100644 --- a/cmd/gadfly/engine.go +++ b/cmd/gadfly/engine.go @@ -221,7 +221,7 @@ func (e *claudeCodeEngine) runPass(ctx context.Context, system, task string, _ i func claudeEnv() []string { keep := func(k string) bool { switch k { - case "PATH", "HOME", "USER", "LOGNAME", "TMPDIR", "LANG", "TERM", "SHELL": + case "PATH", "HOME", "USER", "LOGNAME", "TMPDIR", "LANG", "TERM", "SHELL", "MAX_THINKING_TOKENS": return true } return strings.HasPrefix(k, "LC_") || diff --git a/cmd/gadfly/engine_test.go b/cmd/gadfly/engine_test.go index c4b974e..5acc756 100644 --- a/cmd/gadfly/engine_test.go +++ b/cmd/gadfly/engine_test.go @@ -252,3 +252,30 @@ func TestClaudeCodeThinkingEnvOverrideKeepsSuffix(t *testing.T) { t.Errorf("thinkingTokens = %d, want %d (suffix still applies)", e.thinkingTokens, maxThinkingTokens) } } + +// TestRunPassInjectsThinkingTokens verifies the engine actually puts +// MAX_THINKING_TOKENS into the subprocess env for a ":max" spec (and not for a +// plain spec). The stub echoes the value it received back as the result. +func TestRunPassInjectsThinkingTokens(t *testing.T) { + t.Setenv("GADFLY_CLAUDE_MODEL", "") + t.Setenv("MAX_THINKING_TOKENS", "") // not in the test's own env + dir := t.TempDir() + stub := dir + "/claude-stub.sh" + // Report the env var the CLI was launched with. + script := "#!/bin/sh\nprintf '{\"result\":\"MTT=%s\",\"is_error\":false}' \"${MAX_THINKING_TOKENS:-unset}\"\n" + if err := os.WriteFile(stub, []byte(script), 0o755); err != nil { + t.Fatal(err) + } + + maxEng := newClaudeCodeEngine("claude-code/opus:max", dir) + maxEng.bin = stub + if out, err := maxEng.runPass(context.Background(), "s", "t", 0); err != nil || out != "MTT=31999" { + t.Fatalf(":max run: got (%q, %v), want (MTT=31999, nil)", out, err) + } + + plainEng := newClaudeCodeEngine("claude-code/opus", dir) + plainEng.bin = stub + if out, err := plainEng.runPass(context.Background(), "s", "t", 0); err != nil || out != "MTT=unset" { + t.Fatalf("plain run: got (%q, %v), want (MTT=unset, nil)", out, err) + } +}