feat: add claude-code/opus reviewer + max-thinking spec support (#5)
Build & push image / build-and-push (push) Successful in 15s
Build & push image / build-and-push (push) Successful in 15s
Adds claude-code/opus to gadfly's dogfood swarm (both sonnet and opus run end-to-end), bumps the image pin to :sha-80d8f53 so the clean-lens telemetry fix is live, and adds engine support for a "claude-code/<model>:max" extended-thinking spec (MAX_THINKING_TOKENS, best-effort). Validated: only 13 findings on this clean PR vs 43 on the comparable #4 — the telemetry fix works. Folded in the swarm's two real findings: a runPass env-injection test and keeping MAX_THINKING_TOKENS in claudeEnv. Follow-up enables claude-code/opus:max once this image builds. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com> Co-authored-by: Steve Dudenhoeffer <steve@stevedudenhoeffer.com> Co-committed-by: Steve Dudenhoeffer <steve@stevedudenhoeffer.com>
This commit was merged in pull request #5.
This commit is contained in:
+46
-9
@@ -7,6 +7,7 @@ import (
|
||||
"fmt"
|
||||
"os"
|
||||
"os/exec"
|
||||
"strconv"
|
||||
"strings"
|
||||
"syscall"
|
||||
"unicode/utf8"
|
||||
@@ -56,8 +57,14 @@ type claudeCodeEngine struct {
|
||||
permissionMode string // --permission-mode (default "plan": read-only, no edits)
|
||||
allowedTools string // --allowedTools value, passed verbatim ("" = omit)
|
||||
extraArgs []string // appended verbatim (GADFLY_CLAUDE_EXTRA_ARGS)
|
||||
thinkingTokens int // MAX_THINKING_TOKENS for the subprocess; 0 = leave default
|
||||
}
|
||||
|
||||
// maxThinkingTokens is the extended-thinking budget used for a "claude-code/<model>:max"
|
||||
// spec — Claude Code's high "ultrathink" tier. Set as MAX_THINKING_TOKENS on the
|
||||
// subprocess; harmless (a no-op) if the CLI build doesn't honor it.
|
||||
const maxThinkingTokens = 31999
|
||||
|
||||
// isClaudeCodeSpec reports whether a GADFLY_MODEL spec selects the claude-code
|
||||
// engine: the bare id "claude-code" or a "claude-code/<model>" form.
|
||||
func isClaudeCodeSpec(model string) bool {
|
||||
@@ -66,17 +73,30 @@ func isClaudeCodeSpec(model string) bool {
|
||||
}
|
||||
|
||||
// newClaudeCodeEngine builds the engine from the GADFLY_MODEL spec and the
|
||||
// optional GADFLY_CLAUDE_* overrides. The model after the slash in
|
||||
// "claude-code/<model>" becomes --model (e.g. "claude-code/sonnet" → "sonnet");
|
||||
// GADFLY_CLAUDE_MODEL overrides it. It does not verify the CLI is installed —
|
||||
// a missing binary surfaces as a normal pass error (advisory, never fatal).
|
||||
// optional GADFLY_CLAUDE_* overrides. The part after the slash in
|
||||
// "claude-code/<model>[:<thinking>]" becomes --model (e.g. "claude-code/sonnet"
|
||||
// → "sonnet"), with an optional thinking suffix: ":max" forces the high
|
||||
// extended-thinking budget and ":<n>" sets a specific MAX_THINKING_TOKENS.
|
||||
// GADFLY_CLAUDE_MODEL overrides the model id (the thinking suffix still applies).
|
||||
// It does not verify the CLI is installed — a missing binary surfaces as a normal
|
||||
// pass error (advisory, never fatal).
|
||||
func newClaudeCodeEngine(spec, repoDir string) *claudeCodeEngine {
|
||||
model := strings.TrimSpace(os.Getenv("GADFLY_CLAUDE_MODEL"))
|
||||
if model == "" {
|
||||
if _, after, ok := strings.Cut(strings.TrimSpace(spec), "/"); ok {
|
||||
model = strings.TrimSpace(after)
|
||||
var model string
|
||||
thinking := 0
|
||||
if _, after, ok := strings.Cut(strings.TrimSpace(spec), "/"); ok {
|
||||
after = strings.TrimSpace(after)
|
||||
// Optional ":<thinking>" suffix. Claude model aliases/ids contain no ":",
|
||||
// so a colon unambiguously separates the thinking tier here.
|
||||
if m, t, hasColon := strings.Cut(after, ":"); hasColon {
|
||||
model = strings.TrimSpace(m)
|
||||
thinking = parseThinking(strings.TrimSpace(t))
|
||||
} else {
|
||||
model = after
|
||||
}
|
||||
}
|
||||
if env := strings.TrimSpace(os.Getenv("GADFLY_CLAUDE_MODEL")); env != "" {
|
||||
model = env
|
||||
}
|
||||
return &claudeCodeEngine{
|
||||
bin: envOr("GADFLY_CLAUDE_BIN", "claude"),
|
||||
model: model,
|
||||
@@ -84,9 +104,22 @@ func newClaudeCodeEngine(spec, repoDir string) *claudeCodeEngine {
|
||||
permissionMode: envOr("GADFLY_CLAUDE_PERMISSION_MODE", "plan"),
|
||||
allowedTools: strings.TrimSpace(os.Getenv("GADFLY_CLAUDE_ALLOWED_TOOLS")),
|
||||
extraArgs: strings.Fields(os.Getenv("GADFLY_CLAUDE_EXTRA_ARGS")),
|
||||
thinkingTokens: thinking,
|
||||
}
|
||||
}
|
||||
|
||||
// parseThinking maps a spec thinking suffix to a MAX_THINKING_TOKENS budget:
|
||||
// "max" → maxThinkingTokens, a positive integer → itself, anything else → 0 (off).
|
||||
func parseThinking(s string) int {
|
||||
if strings.EqualFold(s, "max") {
|
||||
return maxThinkingTokens
|
||||
}
|
||||
if n, err := strconv.Atoi(s); err == nil && n > 0 {
|
||||
return n
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
// args assembles the `claude` argv for one pass. Factored out (and pure) so it
|
||||
// can be unit-tested without invoking the CLI. The system prompt is layered on
|
||||
// top of Claude Code's own via --append-system-prompt; the task is the -p
|
||||
@@ -116,6 +149,10 @@ func (e *claudeCodeEngine) runPass(ctx context.Context, system, task string, _ i
|
||||
cmd := exec.CommandContext(ctx, e.bin, e.args(system, task)...)
|
||||
cmd.Dir = e.repoDir
|
||||
cmd.Env = claudeEnv() // minimal env — don't hand GITEA_TOKEN et al. to the CLI
|
||||
if e.thinkingTokens > 0 {
|
||||
// Force an extended-thinking budget for this run (a "...:max" spec).
|
||||
cmd.Env = append(cmd.Env, "MAX_THINKING_TOKENS="+strconv.Itoa(e.thinkingTokens))
|
||||
}
|
||||
// Put the CLI and the Node children it spawns in their own process group and
|
||||
// kill the WHOLE group on context cancel, so a timed-out lens can't leave
|
||||
// orphaned claude/node processes behind in the container.
|
||||
@@ -184,7 +221,7 @@ func (e *claudeCodeEngine) runPass(ctx context.Context, system, task string, _ i
|
||||
func claudeEnv() []string {
|
||||
keep := func(k string) bool {
|
||||
switch k {
|
||||
case "PATH", "HOME", "USER", "LOGNAME", "TMPDIR", "LANG", "TERM", "SHELL":
|
||||
case "PATH", "HOME", "USER", "LOGNAME", "TMPDIR", "LANG", "TERM", "SHELL", "MAX_THINKING_TOKENS":
|
||||
return true
|
||||
}
|
||||
return strings.HasPrefix(k, "LC_") ||
|
||||
|
||||
@@ -217,3 +217,65 @@ func TestRunPassNonZeroNoJSON(t *testing.T) {
|
||||
t.Fatalf("non-zero exit should error with detail, got %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestClaudeCodeThinking(t *testing.T) {
|
||||
t.Setenv("GADFLY_CLAUDE_MODEL", "")
|
||||
cases := []struct {
|
||||
spec string
|
||||
wantModel string
|
||||
wantThink int
|
||||
}{
|
||||
{"claude-code/opus", "opus", 0},
|
||||
{"claude-code/opus:max", "opus", maxThinkingTokens},
|
||||
{"claude-code/sonnet:20000", "sonnet", 20000},
|
||||
{"claude-code/opus:bogus", "opus", 0}, // unrecognized suffix -> off
|
||||
{"claude-code", "", 0},
|
||||
}
|
||||
for _, c := range cases {
|
||||
e := newClaudeCodeEngine(c.spec, "/repo")
|
||||
if e.model != c.wantModel || e.thinkingTokens != c.wantThink {
|
||||
t.Errorf("newClaudeCodeEngine(%q) = (model %q, think %d), want (%q, %d)",
|
||||
c.spec, e.model, e.thinkingTokens, c.wantModel, c.wantThink)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestClaudeCodeThinkingEnvOverrideKeepsSuffix(t *testing.T) {
|
||||
// GADFLY_CLAUDE_MODEL overrides the model id, but the :max thinking from the
|
||||
// spec still applies.
|
||||
t.Setenv("GADFLY_CLAUDE_MODEL", "claude-opus-4-8")
|
||||
e := newClaudeCodeEngine("claude-code/opus:max", "/repo")
|
||||
if e.model != "claude-opus-4-8" {
|
||||
t.Errorf("model = %q, want claude-opus-4-8 (env override)", e.model)
|
||||
}
|
||||
if e.thinkingTokens != maxThinkingTokens {
|
||||
t.Errorf("thinkingTokens = %d, want %d (suffix still applies)", e.thinkingTokens, maxThinkingTokens)
|
||||
}
|
||||
}
|
||||
|
||||
// TestRunPassInjectsThinkingTokens verifies the engine actually puts
|
||||
// MAX_THINKING_TOKENS into the subprocess env for a ":max" spec (and not for a
|
||||
// plain spec). The stub echoes the value it received back as the result.
|
||||
func TestRunPassInjectsThinkingTokens(t *testing.T) {
|
||||
t.Setenv("GADFLY_CLAUDE_MODEL", "")
|
||||
t.Setenv("MAX_THINKING_TOKENS", "") // not in the test's own env
|
||||
dir := t.TempDir()
|
||||
stub := dir + "/claude-stub.sh"
|
||||
// Report the env var the CLI was launched with.
|
||||
script := "#!/bin/sh\nprintf '{\"result\":\"MTT=%s\",\"is_error\":false}' \"${MAX_THINKING_TOKENS:-unset}\"\n"
|
||||
if err := os.WriteFile(stub, []byte(script), 0o755); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
maxEng := newClaudeCodeEngine("claude-code/opus:max", dir)
|
||||
maxEng.bin = stub
|
||||
if out, err := maxEng.runPass(context.Background(), "s", "t", 0); err != nil || out != "MTT=31999" {
|
||||
t.Fatalf(":max run: got (%q, %v), want (MTT=31999, nil)", out, err)
|
||||
}
|
||||
|
||||
plainEng := newClaudeCodeEngine("claude-code/opus", dir)
|
||||
plainEng.bin = stub
|
||||
if out, err := plainEng.runPass(context.Background(), "s", "t", 0); err != nil || out != "MTT=unset" {
|
||||
t.Fatalf("plain run: got (%q, %v), want (MTT=unset, nil)", out, err)
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user