feature: add stealth mode, launch args, and init scripts to BrowserOptions
All checks were successful
CI / vet (pull_request) Successful in 1m18s
CI / build (pull_request) Successful in 1m22s
CI / test (pull_request) Successful in 1m28s

Add anti-bot-detection evasion support to reduce blocking by sites like
archive.ph. Stealth mode is enabled by default for all browsers and applies
common evasions: navigator.webdriver override, plugin/mimeType spoofing,
window.chrome stub, and outerWidth/outerHeight fixes. For Chromium,
--disable-blink-features=AutomationControlled is also added.

New BrowserOptions fields:
- Stealth *bool: toggle stealth presets (default true)
- LaunchArgs []string: custom browser launch arguments
- InitScripts []string: JavaScript injected before page scripts

Closes #56

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-17 20:10:58 +00:00
parent e8f4d64eb9
commit e94665ff25
6 changed files with 183 additions and 2 deletions

View File

@@ -52,6 +52,21 @@ func initBrowser(opt BrowserOptions) (*browserInitResult, error) {
return nil, ErrInvalidBrowserSelection return nil, ErrInvalidBrowserSelection
} }
// Collect launch args and init scripts, starting with any stealth-mode presets.
stealth := opt.Stealth == nil || *opt.Stealth
var launchArgs []string
var initScripts []string
if stealth {
if opt.Browser == BrowserChromium {
launchArgs = append(launchArgs, stealthChromiumArgs...)
}
initScripts = append(initScripts, stealthInitScripts...)
}
launchArgs = append(launchArgs, opt.LaunchArgs...)
initScripts = append(initScripts, opt.InitScripts...)
var browser playwright.Browser var browser playwright.Browser
launch := true launch := true
@@ -71,9 +86,13 @@ func initBrowser(opt BrowserOptions) (*browserInitResult, error) {
if launch { if launch {
headless := opt.ShowBrowser == nil || !*opt.ShowBrowser headless := opt.ShowBrowser == nil || !*opt.ShowBrowser
browser, err = bt.Launch(playwright.BrowserTypeLaunchOptions{ launchOpts := playwright.BrowserTypeLaunchOptions{
Headless: playwright.Bool(headless), Headless: playwright.Bool(headless),
}) }
if len(launchArgs) > 0 {
launchOpts.Args = launchArgs
}
browser, err = bt.Launch(launchOpts)
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to launch browser: %w", err) return nil, fmt.Errorf("failed to launch browser: %w", err)
} }
@@ -103,6 +122,12 @@ func initBrowser(opt BrowserOptions) (*browserInitResult, error) {
return nil, fmt.Errorf("failed to create browser context: %w", err) return nil, fmt.Errorf("failed to create browser context: %w", err)
} }
for _, script := range initScripts {
if err := bctx.AddInitScript(playwright.Script{Content: &script}); err != nil {
return nil, fmt.Errorf("failed to add init script: %w", err)
}
}
if opt.CookieJar != nil { if opt.CookieJar != nil {
cookies, err := opt.CookieJar.GetAll() cookies, err := opt.CookieJar.GetAll()
if err != nil { if err != nil {
@@ -158,6 +183,15 @@ func mergeOptions(base BrowserOptions, opts []BrowserOptions) BrowserOptions {
if o.ShowBrowser != nil { if o.ShowBrowser != nil {
base.ShowBrowser = o.ShowBrowser base.ShowBrowser = o.ShowBrowser
} }
if len(o.LaunchArgs) > 0 {
base.LaunchArgs = append(base.LaunchArgs, o.LaunchArgs...)
}
if len(o.InitScripts) > 0 {
base.InitScripts = append(base.InitScripts, o.InitScripts...)
}
if o.Stealth != nil {
base.Stealth = o.Stealth
}
} }
return base return base
} }

View File

@@ -41,6 +41,11 @@ var Flags = BrowserFlags{
Usage: "If set, the browser will be visible, if not set, the browser will be headless", Usage: "If set, the browser will be visible, if not set, the browser will be headless",
DefaultText: "false", DefaultText: "false",
}, },
&cli.BoolFlag{
Name: "no-stealth",
Usage: "Disable stealth mode (anti-bot-detection evasions are enabled by default)",
DefaultText: "false",
},
} }
func FromCommand(ctx context.Context, cmd *cli.Command) (extractor.Browser, error) { func FromCommand(ctx context.Context, cmd *cli.Command) (extractor.Browser, error) {
@@ -74,5 +79,9 @@ func FromCommand(ctx context.Context, cmd *cli.Command) (extractor.Browser, erro
opts.ShowBrowser = extractor.Bool(cmd.Bool("visible")) opts.ShowBrowser = extractor.Bool(cmd.Bool("visible"))
} }
if cmd.IsSet("no-stealth") && cmd.Bool("no-stealth") {
opts.Stealth = extractor.Bool(false)
}
return extractor.NewBrowser(ctx, opts) return extractor.NewBrowser(ctx, opts)
} }

View File

@@ -62,6 +62,7 @@ func NewInteractiveBrowser(ctx context.Context, opts ...BrowserOptions) (Interac
UserAgent: DefaultUserAgent, UserAgent: DefaultUserAgent,
Browser: BrowserFirefox, Browser: BrowserFirefox,
Timeout: &thirtySeconds, Timeout: &thirtySeconds,
Stealth: Bool(true),
Dimensions: Size{ Dimensions: Size{
Width: 1280, Width: 1280,
Height: 720, Height: 720,

View File

@@ -70,6 +70,20 @@ type BrowserOptions struct {
// UseLocalOnly will, if set, not connect to the Playwright server, and instead launch a local browser. // UseLocalOnly will, if set, not connect to the Playwright server, and instead launch a local browser.
UseLocalOnly bool UseLocalOnly bool
// LaunchArgs are additional command-line arguments passed to the browser process.
// For example: []string{"--disable-blink-features=AutomationControlled"}
LaunchArgs []string
// InitScripts are JavaScript snippets injected into every new browser context
// before any page scripts run. Useful for overriding detectable properties like
// navigator.webdriver.
InitScripts []string
// Stealth enables anti-bot-detection measures. When non-nil and true, common
// evasions are applied automatically (launch args + init scripts). When nil,
// defaults to true in NewBrowser / NewInteractiveBrowser.
Stealth *bool
} }
func sameSiteToPlaywright(s SameSite) *playwright.SameSiteAttribute { func sameSiteToPlaywright(s SameSite) *playwright.SameSiteAttribute {
@@ -134,6 +148,7 @@ func NewBrowser(ctx context.Context, opts ...BrowserOptions) (Browser, error) {
UserAgent: DefaultUserAgent, UserAgent: DefaultUserAgent,
Browser: BrowserFirefox, Browser: BrowserFirefox,
Timeout: &thirtySeconds, Timeout: &thirtySeconds,
Stealth: Bool(true),
}, opts) }, opts)
if err := ctx.Err(); err != nil { if err := ctx.Err(); err != nil {

50
stealth.go Normal file
View File

@@ -0,0 +1,50 @@
package extractor
// stealthChromiumArgs holds the extra command-line switches passed to Chromium
// when stealth mode is on. The single switch turns off the Blink
// "AutomationControlled" feature, which is intended to reduce automation
// detection on Chromium-based browsers.
var stealthChromiumArgs = []string{"--disable-blink-features=AutomationControlled"}
// stealthInitScripts are JavaScript snippets injected before page scripts to mask
// automation signals. Each entry is a self-contained script; the entries are
// independent of one another and are registered in slice order.
var stealthInitScripts = []string{
	// Report navigator.webdriver as false. The WebDriver specification defines
	// the attribute as a boolean that is false when the browser is not under
	// automation, so returning undefined would itself look like tampering.
	`Object.defineProperty(navigator, 'webdriver', {get: () => false})`,

	// Populate navigator.plugins with realistic entries so plugins.length > 0.
	// The returned array also carries item/namedItem/refresh so simple
	// PluginArray-style lookups keep working.
	`Object.defineProperty(navigator, 'plugins', {
	get: () => {
		const arr = [
			{ name: 'PDF Viewer', filename: 'internal-pdf-viewer', description: 'Portable Document Format' },
			{ name: 'Chrome PDF Plugin', filename: 'internal-pdf-viewer', description: '' },
			{ name: 'Chromium PDF Viewer', filename: 'internal-pdf-viewer', description: '' },
		];
		arr.item = (i) => arr[i] || null;
		arr.namedItem = (n) => arr.find(p => p.name === n) || null;
		arr.refresh = () => {};
		return arr;
	},
})`,

	// Populate navigator.mimeTypes to match the fake plugins above.
	`Object.defineProperty(navigator, 'mimeTypes', {
	get: () => {
		const arr = [
			{ type: 'application/pdf', suffixes: 'pdf', description: 'Portable Document Format' },
		];
		arr.item = (i) => arr[i] || null;
		arr.namedItem = (n) => arr.find(m => m.type === n) || null;
		return arr;
	},
})`,

	// Provide a window.chrome runtime stub when the page does not already have
	// one. NOTE(review): this is a Chromium-specific signal that is also
	// injected on other engines; confirm that is acceptable for Firefox/WebKit.
	`if (!window.chrome) {
	window.chrome = { runtime: {} };
}`,

	// Mirror innerWidth/innerHeight onto outerWidth/outerHeight, but only when
	// outerWidth is exactly 0 (the value the original author observed in
	// headless mode).
	`if (window.outerWidth === 0) {
	Object.defineProperty(window, 'outerWidth', { get: () => window.innerWidth });
	Object.defineProperty(window, 'outerHeight', { get: () => window.innerHeight });
}`,
}

72
stealth_test.go Normal file
View File

@@ -0,0 +1,72 @@
package extractor
import (
"testing"
)
func TestMergeOptions_StealthDefault(t *testing.T) {
base := BrowserOptions{Stealth: Bool(true)}
got := mergeOptions(base, nil)
if got.Stealth == nil || !*got.Stealth {
t.Fatal("expected stealth to default to true")
}
}
// TestMergeOptions_StealthOverrideFalse verifies that an override carrying an
// explicit Stealth=false replaces the base value of true.
func TestMergeOptions_StealthOverrideFalse(t *testing.T) {
	overrides := []BrowserOptions{{Stealth: Bool(false)}}
	merged := mergeOptions(BrowserOptions{Stealth: Bool(true)}, overrides)

	if merged.Stealth != nil && !*merged.Stealth {
		return
	}
	t.Fatal("expected stealth to be overridden to false")
}
// TestMergeOptions_LaunchArgsAppend verifies that launch args from an override
// are appended after the base's launch args, preserving order.
func TestMergeOptions_LaunchArgsAppend(t *testing.T) {
	base := BrowserOptions{LaunchArgs: []string{"--arg1"}}
	extra := BrowserOptions{LaunchArgs: []string{"--arg2", "--arg3"}}
	merged := mergeOptions(base, []BrowserOptions{extra})

	want := []string{"--arg1", "--arg2", "--arg3"}
	if n := len(merged.LaunchArgs); n != len(want) {
		t.Fatalf("expected 3 launch args, got %d", n)
	}
	for i, arg := range want {
		if merged.LaunchArgs[i] != arg {
			t.Fatalf("unexpected launch args: %v", merged.LaunchArgs)
		}
	}
}
// TestMergeOptions_InitScriptsAppend verifies that init scripts from an
// override are appended after the base's init scripts, preserving order.
func TestMergeOptions_InitScriptsAppend(t *testing.T) {
	base := BrowserOptions{InitScripts: []string{"script1"}}
	extra := BrowserOptions{InitScripts: []string{"script2"}}
	merged := mergeOptions(base, []BrowserOptions{extra})

	want := []string{"script1", "script2"}
	if n := len(merged.InitScripts); n != len(want) {
		t.Fatalf("expected 2 init scripts, got %d", n)
	}
	for i, script := range want {
		if merged.InitScripts[i] != script {
			t.Fatalf("unexpected init scripts: %v", merged.InitScripts)
		}
	}
}
// TestMergeOptions_StealthNilDoesNotOverride verifies that an override whose
// Stealth is nil leaves the base's Stealth=true untouched.
func TestMergeOptions_StealthNilDoesNotOverride(t *testing.T) {
	overrides := []BrowserOptions{{Stealth: nil}}
	merged := mergeOptions(BrowserOptions{Stealth: Bool(true)}, overrides)

	if merged.Stealth != nil && *merged.Stealth {
		return
	}
	t.Fatal("expected stealth to remain true when override is nil")
}
// TestStealthChromiumArgs verifies that the Chromium stealth args are non-empty
// and include the switch that disables the AutomationControlled Blink feature.
func TestStealthChromiumArgs(t *testing.T) {
	const want = "--disable-blink-features=AutomationControlled"

	if len(stealthChromiumArgs) == 0 {
		t.Fatal("expected at least one chromium stealth arg")
	}
	for _, candidate := range stealthChromiumArgs {
		if candidate == want {
			return
		}
	}
	t.Fatal("expected --disable-blink-features=AutomationControlled in stealth chromium args")
}
// TestStealthInitScripts verifies that at least one stealth init script is
// defined.
func TestStealthInitScripts(t *testing.T) {
	if n := len(stealthInitScripts); n < 1 {
		t.Fatal("expected at least one stealth init script")
	}
}