diff --git a/browser_init.go b/browser_init.go
index fa3d96a..de4cb77 100644
--- a/browser_init.go
+++ b/browser_init.go
@@ -52,6 +52,21 @@ func initBrowser(opt BrowserOptions) (*browserInitResult, error) {
 		return nil, ErrInvalidBrowserSelection
 	}
 
+	// Collect launch args and init scripts, starting with any stealth-mode presets.
+	stealth := opt.Stealth == nil || *opt.Stealth
+	var launchArgs []string
+	var initScripts []string
+
+	if stealth {
+		if opt.Browser == BrowserChromium {
+			launchArgs = append(launchArgs, stealthChromiumArgs...)
+		}
+		initScripts = append(initScripts, stealthInitScripts...)
+	}
+
+	launchArgs = append(launchArgs, opt.LaunchArgs...)
+	initScripts = append(initScripts, opt.InitScripts...)
+
 	var browser playwright.Browser
 	launch := true
 
@@ -71,9 +86,13 @@ func initBrowser(opt BrowserOptions) (*browserInitResult, error) {
 
 	if launch {
 		headless := opt.ShowBrowser == nil || !*opt.ShowBrowser
-		browser, err = bt.Launch(playwright.BrowserTypeLaunchOptions{
+		launchOpts := playwright.BrowserTypeLaunchOptions{
 			Headless: playwright.Bool(headless),
-		})
+		}
+		if len(launchArgs) > 0 {
+			launchOpts.Args = launchArgs
+		}
+		browser, err = bt.Launch(launchOpts)
 		if err != nil {
 			return nil, fmt.Errorf("failed to launch browser: %w", err)
 		}
@@ -103,6 +122,12 @@ func initBrowser(opt BrowserOptions) (*browserInitResult, error) {
 		return nil, fmt.Errorf("failed to create browser context: %w", err)
 	}
 
+	for i := range initScripts {
+		if err := bctx.AddInitScript(playwright.Script{Content: &initScripts[i]}); err != nil {
+			return nil, fmt.Errorf("failed to add init script: %w", err)
+		}
+	}
+
 	if opt.CookieJar != nil {
 		cookies, err := opt.CookieJar.GetAll()
 		if err != nil {
@@ -158,6 +183,15 @@ func mergeOptions(base BrowserOptions, opts []BrowserOptions) BrowserOptions {
 		if o.ShowBrowser != nil {
 			base.ShowBrowser = o.ShowBrowser
 		}
+		if len(o.LaunchArgs) > 0 {
+			base.LaunchArgs = append(base.LaunchArgs, o.LaunchArgs...)
+		}
+		if len(o.InitScripts) > 0 {
+			base.InitScripts = append(base.InitScripts, o.InitScripts...)
+		}
+		if o.Stealth != nil {
+			base.Stealth = o.Stealth
+		}
 	}
 	return base
 }
diff --git a/cmd/browser/pkg/browser/flags.go b/cmd/browser/pkg/browser/flags.go
index 730dbdf..56b0f51 100644
--- a/cmd/browser/pkg/browser/flags.go
+++ b/cmd/browser/pkg/browser/flags.go
@@ -41,6 +41,11 @@ var Flags = BrowserFlags{
 		Usage:       "If set, the browser will be visible, if not set, the browser will be headless",
 		DefaultText: "false",
 	},
+	&cli.BoolFlag{
+		Name:        "no-stealth",
+		Usage:       "Disable stealth mode (anti-bot-detection evasions are enabled by default)",
+		DefaultText: "false",
+	},
 }
 
 func FromCommand(ctx context.Context, cmd *cli.Command) (extractor.Browser, error) {
@@ -74,5 +79,9 @@ func FromCommand(ctx context.Context, cmd *cli.Command) (extractor.Browser, erro
 		opts.ShowBrowser = extractor.Bool(cmd.Bool("visible"))
 	}
 
+	if cmd.IsSet("no-stealth") && cmd.Bool("no-stealth") {
+		opts.Stealth = extractor.Bool(false)
+	}
+
 	return extractor.NewBrowser(ctx, opts)
 }
diff --git a/interactive.go b/interactive.go
index a286996..b2e0889 100644
--- a/interactive.go
+++ b/interactive.go
@@ -62,6 +62,7 @@ func NewInteractiveBrowser(ctx context.Context, opts ...BrowserOptions) (Interac
 		UserAgent: DefaultUserAgent,
 		Browser:   BrowserFirefox,
 		Timeout:   &thirtySeconds,
+		Stealth:   Bool(true),
 		Dimensions: Size{
 			Width:  1280,
 			Height: 720,
diff --git a/playwright.go b/playwright.go
index 17e571e..b8e0a81 100644
--- a/playwright.go
+++ b/playwright.go
@@ -70,6 +70,20 @@ type BrowserOptions struct {
 
 	// UseLocalOnly will, if set, not connect to the Playwright server, and instead launch a local browser.
 	UseLocalOnly bool
+
+	// LaunchArgs are additional command-line arguments passed to the browser process.
+	// For example: []string{"--disable-blink-features=AutomationControlled"}
+	LaunchArgs []string
+
+	// InitScripts are JavaScript snippets injected into every new browser context
+	// before any page scripts run. Useful for overriding detectable properties like
+	// navigator.webdriver.
+	InitScripts []string
+
+	// Stealth enables anti-bot-detection measures (launch args + init scripts).
+	// Nil or true applies the common evasions; set it to false to disable them.
+	// NewBrowser and NewInteractiveBrowser default it to true.
+	Stealth *bool
 }
 
 func sameSiteToPlaywright(s SameSite) *playwright.SameSiteAttribute {
@@ -134,6 +148,7 @@ func NewBrowser(ctx context.Context, opts ...BrowserOptions) (Browser, error) {
 		UserAgent: DefaultUserAgent,
 		Browser:   BrowserFirefox,
 		Timeout:   &thirtySeconds,
+		Stealth:   Bool(true),
 	}, opts)
 
 	if err := ctx.Err(); err != nil {
diff --git a/stealth.go b/stealth.go
new file mode 100644
index 0000000..b527d9e
--- /dev/null
+++ b/stealth.go
@@ -0,0 +1,50 @@
+package extractor
+
+// stealthChromiumArgs are launch arguments that reduce automation detection for Chromium-based browsers.
+var stealthChromiumArgs = []string{
+	"--disable-blink-features=AutomationControlled",
+}
+
+// stealthInitScripts are JavaScript snippets injected before page scripts to mask automation signals.
+var stealthInitScripts = []string{
+	// Report navigator.webdriver as false, the value a non-automated browser exposes.
+	`Object.defineProperty(navigator, 'webdriver', {get: () => false})`,
+
+	// Populate navigator.plugins with a realistic entry so plugins.length > 0.
+	`Object.defineProperty(navigator, 'plugins', {
+		get: () => {
+			const arr = [
+				{ name: 'PDF Viewer', filename: 'internal-pdf-viewer', description: 'Portable Document Format' },
+				{ name: 'Chrome PDF Plugin', filename: 'internal-pdf-viewer', description: '' },
+				{ name: 'Chromium PDF Viewer', filename: 'internal-pdf-viewer', description: '' },
+			];
+			arr.item = (i) => arr[i] || null;
+			arr.namedItem = (n) => arr.find(p => p.name === n) || null;
+			arr.refresh = () => {};
+			return arr;
+		},
+	})`,
+
+	// Populate navigator.mimeTypes to match the fake plugins above.
+	`Object.defineProperty(navigator, 'mimeTypes', {
+		get: () => {
+			const arr = [
+				{ type: 'application/pdf', suffixes: 'pdf', description: 'Portable Document Format' },
+			];
+			arr.item = (i) => arr[i] || null;
+			arr.namedItem = (n) => arr.find(m => m.type === n) || null;
+			return arr;
+		},
+	})`,
+
+	// Provide window.chrome runtime stub (Chromium-only signal; harmless on other engines).
+	`if (!window.chrome) {
+		window.chrome = { runtime: {} };
+	}`,
+
+	// Fix outerWidth/outerHeight which are 0 in headless mode.
+	`if (window.outerWidth === 0) {
+		Object.defineProperty(window, 'outerWidth', { get: () => window.innerWidth });
+		Object.defineProperty(window, 'outerHeight', { get: () => window.innerHeight });
+	}`,
+}
diff --git a/stealth_test.go b/stealth_test.go
new file mode 100644
index 0000000..b547966
--- /dev/null
+++ b/stealth_test.go
@@ -0,0 +1,72 @@
+package extractor
+
+import (
+	"testing"
+)
+
+func TestMergeOptions_StealthDefault(t *testing.T) {
+	base := BrowserOptions{Stealth: Bool(true)}
+	got := mergeOptions(base, nil)
+	if got.Stealth == nil || !*got.Stealth {
+		t.Fatal("expected stealth to default to true")
+	}
+}
+
+func TestMergeOptions_StealthOverrideFalse(t *testing.T) {
+	base := BrowserOptions{Stealth: Bool(true)}
+	got := mergeOptions(base, []BrowserOptions{{Stealth: Bool(false)}})
+	if got.Stealth == nil || *got.Stealth {
+		t.Fatal("expected stealth to be overridden to false")
+	}
+}
+
+func TestMergeOptions_LaunchArgsAppend(t *testing.T) {
+	base := BrowserOptions{LaunchArgs: []string{"--arg1"}}
+	got := mergeOptions(base, []BrowserOptions{{LaunchArgs: []string{"--arg2", "--arg3"}}})
+	if len(got.LaunchArgs) != 3 {
+		t.Fatalf("expected 3 launch args, got %d", len(got.LaunchArgs))
+	}
+	if got.LaunchArgs[0] != "--arg1" || got.LaunchArgs[1] != "--arg2" || got.LaunchArgs[2] != "--arg3" {
+		t.Fatalf("unexpected launch args: %v", got.LaunchArgs)
+	}
+}
+
+func TestMergeOptions_InitScriptsAppend(t *testing.T) {
+	base := BrowserOptions{InitScripts: []string{"script1"}}
+	got := mergeOptions(base, []BrowserOptions{{InitScripts: []string{"script2"}}})
+	if len(got.InitScripts) != 2 {
+		t.Fatalf("expected 2 init scripts, got %d", len(got.InitScripts))
+	}
+	if got.InitScripts[0] != "script1" || got.InitScripts[1] != "script2" {
+		t.Fatalf("unexpected init scripts: %v", got.InitScripts)
+	}
+}
+
+func TestMergeOptions_StealthNilDoesNotOverride(t *testing.T) {
+	base := BrowserOptions{Stealth: Bool(true)}
+	got := mergeOptions(base, []BrowserOptions{{Stealth: nil}})
+	if got.Stealth == nil || !*got.Stealth {
+		t.Fatal("expected stealth to remain true when override is nil")
+	}
+}
+
+func TestStealthChromiumArgs(t *testing.T) {
+	if len(stealthChromiumArgs) == 0 {
+		t.Fatal("expected at least one chromium stealth arg")
+	}
+	found := false
+	for _, arg := range stealthChromiumArgs {
+		if arg == "--disable-blink-features=AutomationControlled" {
+			found = true
+		}
+	}
+	if !found {
+		t.Fatal("expected --disable-blink-features=AutomationControlled in stealth chromium args")
+	}
+}
+
+func TestStealthInitScripts(t *testing.T) {
+	if len(stealthInitScripts) == 0 {
+		t.Fatal("expected at least one stealth init script")
+	}
+}