feature: add stealth mode and launch args/init scripts #57
@@ -52,6 +52,21 @@ func initBrowser(opt BrowserOptions) (*browserInitResult, error) {
|
||||
return nil, ErrInvalidBrowserSelection
|
||||
}
|
||||
|
||||
// Collect launch args and init scripts, starting with any stealth-mode presets.
|
||||
stealth := opt.Stealth == nil || *opt.Stealth
|
||||
var launchArgs []string
|
||||
var initScripts []string
|
||||
|
||||
if stealth {
|
||||
if opt.Browser == BrowserChromium {
|
||||
launchArgs = append(launchArgs, stealthChromiumArgs...)
|
||||
}
|
||||
initScripts = append(initScripts, stealthInitScripts...)
|
||||
}
|
||||
|
||||
launchArgs = append(launchArgs, opt.LaunchArgs...)
|
||||
initScripts = append(initScripts, opt.InitScripts...)
|
||||
|
||||
var browser playwright.Browser
|
||||
launch := true
|
||||
|
||||
@@ -71,9 +86,13 @@ func initBrowser(opt BrowserOptions) (*browserInitResult, error) {
|
||||
|
||||
if launch {
|
||||
headless := opt.ShowBrowser == nil || !*opt.ShowBrowser
|
||||
browser, err = bt.Launch(playwright.BrowserTypeLaunchOptions{
|
||||
launchOpts := playwright.BrowserTypeLaunchOptions{
|
||||
Headless: playwright.Bool(headless),
|
||||
})
|
||||
}
|
||||
if len(launchArgs) > 0 {
|
||||
launchOpts.Args = launchArgs
|
||||
}
|
||||
browser, err = bt.Launch(launchOpts)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to launch browser: %w", err)
|
||||
}
|
||||
@@ -103,6 +122,12 @@ func initBrowser(opt BrowserOptions) (*browserInitResult, error) {
|
||||
return nil, fmt.Errorf("failed to create browser context: %w", err)
|
||||
}
|
||||
|
||||
for _, script := range initScripts {
|
||||
if err := bctx.AddInitScript(playwright.Script{Content: &script}); err != nil {
|
||||
return nil, fmt.Errorf("failed to add init script: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
if opt.CookieJar != nil {
|
||||
cookies, err := opt.CookieJar.GetAll()
|
||||
if err != nil {
|
||||
@@ -158,6 +183,15 @@ func mergeOptions(base BrowserOptions, opts []BrowserOptions) BrowserOptions {
|
||||
if o.ShowBrowser != nil {
|
||||
base.ShowBrowser = o.ShowBrowser
|
||||
}
|
||||
if len(o.LaunchArgs) > 0 {
|
||||
base.LaunchArgs = append(base.LaunchArgs, o.LaunchArgs...)
|
||||
}
|
||||
if len(o.InitScripts) > 0 {
|
||||
base.InitScripts = append(base.InitScripts, o.InitScripts...)
|
||||
}
|
||||
if o.Stealth != nil {
|
||||
base.Stealth = o.Stealth
|
||||
}
|
||||
}
|
||||
return base
|
||||
}
|
||||
|
||||
@@ -41,6 +41,11 @@ var Flags = BrowserFlags{
|
||||
Usage: "If set, the browser will be visible, if not set, the browser will be headless",
|
||||
DefaultText: "false",
|
||||
},
|
||||
&cli.BoolFlag{
|
||||
Name: "no-stealth",
|
||||
Usage: "Disable stealth mode (anti-bot-detection evasions are enabled by default)",
|
||||
DefaultText: "false",
|
||||
},
|
||||
}
|
||||
|
||||
func FromCommand(ctx context.Context, cmd *cli.Command) (extractor.Browser, error) {
|
||||
@@ -74,5 +79,9 @@ func FromCommand(ctx context.Context, cmd *cli.Command) (extractor.Browser, erro
|
||||
opts.ShowBrowser = extractor.Bool(cmd.Bool("visible"))
|
||||
}
|
||||
|
||||
if cmd.IsSet("no-stealth") && cmd.Bool("no-stealth") {
|
||||
opts.Stealth = extractor.Bool(false)
|
||||
}
|
||||
|
||||
return extractor.NewBrowser(ctx, opts)
|
||||
}
|
||||
|
||||
@@ -62,6 +62,7 @@ func NewInteractiveBrowser(ctx context.Context, opts ...BrowserOptions) (Interac
|
||||
UserAgent: DefaultUserAgent,
|
||||
Browser: BrowserFirefox,
|
||||
Timeout: &thirtySeconds,
|
||||
Stealth: Bool(true),
|
||||
Dimensions: Size{
|
||||
Width: 1280,
|
||||
Height: 720,
|
||||
|
||||
@@ -70,6 +70,20 @@ type BrowserOptions struct {
|
||||
|
||||
// UseLocalOnly will, if set, not connect to the Playwright server, and instead launch a local browser.
|
||||
UseLocalOnly bool
|
||||
|
||||
// LaunchArgs are additional command-line arguments passed to the browser process.
|
||||
// For example: []string{"--disable-blink-features=AutomationControlled"}
|
||||
LaunchArgs []string
|
||||
|
||||
// InitScripts are JavaScript snippets injected into every new browser context
|
||||
// before any page scripts run. Useful for overriding detectable properties like
|
||||
// navigator.webdriver.
|
||||
InitScripts []string
|
||||
|
||||
// Stealth enables anti-bot-detection measures. When non-nil and true, common
|
||||
// evasions are applied automatically (launch args + init scripts). When nil,
|
||||
// defaults to true in NewBrowser / NewInteractiveBrowser.
|
||||
Stealth *bool
|
||||
}
|
||||
|
||||
func sameSiteToPlaywright(s SameSite) *playwright.SameSiteAttribute {
|
||||
@@ -134,6 +148,7 @@ func NewBrowser(ctx context.Context, opts ...BrowserOptions) (Browser, error) {
|
||||
UserAgent: DefaultUserAgent,
|
||||
Browser: BrowserFirefox,
|
||||
Timeout: &thirtySeconds,
|
||||
Stealth: Bool(true),
|
||||
}, opts)
|
||||
|
||||
if err := ctx.Err(); err != nil {
|
||||
|
||||
50
stealth.go
Normal file
50
stealth.go
Normal file
@@ -0,0 +1,50 @@
|
||||
package extractor
|
||||
|
||||
// stealthChromiumArgs holds the extra command-line switches intended to reduce
// automation detection on Chromium-based browsers. It currently contains a
// single switch that disables the Blink "AutomationControlled" feature.
//
// Treat the slice as read-only: copy or append into a fresh slice instead of
// modifying it in place, because it is shared package-level state.
var stealthChromiumArgs = []string{
	"--disable-blink-features=AutomationControlled",
}
|
||||
|
||||
// stealthInitScripts are JavaScript snippets meant to be injected before any
// page script runs, each masking one automation signal. Every entry is a
// self-contained script; they do not depend on each other or on their order.
//
// Treat the slice as read-only: it is shared package-level state.
var stealthInitScripts = []string{
	// Make navigator.webdriver report false. The WebDriver specification
	// defines this attribute as a boolean that is false when the browser is
	// not under automation, so false (not undefined) is what an ordinary
	// browser exposes.
	`Object.defineProperty(navigator, 'webdriver', {get: () => false})`,

	// Replace navigator.plugins with an array-like holding three PDF-viewer
	// entries so plugins.length > 0. item/namedItem/refresh are stubbed to
	// mimic the PluginArray methods.
	`Object.defineProperty(navigator, 'plugins', {
	get: () => {
		const arr = [
			{ name: 'PDF Viewer', filename: 'internal-pdf-viewer', description: 'Portable Document Format' },
			{ name: 'Chrome PDF Plugin', filename: 'internal-pdf-viewer', description: '' },
			{ name: 'Chromium PDF Viewer', filename: 'internal-pdf-viewer', description: '' },
		];
		arr.item = (i) => arr[i] || null;
		arr.namedItem = (n) => arr.find(p => p.name === n) || null;
		arr.refresh = () => {};
		return arr;
	},
})`,

	// Replace navigator.mimeTypes with a single application/pdf entry to go
	// with the fake plugins above.
	`Object.defineProperty(navigator, 'mimeTypes', {
	get: () => {
		const arr = [
			{ type: 'application/pdf', suffixes: 'pdf', description: 'Portable Document Format' },
		];
		arr.item = (i) => arr[i] || null;
		arr.namedItem = (n) => arr.find(m => m.type === n) || null;
		return arr;
	},
})`,

	// Provide a window.chrome runtime stub when the page has none.
	// NOTE(review): this is a Chromium-specific signal and the script carries
	// no engine check of its own — confirm that exposing window.chrome on
	// non-Chromium engines is acceptable for the callers that inject it.
	`if (!window.chrome) {
	window.chrome = { runtime: {} };
}`,

	// Mirror the inner dimensions onto outerWidth/outerHeight when outerWidth
	// reports 0 (observed in headless mode).
	`if (window.outerWidth === 0) {
	Object.defineProperty(window, 'outerWidth', { get: () => window.innerWidth });
	Object.defineProperty(window, 'outerHeight', { get: () => window.innerHeight });
}`,
}
|
||||
72
stealth_test.go
Normal file
72
stealth_test.go
Normal file
@@ -0,0 +1,72 @@
|
||||
package extractor
|
||||
|
||||
import (
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestMergeOptions_StealthDefault(t *testing.T) {
|
||||
base := BrowserOptions{Stealth: Bool(true)}
|
||||
got := mergeOptions(base, nil)
|
||||
if got.Stealth == nil || !*got.Stealth {
|
||||
t.Fatal("expected stealth to default to true")
|
||||
}
|
||||
}
|
||||
|
||||
func TestMergeOptions_StealthOverrideFalse(t *testing.T) {
|
||||
base := BrowserOptions{Stealth: Bool(true)}
|
||||
got := mergeOptions(base, []BrowserOptions{{Stealth: Bool(false)}})
|
||||
if got.Stealth == nil || *got.Stealth {
|
||||
t.Fatal("expected stealth to be overridden to false")
|
||||
}
|
||||
}
|
||||
|
||||
func TestMergeOptions_LaunchArgsAppend(t *testing.T) {
|
||||
base := BrowserOptions{LaunchArgs: []string{"--arg1"}}
|
||||
got := mergeOptions(base, []BrowserOptions{{LaunchArgs: []string{"--arg2", "--arg3"}}})
|
||||
if len(got.LaunchArgs) != 3 {
|
||||
t.Fatalf("expected 3 launch args, got %d", len(got.LaunchArgs))
|
||||
}
|
||||
if got.LaunchArgs[0] != "--arg1" || got.LaunchArgs[1] != "--arg2" || got.LaunchArgs[2] != "--arg3" {
|
||||
t.Fatalf("unexpected launch args: %v", got.LaunchArgs)
|
||||
}
|
||||
}
|
||||
|
||||
func TestMergeOptions_InitScriptsAppend(t *testing.T) {
|
||||
base := BrowserOptions{InitScripts: []string{"script1"}}
|
||||
got := mergeOptions(base, []BrowserOptions{{InitScripts: []string{"script2"}}})
|
||||
if len(got.InitScripts) != 2 {
|
||||
t.Fatalf("expected 2 init scripts, got %d", len(got.InitScripts))
|
||||
}
|
||||
if got.InitScripts[0] != "script1" || got.InitScripts[1] != "script2" {
|
||||
t.Fatalf("unexpected init scripts: %v", got.InitScripts)
|
||||
}
|
||||
}
|
||||
|
||||
func TestMergeOptions_StealthNilDoesNotOverride(t *testing.T) {
|
||||
base := BrowserOptions{Stealth: Bool(true)}
|
||||
got := mergeOptions(base, []BrowserOptions{{Stealth: nil}})
|
||||
if got.Stealth == nil || !*got.Stealth {
|
||||
t.Fatal("expected stealth to remain true when override is nil")
|
||||
}
|
||||
}
|
||||
|
||||
func TestStealthChromiumArgs(t *testing.T) {
|
||||
if len(stealthChromiumArgs) == 0 {
|
||||
t.Fatal("expected at least one chromium stealth arg")
|
||||
}
|
||||
found := false
|
||||
for _, arg := range stealthChromiumArgs {
|
||||
if arg == "--disable-blink-features=AutomationControlled" {
|
||||
found = true
|
||||
}
|
||||
}
|
||||
if !found {
|
||||
t.Fatal("expected --disable-blink-features=AutomationControlled in stealth chromium args")
|
||||
}
|
||||
}
|
||||
|
||||
func TestStealthInitScripts(t *testing.T) {
|
||||
if len(stealthInitScripts) == 0 {
|
||||
t.Fatal("expected at least one stealth init script")
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user