Merge pull request 'feature: add stealth mode and launch args/init scripts' (#57) from feature/stealth-mode into main
This commit was merged in pull request #57.
This commit is contained in:
@@ -52,6 +52,21 @@ func initBrowser(opt BrowserOptions) (*browserInitResult, error) {
|
|||||||
return nil, ErrInvalidBrowserSelection
|
return nil, ErrInvalidBrowserSelection
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Collect launch args and init scripts, starting with any stealth-mode presets.
|
||||||
|
stealth := opt.Stealth == nil || *opt.Stealth
|
||||||
|
var launchArgs []string
|
||||||
|
var initScripts []string
|
||||||
|
|
||||||
|
if stealth {
|
||||||
|
if opt.Browser == BrowserChromium {
|
||||||
|
launchArgs = append(launchArgs, stealthChromiumArgs...)
|
||||||
|
}
|
||||||
|
initScripts = append(initScripts, stealthInitScripts...)
|
||||||
|
}
|
||||||
|
|
||||||
|
launchArgs = append(launchArgs, opt.LaunchArgs...)
|
||||||
|
initScripts = append(initScripts, opt.InitScripts...)
|
||||||
|
|
||||||
var browser playwright.Browser
|
var browser playwright.Browser
|
||||||
launch := true
|
launch := true
|
||||||
|
|
||||||
@@ -71,9 +86,13 @@ func initBrowser(opt BrowserOptions) (*browserInitResult, error) {
|
|||||||
|
|
||||||
if launch {
|
if launch {
|
||||||
headless := opt.ShowBrowser == nil || !*opt.ShowBrowser
|
headless := opt.ShowBrowser == nil || !*opt.ShowBrowser
|
||||||
browser, err = bt.Launch(playwright.BrowserTypeLaunchOptions{
|
launchOpts := playwright.BrowserTypeLaunchOptions{
|
||||||
Headless: playwright.Bool(headless),
|
Headless: playwright.Bool(headless),
|
||||||
})
|
}
|
||||||
|
if len(launchArgs) > 0 {
|
||||||
|
launchOpts.Args = launchArgs
|
||||||
|
}
|
||||||
|
browser, err = bt.Launch(launchOpts)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("failed to launch browser: %w", err)
|
return nil, fmt.Errorf("failed to launch browser: %w", err)
|
||||||
}
|
}
|
||||||
@@ -103,6 +122,12 @@ func initBrowser(opt BrowserOptions) (*browserInitResult, error) {
|
|||||||
return nil, fmt.Errorf("failed to create browser context: %w", err)
|
return nil, fmt.Errorf("failed to create browser context: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
for _, script := range initScripts {
|
||||||
|
if err := bctx.AddInitScript(playwright.Script{Content: &script}); err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to add init script: %w", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if opt.CookieJar != nil {
|
if opt.CookieJar != nil {
|
||||||
cookies, err := opt.CookieJar.GetAll()
|
cookies, err := opt.CookieJar.GetAll()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@@ -158,6 +183,15 @@ func mergeOptions(base BrowserOptions, opts []BrowserOptions) BrowserOptions {
|
|||||||
if o.ShowBrowser != nil {
|
if o.ShowBrowser != nil {
|
||||||
base.ShowBrowser = o.ShowBrowser
|
base.ShowBrowser = o.ShowBrowser
|
||||||
}
|
}
|
||||||
|
if len(o.LaunchArgs) > 0 {
|
||||||
|
base.LaunchArgs = append(base.LaunchArgs, o.LaunchArgs...)
|
||||||
|
}
|
||||||
|
if len(o.InitScripts) > 0 {
|
||||||
|
base.InitScripts = append(base.InitScripts, o.InitScripts...)
|
||||||
|
}
|
||||||
|
if o.Stealth != nil {
|
||||||
|
base.Stealth = o.Stealth
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return base
|
return base
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -41,6 +41,11 @@ var Flags = BrowserFlags{
|
|||||||
Usage: "If set, the browser will be visible, if not set, the browser will be headless",
|
Usage: "If set, the browser will be visible, if not set, the browser will be headless",
|
||||||
DefaultText: "false",
|
DefaultText: "false",
|
||||||
},
|
},
|
||||||
|
&cli.BoolFlag{
|
||||||
|
Name: "no-stealth",
|
||||||
|
Usage: "Disable stealth mode (anti-bot-detection evasions are enabled by default)",
|
||||||
|
DefaultText: "false",
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
func FromCommand(ctx context.Context, cmd *cli.Command) (extractor.Browser, error) {
|
func FromCommand(ctx context.Context, cmd *cli.Command) (extractor.Browser, error) {
|
||||||
@@ -74,5 +79,9 @@ func FromCommand(ctx context.Context, cmd *cli.Command) (extractor.Browser, erro
|
|||||||
opts.ShowBrowser = extractor.Bool(cmd.Bool("visible"))
|
opts.ShowBrowser = extractor.Bool(cmd.Bool("visible"))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if cmd.IsSet("no-stealth") && cmd.Bool("no-stealth") {
|
||||||
|
opts.Stealth = extractor.Bool(false)
|
||||||
|
}
|
||||||
|
|
||||||
return extractor.NewBrowser(ctx, opts)
|
return extractor.NewBrowser(ctx, opts)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -62,6 +62,7 @@ func NewInteractiveBrowser(ctx context.Context, opts ...BrowserOptions) (Interac
|
|||||||
UserAgent: DefaultUserAgent,
|
UserAgent: DefaultUserAgent,
|
||||||
Browser: BrowserFirefox,
|
Browser: BrowserFirefox,
|
||||||
Timeout: &thirtySeconds,
|
Timeout: &thirtySeconds,
|
||||||
|
Stealth: Bool(true),
|
||||||
Dimensions: Size{
|
Dimensions: Size{
|
||||||
Width: 1280,
|
Width: 1280,
|
||||||
Height: 720,
|
Height: 720,
|
||||||
|
|||||||
@@ -70,6 +70,20 @@ type BrowserOptions struct {
|
|||||||
|
|
||||||
// UseLocalOnly will, if set, not connect to the Playwright server, and instead launch a local browser.
|
// UseLocalOnly will, if set, not connect to the Playwright server, and instead launch a local browser.
|
||||||
UseLocalOnly bool
|
UseLocalOnly bool
|
||||||
|
|
||||||
|
// LaunchArgs are additional command-line arguments passed to the browser process.
|
||||||
|
// For example: []string{"--disable-blink-features=AutomationControlled"}
|
||||||
|
LaunchArgs []string
|
||||||
|
|
||||||
|
// InitScripts are JavaScript snippets injected into every new browser context
|
||||||
|
// before any page scripts run. Useful for overriding detectable properties like
|
||||||
|
// navigator.webdriver.
|
||||||
|
InitScripts []string
|
||||||
|
|
||||||
|
// Stealth enables anti-bot-detection measures. When non-nil and true, common
|
||||||
|
// evasions are applied automatically (launch args + init scripts). When nil,
|
||||||
|
// defaults to true in NewBrowser / NewInteractiveBrowser.
|
||||||
|
Stealth *bool
|
||||||
}
|
}
|
||||||
|
|
||||||
func sameSiteToPlaywright(s SameSite) *playwright.SameSiteAttribute {
|
func sameSiteToPlaywright(s SameSite) *playwright.SameSiteAttribute {
|
||||||
@@ -134,6 +148,7 @@ func NewBrowser(ctx context.Context, opts ...BrowserOptions) (Browser, error) {
|
|||||||
UserAgent: DefaultUserAgent,
|
UserAgent: DefaultUserAgent,
|
||||||
Browser: BrowserFirefox,
|
Browser: BrowserFirefox,
|
||||||
Timeout: &thirtySeconds,
|
Timeout: &thirtySeconds,
|
||||||
|
Stealth: Bool(true),
|
||||||
}, opts)
|
}, opts)
|
||||||
|
|
||||||
if err := ctx.Err(); err != nil {
|
if err := ctx.Err(); err != nil {
|
||||||
|
|||||||
50
stealth.go
Normal file
50
stealth.go
Normal file
@@ -0,0 +1,50 @@
|
|||||||
|
package extractor
|
||||||
|
|
||||||
|
// stealthChromiumArgs lists the extra command-line flags used for Chromium
// launches when stealth mode is on. The single flag turns off the Blink
// "AutomationControlled" feature.
var stealthChromiumArgs = []string{"--disable-blink-features=AutomationControlled"}
|
||||||
|
|
||||||
|
// stealthInitScripts are JavaScript snippets injected before page scripts to
// mask automation signals. Each entry is a self-contained script; none of them
// depends on another having run first.
var stealthInitScripts = []string{
	// Override navigator.webdriver to report false. A non-automated modern
	// browser exposes the boolean false here, so returning undefined would
	// itself be a detectable anomaly.
	`Object.defineProperty(navigator, 'webdriver', {get: () => false})`,

	// Populate navigator.plugins with realistic entries so plugins.length > 0.
	// The returned array also carries item/namedItem/refresh so that callers
	// using the PluginArray-style methods do not throw.
	`Object.defineProperty(navigator, 'plugins', {
	get: () => {
		const arr = [
			{ name: 'PDF Viewer', filename: 'internal-pdf-viewer', description: 'Portable Document Format' },
			{ name: 'Chrome PDF Plugin', filename: 'internal-pdf-viewer', description: '' },
			{ name: 'Chromium PDF Viewer', filename: 'internal-pdf-viewer', description: '' },
		];
		arr.item = (i) => arr[i] || null;
		arr.namedItem = (n) => arr.find(p => p.name === n) || null;
		arr.refresh = () => {};
		return arr;
	},
})`,

	// Populate navigator.mimeTypes to match the fake plugins above.
	`Object.defineProperty(navigator, 'mimeTypes', {
	get: () => {
		const arr = [
			{ type: 'application/pdf', suffixes: 'pdf', description: 'Portable Document Format' },
		];
		arr.item = (i) => arr[i] || null;
		arr.namedItem = (n) => arr.find(m => m.type === n) || null;
		return arr;
	},
})`,

	// Provide a window.chrome runtime stub when the page does not already
	// have one. The guard keeps a real window.chrome object untouched.
	`if (!window.chrome) {
	window.chrome = { runtime: {} };
}`,

	// Fix outerWidth/outerHeight which are 0 in headless mode by mirroring
	// the inner dimensions. Only applied when outerWidth is actually 0.
	`if (window.outerWidth === 0) {
	Object.defineProperty(window, 'outerWidth', { get: () => window.innerWidth });
	Object.defineProperty(window, 'outerHeight', { get: () => window.innerHeight });
}`,
}
|
||||||
72
stealth_test.go
Normal file
72
stealth_test.go
Normal file
@@ -0,0 +1,72 @@
|
|||||||
|
package extractor
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestMergeOptions_StealthDefault(t *testing.T) {
|
||||||
|
base := BrowserOptions{Stealth: Bool(true)}
|
||||||
|
got := mergeOptions(base, nil)
|
||||||
|
if got.Stealth == nil || !*got.Stealth {
|
||||||
|
t.Fatal("expected stealth to default to true")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestMergeOptions_StealthOverrideFalse(t *testing.T) {
|
||||||
|
base := BrowserOptions{Stealth: Bool(true)}
|
||||||
|
got := mergeOptions(base, []BrowserOptions{{Stealth: Bool(false)}})
|
||||||
|
if got.Stealth == nil || *got.Stealth {
|
||||||
|
t.Fatal("expected stealth to be overridden to false")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestMergeOptions_LaunchArgsAppend(t *testing.T) {
|
||||||
|
base := BrowserOptions{LaunchArgs: []string{"--arg1"}}
|
||||||
|
got := mergeOptions(base, []BrowserOptions{{LaunchArgs: []string{"--arg2", "--arg3"}}})
|
||||||
|
if len(got.LaunchArgs) != 3 {
|
||||||
|
t.Fatalf("expected 3 launch args, got %d", len(got.LaunchArgs))
|
||||||
|
}
|
||||||
|
if got.LaunchArgs[0] != "--arg1" || got.LaunchArgs[1] != "--arg2" || got.LaunchArgs[2] != "--arg3" {
|
||||||
|
t.Fatalf("unexpected launch args: %v", got.LaunchArgs)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestMergeOptions_InitScriptsAppend(t *testing.T) {
|
||||||
|
base := BrowserOptions{InitScripts: []string{"script1"}}
|
||||||
|
got := mergeOptions(base, []BrowserOptions{{InitScripts: []string{"script2"}}})
|
||||||
|
if len(got.InitScripts) != 2 {
|
||||||
|
t.Fatalf("expected 2 init scripts, got %d", len(got.InitScripts))
|
||||||
|
}
|
||||||
|
if got.InitScripts[0] != "script1" || got.InitScripts[1] != "script2" {
|
||||||
|
t.Fatalf("unexpected init scripts: %v", got.InitScripts)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestMergeOptions_StealthNilDoesNotOverride(t *testing.T) {
|
||||||
|
base := BrowserOptions{Stealth: Bool(true)}
|
||||||
|
got := mergeOptions(base, []BrowserOptions{{Stealth: nil}})
|
||||||
|
if got.Stealth == nil || !*got.Stealth {
|
||||||
|
t.Fatal("expected stealth to remain true when override is nil")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestStealthChromiumArgs(t *testing.T) {
|
||||||
|
if len(stealthChromiumArgs) == 0 {
|
||||||
|
t.Fatal("expected at least one chromium stealth arg")
|
||||||
|
}
|
||||||
|
found := false
|
||||||
|
for _, arg := range stealthChromiumArgs {
|
||||||
|
if arg == "--disable-blink-features=AutomationControlled" {
|
||||||
|
found = true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if !found {
|
||||||
|
t.Fatal("expected --disable-blink-features=AutomationControlled in stealth chromium args")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestStealthInitScripts(t *testing.T) {
|
||||||
|
if len(stealthInitScripts) == 0 {
|
||||||
|
t.Fatal("expected at least one stealth init script")
|
||||||
|
}
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user