feature: add stealth mode, launch args, and init scripts to BrowserOptions
All checks were successful
CI / vet (pull_request) Successful in 1m18s
CI / build (pull_request) Successful in 1m22s
CI / test (pull_request) Successful in 1m28s

Add anti-bot-detection evasion support to reduce blocking by sites like
archive.ph. Stealth mode is enabled by default for all browsers and applies
common evasions: navigator.webdriver override, plugin/mimeType spoofing,
window.chrome stub, and outerWidth/outerHeight fixes. For Chromium,
--disable-blink-features=AutomationControlled is also added.

New BrowserOptions fields:
- Stealth *bool: toggle stealth presets (default true)
- LaunchArgs []string: custom browser launch arguments
- InitScripts []string: JavaScript injected before page scripts

Closes #56

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-17 20:10:58 +00:00
parent e8f4d64eb9
commit e94665ff25
6 changed files with 183 additions and 2 deletions

50
stealth.go Normal file
View File

@@ -0,0 +1,50 @@
package extractor
// stealthChromiumArgs holds the extra launch arguments used to make
// Chromium-based browsers look less like an automated session. The single
// switch turns off the Blink "AutomationControlled" feature.
var stealthChromiumArgs = []string{"--disable-blink-features=AutomationControlled"}
// stealthInitScripts are JavaScript snippets injected before page scripts to mask automation signals.
// Every entry is a self-contained script that only redefines properties on navigator or window.
var stealthInitScripts = []string{
	// Report navigator.webdriver as false. The WebDriver specification defines the attribute as a
	// boolean that is false when the browser is not under automation, so returning undefined
	// would itself be an anomaly a detector can test for.
	`Object.defineProperty(navigator, 'webdriver', {get: () => false})`,
	// Populate navigator.plugins with realistic entries so plugins.length > 0.
	// The returned array is given item/namedItem/refresh helpers to resemble a plugin list.
	// NOTE(review): the getter builds a new array on every access, so
	// navigator.plugins !== navigator.plugins; confirm whether that matters for target sites.
	`Object.defineProperty(navigator, 'plugins', {
get: () => {
const arr = [
{ name: 'PDF Viewer', filename: 'internal-pdf-viewer', description: 'Portable Document Format' },
{ name: 'Chrome PDF Plugin', filename: 'internal-pdf-viewer', description: '' },
{ name: 'Chromium PDF Viewer', filename: 'internal-pdf-viewer', description: '' },
];
arr.item = (i) => arr[i] || null;
arr.namedItem = (n) => arr.find(p => p.name === n) || null;
arr.refresh = () => {};
return arr;
},
})`,
	// Populate navigator.mimeTypes to match the fake plugins above (a single application/pdf entry).
	`Object.defineProperty(navigator, 'mimeTypes', {
get: () => {
const arr = [
{ type: 'application/pdf', suffixes: 'pdf', description: 'Portable Document Format' },
];
arr.item = (i) => arr[i] || null;
arr.namedItem = (n) => arr.find(m => m.type === n) || null;
return arr;
},
})`,
	// Provide window.chrome runtime stub (Chromium-only signal; harmless on other engines).
	// An existing window.chrome object is left untouched.
	`if (!window.chrome) {
window.chrome = { runtime: {} };
}`,
	// Fix outerWidth/outerHeight which are 0 in headless mode by mirroring the inner dimensions.
	// Both properties are patched only when outerWidth is exactly 0.
	`if (window.outerWidth === 0) {
Object.defineProperty(window, 'outerWidth', { get: () => window.innerWidth });
Object.defineProperty(window, 'outerHeight', { get: () => window.innerHeight });
}`,
}