feature: add stealth mode, launch args, and init scripts to BrowserOptions
Add anti-bot-detection evasion support to reduce blocking by sites like archive.ph.

Stealth mode is enabled by default for all browsers and applies common evasions: navigator.webdriver override, plugin/mimeType spoofing, window.chrome stub, and outerWidth/outerHeight fixes. For Chromium, --disable-blink-features=AutomationControlled is also added.

New BrowserOptions fields:
- Stealth *bool: toggle stealth presets (default true)
- LaunchArgs []string: custom browser launch arguments
- InitScripts []string: JavaScript injected before page scripts

Closes #56

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
50
stealth.go
Normal file
50
stealth.go
Normal file
@@ -0,0 +1,50 @@
|
||||
package extractor
|
||||
|
||||
// stealthChromiumArgs holds the launch arguments intended to reduce automation
// detection in Chromium-based browsers. The single flag turns off Blink's
// "AutomationControlled" feature.
var stealthChromiumArgs = []string{"--disable-blink-features=AutomationControlled"}
|
||||
|
||||
// stealthInitScripts are JavaScript snippets injected before page scripts to mask
// automation signals. Each entry is a self-contained script; entries do not depend
// on one another.
var stealthInitScripts = []string{
	// Override navigator.webdriver to report false. The attribute is a boolean, so a
	// non-automated browser yields false; returning undefined would itself stand out.
	`Object.defineProperty(navigator, 'webdriver', {get: () => false})`,

	// Populate navigator.plugins with realistic entries so plugins.length > 0.
	// The list is built once and the same object is returned on every access, so
	// identity checks such as navigator.plugins === navigator.plugins hold.
	`(() => {
	const plugins = [
		{ name: 'PDF Viewer', filename: 'internal-pdf-viewer', description: 'Portable Document Format' },
		{ name: 'Chrome PDF Plugin', filename: 'internal-pdf-viewer', description: '' },
		{ name: 'Chromium PDF Viewer', filename: 'internal-pdf-viewer', description: '' },
	];
	plugins.item = (i) => plugins[i] || null;
	plugins.namedItem = (n) => plugins.find(p => p.name === n) || null;
	plugins.refresh = () => {};
	Object.defineProperty(navigator, 'plugins', { get: () => plugins });
})()`,

	// Populate navigator.mimeTypes to match the fake plugins above. As with plugins,
	// a single cached list is returned on every access.
	`(() => {
	const mimeTypes = [
		{ type: 'application/pdf', suffixes: 'pdf', description: 'Portable Document Format' },
	];
	mimeTypes.item = (i) => mimeTypes[i] || null;
	mimeTypes.namedItem = (n) => mimeTypes.find(m => m.type === n) || null;
	Object.defineProperty(navigator, 'mimeTypes', { get: () => mimeTypes });
})()`,

	// Provide a window.chrome runtime stub when the page does not already have one.
	// NOTE(review): window.chrome is a Chromium-only signal; confirm that adding it on
	// other engines does not create an inconsistency with the reported user agent.
	`if (!window.chrome) {
	window.chrome = { runtime: {} };
}`,

	// Fix outerWidth/outerHeight which are 0 in headless mode by mirroring the
	// inner dimensions. Both properties are patched only when outerWidth is 0.
	`if (window.outerWidth === 0) {
	Object.defineProperty(window, 'outerWidth', { get: () => window.innerWidth });
	Object.defineProperty(window, 'outerHeight', { get: () => window.innerHeight });
}`,
}
|
||||
Reference in New Issue
Block a user