Add anti-bot-detection evasion support to reduce blocking by sites like archive.ph.

Stealth mode is enabled by default for all browsers and applies common evasions: navigator.webdriver override, plugin/mimeType spoofing, window.chrome stub, and outerWidth/outerHeight fixes. For Chromium, --disable-blink-features=AutomationControlled is also added.

New BrowserOptions fields:
- Stealth *bool: toggle stealth presets (default true)
- LaunchArgs []string: custom browser launch arguments
- InitScripts []string: JavaScript injected before page scripts

Closes #56

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
51 lines
1.8 KiB
Go
51 lines
1.8 KiB
Go
package extractor
|
|
|
|
// stealthChromiumArgs lists the extra command-line flags passed to Chromium-based
// browsers to make automation harder to detect. It currently holds a single flag
// that turns off the Blink "AutomationControlled" feature.
var stealthChromiumArgs = []string{"--disable-blink-features=AutomationControlled"}
|
|
|
|
// stealthInitScripts are JavaScript snippets injected before page scripts to mask automation signals.
// Each entry is a self-contained script and does not rely on any other entry having run.
// The scripts live in Go raw string literals, so they must not contain backticks.
var stealthInitScripts = []string{
	// Report navigator.webdriver as false. The WebDriver specification defines the
	// attribute as a boolean that is false when the browser is not automated, so an
	// undefined value would itself be an anomaly. The getter is installed on
	// Navigator.prototype rather than on the navigator instance so that the
	// navigator object does not gain an extra own property.
	`Object.defineProperty(Navigator.prototype, 'webdriver', {
	get: () => false,
	configurable: true,
	enumerable: true,
})`,

	// Populate navigator.plugins with realistic entries so plugins.length > 0.
	// Skipped when the browser already reports plugins, so a genuine list is never
	// replaced by the fake one. The array is built once and the getter always
	// returns that same object, keeping navigator.plugins === navigator.plugins.
	`(() => {
	if (navigator.plugins && navigator.plugins.length > 0) {
		return;
	}
	const plugins = [
		{ name: 'PDF Viewer', filename: 'internal-pdf-viewer', description: 'Portable Document Format' },
		{ name: 'Chrome PDF Plugin', filename: 'internal-pdf-viewer', description: '' },
		{ name: 'Chromium PDF Viewer', filename: 'internal-pdf-viewer', description: '' },
	];
	plugins.item = (i) => plugins[i] || null;
	plugins.namedItem = (n) => plugins.find((p) => p.name === n) || null;
	plugins.refresh = () => {};
	Object.defineProperty(Navigator.prototype, 'plugins', {
		get: () => plugins,
		configurable: true,
		enumerable: true,
	});
})()`,

	// Populate navigator.mimeTypes with a PDF entry to go with the fake plugins.
	// Same rules as the plugins script: leave a non-empty native list alone and
	// hand out one stable array instance.
	`(() => {
	if (navigator.mimeTypes && navigator.mimeTypes.length > 0) {
		return;
	}
	const mimeTypes = [
		{ type: 'application/pdf', suffixes: 'pdf', description: 'Portable Document Format' },
	];
	mimeTypes.item = (i) => mimeTypes[i] || null;
	mimeTypes.namedItem = (n) => mimeTypes.find((m) => m.type === n) || null;
	Object.defineProperty(Navigator.prototype, 'mimeTypes', {
		get: () => mimeTypes,
		configurable: true,
		enumerable: true,
	});
})()`,

	// Provide window.chrome runtime stub (Chromium-only signal; harmless on other engines).
	// An existing window.chrome object is left untouched.
	`if (!window.chrome) {
	window.chrome = { runtime: {} };
}`,

	// Fix outerWidth/outerHeight, which can be reported as 0 in headless mode.
	// Each dimension is checked on its own so a zero outerHeight is patched even
	// when outerWidth is non-zero. The getters read the inner dimensions lazily,
	// so later viewport changes are reflected.
	`if (window.outerWidth === 0) {
	Object.defineProperty(window, 'outerWidth', { get: () => window.innerWidth });
}
if (window.outerHeight === 0) {
	Object.defineProperty(window, 'outerHeight', { get: () => window.innerHeight });
}`,
}
|