Replace static stealthChromiumScripts and stealthFirefoxScripts slices with builder functions that accept hardware profile structs. Each browser session now randomly selects from a pool of 6 realistic profiles per engine, and Chromium connection stats receive per-session jitter (±20ms RTT, ±2 Mbps downlink). This prevents anti-bot systems from correlating sessions via identical WebGL, connection, mozInnerScreen, and hardwareConcurrency fingerprints. Closes #71 Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
272 lines
10 KiB
Go
272 lines
10 KiB
Go
package extractor
|
|
|
|
import (
|
|
"fmt"
|
|
"math/rand/v2"
|
|
)
|
|
|
|
// stealthChromiumArgs lists the extra command-line flags passed to Chromium-based browsers to
// reduce automation detection. The single flag disables Blink's "AutomationControlled" feature.
var stealthChromiumArgs = []string{"--disable-blink-features=AutomationControlled"}
|
|
|
|
// JavaScript sources for the engine-independent stealth patches.
const (
	// jsHideWebdriver redefines navigator.webdriver so that it reads as undefined.
	// NOTE(review): the WebDriver spec has non-automated browsers report false here rather
	// than undefined — confirm that undefined is the intended value.
	jsHideWebdriver = `Object.defineProperty(navigator, 'webdriver', {get: () => undefined})`

	// jsFixOuterSize mirrors innerWidth/innerHeight into outerWidth/outerHeight, which are 0
	// in headless mode. Both are patched whenever outerWidth is 0.
	jsFixOuterSize = `if (window.outerWidth === 0) {
		Object.defineProperty(window, 'outerWidth', { get: () => window.innerWidth });
		Object.defineProperty(window, 'outerHeight', { get: () => window.innerHeight });
	}`

	// jsDenyNotificationQuery wraps navigator.permissions.query so that a 'notifications'
	// query resolves to state "denied"; every other query goes to the original implementation.
	jsDenyNotificationQuery = `(function() {
		if (navigator.permissions && navigator.permissions.query) {
			const origQuery = navigator.permissions.query.bind(navigator.permissions);
			navigator.permissions.query = function(desc) {
				if (desc && desc.name === 'notifications') {
					return Promise.resolve({ state: 'denied', onchange: null });
				}
				return origQuery(desc);
			};
		}
	})()`

	// jsStubNotification installs a minimal Notification constructor, with permission
	// 'denied', when the global is missing (headless may lack it).
	jsStubNotification = `(function() {
		if (typeof Notification === 'undefined') {
			window.Notification = function() {};
			Notification.permission = 'denied';
			Notification.requestPermission = function() { return Promise.resolve('denied'); };
		}
	})()`
)

// stealthCommonScripts are JavaScript snippets injected before page scripts on all browser engines.
// The slice order is the injection order.
var stealthCommonScripts = []string{
	jsHideWebdriver,
	jsFixOuterSize,
	jsDenyNotificationQuery,
	jsStubNotification,
}
|
|
|
|
// chromiumHWProfile describes the hardware fingerprint presented by one Chromium browser session.
// Field order is significant: chromiumHWProfiles is written with positional literals.
type chromiumHWProfile struct {
	// WebGLVendor and WebGLRenderer are the strings the spoofed WebGL getParameter returns
	// for parameters 37445 and 37446 respectively.
	WebGLVendor, WebGLRenderer string

	ConnRTT      int     // base RTT in ms (jittered ±20 per session)
	ConnDownlink float64 // base downlink in Mbps (jittered ±2 per session)
}
|
|
|
|
// chromiumHWProfiles is a pool of realistic Chromium hardware profiles.
|
|
// Index 0 matches the original hardcoded values.
|
|
var chromiumHWProfiles = []chromiumHWProfile{
|
|
{"Google Inc. (Intel)", "ANGLE (Intel, Intel(R) UHD Graphics 630, OpenGL 4.5)", 50, 10},
|
|
{"Google Inc. (NVIDIA)", "ANGLE (NVIDIA, NVIDIA GeForce GTX 1660 SUPER, D3D11)", 30, 25},
|
|
{"Google Inc. (AMD)", "ANGLE (AMD, AMD Radeon RX 580, D3D11)", 100, 5},
|
|
{"Google Inc. (Intel)", "ANGLE (Intel, Intel(R) UHD Graphics 770, OpenGL 4.5)", 50, 10},
|
|
{"Google Inc. (NVIDIA)", "ANGLE (NVIDIA, NVIDIA GeForce RTX 3060, D3D11)", 25, 50},
|
|
{"Google Inc. (Intel)", "ANGLE (Intel, Intel(R) Iris Xe Graphics, D3D11)", 75, 8},
|
|
}
|
|
|
|
// randomChromiumProfile returns a randomly selected Chromium hardware profile.
|
|
func randomChromiumProfile() chromiumHWProfile {
|
|
return chromiumHWProfiles[rand.IntN(len(chromiumHWProfiles))]
|
|
}
|
|
|
|
// buildChromiumStealthScripts returns Chromium stealth init scripts with the given hardware profile
|
|
// values templated into the WebGL and connection spoofing scripts. Connection RTT and downlink
|
|
// receive per-session jitter (±20ms RTT, ±2 Mbps downlink).
|
|
func buildChromiumStealthScripts(p chromiumHWProfile) []string {
|
|
// Apply jitter to connection stats.
|
|
rtt := p.ConnRTT + rand.IntN(41) - 20 // ±20ms
|
|
if rtt < 0 {
|
|
rtt = 0
|
|
}
|
|
downlink := p.ConnDownlink + (rand.Float64()*4 - 2) // ±2 Mbps
|
|
if downlink < 0.5 {
|
|
downlink = 0.5
|
|
}
|
|
|
|
return []string{
|
|
// Populate navigator.plugins with realistic Chromium entries so plugins.length > 0.
|
|
`Object.defineProperty(navigator, 'plugins', {
|
|
get: () => {
|
|
const arr = [
|
|
{ name: 'PDF Viewer', filename: 'internal-pdf-viewer', description: 'Portable Document Format' },
|
|
{ name: 'Chrome PDF Plugin', filename: 'internal-pdf-viewer', description: '' },
|
|
{ name: 'Chromium PDF Viewer', filename: 'internal-pdf-viewer', description: '' },
|
|
];
|
|
arr.item = (i) => arr[i] || null;
|
|
arr.namedItem = (n) => arr.find(p => p.name === n) || null;
|
|
arr.refresh = () => {};
|
|
return arr;
|
|
},
|
|
})`,
|
|
|
|
// Populate navigator.mimeTypes to match the fake Chromium plugins above.
|
|
`Object.defineProperty(navigator, 'mimeTypes', {
|
|
get: () => {
|
|
const arr = [
|
|
{ type: 'application/pdf', suffixes: 'pdf', description: 'Portable Document Format' },
|
|
];
|
|
arr.item = (i) => arr[i] || null;
|
|
arr.namedItem = (n) => arr.find(m => m.type === n) || null;
|
|
return arr;
|
|
},
|
|
})`,
|
|
|
|
// Provide window.chrome runtime stub (Chromium-only signal).
|
|
`if (!window.chrome) {
|
|
window.chrome = { runtime: {} };
|
|
}`,
|
|
|
|
// Add chrome.app, chrome.csi, and chrome.loadTimes stubs missing in headless.
|
|
`(function() {
|
|
if (!window.chrome) window.chrome = {};
|
|
if (!window.chrome.app) {
|
|
window.chrome.app = { isInstalled: false, InstallState: { DISABLED: 'disabled', INSTALLED: 'installed', NOT_INSTALLED: 'not_installed' }, RunningState: { CANNOT_RUN: 'cannot_run', READY_TO_RUN: 'ready_to_run', RUNNING: 'running' } };
|
|
}
|
|
if (!window.chrome.csi) {
|
|
window.chrome.csi = function() { return { startE: Date.now(), onloadT: Date.now(), pageT: 0, tran: 15 }; };
|
|
}
|
|
if (!window.chrome.loadTimes) {
|
|
window.chrome.loadTimes = function() { return { commitLoadTime: Date.now() / 1000, connectionInfo: 'h2', finishDocumentLoadTime: Date.now() / 1000, finishLoadTime: Date.now() / 1000, firstPaintAfterLoadTime: 0, firstPaintTime: Date.now() / 1000, navigationType: 'Other', npnNegotiatedProtocol: 'h2', requestTime: Date.now() / 1000, startLoadTime: Date.now() / 1000, wasAlternateProtocolAvailable: false, wasFetchedViaSpdy: true, wasNpnNegotiated: true }; };
|
|
}
|
|
})()`,
|
|
|
|
// Spoof WebGL renderer to hide SwiftShader (headless GPU) fingerprint with Chromium ANGLE strings.
|
|
fmt.Sprintf(`(function() {
|
|
const getParam = WebGLRenderingContext.prototype.getParameter;
|
|
WebGLRenderingContext.prototype.getParameter = function(param) {
|
|
if (param === 37445) return '%s';
|
|
if (param === 37446) return '%s';
|
|
return getParam.call(this, param);
|
|
};
|
|
if (typeof WebGL2RenderingContext !== 'undefined') {
|
|
const getParam2 = WebGL2RenderingContext.prototype.getParameter;
|
|
WebGL2RenderingContext.prototype.getParameter = function(param) {
|
|
if (param === 37445) return '%s';
|
|
if (param === 37446) return '%s';
|
|
return getParam2.call(this, param);
|
|
};
|
|
}
|
|
})()`, p.WebGLVendor, p.WebGLRenderer, p.WebGLVendor, p.WebGLRenderer),
|
|
|
|
// Stub navigator.connection (Network Information API) if missing (Chrome-only API).
|
|
fmt.Sprintf(`(function() {
|
|
if (!navigator.connection) {
|
|
Object.defineProperty(navigator, 'connection', {
|
|
get: function() {
|
|
return { effectiveType: '4g', rtt: %d, downlink: %.1f, saveData: false, onchange: null };
|
|
},
|
|
});
|
|
}
|
|
})()`, rtt, downlink),
|
|
|
|
// Remove CDP artifacts (window.cdc_* globals injected by Chrome DevTools Protocol).
|
|
`(function() {
|
|
for (var key in window) {
|
|
if (key.match(/^cdc_/)) {
|
|
delete window[key];
|
|
}
|
|
}
|
|
})()`,
|
|
|
|
// Strip "HeadlessChrome" from navigator.userAgent if present.
|
|
`(function() {
|
|
var ua = navigator.userAgent;
|
|
if (ua.indexOf('HeadlessChrome') !== -1) {
|
|
Object.defineProperty(navigator, 'userAgent', {
|
|
get: function() { return ua.replace('HeadlessChrome', 'Chrome'); },
|
|
});
|
|
}
|
|
})()`,
|
|
}
|
|
}
|
|
|
|
// firefoxHWProfile describes the hardware fingerprint presented by one Firefox browser session.
// Field order is significant: firefoxHWProfiles is written with positional literals.
type firefoxHWProfile struct {
	// WebGLVendor and WebGLRenderer are the strings the spoofed WebGL getParameter returns
	// for parameters 37445 and 37446 respectively.
	WebGLVendor, WebGLRenderer string

	// MozInnerScreenX and MozInnerScreenY replace window.mozInnerScreenX/Y when the
	// browser reports 0 for them.
	MozInnerScreenX, MozInnerScreenY int

	// HardwareConcurrency replaces navigator.hardwareConcurrency when the browser reports
	// a value of 2 or less.
	HardwareConcurrency int
}
|
|
|
|
// firefoxHWProfiles is a pool of realistic Firefox hardware profiles.
|
|
// Index 0 matches the original hardcoded values.
|
|
var firefoxHWProfiles = []firefoxHWProfile{
|
|
{"Intel Open Source Technology Center", "Mesa DRI Intel(R) UHD Graphics 630", 8, 51, 4},
|
|
{"Intel Open Source Technology Center", "Mesa DRI Intel(R) HD Graphics 530", 0, 71, 8},
|
|
{"X.Org", "AMD Radeon RX 580 (polaris10, LLVM 15.0.7, DRM 3.49, 6.1.0-18-amd64)", 8, 51, 8},
|
|
{"Intel Open Source Technology Center", "Mesa DRI Intel(R) UHD Graphics 770", 0, 51, 16},
|
|
{"nouveau", "NV167", 8, 71, 4},
|
|
{"Intel", "Mesa Intel(R) Iris(R) Xe Graphics", 0, 51, 8},
|
|
}
|
|
|
|
// randomFirefoxProfile returns a randomly selected Firefox hardware profile.
|
|
func randomFirefoxProfile() firefoxHWProfile {
|
|
return firefoxHWProfiles[rand.IntN(len(firefoxHWProfiles))]
|
|
}
|
|
|
|
// buildFirefoxStealthScripts returns Firefox stealth init scripts with the given hardware profile
|
|
// values templated into the WebGL, mozInnerScreen, and hardwareConcurrency spoofing scripts.
|
|
func buildFirefoxStealthScripts(p firefoxHWProfile) []string {
|
|
return []string{
|
|
// Harden navigator.webdriver for Firefox: ensure Object.getOwnPropertyDescriptor also returns undefined.
|
|
`(function() {
|
|
const proto = Object.getPrototypeOf(navigator);
|
|
const origGetOwnPropDesc = Object.getOwnPropertyDescriptor;
|
|
Object.getOwnPropertyDescriptor = function(obj, prop) {
|
|
if ((obj === navigator || obj === proto) && prop === 'webdriver') {
|
|
return undefined;
|
|
}
|
|
return origGetOwnPropDesc.call(this, obj, prop);
|
|
};
|
|
})()`,
|
|
|
|
// Spoof WebGL renderer with Firefox-appropriate Mesa/driver strings.
|
|
fmt.Sprintf(`(function() {
|
|
const getParam = WebGLRenderingContext.prototype.getParameter;
|
|
WebGLRenderingContext.prototype.getParameter = function(param) {
|
|
if (param === 37445) return '%s';
|
|
if (param === 37446) return '%s';
|
|
return getParam.call(this, param);
|
|
};
|
|
if (typeof WebGL2RenderingContext !== 'undefined') {
|
|
const getParam2 = WebGL2RenderingContext.prototype.getParameter;
|
|
WebGL2RenderingContext.prototype.getParameter = function(param) {
|
|
if (param === 37445) return '%s';
|
|
if (param === 37446) return '%s';
|
|
return getParam2.call(this, param);
|
|
};
|
|
}
|
|
})()`, p.WebGLVendor, p.WebGLRenderer, p.WebGLVendor, p.WebGLRenderer),
|
|
|
|
// Spoof mozInnerScreenX/mozInnerScreenY which are 0 in headless Firefox.
|
|
fmt.Sprintf(`(function() {
|
|
if (window.mozInnerScreenX === 0) {
|
|
Object.defineProperty(window, 'mozInnerScreenX', { get: () => %d });
|
|
}
|
|
if (window.mozInnerScreenY === 0) {
|
|
Object.defineProperty(window, 'mozInnerScreenY', { get: () => %d });
|
|
}
|
|
})()`, p.MozInnerScreenX, p.MozInnerScreenY),
|
|
|
|
// Normalize navigator.hardwareConcurrency (Firefox headless sometimes reports 2).
|
|
fmt.Sprintf(`(function() {
|
|
if (navigator.hardwareConcurrency <= 2) {
|
|
Object.defineProperty(navigator, 'hardwareConcurrency', { get: () => %d });
|
|
}
|
|
})()`, p.HardwareConcurrency),
|
|
|
|
// Override navigator.plugins with Firefox-appropriate PDF.js entry.
|
|
`Object.defineProperty(navigator, 'plugins', {
|
|
get: () => {
|
|
const arr = [
|
|
{ name: 'PDF.js', filename: 'internal-pdf-viewer', description: 'Portable Document Format' },
|
|
];
|
|
arr.item = (i) => arr[i] || null;
|
|
arr.namedItem = (n) => arr.find(p => p.name === n) || null;
|
|
arr.refresh = () => {};
|
|
return arr;
|
|
},
|
|
})`,
|
|
}
|
|
}
|