Files
go-extractor/stealth_test.go
Steve Dudenhoeffer 6647e4f63d
All checks were successful
CI / vet (pull_request) Successful in 1m6s
CI / build (pull_request) Successful in 1m11s
CI / test (pull_request) Successful in 1m11s
fix: set default viewport for NewBrowser and align User-Agent with engine
NewBrowser previously had no viewport (strong headless signal) and used a
Firefox User-Agent unconditionally, even for Chromium instances (detectable
mismatch).

Add per-engine UA constants (DefaultFirefoxUserAgent, DefaultChromiumUserAgent)
and auto-select the matching UA in initBrowser when the caller hasn't set one
explicitly. Keep DefaultUserAgent as a backward-compatible alias.

Add 1920x1080 default viewport to NewBrowser (most common desktop resolution).
NewInteractiveBrowser keeps its existing 1280x720 default but also gains
engine-aware UA selection.

Closes #70

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-24 01:28:09 +00:00

442 lines
12 KiB
Go

package extractor
import (
"strings"
"testing"
)
func TestMergeOptions_StealthDefault(t *testing.T) {
base := BrowserOptions{Stealth: Bool(true)}
got := mergeOptions(base, nil)
if got.Stealth == nil || !*got.Stealth {
t.Fatal("expected stealth to default to true")
}
}
// TestMergeOptions_StealthOverrideFalse checks that an override carrying
// Stealth = Bool(false) replaces a base value of Bool(true).
func TestMergeOptions_StealthOverrideFalse(t *testing.T) {
	defaults := BrowserOptions{Stealth: Bool(true)}
	overrides := []BrowserOptions{{Stealth: Bool(false)}}

	merged := mergeOptions(defaults, overrides)
	if flag := merged.Stealth; flag == nil || *flag {
		t.Fatal("expected stealth to be overridden to false")
	}
}
func TestMergeOptions_LaunchArgsAppend(t *testing.T) {
base := BrowserOptions{LaunchArgs: []string{"--arg1"}}
got := mergeOptions(base, []BrowserOptions{{LaunchArgs: []string{"--arg2", "--arg3"}}})
if len(got.LaunchArgs) != 3 {
t.Fatalf("expected 3 launch args, got %d", len(got.LaunchArgs))
}
if got.LaunchArgs[0] != "--arg1" || got.LaunchArgs[1] != "--arg2" || got.LaunchArgs[2] != "--arg3" {
t.Fatalf("unexpected launch args: %v", got.LaunchArgs)
}
}
func TestMergeOptions_InitScriptsAppend(t *testing.T) {
base := BrowserOptions{InitScripts: []string{"script1"}}
got := mergeOptions(base, []BrowserOptions{{InitScripts: []string{"script2"}}})
if len(got.InitScripts) != 2 {
t.Fatalf("expected 2 init scripts, got %d", len(got.InitScripts))
}
if got.InitScripts[0] != "script1" || got.InitScripts[1] != "script2" {
t.Fatalf("unexpected init scripts: %v", got.InitScripts)
}
}
// TestMergeOptions_StealthNilDoesNotOverride checks that an override whose
// Stealth is nil leaves a base Stealth of Bool(true) intact.
func TestMergeOptions_StealthNilDoesNotOverride(t *testing.T) {
	defaults := BrowserOptions{Stealth: Bool(true)}
	overrides := []BrowserOptions{{Stealth: nil}}

	merged := mergeOptions(defaults, overrides)
	if merged.Stealth != nil && *merged.Stealth {
		return
	}
	t.Fatal("expected stealth to remain true when override is nil")
}
// TestStealthChromiumArgs checks that stealthChromiumArgs is non-empty and
// contains the --disable-blink-features=AutomationControlled flag.
func TestStealthChromiumArgs(t *testing.T) {
	const wantArg = "--disable-blink-features=AutomationControlled"

	if len(stealthChromiumArgs) == 0 {
		t.Fatal("expected at least one chromium stealth arg")
	}

	for i := range stealthChromiumArgs {
		if stealthChromiumArgs[i] == wantArg {
			return // flag is present
		}
	}
	t.Fatal("expected --disable-blink-features=AutomationControlled in stealth chromium args")
}
// --- Common scripts ---
// TestStealthCommonScripts_Count pins the number of entries in
// stealthCommonScripts to exactly 4.
func TestStealthCommonScripts_Count(t *testing.T) {
	const want = 4
	if got := len(stealthCommonScripts); got != want {
		t.Fatalf("expected 4 common stealth scripts, got %d", got)
	}
}
// TestStealthCommonScripts_WebdriverOverride passes when at least one entry of
// stealthCommonScripts contains both "navigator" and "webdriver".
func TestStealthCommonScripts_WebdriverOverride(t *testing.T) {
	for _, script := range stealthCommonScripts {
		if strings.Contains(script, "navigator") && strings.Contains(script, "webdriver") {
			return // a matching script exists
		}
	}
	t.Fatal("expected a common script that overrides navigator.webdriver")
}
// TestStealthCommonScripts_OuterDimensions passes when at least one entry of
// stealthCommonScripts contains both "outerWidth" and "outerHeight".
func TestStealthCommonScripts_OuterDimensions(t *testing.T) {
	matches := func(js string) bool {
		return strings.Contains(js, "outerWidth") && strings.Contains(js, "outerHeight")
	}

	for i := range stealthCommonScripts {
		if matches(stealthCommonScripts[i]) {
			return
		}
	}
	t.Fatal("expected a common script that fixes outerWidth/outerHeight")
}
// TestStealthCommonScripts_PermissionsQuery passes when at least one entry of
// stealthCommonScripts contains both "permissions.query" and "notifications".
func TestStealthCommonScripts_PermissionsQuery(t *testing.T) {
	for _, script := range stealthCommonScripts {
		if !strings.Contains(script, "permissions.query") {
			continue
		}
		if strings.Contains(script, "notifications") {
			return
		}
	}
	t.Fatal("expected a common script that overrides permissions.query for notifications")
}
// TestStealthCommonScripts_Notification passes when at least one entry of
// stealthCommonScripts contains both "Notification" and "requestPermission".
func TestStealthCommonScripts_Notification(t *testing.T) {
	for _, script := range stealthCommonScripts {
		if strings.Contains(script, "Notification") && strings.Contains(script, "requestPermission") {
			return
		}
	}
	t.Fatal("expected a common script that stubs Notification constructor")
}
// --- Chromium scripts ---
// TestStealthChromiumScripts_Count pins the number of entries in
// stealthChromiumScripts to exactly 8.
func TestStealthChromiumScripts_Count(t *testing.T) {
	const want = 8
	if got := len(stealthChromiumScripts); got != want {
		t.Fatalf("expected 8 chromium stealth scripts, got %d", got)
	}
}
// TestStealthChromiumScripts_Plugins passes when at least one entry of
// stealthChromiumScripts contains "Chrome PDF Plugin", "navigator" and
// "plugins" together.
func TestStealthChromiumScripts_Plugins(t *testing.T) {
	matches := func(js string) bool {
		return strings.Contains(js, "Chrome PDF Plugin") &&
			strings.Contains(js, "navigator") &&
			strings.Contains(js, "plugins")
	}

	for _, script := range stealthChromiumScripts {
		if matches(script) {
			return
		}
	}
	t.Fatal("expected a chromium script that populates navigator.plugins with Chrome entries")
}
// TestStealthChromiumScripts_MimeTypes passes when at least one entry of
// stealthChromiumScripts contains both "mimeTypes" and "application/pdf".
func TestStealthChromiumScripts_MimeTypes(t *testing.T) {
	for i := range stealthChromiumScripts {
		script := stealthChromiumScripts[i]
		if strings.Contains(script, "mimeTypes") && strings.Contains(script, "application/pdf") {
			return
		}
	}
	t.Fatal("expected a chromium script that populates navigator.mimeTypes")
}
// TestStealthChromiumScripts_WindowChrome passes when at least one entry of
// stealthChromiumScripts contains both "window.chrome" and "runtime".
func TestStealthChromiumScripts_WindowChrome(t *testing.T) {
	for _, script := range stealthChromiumScripts {
		if !strings.Contains(script, "window.chrome") {
			continue
		}
		if strings.Contains(script, "runtime") {
			return
		}
	}
	t.Fatal("expected a chromium script that stubs window.chrome")
}
// TestStealthChromiumScripts_ChromeApp passes when at least one entry of
// stealthChromiumScripts contains "chrome.app", "chrome.csi" and
// "chrome.loadTimes" together.
func TestStealthChromiumScripts_ChromeApp(t *testing.T) {
	matches := func(js string) bool {
		return strings.Contains(js, "chrome.app") &&
			strings.Contains(js, "chrome.csi") &&
			strings.Contains(js, "chrome.loadTimes")
	}

	for _, script := range stealthChromiumScripts {
		if matches(script) {
			return
		}
	}
	t.Fatal("expected a chromium script that stubs chrome.app, chrome.csi, and chrome.loadTimes")
}
// TestStealthChromiumScripts_WebGLSpoof passes when at least one entry of
// stealthChromiumScripts contains both "37446" and "ANGLE".
func TestStealthChromiumScripts_WebGLSpoof(t *testing.T) {
	for _, script := range stealthChromiumScripts {
		if strings.Contains(script, "37446") && strings.Contains(script, "ANGLE") {
			return
		}
	}
	t.Fatal("expected a chromium script that spoofs WebGL renderer with ANGLE strings")
}
// TestStealthChromiumScripts_NavigatorConnection passes when at least one
// entry of stealthChromiumScripts contains both "connection" and
// "effectiveType".
func TestStealthChromiumScripts_NavigatorConnection(t *testing.T) {
	for i := range stealthChromiumScripts {
		script := stealthChromiumScripts[i]
		if strings.Contains(script, "connection") && strings.Contains(script, "effectiveType") {
			return
		}
	}
	t.Fatal("expected a chromium script that stubs navigator.connection")
}
// TestStealthChromiumScripts_CDPCleanup passes when at least one entry of
// stealthChromiumScripts contains both "cdc_" and "delete".
func TestStealthChromiumScripts_CDPCleanup(t *testing.T) {
	for _, script := range stealthChromiumScripts {
		if !strings.Contains(script, "cdc_") {
			continue
		}
		if strings.Contains(script, "delete") {
			return
		}
	}
	t.Fatal("expected a chromium script that cleans up CDP artifacts")
}
// TestStealthChromiumScripts_UserAgentStrip passes when at least one entry of
// stealthChromiumScripts contains both "HeadlessChrome" and "userAgent".
func TestStealthChromiumScripts_UserAgentStrip(t *testing.T) {
	matches := func(js string) bool {
		return strings.Contains(js, "HeadlessChrome") && strings.Contains(js, "userAgent")
	}

	for _, script := range stealthChromiumScripts {
		if matches(script) {
			return
		}
	}
	t.Fatal("expected a chromium script that strips HeadlessChrome from user agent")
}
// --- Firefox scripts ---
// TestStealthFirefoxScripts_Count pins the number of entries in
// stealthFirefoxScripts to exactly 5.
func TestStealthFirefoxScripts_Count(t *testing.T) {
	const want = 5
	if got := len(stealthFirefoxScripts); got != want {
		t.Fatalf("expected 5 firefox stealth scripts, got %d", got)
	}
}
// TestStealthFirefoxScripts_WebdriverHardening passes when at least one entry
// of stealthFirefoxScripts contains both "getOwnPropertyDescriptor" and
// "webdriver".
func TestStealthFirefoxScripts_WebdriverHardening(t *testing.T) {
	for _, script := range stealthFirefoxScripts {
		if strings.Contains(script, "getOwnPropertyDescriptor") && strings.Contains(script, "webdriver") {
			return
		}
	}
	t.Fatal("expected a firefox script that hardens navigator.webdriver via getOwnPropertyDescriptor")
}
// TestStealthFirefoxScripts_WebGLSpoof passes when at least one entry of
// stealthFirefoxScripts contains both "37446" and "Mesa DRI".
func TestStealthFirefoxScripts_WebGLSpoof(t *testing.T) {
	for i := range stealthFirefoxScripts {
		script := stealthFirefoxScripts[i]
		if strings.Contains(script, "37446") && strings.Contains(script, "Mesa DRI") {
			return
		}
	}
	t.Fatal("expected a firefox script that spoofs WebGL renderer with Mesa strings")
}
// TestStealthFirefoxScripts_MozInnerScreen passes when at least one entry of
// stealthFirefoxScripts contains both "mozInnerScreenX" and "mozInnerScreenY".
func TestStealthFirefoxScripts_MozInnerScreen(t *testing.T) {
	for _, script := range stealthFirefoxScripts {
		if !strings.Contains(script, "mozInnerScreenX") {
			continue
		}
		if strings.Contains(script, "mozInnerScreenY") {
			return
		}
	}
	t.Fatal("expected a firefox script that spoofs mozInnerScreenX/mozInnerScreenY")
}
// TestStealthFirefoxScripts_HardwareConcurrency passes when at least one entry
// of stealthFirefoxScripts contains "hardwareConcurrency".
func TestStealthFirefoxScripts_HardwareConcurrency(t *testing.T) {
	for i := range stealthFirefoxScripts {
		if strings.Contains(stealthFirefoxScripts[i], "hardwareConcurrency") {
			return
		}
	}
	t.Fatal("expected a firefox script that normalizes navigator.hardwareConcurrency")
}
// TestStealthFirefoxScripts_PDFjsPlugins passes when at least one entry of
// stealthFirefoxScripts contains both "PDF.js" and "plugins".
func TestStealthFirefoxScripts_PDFjsPlugins(t *testing.T) {
	matches := func(js string) bool {
		return strings.Contains(js, "PDF.js") && strings.Contains(js, "plugins")
	}

	for _, script := range stealthFirefoxScripts {
		if matches(script) {
			return
		}
	}
	t.Fatal("expected a firefox script that provides PDF.js plugin entry")
}
// --- Cross-category validation ---
// TestStealthScripts_NoOverlap fails if a chromium script text was already
// recorded (from the common list or an earlier chromium entry), or if a
// firefox script text matches any recorded common or chromium script.
func TestStealthScripts_NoOverlap(t *testing.T) {
	// owner maps the exact script text to the category that registered it.
	owner := map[string]string{}

	for i := range stealthCommonScripts {
		owner[stealthCommonScripts[i]] = "common"
	}

	for _, script := range stealthChromiumScripts {
		if prev, seen := owner[script]; seen {
			t.Fatalf("chromium script also appears in %s category", prev)
		}
		owner[script] = "chromium"
	}

	// Firefox scripts are only looked up, never recorded.
	for _, script := range stealthFirefoxScripts {
		prev, seen := owner[script]
		if !seen {
			continue
		}
		t.Fatalf("firefox script also appears in %s category", prev)
	}
}
// TestStealthCommonScripts_NoChromiumMarkers fails on the first entry of
// stealthCommonScripts that contains any of the listed Chromium-specific
// substrings.
func TestStealthCommonScripts_NoChromiumMarkers(t *testing.T) {
	banned := [...]string{
		"window.chrome",
		"chrome.app",
		"chrome.csi",
		"chrome.loadTimes",
		"HeadlessChrome",
		"cdc_",
		"Chrome PDF Plugin",
		"ANGLE",
	}

	for i := range stealthCommonScripts {
		script := stealthCommonScripts[i]
		for _, m := range banned {
			if !strings.Contains(script, m) {
				continue
			}
			t.Fatalf("common script contains Chromium-specific marker %q", m)
		}
	}
}
// TestStealthCommonScripts_NoFirefoxMarkers fails on the first entry of
// stealthCommonScripts that contains any of the listed Firefox-specific
// substrings.
func TestStealthCommonScripts_NoFirefoxMarkers(t *testing.T) {
	banned := [...]string{"mozInnerScreen", "Mesa DRI", "PDF.js"}

	for i := range stealthCommonScripts {
		script := stealthCommonScripts[i]
		for _, m := range banned {
			if !strings.Contains(script, m) {
				continue
			}
			t.Fatalf("common script contains Firefox-specific marker %q", m)
		}
	}
}
// TestStealthChromiumScripts_NoFirefoxMarkers fails on the first entry of
// stealthChromiumScripts that contains any of the listed Firefox-specific
// substrings.
func TestStealthChromiumScripts_NoFirefoxMarkers(t *testing.T) {
	banned := [...]string{"mozInnerScreen", "Mesa DRI", "PDF.js"}

	for i := range stealthChromiumScripts {
		script := stealthChromiumScripts[i]
		for _, m := range banned {
			if !strings.Contains(script, m) {
				continue
			}
			t.Fatalf("chromium script contains Firefox-specific marker %q", m)
		}
	}
}
// TestStealthFirefoxScripts_NoChromiumMarkers fails on the first entry of
// stealthFirefoxScripts that contains any of the listed Chromium-specific
// substrings.
func TestStealthFirefoxScripts_NoChromiumMarkers(t *testing.T) {
	banned := [...]string{
		"window.chrome",
		"chrome.app",
		"chrome.csi",
		"chrome.loadTimes",
		"HeadlessChrome",
		"cdc_",
		"Chrome PDF Plugin",
		"ANGLE",
	}

	for i := range stealthFirefoxScripts {
		script := stealthFirefoxScripts[i]
		for _, m := range banned {
			if !strings.Contains(script, m) {
				continue
			}
			t.Fatalf("firefox script contains Chromium-specific marker %q", m)
		}
	}
}
// --- User-Agent constants ---
// TestDefaultUserAgent_BackwardCompat checks that DefaultUserAgent equals
// DefaultFirefoxUserAgent.
func TestDefaultUserAgent_BackwardCompat(t *testing.T) {
	if DefaultUserAgent == DefaultFirefoxUserAgent {
		return
	}
	t.Fatal("DefaultUserAgent must equal DefaultFirefoxUserAgent for backward compatibility")
}
// TestDefaultFirefoxUserAgent_Content checks that DefaultFirefoxUserAgent
// contains "Firefox" and does not contain "Chrome".
func TestDefaultFirefoxUserAgent_Content(t *testing.T) {
	ua := DefaultFirefoxUserAgent

	// The presence check runs first so its failure message takes precedence.
	switch {
	case !strings.Contains(ua, "Firefox"):
		t.Fatal("DefaultFirefoxUserAgent must contain 'Firefox'")
	case strings.Contains(ua, "Chrome"):
		t.Fatal("DefaultFirefoxUserAgent must not contain 'Chrome'")
	}
}
// TestDefaultChromiumUserAgent_Content checks that DefaultChromiumUserAgent
// contains "Chrome" and does not contain "Firefox".
func TestDefaultChromiumUserAgent_Content(t *testing.T) {
	ua := DefaultChromiumUserAgent

	// The presence check runs first so its failure message takes precedence.
	switch {
	case !strings.Contains(ua, "Chrome"):
		t.Fatal("DefaultChromiumUserAgent must contain 'Chrome'")
	case strings.Contains(ua, "Firefox"):
		t.Fatal("DefaultChromiumUserAgent must not contain 'Firefox'")
	}
}
// --- Viewport and UA defaults via mergeOptions ---
// TestMergeOptions_DefaultViewport checks that base Dimensions of 1920x1080
// come through mergeOptions unchanged when no overrides are supplied.
func TestMergeOptions_DefaultViewport(t *testing.T) {
	defaults := BrowserOptions{Dimensions: Size{Width: 1920, Height: 1080}}

	dims := mergeOptions(defaults, nil).Dimensions
	if dims.Width == 1920 && dims.Height == 1080 {
		return
	}
	t.Fatalf("expected default viewport 1920x1080, got %dx%d", dims.Width, dims.Height)
}
// TestMergeOptions_ViewportOverride checks that override Dimensions of
// 1280x720 replace base Dimensions of 1920x1080.
func TestMergeOptions_ViewportOverride(t *testing.T) {
	defaults := BrowserOptions{Dimensions: Size{Width: 1920, Height: 1080}}
	overrides := []BrowserOptions{{Dimensions: Size{Width: 1280, Height: 720}}}

	dims := mergeOptions(defaults, overrides).Dimensions
	if dims.Width == 1280 && dims.Height == 720 {
		return
	}
	t.Fatalf("expected overridden viewport 1280x720, got %dx%d", dims.Width, dims.Height)
}
// TestMergeOptions_EmptyUANotOverridden checks that mergeOptions leaves
// UserAgent empty when neither the base nor the override (which only sets
// Browser to BrowserChromium) specifies one.
func TestMergeOptions_EmptyUANotOverridden(t *testing.T) {
	overrides := []BrowserOptions{{Browser: BrowserChromium}}

	merged := mergeOptions(BrowserOptions{}, overrides)
	if ua := merged.UserAgent; ua != "" {
		t.Fatalf("expected empty UserAgent after merge with no explicit UA, got %q", ua)
	}
}
func TestMergeOptions_ExplicitUAPreserved(t *testing.T) {
base := BrowserOptions{}
customUA := "MyCustomAgent/1.0"
got := mergeOptions(base, []BrowserOptions{{UserAgent: customUA}})
if got.UserAgent != customUA {
t.Fatalf("expected explicit UA %q preserved, got %q", customUA, got.UserAgent)
}
}