diff --git a/browser_init.go b/browser_init.go index f009400..694603c 100644 --- a/browser_init.go +++ b/browser_init.go @@ -52,15 +52,11 @@ func initBrowser(opt BrowserOptions) (*browserInitResult, error) { return nil, ErrInvalidBrowserSelection } - // Auto-select a User-Agent matching the browser engine when the caller hasn't set one. - if opt.UserAgent == "" { - switch opt.Browser { - case BrowserChromium: - opt.UserAgent = DefaultChromiumUserAgent - default: - opt.UserAgent = DefaultFirefoxUserAgent - } - } + // User-Agent auto-selection is deferred until after browser launch so we + // can read the real browser version and build a UA that matches the + // sec-ch-ua header Chromium sends automatically. A mismatched version + // (e.g. UA says Chrome/131 while sec-ch-ua says Chromium/136) is a + // well-known bot-detection signal that causes 403s on many sites. // Collect launch args and init scripts, starting with any stealth-mode presets. stealth := opt.Stealth == nil || *opt.Stealth @@ -117,6 +113,16 @@ func initBrowser(opt BrowserOptions) (*browserInitResult, error) { } } + // Auto-select User-Agent now that we know the real browser version. + if opt.UserAgent == "" { + switch opt.Browser { + case BrowserChromium: + opt.UserAgent = chromiumUserAgent(browser.Version()) + default: + opt.UserAgent = DefaultFirefoxUserAgent + } + } + var viewport *playwright.Size if opt.Dimensions.Width > 0 && opt.Dimensions.Height > 0 { viewport = &playwright.Size{ diff --git a/stealth.go b/stealth.go index c61c63e..af7f197 100644 --- a/stealth.go +++ b/stealth.go @@ -3,6 +3,7 @@ package extractor import ( "fmt" "math/rand/v2" + "strings" ) // stealthChromiumArgs are launch arguments that reduce automation detection for Chromium-based browsers. @@ -269,3 +270,20 @@ func buildFirefoxStealthScripts(p firefoxHWProfile) []string { })`, } } + +// chromiumUserAgent builds a Chromium User-Agent string from the actual +// browser version. 
This keeps the UA in sync with the sec-ch-ua header +// that Chromium sends automatically, avoiding a version mismatch that +// anti-bot systems use to detect automation. +func chromiumUserAgent(version string) string { + // version is typically "136.0.7103.25"; only the major is used, since the + // Chrome/MAJ.0.0.0 token zeroes the minor, build, and patch fields. + major := version + if i := strings.IndexByte(version, '.'); i > 0 { + major = version[:i] + } + return fmt.Sprintf( + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s.0.0.0 Safari/537.36", + major, + ) +} diff --git a/stealth_test.go b/stealth_test.go index 0375406..700d753 100644 --- a/stealth_test.go +++ b/stealth_test.go @@ -420,6 +420,23 @@ func TestDefaultChromiumUserAgent_Content(t *testing.T) { } } +func TestChromiumUserAgent_MatchesVersion(t *testing.T) { + ua := chromiumUserAgent("136.0.7103.25") + if !strings.Contains(ua, "Chrome/136.0.0.0") { + t.Fatalf("expected Chrome/136.0.0.0 in UA, got %s", ua) + } + if strings.Contains(ua, "131") { + t.Fatalf("UA must not contain old hardcoded version 131: %s", ua) + } +} + +func TestChromiumUserAgent_MajorOnly(t *testing.T) { + ua := chromiumUserAgent("140") + if !strings.Contains(ua, "Chrome/140.0.0.0") { + t.Fatalf("expected Chrome/140.0.0.0 in UA, got %s", ua) + } +} + // --- Viewport and UA defaults via mergeOptions --- func TestMergeOptions_DefaultViewport(t *testing.T) {