The Secure field was dropped in both Playwright<->internal cookie conversion functions, causing cookies with __Secure-/__Host- prefixes to be rejected by Chromium. Additionally, batch AddCookies meant one invalid cookie would fail browser creation entirely. Changes: - Map Secure field in cookieToPlaywrightOptionalCookie and playwrightCookieToCookie - Add cookies one-by-one with slog.Warn on failure instead of failing the entire batch - Add unit tests for both conversion functions Closes #75 Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
216 lines
5.6 KiB
Go
216 lines
5.6 KiB
Go
package extractor
|
|
|
|
import (
|
|
"fmt"
|
|
"log/slog"
|
|
"os"
|
|
|
|
"github.com/playwright-community/playwright-go"
|
|
)
|
|
|
|
// browserInitResult holds the result of shared browser initialization.
|
|
type browserInitResult struct {
|
|
pw *playwright.Playwright
|
|
browser playwright.Browser
|
|
bctx playwright.BrowserContext
|
|
opt BrowserOptions
|
|
}
|
|
|
|
// initBrowser performs the shared browser initialization steps:
|
|
// start Playwright, select browser type, connect or launch, create context, load cookies.
|
|
func initBrowser(opt BrowserOptions) (*browserInitResult, error) {
|
|
pw, err := playwright.Run()
|
|
if err != nil {
|
|
err = playwright.Install()
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to install playwright: %w", err)
|
|
}
|
|
pw, err = playwright.Run()
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to start playwright: %w", err)
|
|
}
|
|
}
|
|
|
|
var bt playwright.BrowserType
|
|
switch opt.Browser {
|
|
case BrowserChromium:
|
|
bt = pw.Chromium
|
|
if opt.ServerAddress == "" {
|
|
opt.ServerAddress = os.Getenv("PLAYWRIGHT_SERVER_ADDRESS_CHROMIUM")
|
|
}
|
|
case BrowserFirefox:
|
|
bt = pw.Firefox
|
|
if opt.ServerAddress == "" {
|
|
opt.ServerAddress = os.Getenv("PLAYWRIGHT_SERVER_ADDRESS_FIREFOX")
|
|
}
|
|
case BrowserWebKit:
|
|
bt = pw.WebKit
|
|
if opt.ServerAddress == "" {
|
|
opt.ServerAddress = os.Getenv("PLAYWRIGHT_SERVER_ADDRESS_WEBKIT")
|
|
}
|
|
default:
|
|
return nil, ErrInvalidBrowserSelection
|
|
}
|
|
|
|
// Auto-select a User-Agent matching the browser engine when the caller hasn't set one.
|
|
if opt.UserAgent == "" {
|
|
switch opt.Browser {
|
|
case BrowserChromium:
|
|
opt.UserAgent = DefaultChromiumUserAgent
|
|
default:
|
|
opt.UserAgent = DefaultFirefoxUserAgent
|
|
}
|
|
}
|
|
|
|
// Collect launch args and init scripts, starting with any stealth-mode presets.
|
|
stealth := opt.Stealth == nil || *opt.Stealth
|
|
var launchArgs []string
|
|
var initScripts []string
|
|
|
|
if stealth {
|
|
if opt.Browser == BrowserChromium {
|
|
launchArgs = append(launchArgs, stealthChromiumArgs...)
|
|
}
|
|
initScripts = append(initScripts, stealthCommonScripts...)
|
|
switch opt.Browser {
|
|
case BrowserChromium:
|
|
initScripts = append(initScripts, buildChromiumStealthScripts(randomChromiumProfile())...)
|
|
case BrowserFirefox:
|
|
initScripts = append(initScripts, buildFirefoxStealthScripts(randomFirefoxProfile())...)
|
|
}
|
|
}
|
|
|
|
launchArgs = append(launchArgs, opt.LaunchArgs...)
|
|
initScripts = append(initScripts, opt.InitScripts...)
|
|
|
|
var browser playwright.Browser
|
|
launch := true
|
|
|
|
if opt.ServerAddress != "" && !opt.UseLocalOnly {
|
|
launch = false
|
|
slog.Info("connecting to playwright server", "address", opt.ServerAddress)
|
|
var timeout float64 = 30000
|
|
browser, err = bt.Connect(opt.ServerAddress, playwright.BrowserTypeConnectOptions{Timeout: &timeout})
|
|
if err != nil {
|
|
if opt.RequireServer {
|
|
return nil, err
|
|
}
|
|
slog.Warn("failed to connect to playwright server, launching local browser", "err", err)
|
|
launch = true
|
|
}
|
|
}
|
|
|
|
if launch {
|
|
headless := opt.ShowBrowser == nil || !*opt.ShowBrowser
|
|
launchOpts := playwright.BrowserTypeLaunchOptions{
|
|
Headless: playwright.Bool(headless),
|
|
}
|
|
if len(launchArgs) > 0 {
|
|
launchOpts.Args = launchArgs
|
|
}
|
|
if stealth && opt.Browser == BrowserChromium && headless {
|
|
launchOpts.Channel = playwright.String("chromium")
|
|
}
|
|
browser, err = bt.Launch(launchOpts)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to launch browser: %w", err)
|
|
}
|
|
}
|
|
|
|
var viewport *playwright.Size
|
|
if opt.Dimensions.Width > 0 && opt.Dimensions.Height > 0 {
|
|
viewport = &playwright.Size{
|
|
Width: opt.Dimensions.Width,
|
|
Height: opt.Dimensions.Height,
|
|
}
|
|
}
|
|
|
|
var scheme *playwright.ColorScheme
|
|
if opt.DarkMode {
|
|
scheme = playwright.ColorSchemeDark
|
|
} else {
|
|
scheme = playwright.ColorSchemeNoPreference
|
|
}
|
|
|
|
bctx, err := browser.NewContext(playwright.BrowserNewContextOptions{
|
|
UserAgent: playwright.String(opt.UserAgent),
|
|
Viewport: viewport,
|
|
ColorScheme: scheme,
|
|
})
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to create browser context: %w", err)
|
|
}
|
|
|
|
for _, script := range initScripts {
|
|
if err := bctx.AddInitScript(playwright.Script{Content: &script}); err != nil {
|
|
return nil, fmt.Errorf("failed to add init script: %w", err)
|
|
}
|
|
}
|
|
|
|
if opt.CookieJar != nil {
|
|
cookies, err := opt.CookieJar.GetAll()
|
|
if err != nil {
|
|
return nil, fmt.Errorf("error getting cookies from cookie jar: %w", err)
|
|
}
|
|
for _, c := range cookies {
|
|
oc := cookieToPlaywrightOptionalCookie(c)
|
|
if err := bctx.AddCookies([]playwright.OptionalCookie{oc}); err != nil {
|
|
slog.Warn("skipping invalid cookie", "name", c.Name, "host", c.Host, "error", err)
|
|
}
|
|
}
|
|
}
|
|
|
|
return &browserInitResult{
|
|
pw: pw,
|
|
browser: browser,
|
|
bctx: bctx,
|
|
opt: opt,
|
|
}, nil
|
|
}
|
|
|
|
// mergeOptions merges variadic BrowserOptions into a base set of defaults.
|
|
func mergeOptions(base BrowserOptions, opts []BrowserOptions) BrowserOptions {
|
|
for _, o := range opts {
|
|
if o.UserAgent != "" {
|
|
base.UserAgent = o.UserAgent
|
|
}
|
|
if o.Browser != "" {
|
|
base.Browser = o.Browser
|
|
}
|
|
if o.Timeout != nil {
|
|
base.Timeout = o.Timeout
|
|
}
|
|
if o.CookieJar != nil {
|
|
base.CookieJar = o.CookieJar
|
|
}
|
|
if o.Dimensions.Width > 0 && o.Dimensions.Height > 0 {
|
|
base.Dimensions = o.Dimensions
|
|
}
|
|
if o.DarkMode {
|
|
base.DarkMode = true
|
|
}
|
|
if o.ServerAddress != "" {
|
|
base.ServerAddress = o.ServerAddress
|
|
}
|
|
if o.RequireServer {
|
|
base.RequireServer = true
|
|
}
|
|
if o.UseLocalOnly {
|
|
base.UseLocalOnly = true
|
|
}
|
|
if o.ShowBrowser != nil {
|
|
base.ShowBrowser = o.ShowBrowser
|
|
}
|
|
if len(o.LaunchArgs) > 0 {
|
|
base.LaunchArgs = append(base.LaunchArgs, o.LaunchArgs...)
|
|
}
|
|
if len(o.InitScripts) > 0 {
|
|
base.InitScripts = append(base.InitScripts, o.InitScripts...)
|
|
}
|
|
if o.Stealth != nil {
|
|
base.Stealth = o.Stealth
|
|
}
|
|
}
|
|
return base
|
|
}
|