refactor: restructure API, deduplicate code, expand test coverage
- Extract shared DeferClose helper, removing 14 duplicate copies
- Rename PlayWright-prefixed types to cleaner names (BrowserOptions, BrowserSelection, NewBrowser, etc.)
- Rename fields: ServerAddress, RequireServer (was DontLaunchOnConnectFailure)
- Extract shared initBrowser/mergeOptions into browser_init.go, deduplicating ~120 lines between NewBrowser and NewInteractiveBrowser
- Remove unused locator field from document struct
- Add tests for all previously untested packages (archive, aislegopher, wegmans, useragents, powerball) and expand existing test suites
- Add MIGRATION.md documenting all breaking API changes

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
160
browser_init.go
Normal file
160
browser_init.go
Normal file
@@ -0,0 +1,160 @@
|
||||
package extractor
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"os"
|
||||
|
||||
"github.com/playwright-community/playwright-go"
|
||||
)
|
||||
|
||||
// browserInitResult holds the result of shared browser initialization.
|
||||
type browserInitResult struct {
|
||||
pw *playwright.Playwright
|
||||
browser playwright.Browser
|
||||
bctx playwright.BrowserContext
|
||||
opt BrowserOptions
|
||||
}
|
||||
|
||||
// initBrowser performs the shared browser initialization steps:
|
||||
// start Playwright, select browser type, connect or launch, create context, load cookies.
|
||||
func initBrowser(opt BrowserOptions) (*browserInitResult, error) {
|
||||
pw, err := playwright.Run()
|
||||
if err != nil {
|
||||
err = playwright.Install()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to install playwright: %w", err)
|
||||
}
|
||||
pw, err = playwright.Run()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to start playwright: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
var bt playwright.BrowserType
|
||||
switch opt.Browser {
|
||||
case BrowserChromium:
|
||||
bt = pw.Chromium
|
||||
if opt.ServerAddress == "" {
|
||||
opt.ServerAddress = os.Getenv("PLAYWRIGHT_SERVER_ADDRESS_CHROMIUM")
|
||||
}
|
||||
case BrowserFirefox:
|
||||
bt = pw.Firefox
|
||||
if opt.ServerAddress == "" {
|
||||
opt.ServerAddress = os.Getenv("PLAYWRIGHT_SERVER_ADDRESS_FIREFOX")
|
||||
}
|
||||
case BrowserWebKit:
|
||||
bt = pw.WebKit
|
||||
if opt.ServerAddress == "" {
|
||||
opt.ServerAddress = os.Getenv("PLAYWRIGHT_SERVER_ADDRESS_WEBKIT")
|
||||
}
|
||||
default:
|
||||
return nil, ErrInvalidBrowserSelection
|
||||
}
|
||||
|
||||
var browser playwright.Browser
|
||||
launch := true
|
||||
|
||||
if opt.ServerAddress != "" && !opt.UseLocalOnly {
|
||||
launch = false
|
||||
slog.Info("connecting to playwright server", "address", opt.ServerAddress)
|
||||
var timeout float64 = 30000
|
||||
browser, err = bt.Connect(opt.ServerAddress, playwright.BrowserTypeConnectOptions{Timeout: &timeout})
|
||||
if err != nil {
|
||||
if opt.RequireServer {
|
||||
return nil, err
|
||||
}
|
||||
slog.Warn("failed to connect to playwright server, launching local browser", "err", err)
|
||||
launch = true
|
||||
}
|
||||
}
|
||||
|
||||
if launch {
|
||||
browser, err = bt.Launch(playwright.BrowserTypeLaunchOptions{
|
||||
Headless: playwright.Bool(!opt.ShowBrowser),
|
||||
})
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to launch browser: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
var viewport *playwright.Size
|
||||
if opt.Dimensions.Width > 0 && opt.Dimensions.Height > 0 {
|
||||
viewport = &playwright.Size{
|
||||
Width: opt.Dimensions.Width,
|
||||
Height: opt.Dimensions.Height,
|
||||
}
|
||||
}
|
||||
|
||||
var scheme *playwright.ColorScheme
|
||||
if opt.DarkMode {
|
||||
scheme = playwright.ColorSchemeDark
|
||||
} else {
|
||||
scheme = playwright.ColorSchemeNoPreference
|
||||
}
|
||||
|
||||
bctx, err := browser.NewContext(playwright.BrowserNewContextOptions{
|
||||
UserAgent: playwright.String(opt.UserAgent),
|
||||
Viewport: viewport,
|
||||
ColorScheme: scheme,
|
||||
})
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create browser context: %w", err)
|
||||
}
|
||||
|
||||
if opt.CookieJar != nil {
|
||||
cookies, err := opt.CookieJar.GetAll()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error getting cookies from cookie jar: %w", err)
|
||||
}
|
||||
pwCookies := make([]playwright.OptionalCookie, len(cookies))
|
||||
for i, c := range cookies {
|
||||
pwCookies[i] = cookieToPlaywrightOptionalCookie(c)
|
||||
}
|
||||
if err := bctx.AddCookies(pwCookies); err != nil {
|
||||
return nil, fmt.Errorf("error adding cookies to browser: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
return &browserInitResult{
|
||||
pw: pw,
|
||||
browser: browser,
|
||||
bctx: bctx,
|
||||
opt: opt,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// mergeOptions merges variadic BrowserOptions into a base set of defaults.
|
||||
func mergeOptions(base BrowserOptions, opts []BrowserOptions) BrowserOptions {
|
||||
for _, o := range opts {
|
||||
if o.UserAgent != "" {
|
||||
base.UserAgent = o.UserAgent
|
||||
}
|
||||
if o.Browser != "" {
|
||||
base.Browser = o.Browser
|
||||
}
|
||||
if o.Timeout != nil {
|
||||
base.Timeout = o.Timeout
|
||||
}
|
||||
if o.CookieJar != nil {
|
||||
base.CookieJar = o.CookieJar
|
||||
}
|
||||
if o.Dimensions.Width > 0 && o.Dimensions.Height > 0 {
|
||||
base.Dimensions = o.Dimensions
|
||||
}
|
||||
if o.DarkMode {
|
||||
base.DarkMode = true
|
||||
}
|
||||
if o.ServerAddress != "" {
|
||||
base.ServerAddress = o.ServerAddress
|
||||
}
|
||||
if o.RequireServer {
|
||||
base.RequireServer = true
|
||||
}
|
||||
if o.UseLocalOnly {
|
||||
base.UseLocalOnly = true
|
||||
}
|
||||
base.ShowBrowser = o.ShowBrowser
|
||||
}
|
||||
return base
|
||||
}
|
||||
Reference in New Issue
Block a user