- Fix Nodes.First() panic on empty slice (return nil)
- Fix ticker leak in archive.go (create once, defer Stop)
- Fix cookie path matching for empty and root paths
- Fix lost query params in google.go (u.Query().Set was discarded)
- Fix type assertion panic in useragents.go
- Fix dropped date parse error in powerball.go
- Remove unreachable dead code in megamillions.go and powerball.go
- Simplify document.go WaitForNetworkIdle, remove unused root field
- Remove debug fmt.Println calls across codebase
- Replace panic(err) with stderr+exit in all cmd/ programs
- Fix duckduckgo cmd: remove useless defer, return error on bad safesearch
- Fix archive cmd: ToConfig returns error instead of panicking
- Add 39+ unit tests across 6 new test files
- Add Gitea Actions CI workflow (build, test, vet in parallel)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
339 lines
8.7 KiB
Go
339 lines
8.7 KiB
Go
package extractor
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"time"
|
|
|
|
"github.com/playwright-community/playwright-go"
|
|
)
|
|
|
|
// InteractiveBrowser provides low-level page control for interactive browser sessions.
|
|
// Unlike Browser which is designed for scraping, InteractiveBrowser exposes mouse, keyboard,
|
|
// screenshot, and navigation APIs suitable for remote browser control.
|
|
type InteractiveBrowser interface {
|
|
// Navigate goes to the given URL and returns the final URL after any redirects.
|
|
Navigate(url string) (string, error)
|
|
// GoBack navigates back in history. Returns the final URL.
|
|
GoBack() (string, error)
|
|
// GoForward navigates forward in history. Returns the final URL.
|
|
GoForward() (string, error)
|
|
// URL returns the current page URL.
|
|
URL() string
|
|
|
|
// MouseClick clicks at the given coordinates with the specified button ("left", "middle", "right").
|
|
MouseClick(x, y float64, button string) error
|
|
// MouseMove moves the mouse to the given coordinates.
|
|
MouseMove(x, y float64) error
|
|
// MouseWheel scrolls by the given delta.
|
|
MouseWheel(deltaX, deltaY float64) error
|
|
|
|
// KeyboardType types the given text as if it were entered character by character.
|
|
KeyboardType(text string) error
|
|
// KeyboardPress presses a special key (e.g. "Enter", "Tab", "Backspace").
|
|
KeyboardPress(key string) error
|
|
// KeyboardInsertText inserts text directly into the focused element by dispatching
|
|
// only an input event (no keydown, keypress, or keyup). This is more reliable than
|
|
// KeyboardType for pasting into password fields and custom input components.
|
|
KeyboardInsertText(text string) error
|
|
|
|
// Screenshot takes a full-page screenshot as JPEG with the given quality (0-100).
|
|
Screenshot(quality int) ([]byte, error)
|
|
|
|
// Cookies returns all cookies from the browser context.
|
|
Cookies() ([]Cookie, error)
|
|
|
|
// Close tears down the browser.
|
|
Close() error
|
|
}
|
|
|
|
type interactiveBrowser struct {
|
|
pw *playwright.Playwright
|
|
browser playwright.Browser
|
|
ctx playwright.BrowserContext
|
|
page playwright.Page
|
|
}
|
|
|
|
// NewInteractiveBrowser creates a headless browser with a page ready for interactive control.
|
|
// The context is only used for cancellation during setup.
|
|
func NewInteractiveBrowser(ctx context.Context, opts ...PlayWrightBrowserOptions) (InteractiveBrowser, error) {
|
|
var thirtySeconds = 30 * time.Second
|
|
opt := PlayWrightBrowserOptions{
|
|
UserAgent: "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:142.0) Gecko/20100101 Firefox/142.0",
|
|
Browser: PlayWrightBrowserSelectionChromium,
|
|
Timeout: &thirtySeconds,
|
|
Dimensions: Size{
|
|
Width: 1280,
|
|
Height: 720,
|
|
},
|
|
}
|
|
|
|
for _, o := range opts {
|
|
if o.UserAgent != "" {
|
|
opt.UserAgent = o.UserAgent
|
|
}
|
|
if o.Browser != "" {
|
|
opt.Browser = o.Browser
|
|
}
|
|
if o.Timeout != nil {
|
|
opt.Timeout = o.Timeout
|
|
}
|
|
if o.CookieJar != nil {
|
|
opt.CookieJar = o.CookieJar
|
|
}
|
|
if o.Dimensions.Width > 0 && o.Dimensions.Height > 0 {
|
|
opt.Dimensions = o.Dimensions
|
|
}
|
|
if o.DarkMode {
|
|
opt.DarkMode = true
|
|
}
|
|
if o.PlayWrightServerAddress != "" {
|
|
opt.PlayWrightServerAddress = o.PlayWrightServerAddress
|
|
}
|
|
if o.DontLaunchOnConnectFailure {
|
|
opt.DontLaunchOnConnectFailure = true
|
|
}
|
|
if o.UseLocalOnly {
|
|
opt.UseLocalOnly = true
|
|
}
|
|
opt.ShowBrowser = o.ShowBrowser
|
|
}
|
|
|
|
if err := ctx.Err(); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
type result struct {
|
|
ib InteractiveBrowser
|
|
err error
|
|
}
|
|
|
|
ch := make(chan result, 1)
|
|
|
|
go func() {
|
|
pw, err := playwright.Run()
|
|
if err != nil {
|
|
err = playwright.Install()
|
|
if err != nil {
|
|
ch <- result{nil, fmt.Errorf("failed to install playwright: %w", err)}
|
|
return
|
|
}
|
|
pw, err = playwright.Run()
|
|
if err != nil {
|
|
ch <- result{nil, fmt.Errorf("failed to start playwright: %w", err)}
|
|
return
|
|
}
|
|
}
|
|
|
|
var bt playwright.BrowserType
|
|
switch opt.Browser {
|
|
case PlayWrightBrowserSelectionChromium:
|
|
bt = pw.Chromium
|
|
case PlayWrightBrowserSelectionFirefox:
|
|
bt = pw.Firefox
|
|
case PlayWrightBrowserSelectionWebKit:
|
|
bt = pw.WebKit
|
|
default:
|
|
ch <- result{nil, ErrInvalidBrowserSelection}
|
|
return
|
|
}
|
|
|
|
var browser playwright.Browser
|
|
var launch = true
|
|
|
|
if opt.PlayWrightServerAddress != "" && !opt.UseLocalOnly {
|
|
launch = false
|
|
var timeout float64 = 30000
|
|
browser, err = bt.Connect(opt.PlayWrightServerAddress, playwright.BrowserTypeConnectOptions{Timeout: &timeout})
|
|
if err != nil {
|
|
if opt.DontLaunchOnConnectFailure {
|
|
ch <- result{nil, err}
|
|
return
|
|
}
|
|
launch = true
|
|
}
|
|
}
|
|
|
|
if launch {
|
|
browser, err = bt.Launch(playwright.BrowserTypeLaunchOptions{
|
|
Headless: playwright.Bool(!opt.ShowBrowser),
|
|
})
|
|
if err != nil {
|
|
ch <- result{nil, fmt.Errorf("failed to launch browser: %w", err)}
|
|
return
|
|
}
|
|
}
|
|
|
|
viewport := &playwright.Size{
|
|
Width: opt.Dimensions.Width,
|
|
Height: opt.Dimensions.Height,
|
|
}
|
|
|
|
var scheme *playwright.ColorScheme
|
|
if opt.DarkMode {
|
|
scheme = playwright.ColorSchemeDark
|
|
} else {
|
|
scheme = playwright.ColorSchemeNoPreference
|
|
}
|
|
|
|
bctx, err := browser.NewContext(playwright.BrowserNewContextOptions{
|
|
UserAgent: playwright.String(opt.UserAgent),
|
|
Viewport: viewport,
|
|
ColorScheme: scheme,
|
|
})
|
|
if err != nil {
|
|
ch <- result{nil, fmt.Errorf("failed to create browser context: %w", err)}
|
|
return
|
|
}
|
|
|
|
if opt.CookieJar != nil {
|
|
cookies, err := opt.CookieJar.GetAll()
|
|
if err != nil {
|
|
ch <- result{nil, fmt.Errorf("error getting cookies from cookie jar: %w", err)}
|
|
return
|
|
}
|
|
pwCookies := make([]playwright.OptionalCookie, len(cookies))
|
|
for i, c := range cookies {
|
|
pwCookies[i] = cookieToPlaywrightOptionalCookie(c)
|
|
}
|
|
if err := bctx.AddCookies(pwCookies); err != nil {
|
|
ch <- result{nil, fmt.Errorf("error adding cookies: %w", err)}
|
|
return
|
|
}
|
|
}
|
|
|
|
page, err := bctx.NewPage()
|
|
if err != nil {
|
|
ch <- result{nil, fmt.Errorf("failed to create page: %w", err)}
|
|
return
|
|
}
|
|
|
|
ch <- result{
|
|
ib: &interactiveBrowser{
|
|
pw: pw,
|
|
browser: browser,
|
|
ctx: bctx,
|
|
page: page,
|
|
},
|
|
}
|
|
}()
|
|
|
|
select {
|
|
case <-ctx.Done():
|
|
return nil, ctx.Err()
|
|
case r := <-ch:
|
|
return r.ib, r.err
|
|
}
|
|
}
|
|
|
|
func (ib *interactiveBrowser) Navigate(url string) (string, error) {
|
|
_, err := ib.page.Goto(url, playwright.PageGotoOptions{
|
|
WaitUntil: playwright.WaitUntilStateLoad,
|
|
})
|
|
if err != nil {
|
|
return "", fmt.Errorf("navigation failed: %w", err)
|
|
}
|
|
return ib.page.URL(), nil
|
|
}
|
|
|
|
func (ib *interactiveBrowser) GoBack() (string, error) {
|
|
_, err := ib.page.GoBack()
|
|
if err != nil {
|
|
return ib.page.URL(), fmt.Errorf("go back failed: %w", err)
|
|
}
|
|
return ib.page.URL(), nil
|
|
}
|
|
|
|
func (ib *interactiveBrowser) GoForward() (string, error) {
|
|
_, err := ib.page.GoForward()
|
|
if err != nil {
|
|
return ib.page.URL(), fmt.Errorf("go forward failed: %w", err)
|
|
}
|
|
return ib.page.URL(), nil
|
|
}
|
|
|
|
func (ib *interactiveBrowser) URL() string {
|
|
return ib.page.URL()
|
|
}
|
|
|
|
func (ib *interactiveBrowser) MouseClick(x, y float64, button string) error {
|
|
var btn *playwright.MouseButton
|
|
switch button {
|
|
case "right":
|
|
btn = playwright.MouseButtonRight
|
|
case "middle":
|
|
btn = playwright.MouseButtonMiddle
|
|
default:
|
|
btn = playwright.MouseButtonLeft
|
|
}
|
|
return ib.page.Mouse().Click(x, y, playwright.MouseClickOptions{Button: btn})
|
|
}
|
|
|
|
func (ib *interactiveBrowser) MouseMove(x, y float64) error {
|
|
return ib.page.Mouse().Move(x, y)
|
|
}
|
|
|
|
func (ib *interactiveBrowser) MouseWheel(deltaX, deltaY float64) error {
|
|
return ib.page.Mouse().Wheel(deltaX, deltaY)
|
|
}
|
|
|
|
func (ib *interactiveBrowser) KeyboardType(text string) error {
|
|
return ib.page.Keyboard().Type(text)
|
|
}
|
|
|
|
func (ib *interactiveBrowser) KeyboardPress(key string) error {
|
|
return ib.page.Keyboard().Press(key)
|
|
}
|
|
|
|
func (ib *interactiveBrowser) KeyboardInsertText(text string) error {
|
|
return ib.page.Keyboard().InsertText(text)
|
|
}
|
|
|
|
func (ib *interactiveBrowser) Screenshot(quality int) ([]byte, error) {
|
|
return ib.page.Screenshot(playwright.PageScreenshotOptions{
|
|
Type: playwright.ScreenshotTypeJpeg,
|
|
Quality: playwright.Int(quality),
|
|
})
|
|
}
|
|
|
|
func (ib *interactiveBrowser) Cookies() ([]Cookie, error) {
|
|
pwCookies, err := ib.ctx.Cookies()
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to get cookies: %w", err)
|
|
}
|
|
|
|
cookies := make([]Cookie, len(pwCookies))
|
|
for i, c := range pwCookies {
|
|
cookies[i] = playwrightCookieToCookie(c)
|
|
}
|
|
return cookies, nil
|
|
}
|
|
|
|
func (ib *interactiveBrowser) Close() error {
|
|
var errs []error
|
|
if ib.page != nil {
|
|
if err := ib.page.Close(); err != nil {
|
|
errs = append(errs, err)
|
|
}
|
|
}
|
|
if ib.ctx != nil {
|
|
if err := ib.ctx.Close(); err != nil {
|
|
errs = append(errs, err)
|
|
}
|
|
}
|
|
if ib.browser != nil {
|
|
if err := ib.browser.Close(); err != nil {
|
|
errs = append(errs, err)
|
|
}
|
|
}
|
|
if ib.pw != nil {
|
|
if err := ib.pw.Stop(); err != nil {
|
|
errs = append(errs, err)
|
|
}
|
|
}
|
|
if len(errs) > 0 {
|
|
return fmt.Errorf("errors during close: %v", errs)
|
|
}
|
|
return nil
|
|
}
|