diff --git a/interactive.go b/interactive.go new file mode 100644 index 0000000..ca1d371 --- /dev/null +++ b/interactive.go @@ -0,0 +1,331 @@ +package extractor + +import ( + "context" + "fmt" + "time" + + "github.com/playwright-community/playwright-go" +) + +// InteractiveBrowser provides low-level page control for interactive browser sessions. +// Unlike Browser which is designed for scraping, InteractiveBrowser exposes mouse, keyboard, +// screenshot, and navigation APIs suitable for remote browser control. +type InteractiveBrowser interface { + // Navigate goes to the given URL and returns the final URL after any redirects. + Navigate(url string) (string, error) + // GoBack navigates back in history. Returns the final URL. + GoBack() (string, error) + // GoForward navigates forward in history. Returns the final URL. + GoForward() (string, error) + // URL returns the current page URL. + URL() string + + // MouseClick clicks at the given coordinates with the specified button ("left", "middle", "right"). + MouseClick(x, y float64, button string) error + // MouseMove moves the mouse to the given coordinates. + MouseMove(x, y float64) error + // MouseWheel scrolls by the given delta. + MouseWheel(deltaX, deltaY float64) error + + // KeyboardType types the given text as if it were entered character by character. + KeyboardType(text string) error + // KeyboardPress presses a special key (e.g. "Enter", "Tab", "Backspace"). + KeyboardPress(key string) error + + // Screenshot takes a full-page screenshot as JPEG with the given quality (0-100). + Screenshot(quality int) ([]byte, error) + + // Cookies returns all cookies from the browser context. + Cookies() ([]Cookie, error) + + // Close tears down the browser. + Close() error +} + +type interactiveBrowser struct { + pw *playwright.Playwright + browser playwright.Browser + ctx playwright.BrowserContext + page playwright.Page +} + +// NewInteractiveBrowser creates a headless browser with a page ready for interactive control. +// The context is only used for cancellation during setup. +func NewInteractiveBrowser(ctx context.Context, opts ...PlayWrightBrowserOptions) (InteractiveBrowser, error) { + var thirtySeconds = 30 * time.Second + opt := PlayWrightBrowserOptions{ + UserAgent: "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:142.0) Gecko/20100101 Firefox/142.0", + Browser: PlayWrightBrowserSelectionChromium, + Timeout: &thirtySeconds, + Dimensions: Size{ + Width: 1280, + Height: 720, + }, + } + + for _, o := range opts { + if o.UserAgent != "" { + opt.UserAgent = o.UserAgent + } + if o.Browser != "" { + opt.Browser = o.Browser + } + if o.Timeout != nil { + opt.Timeout = o.Timeout + } + if o.CookieJar != nil { + opt.CookieJar = o.CookieJar + } + if o.Dimensions.Width > 0 && o.Dimensions.Height > 0 { + opt.Dimensions = o.Dimensions + } + if o.DarkMode { + opt.DarkMode = true + } + if o.PlayWrightServerAddress != "" { + opt.PlayWrightServerAddress = o.PlayWrightServerAddress + } + if o.DontLaunchOnConnectFailure { + opt.DontLaunchOnConnectFailure = true + } + if o.UseLocalOnly { + opt.UseLocalOnly = true + } + opt.ShowBrowser = o.ShowBrowser + } + + if err := ctx.Err(); err != nil { + return nil, err + } + + type result struct { + ib InteractiveBrowser + err error + } + + ch := make(chan result, 1) + + go func() { + pw, err := playwright.Run() + if err != nil { + err = playwright.Install() + if err != nil { + ch <- result{nil, fmt.Errorf("failed to install playwright: %w", err)} + return + } + pw, err = playwright.Run() + if err != nil { + ch <- result{nil, fmt.Errorf("failed to start playwright: %w", err)} + return + } + } + + var bt playwright.BrowserType + switch opt.Browser { + case PlayWrightBrowserSelectionChromium: + bt = pw.Chromium + case PlayWrightBrowserSelectionFirefox: + bt = pw.Firefox + case PlayWrightBrowserSelectionWebKit: + bt = pw.WebKit + default: + ch <- result{nil, ErrInvalidBrowserSelection} + return + } + + var browser playwright.Browser + var launch = true + + if opt.PlayWrightServerAddress != "" && !opt.UseLocalOnly { + launch = false + var timeout float64 = 30000 + browser, err = bt.Connect(opt.PlayWrightServerAddress, playwright.BrowserTypeConnectOptions{Timeout: &timeout}) + if err != nil { + if opt.DontLaunchOnConnectFailure { + ch <- result{nil, err} + return + } + launch = true + } + } + + if launch { + browser, err = bt.Launch(playwright.BrowserTypeLaunchOptions{ + Headless: playwright.Bool(!opt.ShowBrowser), + }) + if err != nil { + ch <- result{nil, fmt.Errorf("failed to launch browser: %w", err)} + return + } + } + + viewport := &playwright.Size{ + Width: opt.Dimensions.Width, + Height: opt.Dimensions.Height, + } + + var scheme *playwright.ColorScheme + if opt.DarkMode { + scheme = playwright.ColorSchemeDark + } else { + scheme = playwright.ColorSchemeNoPreference + } + + bctx, err := browser.NewContext(playwright.BrowserNewContextOptions{ + UserAgent: playwright.String(opt.UserAgent), + Viewport: viewport, + ColorScheme: scheme, + }) + if err != nil { + ch <- result{nil, fmt.Errorf("failed to create browser context: %w", err)} + return + } + + if opt.CookieJar != nil { + cookies, err := opt.CookieJar.GetAll() + if err != nil { + ch <- result{nil, fmt.Errorf("error getting cookies from cookie jar: %w", err)} + return + } + pwCookies := make([]playwright.OptionalCookie, len(cookies)) + for i, c := range cookies { + pwCookies[i] = cookieToPlaywrightOptionalCookie(c) + } + if err := bctx.AddCookies(pwCookies); err != nil { + ch <- result{nil, fmt.Errorf("error adding cookies: %w", err)} + return + } + } + + page, err := bctx.NewPage() + if err != nil { + ch <- result{nil, fmt.Errorf("failed to create page: %w", err)} + return + } + + ch <- result{ + ib: &interactiveBrowser{ + pw: pw, + browser: browser, + ctx: bctx, + page: page, + }, + } + }() + + select { + case <-ctx.Done(): + return nil, ctx.Err() + case r := <-ch: + return r.ib, r.err + } +} + +func (ib *interactiveBrowser) Navigate(url string) (string, error) { + resp, err := ib.page.Goto(url, playwright.PageGotoOptions{ + WaitUntil: playwright.WaitUntilStateLoad, + }) + if err != nil { + return "", fmt.Errorf("navigation failed: %w", err) + } + _ = resp + return ib.page.URL(), nil +} + +func (ib *interactiveBrowser) GoBack() (string, error) { + _, err := ib.page.GoBack() + if err != nil { + return ib.page.URL(), fmt.Errorf("go back failed: %w", err) + } + return ib.page.URL(), nil +} + +func (ib *interactiveBrowser) GoForward() (string, error) { + _, err := ib.page.GoForward() + if err != nil { + return ib.page.URL(), fmt.Errorf("go forward failed: %w", err) + } + return ib.page.URL(), nil +} + +func (ib *interactiveBrowser) URL() string { + return ib.page.URL() +} + +func (ib *interactiveBrowser) MouseClick(x, y float64, button string) error { + var btn *playwright.MouseButton + switch button { + case "right": + btn = playwright.MouseButtonRight + case "middle": + btn = playwright.MouseButtonMiddle + default: + btn = playwright.MouseButtonLeft + } + return ib.page.Mouse().Click(x, y, playwright.MouseClickOptions{Button: btn}) +} + +func (ib *interactiveBrowser) MouseMove(x, y float64) error { + return ib.page.Mouse().Move(x, y) +} + +func (ib *interactiveBrowser) MouseWheel(deltaX, deltaY float64) error { + return ib.page.Mouse().Wheel(deltaX, deltaY) +} + +func (ib *interactiveBrowser) KeyboardType(text string) error { + return ib.page.Keyboard().Type(text) +} + +func (ib *interactiveBrowser) KeyboardPress(key string) error { + return ib.page.Keyboard().Press(key) +} + +func (ib *interactiveBrowser) Screenshot(quality int) ([]byte, error) { + return ib.page.Screenshot(playwright.PageScreenshotOptions{ + Type: playwright.ScreenshotTypeJpeg, + Quality: playwright.Int(quality), + }) +} + +func (ib *interactiveBrowser) Cookies() ([]Cookie, error) { + pwCookies, err := ib.ctx.Cookies() + if err != nil { + return nil, fmt.Errorf("failed to get cookies: %w", err) + } + + cookies := make([]Cookie, len(pwCookies)) + for i, c := range pwCookies { + cookies[i] = playwrightCookieToCookie(c) + } + return cookies, nil +} + +func (ib *interactiveBrowser) Close() error { + var errs []error + if ib.page != nil { + if err := ib.page.Close(); err != nil { + errs = append(errs, err) + } + } + if ib.ctx != nil { + if err := ib.ctx.Close(); err != nil { + errs = append(errs, err) + } + } + if ib.browser != nil { + if err := ib.browser.Close(); err != nil { + errs = append(errs, err) + } + } + if ib.pw != nil { + if err := ib.pw.Stop(); err != nil { + errs = append(errs, err) + } + } + if len(errs) > 0 { + return fmt.Errorf("errors during close: %v", errs) + } + return nil +}