Compare commits
12 Commits
39371dc261
...
main
| Author | SHA1 | Date | |
|---|---|---|---|
| 3b6d864330 | |||
| adefaaef36 | |||
| d89031b20d | |||
| 84e811572b | |||
| 61b68adfd0 | |||
| 0447f1bdbe | |||
| ace6c1e0bf | |||
| 1b95d12890 | |||
| 035151d9fa | |||
| 00ff7ea830 | |||
| d35d144fa2 | |||
| e0da88b9b0 |
10
document.go
10
document.go
@@ -22,9 +22,10 @@ type Document interface {
|
||||
|
||||
type document struct {
|
||||
node
|
||||
pw *playwright.Playwright
|
||||
browser playwright.Browser
|
||||
page playwright.Page
|
||||
pw *playwright.Playwright
|
||||
browser playwright.Browser
|
||||
page playwright.Page
|
||||
detached bool
|
||||
}
|
||||
|
||||
func newDocument(pw *playwright.Playwright, browser playwright.Browser, page playwright.Page) (Document, error) {
|
||||
@@ -44,6 +45,9 @@ func newDocument(pw *playwright.Playwright, browser playwright.Browser, page pla
|
||||
return res, nil
|
||||
}
|
||||
func (d *document) Close() error {
|
||||
if d.detached {
|
||||
return nil
|
||||
}
|
||||
return d.page.Close()
|
||||
}
|
||||
|
||||
|
||||
123
interactive.go
123
interactive.go
@@ -23,8 +23,13 @@ type InteractiveBrowser interface {
|
||||
|
||||
// MouseClick clicks at the given coordinates with the specified button ("left", "middle", "right").
|
||||
MouseClick(x, y float64, button string) error
|
||||
// MouseMove moves the mouse to the given coordinates.
|
||||
MouseMove(x, y float64) error
|
||||
// MouseDown presses the mouse button at the given coordinates without releasing.
|
||||
MouseDown(x, y float64, button string) error
|
||||
// MouseUp releases the mouse button at the given coordinates.
|
||||
MouseUp(x, y float64, button string) error
|
||||
// MouseMove moves the mouse to the given coordinates. An optional steps parameter
|
||||
// controls how many intermediate mousemove events are generated (default 1).
|
||||
MouseMove(x, y float64, steps ...int) error
|
||||
// MouseWheel scrolls by the given delta.
|
||||
MouseWheel(deltaX, deltaY float64) error
|
||||
|
||||
@@ -43,15 +48,27 @@ type InteractiveBrowser interface {
|
||||
// Cookies returns all cookies from the browser context.
|
||||
Cookies() ([]Cookie, error)
|
||||
|
||||
// SetDefaultTimeout sets the default timeout for all Playwright operations
|
||||
// (navigation, clicks, screenshots, cookie extraction, etc.). A value of 0
|
||||
// disables timeouts. By default, Playwright uses a 30-second timeout.
|
||||
//
|
||||
// This is the primary mechanism for preventing hung sessions: callers can
|
||||
// set a timeout so that any Playwright call returns an error instead of
|
||||
// blocking forever if the browser process crashes or the remote server
|
||||
// becomes unresponsive.
|
||||
SetDefaultTimeout(timeout time.Duration)
|
||||
|
||||
// Close tears down the browser.
|
||||
Close() error
|
||||
}
|
||||
|
||||
type interactiveBrowser struct {
|
||||
pw *playwright.Playwright
|
||||
browser playwright.Browser
|
||||
ctx playwright.BrowserContext
|
||||
page playwright.Page
|
||||
pw *playwright.Playwright
|
||||
browser playwright.Browser
|
||||
ctx playwright.BrowserContext
|
||||
page playwright.Page
|
||||
ownsInfrastructure bool
|
||||
detached bool
|
||||
}
|
||||
|
||||
// NewInteractiveBrowser creates a headless browser with a page ready for interactive control.
|
||||
@@ -88,22 +105,32 @@ func NewInteractiveBrowser(ctx context.Context, opts ...BrowserOptions) (Interac
|
||||
|
||||
page, err := res.bctx.NewPage()
|
||||
if err != nil {
|
||||
_ = res.bctx.Close()
|
||||
_ = res.browser.Close()
|
||||
_ = res.pw.Stop()
|
||||
ch <- result{nil, fmt.Errorf("failed to create page: %w", err)}
|
||||
return
|
||||
}
|
||||
|
||||
ch <- result{
|
||||
ib: &interactiveBrowser{
|
||||
pw: res.pw,
|
||||
browser: res.browser,
|
||||
ctx: res.bctx,
|
||||
page: page,
|
||||
pw: res.pw,
|
||||
browser: res.browser,
|
||||
ctx: res.bctx,
|
||||
page: page,
|
||||
ownsInfrastructure: true,
|
||||
},
|
||||
}
|
||||
}()
|
||||
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
go func() {
|
||||
r := <-ch
|
||||
if r.err == nil && r.ib != nil {
|
||||
_ = r.ib.Close()
|
||||
}
|
||||
}()
|
||||
return nil, ctx.Err()
|
||||
case r := <-ch:
|
||||
return r.ib, r.err
|
||||
@@ -153,8 +180,44 @@ func (ib *interactiveBrowser) MouseClick(x, y float64, button string) error {
|
||||
return ib.page.Mouse().Click(x, y, playwright.MouseClickOptions{Button: btn})
|
||||
}
|
||||
|
||||
func (ib *interactiveBrowser) MouseMove(x, y float64) error {
|
||||
return ib.page.Mouse().Move(x, y)
|
||||
func (ib *interactiveBrowser) MouseDown(x, y float64, button string) error {
|
||||
if err := ib.page.Mouse().Move(x, y); err != nil {
|
||||
return err
|
||||
}
|
||||
var btn *playwright.MouseButton
|
||||
switch button {
|
||||
case "right":
|
||||
btn = playwright.MouseButtonRight
|
||||
case "middle":
|
||||
btn = playwright.MouseButtonMiddle
|
||||
default:
|
||||
btn = playwright.MouseButtonLeft
|
||||
}
|
||||
return ib.page.Mouse().Down(playwright.MouseDownOptions{Button: btn})
|
||||
}
|
||||
|
||||
func (ib *interactiveBrowser) MouseUp(x, y float64, button string) error {
|
||||
if err := ib.page.Mouse().Move(x, y); err != nil {
|
||||
return err
|
||||
}
|
||||
var btn *playwright.MouseButton
|
||||
switch button {
|
||||
case "right":
|
||||
btn = playwright.MouseButtonRight
|
||||
case "middle":
|
||||
btn = playwright.MouseButtonMiddle
|
||||
default:
|
||||
btn = playwright.MouseButtonLeft
|
||||
}
|
||||
return ib.page.Mouse().Up(playwright.MouseUpOptions{Button: btn})
|
||||
}
|
||||
|
||||
func (ib *interactiveBrowser) MouseMove(x, y float64, steps ...int) error {
|
||||
var opts playwright.MouseMoveOptions
|
||||
if len(steps) > 0 && steps[0] > 1 {
|
||||
opts.Steps = playwright.Int(steps[0])
|
||||
}
|
||||
return ib.page.Mouse().Move(x, y, opts)
|
||||
}
|
||||
|
||||
func (ib *interactiveBrowser) MouseWheel(deltaX, deltaY float64) error {
|
||||
@@ -193,26 +256,40 @@ func (ib *interactiveBrowser) Cookies() ([]Cookie, error) {
|
||||
return cookies, nil
|
||||
}
|
||||
|
||||
func (ib *interactiveBrowser) SetDefaultTimeout(timeout time.Duration) {
|
||||
ms := float64(timeout.Milliseconds())
|
||||
ib.page.SetDefaultTimeout(ms)
|
||||
ib.page.SetDefaultNavigationTimeout(ms)
|
||||
ib.ctx.SetDefaultTimeout(ms)
|
||||
ib.ctx.SetDefaultNavigationTimeout(ms)
|
||||
}
|
||||
|
||||
func (ib *interactiveBrowser) Close() error {
|
||||
if ib.detached {
|
||||
return nil
|
||||
}
|
||||
|
||||
var errs []error
|
||||
if ib.page != nil {
|
||||
if err := ib.page.Close(); err != nil {
|
||||
errs = append(errs, err)
|
||||
}
|
||||
}
|
||||
if ib.ctx != nil {
|
||||
if err := ib.ctx.Close(); err != nil {
|
||||
errs = append(errs, err)
|
||||
if ib.ownsInfrastructure {
|
||||
if ib.ctx != nil {
|
||||
if err := ib.ctx.Close(); err != nil {
|
||||
errs = append(errs, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
if ib.browser != nil {
|
||||
if err := ib.browser.Close(); err != nil {
|
||||
errs = append(errs, err)
|
||||
if ib.browser != nil {
|
||||
if err := ib.browser.Close(); err != nil {
|
||||
errs = append(errs, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
if ib.pw != nil {
|
||||
if err := ib.pw.Stop(); err != nil {
|
||||
errs = append(errs, err)
|
||||
if ib.pw != nil {
|
||||
if err := ib.pw.Stop(); err != nil {
|
||||
errs = append(errs, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
if len(errs) > 0 {
|
||||
|
||||
2
node.go
2
node.go
@@ -29,7 +29,7 @@ type node struct {
|
||||
}
|
||||
|
||||
func (n node) Type(input string) error {
|
||||
return n.locator.Type(input)
|
||||
return n.locator.PressSequentially(input)
|
||||
}
|
||||
|
||||
func (n node) Click() error {
|
||||
|
||||
@@ -122,12 +122,17 @@ func playwrightSameSiteToSameSite(s *playwright.SameSiteAttribute) SameSite {
|
||||
}
|
||||
|
||||
func cookieToPlaywrightOptionalCookie(cookie Cookie) playwright.OptionalCookie {
|
||||
expires := float64(cookie.Expires.Unix())
|
||||
if cookie.Expires.IsZero() || expires <= 0 {
|
||||
expires = -1
|
||||
}
|
||||
|
||||
oc := playwright.OptionalCookie{
|
||||
Name: cookie.Name,
|
||||
Value: cookie.Value,
|
||||
Domain: playwright.String(cookie.Host),
|
||||
Path: playwright.String(cookie.Path),
|
||||
Expires: playwright.Float(float64(cookie.Expires.Unix())),
|
||||
Expires: playwright.Float(expires),
|
||||
Secure: playwright.Bool(cookie.Secure),
|
||||
HttpOnly: playwright.Bool(cookie.HttpOnly),
|
||||
}
|
||||
@@ -195,6 +200,12 @@ func NewBrowser(ctx context.Context, opts ...BrowserOptions) (Browser, error) {
|
||||
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
go func() {
|
||||
r := <-resultCh
|
||||
if r.err == nil && r.browser != nil {
|
||||
_ = r.browser.Close()
|
||||
}
|
||||
}()
|
||||
return nil, ctx.Err()
|
||||
case result := <-resultCh:
|
||||
return result.browser, result.err
|
||||
@@ -208,11 +219,35 @@ func (b playWrightBrowser) updateCookies(_ context.Context, page playwright.Page
|
||||
return fmt.Errorf("error getting cookies from browser: %w", err)
|
||||
}
|
||||
|
||||
// Build a lookup of existing cookies so we can preserve their security
|
||||
// attributes. Chromium's Cookies() API can lose or normalize Secure,
|
||||
// SameSite, and HttpOnly during the AddCookies → navigate → Cookies()
|
||||
// round-trip, so we only update Value and Expires for cookies that
|
||||
// already exist in the jar.
|
||||
existing, err := b.cookieJar.Get(page.URL())
|
||||
if err != nil {
|
||||
return fmt.Errorf("error getting existing cookies from jar: %w", err)
|
||||
}
|
||||
type cookieKey struct{ Name, Path string }
|
||||
existingMap := make(map[cookieKey]Cookie, len(existing))
|
||||
for _, c := range existing {
|
||||
existingMap[cookieKey{c.Name, c.Path}] = c
|
||||
}
|
||||
|
||||
for _, cookie := range cookies {
|
||||
// TODO: add support for deleting cookies from the jar which are deleted in the browser
|
||||
err = b.cookieJar.Set(playwrightCookieToCookie(cookie))
|
||||
c := playwrightCookieToCookie(cookie)
|
||||
|
||||
if err != nil {
|
||||
if prev, ok := existingMap[cookieKey{c.Name, c.Path}]; ok {
|
||||
// Preserve the original security attributes; only update
|
||||
// Value and Expires which are the fields that legitimately
|
||||
// change during navigation.
|
||||
c.Secure = prev.Secure
|
||||
c.HttpOnly = prev.HttpOnly
|
||||
c.SameSite = prev.SameSite
|
||||
}
|
||||
|
||||
if err = b.cookieJar.Set(c); err != nil {
|
||||
return fmt.Errorf("error setting cookie in cookie jar: %w", err)
|
||||
}
|
||||
}
|
||||
@@ -242,6 +277,7 @@ func (b playWrightBrowser) openPage(_ context.Context, target string, opts OpenP
|
||||
|
||||
resp, err := page.Goto(target, pwOpts)
|
||||
if err != nil {
|
||||
_ = page.Close()
|
||||
return nil, err
|
||||
}
|
||||
|
||||
@@ -277,8 +313,8 @@ func (b playWrightBrowser) Open(ctx context.Context, url string, opts OpenPageOp
|
||||
|
||||
func (b playWrightBrowser) Close() error {
|
||||
return errors.Join(
|
||||
b.browser.Close(),
|
||||
b.ctx.Close(),
|
||||
b.browser.Close(),
|
||||
b.pw.Stop(),
|
||||
)
|
||||
}
|
||||
|
||||
65
promote.go
Normal file
65
promote.go
Normal file
@@ -0,0 +1,65 @@
|
||||
package extractor
|
||||
|
||||
import "errors"
|
||||
|
||||
// Sentinel errors reported by PromoteToInteractive and DemoteToDocument.
var (
	// ErrNotPromotable is returned when a Document cannot be promoted to an
	// InteractiveBrowser, i.e. it is not backed by a Playwright page (for
	// example a mock or custom implementation).
	ErrNotPromotable = errors.New("document is not promotable to InteractiveBrowser")

	// ErrNotDemotable is returned when an InteractiveBrowser cannot be demoted
	// to a Document because it is not backed by a Playwright page.
	ErrNotDemotable = errors.New("interactive browser is not demotable to Document")

	// ErrAlreadyDetached is returned when promoting or demoting an object whose
	// page has already been transferred. Each Document or InteractiveBrowser
	// can only be promoted/demoted once.
	ErrAlreadyDetached = errors.New("already detached")
)
|
||||
|
||||
// PromoteToInteractive transfers ownership of the underlying Playwright page from a Document
|
||||
// to a new InteractiveBrowser. After promotion, the Document's Close method becomes a no-op
|
||||
// (the page is now owned by the returned InteractiveBrowser).
|
||||
//
|
||||
// The caller must keep the original Browser alive while the promoted InteractiveBrowser is in use,
|
||||
// since the Browser still owns the Playwright process and browser instance.
|
||||
//
|
||||
// Returns ErrNotPromotable if the Document is not backed by a Playwright page,
|
||||
// or ErrAlreadyDetached if the Document was already promoted.
|
||||
func PromoteToInteractive(doc Document) (InteractiveBrowser, error) {
|
||||
d, ok := doc.(*document)
|
||||
if !ok {
|
||||
return nil, ErrNotPromotable
|
||||
}
|
||||
|
||||
if d.detached {
|
||||
return nil, ErrAlreadyDetached
|
||||
}
|
||||
|
||||
d.detached = true
|
||||
|
||||
return &interactiveBrowser{
|
||||
pw: d.pw,
|
||||
browser: d.browser,
|
||||
ctx: d.page.Context(),
|
||||
page: d.page,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// DemoteToDocument transfers ownership of the underlying Playwright page from an
|
||||
// InteractiveBrowser back to a new Document. After demotion, the InteractiveBrowser's
|
||||
// Close method becomes a no-op (the page is now owned by the returned Document).
|
||||
//
|
||||
// Returns ErrNotDemotable if the InteractiveBrowser is not backed by a Playwright page,
|
||||
// or ErrAlreadyDetached if the InteractiveBrowser was already demoted.
|
||||
func DemoteToDocument(ib InteractiveBrowser) (Document, error) {
|
||||
b, ok := ib.(*interactiveBrowser)
|
||||
if !ok {
|
||||
return nil, ErrNotDemotable
|
||||
}
|
||||
|
||||
if b.detached {
|
||||
return nil, ErrAlreadyDetached
|
||||
}
|
||||
|
||||
b.detached = true
|
||||
|
||||
return newDocument(b.pw, b.browser, b.page)
|
||||
}
|
||||
59
promote_test.go
Normal file
59
promote_test.go
Normal file
@@ -0,0 +1,59 @@
|
||||
package extractor
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
// mockInteractiveBrowser implements InteractiveBrowser for testing without Playwright.
|
||||
type mockInteractiveBrowser struct{}
|
||||
|
||||
func (m mockInteractiveBrowser) Navigate(string) (string, error) { return "", nil }
|
||||
func (m mockInteractiveBrowser) GoBack() (string, error) { return "", nil }
|
||||
func (m mockInteractiveBrowser) GoForward() (string, error) { return "", nil }
|
||||
func (m mockInteractiveBrowser) URL() string { return "" }
|
||||
func (m mockInteractiveBrowser) MouseClick(float64, float64, string) error { return nil }
|
||||
func (m mockInteractiveBrowser) MouseDown(float64, float64, string) error { return nil }
|
||||
func (m mockInteractiveBrowser) MouseUp(float64, float64, string) error { return nil }
|
||||
func (m mockInteractiveBrowser) MouseMove(float64, float64, ...int) error { return nil }
|
||||
func (m mockInteractiveBrowser) MouseWheel(float64, float64) error { return nil }
|
||||
func (m mockInteractiveBrowser) KeyboardType(string) error { return nil }
|
||||
func (m mockInteractiveBrowser) KeyboardPress(string) error { return nil }
|
||||
func (m mockInteractiveBrowser) KeyboardInsertText(string) error { return nil }
|
||||
func (m mockInteractiveBrowser) Screenshot(int) ([]byte, error) { return nil, nil }
|
||||
func (m mockInteractiveBrowser) Cookies() ([]Cookie, error) { return nil, nil }
|
||||
func (m mockInteractiveBrowser) SetDefaultTimeout(time.Duration) {}
|
||||
func (m mockInteractiveBrowser) Close() error { return nil }
|
||||
|
||||
func TestPromoteToInteractive_NonPromotable(t *testing.T) {
|
||||
doc := &mockDocument{}
|
||||
_, err := PromoteToInteractive(doc)
|
||||
if !errors.Is(err, ErrNotPromotable) {
|
||||
t.Fatalf("expected ErrNotPromotable, got: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestPromoteToInteractive_AlreadyDetached(t *testing.T) {
|
||||
d := &document{detached: true}
|
||||
_, err := PromoteToInteractive(d)
|
||||
if !errors.Is(err, ErrAlreadyDetached) {
|
||||
t.Fatalf("expected ErrAlreadyDetached, got: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestDemoteToDocument_NonDemotable(t *testing.T) {
|
||||
ib := &mockInteractiveBrowser{}
|
||||
_, err := DemoteToDocument(ib)
|
||||
if !errors.Is(err, ErrNotDemotable) {
|
||||
t.Fatalf("expected ErrNotDemotable, got: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestDemoteToDocument_AlreadyDetached(t *testing.T) {
|
||||
ib := &interactiveBrowser{detached: true}
|
||||
_, err := DemoteToDocument(ib)
|
||||
if !errors.Is(err, ErrAlreadyDetached) {
|
||||
t.Fatalf("expected ErrAlreadyDetached, got: %v", err)
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user