From e0da88b9b02db5ede2dc2f4e76557cef703933f8 Mon Sep 17 00:00:00 2001 From: Steve Dudenhoeffer Date: Tue, 24 Feb 2026 02:27:42 +0000 Subject: [PATCH] feat: add PromoteToInteractive and DemoteToDocument for mid-session page transfer Allow transferring ownership of a Playwright page between Document and InteractiveBrowser modes without tearing down the browser. This enables handing a live page to a human (e.g. for captcha solving) and resuming scraping on the same page afterward. Closes #76 Co-Authored-By: Claude Opus 4.6 --- document.go | 10 +++++--- interactive.go | 47 ++++++++++++++++++++--------------- promote.go | 65 +++++++++++++++++++++++++++++++++++++++++++++++++ promote_test.go | 55 +++++++++++++++++++++++++++++++++++++++++ 4 files changed, 155 insertions(+), 22 deletions(-) create mode 100644 promote.go create mode 100644 promote_test.go diff --git a/document.go b/document.go index 532758c..c7692b2 100644 --- a/document.go +++ b/document.go @@ -22,9 +22,10 @@ type Document interface { type document struct { node - pw *playwright.Playwright - browser playwright.Browser - page playwright.Page + pw *playwright.Playwright + browser playwright.Browser + page playwright.Page + detached bool } func newDocument(pw *playwright.Playwright, browser playwright.Browser, page playwright.Page) (Document, error) { @@ -44,6 +45,9 @@ func newDocument(pw *playwright.Playwright, browser playwright.Browser, page pla return res, nil } func (d *document) Close() error { + if d.detached { + return nil + } return d.page.Close() } diff --git a/interactive.go b/interactive.go index 29bc577..aad8558 100644 --- a/interactive.go +++ b/interactive.go @@ -48,10 +48,12 @@ type InteractiveBrowser interface { } type interactiveBrowser struct { - pw *playwright.Playwright - browser playwright.Browser - ctx playwright.BrowserContext - page playwright.Page + pw *playwright.Playwright + browser playwright.Browser + ctx playwright.BrowserContext + page playwright.Page + ownsInfrastructure bool + 
detached bool } // NewInteractiveBrowser creates a headless browser with a page ready for interactive control. @@ -94,10 +96,11 @@ func NewInteractiveBrowser(ctx context.Context, opts ...BrowserOptions) (Interac ch <- result{ ib: &interactiveBrowser{ - pw: res.pw, - browser: res.browser, - ctx: res.bctx, - page: page, + pw: res.pw, + browser: res.browser, + ctx: res.bctx, + page: page, + ownsInfrastructure: true, }, } }() @@ -194,25 +197,31 @@ func (ib *interactiveBrowser) Cookies() ([]Cookie, error) { } func (ib *interactiveBrowser) Close() error { + if ib.detached { + return nil + } + var errs []error if ib.page != nil { if err := ib.page.Close(); err != nil { errs = append(errs, err) } } - if ib.ctx != nil { - if err := ib.ctx.Close(); err != nil { - errs = append(errs, err) + if ib.ownsInfrastructure { + if ib.ctx != nil { + if err := ib.ctx.Close(); err != nil { + errs = append(errs, err) + } } - } - if ib.browser != nil { - if err := ib.browser.Close(); err != nil { - errs = append(errs, err) + if ib.browser != nil { + if err := ib.browser.Close(); err != nil { + errs = append(errs, err) + } } - } - if ib.pw != nil { - if err := ib.pw.Stop(); err != nil { - errs = append(errs, err) + if ib.pw != nil { + if err := ib.pw.Stop(); err != nil { + errs = append(errs, err) + } } } if len(errs) > 0 { diff --git a/promote.go b/promote.go new file mode 100644 index 0000000..d4aeaab --- /dev/null +++ b/promote.go @@ -0,0 +1,65 @@ +package extractor + +import "errors" + +// ErrNotPromotable is returned when a Document cannot be promoted to an InteractiveBrowser. +// This happens when the Document is not backed by a Playwright page (e.g. a mock or custom implementation). +var ErrNotPromotable = errors.New("document is not promotable to InteractiveBrowser") + +// ErrNotDemotable is returned when an InteractiveBrowser cannot be demoted to a Document. +// This happens when the InteractiveBrowser is not backed by a Playwright page. 
+var ErrNotDemotable = errors.New("interactive browser is not demotable to Document")
+
+// ErrAlreadyDetached is returned when attempting to promote or demote an object that has
+// already been transferred. Each Document or InteractiveBrowser can only be promoted/demoted once.
+var ErrAlreadyDetached = errors.New("already detached")
+
+// PromoteToInteractive transfers ownership of the underlying Playwright page from a Document
+// to a new InteractiveBrowser. After promotion, the Document's Close method becomes a no-op
+// (the page is now owned by the returned InteractiveBrowser).
+//
+// The caller must keep the original Browser alive while the promoted InteractiveBrowser is in use,
+// since the Browser still owns the Playwright process and browser instance.
+//
+// Returns ErrNotPromotable if the Document is not backed by a Playwright page,
+// or ErrAlreadyDetached if the Document was already promoted.
+func PromoteToInteractive(doc Document) (InteractiveBrowser, error) {
+	d, ok := doc.(*document)
+	if !ok {
+		return nil, ErrNotPromotable
+	}
+	if d.detached {
+		return nil, ErrAlreadyDetached
+	}
+	if d.page == nil { // calling Context() on a nil page would panic; report "no Playwright page" instead
+		return nil, ErrNotPromotable
+	}
+	d.detached = true
+	return &interactiveBrowser{
+		pw:      d.pw,
+		browser: d.browser,
+		ctx:     d.page.Context(),
+		page:    d.page, // ownsInfrastructure stays false, so Close closes only this page
+	}, nil
+}
+
+// DemoteToDocument transfers ownership of the underlying Playwright page from an
+// InteractiveBrowser back to a new Document. After demotion, the InteractiveBrowser's
+// Close method becomes a no-op (the page is now owned by the returned Document).
+// NOTE: the Document's Close only closes the page; nothing closes the context, browser and Playwright process of a browser created by NewInteractiveBrowser once it has been demoted.
+//
+// Returns ErrNotDemotable if the InteractiveBrowser is not backed by a Playwright page, or ErrAlreadyDetached if the InteractiveBrowser was already demoted.
+func DemoteToDocument(ib InteractiveBrowser) (Document, error) {
+	b, ok := ib.(*interactiveBrowser)
+	if !ok {
+		return nil, ErrNotDemotable
+	}
+	if b.detached {
+		return nil, ErrAlreadyDetached
+	}
+	doc, err := newDocument(b.pw, b.browser, b.page)
+	if err == nil {
+		b.detached = true // hand over only once the Document exists, so a failed demotion leaves ib closable
+	}
+	return doc, err
+}
diff --git a/promote_test.go b/promote_test.go
new file mode 100644
index 0000000..88a8b9d
--- /dev/null
+++ b/promote_test.go
@@ -0,0 +1,55 @@
+package extractor
+
+import (
+	"errors"
+	"testing"
+)
+
+// mockInteractiveBrowser implements InteractiveBrowser for testing without Playwright.
+type mockInteractiveBrowser struct{}
+
+func (m mockInteractiveBrowser) Navigate(string) (string, error) { return "", nil }
+func (m mockInteractiveBrowser) GoBack() (string, error) { return "", nil }
+func (m mockInteractiveBrowser) GoForward() (string, error) { return "", nil }
+func (m mockInteractiveBrowser) URL() string { return "" }
+func (m mockInteractiveBrowser) MouseClick(float64, float64, string) error { return nil }
+func (m mockInteractiveBrowser) MouseMove(float64, float64) error { return nil }
+func (m mockInteractiveBrowser) MouseWheel(float64, float64) error { return nil }
+func (m mockInteractiveBrowser) KeyboardType(string) error { return nil }
+func (m mockInteractiveBrowser) KeyboardPress(string) error { return nil }
+func (m mockInteractiveBrowser) KeyboardInsertText(string) error { return nil }
+func (m mockInteractiveBrowser) Screenshot(int) ([]byte, error) { return nil, nil }
+func (m mockInteractiveBrowser) Cookies() ([]Cookie, error) { return nil, nil }
+func (m mockInteractiveBrowser) Close() error { return nil }
+
+func TestPromoteToInteractive_NonPromotable(t *testing.T) {
+	doc := &mockDocument{}
+	_, err := PromoteToInteractive(doc)
+	if !errors.Is(err, ErrNotPromotable) {
+		t.Fatalf("expected ErrNotPromotable, got: %v", err)
+	}
+}
+
+func TestPromoteToInteractive_AlreadyDetached(t *testing.T) {
+	d := &document{detached: true}
+	_, err := PromoteToInteractive(d)
+	if
!errors.Is(err, ErrAlreadyDetached) { + t.Fatalf("expected ErrAlreadyDetached, got: %v", err) + } +} + +func TestDemoteToDocument_NonDemotable(t *testing.T) { + ib := &mockInteractiveBrowser{} + _, err := DemoteToDocument(ib) + if !errors.Is(err, ErrNotDemotable) { + t.Fatalf("expected ErrNotDemotable, got: %v", err) + } +} + +func TestDemoteToDocument_AlreadyDetached(t *testing.T) { + ib := &interactiveBrowser{detached: true} + _, err := DemoteToDocument(ib) + if !errors.Is(err, ErrAlreadyDetached) { + t.Fatalf("expected ErrAlreadyDetached, got: %v", err) + } +}