Compare commits

...

6 Commits

Author SHA1 Message Date
3b6d864330 Preserve cookie security attributes in updateCookies round-trip
All checks were successful
CI / build (push) Successful in 1m18s
CI / vet (push) Successful in 1m17s
CI / test (push) Successful in 1m19s
Chromium's Cookies() API can lose or normalize Secure, SameSite, and
HttpOnly attributes during the AddCookies → navigate → Cookies()
round-trip. This caused cookies like cf_clearance (set with
Secure=true, SameSite=None) to be overwritten with Chromium's defaults
(Secure=false, SameSite=Lax).

Now updateCookies() looks up existing cookies in the jar first. For
cookies that already exist, only Value and Expires are updated —
security attributes are preserved from the original. New cookies from
the server are still written with all their attributes.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-17 02:37:47 +00:00
adefaaef36 feat: add SetDefaultTimeout to InteractiveBrowser interface
All checks were successful
CI / test (push) Successful in 45s
CI / vet (push) Successful in 49s
CI / build (push) Successful in 50s
Adds SetDefaultTimeout(time.Duration) to the InteractiveBrowser interface,
delegating to Playwright's Page and BrowserContext SetDefaultTimeout and
SetDefaultNavigationTimeout methods. This allows callers to set a timeout
so Playwright operations return an error instead of blocking forever when
the browser process crashes or the remote server becomes unresponsive.

Closes #86

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-17 01:59:16 +00:00
d89031b20d Merge pull request 'fix: sanitize cookie expires for Playwright compatibility' (#85) from fix/84-cookie-expires-validation into main
All checks were successful
CI / vet (push) Successful in 2m48s
CI / build (push) Successful in 2m50s
CI / test (push) Successful in 2m51s
2026-03-13 01:29:07 +00:00
84e811572b fix: sanitize cookie expires for Playwright compatibility
All checks were successful
CI / build (pull_request) Successful in 1m51s
CI / vet (pull_request) Successful in 2m38s
CI / test (pull_request) Successful in 2m59s
Playwright requires cookie expires to be either -1 (session cookie) or
a positive unix timestamp. When a cookie has no expiry (zero time.Time),
.Unix() returns -62135596800 which Playwright rejects. Cookies with
non-positive timestamps (e.g. Cloudflare's __cf_bm) also fail.

Now treats zero time or non-positive unix timestamps as session cookies
by setting expires to -1.

Fixes #84

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-13 01:28:45 +00:00
61b68adfd0 Merge pull request 'fix: address Playwright API audit findings' (#83) from fix/playwright-api-audit into main
All checks were successful
CI / test (push) Successful in 34s
CI / vet (push) Successful in 47s
CI / build (push) Successful in 48s
Reviewed-on: #83
2026-03-02 04:59:43 +00:00
0447f1bdbe fix: address Playwright API audit findings
All checks were successful
CI / test (pull_request) Successful in 34s
CI / vet (pull_request) Successful in 48s
CI / build (pull_request) Successful in 49s
- Replace deprecated Locator.Type() with PressSequentially() (node.go)
- Close page on Goto failure to prevent resource leak (playwright.go)
- Fix teardown order: close context before browser (playwright.go)
- Clean up resources on NewPage failure (interactive.go)
- Spawn cleanup goroutine on context cancellation in both constructors

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-02 04:54:00 +00:00
4 changed files with 70 additions and 5 deletions

View File

@@ -48,6 +48,16 @@ type InteractiveBrowser interface {
// Cookies returns all cookies from the browser context.
Cookies() ([]Cookie, error)
// SetDefaultTimeout sets the default timeout for all Playwright operations
// (navigation, clicks, screenshots, cookie extraction, etc.). A value of 0
// disables timeouts. By default, Playwright uses a 30-second timeout.
//
// This is the primary mechanism for preventing hung sessions: callers can
// set a timeout so that any Playwright call returns an error instead of
// blocking forever if the browser process crashes or the remote server
// becomes unresponsive.
SetDefaultTimeout(timeout time.Duration)
// Close tears down the browser.
Close() error
}
@@ -95,6 +105,9 @@ func NewInteractiveBrowser(ctx context.Context, opts ...BrowserOptions) (Interac
page, err := res.bctx.NewPage()
if err != nil {
_ = res.bctx.Close()
_ = res.browser.Close()
_ = res.pw.Stop()
ch <- result{nil, fmt.Errorf("failed to create page: %w", err)}
return
}
@@ -112,6 +125,12 @@ func NewInteractiveBrowser(ctx context.Context, opts ...BrowserOptions) (Interac
select {
case <-ctx.Done():
go func() {
r := <-ch
if r.err == nil && r.ib != nil {
_ = r.ib.Close()
}
}()
return nil, ctx.Err()
case r := <-ch:
return r.ib, r.err
@@ -237,6 +256,14 @@ func (ib *interactiveBrowser) Cookies() ([]Cookie, error) {
return cookies, nil
}
// SetDefaultTimeout applies timeout as both the default operation timeout and
// the default navigation timeout on the page and on the browser context.
//
// The value is handed to Playwright in milliseconds, where 0 disables
// timeouts. A positive duration shorter than one millisecond would truncate to
// 0 and silently disable the timeout, so it is rounded up to 1ms instead. A
// timeout of exactly 0 is passed through unchanged.
func (ib *interactiveBrowser) SetDefaultTimeout(timeout time.Duration) {
	ms := float64(timeout.Milliseconds())
	if timeout > 0 && ms == 0 {
		// Sub-millisecond but non-zero: keep a timeout in force rather than
		// letting integer truncation turn it into "disabled".
		ms = 1
	}

	ib.page.SetDefaultTimeout(ms)
	ib.page.SetDefaultNavigationTimeout(ms)
	ib.ctx.SetDefaultTimeout(ms)
	ib.ctx.SetDefaultNavigationTimeout(ms)
}
func (ib *interactiveBrowser) Close() error {
if ib.detached {
return nil

View File

@@ -29,7 +29,7 @@ type node struct {
}
// Type enters input into the node by delegating to the locator's
// PressSequentially, which replaces the deprecated Locator.Type call.
// It returns whatever error the locator reports.
func (n node) Type(input string) error {
	return n.locator.PressSequentially(input)
}
func (n node) Click() error {

View File

@@ -122,12 +122,17 @@ func playwrightSameSiteToSameSite(s *playwright.SameSiteAttribute) SameSite {
}
func cookieToPlaywrightOptionalCookie(cookie Cookie) playwright.OptionalCookie {
expires := float64(cookie.Expires.Unix())
if cookie.Expires.IsZero() || expires <= 0 {
expires = -1
}
oc := playwright.OptionalCookie{
Name: cookie.Name,
Value: cookie.Value,
Domain: playwright.String(cookie.Host),
Path: playwright.String(cookie.Path),
Expires: playwright.Float(float64(cookie.Expires.Unix())),
Expires: playwright.Float(expires),
Secure: playwright.Bool(cookie.Secure),
HttpOnly: playwright.Bool(cookie.HttpOnly),
}
@@ -195,6 +200,12 @@ func NewBrowser(ctx context.Context, opts ...BrowserOptions) (Browser, error) {
select {
case <-ctx.Done():
go func() {
r := <-resultCh
if r.err == nil && r.browser != nil {
_ = r.browser.Close()
}
}()
return nil, ctx.Err()
case result := <-resultCh:
return result.browser, result.err
@@ -208,11 +219,35 @@ func (b playWrightBrowser) updateCookies(_ context.Context, page playwright.Page
return fmt.Errorf("error getting cookies from browser: %w", err)
}
// Build a lookup of existing cookies so we can preserve their security
// attributes. Chromium's Cookies() API can lose or normalize Secure,
// SameSite, and HttpOnly during the AddCookies → navigate → Cookies()
// round-trip, so we only update Value and Expires for cookies that
// already exist in the jar.
existing, err := b.cookieJar.Get(page.URL())
if err != nil {
return fmt.Errorf("error getting existing cookies from jar: %w", err)
}
type cookieKey struct{ Name, Path string }
existingMap := make(map[cookieKey]Cookie, len(existing))
for _, c := range existing {
existingMap[cookieKey{c.Name, c.Path}] = c
}
for _, cookie := range cookies {
// TODO: add support for deleting cookies from the jar which are deleted in the browser
err = b.cookieJar.Set(playwrightCookieToCookie(cookie))
c := playwrightCookieToCookie(cookie)
if err != nil {
if prev, ok := existingMap[cookieKey{c.Name, c.Path}]; ok {
// Preserve the original security attributes; only update
// Value and Expires which are the fields that legitimately
// change during navigation.
c.Secure = prev.Secure
c.HttpOnly = prev.HttpOnly
c.SameSite = prev.SameSite
}
if err = b.cookieJar.Set(c); err != nil {
return fmt.Errorf("error setting cookie in cookie jar: %w", err)
}
}
@@ -242,6 +277,7 @@ func (b playWrightBrowser) openPage(_ context.Context, target string, opts OpenP
resp, err := page.Goto(target, pwOpts)
if err != nil {
_ = page.Close()
return nil, err
}
@@ -277,8 +313,8 @@ func (b playWrightBrowser) Open(ctx context.Context, url string, opts OpenPageOp
// Close tears down the resources held by b in dependency order: the browser
// context is closed first, then the browser, and finally b.pw is stopped.
// Every step runs even if an earlier one fails; the individual errors are
// combined with errors.Join, which yields nil when all steps succeed.
func (b playWrightBrowser) Close() error {
	return errors.Join(
		b.ctx.Close(),
		b.browser.Close(),
		b.pw.Stop(),
	)
}

View File

@@ -3,6 +3,7 @@ package extractor
import (
"errors"
"testing"
"time"
)
// mockInteractiveBrowser implements InteractiveBrowser for testing without Playwright.
@@ -22,6 +23,7 @@ func (m mockInteractiveBrowser) KeyboardPress(string) error { return
// KeyboardInsertText is a no-op stub that always reports success.
func (mockInteractiveBrowser) KeyboardInsertText(string) error {
	return nil
}

// Screenshot is a stub that returns no image data and no error.
func (mockInteractiveBrowser) Screenshot(int) ([]byte, error) {
	return nil, nil
}

// Cookies is a stub that returns no cookies and no error.
func (mockInteractiveBrowser) Cookies() ([]Cookie, error) {
	return nil, nil
}

// SetDefaultTimeout is a no-op stub; the supplied duration is ignored.
func (mockInteractiveBrowser) SetDefaultTimeout(time.Duration) {
}

// Close is a no-op stub that always reports success.
func (mockInteractiveBrowser) Close() error {
	return nil
}
func TestPromoteToInteractive_NonPromotable(t *testing.T) {