feat(archive): keep page open on captcha-status errors so callers can promote
CI / test (push) Successful in 2m6s
CI / vet (push) Successful in 1m21s
CI / build (push) Successful in 2m13s

Adds OpenPageOptions.AllowNonOKStatus. When set, openPage no longer closes
the page on a non-2xx status (other than 404, which still closes the page
and returns ErrPageNotFound), and Open returns both a usable Document and
ErrInvalidStatusCode. archive.IsArchived and Archive opt in, so callers can
call PromoteToInteractive on the captcha page, hand it to a human solver,
and demote back to extract content from the same browser instance — avoiding
the cf_clearance fingerprint-binding issue that re-challenges any fresh
retry browser.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
2026-04-28 00:29:39 +00:00
parent 841f1ec2bf
commit 3b38637e56
3 changed files with 53 additions and 15 deletions
+24 -10
View File
@@ -293,13 +293,19 @@ func (b playWrightBrowser) openPage(_ context.Context, target string, opts OpenP
slog.Info("opened document", "url", target, "status", resp.Status(), "request", resp.Request())
if resp.Status() < 200 || resp.Status() >= 300 {
_ = page.Close()
if resp.Status() == 404 {
_ = page.Close()
return nil, ErrPageNotFound
}
slog.Info("invalid status code", "status", resp.Status(), "request", resp.Request())
return nil, fmt.Errorf("%w: %d", ErrInvalidStatusCode, resp.Status())
statusErr := fmt.Errorf("%w: %d", ErrInvalidStatusCode, resp.Status())
if !opts.AllowNonOKStatus {
_ = page.Close()
return nil, statusErr
}
// Caller asked to keep the page on non-2xx (e.g. to interact with a
// Cloudflare captcha page that returned 403). Return both the open page
// and the status error.
return page, statusErr
}
return page, nil
@@ -307,17 +313,25 @@ func (b playWrightBrowser) openPage(_ context.Context, target string, opts OpenP
func (b playWrightBrowser) Open(ctx context.Context, url string, opts OpenPageOptions) (Document, error) {
page, err := b.openPage(ctx, url, opts)
if err != nil {
return nil, err
page, openErr := b.openPage(ctx, url, opts)
if page == nil {
return nil, openErr
}
err = b.updateCookies(ctx, page)
if err != nil {
return nil, err
if cookieErr := b.updateCookies(ctx, page); cookieErr != nil {
_ = page.Close()
return nil, cookieErr
}
return newDocument(b.pw, b.browser, page)
doc, docErr := newDocument(b.pw, b.browser, page)
if docErr != nil {
_ = page.Close()
return nil, docErr
}
// openErr may be ErrInvalidStatusCode when AllowNonOKStatus was set; the
// page is still usable, so propagate both the doc and the error.
return doc, openErr
}
func (b playWrightBrowser) Close() error {