feat(archive): keep page open on captcha-status errors so callers can promote
Adds OpenPageOptions.AllowNonOKStatus. When set, openPage no longer closes the page on a non-2xx status (other than 404, which still closes the page and returns ErrPageNotFound), and Open returns both a usable Document and ErrInvalidStatusCode. Callers that opt in must therefore check the returned Document even when the error is non-nil.

archive.IsArchived and Archive opt in, so callers can PromoteToInteractive the captcha page, hand it to a human solver, and demote back to extract content from the same browser instance — avoiding the cf_clearance fingerprint-binding issue that re-challenges any fresh retry browser.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
+24
-10
@@ -293,13 +293,19 @@ func (b playWrightBrowser) openPage(_ context.Context, target string, opts OpenP
|
||||
slog.Info("opened document", "url", target, "status", resp.Status(), "request", resp.Request())
|
||||
|
||||
if resp.Status() < 200 || resp.Status() >= 300 {
|
||||
_ = page.Close()
|
||||
|
||||
if resp.Status() == 404 {
|
||||
_ = page.Close()
|
||||
return nil, ErrPageNotFound
|
||||
}
|
||||
slog.Info("invalid status code", "status", resp.Status(), "request", resp.Request())
|
||||
return nil, fmt.Errorf("%w: %d", ErrInvalidStatusCode, resp.Status())
|
||||
statusErr := fmt.Errorf("%w: %d", ErrInvalidStatusCode, resp.Status())
|
||||
if !opts.AllowNonOKStatus {
|
||||
_ = page.Close()
|
||||
return nil, statusErr
|
||||
}
|
||||
// Caller asked to keep the page on non-2xx (e.g. to interact with a
|
||||
// Cloudflare captcha page that returned 403). Return both.
|
||||
return page, statusErr
|
||||
}
|
||||
|
||||
return page, nil
|
||||
@@ -307,17 +313,25 @@ func (b playWrightBrowser) openPage(_ context.Context, target string, opts OpenP
|
||||
|
||||
func (b playWrightBrowser) Open(ctx context.Context, url string, opts OpenPageOptions) (Document, error) {
|
||||
|
||||
page, err := b.openPage(ctx, url, opts)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
page, openErr := b.openPage(ctx, url, opts)
|
||||
if page == nil {
|
||||
return nil, openErr
|
||||
}
|
||||
|
||||
err = b.updateCookies(ctx, page)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
if cookieErr := b.updateCookies(ctx, page); cookieErr != nil {
|
||||
_ = page.Close()
|
||||
return nil, cookieErr
|
||||
}
|
||||
|
||||
return newDocument(b.pw, b.browser, page)
|
||||
doc, docErr := newDocument(b.pw, b.browser, page)
|
||||
if docErr != nil {
|
||||
_ = page.Close()
|
||||
return nil, docErr
|
||||
}
|
||||
|
||||
// openErr may be ErrInvalidStatusCode when AllowNonOKStatus was set; the
|
||||
// page is still usable, so propagate both the doc and the error.
|
||||
return doc, openErr
|
||||
}
|
||||
|
||||
func (b playWrightBrowser) Close() error {
|
||||
|
||||
Reference in New Issue
Block a user