feat(duckduckgo): detect anti-bot challenge and surface ErrBlocked
DuckDuckGo intermittently serves a CAPTCHA modal ("Unfortunately, bots
use DuckDuckGo too...") instead of search results. The result selector
matches zero elements on that page, so callers used to get
([]Result{}, nil) — silent empty results that look like "no matches."
Detect the challenge via the BEM class .anomaly-modal__title and return
a typed ErrBlocked so callers can distinguish blocked from no-results
and react (retry, fall back to another engine, surface to user).
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -23,9 +23,17 @@ func (s searchPage) GetResults() ([]Result, error) {
|
||||
}
|
||||
|
||||
// extractResults parses search results from a DuckDuckGo results page.
|
||||
//
|
||||
// If the page is an anti-bot challenge ("Unfortunately, bots use DuckDuckGo
|
||||
// too...") rather than results, returns ErrBlocked so callers can distinguish
|
||||
// "blocked" from "no matches."
|
||||
func extractResults(doc extractor.Node) ([]Result, error) {
|
||||
var res []Result
|
||||
|
||||
if isBlocked(doc) {
|
||||
return nil, ErrBlocked
|
||||
}
|
||||
|
||||
err := doc.ForEach(`article[id^="r1-"]`, func(n extractor.Node) error {
|
||||
var r Result
|
||||
|
||||
@@ -68,6 +76,12 @@ func extractResults(doc extractor.Node) ([]Result, error) {
|
||||
return res, err
|
||||
}
|
||||
|
||||
// isBlocked reports whether the page is the DuckDuckGo anti-bot challenge
|
||||
// modal (".anomaly-modal__title") rather than a normal results page.
|
||||
func isBlocked(doc extractor.Node) bool {
|
||||
return len(doc.Select(".anomaly-modal__title")) > 0
|
||||
}
|
||||
|
||||
func (s searchPage) LoadMore() error {
|
||||
return s.doc.ForEach(`button#more-results`, func(n extractor.Node) error {
|
||||
slog.Info("clicking load more", "node", n)
|
||||
|
||||
Reference in New Issue
Block a user