From 841f1ec2bf46c5c1956c271175b464065427e5fc Mon Sep 17 00:00:00 2001 From: Steve Dudenhoeffer Date: Mon, 27 Apr 2026 23:25:28 +0000 Subject: [PATCH] feat(duckduckgo): detect anti-bot challenge and surface ErrBlocked MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit DuckDuckGo intermittently serves a CAPTCHA modal ("Unfortunately, bots use DuckDuckGo too...") instead of search results. The result selector matches zero elements on that page, so callers used to get ([]Result{}, nil) — silent empty results that look like "no matches." Detect the challenge via the BEM class .anomaly-modal__title and return the sentinel error ErrBlocked so callers can distinguish blocked from no-results and react (retry, fall back to another engine, surface to user). Co-Authored-By: Claude Opus 4.7 --- sites/duckduckgo/duckduckgo.go | 5 +++++ sites/duckduckgo/extract_test.go | 21 +++++++++++++++++++++ sites/duckduckgo/page.go | 14 ++++++++++++++ 3 files changed, 40 insertions(+) diff --git a/sites/duckduckgo/duckduckgo.go b/sites/duckduckgo/duckduckgo.go index de07d97..64ee878 100644 --- a/sites/duckduckgo/duckduckgo.go +++ b/sites/duckduckgo/duckduckgo.go @@ -2,6 +2,7 @@ package duckduckgo import ( "context" + "errors" "fmt" "log/slog" "net/url" @@ -9,6 +10,10 @@ import ( "gitea.stevedudenhoeffer.com/steve/go-extractor" ) +// ErrBlocked is returned when DuckDuckGo serves an anti-bot challenge +// page instead of search results. 
+var ErrBlocked = errors.New("duckduckgo: blocked by anti-bot challenge") + type SafeSearch int const ( diff --git a/sites/duckduckgo/extract_test.go b/sites/duckduckgo/extract_test.go index 78c6678..4fbe993 100644 --- a/sites/duckduckgo/extract_test.go +++ b/sites/duckduckgo/extract_test.go @@ -2,6 +2,7 @@ package duckduckgo import ( "context" + "errors" "testing" "gitea.stevedudenhoeffer.com/steve/go-extractor" @@ -106,6 +107,26 @@ func TestExtractResults_NoLinks(t *testing.T) { } } +func TestExtractResults_Blocked(t *testing.T) { + doc := &extractortest.MockDocument{ + MockNode: extractortest.MockNode{ + Children: map[string]extractor.Nodes{ + ".anomaly-modal__title": { + &extractortest.MockNode{TextValue: "Unfortunately, bots use DuckDuckGo too."}, + }, + }, + }, + } + + results, err := extractResults(doc) + if !errors.Is(err, ErrBlocked) { + t.Fatalf("expected ErrBlocked, got %v", err) + } + if results != nil { + t.Errorf("expected nil results when blocked, got %v", results) + } +} + func TestSearch_UsesMockBrowser(t *testing.T) { doc := &extractortest.MockDocument{ URLValue: "https://duckduckgo.com/?q=test", diff --git a/sites/duckduckgo/page.go b/sites/duckduckgo/page.go index 88f2bae..44039b9 100644 --- a/sites/duckduckgo/page.go +++ b/sites/duckduckgo/page.go @@ -23,9 +23,17 @@ func (s searchPage) GetResults() ([]Result, error) { } // extractResults parses search results from a DuckDuckGo results page. +// +// If the page is an anti-bot challenge ("Unfortunately, bots use DuckDuckGo +// too...") rather than results, returns ErrBlocked so callers can distinguish +// "blocked" from "no matches." 
func extractResults(doc extractor.Node) ([]Result, error) { var res []Result + if isBlocked(doc) { + return nil, ErrBlocked + } + err := doc.ForEach(`article[id^="r1-"]`, func(n extractor.Node) error { var r Result @@ -68,6 +76,12 @@ func extractResults(doc extractor.Node) ([]Result, error) { return res, err } +// isBlocked reports whether the page is the DuckDuckGo anti-bot challenge +// modal (".anomaly-modal__title") rather than a normal results page. +func isBlocked(doc extractor.Node) bool { + return len(doc.Select(".anomaly-modal__title")) > 0 +} + func (s searchPage) LoadMore() error { return s.doc.ForEach(`button#more-results`, func(n extractor.Node) error { slog.Info("clicking load more", "node", n)