feat(duckduckgo): detect anti-bot challenge and surface ErrBlocked
DuckDuckGo intermittently serves a CAPTCHA modal ("Unfortunately, bots
use DuckDuckGo too...") instead of search results. The result selector
matches zero elements on that page, so callers used to get
([]Result{}, nil) — silent empty results that look like "no matches."
Detect the challenge via the BEM class .anomaly-modal__title and return
a sentinel ErrBlocked error so callers can distinguish blocked from no-results
and react (retry, fall back to another engine, surface to user).
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -2,6 +2,7 @@ package duckduckgo
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"log/slog"
|
"log/slog"
|
||||||
"net/url"
|
"net/url"
|
||||||
@@ -9,6 +10,10 @@ import (
|
|||||||
"gitea.stevedudenhoeffer.com/steve/go-extractor"
|
"gitea.stevedudenhoeffer.com/steve/go-extractor"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// ErrBlocked is returned when DuckDuckGo serves an anti-bot challenge
|
||||||
|
// page instead of search results.
|
||||||
|
var ErrBlocked = errors.New("duckduckgo: blocked by anti-bot challenge")
|
||||||
|
|
||||||
type SafeSearch int
|
type SafeSearch int
|
||||||
|
|
||||||
const (
|
const (
|
||||||
|
|||||||
@@ -2,6 +2,7 @@ package duckduckgo
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
|
"errors"
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
"gitea.stevedudenhoeffer.com/steve/go-extractor"
|
"gitea.stevedudenhoeffer.com/steve/go-extractor"
|
||||||
@@ -106,6 +107,26 @@ func TestExtractResults_NoLinks(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestExtractResults_Blocked(t *testing.T) {
|
||||||
|
doc := &extractortest.MockDocument{
|
||||||
|
MockNode: extractortest.MockNode{
|
||||||
|
Children: map[string]extractor.Nodes{
|
||||||
|
".anomaly-modal__title": {
|
||||||
|
&extractortest.MockNode{TextValue: "Unfortunately, bots use DuckDuckGo too."},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
results, err := extractResults(doc)
|
||||||
|
if !errors.Is(err, ErrBlocked) {
|
||||||
|
t.Fatalf("expected ErrBlocked, got %v", err)
|
||||||
|
}
|
||||||
|
if results != nil {
|
||||||
|
t.Errorf("expected nil results when blocked, got %v", results)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestSearch_UsesMockBrowser(t *testing.T) {
|
func TestSearch_UsesMockBrowser(t *testing.T) {
|
||||||
doc := &extractortest.MockDocument{
|
doc := &extractortest.MockDocument{
|
||||||
URLValue: "https://duckduckgo.com/?q=test",
|
URLValue: "https://duckduckgo.com/?q=test",
|
||||||
|
|||||||
@@ -23,9 +23,17 @@ func (s searchPage) GetResults() ([]Result, error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// extractResults parses search results from a DuckDuckGo results page.
|
// extractResults parses search results from a DuckDuckGo results page.
|
||||||
|
//
|
||||||
|
// If the page is an anti-bot challenge ("Unfortunately, bots use DuckDuckGo
|
||||||
|
// too...") rather than results, returns ErrBlocked so callers can distinguish
|
||||||
|
// "blocked" from "no matches."
|
||||||
func extractResults(doc extractor.Node) ([]Result, error) {
|
func extractResults(doc extractor.Node) ([]Result, error) {
|
||||||
var res []Result
|
var res []Result
|
||||||
|
|
||||||
|
if isBlocked(doc) {
|
||||||
|
return nil, ErrBlocked
|
||||||
|
}
|
||||||
|
|
||||||
err := doc.ForEach(`article[id^="r1-"]`, func(n extractor.Node) error {
|
err := doc.ForEach(`article[id^="r1-"]`, func(n extractor.Node) error {
|
||||||
var r Result
|
var r Result
|
||||||
|
|
||||||
@@ -68,6 +76,12 @@ func extractResults(doc extractor.Node) ([]Result, error) {
|
|||||||
return res, err
|
return res, err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// isBlocked reports whether the page is the DuckDuckGo anti-bot challenge
|
||||||
|
// modal (".anomaly-modal__title") rather than a normal results page.
|
||||||
|
func isBlocked(doc extractor.Node) bool {
|
||||||
|
return len(doc.Select(".anomaly-modal__title")) > 0
|
||||||
|
}
|
||||||
|
|
||||||
func (s searchPage) LoadMore() error {
|
func (s searchPage) LoadMore() error {
|
||||||
return s.doc.ForEach(`button#more-results`, func(n extractor.Node) error {
|
return s.doc.ForEach(`button#more-results`, func(n extractor.Node) error {
|
||||||
slog.Info("clicking load more", "node", n)
|
slog.Info("clicking load more", "node", n)
|
||||||
|
|||||||
Reference in New Issue
Block a user