841f1ec2bf
DuckDuckGo intermittently serves a CAPTCHA modal ("Unfortunately, bots
use DuckDuckGo too...") instead of search results. The result selector
matches zero elements on that page, so callers used to get
([]Result{}, nil) — silent empty results that look like "no matches."
Detect the challenge via the BEM class .anomaly-modal__title and return
a typed ErrBlocked so callers can distinguish blocked from no-results
and react (retry, fall back to another engine, surface to the user).
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
159 lines
4.0 KiB
Go
159 lines
4.0 KiB
Go
package duckduckgo
|
|
|
|
import (
|
|
"context"
|
|
"errors"
|
|
"testing"
|
|
|
|
"gitea.stevedudenhoeffer.com/steve/go-extractor"
|
|
"gitea.stevedudenhoeffer.com/steve/go-extractor/extractortest"
|
|
)
|
|
|
|
func makeResultNode(url, title, desc string) *extractortest.MockNode {
|
|
return &extractortest.MockNode{
|
|
Children: map[string]extractor.Nodes{
|
|
`a[href][target="_self"]`: {
|
|
&extractortest.MockNode{Attrs: map[string]string{"href": url}},
|
|
},
|
|
"h2": {
|
|
&extractortest.MockNode{TextValue: title},
|
|
},
|
|
"span > span": {
|
|
&extractortest.MockNode{TextValue: desc},
|
|
},
|
|
},
|
|
}
|
|
}
|
|
|
|
func TestExtractResults(t *testing.T) {
|
|
doc := &extractortest.MockDocument{
|
|
URLValue: "https://duckduckgo.com/?q=test",
|
|
MockNode: extractortest.MockNode{
|
|
Children: map[string]extractor.Nodes{
|
|
`article[id^="r1-"]`: {
|
|
makeResultNode("https://example.com", "Example", "An example site"),
|
|
makeResultNode("https://golang.org", "Go", "Go programming language"),
|
|
},
|
|
},
|
|
},
|
|
}
|
|
|
|
results, err := extractResults(doc)
|
|
if err != nil {
|
|
t.Fatalf("extractResults() error: %v", err)
|
|
}
|
|
|
|
if len(results) != 2 {
|
|
t.Fatalf("expected 2 results, got %d", len(results))
|
|
}
|
|
|
|
if results[0].URL != "https://example.com" {
|
|
t.Errorf("results[0].URL = %q, want %q", results[0].URL, "https://example.com")
|
|
}
|
|
if results[0].Title != "Example" {
|
|
t.Errorf("results[0].Title = %q, want %q", results[0].Title, "Example")
|
|
}
|
|
if results[0].Description != "An example site" {
|
|
t.Errorf("results[0].Description = %q, want %q", results[0].Description, "An example site")
|
|
}
|
|
|
|
if results[1].URL != "https://golang.org" {
|
|
t.Errorf("results[1].URL = %q, want %q", results[1].URL, "https://golang.org")
|
|
}
|
|
if results[1].Title != "Go" {
|
|
t.Errorf("results[1].Title = %q, want %q", results[1].Title, "Go")
|
|
}
|
|
}
|
|
|
|
func TestExtractResults_NoResults(t *testing.T) {
|
|
doc := &extractortest.MockDocument{
|
|
MockNode: extractortest.MockNode{
|
|
Children: map[string]extractor.Nodes{
|
|
`article[id^="r1-"]`: {},
|
|
},
|
|
},
|
|
}
|
|
|
|
results, err := extractResults(doc)
|
|
if err != nil {
|
|
t.Fatalf("extractResults() error: %v", err)
|
|
}
|
|
if len(results) != 0 {
|
|
t.Errorf("expected 0 results, got %d", len(results))
|
|
}
|
|
}
|
|
|
|
func TestExtractResults_NoLinks(t *testing.T) {
|
|
doc := &extractortest.MockDocument{
|
|
MockNode: extractortest.MockNode{
|
|
Children: map[string]extractor.Nodes{
|
|
`article[id^="r1-"]`: {
|
|
&extractortest.MockNode{
|
|
Children: map[string]extractor.Nodes{
|
|
`a[href][target="_self"]`: {},
|
|
},
|
|
},
|
|
},
|
|
},
|
|
},
|
|
}
|
|
|
|
results, err := extractResults(doc)
|
|
if err != nil {
|
|
t.Fatalf("extractResults() error: %v", err)
|
|
}
|
|
if len(results) != 0 {
|
|
t.Errorf("expected 0 results (no links), got %d", len(results))
|
|
}
|
|
}
|
|
|
|
func TestExtractResults_Blocked(t *testing.T) {
|
|
doc := &extractortest.MockDocument{
|
|
MockNode: extractortest.MockNode{
|
|
Children: map[string]extractor.Nodes{
|
|
".anomaly-modal__title": {
|
|
&extractortest.MockNode{TextValue: "Unfortunately, bots use DuckDuckGo too."},
|
|
},
|
|
},
|
|
},
|
|
}
|
|
|
|
results, err := extractResults(doc)
|
|
if !errors.Is(err, ErrBlocked) {
|
|
t.Fatalf("expected ErrBlocked, got %v", err)
|
|
}
|
|
if results != nil {
|
|
t.Errorf("expected nil results when blocked, got %v", results)
|
|
}
|
|
}
|
|
|
|
func TestSearch_UsesMockBrowser(t *testing.T) {
|
|
doc := &extractortest.MockDocument{
|
|
URLValue: "https://duckduckgo.com/?q=test",
|
|
MockNode: extractortest.MockNode{
|
|
Children: map[string]extractor.Nodes{
|
|
`article[id^="r1-"]`: {
|
|
makeResultNode("https://example.com", "Example", "Example description"),
|
|
},
|
|
},
|
|
},
|
|
}
|
|
|
|
browser := &extractortest.MockBrowser{
|
|
Documents: map[string]*extractortest.MockDocument{
|
|
"https://duckduckgo.com/?kp=-2&q=test": doc,
|
|
},
|
|
}
|
|
|
|
results, err := DefaultConfig.Search(context.Background(), browser, "test")
|
|
if err != nil {
|
|
t.Fatalf("Search() error: %v", err)
|
|
}
|
|
if len(results) != 1 {
|
|
t.Fatalf("expected 1 result, got %d", len(results))
|
|
}
|
|
if results[0].URL != "https://example.com" {
|
|
t.Errorf("results[0].URL = %q, want %q", results[0].URL, "https://example.com")
|
|
}
|
|
}
|