Files
go-extractor/sites/duckduckgo/weather_test.go
Steve Dudenhoeffer 8c2848246b
All checks were successful
CI / build (pull_request) Successful in 1m11s
CI / vet (pull_request) Successful in 1m12s
CI / test (pull_request) Successful in 1m17s
fix: use structural selectors for DDG weather to handle advisory banners
The weather extractor used positional CSS selectors (div:first-child,
div:nth-child(2)) to locate the header and hourly container within the
widget section. When DuckDuckGo inserts advisory banners (e.g. wind
advisory), the extra div shifts positions and breaks extraction of
current temp, hourly data, humidity, and wind.

Replace with structural selectors:
- div:not(:has(ul)) for the header (first div without a list)
- div:has(> ul) for the hourly container (div with direct ul child)

These match elements by their content structure rather than position,
so advisory banners no longer break extraction.

Fixes #64

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-20 18:22:53 +00:00

588 lines
17 KiB
Go

package duckduckgo
import (
"context"
"testing"
"gitea.stevedudenhoeffer.com/steve/go-extractor"
"gitea.stevedudenhoeffer.com/steve/go-extractor/extractortest"
)
// makeWeatherDoc builds the shared happy-path fixture: a mock document shaped
// like the DuckDuckGo weather widget,
//
//	article > section > [div(header), div(hourly+details), ul(daily)]
//
// The widget's CSS class names are randomized, so every Children key below is
// a structural/attribute selector string rather than a class name. The
// fixture describes New York, NY with three hourly entries and two daily
// entries.
func makeWeatherDoc() *extractortest.MockDocument {
	// Selector for the condition icon; it rejects images whose src contains
	// "Precipitation".
	const iconSel = "img[src*='weatherkit']:not([src*='Precipitation'])"

	// text returns a leaf node that carries only text content.
	text := func(s string) *extractortest.MockNode {
		return &extractortest.MockNode{TextValue: s}
	}
	// icon returns an image node exposing hint through the attribute attr.
	icon := func(attr, hint string) *extractortest.MockNode {
		return &extractortest.MockNode{Attrs: map[string]string{attr: hint}}
	}
	// detail returns a "Label: value" paragraph whose value is repeated in a
	// nested strong element.
	detail := func(line, value string) *extractortest.MockNode {
		return &extractortest.MockNode{
			TextValue: line,
			Children: map[string]extractor.Nodes{
				"strong": {text(value)},
			},
		}
	}
	// day returns one daily forecast entry: day name, icon (alt attribute),
	// high and low spans inside the last paragraph, and a precipitation
	// percentage.
	day := func(name, hint, high, low, precip string) *extractortest.MockNode {
		temps := &extractortest.MockNode{
			Children: map[string]extractor.Nodes{
				"span": {text(high), text(low)},
			},
		}
		return &extractortest.MockNode{
			Children: map[string]extractor.Nodes{
				"p:first-child":  {text(name)},
				iconSel:          {icon("alt", hint)},
				"p:last-of-type": {temps},
				"span > span":    {text(precip)},
			},
		}
	}

	// Hourly forecast items (ul > li inside the hourly container). They
	// differ on purpose: the second has no precipitation node, and the third
	// exposes its icon hint through aria-label instead of alt.
	hourlyItems := extractor.Nodes{
		&extractortest.MockNode{
			Children: map[string]extractor.Nodes{
				"p":           {text("3 PM"), text("74°")},
				iconSel:       {icon("alt", "MostlyCloudy")},
				"span > span": {text("5%")},
			},
		},
		&extractortest.MockNode{
			Children: map[string]extractor.Nodes{
				"p":     {text("4 PM"), text("73°")},
				iconSel: {icon("alt", "Cloudy")},
			},
		},
		&extractortest.MockNode{
			Children: map[string]extractor.Nodes{
				"p":           {text("5 PM"), text("70°")},
				iconSel:       {icon("aria-label", "HeavyRain")},
				"span > span": {text("60%")},
			},
		},
	}

	// Hourly container (section > div:has(> ul)): the hourly list plus the
	// humidity and wind detail paragraphs.
	hourlyContainer := &extractortest.MockNode{
		Children: map[string]extractor.Nodes{
			"ul > li": hourlyItems,
			"div > p": {
				detail("Humidity: 55%", "55%"),
				detail("Wind: SW 10 mph", "SW 10 mph"),
			},
		},
	}

	// Header (section > div:not(:has(ul))): condition text, then location.
	header := &extractortest.MockNode{
		Children: map[string]extractor.Nodes{
			"p": {text("Partly Cloudy"), text("New York, NY")},
		},
	}

	// Section: header and hourly container are keyed by structure, not by
	// position; daily forecast items live under section > ul > div.
	section := &extractortest.MockNode{
		Children: map[string]extractor.Nodes{
			"div:not(:has(ul))": {header},
			"div:has(> ul)":     {hourlyContainer},
			"ul > div": {
				day("Mon", "PartlyCloudy", "80°", "66°", "10%"),
				day("Tue", "Rain", "75°", "62°", "80%"),
			},
		},
	}

	// Widget article wrapping the section.
	widget := &extractortest.MockNode{
		Children: map[string]extractor.Nodes{
			"section": {section},
		},
	}

	return &extractortest.MockDocument{
		URLValue: "https://duckduckgo.com/?q=weather+new+york",
		MockNode: extractortest.MockNode{
			Children: map[string]extractor.Nodes{
				"article:has(img[src*='weatherkit'])": {widget},
			},
		},
	}
}
func TestExtractWeather(t *testing.T) {
doc := makeWeatherDoc()
data, err := extractWeather(doc)
if err != nil {
t.Fatalf("extractWeather() error: %v", err)
}
if data.Location != "New York, NY" {
t.Errorf("Location = %q, want %q", data.Location, "New York, NY")
}
// CurrentTemp is derived from first hourly entry (no standalone current temp in new widget)
if data.CurrentTemp != 74 {
t.Errorf("CurrentTemp = %v, want 74", data.CurrentTemp)
}
if data.Condition != "Partly Cloudy" {
t.Errorf("Condition = %q, want %q", data.Condition, "Partly Cloudy")
}
// HighTemp/LowTemp are derived from first daily forecast entry
if data.HighTemp != 80 {
t.Errorf("HighTemp = %v, want 80", data.HighTemp)
}
if data.LowTemp != 66 {
t.Errorf("LowTemp = %v, want 66", data.LowTemp)
}
if data.Humidity != "55%" {
t.Errorf("Humidity = %q, want %q", data.Humidity, "55%")
}
if data.Wind != "SW 10 mph" {
t.Errorf("Wind = %q, want %q", data.Wind, "SW 10 mph")
}
// Daily forecast
if len(data.Forecast) != 2 {
t.Fatalf("Forecast len = %d, want 2", len(data.Forecast))
}
if data.Forecast[0].Day != "Mon" {
t.Errorf("Forecast[0].Day = %q, want %q", data.Forecast[0].Day, "Mon")
}
if data.Forecast[0].HighTemp != 80 {
t.Errorf("Forecast[0].HighTemp = %v, want 80", data.Forecast[0].HighTemp)
}
if data.Forecast[0].Precipitation != 10 {
t.Errorf("Forecast[0].Precipitation = %d, want 10", data.Forecast[0].Precipitation)
}
if data.Forecast[0].IconHint != "PartlyCloudy" {
t.Errorf("Forecast[0].IconHint = %q, want %q", data.Forecast[0].IconHint, "PartlyCloudy")
}
// Condition is now derived from icon hint
if data.Forecast[0].Condition != "PartlyCloudy" {
t.Errorf("Forecast[0].Condition = %q, want %q", data.Forecast[0].Condition, "PartlyCloudy")
}
if data.Forecast[1].Condition != "Rain" {
t.Errorf("Forecast[1].Condition = %q, want %q", data.Forecast[1].Condition, "Rain")
}
if data.Forecast[1].Precipitation != 80 {
t.Errorf("Forecast[1].Precipitation = %d, want 80", data.Forecast[1].Precipitation)
}
if data.Forecast[1].IconHint != "Rain" {
t.Errorf("Forecast[1].IconHint = %q, want %q", data.Forecast[1].IconHint, "Rain")
}
// Hourly forecast
if len(data.Hourly) != 3 {
t.Fatalf("Hourly len = %d, want 3", len(data.Hourly))
}
if data.Hourly[0].Time != "3 PM" {
t.Errorf("Hourly[0].Time = %q, want %q", data.Hourly[0].Time, "3 PM")
}
if data.Hourly[0].Temp != 74 {
t.Errorf("Hourly[0].Temp = %v, want 74", data.Hourly[0].Temp)
}
// Condition is now derived from icon hint (no separate condition element)
if data.Hourly[0].Condition != "MostlyCloudy" {
t.Errorf("Hourly[0].Condition = %q, want %q", data.Hourly[0].Condition, "MostlyCloudy")
}
if data.Hourly[0].Precipitation != 5 {
t.Errorf("Hourly[0].Precipitation = %d, want 5", data.Hourly[0].Precipitation)
}
if data.Hourly[0].IconHint != "MostlyCloudy" {
t.Errorf("Hourly[0].IconHint = %q, want %q", data.Hourly[0].IconHint, "MostlyCloudy")
}
// Second hourly item has no precipitation
if data.Hourly[1].Time != "4 PM" {
t.Errorf("Hourly[1].Time = %q, want %q", data.Hourly[1].Time, "4 PM")
}
if data.Hourly[1].Precipitation != -1 {
t.Errorf("Hourly[1].Precipitation = %d, want -1 (unavailable)", data.Hourly[1].Precipitation)
}
if data.Hourly[1].IconHint != "Cloudy" {
t.Errorf("Hourly[1].IconHint = %q, want %q", data.Hourly[1].IconHint, "Cloudy")
}
// Third hourly item uses aria-label for icon hint
if data.Hourly[2].Precipitation != 60 {
t.Errorf("Hourly[2].Precipitation = %d, want 60", data.Hourly[2].Precipitation)
}
if data.Hourly[2].IconHint != "HeavyRain" {
t.Errorf("Hourly[2].IconHint = %q, want %q", data.Hourly[2].IconHint, "HeavyRain")
}
}
func TestGetWeather_MockBrowser(t *testing.T) {
doc := makeWeatherDoc()
browser := &extractortest.MockBrowser{
Documents: map[string]*extractortest.MockDocument{
"https://duckduckgo.com/?kp=-2&q=weather+new+york": doc,
},
}
data, err := DefaultConfig.GetWeather(context.Background(), browser, "new york")
if err != nil {
t.Fatalf("GetWeather() error: %v", err)
}
if data.Location != "New York, NY" {
t.Errorf("Location = %q, want %q", data.Location, "New York, NY")
}
if data.CurrentTemp != 74 {
t.Errorf("CurrentTemp = %v, want 74", data.CurrentTemp)
}
if len(data.Hourly) != 3 {
t.Errorf("Hourly len = %d, want 3", len(data.Hourly))
}
}
// TestExtractWeather_Empty checks that a document with no matching nodes
// yields no error, zero-valued Location and CurrentTemp, and no hourly data.
func TestExtractWeather_Empty(t *testing.T) {
	emptyDoc := &extractortest.MockDocument{
		MockNode: extractortest.MockNode{
			Children: map[string]extractor.Nodes{},
		},
	}

	weather, err := extractWeather(emptyDoc)
	if err != nil {
		t.Fatalf("extractWeather() error: %v", err)
	}

	allZero := weather.Location == "" && weather.CurrentTemp == 0
	if !allZero {
		t.Error("expected zero values for empty doc")
	}
	if n := len(weather.Hourly); n != 0 {
		t.Errorf("expected no hourly data for empty doc, got %d", n)
	}
}
// TestExtractWeather_NoPrecipitation feeds extractWeather one daily and one
// hourly entry that lack both the precipitation node and the icon node, and
// expects Precipitation to be -1 and IconHint to be empty for each.
func TestExtractWeather_NoPrecipitation(t *testing.T) {
	// High/low paragraph of the daily entry.
	wedTemps := &extractortest.MockNode{
		Children: map[string]extractor.Nodes{
			"span": {
				&extractortest.MockNode{TextValue: "85°"},
				&extractortest.MockNode{TextValue: "70°"},
			},
		},
	}
	// Daily entry: day name and temperatures only.
	wed := &extractortest.MockNode{
		Children: map[string]extractor.Nodes{
			"p:first-child":  {&extractortest.MockNode{TextValue: "Wed"}},
			"p:last-of-type": {wedTemps},
		},
	}
	// Hourly entry: time and temperature only.
	noon := &extractortest.MockNode{
		Children: map[string]extractor.Nodes{
			"p": {
				&extractortest.MockNode{TextValue: "12 PM"},
				&extractortest.MockNode{TextValue: "82°"},
			},
		},
	}
	// Hourly container holding just the list; no humidity or wind details.
	hourlyContainer := &extractortest.MockNode{
		Children: map[string]extractor.Nodes{
			"ul > li": {noon},
		},
	}

	// Assemble document > article > section around those entries. The header
	// is an empty node.
	doc := &extractortest.MockDocument{
		MockNode: extractortest.MockNode{
			Children: map[string]extractor.Nodes{
				"article:has(img[src*='weatherkit'])": {
					&extractortest.MockNode{
						Children: map[string]extractor.Nodes{
							"section": {
								&extractortest.MockNode{
									Children: map[string]extractor.Nodes{
										"div:not(:has(ul))": {&extractortest.MockNode{}},
										"div:has(> ul)":     {hourlyContainer},
										"ul > div":          {wed},
									},
								},
							},
						},
					},
				},
			},
		},
	}

	weather, err := extractWeather(doc)
	if err != nil {
		t.Fatalf("extractWeather() error: %v", err)
	}

	if n := len(weather.Forecast); n != 1 {
		t.Fatalf("Forecast len = %d, want 1", n)
	}
	daily := weather.Forecast[0]
	if got := daily.Precipitation; got != -1 {
		t.Errorf("Forecast[0].Precipitation = %d, want -1 (unavailable)", got)
	}
	if got := daily.IconHint; got != "" {
		t.Errorf("Forecast[0].IconHint = %q, want empty", got)
	}

	if n := len(weather.Hourly); n != 1 {
		t.Fatalf("Hourly len = %d, want 1", n)
	}
	hourly := weather.Hourly[0]
	if got := hourly.Precipitation; got != -1 {
		t.Errorf("Hourly[0].Precipitation = %d, want -1 (unavailable)", got)
	}
	if got := hourly.IconHint; got != "" {
		t.Errorf("Hourly[0].IconHint = %q, want empty", got)
	}
}
// TestExtractWeather_WithAdvisory models the widget when DuckDuckGo shows a
// weather advisory (e.g. "Wind Advisory"): an extra div appears in the section
// between the header and the hourly container. The test checks that header,
// hourly, humidity/wind and daily data are still extracted.
func TestExtractWeather_WithAdvisory(t *testing.T) {
	hourlyItem := &extractortest.MockNode{
		Children: map[string]extractor.Nodes{
			"p": {
				&extractortest.MockNode{TextValue: "2 PM"},
				&extractortest.MockNode{TextValue: "31°"},
			},
			"img[src*='weatherkit']:not([src*='Precipitation'])": {
				&extractortest.MockNode{Attrs: map[string]string{"alt": "Snow"}},
			},
			"span > span": {
				&extractortest.MockNode{TextValue: "40%"},
			},
		},
	}

	hourlyContainer := &extractortest.MockNode{
		Children: map[string]extractor.Nodes{
			"ul > li": {hourlyItem},
			"div > p": {
				&extractortest.MockNode{
					TextValue: "Humidity: 80%",
					Children: map[string]extractor.Nodes{
						"strong": {&extractortest.MockNode{TextValue: "80%"}},
					},
				},
				&extractortest.MockNode{
					TextValue: "Wind: W 35 mph",
					Children: map[string]extractor.Nodes{
						"strong": {&extractortest.MockNode{TextValue: "W 35 mph"}},
					},
				},
			},
		},
	}

	dayThu := &extractortest.MockNode{
		Children: map[string]extractor.Nodes{
			"p:first-child": {&extractortest.MockNode{TextValue: "Thu"}},
			"img[src*='weatherkit']:not([src*='Precipitation'])": {
				&extractortest.MockNode{Attrs: map[string]string{"alt": "Snow"}},
			},
			"p:last-of-type": {
				&extractortest.MockNode{
					Children: map[string]extractor.Nodes{
						"span": {
							&extractortest.MockNode{TextValue: "34°"},
							&extractortest.MockNode{TextValue: "28°"},
						},
					},
				},
			},
			"span > span": {&extractortest.MockNode{TextValue: "70%"}},
		},
	}

	header := &extractortest.MockNode{
		Children: map[string]extractor.Nodes{
			"p": {
				&extractortest.MockNode{TextValue: "Snow"},
				&extractortest.MockNode{TextValue: "Erie, PA"},
			},
		},
	}

	// Advisory div: the extra element that used to break the positional
	// selectors. It has no ul, so div:has(> ul) cannot match it, but
	// div:not(:has(ul)) does. It follows the header in document order, so it
	// is listed as the second match for that selector below.
	advisory := &extractortest.MockNode{
		TextValue: "Wind Advisory in effect until 7 PM EST",
	}

	// Section: the advisory div sits between header and hourly container.
	// The mock maps the structural selectors to what a real selector engine
	// would return for this layout:
	//   div:not(:has(ul)) -> header, then advisory (extractWeather is
	//                        expected to use the first match)
	//   div:has(> ul)     -> hourlyContainer (div with a direct ul child)
	//   ul > div          -> daily forecast items
	section := &extractortest.MockNode{
		Children: map[string]extractor.Nodes{
			"div:not(:has(ul))": {header, advisory},
			"div:has(> ul)":     {hourlyContainer},
			"ul > div":          {dayThu},
		},
	}

	widget := &extractortest.MockNode{
		Children: map[string]extractor.Nodes{
			"section": {section},
		},
	}

	doc := &extractortest.MockDocument{
		URLValue: "https://duckduckgo.com/?q=weather+Erie%2CPA%2CUS",
		MockNode: extractortest.MockNode{
			Children: map[string]extractor.Nodes{
				"article:has(img[src*='weatherkit'])": {widget},
			},
		},
	}

	data, err := extractWeather(doc)
	if err != nil {
		t.Fatalf("extractWeather() error: %v", err)
	}

	// Header must come from the real header div, not from the advisory.
	if data.Condition != "Snow" {
		t.Errorf("Condition = %q, want %q", data.Condition, "Snow")
	}
	if data.Location != "Erie, PA" {
		t.Errorf("Location = %q, want %q", data.Location, "Erie, PA")
	}

	// Hourly data must be found even though the advisory shifts positions.
	if len(data.Hourly) != 1 {
		t.Fatalf("Hourly len = %d, want 1", len(data.Hourly))
	}
	if data.Hourly[0].Time != "2 PM" {
		t.Errorf("Hourly[0].Time = %q, want %q", data.Hourly[0].Time, "2 PM")
	}
	if data.Hourly[0].Temp != 31 {
		t.Errorf("Hourly[0].Temp = %v, want 31", data.Hourly[0].Temp)
	}
	if data.Hourly[0].Precipitation != 40 {
		t.Errorf("Hourly[0].Precipitation = %d, want 40", data.Hourly[0].Precipitation)
	}
	if data.Hourly[0].IconHint != "Snow" {
		t.Errorf("Hourly[0].IconHint = %q, want %q", data.Hourly[0].IconHint, "Snow")
	}

	// Current temperature is expected to come from the first hourly entry.
	if data.CurrentTemp != 31 {
		t.Errorf("CurrentTemp = %v, want 31", data.CurrentTemp)
	}

	// Humidity and wind live in the hourly container.
	if data.Humidity != "80%" {
		t.Errorf("Humidity = %q, want %q", data.Humidity, "80%")
	}
	if data.Wind != "W 35 mph" {
		t.Errorf("Wind = %q, want %q", data.Wind, "W 35 mph")
	}

	// Daily forecast.
	if len(data.Forecast) != 1 {
		t.Fatalf("Forecast len = %d, want 1", len(data.Forecast))
	}
	if data.Forecast[0].Day != "Thu" {
		t.Errorf("Forecast[0].Day = %q, want %q", data.Forecast[0].Day, "Thu")
	}
	if data.Forecast[0].HighTemp != 34 {
		t.Errorf("Forecast[0].HighTemp = %v, want 34", data.Forecast[0].HighTemp)
	}
	if data.Forecast[0].LowTemp != 28 {
		t.Errorf("Forecast[0].LowTemp = %v, want 28", data.Forecast[0].LowTemp)
	}
}
// TestExtractIconHint_Priority checks the attribute order extractIconHint is
// expected to follow: aria-label first, then title, then alt, and an empty
// string when it is given no nodes.
func TestExtractIconHint_Priority(t *testing.T) {
	// single wraps one attribute set in a one-element node list.
	single := func(attrs map[string]string) extractor.Nodes {
		return extractor.Nodes{&extractortest.MockNode{Attrs: attrs}}
	}

	// All three attributes present: aria-label wins.
	allAttrs := single(map[string]string{
		"aria-label": "Snow",
		"title":      "SnowTitle",
		"alt":        "SnowAlt",
	})
	if got := extractIconHint(allAttrs); got != "Snow" {
		t.Errorf("extractIconHint() = %q, want %q (aria-label priority)", got, "Snow")
	}

	// No aria-label: title is used ahead of alt.
	titleAndAlt := single(map[string]string{
		"title": "Drizzle",
		"alt":   "DrizzleAlt",
	})
	if got := extractIconHint(titleAndAlt); got != "Drizzle" {
		t.Errorf("extractIconHint() = %q, want %q (title fallback)", got, "Drizzle")
	}

	// Only alt: last fallback.
	altOnly := single(map[string]string{"alt": "MostlyClear"})
	if got := extractIconHint(altOnly); got != "MostlyClear" {
		t.Errorf("extractIconHint() = %q, want %q (alt fallback)", got, "MostlyClear")
	}

	// No nodes at all: empty result.
	if got := extractIconHint(nil); got != "" {
		t.Errorf("extractIconHint(nil) = %q, want empty", got)
	}
}