fix: use structural selectors for DDG weather to handle advisory banners
All checks were successful
CI / build (pull_request) Successful in 1m11s
CI / vet (pull_request) Successful in 1m12s
CI / test (pull_request) Successful in 1m17s

The weather extractor used positional CSS selectors (div:first-child,
div:nth-child(2)) to locate the header and hourly container within the
widget section. When DuckDuckGo inserts advisory banners (e.g. wind
advisory), the extra div shifts positions and breaks extraction of
current temp, hourly data, humidity, and wind.

Replace with structural selectors:
- div:not(:has(ul)) for the header (first div without a list)
- div:has(> ul) for the hourly container (div with direct ul child)

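These match elements by their content structure rather than position,
so advisory banners inserted after the header no longer break extraction.
(The header is still taken as the first div without a list, so it must
precede any banner div.)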

Fixes #64

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-20 18:22:53 +00:00
parent 65cf6b027f
commit 8c2848246b
2 changed files with 175 additions and 8 deletions

View File

@@ -86,8 +86,10 @@ func extractWeather(doc extractor.Node) (*WeatherData, error) {
} }
// Header: condition and location // Header: condition and location
// Structure: section > div:first-child > [div(toggle), p(condition), p(location)] // Structure: section > div > [div(toggle), p(condition), p(location)]
header := section.SelectFirst("div:first-child") // Use :not(:has(ul)) to skip the hourly container div and avoid breaking
// when advisory banners (e.g. wind advisory) insert extra divs.
header := section.SelectFirst("div:not(:has(ul))")
if header != nil { if header != nil {
ps := header.Select("p") ps := header.Select("p")
if len(ps) >= 2 { if len(ps) >= 2 {
@@ -99,8 +101,10 @@ func extractWeather(doc extractor.Node) (*WeatherData, error) {
} }
// Hourly forecast and details // Hourly forecast and details
// Structure: section > div:nth-child(2) > [ul(hourly items), div(humidity/wind)] // Structure: section > div > [ul(hourly items), div(humidity/wind)]
hourlyContainer := section.SelectFirst("div:nth-child(2)") // Use :has(> ul) to find the div containing the hourly list, regardless of
// position. This avoids breaking when advisory banners insert extra divs.
hourlyContainer := section.SelectFirst("div:has(> ul)")
if hourlyContainer != nil { if hourlyContainer != nil {
_ = hourlyContainer.ForEach("ul > li", func(n extractor.Node) error { _ = hourlyContainer.ForEach("ul > li", func(n extractor.Node) error {
var hour HourlyForecast var hour HourlyForecast

View File

@@ -128,8 +128,8 @@ func makeWeatherDoc() *extractortest.MockDocument {
// Section // Section
section := &extractortest.MockNode{ section := &extractortest.MockNode{
Children: map[string]extractor.Nodes{ Children: map[string]extractor.Nodes{
"div:first-child": {header}, "div:not(:has(ul))": {header},
"div:nth-child(2)": {hourlyContainer}, "div:has(> ul)": {hourlyContainer},
"ul > div": {dayMon, dayTue}, "ul > div": {dayMon, dayTue},
}, },
} }
@@ -329,8 +329,8 @@ func TestExtractWeather_NoPrecipitation(t *testing.T) {
section := &extractortest.MockNode{ section := &extractortest.MockNode{
Children: map[string]extractor.Nodes{ Children: map[string]extractor.Nodes{
"div:first-child": {&extractortest.MockNode{}}, "div:not(:has(ul))": {&extractortest.MockNode{}},
"div:nth-child(2)": {&extractortest.MockNode{ "div:has(> ul)": {&extractortest.MockNode{
Children: map[string]extractor.Nodes{ Children: map[string]extractor.Nodes{
"ul > li": {hourlyItem}, "ul > li": {hourlyItem},
}, },
@@ -379,6 +379,169 @@ func TestExtractWeather_NoPrecipitation(t *testing.T) {
} }
} }
// TestExtractWeather_WithAdvisory checks that extractWeather still reads the
// header, hourly, humidity/wind and daily forecast data when the widget section
// contains an extra advisory div (e.g. "Wind Advisory") between the header and
// the hourly container.
func TestExtractWeather_WithAdvisory(t *testing.T) {
	// A single hourly entry: time, temperature, condition icon, precipitation.
	hourlyItem := &extractortest.MockNode{
		Children: map[string]extractor.Nodes{
			"p": {
				&extractortest.MockNode{TextValue: "2 PM"},
				&extractortest.MockNode{TextValue: "31°"},
			},
			"img[src*='weatherkit']:not([src*='Precipitation'])": {
				&extractortest.MockNode{Attrs: map[string]string{"alt": "Snow"}},
			},
			"span > span": {
				&extractortest.MockNode{TextValue: "40%"},
			},
		},
	}

	// Hourly container: the hourly list plus the humidity/wind detail lines.
	hourlyContainer := &extractortest.MockNode{
		Children: map[string]extractor.Nodes{
			"ul > li": {hourlyItem},
			"div > p": {
				&extractortest.MockNode{
					TextValue: "Humidity: 80%",
					Children: map[string]extractor.Nodes{
						"strong": {&extractortest.MockNode{TextValue: "80%"}},
					},
				},
				&extractortest.MockNode{
					TextValue: "Wind: W 35 mph",
					Children: map[string]extractor.Nodes{
						"strong": {&extractortest.MockNode{TextValue: "W 35 mph"}},
					},
				},
			},
		},
	}

	// One daily forecast entry.
	dayThu := &extractortest.MockNode{
		Children: map[string]extractor.Nodes{
			"p:first-child": {&extractortest.MockNode{TextValue: "Thu"}},
			"img[src*='weatherkit']:not([src*='Precipitation'])": {
				&extractortest.MockNode{Attrs: map[string]string{"alt": "Snow"}},
			},
			"p:last-of-type": {
				&extractortest.MockNode{
					Children: map[string]extractor.Nodes{
						"span": {
							&extractortest.MockNode{TextValue: "34°"},
							&extractortest.MockNode{TextValue: "28°"},
						},
					},
				},
			},
			"span > span": {&extractortest.MockNode{TextValue: "70%"}},
		},
	}

	// Header: condition and location paragraphs.
	header := &extractortest.MockNode{
		Children: map[string]extractor.Nodes{
			"p": {
				&extractortest.MockNode{TextValue: "Snow"},
				&extractortest.MockNode{TextValue: "Erie, PA"},
			},
		},
	}

	// Advisory div: the extra element that broke the old positional selectors.
	// It has no ul, so div:has(> ul) never matches it. It DOES match
	// div:not(:has(ul)), because that selector only tests for the absence of
	// a list; the header is still the one selected because it precedes the
	// advisory in document order.
	advisory := &extractortest.MockNode{
		TextValue: "Wind Advisory in effect until 7 PM EST",
	}

	// Section: the advisory div sits between header and hourly container.
	// The mock maps the structural selectors used by extractWeather:
	//   div:not(:has(ul)) → header, then advisory (every div without a list,
	//                       in document order; this assumes the mock's
	//                       SelectFirst returns the first listed node)
	//   div:has(> ul)     → hourlyContainer (div with a direct ul child)
	//   ul > div          → daily forecast items
	section := &extractortest.MockNode{
		Children: map[string]extractor.Nodes{
			"div:not(:has(ul))": {header, advisory},
			"div:has(> ul)":     {hourlyContainer},
			"ul > div":          {dayThu},
		},
	}

	widget := &extractortest.MockNode{
		Children: map[string]extractor.Nodes{
			"section": {section},
		},
	}

	doc := &extractortest.MockDocument{
		URLValue: "https://duckduckgo.com/?q=weather+Erie%2CPA%2CUS",
		MockNode: extractortest.MockNode{
			Children: map[string]extractor.Nodes{
				"article:has(img[src*='weatherkit'])": {widget},
			},
		},
	}

	data, err := extractWeather(doc)
	if err != nil {
		t.Fatalf("extractWeather() error: %v", err)
	}

	// Header should be extracted correctly despite advisory
	if data.Condition != "Snow" {
		t.Errorf("Condition = %q, want %q", data.Condition, "Snow")
	}
	if data.Location != "Erie, PA" {
		t.Errorf("Location = %q, want %q", data.Location, "Erie, PA")
	}

	// Hourly data should be found despite advisory shifting positions.
	// Fatal here: the indexed checks below would panic on an empty slice.
	if len(data.Hourly) != 1 {
		t.Fatalf("Hourly len = %d, want 1", len(data.Hourly))
	}
	if data.Hourly[0].Time != "2 PM" {
		t.Errorf("Hourly[0].Time = %q, want %q", data.Hourly[0].Time, "2 PM")
	}
	if data.Hourly[0].Temp != 31 {
		t.Errorf("Hourly[0].Temp = %v, want 31", data.Hourly[0].Temp)
	}
	if data.Hourly[0].Precipitation != 40 {
		t.Errorf("Hourly[0].Precipitation = %d, want 40", data.Hourly[0].Precipitation)
	}
	if data.Hourly[0].IconHint != "Snow" {
		t.Errorf("Hourly[0].IconHint = %q, want %q", data.Hourly[0].IconHint, "Snow")
	}

	// Current temp derived from hourly
	if data.CurrentTemp != 31 {
		t.Errorf("CurrentTemp = %v, want 31", data.CurrentTemp)
	}

	// Humidity and wind
	if data.Humidity != "80%" {
		t.Errorf("Humidity = %q, want %q", data.Humidity, "80%")
	}
	if data.Wind != "W 35 mph" {
		t.Errorf("Wind = %q, want %q", data.Wind, "W 35 mph")
	}

	// Daily forecast. Fatal on length mismatch to keep the indexing safe.
	if len(data.Forecast) != 1 {
		t.Fatalf("Forecast len = %d, want 1", len(data.Forecast))
	}
	if data.Forecast[0].Day != "Thu" {
		t.Errorf("Forecast[0].Day = %q, want %q", data.Forecast[0].Day, "Thu")
	}
	if data.Forecast[0].HighTemp != 34 {
		t.Errorf("Forecast[0].HighTemp = %v, want 34", data.Forecast[0].HighTemp)
	}
	if data.Forecast[0].LowTemp != 28 {
		t.Errorf("Forecast[0].LowTemp = %v, want 28", data.Forecast[0].LowTemp)
	}
}
func TestExtractIconHint_Priority(t *testing.T) { func TestExtractIconHint_Priority(t *testing.T) {
// aria-label takes priority over title and alt // aria-label takes priority over title and alt
nodes := extractor.Nodes{ nodes := extractor.Nodes{