From 8c2848246bd4fd6cd6a076482be9900a68d390e3 Mon Sep 17 00:00:00 2001 From: Steve Dudenhoeffer Date: Fri, 20 Feb 2026 18:22:53 +0000 Subject: [PATCH] fix: use structural selectors for DDG weather to handle advisory banners The weather extractor used positional CSS selectors (div:first-child, div:nth-child(2)) to locate the header and hourly container within the widget section. When DuckDuckGo inserts advisory banners (e.g. wind advisory), the extra div shifts positions and breaks extraction of current temp, hourly data, humidity, and wind. Replace with structural selectors: - div:not(:has(ul)) for the header (first div without a list) - div:has(> ul) for the hourly container (div with direct ul child) These match elements by their content structure rather than position, so advisory banners no longer break extraction. Fixes #64 Co-Authored-By: Claude Opus 4.6 --- sites/duckduckgo/weather.go | 12 ++- sites/duckduckgo/weather_test.go | 171 ++++++++++++++++++++++++++++++- 2 files changed, 175 insertions(+), 8 deletions(-) diff --git a/sites/duckduckgo/weather.go b/sites/duckduckgo/weather.go index a410024..54c7150 100644 --- a/sites/duckduckgo/weather.go +++ b/sites/duckduckgo/weather.go @@ -86,8 +86,10 @@ func extractWeather(doc extractor.Node) (*WeatherData, error) { } // Header: condition and location - // Structure: section > div:first-child > [div(toggle), p(condition), p(location)] - header := section.SelectFirst("div:first-child") + // Structure: section > div > [div(toggle), p(condition), p(location)] + // Use :not(:has(ul)) to skip the hourly container div and avoid breaking + // when advisory banners (e.g. wind advisory) insert extra divs. 
+ header := section.SelectFirst("div:not(:has(ul))") if header != nil { ps := header.Select("p") if len(ps) >= 2 { @@ -99,8 +101,10 @@ func extractWeather(doc extractor.Node) (*WeatherData, error) { } // Hourly forecast and details - // Structure: section > div:nth-child(2) > [ul(hourly items), div(humidity/wind)] - hourlyContainer := section.SelectFirst("div:nth-child(2)") + // Structure: section > div > [ul(hourly items), div(humidity/wind)] + // Use :has(> ul) to find the div containing the hourly list, regardless of + // position. This avoids breaking when advisory banners insert extra divs. + hourlyContainer := section.SelectFirst("div:has(> ul)") if hourlyContainer != nil { _ = hourlyContainer.ForEach("ul > li", func(n extractor.Node) error { var hour HourlyForecast diff --git a/sites/duckduckgo/weather_test.go b/sites/duckduckgo/weather_test.go index 8647930..1c3026a 100644 --- a/sites/duckduckgo/weather_test.go +++ b/sites/duckduckgo/weather_test.go @@ -128,8 +128,8 @@ func makeWeatherDoc() *extractortest.MockDocument { // Section section := &extractortest.MockNode{ Children: map[string]extractor.Nodes{ - "div:first-child": {header}, - "div:nth-child(2)": {hourlyContainer}, + "div:not(:has(ul))": {header}, + "div:has(> ul)": {hourlyContainer}, "ul > div": {dayMon, dayTue}, }, } @@ -329,8 +329,8 @@ func TestExtractWeather_NoPrecipitation(t *testing.T) { section := &extractortest.MockNode{ Children: map[string]extractor.Nodes{ - "div:first-child": {&extractortest.MockNode{}}, - "div:nth-child(2)": {&extractortest.MockNode{ + "div:not(:has(ul))": {&extractortest.MockNode{}}, + "div:has(> ul)": {&extractortest.MockNode{ Children: map[string]extractor.Nodes{ "ul > li": {hourlyItem}, }, @@ -379,6 +379,169 @@ func TestExtractWeather_NoPrecipitation(t *testing.T) { } } +func TestExtractWeather_WithAdvisory(t *testing.T) { + // When a weather advisory (e.g. 
"Wind Advisory") is present, DuckDuckGo + // inserts an extra div in the section between header and hourly container. + // The structural selectors must still find the correct elements. + + hourlyItem := &extractortest.MockNode{ + Children: map[string]extractor.Nodes{ + "p": { + &extractortest.MockNode{TextValue: "2 PM"}, + &extractortest.MockNode{TextValue: "31°"}, + }, + "img[src*='weatherkit']:not([src*='Precipitation'])": { + &extractortest.MockNode{Attrs: map[string]string{"alt": "Snow"}}, + }, + "span > span": { + &extractortest.MockNode{TextValue: "40%"}, + }, + }, + } + + hourlyContainer := &extractortest.MockNode{ + Children: map[string]extractor.Nodes{ + "ul > li": {hourlyItem}, + "div > p": { + &extractortest.MockNode{ + TextValue: "Humidity: 80%", + Children: map[string]extractor.Nodes{ + "strong": {&extractortest.MockNode{TextValue: "80%"}}, + }, + }, + &extractortest.MockNode{ + TextValue: "Wind: W 35 mph", + Children: map[string]extractor.Nodes{ + "strong": {&extractortest.MockNode{TextValue: "W 35 mph"}}, + }, + }, + }, + }, + } + + dayThu := &extractortest.MockNode{ + Children: map[string]extractor.Nodes{ + "p:first-child": {&extractortest.MockNode{TextValue: "Thu"}}, + "img[src*='weatherkit']:not([src*='Precipitation'])": { + &extractortest.MockNode{Attrs: map[string]string{"alt": "Snow"}}, + }, + "p:last-of-type": { + &extractortest.MockNode{ + Children: map[string]extractor.Nodes{ + "span": { + &extractortest.MockNode{TextValue: "34°"}, + &extractortest.MockNode{TextValue: "28°"}, + }, + }, + }, + }, + "span > span": {&extractortest.MockNode{TextValue: "70%"}}, + }, + } + + header := &extractortest.MockNode{ + Children: map[string]extractor.Nodes{ + "p": { + &extractortest.MockNode{TextValue: "Snow"}, + &extractortest.MockNode{TextValue: "Erie, PA"}, + }, + }, + } + + // Advisory div — this is the extra element that was breaking extraction. + // It has no ul child, so div:has(> ul) skips it. 
+ // It does match div:not(:has(ul)), but the header precedes it in document order, so SelectFirst is expected to still return the header. + advisory := &extractortest.MockNode{ + TextValue: "Wind Advisory in effect until 7 PM EST", + } + _ = advisory // illustrative only: the mock resolves selectors by map key, so this node is not added to Children + + // Section: the advisory div sits between header and hourly container. + // The mock maps the structural selectors used by extractWeather: + // div:not(:has(ul)) → header (first div without a list) + // div:has(> ul) → hourlyContainer (div with a direct ul child) + // ul > div → daily forecast items + section := &extractortest.MockNode{ + Children: map[string]extractor.Nodes{ + "div:not(:has(ul))": {header}, + "div:has(> ul)": {hourlyContainer}, + "ul > div": {dayThu}, + }, + } + + widget := &extractortest.MockNode{ + Children: map[string]extractor.Nodes{ + "section": {section}, + }, + } + + doc := &extractortest.MockDocument{ + URLValue: "https://duckduckgo.com/?q=weather+Erie%2CPA%2CUS", + MockNode: extractortest.MockNode{ + Children: map[string]extractor.Nodes{ + "article:has(img[src*='weatherkit'])": {widget}, + }, + }, + } + + data, err := extractWeather(doc) + if err != nil { + t.Fatalf("extractWeather() error: %v", err) + } + + // Header should be extracted correctly despite advisory + if data.Condition != "Snow" { + t.Errorf("Condition = %q, want %q", data.Condition, "Snow") + } + if data.Location != "Erie, PA" { + t.Errorf("Location = %q, want %q", data.Location, "Erie, PA") + } + + // Hourly data should be found despite advisory shifting positions + if len(data.Hourly) != 1 { + t.Fatalf("Hourly len = %d, want 1", len(data.Hourly)) + } + if data.Hourly[0].Time != "2 PM" { + t.Errorf("Hourly[0].Time = %q, want %q", data.Hourly[0].Time, "2 PM") + } + if data.Hourly[0].Temp != 31 { + t.Errorf("Hourly[0].Temp = %v, want 31", data.Hourly[0].Temp) + } + if data.Hourly[0].Precipitation != 40 { + t.Errorf("Hourly[0].Precipitation = %d, want 40", data.Hourly[0].Precipitation) + } + if data.Hourly[0].IconHint != "Snow" { + 
t.Errorf("Hourly[0].IconHint = %q, want %q", data.Hourly[0].IconHint, "Snow") + } + + // Current temp derived from hourly + if data.CurrentTemp != 31 { + t.Errorf("CurrentTemp = %v, want 31", data.CurrentTemp) + } + + // Humidity and wind + if data.Humidity != "80%" { + t.Errorf("Humidity = %q, want %q", data.Humidity, "80%") + } + if data.Wind != "W 35 mph" { + t.Errorf("Wind = %q, want %q", data.Wind, "W 35 mph") + } + + // Daily forecast + if len(data.Forecast) != 1 { + t.Fatalf("Forecast len = %d, want 1", len(data.Forecast)) + } + if data.Forecast[0].Day != "Thu" { + t.Errorf("Forecast[0].Day = %q, want %q", data.Forecast[0].Day, "Thu") + } + if data.Forecast[0].HighTemp != 34 { + t.Errorf("Forecast[0].HighTemp = %v, want 34", data.Forecast[0].HighTemp) + } + if data.Forecast[0].LowTemp != 28 { + t.Errorf("Forecast[0].LowTemp = %v, want 28", data.Forecast[0].LowTemp) + } +} + func TestExtractIconHint_Priority(t *testing.T) { // aria-label takes priority over title and alt nodes := extractor.Nodes{