diff --git a/sites/duckduckgo/weather.go b/sites/duckduckgo/weather.go index a410024..54c7150 100644 --- a/sites/duckduckgo/weather.go +++ b/sites/duckduckgo/weather.go @@ -86,8 +86,10 @@ func extractWeather(doc extractor.Node) (*WeatherData, error) { } // Header: condition and location - // Structure: section > div:first-child > [div(toggle), p(condition), p(location)] - header := section.SelectFirst("div:first-child") + // Structure: section > div > [div(toggle), p(condition), p(location)] + // Use :not(:has(ul)) to skip the hourly container div and avoid breaking + // when advisory banners (e.g. wind advisory) insert extra divs. + header := section.SelectFirst("div:not(:has(ul))") if header != nil { ps := header.Select("p") if len(ps) >= 2 { @@ -99,8 +101,10 @@ func extractWeather(doc extractor.Node) (*WeatherData, error) { } // Hourly forecast and details - // Structure: section > div:nth-child(2) > [ul(hourly items), div(humidity/wind)] - hourlyContainer := section.SelectFirst("div:nth-child(2)") + // Structure: section > div > [ul(hourly items), div(humidity/wind)] + // Use :has(> ul) to find the div containing the hourly list, regardless of + // position. This avoids breaking when advisory banners insert extra divs. 
+ hourlyContainer := section.SelectFirst("div:has(> ul)") if hourlyContainer != nil { _ = hourlyContainer.ForEach("ul > li", func(n extractor.Node) error { var hour HourlyForecast diff --git a/sites/duckduckgo/weather_test.go b/sites/duckduckgo/weather_test.go index 8647930..1c3026a 100644 --- a/sites/duckduckgo/weather_test.go +++ b/sites/duckduckgo/weather_test.go @@ -128,8 +128,8 @@ func makeWeatherDoc() *extractortest.MockDocument { // Section section := &extractortest.MockNode{ Children: map[string]extractor.Nodes{ - "div:first-child": {header}, - "div:nth-child(2)": {hourlyContainer}, + "div:not(:has(ul))": {header}, + "div:has(> ul)": {hourlyContainer}, "ul > div": {dayMon, dayTue}, }, } @@ -329,8 +329,8 @@ func TestExtractWeather_NoPrecipitation(t *testing.T) { section := &extractortest.MockNode{ Children: map[string]extractor.Nodes{ - "div:first-child": {&extractortest.MockNode{}}, - "div:nth-child(2)": {&extractortest.MockNode{ + "div:not(:has(ul))": {&extractortest.MockNode{}}, + "div:has(> ul)": {&extractortest.MockNode{ Children: map[string]extractor.Nodes{ "ul > li": {hourlyItem}, }, @@ -379,6 +379,169 @@ func TestExtractWeather_NoPrecipitation(t *testing.T) { } } +func TestExtractWeather_WithAdvisory(t *testing.T) { + // When a weather advisory (e.g. "Wind Advisory") is present, DuckDuckGo + // inserts an extra div in the section between header and hourly container. + // The structural selectors must still find the correct elements. 
+ + hourlyItem := &extractortest.MockNode{ + Children: map[string]extractor.Nodes{ + "p": { + &extractortest.MockNode{TextValue: "2 PM"}, + &extractortest.MockNode{TextValue: "31°"}, + }, + "img[src*='weatherkit']:not([src*='Precipitation'])": { + &extractortest.MockNode{Attrs: map[string]string{"alt": "Snow"}}, + }, + "span > span": { + &extractortest.MockNode{TextValue: "40%"}, + }, + }, + } + + hourlyContainer := &extractortest.MockNode{ + Children: map[string]extractor.Nodes{ + "ul > li": {hourlyItem}, + "div > p": { + &extractortest.MockNode{ + TextValue: "Humidity: 80%", + Children: map[string]extractor.Nodes{ + "strong": {&extractortest.MockNode{TextValue: "80%"}}, + }, + }, + &extractortest.MockNode{ + TextValue: "Wind: W 35 mph", + Children: map[string]extractor.Nodes{ + "strong": {&extractortest.MockNode{TextValue: "W 35 mph"}}, + }, + }, + }, + }, + } + + dayThu := &extractortest.MockNode{ + Children: map[string]extractor.Nodes{ + "p:first-child": {&extractortest.MockNode{TextValue: "Thu"}}, + "img[src*='weatherkit']:not([src*='Precipitation'])": { + &extractortest.MockNode{Attrs: map[string]string{"alt": "Snow"}}, + }, + "p:last-of-type": { + &extractortest.MockNode{ + Children: map[string]extractor.Nodes{ + "span": { + &extractortest.MockNode{TextValue: "34°"}, + &extractortest.MockNode{TextValue: "28°"}, + }, + }, + }, + }, + "span > span": {&extractortest.MockNode{TextValue: "70%"}}, + }, + } + + header := &extractortest.MockNode{ + Children: map[string]extractor.Nodes{ + "p": { + &extractortest.MockNode{TextValue: "Snow"}, + &extractortest.MockNode{TextValue: "Erie, PA"}, + }, + }, + } + + // Advisory div — this is the extra element that was breaking extraction. + // It has no ul child, so div:has(> ul) skips it. + // NOTE(review): div:not(:has(ul)) would match it too (it has no ul); the header presumably wins only by coming first — confirm. 
+ advisory := &extractortest.MockNode{ + TextValue: "Wind Advisory in effect until 7 PM EST", + } + _ = advisory // not referenced by the section Children map below; the blank assignment only silences the unused-variable error + + // Section: the advisory div sits between header and hourly container. + // The mock maps the structural selectors used by extractWeather: + // div:not(:has(ul)) → header (first div without a list) + // div:has(> ul) → hourlyContainer (div with a direct ul child) + // ul > div → daily forecast items + section := &extractortest.MockNode{ + Children: map[string]extractor.Nodes{ + "div:not(:has(ul))": {header}, + "div:has(> ul)": {hourlyContainer}, + "ul > div": {dayThu}, + }, + } + + widget := &extractortest.MockNode{ + Children: map[string]extractor.Nodes{ + "section": {section}, + }, + } + + doc := &extractortest.MockDocument{ + URLValue: "https://duckduckgo.com/?q=weather+Erie%2CPA%2CUS", + MockNode: extractortest.MockNode{ + Children: map[string]extractor.Nodes{ + "article:has(img[src*='weatherkit'])": {widget}, + }, + }, + } + + data, err := extractWeather(doc) + if err != nil { + t.Fatalf("extractWeather() error: %v", err) + } + + // Header should be extracted correctly despite advisory + if data.Condition != "Snow" { + t.Errorf("Condition = %q, want %q", data.Condition, "Snow") + } + if data.Location != "Erie, PA" { + t.Errorf("Location = %q, want %q", data.Location, "Erie, PA") + } + + // Hourly data should be found despite advisory shifting positions + if len(data.Hourly) != 1 { + t.Fatalf("Hourly len = %d, want 1", len(data.Hourly)) + } + if data.Hourly[0].Time != "2 PM" { + t.Errorf("Hourly[0].Time = %q, want %q", data.Hourly[0].Time, "2 PM") + } + if data.Hourly[0].Temp != 31 { + t.Errorf("Hourly[0].Temp = %v, want 31", data.Hourly[0].Temp) + } + if data.Hourly[0].Precipitation != 40 { + t.Errorf("Hourly[0].Precipitation = %d, want 40", data.Hourly[0].Precipitation) + } + if data.Hourly[0].IconHint != "Snow" { + t.Errorf("Hourly[0].IconHint = %q, want %q", data.Hourly[0].IconHint, "Snow") + } + + // 
Current temp derived from hourly + if data.CurrentTemp != 31 { + t.Errorf("CurrentTemp = %v, want 31", data.CurrentTemp) + } + + // Humidity and wind + if data.Humidity != "80%" { + t.Errorf("Humidity = %q, want %q", data.Humidity, "80%") + } + if data.Wind != "W 35 mph" { + t.Errorf("Wind = %q, want %q", data.Wind, "W 35 mph") + } + + // Daily forecast + if len(data.Forecast) != 1 { + t.Fatalf("Forecast len = %d, want 1", len(data.Forecast)) + } + if data.Forecast[0].Day != "Thu" { + t.Errorf("Forecast[0].Day = %q, want %q", data.Forecast[0].Day, "Thu") + } + if data.Forecast[0].HighTemp != 34 { + t.Errorf("Forecast[0].HighTemp = %v, want 34", data.Forecast[0].HighTemp) + } + if data.Forecast[0].LowTemp != 28 { + t.Errorf("Forecast[0].LowTemp = %v, want 28", data.Forecast[0].LowTemp) + } +} + func TestExtractIconHint_Priority(t *testing.T) { // aria-label takes priority over title and alt nodes := extractor.Nodes{