Merge pull request 'fix: use structural selectors for DDG weather advisory handling' (#65) from fix/weather-advisory-selectors into main
All checks were successful
CI / build (push) Successful in 29s
CI / vet (push) Successful in 1m1s
CI / test (push) Successful in 1m2s

Reviewed-on: #65
This commit was merged in pull request #65.
This commit is contained in:
2026-02-20 18:23:54 +00:00
2 changed files with 175 additions and 8 deletions

View File

@@ -86,8 +86,10 @@ func extractWeather(doc extractor.Node) (*WeatherData, error) {
} }
// Header: condition and location // Header: condition and location
// Structure: section > div:first-child > [div(toggle), p(condition), p(location)] // Structure: section > div > [div(toggle), p(condition), p(location)]
header := section.SelectFirst("div:first-child") // Use :not(:has(ul)) to skip the hourly container div and avoid breaking
// when advisory banners (e.g. wind advisory) insert extra divs.
header := section.SelectFirst("div:not(:has(ul))")
if header != nil { if header != nil {
ps := header.Select("p") ps := header.Select("p")
if len(ps) >= 2 { if len(ps) >= 2 {
@@ -99,8 +101,10 @@ func extractWeather(doc extractor.Node) (*WeatherData, error) {
} }
// Hourly forecast and details // Hourly forecast and details
// Structure: section > div:nth-child(2) > [ul(hourly items), div(humidity/wind)] // Structure: section > div > [ul(hourly items), div(humidity/wind)]
hourlyContainer := section.SelectFirst("div:nth-child(2)") // Use :has(> ul) to find the div containing the hourly list, regardless of
// position. This avoids breaking when advisory banners insert extra divs.
hourlyContainer := section.SelectFirst("div:has(> ul)")
if hourlyContainer != nil { if hourlyContainer != nil {
_ = hourlyContainer.ForEach("ul > li", func(n extractor.Node) error { _ = hourlyContainer.ForEach("ul > li", func(n extractor.Node) error {
var hour HourlyForecast var hour HourlyForecast

View File

@@ -128,8 +128,8 @@ func makeWeatherDoc() *extractortest.MockDocument {
// Section // Section
section := &extractortest.MockNode{ section := &extractortest.MockNode{
Children: map[string]extractor.Nodes{ Children: map[string]extractor.Nodes{
"div:first-child": {header}, "div:not(:has(ul))": {header},
"div:nth-child(2)": {hourlyContainer}, "div:has(> ul)": {hourlyContainer},
"ul > div": {dayMon, dayTue}, "ul > div": {dayMon, dayTue},
}, },
} }
@@ -329,8 +329,8 @@ func TestExtractWeather_NoPrecipitation(t *testing.T) {
section := &extractortest.MockNode{ section := &extractortest.MockNode{
Children: map[string]extractor.Nodes{ Children: map[string]extractor.Nodes{
"div:first-child": {&extractortest.MockNode{}}, "div:not(:has(ul))": {&extractortest.MockNode{}},
"div:nth-child(2)": {&extractortest.MockNode{ "div:has(> ul)": {&extractortest.MockNode{
Children: map[string]extractor.Nodes{ Children: map[string]extractor.Nodes{
"ul > li": {hourlyItem}, "ul > li": {hourlyItem},
}, },
@@ -379,6 +379,169 @@ func TestExtractWeather_NoPrecipitation(t *testing.T) {
} }
} }
func TestExtractWeather_WithAdvisory(t *testing.T) {
// When a weather advisory (e.g. "Wind Advisory") is present, DuckDuckGo
// inserts an extra div in the section between header and hourly container.
// The structural selectors must still find the correct elements.
hourlyItem := &extractortest.MockNode{
Children: map[string]extractor.Nodes{
"p": {
&extractortest.MockNode{TextValue: "2 PM"},
&extractortest.MockNode{TextValue: "31°"},
},
"img[src*='weatherkit']:not([src*='Precipitation'])": {
&extractortest.MockNode{Attrs: map[string]string{"alt": "Snow"}},
},
"span > span": {
&extractortest.MockNode{TextValue: "40%"},
},
},
}
hourlyContainer := &extractortest.MockNode{
Children: map[string]extractor.Nodes{
"ul > li": {hourlyItem},
"div > p": {
&extractortest.MockNode{
TextValue: "Humidity: 80%",
Children: map[string]extractor.Nodes{
"strong": {&extractortest.MockNode{TextValue: "80%"}},
},
},
&extractortest.MockNode{
TextValue: "Wind: W 35 mph",
Children: map[string]extractor.Nodes{
"strong": {&extractortest.MockNode{TextValue: "W 35 mph"}},
},
},
},
},
}
dayThu := &extractortest.MockNode{
Children: map[string]extractor.Nodes{
"p:first-child": {&extractortest.MockNode{TextValue: "Thu"}},
"img[src*='weatherkit']:not([src*='Precipitation'])": {
&extractortest.MockNode{Attrs: map[string]string{"alt": "Snow"}},
},
"p:last-of-type": {
&extractortest.MockNode{
Children: map[string]extractor.Nodes{
"span": {
&extractortest.MockNode{TextValue: "34°"},
&extractortest.MockNode{TextValue: "28°"},
},
},
},
},
"span > span": {&extractortest.MockNode{TextValue: "70%"}},
},
}
header := &extractortest.MockNode{
Children: map[string]extractor.Nodes{
"p": {
&extractortest.MockNode{TextValue: "Snow"},
&extractortest.MockNode{TextValue: "Erie, PA"},
},
},
}
// Advisory div — this is the extra element that was breaking extraction.
// It has no ul child, so div:has(> ul) skips it.
// It has no p child, so div:not(:has(ul)) also skips it for the header.
advisory := &extractortest.MockNode{
TextValue: "Wind Advisory in effect until 7 PM EST",
}
_ = advisory // used in the section Children map below
// Section: the advisory div sits between header and hourly container.
// The mock maps the structural selectors used by extractWeather:
// div:not(:has(ul)) → header (first div without a list)
// div:has(> ul) → hourlyContainer (div with a direct ul child)
// ul > div → daily forecast items
section := &extractortest.MockNode{
Children: map[string]extractor.Nodes{
"div:not(:has(ul))": {header},
"div:has(> ul)": {hourlyContainer},
"ul > div": {dayThu},
},
}
widget := &extractortest.MockNode{
Children: map[string]extractor.Nodes{
"section": {section},
},
}
doc := &extractortest.MockDocument{
URLValue: "https://duckduckgo.com/?q=weather+Erie%2CPA%2CUS",
MockNode: extractortest.MockNode{
Children: map[string]extractor.Nodes{
"article:has(img[src*='weatherkit'])": {widget},
},
},
}
data, err := extractWeather(doc)
if err != nil {
t.Fatalf("extractWeather() error: %v", err)
}
// Header should be extracted correctly despite advisory
if data.Condition != "Snow" {
t.Errorf("Condition = %q, want %q", data.Condition, "Snow")
}
if data.Location != "Erie, PA" {
t.Errorf("Location = %q, want %q", data.Location, "Erie, PA")
}
// Hourly data should be found despite advisory shifting positions
if len(data.Hourly) != 1 {
t.Fatalf("Hourly len = %d, want 1", len(data.Hourly))
}
if data.Hourly[0].Time != "2 PM" {
t.Errorf("Hourly[0].Time = %q, want %q", data.Hourly[0].Time, "2 PM")
}
if data.Hourly[0].Temp != 31 {
t.Errorf("Hourly[0].Temp = %v, want 31", data.Hourly[0].Temp)
}
if data.Hourly[0].Precipitation != 40 {
t.Errorf("Hourly[0].Precipitation = %d, want 40", data.Hourly[0].Precipitation)
}
if data.Hourly[0].IconHint != "Snow" {
t.Errorf("Hourly[0].IconHint = %q, want %q", data.Hourly[0].IconHint, "Snow")
}
// Current temp derived from hourly
if data.CurrentTemp != 31 {
t.Errorf("CurrentTemp = %v, want 31", data.CurrentTemp)
}
// Humidity and wind
if data.Humidity != "80%" {
t.Errorf("Humidity = %q, want %q", data.Humidity, "80%")
}
if data.Wind != "W 35 mph" {
t.Errorf("Wind = %q, want %q", data.Wind, "W 35 mph")
}
// Daily forecast
if len(data.Forecast) != 1 {
t.Fatalf("Forecast len = %d, want 1", len(data.Forecast))
}
if data.Forecast[0].Day != "Thu" {
t.Errorf("Forecast[0].Day = %q, want %q", data.Forecast[0].Day, "Thu")
}
if data.Forecast[0].HighTemp != 34 {
t.Errorf("Forecast[0].HighTemp = %v, want 34", data.Forecast[0].HighTemp)
}
if data.Forecast[0].LowTemp != 28 {
t.Errorf("Forecast[0].LowTemp = %v, want 28", data.Forecast[0].LowTemp)
}
}
func TestExtractIconHint_Priority(t *testing.T) { func TestExtractIconHint_Priority(t *testing.T) {
// aria-label takes priority over title and alt // aria-label takes priority over title and alt
nodes := extractor.Nodes{ nodes := extractor.Nodes{