Merge pull request 'fix: use structural selectors for DDG weather advisory handling' (#65) from fix/weather-advisory-selectors into main
Reviewed-on: #65
This commit was merged in pull request #65.
This commit is contained in:
@@ -86,8 +86,10 @@ func extractWeather(doc extractor.Node) (*WeatherData, error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Header: condition and location
|
// Header: condition and location
|
||||||
// Structure: section > div:first-child > [div(toggle), p(condition), p(location)]
|
// Structure: section > div > [div(toggle), p(condition), p(location)]
|
||||||
header := section.SelectFirst("div:first-child")
|
// Use :not(:has(ul)) to skip the hourly container div and avoid breaking
|
||||||
|
// when advisory banners (e.g. wind advisory) insert extra divs.
|
||||||
|
header := section.SelectFirst("div:not(:has(ul))")
|
||||||
if header != nil {
|
if header != nil {
|
||||||
ps := header.Select("p")
|
ps := header.Select("p")
|
||||||
if len(ps) >= 2 {
|
if len(ps) >= 2 {
|
||||||
@@ -99,8 +101,10 @@ func extractWeather(doc extractor.Node) (*WeatherData, error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Hourly forecast and details
|
// Hourly forecast and details
|
||||||
// Structure: section > div:nth-child(2) > [ul(hourly items), div(humidity/wind)]
|
// Structure: section > div > [ul(hourly items), div(humidity/wind)]
|
||||||
hourlyContainer := section.SelectFirst("div:nth-child(2)")
|
// Use :has(> ul) to find the div containing the hourly list, regardless of
|
||||||
|
// position. This avoids breaking when advisory banners insert extra divs.
|
||||||
|
hourlyContainer := section.SelectFirst("div:has(> ul)")
|
||||||
if hourlyContainer != nil {
|
if hourlyContainer != nil {
|
||||||
_ = hourlyContainer.ForEach("ul > li", func(n extractor.Node) error {
|
_ = hourlyContainer.ForEach("ul > li", func(n extractor.Node) error {
|
||||||
var hour HourlyForecast
|
var hour HourlyForecast
|
||||||
|
|||||||
@@ -128,8 +128,8 @@ func makeWeatherDoc() *extractortest.MockDocument {
|
|||||||
// Section
|
// Section
|
||||||
section := &extractortest.MockNode{
|
section := &extractortest.MockNode{
|
||||||
Children: map[string]extractor.Nodes{
|
Children: map[string]extractor.Nodes{
|
||||||
"div:first-child": {header},
|
"div:not(:has(ul))": {header},
|
||||||
"div:nth-child(2)": {hourlyContainer},
|
"div:has(> ul)": {hourlyContainer},
|
||||||
"ul > div": {dayMon, dayTue},
|
"ul > div": {dayMon, dayTue},
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
@@ -329,8 +329,8 @@ func TestExtractWeather_NoPrecipitation(t *testing.T) {
|
|||||||
|
|
||||||
section := &extractortest.MockNode{
|
section := &extractortest.MockNode{
|
||||||
Children: map[string]extractor.Nodes{
|
Children: map[string]extractor.Nodes{
|
||||||
"div:first-child": {&extractortest.MockNode{}},
|
"div:not(:has(ul))": {&extractortest.MockNode{}},
|
||||||
"div:nth-child(2)": {&extractortest.MockNode{
|
"div:has(> ul)": {&extractortest.MockNode{
|
||||||
Children: map[string]extractor.Nodes{
|
Children: map[string]extractor.Nodes{
|
||||||
"ul > li": {hourlyItem},
|
"ul > li": {hourlyItem},
|
||||||
},
|
},
|
||||||
@@ -379,6 +379,169 @@ func TestExtractWeather_NoPrecipitation(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestExtractWeather_WithAdvisory(t *testing.T) {
|
||||||
|
// When a weather advisory (e.g. "Wind Advisory") is present, DuckDuckGo
|
||||||
|
// inserts an extra div in the section between header and hourly container.
|
||||||
|
// The structural selectors must still find the correct elements.
|
||||||
|
|
||||||
|
hourlyItem := &extractortest.MockNode{
|
||||||
|
Children: map[string]extractor.Nodes{
|
||||||
|
"p": {
|
||||||
|
&extractortest.MockNode{TextValue: "2 PM"},
|
||||||
|
&extractortest.MockNode{TextValue: "31°"},
|
||||||
|
},
|
||||||
|
"img[src*='weatherkit']:not([src*='Precipitation'])": {
|
||||||
|
&extractortest.MockNode{Attrs: map[string]string{"alt": "Snow"}},
|
||||||
|
},
|
||||||
|
"span > span": {
|
||||||
|
&extractortest.MockNode{TextValue: "40%"},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
hourlyContainer := &extractortest.MockNode{
|
||||||
|
Children: map[string]extractor.Nodes{
|
||||||
|
"ul > li": {hourlyItem},
|
||||||
|
"div > p": {
|
||||||
|
&extractortest.MockNode{
|
||||||
|
TextValue: "Humidity: 80%",
|
||||||
|
Children: map[string]extractor.Nodes{
|
||||||
|
"strong": {&extractortest.MockNode{TextValue: "80%"}},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
&extractortest.MockNode{
|
||||||
|
TextValue: "Wind: W 35 mph",
|
||||||
|
Children: map[string]extractor.Nodes{
|
||||||
|
"strong": {&extractortest.MockNode{TextValue: "W 35 mph"}},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
dayThu := &extractortest.MockNode{
|
||||||
|
Children: map[string]extractor.Nodes{
|
||||||
|
"p:first-child": {&extractortest.MockNode{TextValue: "Thu"}},
|
||||||
|
"img[src*='weatherkit']:not([src*='Precipitation'])": {
|
||||||
|
&extractortest.MockNode{Attrs: map[string]string{"alt": "Snow"}},
|
||||||
|
},
|
||||||
|
"p:last-of-type": {
|
||||||
|
&extractortest.MockNode{
|
||||||
|
Children: map[string]extractor.Nodes{
|
||||||
|
"span": {
|
||||||
|
&extractortest.MockNode{TextValue: "34°"},
|
||||||
|
&extractortest.MockNode{TextValue: "28°"},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
"span > span": {&extractortest.MockNode{TextValue: "70%"}},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
header := &extractortest.MockNode{
|
||||||
|
Children: map[string]extractor.Nodes{
|
||||||
|
"p": {
|
||||||
|
&extractortest.MockNode{TextValue: "Snow"},
|
||||||
|
&extractortest.MockNode{TextValue: "Erie, PA"},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
// Advisory div — this is the extra element that was breaking extraction.
|
||||||
|
// It has no ul child, so div:has(> ul) skips it.
|
||||||
|
// It does match div:not(:has(ul)) (it has no ul), so the header is only found because it precedes the advisory and SelectFirst takes the first match.
|
||||||
|
advisory := &extractortest.MockNode{
|
||||||
|
TextValue: "Wind Advisory in effect until 7 PM EST",
|
||||||
|
}
|
||||||
|
_ = advisory // illustrative only: the selector-keyed mock below never references it; this keeps the variable from being unused
|
||||||
|
|
||||||
|
// Section: the advisory div sits between header and hourly container.
|
||||||
|
// The mock maps the structural selectors used by extractWeather:
|
||||||
|
// div:not(:has(ul)) → header (first div without a list)
|
||||||
|
// div:has(> ul) → hourlyContainer (div with a direct ul child)
|
||||||
|
// ul > div → daily forecast items
|
||||||
|
section := &extractortest.MockNode{
|
||||||
|
Children: map[string]extractor.Nodes{
|
||||||
|
"div:not(:has(ul))": {header},
|
||||||
|
"div:has(> ul)": {hourlyContainer},
|
||||||
|
"ul > div": {dayThu},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
widget := &extractortest.MockNode{
|
||||||
|
Children: map[string]extractor.Nodes{
|
||||||
|
"section": {section},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
doc := &extractortest.MockDocument{
|
||||||
|
URLValue: "https://duckduckgo.com/?q=weather+Erie%2CPA%2CUS",
|
||||||
|
MockNode: extractortest.MockNode{
|
||||||
|
Children: map[string]extractor.Nodes{
|
||||||
|
"article:has(img[src*='weatherkit'])": {widget},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
data, err := extractWeather(doc)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("extractWeather() error: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Header should be extracted correctly despite advisory
|
||||||
|
if data.Condition != "Snow" {
|
||||||
|
t.Errorf("Condition = %q, want %q", data.Condition, "Snow")
|
||||||
|
}
|
||||||
|
if data.Location != "Erie, PA" {
|
||||||
|
t.Errorf("Location = %q, want %q", data.Location, "Erie, PA")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Hourly data should be found despite advisory shifting positions
|
||||||
|
if len(data.Hourly) != 1 {
|
||||||
|
t.Fatalf("Hourly len = %d, want 1", len(data.Hourly))
|
||||||
|
}
|
||||||
|
if data.Hourly[0].Time != "2 PM" {
|
||||||
|
t.Errorf("Hourly[0].Time = %q, want %q", data.Hourly[0].Time, "2 PM")
|
||||||
|
}
|
||||||
|
if data.Hourly[0].Temp != 31 {
|
||||||
|
t.Errorf("Hourly[0].Temp = %v, want 31", data.Hourly[0].Temp)
|
||||||
|
}
|
||||||
|
if data.Hourly[0].Precipitation != 40 {
|
||||||
|
t.Errorf("Hourly[0].Precipitation = %d, want 40", data.Hourly[0].Precipitation)
|
||||||
|
}
|
||||||
|
if data.Hourly[0].IconHint != "Snow" {
|
||||||
|
t.Errorf("Hourly[0].IconHint = %q, want %q", data.Hourly[0].IconHint, "Snow")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Current temp derived from hourly
|
||||||
|
if data.CurrentTemp != 31 {
|
||||||
|
t.Errorf("CurrentTemp = %v, want 31", data.CurrentTemp)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Humidity and wind
|
||||||
|
if data.Humidity != "80%" {
|
||||||
|
t.Errorf("Humidity = %q, want %q", data.Humidity, "80%")
|
||||||
|
}
|
||||||
|
if data.Wind != "W 35 mph" {
|
||||||
|
t.Errorf("Wind = %q, want %q", data.Wind, "W 35 mph")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Daily forecast
|
||||||
|
if len(data.Forecast) != 1 {
|
||||||
|
t.Fatalf("Forecast len = %d, want 1", len(data.Forecast))
|
||||||
|
}
|
||||||
|
if data.Forecast[0].Day != "Thu" {
|
||||||
|
t.Errorf("Forecast[0].Day = %q, want %q", data.Forecast[0].Day, "Thu")
|
||||||
|
}
|
||||||
|
if data.Forecast[0].HighTemp != 34 {
|
||||||
|
t.Errorf("Forecast[0].HighTemp = %v, want 34", data.Forecast[0].HighTemp)
|
||||||
|
}
|
||||||
|
if data.Forecast[0].LowTemp != 28 {
|
||||||
|
t.Errorf("Forecast[0].LowTemp = %v, want 28", data.Forecast[0].LowTemp)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestExtractIconHint_Priority(t *testing.T) {
|
func TestExtractIconHint_Priority(t *testing.T) {
|
||||||
// aria-label takes priority over title and alt
|
// aria-label takes priority over title and alt
|
||||||
nodes := extractor.Nodes{
|
nodes := extractor.Nodes{
|
||||||
|
|||||||
Reference in New Issue
Block a user