fix: update weather selectors to match DDG's actual DOM #54

Merged
steve merged 1 commit from fix/weather-selector-dom-mismatch into main 2026-02-15 23:07:10 +00:00
2 changed files with 292 additions and 188 deletions

View File

@@ -4,6 +4,7 @@ import (
"context"
"fmt"
"log/slog"
"strings"
"time"
"gitea.stevedudenhoeffer.com/steve/go-extractor"
@@ -71,121 +72,127 @@ func GetWeather(ctx context.Context, b extractor.Browser, city string) (*Weather
func extractWeather(doc extractor.Node) (*WeatherData, error) {
var data WeatherData
// Location
locs := doc.Select("div.module--weather span.module__title__link")
if len(locs) > 0 {
data.Location, _ = locs[0].Text()
// DuckDuckGo's weather widget uses randomized CSS class names (CSS modules),
// so we identify elements by structural selectors and image src attributes.
// The widget is an article element containing weatherkit icon images.
widget := doc.SelectFirst("article:has(img[src*='weatherkit'])")
if widget == nil {
return &data, nil
}
// Current temperature
temps := doc.Select("div.module--weather .module__current-temp")
if len(temps) > 0 {
txt, _ := temps[0].Text()
data.CurrentTemp = parse.NumericOnly(txt)
section := widget.SelectFirst("section")
if section == nil {
return &data, nil
}
// Condition
conds := doc.Select("div.module--weather .module__weather-summary")
if len(conds) > 0 {
data.Condition, _ = conds[0].Text()
// Header: condition and location
// Structure: section > div:first-child > [div(toggle), p(condition), p(location)]
header := section.SelectFirst("div:first-child")
if header != nil {
ps := header.Select("p")
if len(ps) >= 2 {
data.Condition, _ = ps[0].Text()
data.Location, _ = ps[1].Text()
} else if len(ps) == 1 {
data.Condition, _ = ps[0].Text()
}
}
// High/low
highs := doc.Select("div.module--weather .module__high-temp")
if len(highs) > 0 {
txt, _ := highs[0].Text()
data.HighTemp = parse.NumericOnly(txt)
}
// Hourly forecast and details
// Structure: section > div:nth-child(2) > [ul(hourly items), div(humidity/wind)]
hourlyContainer := section.SelectFirst("div:nth-child(2)")
if hourlyContainer != nil {
_ = hourlyContainer.ForEach("ul > li", func(n extractor.Node) error {
var hour HourlyForecast
hour.Precipitation = -1
lows := doc.Select("div.module--weather .module__low-temp")
if len(lows) > 0 {
txt, _ := lows[0].Text()
data.LowTemp = parse.NumericOnly(txt)
}
// Each li contains: p(time), img(icon), [span(precip)], p(temp)
ps := n.Select("p")
if len(ps) >= 2 {
hour.Time, _ = ps[0].Text()
txt, _ := ps[len(ps)-1].Text()
hour.Temp = parse.NumericOnly(txt)
}
// Humidity
humids := doc.Select("div.module--weather .module__humidity")
if len(humids) > 0 {
data.Humidity, _ = humids[0].Text()
}
// Icon hint and condition from the weather icon's alt attribute
icons := n.Select("img[src*='weatherkit']:not([src*='Precipitation'])")
hour.IconHint = extractIconHint(icons)
hour.Condition = hour.IconHint
// Wind
winds := doc.Select("div.module--weather .module__wind")
if len(winds) > 0 {
data.Wind, _ = winds[0].Text()
// Precipitation percentage is in a span > span structure
if precip := n.SelectFirst("span > span"); precip != nil {
txt, _ := precip.Text()
hour.Precipitation = int(parse.NumericOnly(txt))
}
data.Hourly = append(data.Hourly, hour)
return nil
})
// Use first hourly temperature as current temp (no standalone current temp element)
if len(data.Hourly) > 0 {
data.CurrentTemp = data.Hourly[0].Temp
}
// Humidity and wind from the details div (after the hourly ul)
details := hourlyContainer.Select("div > p")
for _, p := range details {
txt, _ := p.Text()
if strings.Contains(txt, "Humidity") {
if strong := p.SelectFirst("strong"); strong != nil {
data.Humidity, _ = strong.Text()
}
} else if strings.Contains(txt, "Wind") {
if strong := p.SelectFirst("strong"); strong != nil {
data.Wind, _ = strong.Text()
}
}
}
}
// Daily forecast
_ = doc.ForEach("div.module--weather .module__forecast-day", func(n extractor.Node) error {
// Structure: section > ul > div (each day)
// The daily ul has div children; the hourly ul has li children, so ul > div is unambiguous.
_ = section.ForEach("ul > div", func(n extractor.Node) error {
var day DayForecast
day.Precipitation = -1
days := n.Select(".forecast-day__name")
if len(days) > 0 {
day.Day, _ = days[0].Text()
// Day name from first p
if d := n.SelectFirst("p:first-child"); d != nil {
day.Day, _ = d.Text()
}
dayHighs := n.Select(".forecast-day__high")
if len(dayHighs) > 0 {
txt, _ := dayHighs[0].Text()
day.HighTemp = parse.NumericOnly(txt)
// Icon hint and condition from the weather icon's alt attribute
icons := n.Select("img[src*='weatherkit']:not([src*='Precipitation'])")
day.IconHint = extractIconHint(icons)
day.Condition = day.IconHint
// High/low temps from last p's spans
if temps := n.SelectFirst("p:last-of-type"); temps != nil {
spans := temps.Select("span")
if len(spans) >= 2 {
highTxt, _ := spans[0].Text()
day.HighTemp = parse.NumericOnly(highTxt)
lowTxt, _ := spans[1].Text()
day.LowTemp = parse.NumericOnly(lowTxt)
}
}
dayLows := n.Select(".forecast-day__low")
if len(dayLows) > 0 {
txt, _ := dayLows[0].Text()
day.LowTemp = parse.NumericOnly(txt)
}
dayConds := n.Select(".forecast-day__condition")
if len(dayConds) > 0 {
day.Condition, _ = dayConds[0].Text()
}
precips := n.Select(".forecast-day__precip")
if len(precips) > 0 {
txt, _ := precips[0].Text()
// Precipitation percentage is in a span > span structure
if precip := n.SelectFirst("span > span"); precip != nil {
txt, _ := precip.Text()
day.Precipitation = int(parse.NumericOnly(txt))
}
day.IconHint = extractIconHint(n.Select(".forecast-day__icon"))
data.Forecast = append(data.Forecast, day)
return nil
})
// Hourly forecast
_ = doc.ForEach("div.module--weather .module__hourly-item", func(n extractor.Node) error {
var hour HourlyForecast
hour.Precipitation = -1
times := n.Select(".hourly-item__time")
if len(times) > 0 {
hour.Time, _ = times[0].Text()
}
temps := n.Select(".hourly-item__temp")
if len(temps) > 0 {
txt, _ := temps[0].Text()
hour.Temp = parse.NumericOnly(txt)
}
conds := n.Select(".hourly-item__condition")
if len(conds) > 0 {
hour.Condition, _ = conds[0].Text()
}
precips := n.Select(".hourly-item__precip")
if len(precips) > 0 {
txt, _ := precips[0].Text()
hour.Precipitation = int(parse.NumericOnly(txt))
}
hour.IconHint = extractIconHint(n.Select(".hourly-item__icon"))
data.Hourly = append(data.Hourly, hour)
return nil
})
// Today's high/low from first daily forecast entry
if len(data.Forecast) > 0 {
data.HighTemp = data.Forecast[0].HighTemp
data.LowTemp = data.Forecast[0].LowTemp
}
return &data, nil
}

View File

@@ -9,81 +9,143 @@ import (
)
func makeWeatherDoc() *extractortest.MockDocument {
// Mock mirrors the actual DuckDuckGo weather widget DOM structure:
// article > section > [div(header), div(hourly+details), ul(daily)]
// CSS class names are randomized, so selectors use structural/attribute patterns.
// Hourly forecast items (section > div:nth-child(2) > ul > li)
hourlyItem1 := &extractortest.MockNode{
Children: map[string]extractor.Nodes{
"p": {
&extractortest.MockNode{TextValue: "3 PM"},
&extractortest.MockNode{TextValue: "74°"},
},
"img[src*='weatherkit']:not([src*='Precipitation'])": {
&extractortest.MockNode{Attrs: map[string]string{"alt": "MostlyCloudy"}},
},
"span > span": {
&extractortest.MockNode{TextValue: "5%"},
},
},
}
hourlyItem2 := &extractortest.MockNode{
Children: map[string]extractor.Nodes{
"p": {
&extractortest.MockNode{TextValue: "4 PM"},
&extractortest.MockNode{TextValue: "73°"},
},
"img[src*='weatherkit']:not([src*='Precipitation'])": {
&extractortest.MockNode{Attrs: map[string]string{"alt": "Cloudy"}},
},
},
}
hourlyItem3 := &extractortest.MockNode{
Children: map[string]extractor.Nodes{
"p": {
&extractortest.MockNode{TextValue: "5 PM"},
&extractortest.MockNode{TextValue: "70°"},
},
"img[src*='weatherkit']:not([src*='Precipitation'])": {
&extractortest.MockNode{Attrs: map[string]string{"aria-label": "HeavyRain"}},
},
"span > span": {
&extractortest.MockNode{TextValue: "60%"},
},
},
}
// Hourly container (section > div:nth-child(2))
hourlyContainer := &extractortest.MockNode{
Children: map[string]extractor.Nodes{
"ul > li": {hourlyItem1, hourlyItem2, hourlyItem3},
"div > p": {
&extractortest.MockNode{
TextValue: "Humidity: 55%",
Children: map[string]extractor.Nodes{
"strong": {&extractortest.MockNode{TextValue: "55%"}},
},
},
&extractortest.MockNode{
TextValue: "Wind: SW 10 mph",
Children: map[string]extractor.Nodes{
"strong": {&extractortest.MockNode{TextValue: "SW 10 mph"}},
},
},
},
},
}
// Daily forecast items (section > ul > div)
dayMon := &extractortest.MockNode{
Children: map[string]extractor.Nodes{
"p:first-child": {&extractortest.MockNode{TextValue: "Mon"}},
"img[src*='weatherkit']:not([src*='Precipitation'])": {
&extractortest.MockNode{Attrs: map[string]string{"alt": "PartlyCloudy"}},
},
"p:last-of-type": {
&extractortest.MockNode{
Children: map[string]extractor.Nodes{
"span": {
&extractortest.MockNode{TextValue: "80°"},
&extractortest.MockNode{TextValue: "66°"},
},
},
},
},
"span > span": {&extractortest.MockNode{TextValue: "10%"}},
},
}
dayTue := &extractortest.MockNode{
Children: map[string]extractor.Nodes{
"p:first-child": {&extractortest.MockNode{TextValue: "Tue"}},
"img[src*='weatherkit']:not([src*='Precipitation'])": {
&extractortest.MockNode{Attrs: map[string]string{"alt": "Rain"}},
},
"p:last-of-type": {
&extractortest.MockNode{
Children: map[string]extractor.Nodes{
"span": {
&extractortest.MockNode{TextValue: "75°"},
&extractortest.MockNode{TextValue: "62°"},
},
},
},
},
"span > span": {&extractortest.MockNode{TextValue: "80%"}},
},
}
// Header (section > div:first-child)
header := &extractortest.MockNode{
Children: map[string]extractor.Nodes{
"p": {
&extractortest.MockNode{TextValue: "Partly Cloudy"},
&extractortest.MockNode{TextValue: "New York, NY"},
},
},
}
// Section
section := &extractortest.MockNode{
Children: map[string]extractor.Nodes{
"div:first-child": {header},
"div:nth-child(2)": {hourlyContainer},
"ul > div": {dayMon, dayTue},
},
}
// Widget article
widget := &extractortest.MockNode{
Children: map[string]extractor.Nodes{
"section": {section},
},
}
return &extractortest.MockDocument{
URLValue: "https://duckduckgo.com/?q=weather+new+york",
MockNode: extractortest.MockNode{
Children: map[string]extractor.Nodes{
"div.module--weather span.module__title__link": {
&extractortest.MockNode{TextValue: "New York, NY"},
},
"div.module--weather .module__current-temp": {
&extractortest.MockNode{TextValue: "72°F"},
},
"div.module--weather .module__weather-summary": {
&extractortest.MockNode{TextValue: "Partly Cloudy"},
},
"div.module--weather .module__high-temp": {
&extractortest.MockNode{TextValue: "78°"},
},
"div.module--weather .module__low-temp": {
&extractortest.MockNode{TextValue: "65°"},
},
"div.module--weather .module__humidity": {
&extractortest.MockNode{TextValue: "55%"},
},
"div.module--weather .module__wind": {
&extractortest.MockNode{TextValue: "SW 10 mph"},
},
"div.module--weather .module__forecast-day": {
&extractortest.MockNode{
Children: map[string]extractor.Nodes{
".forecast-day__name": {&extractortest.MockNode{TextValue: "Mon"}},
".forecast-day__high": {&extractortest.MockNode{TextValue: "80°"}},
".forecast-day__low": {&extractortest.MockNode{TextValue: "66°"}},
".forecast-day__condition": {&extractortest.MockNode{TextValue: "Sunny"}},
".forecast-day__precip": {&extractortest.MockNode{TextValue: "10%"}},
".forecast-day__icon": {&extractortest.MockNode{Attrs: map[string]string{"alt": "PartlyCloudy"}}},
},
},
&extractortest.MockNode{
Children: map[string]extractor.Nodes{
".forecast-day__name": {&extractortest.MockNode{TextValue: "Tue"}},
".forecast-day__high": {&extractortest.MockNode{TextValue: "75°"}},
".forecast-day__low": {&extractortest.MockNode{TextValue: "62°"}},
".forecast-day__condition": {&extractortest.MockNode{TextValue: "Rain"}},
".forecast-day__precip": {&extractortest.MockNode{TextValue: "80%"}},
".forecast-day__icon": {&extractortest.MockNode{Attrs: map[string]string{"alt": "Rain"}}},
},
},
},
"div.module--weather .module__hourly-item": {
&extractortest.MockNode{
Children: map[string]extractor.Nodes{
".hourly-item__time": {&extractortest.MockNode{TextValue: "3 PM"}},
".hourly-item__temp": {&extractortest.MockNode{TextValue: "74°"}},
".hourly-item__condition": {&extractortest.MockNode{TextValue: "Partly Cloudy"}},
".hourly-item__precip": {&extractortest.MockNode{TextValue: "5%"}},
".hourly-item__icon": {&extractortest.MockNode{Attrs: map[string]string{"alt": "MostlyCloudy"}}},
},
},
&extractortest.MockNode{
Children: map[string]extractor.Nodes{
".hourly-item__time": {&extractortest.MockNode{TextValue: "4 PM"}},
".hourly-item__temp": {&extractortest.MockNode{TextValue: "73°"}},
".hourly-item__condition": {&extractortest.MockNode{TextValue: "Cloudy"}},
".hourly-item__icon": {&extractortest.MockNode{Attrs: map[string]string{"alt": "Cloudy"}}},
},
},
&extractortest.MockNode{
Children: map[string]extractor.Nodes{
".hourly-item__time": {&extractortest.MockNode{TextValue: "5 PM"}},
".hourly-item__temp": {&extractortest.MockNode{TextValue: "70°"}},
".hourly-item__condition": {&extractortest.MockNode{TextValue: "Rain"}},
".hourly-item__precip": {&extractortest.MockNode{TextValue: "60%"}},
".hourly-item__icon": {&extractortest.MockNode{Attrs: map[string]string{"aria-label": "HeavyRain"}}},
},
},
},
"article:has(img[src*='weatherkit'])": {widget},
},
},
}
@@ -100,17 +162,19 @@ func TestExtractWeather(t *testing.T) {
if data.Location != "New York, NY" {
t.Errorf("Location = %q, want %q", data.Location, "New York, NY")
}
if data.CurrentTemp != 72 {
t.Errorf("CurrentTemp = %v, want 72", data.CurrentTemp)
// CurrentTemp is derived from first hourly entry (no standalone current temp in new widget)
if data.CurrentTemp != 74 {
t.Errorf("CurrentTemp = %v, want 74", data.CurrentTemp)
}
if data.Condition != "Partly Cloudy" {
t.Errorf("Condition = %q, want %q", data.Condition, "Partly Cloudy")
}
if data.HighTemp != 78 {
t.Errorf("HighTemp = %v, want 78", data.HighTemp)
// HighTemp/LowTemp are derived from first daily forecast entry
if data.HighTemp != 80 {
t.Errorf("HighTemp = %v, want 80", data.HighTemp)
}
if data.LowTemp != 65 {
t.Errorf("LowTemp = %v, want 65", data.LowTemp)
if data.LowTemp != 66 {
t.Errorf("LowTemp = %v, want 66", data.LowTemp)
}
if data.Humidity != "55%" {
t.Errorf("Humidity = %q, want %q", data.Humidity, "55%")
@@ -135,6 +199,10 @@ func TestExtractWeather(t *testing.T) {
if data.Forecast[0].IconHint != "PartlyCloudy" {
t.Errorf("Forecast[0].IconHint = %q, want %q", data.Forecast[0].IconHint, "PartlyCloudy")
}
// Condition is now derived from icon hint
if data.Forecast[0].Condition != "PartlyCloudy" {
t.Errorf("Forecast[0].Condition = %q, want %q", data.Forecast[0].Condition, "PartlyCloudy")
}
if data.Forecast[1].Condition != "Rain" {
t.Errorf("Forecast[1].Condition = %q, want %q", data.Forecast[1].Condition, "Rain")
}
@@ -155,8 +223,9 @@ func TestExtractWeather(t *testing.T) {
if data.Hourly[0].Temp != 74 {
t.Errorf("Hourly[0].Temp = %v, want 74", data.Hourly[0].Temp)
}
if data.Hourly[0].Condition != "Partly Cloudy" {
t.Errorf("Hourly[0].Condition = %q, want %q", data.Hourly[0].Condition, "Partly Cloudy")
// Condition is now derived from icon hint (no separate condition element)
if data.Hourly[0].Condition != "MostlyCloudy" {
t.Errorf("Hourly[0].Condition = %q, want %q", data.Hourly[0].Condition, "MostlyCloudy")
}
if data.Hourly[0].Precipitation != 5 {
t.Errorf("Hourly[0].Precipitation = %d, want 5", data.Hourly[0].Precipitation)
@@ -202,8 +271,8 @@ func TestGetWeather_MockBrowser(t *testing.T) {
if data.Location != "New York, NY" {
t.Errorf("Location = %q, want %q", data.Location, "New York, NY")
}
if data.CurrentTemp != 72 {
t.Errorf("CurrentTemp = %v, want 72", data.CurrentTemp)
if data.CurrentTemp != 74 {
t.Errorf("CurrentTemp = %v, want 74", data.CurrentTemp)
}
if len(data.Hourly) != 3 {
t.Errorf("Hourly len = %d, want 3", len(data.Hourly))
@@ -231,27 +300,55 @@ func TestExtractWeather_Empty(t *testing.T) {
}
func TestExtractWeather_NoPrecipitation(t *testing.T) {
// Daily item without precipitation or icon
dayWed := &extractortest.MockNode{
Children: map[string]extractor.Nodes{
"p:first-child": {&extractortest.MockNode{TextValue: "Wed"}},
"p:last-of-type": {
&extractortest.MockNode{
Children: map[string]extractor.Nodes{
"span": {
&extractortest.MockNode{TextValue: "85°"},
&extractortest.MockNode{TextValue: "70°"},
},
},
},
},
},
}
// Hourly item without precipitation or icon
hourlyItem := &extractortest.MockNode{
Children: map[string]extractor.Nodes{
"p": {
&extractortest.MockNode{TextValue: "12 PM"},
&extractortest.MockNode{TextValue: "82°"},
},
},
}
section := &extractortest.MockNode{
Children: map[string]extractor.Nodes{
"div:first-child": {&extractortest.MockNode{}},
"div:nth-child(2)": {&extractortest.MockNode{
Children: map[string]extractor.Nodes{
"ul > li": {hourlyItem},
},
}},
"ul > div": {dayWed},
},
}
widget := &extractortest.MockNode{
Children: map[string]extractor.Nodes{
"section": {section},
},
}
doc := &extractortest.MockDocument{
MockNode: extractortest.MockNode{
Children: map[string]extractor.Nodes{
"div.module--weather .module__forecast-day": {
&extractortest.MockNode{
Children: map[string]extractor.Nodes{
".forecast-day__name": {&extractortest.MockNode{TextValue: "Wed"}},
".forecast-day__high": {&extractortest.MockNode{TextValue: "85°"}},
".forecast-day__low": {&extractortest.MockNode{TextValue: "70°"}},
".forecast-day__condition": {&extractortest.MockNode{TextValue: "Clear"}},
},
},
},
"div.module--weather .module__hourly-item": {
&extractortest.MockNode{
Children: map[string]extractor.Nodes{
".hourly-item__time": {&extractortest.MockNode{TextValue: "12 PM"}},
".hourly-item__temp": {&extractortest.MockNode{TextValue: "82°"}},
},
},
},
"article:has(img[src*='weatherkit'])": {widget},
},
},
}