fix: update weather extractor selectors to match DuckDuckGo's actual DOM
DuckDuckGo's weather widget uses randomized CSS module class names that don't match the BEM-style selectors the extractor was using.

Replace all class-based selectors with structural and attribute-based selectors:

- Identify widget via article:has(img[src*='weatherkit'])
- Use positional selectors (div:first-child, p:first-of-type, etc.)
- Extract icon hints from img[alt] attributes
- Parse precipitation from span > span structure
- Derive CurrentTemp from first hourly entry (no standalone element)
- Derive HighTemp/LowTemp from first daily forecast entry
- Use text-matching for Humidity/Wind labels

Fixes #53

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -4,6 +4,7 @@ import (
|
||||
"context"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"gitea.stevedudenhoeffer.com/steve/go-extractor"
|
||||
@@ -71,121 +72,127 @@ func GetWeather(ctx context.Context, b extractor.Browser, city string) (*Weather
|
||||
func extractWeather(doc extractor.Node) (*WeatherData, error) {
|
||||
var data WeatherData
|
||||
|
||||
// Location
|
||||
locs := doc.Select("div.module--weather span.module__title__link")
|
||||
if len(locs) > 0 {
|
||||
data.Location, _ = locs[0].Text()
|
||||
// DuckDuckGo's weather widget uses randomized CSS class names (CSS modules),
|
||||
// so we identify elements by structural selectors and image src attributes.
|
||||
// The widget is an article element containing weatherkit icon images.
|
||||
widget := doc.SelectFirst("article:has(img[src*='weatherkit'])")
|
||||
if widget == nil {
|
||||
return &data, nil
|
||||
}
|
||||
|
||||
// Current temperature
|
||||
temps := doc.Select("div.module--weather .module__current-temp")
|
||||
if len(temps) > 0 {
|
||||
txt, _ := temps[0].Text()
|
||||
data.CurrentTemp = parse.NumericOnly(txt)
|
||||
section := widget.SelectFirst("section")
|
||||
if section == nil {
|
||||
return &data, nil
|
||||
}
|
||||
|
||||
// Condition
|
||||
conds := doc.Select("div.module--weather .module__weather-summary")
|
||||
if len(conds) > 0 {
|
||||
data.Condition, _ = conds[0].Text()
|
||||
// Header: condition and location
|
||||
// Structure: section > div:first-child > [div(toggle), p(condition), p(location)]
|
||||
header := section.SelectFirst("div:first-child")
|
||||
if header != nil {
|
||||
ps := header.Select("p")
|
||||
if len(ps) >= 2 {
|
||||
data.Condition, _ = ps[0].Text()
|
||||
data.Location, _ = ps[1].Text()
|
||||
} else if len(ps) == 1 {
|
||||
data.Condition, _ = ps[0].Text()
|
||||
}
|
||||
}
|
||||
|
||||
// High/low
|
||||
highs := doc.Select("div.module--weather .module__high-temp")
|
||||
if len(highs) > 0 {
|
||||
txt, _ := highs[0].Text()
|
||||
data.HighTemp = parse.NumericOnly(txt)
|
||||
}
|
||||
// Hourly forecast and details
|
||||
// Structure: section > div:nth-child(2) > [ul(hourly items), div(humidity/wind)]
|
||||
hourlyContainer := section.SelectFirst("div:nth-child(2)")
|
||||
if hourlyContainer != nil {
|
||||
_ = hourlyContainer.ForEach("ul > li", func(n extractor.Node) error {
|
||||
var hour HourlyForecast
|
||||
hour.Precipitation = -1
|
||||
|
||||
lows := doc.Select("div.module--weather .module__low-temp")
|
||||
if len(lows) > 0 {
|
||||
txt, _ := lows[0].Text()
|
||||
data.LowTemp = parse.NumericOnly(txt)
|
||||
}
|
||||
// Each li contains: p(time), img(icon), [span(precip)], p(temp)
|
||||
ps := n.Select("p")
|
||||
if len(ps) >= 2 {
|
||||
hour.Time, _ = ps[0].Text()
|
||||
txt, _ := ps[len(ps)-1].Text()
|
||||
hour.Temp = parse.NumericOnly(txt)
|
||||
}
|
||||
|
||||
// Humidity
|
||||
humids := doc.Select("div.module--weather .module__humidity")
|
||||
if len(humids) > 0 {
|
||||
data.Humidity, _ = humids[0].Text()
|
||||
}
|
||||
// Icon hint and condition from the weather icon's alt attribute
|
||||
icons := n.Select("img[src*='weatherkit']:not([src*='Precipitation'])")
|
||||
hour.IconHint = extractIconHint(icons)
|
||||
hour.Condition = hour.IconHint
|
||||
|
||||
// Wind
|
||||
winds := doc.Select("div.module--weather .module__wind")
|
||||
if len(winds) > 0 {
|
||||
data.Wind, _ = winds[0].Text()
|
||||
// Precipitation percentage is in a span > span structure
|
||||
if precip := n.SelectFirst("span > span"); precip != nil {
|
||||
txt, _ := precip.Text()
|
||||
hour.Precipitation = int(parse.NumericOnly(txt))
|
||||
}
|
||||
|
||||
data.Hourly = append(data.Hourly, hour)
|
||||
return nil
|
||||
})
|
||||
|
||||
// Use first hourly temperature as current temp (no standalone current temp element)
|
||||
if len(data.Hourly) > 0 {
|
||||
data.CurrentTemp = data.Hourly[0].Temp
|
||||
}
|
||||
|
||||
// Humidity and wind from the details div (after the hourly ul)
|
||||
details := hourlyContainer.Select("div > p")
|
||||
for _, p := range details {
|
||||
txt, _ := p.Text()
|
||||
if strings.Contains(txt, "Humidity") {
|
||||
if strong := p.SelectFirst("strong"); strong != nil {
|
||||
data.Humidity, _ = strong.Text()
|
||||
}
|
||||
} else if strings.Contains(txt, "Wind") {
|
||||
if strong := p.SelectFirst("strong"); strong != nil {
|
||||
data.Wind, _ = strong.Text()
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Daily forecast
|
||||
_ = doc.ForEach("div.module--weather .module__forecast-day", func(n extractor.Node) error {
|
||||
// Structure: section > ul > div (each day)
|
||||
// The daily ul has div children; the hourly ul has li children, so ul > div is unambiguous.
|
||||
_ = section.ForEach("ul > div", func(n extractor.Node) error {
|
||||
var day DayForecast
|
||||
day.Precipitation = -1
|
||||
|
||||
days := n.Select(".forecast-day__name")
|
||||
if len(days) > 0 {
|
||||
day.Day, _ = days[0].Text()
|
||||
// Day name from first p
|
||||
if d := n.SelectFirst("p:first-child"); d != nil {
|
||||
day.Day, _ = d.Text()
|
||||
}
|
||||
|
||||
dayHighs := n.Select(".forecast-day__high")
|
||||
if len(dayHighs) > 0 {
|
||||
txt, _ := dayHighs[0].Text()
|
||||
day.HighTemp = parse.NumericOnly(txt)
|
||||
// Icon hint and condition from the weather icon's alt attribute
|
||||
icons := n.Select("img[src*='weatherkit']:not([src*='Precipitation'])")
|
||||
day.IconHint = extractIconHint(icons)
|
||||
day.Condition = day.IconHint
|
||||
|
||||
// High/low temps from last p's spans
|
||||
if temps := n.SelectFirst("p:last-of-type"); temps != nil {
|
||||
spans := temps.Select("span")
|
||||
if len(spans) >= 2 {
|
||||
highTxt, _ := spans[0].Text()
|
||||
day.HighTemp = parse.NumericOnly(highTxt)
|
||||
lowTxt, _ := spans[1].Text()
|
||||
day.LowTemp = parse.NumericOnly(lowTxt)
|
||||
}
|
||||
}
|
||||
|
||||
dayLows := n.Select(".forecast-day__low")
|
||||
if len(dayLows) > 0 {
|
||||
txt, _ := dayLows[0].Text()
|
||||
day.LowTemp = parse.NumericOnly(txt)
|
||||
}
|
||||
|
||||
dayConds := n.Select(".forecast-day__condition")
|
||||
if len(dayConds) > 0 {
|
||||
day.Condition, _ = dayConds[0].Text()
|
||||
}
|
||||
|
||||
precips := n.Select(".forecast-day__precip")
|
||||
if len(precips) > 0 {
|
||||
txt, _ := precips[0].Text()
|
||||
// Precipitation percentage is in a span > span structure
|
||||
if precip := n.SelectFirst("span > span"); precip != nil {
|
||||
txt, _ := precip.Text()
|
||||
day.Precipitation = int(parse.NumericOnly(txt))
|
||||
}
|
||||
|
||||
day.IconHint = extractIconHint(n.Select(".forecast-day__icon"))
|
||||
|
||||
data.Forecast = append(data.Forecast, day)
|
||||
return nil
|
||||
})
|
||||
|
||||
// Hourly forecast
|
||||
_ = doc.ForEach("div.module--weather .module__hourly-item", func(n extractor.Node) error {
|
||||
var hour HourlyForecast
|
||||
hour.Precipitation = -1
|
||||
|
||||
times := n.Select(".hourly-item__time")
|
||||
if len(times) > 0 {
|
||||
hour.Time, _ = times[0].Text()
|
||||
}
|
||||
|
||||
temps := n.Select(".hourly-item__temp")
|
||||
if len(temps) > 0 {
|
||||
txt, _ := temps[0].Text()
|
||||
hour.Temp = parse.NumericOnly(txt)
|
||||
}
|
||||
|
||||
conds := n.Select(".hourly-item__condition")
|
||||
if len(conds) > 0 {
|
||||
hour.Condition, _ = conds[0].Text()
|
||||
}
|
||||
|
||||
precips := n.Select(".hourly-item__precip")
|
||||
if len(precips) > 0 {
|
||||
txt, _ := precips[0].Text()
|
||||
hour.Precipitation = int(parse.NumericOnly(txt))
|
||||
}
|
||||
|
||||
hour.IconHint = extractIconHint(n.Select(".hourly-item__icon"))
|
||||
|
||||
data.Hourly = append(data.Hourly, hour)
|
||||
return nil
|
||||
})
|
||||
// Today's high/low from first daily forecast entry
|
||||
if len(data.Forecast) > 0 {
|
||||
data.HighTemp = data.Forecast[0].HighTemp
|
||||
data.LowTemp = data.Forecast[0].LowTemp
|
||||
}
|
||||
|
||||
return &data, nil
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user