Files
go-extractor/sites/duckduckgo/weather.go
Steve Dudenhoeffer a32f57ec92
All checks were successful
CI / build (pull_request) Successful in 30s
CI / vet (pull_request) Successful in 45s
CI / test (pull_request) Successful in 48s
fix: update weather extractor selectors to match DuckDuckGo's actual DOM
DuckDuckGo's weather widget uses randomized CSS module class names that
don't match the BEM-style selectors the extractor was using. Replace all
class-based selectors with structural and attribute-based selectors:

- Identify widget via article:has(img[src*='weatherkit'])
- Use positional selectors (div:first-child, p:first-of-type, etc.)
- Extract icon hints from img[alt] attributes
- Parse precipitation from span > span structure
- Derive CurrentTemp from first hourly entry (no standalone element)
- Derive HighTemp/LowTemp from first daily forecast entry
- Use text-matching for Humidity/Wind labels

Fixes #53

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-15 23:00:44 +00:00

214 lines
6.1 KiB
Go

package duckduckgo
import (
"context"
"fmt"
"log/slog"
"strings"
"time"
"gitea.stevedudenhoeffer.com/steve/go-extractor"
"gitea.stevedudenhoeffer.com/steve/go-extractor/sites/internal/parse"
)
// WeatherData is the structured result of scraping DuckDuckGo's weather
// widget. Any field that could not be extracted keeps its zero value.
type WeatherData struct {
	// Location is the place name shown in the widget header.
	Location string

	// CurrentTemp is taken from the first hourly entry.
	CurrentTemp float64

	// Condition is the condition text shown in the widget header.
	Condition string

	// HighTemp and LowTemp are copied from the first daily forecast entry.
	HighTemp, LowTemp float64

	// Humidity and Wind hold the text of the matching detail rows.
	Humidity, Wind string

	// Forecast lists the daily forecast entries in page order.
	Forecast []DayForecast

	// Hourly lists the hourly forecast entries in page order.
	Hourly []HourlyForecast
}
// DayForecast describes one entry of the daily forecast list.
type DayForecast struct {
	// Day is the day label of the entry.
	Day string

	// HighTemp and LowTemp are the entry's high and low temperatures.
	HighTemp, LowTemp float64

	// Condition describes the weather for the day.
	Condition string

	// Precipitation is a percentage in 0-100, or -1 if unavailable.
	Precipitation int

	// IconHint is the icon type from element attributes
	// (e.g. "PartlyCloudy", "Snow").
	IconHint string
}
// HourlyForecast holds a single hour's forecast, i.e. one entry of the
// widget's hourly list.
type HourlyForecast struct {
	// Time is the hour label of the entry.
	Time string

	// Temp is the temperature shown for the hour.
	Temp float64

	// Condition describes the weather for the hour.
	Condition string

	// Precipitation is a percentage in 0-100, or -1 if unavailable.
	Precipitation int

	// IconHint is the icon type from element attributes
	// (e.g. "MostlyCloudy", "Rain").
	IconHint string
}
// GetWeather searches DuckDuckGo for "weather <city>" with browser b and
// scrapes the weather widget from the result page.
//
// It returns an error when the search page cannot be opened; otherwise it
// returns whatever extractWeather produces. A failed wait for network idle is
// only logged, and extraction is still attempted on the page as loaded.
func (c Config) GetWeather(ctx context.Context, b extractor.Browser, city string) (*WeatherData, error) {
	c = c.validate()

	searchURL := c.ToSearchURL("weather " + city)
	slog.Info("fetching weather", "url", searchURL, "city", city)

	page, openErr := b.Open(ctx, searchURL.String(), extractor.OpenPageOptions{})
	if openErr != nil {
		return nil, fmt.Errorf("failed to open weather page: %w", openErr)
	}
	defer extractor.DeferClose(page)

	// Give the page up to ten seconds to settle before scraping.
	idleTimeout := 10 * time.Second
	waitErr := page.WaitForNetworkIdle(&idleTimeout)
	if waitErr != nil {
		slog.Warn("WaitForNetworkIdle failed", "err", waitErr)
	}

	return extractWeather(page)
}
// GetWeather fetches the weather for city using the package-level
// DefaultConfig; it is shorthand for DefaultConfig.GetWeather.
func GetWeather(ctx context.Context, b extractor.Browser, city string) (*WeatherData, error) {
	data, err := DefaultConfig.GetWeather(ctx, b, city)
	return data, err
}
// weatherIconSelector matches the weather-condition icon of a forecast entry
// while skipping the precipitation icon, which is also served from weatherkit.
const weatherIconSelector = "img[src*='weatherkit']:not([src*='Precipitation'])"

// extractWeather scrapes DuckDuckGo's weather widget out of doc.
//
// DuckDuckGo's weather widget uses randomized CSS class names (CSS modules),
// so elements are located with structural selectors and image src attributes
// rather than class names. The widget is the article element that contains
// weatherkit icon images; all data is read from its first section element.
//
// Extraction is best effort: when the widget or its section is missing a
// zero-valued WeatherData is returned, and any individual piece that cannot
// be found is left at its zero value (-1 for precipitation). Failures while
// iterating the forecast lists are logged and otherwise ignored. The returned
// error is currently always nil.
func extractWeather(doc extractor.Node) (*WeatherData, error) {
	var data WeatherData

	widget := doc.SelectFirst("article:has(img[src*='weatherkit'])")
	if widget == nil {
		return &data, nil
	}

	section := widget.SelectFirst("section")
	if section == nil {
		return &data, nil
	}

	// Header: condition and location.
	// Structure: section > div:first-child > [div(toggle), p(condition), p(location)]
	if header := section.SelectFirst("div:first-child"); header != nil {
		ps := header.Select("p")
		// Text errors are deliberately ignored: a missing header text simply
		// leaves the field empty.
		if len(ps) >= 1 {
			data.Condition, _ = ps[0].Text()
		}
		if len(ps) >= 2 {
			data.Location, _ = ps[1].Text()
		}
	}

	// Hourly forecast and details.
	// Structure: section > div:nth-child(2) > [ul(hourly items), div(humidity/wind)]
	if hourlyContainer := section.SelectFirst("div:nth-child(2)"); hourlyContainer != nil {
		err := hourlyContainer.ForEach("ul > li", func(n extractor.Node) error {
			data.Hourly = append(data.Hourly, weatherHourFromNode(n))
			return nil
		})
		if err != nil {
			slog.Warn("hourly forecast extraction failed", "err", err)
		}

		// Use the first hourly temperature as the current temperature; the
		// widget has no standalone current-temperature element.
		if len(data.Hourly) > 0 {
			data.CurrentTemp = data.Hourly[0].Temp
		}

		data.Humidity, data.Wind = weatherDetails(hourlyContainer)
	}

	// Daily forecast.
	// Structure: section > ul > div (each day).
	// The daily ul has div children; the hourly ul has li children, so
	// "ul > div" is unambiguous.
	err := section.ForEach("ul > div", func(n extractor.Node) error {
		data.Forecast = append(data.Forecast, weatherDayFromNode(n))
		return nil
	})
	if err != nil {
		slog.Warn("daily forecast extraction failed", "err", err)
	}

	// Today's high/low come from the first daily forecast entry.
	if len(data.Forecast) > 0 {
		data.HighTemp = data.Forecast[0].HighTemp
		data.LowTemp = data.Forecast[0].LowTemp
	}

	return &data, nil
}

// weatherHourFromNode builds an HourlyForecast from one hourly list item.
// Each li contains: p(time), img(icon), [span(precip)], p(temp).
func weatherHourFromNode(n extractor.Node) HourlyForecast {
	var hour HourlyForecast

	// The first p is the time and the last p is the temperature.
	if ps := n.Select("p"); len(ps) >= 2 {
		hour.Time, _ = ps[0].Text()
		txt, _ := ps[len(ps)-1].Text()
		hour.Temp = parse.NumericOnly(txt)
	}

	// The icon's descriptive attribute doubles as the condition text.
	hour.IconHint = extractIconHint(n.Select(weatherIconSelector))
	hour.Condition = hour.IconHint
	hour.Precipitation = weatherPrecipitation(n)
	return hour
}

// weatherDayFromNode builds a DayForecast from one daily forecast entry.
func weatherDayFromNode(n extractor.Node) DayForecast {
	var day DayForecast

	// Day name from the first p.
	if d := n.SelectFirst("p:first-child"); d != nil {
		day.Day, _ = d.Text()
	}

	// The icon's descriptive attribute doubles as the condition text.
	day.IconHint = extractIconHint(n.Select(weatherIconSelector))
	day.Condition = day.IconHint

	// High/low temperatures are the first two spans of the last p.
	if temps := n.SelectFirst("p:last-of-type"); temps != nil {
		if spans := temps.Select("span"); len(spans) >= 2 {
			highTxt, _ := spans[0].Text()
			day.HighTemp = parse.NumericOnly(highTxt)
			lowTxt, _ := spans[1].Text()
			day.LowTemp = parse.NumericOnly(lowTxt)
		}
	}

	day.Precipitation = weatherPrecipitation(n)
	return day
}

// weatherPrecipitation reads the precipitation percentage of a forecast
// entry, which sits in a span > span structure. It returns -1 when the entry
// has no such element.
func weatherPrecipitation(n extractor.Node) int {
	precip := n.SelectFirst("span > span")
	if precip == nil {
		return -1
	}
	txt, _ := precip.Text()
	return int(parse.NumericOnly(txt))
}

// weatherDetails reads humidity and wind from the details rows of container.
// A row is recognised by its label text ("Humidity" or "Wind") and its value
// is the text of the row's strong element. When several rows match the same
// label, the last one with a strong element wins.
func weatherDetails(container extractor.Node) (humidity, wind string) {
	for _, p := range container.Select("div > p") {
		txt, _ := p.Text()
		switch {
		case strings.Contains(txt, "Humidity"):
			if strong := p.SelectFirst("strong"); strong != nil {
				humidity, _ = strong.Text()
			}
		case strings.Contains(txt, "Wind"):
			if strong := p.SelectFirst("strong"); strong != nil {
				wind, _ = strong.Text()
			}
		}
	}
	return humidity, wind
}
// extractIconHint returns the icon type described by the first node in nodes.
// It checks the aria-label, title, and alt attributes in that order and
// returns the first non-empty value. The result is "" when nodes is empty or
// none of the three attributes has a value. Only nodes[0] is inspected, and
// the second result of Attr is ignored.
func extractIconHint(nodes extractor.Nodes) string {
	if len(nodes) == 0 {
		return ""
	}

	icon := nodes[0]
	read := func(name string) string {
		value, _ := icon.Attr(name)
		return value
	}

	if hint := read("aria-label"); hint != "" {
		return hint
	}
	if hint := read("title"); hint != "" {
		return hint
	}
	// alt is the last candidate, so its value (possibly empty) is the answer.
	return read("alt")
}