The weather extractor used positional CSS selectors (div:first-child, div:nth-child(2)) to locate the header and hourly container within the widget section. When DuckDuckGo inserts advisory banners (e.g. wind advisory), the extra div shifts positions and breaks extraction of current temp, hourly data, humidity, and wind.

Replace with structural selectors:
- div:not(:has(ul)) for the header (first div without a list)
- div:has(> ul) for the hourly container (div with direct ul child)

These match elements by their content structure rather than position, so advisory banners no longer break extraction.

Fixes #64

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
218 lines
6.4 KiB
Go
218 lines
6.4 KiB
Go
package duckduckgo
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"log/slog"
|
|
"strings"
|
|
"time"
|
|
|
|
"gitea.stevedudenhoeffer.com/steve/go-extractor"
|
|
"gitea.stevedudenhoeffer.com/steve/go-extractor/sites/internal/parse"
|
|
)
|
|
|
|
// WeatherData holds structured weather information extracted from DuckDuckGo.
|
|
type WeatherData struct {
|
|
Location string
|
|
CurrentTemp float64
|
|
Condition string
|
|
HighTemp float64
|
|
LowTemp float64
|
|
Humidity string
|
|
Wind string
|
|
Forecast []DayForecast
|
|
Hourly []HourlyForecast
|
|
}
|
|
|
|
// DayForecast holds a single day's forecast.
type DayForecast struct {
	Day           string  // day label, text of the entry's first-child <p>
	HighTemp      float64 // parsed from the first <span> of the entry's last <p>
	LowTemp       float64 // parsed from the second <span> of the entry's last <p>
	Condition     string  // set to the same value as IconHint
	Precipitation int     // percentage 0-100, -1 if unavailable
	IconHint      string  // icon type from element attributes (e.g. "PartlyCloudy", "Snow")
}
|
|
|
|
// HourlyForecast holds a single hour's forecast.
type HourlyForecast struct {
	Time          string  // hour label, text of the item's first <p>
	Temp          float64 // parsed from the item's last <p>
	Condition     string  // set to the same value as IconHint
	Precipitation int     // percentage 0-100, -1 if unavailable
	IconHint      string  // icon type from element attributes (e.g. "MostlyCloudy", "Rain")
}
|
|
|
|
// GetWeather extracts weather data from DuckDuckGo's weather widget.
|
|
func (c Config) GetWeather(ctx context.Context, b extractor.Browser, city string) (*WeatherData, error) {
|
|
c = c.validate()
|
|
|
|
u := c.ToSearchURL("weather " + city)
|
|
|
|
slog.Info("fetching weather", "url", u, "city", city)
|
|
doc, err := b.Open(ctx, u.String(), extractor.OpenPageOptions{})
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to open weather page: %w", err)
|
|
}
|
|
defer extractor.DeferClose(doc)
|
|
|
|
timeout := 10 * time.Second
|
|
if err := doc.WaitForNetworkIdle(&timeout); err != nil {
|
|
slog.Warn("WaitForNetworkIdle failed", "err", err)
|
|
}
|
|
|
|
return extractWeather(doc)
|
|
}
|
|
|
|
// GetWeather is a convenience function using DefaultConfig.
|
|
func GetWeather(ctx context.Context, b extractor.Browser, city string) (*WeatherData, error) {
|
|
return DefaultConfig.GetWeather(ctx, b, city)
|
|
}
|
|
|
|
func extractWeather(doc extractor.Node) (*WeatherData, error) {
|
|
var data WeatherData
|
|
|
|
// DuckDuckGo's weather widget uses randomized CSS class names (CSS modules),
|
|
// so we identify elements by structural selectors and image src attributes.
|
|
// The widget is an article element containing weatherkit icon images.
|
|
widget := doc.SelectFirst("article:has(img[src*='weatherkit'])")
|
|
if widget == nil {
|
|
return &data, nil
|
|
}
|
|
|
|
section := widget.SelectFirst("section")
|
|
if section == nil {
|
|
return &data, nil
|
|
}
|
|
|
|
// Header: condition and location
|
|
// Structure: section > div > [div(toggle), p(condition), p(location)]
|
|
// Use :not(:has(ul)) to skip the hourly container div and avoid breaking
|
|
// when advisory banners (e.g. wind advisory) insert extra divs.
|
|
header := section.SelectFirst("div:not(:has(ul))")
|
|
if header != nil {
|
|
ps := header.Select("p")
|
|
if len(ps) >= 2 {
|
|
data.Condition, _ = ps[0].Text()
|
|
data.Location, _ = ps[1].Text()
|
|
} else if len(ps) == 1 {
|
|
data.Condition, _ = ps[0].Text()
|
|
}
|
|
}
|
|
|
|
// Hourly forecast and details
|
|
// Structure: section > div > [ul(hourly items), div(humidity/wind)]
|
|
// Use :has(> ul) to find the div containing the hourly list, regardless of
|
|
// position. This avoids breaking when advisory banners insert extra divs.
|
|
hourlyContainer := section.SelectFirst("div:has(> ul)")
|
|
if hourlyContainer != nil {
|
|
_ = hourlyContainer.ForEach("ul > li", func(n extractor.Node) error {
|
|
var hour HourlyForecast
|
|
hour.Precipitation = -1
|
|
|
|
// Each li contains: p(time), img(icon), [span(precip)], p(temp)
|
|
ps := n.Select("p")
|
|
if len(ps) >= 2 {
|
|
hour.Time, _ = ps[0].Text()
|
|
txt, _ := ps[len(ps)-1].Text()
|
|
hour.Temp = parse.NumericOnly(txt)
|
|
}
|
|
|
|
// Icon hint and condition from the weather icon's alt attribute
|
|
icons := n.Select("img[src*='weatherkit']:not([src*='Precipitation'])")
|
|
hour.IconHint = extractIconHint(icons)
|
|
hour.Condition = hour.IconHint
|
|
|
|
// Precipitation percentage is in a span > span structure
|
|
if precip := n.SelectFirst("span > span"); precip != nil {
|
|
txt, _ := precip.Text()
|
|
hour.Precipitation = int(parse.NumericOnly(txt))
|
|
}
|
|
|
|
data.Hourly = append(data.Hourly, hour)
|
|
return nil
|
|
})
|
|
|
|
// Use first hourly temperature as current temp (no standalone current temp element)
|
|
if len(data.Hourly) > 0 {
|
|
data.CurrentTemp = data.Hourly[0].Temp
|
|
}
|
|
|
|
// Humidity and wind from the details div (after the hourly ul)
|
|
details := hourlyContainer.Select("div > p")
|
|
for _, p := range details {
|
|
txt, _ := p.Text()
|
|
if strings.Contains(txt, "Humidity") {
|
|
if strong := p.SelectFirst("strong"); strong != nil {
|
|
data.Humidity, _ = strong.Text()
|
|
}
|
|
} else if strings.Contains(txt, "Wind") {
|
|
if strong := p.SelectFirst("strong"); strong != nil {
|
|
data.Wind, _ = strong.Text()
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// Daily forecast
|
|
// Structure: section > ul > div (each day)
|
|
// The daily ul has div children; the hourly ul has li children, so ul > div is unambiguous.
|
|
_ = section.ForEach("ul > div", func(n extractor.Node) error {
|
|
var day DayForecast
|
|
day.Precipitation = -1
|
|
|
|
// Day name from first p
|
|
if d := n.SelectFirst("p:first-child"); d != nil {
|
|
day.Day, _ = d.Text()
|
|
}
|
|
|
|
// Icon hint and condition from the weather icon's alt attribute
|
|
icons := n.Select("img[src*='weatherkit']:not([src*='Precipitation'])")
|
|
day.IconHint = extractIconHint(icons)
|
|
day.Condition = day.IconHint
|
|
|
|
// High/low temps from last p's spans
|
|
if temps := n.SelectFirst("p:last-of-type"); temps != nil {
|
|
spans := temps.Select("span")
|
|
if len(spans) >= 2 {
|
|
highTxt, _ := spans[0].Text()
|
|
day.HighTemp = parse.NumericOnly(highTxt)
|
|
lowTxt, _ := spans[1].Text()
|
|
day.LowTemp = parse.NumericOnly(lowTxt)
|
|
}
|
|
}
|
|
|
|
// Precipitation percentage is in a span > span structure
|
|
if precip := n.SelectFirst("span > span"); precip != nil {
|
|
txt, _ := precip.Text()
|
|
day.Precipitation = int(parse.NumericOnly(txt))
|
|
}
|
|
|
|
data.Forecast = append(data.Forecast, day)
|
|
return nil
|
|
})
|
|
|
|
// Today's high/low from first daily forecast entry
|
|
if len(data.Forecast) > 0 {
|
|
data.HighTemp = data.Forecast[0].HighTemp
|
|
data.LowTemp = data.Forecast[0].LowTemp
|
|
}
|
|
|
|
return &data, nil
|
|
}
|
|
|
|
// extractIconHint reads the icon type from an element's aria-label, title, or alt attribute.
|
|
func extractIconHint(nodes extractor.Nodes) string {
|
|
if len(nodes) == 0 {
|
|
return ""
|
|
}
|
|
n := nodes[0]
|
|
for _, attr := range []string{"aria-label", "title", "alt"} {
|
|
v, _ := n.Attr(attr)
|
|
if v != "" {
|
|
return v
|
|
}
|
|
}
|
|
return ""
|
|
}
|