package duckduckgo

import (
	"context"
	"fmt"
	"log/slog"
	"strings"
	"time"

	"gitea.stevedudenhoeffer.com/steve/go-extractor"
	"gitea.stevedudenhoeffer.com/steve/go-extractor/sites/internal/parse"
)

// WeatherData is the structured result of scraping DuckDuckGo's weather
// widget. Fields the scraper could not locate are left at their zero value.
type WeatherData struct {
	Location    string
	CurrentTemp float64
	Condition   string
	HighTemp    float64
	LowTemp     float64
	Humidity    string
	Wind        string
	Forecast    []DayForecast
	Hourly      []HourlyForecast
}

// DayForecast describes the forecast for one day of the daily list.
type DayForecast struct {
	Day           string
	HighTemp      float64
	LowTemp       float64
	Condition     string
	Precipitation int    // percentage 0-100, -1 if unavailable
	IconHint      string // icon type from element attributes (e.g. "PartlyCloudy", "Snow")
}

// HourlyForecast describes the forecast for one hour of the hourly list.
type HourlyForecast struct {
	Time          string
	Temp          float64
	Condition     string
	Precipitation int    // percentage 0-100, -1 if unavailable
	IconHint      string // icon type from element attributes (e.g. "MostlyCloudy", "Rain")
}

// GetWeather searches DuckDuckGo for "weather <city>", opens the result page
// in b, and returns whatever extractWeather can read from the weather widget.
//
// An error is returned only when the page cannot be opened (wrapped with
// "failed to open weather page") or when extractWeather itself fails. A
// failed wait for network idle is logged as a warning and extraction is
// attempted anyway.
func (c Config) GetWeather(ctx context.Context, b extractor.Browser, city string) (*WeatherData, error) {
	c = c.validate()

	searchURL := c.ToSearchURL("weather " + city)
	slog.Info("fetching weather", "url", searchURL, "city", city)

	page, openErr := b.Open(ctx, searchURL.String(), extractor.OpenPageOptions{})
	if openErr != nil {
		return nil, fmt.Errorf("failed to open weather page: %w", openErr)
	}
	defer extractor.DeferClose(page)

	// Give the widget up to ten seconds to finish loading; a timeout here is
	// not fatal because the markup may already be present.
	idleTimeout := 10 * time.Second
	idleErr := page.WaitForNetworkIdle(&idleTimeout)
	if idleErr != nil {
		slog.Warn("WaitForNetworkIdle failed", "err", idleErr)
	}

	return extractWeather(page)
}

// GetWeather is a convenience function using DefaultConfig.
func GetWeather(ctx context.Context, b extractor.Browser, city string) (*WeatherData, error) { return DefaultConfig.GetWeather(ctx, b, city) } func extractWeather(doc extractor.Node) (*WeatherData, error) { var data WeatherData // DuckDuckGo's weather widget uses randomized CSS class names (CSS modules), // so we identify elements by structural selectors and image src attributes. // The widget is an article element containing weatherkit icon images. widget := doc.SelectFirst("article:has(img[src*='weatherkit'])") if widget == nil { return &data, nil } section := widget.SelectFirst("section") if section == nil { return &data, nil } // Header: condition and location // Structure: section > div > [div(toggle), p(condition), p(location)] // Use :not(:has(ul)) to skip the hourly container div and avoid breaking // when advisory banners (e.g. wind advisory) insert extra divs. header := section.SelectFirst("div:not(:has(ul))") if header != nil { ps := header.Select("p") if len(ps) >= 2 { data.Condition, _ = ps[0].Text() data.Location, _ = ps[1].Text() } else if len(ps) == 1 { data.Condition, _ = ps[0].Text() } } // Hourly forecast and details // Structure: section > div > [ul(hourly items), div(humidity/wind)] // Use :has(> ul) to find the div containing the hourly list, regardless of // position. This avoids breaking when advisory banners insert extra divs. 
hourlyContainer := section.SelectFirst("div:has(> ul)") if hourlyContainer != nil { _ = hourlyContainer.ForEach("ul > li", func(n extractor.Node) error { var hour HourlyForecast hour.Precipitation = -1 // Each li contains: p(time), img(icon), [span(precip)], p(temp) ps := n.Select("p") if len(ps) >= 2 { hour.Time, _ = ps[0].Text() txt, _ := ps[len(ps)-1].Text() hour.Temp = parse.NumericOnly(txt) } // Icon hint and condition from the weather icon's alt attribute icons := n.Select("img[src*='weatherkit']:not([src*='Precipitation'])") hour.IconHint = extractIconHint(icons) hour.Condition = hour.IconHint // Precipitation percentage is in a span > span structure if precip := n.SelectFirst("span > span"); precip != nil { txt, _ := precip.Text() hour.Precipitation = int(parse.NumericOnly(txt)) } data.Hourly = append(data.Hourly, hour) return nil }) // Use first hourly temperature as current temp (no standalone current temp element) if len(data.Hourly) > 0 { data.CurrentTemp = data.Hourly[0].Temp } // Humidity and wind from the details div (after the hourly ul) details := hourlyContainer.Select("div > p") for _, p := range details { txt, _ := p.Text() if strings.Contains(txt, "Humidity") { if strong := p.SelectFirst("strong"); strong != nil { data.Humidity, _ = strong.Text() } } else if strings.Contains(txt, "Wind") { if strong := p.SelectFirst("strong"); strong != nil { data.Wind, _ = strong.Text() } } } } // Daily forecast // Structure: section > ul > div (each day) // The daily ul has div children; the hourly ul has li children, so ul > div is unambiguous. 
_ = section.ForEach("ul > div", func(n extractor.Node) error { var day DayForecast day.Precipitation = -1 // Day name from first p if d := n.SelectFirst("p:first-child"); d != nil { day.Day, _ = d.Text() } // Icon hint and condition from the weather icon's alt attribute icons := n.Select("img[src*='weatherkit']:not([src*='Precipitation'])") day.IconHint = extractIconHint(icons) day.Condition = day.IconHint // High/low temps from last p's spans if temps := n.SelectFirst("p:last-of-type"); temps != nil { spans := temps.Select("span") if len(spans) >= 2 { highTxt, _ := spans[0].Text() day.HighTemp = parse.NumericOnly(highTxt) lowTxt, _ := spans[1].Text() day.LowTemp = parse.NumericOnly(lowTxt) } } // Precipitation percentage is in a span > span structure if precip := n.SelectFirst("span > span"); precip != nil { txt, _ := precip.Text() day.Precipitation = int(parse.NumericOnly(txt)) } data.Forecast = append(data.Forecast, day) return nil }) // Today's high/low from first daily forecast entry if len(data.Forecast) > 0 { data.HighTemp = data.Forecast[0].HighTemp data.LowTemp = data.Forecast[0].LowTemp } return &data, nil } // extractIconHint reads the icon type from an element's aria-label, title, or alt attribute. func extractIconHint(nodes extractor.Nodes) string { if len(nodes) == 0 { return "" } n := nodes[0] for _, attr := range []string{"aria-label", "title", "alt"} { v, _ := n.Attr(attr) if v != "" { return v } } return "" }