From a32f57ec92122e9bf68e388ccc550e652ebfb555 Mon Sep 17 00:00:00 2001 From: Steve Dudenhoeffer Date: Sun, 15 Feb 2026 23:00:44 +0000 Subject: [PATCH] fix: update weather extractor selectors to match DuckDuckGo's actual DOM DuckDuckGo's weather widget uses randomized CSS module class names that don't match the BEM-style selectors the extractor was using. Replace all class-based selectors with structural and attribute-based selectors: - Identify widget via article:has(img[src*='weatherkit']) - Use positional selectors (div:first-child, p:first-of-type, etc.) - Extract icon hints from img[alt] attributes - Parse precipitation from span > span structure - Derive CurrentTemp from first hourly entry (no standalone element) - Derive HighTemp/LowTemp from first daily forecast entry - Use text-matching for Humidity/Wind labels Fixes #53 Co-Authored-By: Claude Opus 4.6 --- sites/duckduckgo/weather.go | 185 +++++++++---------- sites/duckduckgo/weather_test.go | 295 ++++++++++++++++++++----------- 2 files changed, 292 insertions(+), 188 deletions(-) diff --git a/sites/duckduckgo/weather.go b/sites/duckduckgo/weather.go index 2428f59..a410024 100644 --- a/sites/duckduckgo/weather.go +++ b/sites/duckduckgo/weather.go @@ -4,6 +4,7 @@ import ( "context" "fmt" "log/slog" + "strings" "time" "gitea.stevedudenhoeffer.com/steve/go-extractor" @@ -71,121 +72,127 @@ func GetWeather(ctx context.Context, b extractor.Browser, city string) (*Weather func extractWeather(doc extractor.Node) (*WeatherData, error) { var data WeatherData - // Location - locs := doc.Select("div.module--weather span.module__title__link") - if len(locs) > 0 { - data.Location, _ = locs[0].Text() + // DuckDuckGo's weather widget uses randomized CSS class names (CSS modules), + // so we identify elements by structural selectors and image src attributes. + // The widget is an article element containing weatherkit icon images. + widget := doc.SelectFirst("article:has(img[src*='weatherkit'])") + if widget == nil { + return &data, nil } - // Current temperature - temps := doc.Select("div.module--weather .module__current-temp") - if len(temps) > 0 { - txt, _ := temps[0].Text() - data.CurrentTemp = parse.NumericOnly(txt) + section := widget.SelectFirst("section") + if section == nil { + return &data, nil } - // Condition - conds := doc.Select("div.module--weather .module__weather-summary") - if len(conds) > 0 { - data.Condition, _ = conds[0].Text() + // Header: condition and location + // Structure: section > div:first-child > [div(toggle), p(condition), p(location)] + header := section.SelectFirst("div:first-child") + if header != nil { + ps := header.Select("p") + if len(ps) >= 2 { + data.Condition, _ = ps[0].Text() + data.Location, _ = ps[1].Text() + } else if len(ps) == 1 { + data.Condition, _ = ps[0].Text() + } } - // High/low - highs := doc.Select("div.module--weather .module__high-temp") - if len(highs) > 0 { - txt, _ := highs[0].Text() - data.HighTemp = parse.NumericOnly(txt) - } + // Hourly forecast and details + // Structure: section > div:nth-child(2) > [ul(hourly items), div(humidity/wind)] + hourlyContainer := section.SelectFirst("div:nth-child(2)") + if hourlyContainer != nil { + _ = hourlyContainer.ForEach("ul > li", func(n extractor.Node) error { + var hour HourlyForecast + hour.Precipitation = -1 - lows := doc.Select("div.module--weather .module__low-temp") - if len(lows) > 0 { - txt, _ := lows[0].Text() - data.LowTemp = parse.NumericOnly(txt) - } + // Each li contains: p(time), img(icon), [span(precip)], p(temp) + ps := n.Select("p") + if len(ps) >= 2 { + hour.Time, _ = ps[0].Text() + txt, _ := ps[len(ps)-1].Text() + hour.Temp = parse.NumericOnly(txt) + } - // Humidity - humids := doc.Select("div.module--weather .module__humidity") - if len(humids) > 0 { - data.Humidity, _ = humids[0].Text() - } + // Icon hint and condition from the weather icon's alt attribute + icons := n.Select("img[src*='weatherkit']:not([src*='Precipitation'])") + hour.IconHint = extractIconHint(icons) + hour.Condition = hour.IconHint - // Wind - winds := doc.Select("div.module--weather .module__wind") - if len(winds) > 0 { - data.Wind, _ = winds[0].Text() + // Precipitation percentage is in a span > span structure + if precip := n.SelectFirst("span > span"); precip != nil { + txt, _ := precip.Text() + hour.Precipitation = int(parse.NumericOnly(txt)) + } + + data.Hourly = append(data.Hourly, hour) + return nil + }) + + // Use first hourly temperature as current temp (no standalone current temp element) + if len(data.Hourly) > 0 { + data.CurrentTemp = data.Hourly[0].Temp + } + + // Humidity and wind from the details div (after the hourly ul) + details := hourlyContainer.Select("div > p") + for _, p := range details { + txt, _ := p.Text() + if strings.Contains(txt, "Humidity") { + if strong := p.SelectFirst("strong"); strong != nil { + data.Humidity, _ = strong.Text() + } + } else if strings.Contains(txt, "Wind") { + if strong := p.SelectFirst("strong"); strong != nil { + data.Wind, _ = strong.Text() + } + } + } } // Daily forecast - _ = doc.ForEach("div.module--weather .module__forecast-day", func(n extractor.Node) error { + // Structure: section > ul > div (each day) + // The daily ul has div children; the hourly ul has li children, so ul > div is unambiguous. + _ = section.ForEach("ul > div", func(n extractor.Node) error { var day DayForecast day.Precipitation = -1 - days := n.Select(".forecast-day__name") - if len(days) > 0 { - day.Day, _ = days[0].Text() + // Day name from first p + if d := n.SelectFirst("p:first-child"); d != nil { + day.Day, _ = d.Text() } - dayHighs := n.Select(".forecast-day__high") - if len(dayHighs) > 0 { - txt, _ := dayHighs[0].Text() - day.HighTemp = parse.NumericOnly(txt) + // Icon hint and condition from the weather icon's alt attribute + icons := n.Select("img[src*='weatherkit']:not([src*='Precipitation'])") + day.IconHint = extractIconHint(icons) + day.Condition = day.IconHint + + // High/low temps from last p's spans + if temps := n.SelectFirst("p:last-of-type"); temps != nil { + spans := temps.Select("span") + if len(spans) >= 2 { + highTxt, _ := spans[0].Text() + day.HighTemp = parse.NumericOnly(highTxt) + lowTxt, _ := spans[1].Text() + day.LowTemp = parse.NumericOnly(lowTxt) + } } - dayLows := n.Select(".forecast-day__low") - if len(dayLows) > 0 { - txt, _ := dayLows[0].Text() - day.LowTemp = parse.NumericOnly(txt) - } - - dayConds := n.Select(".forecast-day__condition") - if len(dayConds) > 0 { - day.Condition, _ = dayConds[0].Text() - } - - precips := n.Select(".forecast-day__precip") - if len(precips) > 0 { - txt, _ := precips[0].Text() + // Precipitation percentage is in a span > span structure + if precip := n.SelectFirst("span > span"); precip != nil { + txt, _ := precip.Text() day.Precipitation = int(parse.NumericOnly(txt)) } - day.IconHint = extractIconHint(n.Select(".forecast-day__icon")) - data.Forecast = append(data.Forecast, day) return nil }) - // Hourly forecast - _ = doc.ForEach("div.module--weather .module__hourly-item", func(n extractor.Node) error { - var hour HourlyForecast - hour.Precipitation = -1 - - times := n.Select(".hourly-item__time") - if len(times) > 0 { - hour.Time, _ = times[0].Text() - } - - temps := n.Select(".hourly-item__temp") - if len(temps) > 0 { - txt, _ := temps[0].Text() - hour.Temp = parse.NumericOnly(txt) - } - - conds := n.Select(".hourly-item__condition") - if len(conds) > 0 { - hour.Condition, _ = conds[0].Text() - } - - precips := n.Select(".hourly-item__precip") - if len(precips) > 0 { - txt, _ := precips[0].Text() - hour.Precipitation = int(parse.NumericOnly(txt)) - } - - hour.IconHint = extractIconHint(n.Select(".hourly-item__icon")) - - data.Hourly = append(data.Hourly, hour) - return nil - }) + // Today's high/low from first daily forecast entry + if len(data.Forecast) > 0 { + data.HighTemp = data.Forecast[0].HighTemp + data.LowTemp = data.Forecast[0].LowTemp + } return &data, nil } diff --git a/sites/duckduckgo/weather_test.go b/sites/duckduckgo/weather_test.go index 486e93e..8647930 100644 --- a/sites/duckduckgo/weather_test.go +++ b/sites/duckduckgo/weather_test.go @@ -9,81 +9,143 @@ import ( ) func makeWeatherDoc() *extractortest.MockDocument { + // Mock mirrors the actual DuckDuckGo weather widget DOM structure: + // article > section > [div(header), div(hourly+details), ul(daily)] + // CSS class names are randomized, so selectors use structural/attribute patterns. + + // Hourly forecast items (section > div:nth-child(2) > ul > li) + hourlyItem1 := &extractortest.MockNode{ + Children: map[string]extractor.Nodes{ + "p": { + &extractortest.MockNode{TextValue: "3 PM"}, + &extractortest.MockNode{TextValue: "74°"}, + }, + "img[src*='weatherkit']:not([src*='Precipitation'])": { + &extractortest.MockNode{Attrs: map[string]string{"alt": "MostlyCloudy"}}, + }, + "span > span": { + &extractortest.MockNode{TextValue: "5%"}, + }, + }, + } + hourlyItem2 := &extractortest.MockNode{ + Children: map[string]extractor.Nodes{ + "p": { + &extractortest.MockNode{TextValue: "4 PM"}, + &extractortest.MockNode{TextValue: "73°"}, + }, + "img[src*='weatherkit']:not([src*='Precipitation'])": { + &extractortest.MockNode{Attrs: map[string]string{"alt": "Cloudy"}}, + }, + }, + } + hourlyItem3 := &extractortest.MockNode{ + Children: map[string]extractor.Nodes{ + "p": { + &extractortest.MockNode{TextValue: "5 PM"}, + &extractortest.MockNode{TextValue: "70°"}, + }, + "img[src*='weatherkit']:not([src*='Precipitation'])": { + &extractortest.MockNode{Attrs: map[string]string{"aria-label": "HeavyRain"}}, + }, + "span > span": { + &extractortest.MockNode{TextValue: "60%"}, + }, + }, + } + + // Hourly container (section > div:nth-child(2)) + hourlyContainer := &extractortest.MockNode{ + Children: map[string]extractor.Nodes{ + "ul > li": {hourlyItem1, hourlyItem2, hourlyItem3}, + "div > p": { + &extractortest.MockNode{ + TextValue: "Humidity: 55%", + Children: map[string]extractor.Nodes{ + "strong": {&extractortest.MockNode{TextValue: "55%"}}, + }, + }, + &extractortest.MockNode{ + TextValue: "Wind: SW 10 mph", + Children: map[string]extractor.Nodes{ + "strong": {&extractortest.MockNode{TextValue: "SW 10 mph"}}, + }, + }, + }, + }, + } + + // Daily forecast items (section > ul > div) + dayMon := &extractortest.MockNode{ + Children: map[string]extractor.Nodes{ + "p:first-child": {&extractortest.MockNode{TextValue: "Mon"}}, + "img[src*='weatherkit']:not([src*='Precipitation'])": { + &extractortest.MockNode{Attrs: map[string]string{"alt": "PartlyCloudy"}}, + }, + "p:last-of-type": { + &extractortest.MockNode{ + Children: map[string]extractor.Nodes{ + "span": { + &extractortest.MockNode{TextValue: "80°"}, + &extractortest.MockNode{TextValue: "66°"}, + }, + }, + }, + }, + "span > span": {&extractortest.MockNode{TextValue: "10%"}}, + }, + } + dayTue := &extractortest.MockNode{ + Children: map[string]extractor.Nodes{ + "p:first-child": {&extractortest.MockNode{TextValue: "Tue"}}, + "img[src*='weatherkit']:not([src*='Precipitation'])": { + &extractortest.MockNode{Attrs: map[string]string{"alt": "Rain"}}, + }, + "p:last-of-type": { + &extractortest.MockNode{ + Children: map[string]extractor.Nodes{ + "span": { + &extractortest.MockNode{TextValue: "75°"}, + &extractortest.MockNode{TextValue: "62°"}, + }, + }, + }, + }, + "span > span": {&extractortest.MockNode{TextValue: "80%"}}, + }, + } + + // Header (section > div:first-child) + header := &extractortest.MockNode{ + Children: map[string]extractor.Nodes{ + "p": { + &extractortest.MockNode{TextValue: "Partly Cloudy"}, + &extractortest.MockNode{TextValue: "New York, NY"}, + }, + }, + } + + // Section + section := &extractortest.MockNode{ + Children: map[string]extractor.Nodes{ + "div:first-child": {header}, + "div:nth-child(2)": {hourlyContainer}, + "ul > div": {dayMon, dayTue}, + }, + } + + // Widget article + widget := &extractortest.MockNode{ + Children: map[string]extractor.Nodes{ + "section": {section}, + }, + } + return &extractortest.MockDocument{ URLValue: "https://duckduckgo.com/?q=weather+new+york", MockNode: extractortest.MockNode{ Children: map[string]extractor.Nodes{ - "div.module--weather span.module__title__link": { - &extractortest.MockNode{TextValue: "New York, NY"}, - }, - "div.module--weather .module__current-temp": { - &extractortest.MockNode{TextValue: "72°F"}, - }, - "div.module--weather .module__weather-summary": { - &extractortest.MockNode{TextValue: "Partly Cloudy"}, - }, - "div.module--weather .module__high-temp": { - &extractortest.MockNode{TextValue: "78°"}, - }, - "div.module--weather .module__low-temp": { - &extractortest.MockNode{TextValue: "65°"}, - }, - "div.module--weather .module__humidity": { - &extractortest.MockNode{TextValue: "55%"}, - }, - "div.module--weather .module__wind": { - &extractortest.MockNode{TextValue: "SW 10 mph"}, - }, - "div.module--weather .module__forecast-day": { - &extractortest.MockNode{ - Children: map[string]extractor.Nodes{ - ".forecast-day__name": {&extractortest.MockNode{TextValue: "Mon"}}, - ".forecast-day__high": {&extractortest.MockNode{TextValue: "80°"}}, - ".forecast-day__low": {&extractortest.MockNode{TextValue: "66°"}}, - ".forecast-day__condition": {&extractortest.MockNode{TextValue: "Sunny"}}, - ".forecast-day__precip": {&extractortest.MockNode{TextValue: "10%"}}, - ".forecast-day__icon": {&extractortest.MockNode{Attrs: map[string]string{"alt": "PartlyCloudy"}}}, - }, - }, - &extractortest.MockNode{ - Children: map[string]extractor.Nodes{ - ".forecast-day__name": {&extractortest.MockNode{TextValue: "Tue"}}, - ".forecast-day__high": {&extractortest.MockNode{TextValue: "75°"}}, - ".forecast-day__low": {&extractortest.MockNode{TextValue: "62°"}}, - ".forecast-day__condition": {&extractortest.MockNode{TextValue: "Rain"}}, - ".forecast-day__precip": {&extractortest.MockNode{TextValue: "80%"}}, - ".forecast-day__icon": {&extractortest.MockNode{Attrs: map[string]string{"alt": "Rain"}}}, - }, - }, - }, - "div.module--weather .module__hourly-item": { - &extractortest.MockNode{ - Children: map[string]extractor.Nodes{ - ".hourly-item__time": {&extractortest.MockNode{TextValue: "3 PM"}}, - ".hourly-item__temp": {&extractortest.MockNode{TextValue: "74°"}}, - ".hourly-item__condition": {&extractortest.MockNode{TextValue: "Partly Cloudy"}}, - ".hourly-item__precip": {&extractortest.MockNode{TextValue: "5%"}}, - ".hourly-item__icon": {&extractortest.MockNode{Attrs: map[string]string{"alt": "MostlyCloudy"}}}, - }, - }, - &extractortest.MockNode{ - Children: map[string]extractor.Nodes{ - ".hourly-item__time": {&extractortest.MockNode{TextValue: "4 PM"}}, - ".hourly-item__temp": {&extractortest.MockNode{TextValue: "73°"}}, - ".hourly-item__condition": {&extractortest.MockNode{TextValue: "Cloudy"}}, - ".hourly-item__icon": {&extractortest.MockNode{Attrs: map[string]string{"alt": "Cloudy"}}}, - }, - }, - &extractortest.MockNode{ - Children: map[string]extractor.Nodes{ - ".hourly-item__time": {&extractortest.MockNode{TextValue: "5 PM"}}, - ".hourly-item__temp": {&extractortest.MockNode{TextValue: "70°"}}, - ".hourly-item__condition": {&extractortest.MockNode{TextValue: "Rain"}}, - ".hourly-item__precip": {&extractortest.MockNode{TextValue: "60%"}}, - ".hourly-item__icon": {&extractortest.MockNode{Attrs: map[string]string{"aria-label": "HeavyRain"}}}, - }, - }, - }, + "article:has(img[src*='weatherkit'])": {widget}, }, }, } @@ -100,17 +162,19 @@ func TestExtractWeather(t *testing.T) { if data.Location != "New York, NY" { t.Errorf("Location = %q, want %q", data.Location, "New York, NY") } - if data.CurrentTemp != 72 { - t.Errorf("CurrentTemp = %v, want 72", data.CurrentTemp) + // CurrentTemp is derived from first hourly entry (no standalone current temp in new widget) + if data.CurrentTemp != 74 { + t.Errorf("CurrentTemp = %v, want 74", data.CurrentTemp) } if data.Condition != "Partly Cloudy" { t.Errorf("Condition = %q, want %q", data.Condition, "Partly Cloudy") } - if data.HighTemp != 78 { - t.Errorf("HighTemp = %v, want 78", data.HighTemp) + // HighTemp/LowTemp are derived from first daily forecast entry + if data.HighTemp != 80 { + t.Errorf("HighTemp = %v, want 80", data.HighTemp) } - if data.LowTemp != 65 { - t.Errorf("LowTemp = %v, want 65", data.LowTemp) + if data.LowTemp != 66 { + t.Errorf("LowTemp = %v, want 66", data.LowTemp) } if data.Humidity != "55%" { t.Errorf("Humidity = %q, want %q", data.Humidity, "55%") @@ -135,6 +199,10 @@ func TestExtractWeather(t *testing.T) { if data.Forecast[0].IconHint != "PartlyCloudy" { t.Errorf("Forecast[0].IconHint = %q, want %q", data.Forecast[0].IconHint, "PartlyCloudy") } + // Condition is now derived from icon hint + if data.Forecast[0].Condition != "PartlyCloudy" { + t.Errorf("Forecast[0].Condition = %q, want %q", data.Forecast[0].Condition, "PartlyCloudy") + } if data.Forecast[1].Condition != "Rain" { t.Errorf("Forecast[1].Condition = %q, want %q", data.Forecast[1].Condition, "Rain") } @@ -155,8 +223,9 @@ func TestExtractWeather(t *testing.T) { if data.Hourly[0].Temp != 74 { t.Errorf("Hourly[0].Temp = %v, want 74", data.Hourly[0].Temp) } - if data.Hourly[0].Condition != "Partly Cloudy" { - t.Errorf("Hourly[0].Condition = %q, want %q", data.Hourly[0].Condition, "Partly Cloudy") + // Condition is now derived from icon hint (no separate condition element) + if data.Hourly[0].Condition != "MostlyCloudy" { + t.Errorf("Hourly[0].Condition = %q, want %q", data.Hourly[0].Condition, "MostlyCloudy") } if data.Hourly[0].Precipitation != 5 { t.Errorf("Hourly[0].Precipitation = %d, want 5", data.Hourly[0].Precipitation) @@ -202,8 +271,8 @@ func TestGetWeather_MockBrowser(t *testing.T) { if data.Location != "New York, NY" { t.Errorf("Location = %q, want %q", data.Location, "New York, NY") } - if data.CurrentTemp != 72 { - t.Errorf("CurrentTemp = %v, want 72", data.CurrentTemp) + if data.CurrentTemp != 74 { + t.Errorf("CurrentTemp = %v, want 74", data.CurrentTemp) } if len(data.Hourly) != 3 { t.Errorf("Hourly len = %d, want 3", len(data.Hourly)) @@ -231,27 +300,55 @@ func TestExtractWeather_Empty(t *testing.T) { } func TestExtractWeather_NoPrecipitation(t *testing.T) { + // Daily item without precipitation or icon + dayWed := &extractortest.MockNode{ + Children: map[string]extractor.Nodes{ + "p:first-child": {&extractortest.MockNode{TextValue: "Wed"}}, + "p:last-of-type": { + &extractortest.MockNode{ + Children: map[string]extractor.Nodes{ + "span": { + &extractortest.MockNode{TextValue: "85°"}, + &extractortest.MockNode{TextValue: "70°"}, + }, + }, + }, + }, + }, + } + + // Hourly item without precipitation or icon + hourlyItem := &extractortest.MockNode{ + Children: map[string]extractor.Nodes{ + "p": { + &extractortest.MockNode{TextValue: "12 PM"}, + &extractortest.MockNode{TextValue: "82°"}, + }, + }, + } + + section := &extractortest.MockNode{ + Children: map[string]extractor.Nodes{ + "div:first-child": {&extractortest.MockNode{}}, + "div:nth-child(2)": {&extractortest.MockNode{ + Children: map[string]extractor.Nodes{ + "ul > li": {hourlyItem}, + }, + }}, + "ul > div": {dayWed}, + }, + } + + widget := &extractortest.MockNode{ + Children: map[string]extractor.Nodes{ + "section": {section}, + }, + } + doc := &extractortest.MockDocument{ MockNode: extractortest.MockNode{ Children: map[string]extractor.Nodes{ - "div.module--weather .module__forecast-day": { - &extractortest.MockNode{ - Children: map[string]extractor.Nodes{ - ".forecast-day__name": {&extractortest.MockNode{TextValue: "Wed"}}, - ".forecast-day__high": {&extractortest.MockNode{TextValue: "85°"}}, - ".forecast-day__low": {&extractortest.MockNode{TextValue: "70°"}}, - ".forecast-day__condition": {&extractortest.MockNode{TextValue: "Clear"}}, - }, - }, - }, - "div.module--weather .module__hourly-item": { - &extractortest.MockNode{ - Children: map[string]extractor.Nodes{ - ".hourly-item__time": {&extractortest.MockNode{TextValue: "12 PM"}}, - ".hourly-item__temp": {&extractortest.MockNode{TextValue: "82°"}}, - }, - }, - }, + "article:has(img[src*='weatherkit'])": {widget}, }, }, } -- 2.49.1