DuckDuckGo's weather widget uses randomized CSS module class names that don't match the BEM-style selectors the extractor was using. Replace all class-based selectors with structural and attribute-based selectors: - Identify widget via article:has(img[src*='weatherkit']) - Use positional selectors (div:first-child, p:first-of-type, etc.) - Extract icon hints from img[alt] attributes - Parse precipitation from span > span structure - Derive CurrentTemp from first hourly entry (no standalone element) - Derive HighTemp/LowTemp from first daily forecast entry - Use text-matching for Humidity/Wind labels Fixes #53 Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
425 lines
12 KiB
Go
425 lines
12 KiB
Go
package duckduckgo
|
|
|
|
import (
|
|
"context"
|
|
"testing"
|
|
|
|
"gitea.stevedudenhoeffer.com/steve/go-extractor"
|
|
"gitea.stevedudenhoeffer.com/steve/go-extractor/extractortest"
|
|
)
|
|
|
|
func makeWeatherDoc() *extractortest.MockDocument {
|
|
// Mock mirrors the actual DuckDuckGo weather widget DOM structure:
|
|
// article > section > [div(header), div(hourly+details), ul(daily)]
|
|
// CSS class names are randomized, so selectors use structural/attribute patterns.
|
|
|
|
// Hourly forecast items (section > div:nth-child(2) > ul > li)
|
|
hourlyItem1 := &extractortest.MockNode{
|
|
Children: map[string]extractor.Nodes{
|
|
"p": {
|
|
&extractortest.MockNode{TextValue: "3 PM"},
|
|
&extractortest.MockNode{TextValue: "74°"},
|
|
},
|
|
"img[src*='weatherkit']:not([src*='Precipitation'])": {
|
|
&extractortest.MockNode{Attrs: map[string]string{"alt": "MostlyCloudy"}},
|
|
},
|
|
"span > span": {
|
|
&extractortest.MockNode{TextValue: "5%"},
|
|
},
|
|
},
|
|
}
|
|
hourlyItem2 := &extractortest.MockNode{
|
|
Children: map[string]extractor.Nodes{
|
|
"p": {
|
|
&extractortest.MockNode{TextValue: "4 PM"},
|
|
&extractortest.MockNode{TextValue: "73°"},
|
|
},
|
|
"img[src*='weatherkit']:not([src*='Precipitation'])": {
|
|
&extractortest.MockNode{Attrs: map[string]string{"alt": "Cloudy"}},
|
|
},
|
|
},
|
|
}
|
|
hourlyItem3 := &extractortest.MockNode{
|
|
Children: map[string]extractor.Nodes{
|
|
"p": {
|
|
&extractortest.MockNode{TextValue: "5 PM"},
|
|
&extractortest.MockNode{TextValue: "70°"},
|
|
},
|
|
"img[src*='weatherkit']:not([src*='Precipitation'])": {
|
|
&extractortest.MockNode{Attrs: map[string]string{"aria-label": "HeavyRain"}},
|
|
},
|
|
"span > span": {
|
|
&extractortest.MockNode{TextValue: "60%"},
|
|
},
|
|
},
|
|
}
|
|
|
|
// Hourly container (section > div:nth-child(2))
|
|
hourlyContainer := &extractortest.MockNode{
|
|
Children: map[string]extractor.Nodes{
|
|
"ul > li": {hourlyItem1, hourlyItem2, hourlyItem3},
|
|
"div > p": {
|
|
&extractortest.MockNode{
|
|
TextValue: "Humidity: 55%",
|
|
Children: map[string]extractor.Nodes{
|
|
"strong": {&extractortest.MockNode{TextValue: "55%"}},
|
|
},
|
|
},
|
|
&extractortest.MockNode{
|
|
TextValue: "Wind: SW 10 mph",
|
|
Children: map[string]extractor.Nodes{
|
|
"strong": {&extractortest.MockNode{TextValue: "SW 10 mph"}},
|
|
},
|
|
},
|
|
},
|
|
},
|
|
}
|
|
|
|
// Daily forecast items (section > ul > div)
|
|
dayMon := &extractortest.MockNode{
|
|
Children: map[string]extractor.Nodes{
|
|
"p:first-child": {&extractortest.MockNode{TextValue: "Mon"}},
|
|
"img[src*='weatherkit']:not([src*='Precipitation'])": {
|
|
&extractortest.MockNode{Attrs: map[string]string{"alt": "PartlyCloudy"}},
|
|
},
|
|
"p:last-of-type": {
|
|
&extractortest.MockNode{
|
|
Children: map[string]extractor.Nodes{
|
|
"span": {
|
|
&extractortest.MockNode{TextValue: "80°"},
|
|
&extractortest.MockNode{TextValue: "66°"},
|
|
},
|
|
},
|
|
},
|
|
},
|
|
"span > span": {&extractortest.MockNode{TextValue: "10%"}},
|
|
},
|
|
}
|
|
dayTue := &extractortest.MockNode{
|
|
Children: map[string]extractor.Nodes{
|
|
"p:first-child": {&extractortest.MockNode{TextValue: "Tue"}},
|
|
"img[src*='weatherkit']:not([src*='Precipitation'])": {
|
|
&extractortest.MockNode{Attrs: map[string]string{"alt": "Rain"}},
|
|
},
|
|
"p:last-of-type": {
|
|
&extractortest.MockNode{
|
|
Children: map[string]extractor.Nodes{
|
|
"span": {
|
|
&extractortest.MockNode{TextValue: "75°"},
|
|
&extractortest.MockNode{TextValue: "62°"},
|
|
},
|
|
},
|
|
},
|
|
},
|
|
"span > span": {&extractortest.MockNode{TextValue: "80%"}},
|
|
},
|
|
}
|
|
|
|
// Header (section > div:first-child)
|
|
header := &extractortest.MockNode{
|
|
Children: map[string]extractor.Nodes{
|
|
"p": {
|
|
&extractortest.MockNode{TextValue: "Partly Cloudy"},
|
|
&extractortest.MockNode{TextValue: "New York, NY"},
|
|
},
|
|
},
|
|
}
|
|
|
|
// Section
|
|
section := &extractortest.MockNode{
|
|
Children: map[string]extractor.Nodes{
|
|
"div:first-child": {header},
|
|
"div:nth-child(2)": {hourlyContainer},
|
|
"ul > div": {dayMon, dayTue},
|
|
},
|
|
}
|
|
|
|
// Widget article
|
|
widget := &extractortest.MockNode{
|
|
Children: map[string]extractor.Nodes{
|
|
"section": {section},
|
|
},
|
|
}
|
|
|
|
return &extractortest.MockDocument{
|
|
URLValue: "https://duckduckgo.com/?q=weather+new+york",
|
|
MockNode: extractortest.MockNode{
|
|
Children: map[string]extractor.Nodes{
|
|
"article:has(img[src*='weatherkit'])": {widget},
|
|
},
|
|
},
|
|
}
|
|
}
|
|
|
|
func TestExtractWeather(t *testing.T) {
|
|
doc := makeWeatherDoc()
|
|
|
|
data, err := extractWeather(doc)
|
|
if err != nil {
|
|
t.Fatalf("extractWeather() error: %v", err)
|
|
}
|
|
|
|
if data.Location != "New York, NY" {
|
|
t.Errorf("Location = %q, want %q", data.Location, "New York, NY")
|
|
}
|
|
// CurrentTemp is derived from first hourly entry (no standalone current temp in new widget)
|
|
if data.CurrentTemp != 74 {
|
|
t.Errorf("CurrentTemp = %v, want 74", data.CurrentTemp)
|
|
}
|
|
if data.Condition != "Partly Cloudy" {
|
|
t.Errorf("Condition = %q, want %q", data.Condition, "Partly Cloudy")
|
|
}
|
|
// HighTemp/LowTemp are derived from first daily forecast entry
|
|
if data.HighTemp != 80 {
|
|
t.Errorf("HighTemp = %v, want 80", data.HighTemp)
|
|
}
|
|
if data.LowTemp != 66 {
|
|
t.Errorf("LowTemp = %v, want 66", data.LowTemp)
|
|
}
|
|
if data.Humidity != "55%" {
|
|
t.Errorf("Humidity = %q, want %q", data.Humidity, "55%")
|
|
}
|
|
if data.Wind != "SW 10 mph" {
|
|
t.Errorf("Wind = %q, want %q", data.Wind, "SW 10 mph")
|
|
}
|
|
|
|
// Daily forecast
|
|
if len(data.Forecast) != 2 {
|
|
t.Fatalf("Forecast len = %d, want 2", len(data.Forecast))
|
|
}
|
|
if data.Forecast[0].Day != "Mon" {
|
|
t.Errorf("Forecast[0].Day = %q, want %q", data.Forecast[0].Day, "Mon")
|
|
}
|
|
if data.Forecast[0].HighTemp != 80 {
|
|
t.Errorf("Forecast[0].HighTemp = %v, want 80", data.Forecast[0].HighTemp)
|
|
}
|
|
if data.Forecast[0].Precipitation != 10 {
|
|
t.Errorf("Forecast[0].Precipitation = %d, want 10", data.Forecast[0].Precipitation)
|
|
}
|
|
if data.Forecast[0].IconHint != "PartlyCloudy" {
|
|
t.Errorf("Forecast[0].IconHint = %q, want %q", data.Forecast[0].IconHint, "PartlyCloudy")
|
|
}
|
|
// Condition is now derived from icon hint
|
|
if data.Forecast[0].Condition != "PartlyCloudy" {
|
|
t.Errorf("Forecast[0].Condition = %q, want %q", data.Forecast[0].Condition, "PartlyCloudy")
|
|
}
|
|
if data.Forecast[1].Condition != "Rain" {
|
|
t.Errorf("Forecast[1].Condition = %q, want %q", data.Forecast[1].Condition, "Rain")
|
|
}
|
|
if data.Forecast[1].Precipitation != 80 {
|
|
t.Errorf("Forecast[1].Precipitation = %d, want 80", data.Forecast[1].Precipitation)
|
|
}
|
|
if data.Forecast[1].IconHint != "Rain" {
|
|
t.Errorf("Forecast[1].IconHint = %q, want %q", data.Forecast[1].IconHint, "Rain")
|
|
}
|
|
|
|
// Hourly forecast
|
|
if len(data.Hourly) != 3 {
|
|
t.Fatalf("Hourly len = %d, want 3", len(data.Hourly))
|
|
}
|
|
if data.Hourly[0].Time != "3 PM" {
|
|
t.Errorf("Hourly[0].Time = %q, want %q", data.Hourly[0].Time, "3 PM")
|
|
}
|
|
if data.Hourly[0].Temp != 74 {
|
|
t.Errorf("Hourly[0].Temp = %v, want 74", data.Hourly[0].Temp)
|
|
}
|
|
// Condition is now derived from icon hint (no separate condition element)
|
|
if data.Hourly[0].Condition != "MostlyCloudy" {
|
|
t.Errorf("Hourly[0].Condition = %q, want %q", data.Hourly[0].Condition, "MostlyCloudy")
|
|
}
|
|
if data.Hourly[0].Precipitation != 5 {
|
|
t.Errorf("Hourly[0].Precipitation = %d, want 5", data.Hourly[0].Precipitation)
|
|
}
|
|
if data.Hourly[0].IconHint != "MostlyCloudy" {
|
|
t.Errorf("Hourly[0].IconHint = %q, want %q", data.Hourly[0].IconHint, "MostlyCloudy")
|
|
}
|
|
|
|
// Second hourly item has no precipitation
|
|
if data.Hourly[1].Time != "4 PM" {
|
|
t.Errorf("Hourly[1].Time = %q, want %q", data.Hourly[1].Time, "4 PM")
|
|
}
|
|
if data.Hourly[1].Precipitation != -1 {
|
|
t.Errorf("Hourly[1].Precipitation = %d, want -1 (unavailable)", data.Hourly[1].Precipitation)
|
|
}
|
|
if data.Hourly[1].IconHint != "Cloudy" {
|
|
t.Errorf("Hourly[1].IconHint = %q, want %q", data.Hourly[1].IconHint, "Cloudy")
|
|
}
|
|
|
|
// Third hourly item uses aria-label for icon hint
|
|
if data.Hourly[2].Precipitation != 60 {
|
|
t.Errorf("Hourly[2].Precipitation = %d, want 60", data.Hourly[2].Precipitation)
|
|
}
|
|
if data.Hourly[2].IconHint != "HeavyRain" {
|
|
t.Errorf("Hourly[2].IconHint = %q, want %q", data.Hourly[2].IconHint, "HeavyRain")
|
|
}
|
|
}
|
|
|
|
func TestGetWeather_MockBrowser(t *testing.T) {
|
|
doc := makeWeatherDoc()
|
|
|
|
browser := &extractortest.MockBrowser{
|
|
Documents: map[string]*extractortest.MockDocument{
|
|
"https://duckduckgo.com/?kp=-2&q=weather+new+york": doc,
|
|
},
|
|
}
|
|
|
|
data, err := DefaultConfig.GetWeather(context.Background(), browser, "new york")
|
|
if err != nil {
|
|
t.Fatalf("GetWeather() error: %v", err)
|
|
}
|
|
|
|
if data.Location != "New York, NY" {
|
|
t.Errorf("Location = %q, want %q", data.Location, "New York, NY")
|
|
}
|
|
if data.CurrentTemp != 74 {
|
|
t.Errorf("CurrentTemp = %v, want 74", data.CurrentTemp)
|
|
}
|
|
if len(data.Hourly) != 3 {
|
|
t.Errorf("Hourly len = %d, want 3", len(data.Hourly))
|
|
}
|
|
}
|
|
|
|
func TestExtractWeather_Empty(t *testing.T) {
|
|
doc := &extractortest.MockDocument{
|
|
MockNode: extractortest.MockNode{
|
|
Children: map[string]extractor.Nodes{},
|
|
},
|
|
}
|
|
|
|
data, err := extractWeather(doc)
|
|
if err != nil {
|
|
t.Fatalf("extractWeather() error: %v", err)
|
|
}
|
|
|
|
if data.Location != "" || data.CurrentTemp != 0 {
|
|
t.Error("expected zero values for empty doc")
|
|
}
|
|
if len(data.Hourly) != 0 {
|
|
t.Errorf("expected no hourly data for empty doc, got %d", len(data.Hourly))
|
|
}
|
|
}
|
|
|
|
func TestExtractWeather_NoPrecipitation(t *testing.T) {
|
|
// Daily item without precipitation or icon
|
|
dayWed := &extractortest.MockNode{
|
|
Children: map[string]extractor.Nodes{
|
|
"p:first-child": {&extractortest.MockNode{TextValue: "Wed"}},
|
|
"p:last-of-type": {
|
|
&extractortest.MockNode{
|
|
Children: map[string]extractor.Nodes{
|
|
"span": {
|
|
&extractortest.MockNode{TextValue: "85°"},
|
|
&extractortest.MockNode{TextValue: "70°"},
|
|
},
|
|
},
|
|
},
|
|
},
|
|
},
|
|
}
|
|
|
|
// Hourly item without precipitation or icon
|
|
hourlyItem := &extractortest.MockNode{
|
|
Children: map[string]extractor.Nodes{
|
|
"p": {
|
|
&extractortest.MockNode{TextValue: "12 PM"},
|
|
&extractortest.MockNode{TextValue: "82°"},
|
|
},
|
|
},
|
|
}
|
|
|
|
section := &extractortest.MockNode{
|
|
Children: map[string]extractor.Nodes{
|
|
"div:first-child": {&extractortest.MockNode{}},
|
|
"div:nth-child(2)": {&extractortest.MockNode{
|
|
Children: map[string]extractor.Nodes{
|
|
"ul > li": {hourlyItem},
|
|
},
|
|
}},
|
|
"ul > div": {dayWed},
|
|
},
|
|
}
|
|
|
|
widget := &extractortest.MockNode{
|
|
Children: map[string]extractor.Nodes{
|
|
"section": {section},
|
|
},
|
|
}
|
|
|
|
doc := &extractortest.MockDocument{
|
|
MockNode: extractortest.MockNode{
|
|
Children: map[string]extractor.Nodes{
|
|
"article:has(img[src*='weatherkit'])": {widget},
|
|
},
|
|
},
|
|
}
|
|
|
|
data, err := extractWeather(doc)
|
|
if err != nil {
|
|
t.Fatalf("extractWeather() error: %v", err)
|
|
}
|
|
|
|
if len(data.Forecast) != 1 {
|
|
t.Fatalf("Forecast len = %d, want 1", len(data.Forecast))
|
|
}
|
|
if data.Forecast[0].Precipitation != -1 {
|
|
t.Errorf("Forecast[0].Precipitation = %d, want -1 (unavailable)", data.Forecast[0].Precipitation)
|
|
}
|
|
if data.Forecast[0].IconHint != "" {
|
|
t.Errorf("Forecast[0].IconHint = %q, want empty", data.Forecast[0].IconHint)
|
|
}
|
|
|
|
if len(data.Hourly) != 1 {
|
|
t.Fatalf("Hourly len = %d, want 1", len(data.Hourly))
|
|
}
|
|
if data.Hourly[0].Precipitation != -1 {
|
|
t.Errorf("Hourly[0].Precipitation = %d, want -1 (unavailable)", data.Hourly[0].Precipitation)
|
|
}
|
|
if data.Hourly[0].IconHint != "" {
|
|
t.Errorf("Hourly[0].IconHint = %q, want empty", data.Hourly[0].IconHint)
|
|
}
|
|
}
|
|
|
|
func TestExtractIconHint_Priority(t *testing.T) {
|
|
// aria-label takes priority over title and alt
|
|
nodes := extractor.Nodes{
|
|
&extractortest.MockNode{
|
|
Attrs: map[string]string{
|
|
"aria-label": "Snow",
|
|
"title": "SnowTitle",
|
|
"alt": "SnowAlt",
|
|
},
|
|
},
|
|
}
|
|
if got := extractIconHint(nodes); got != "Snow" {
|
|
t.Errorf("extractIconHint() = %q, want %q (aria-label priority)", got, "Snow")
|
|
}
|
|
|
|
// title used when aria-label absent
|
|
nodes = extractor.Nodes{
|
|
&extractortest.MockNode{
|
|
Attrs: map[string]string{
|
|
"title": "Drizzle",
|
|
"alt": "DrizzleAlt",
|
|
},
|
|
},
|
|
}
|
|
if got := extractIconHint(nodes); got != "Drizzle" {
|
|
t.Errorf("extractIconHint() = %q, want %q (title fallback)", got, "Drizzle")
|
|
}
|
|
|
|
// alt used as last fallback
|
|
nodes = extractor.Nodes{
|
|
&extractortest.MockNode{
|
|
Attrs: map[string]string{"alt": "MostlyClear"},
|
|
},
|
|
}
|
|
if got := extractIconHint(nodes); got != "MostlyClear" {
|
|
t.Errorf("extractIconHint() = %q, want %q (alt fallback)", got, "MostlyClear")
|
|
}
|
|
|
|
// empty when no nodes
|
|
if got := extractIconHint(nil); got != "" {
|
|
t.Errorf("extractIconHint(nil) = %q, want empty", got)
|
|
}
|
|
}
|