From 132817144eed6d8dc77e18d6b0108e73399d26a9 Mon Sep 17 00:00:00 2001 From: Steve Dudenhoeffer Date: Sun, 15 Feb 2026 16:26:54 +0000 Subject: [PATCH] refactor: deduplicate numericOnly and DuckDuckGo result extraction - Extract identical numericOnly inline functions from powerball and megamillions into shared sites/internal/parse.NumericOnly with tests - Extract duplicated DuckDuckGo result parsing from Search() and GetResults() into shared extractResults() helper Closes #13, #14 Co-Authored-By: Claude Opus 4.6 --- sites/duckduckgo/duckduckgo.go | 36 +----------------------------- sites/duckduckgo/page.go | 7 +++++- sites/internal/parse/parse.go | 21 +++++++++++++++++ sites/internal/parse/parse_test.go | 25 +++++++++++++++++++++ sites/megamillions/megamillions.go | 26 ++------------------- sites/powerball/powerball.go | 26 ++------------------- 6 files changed, 57 insertions(+), 84 deletions(-) create mode 100644 sites/internal/parse/parse.go create mode 100644 sites/internal/parse/parse_test.go diff --git a/sites/duckduckgo/duckduckgo.go b/sites/duckduckgo/duckduckgo.go index 28bcdce..de07d97 100644 --- a/sites/duckduckgo/duckduckgo.go +++ b/sites/duckduckgo/duckduckgo.go @@ -95,39 +95,5 @@ func (c Config) Search(ctx context.Context, b extractor.Browser, query string) ( } defer extractor.DeferClose(doc) - var res []Result - - err = doc.ForEach(`article[id^="r1-"]`, func(n extractor.Node) error { - var r Result - - links := n.Select(`a[href][target="_self"]`) - - if len(links) == 0 { - return nil - } - - r.URL, err = links[0].Attr(`href`) - - if err != nil { - return fmt.Errorf("failed to get link: %w", err) - } - - titles := n.Select("h2") - - if len(titles) != 0 { - r.Title, _ = titles[0].Text() - } - - descriptions := n.Select("span > span") - - if len(descriptions) != 0 { - r.Description, _ = descriptions[0].Text() - } - - res = append(res, r) - - return nil - }) - - return res, err + return extractResults(doc) } diff --git a/sites/duckduckgo/page.go b/sites/duckduckgo/page.go index 70b2242..a1e0df4 100644 --- a/sites/duckduckgo/page.go +++ b/sites/duckduckgo/page.go @@ -18,9 +18,14 @@ type searchPage struct { } func (s searchPage) GetResults() ([]Result, error) { + return extractResults(s.doc) +} + +// extractResults parses search results from a DuckDuckGo results page. +func extractResults(doc extractor.Node) ([]Result, error) { var res []Result - err := s.doc.ForEach(`article[id^="r1-"]`, func(n extractor.Node) error { + err := doc.ForEach(`article[id^="r1-"]`, func(n extractor.Node) error { var r Result links := n.Select(`a[href][target="_self"]`) diff --git a/sites/internal/parse/parse.go b/sites/internal/parse/parse.go new file mode 100644 index 0000000..c1dc9df --- /dev/null +++ b/sites/internal/parse/parse.go @@ -0,0 +1,21 @@ +package parse + +import "strconv" + +// NumericOnly extracts only digits and decimal points from a string and +// returns the result as a float64. Returns 0 if parsing fails. +func NumericOnly(in string) float64 { + var out string + for _, r := range in { + if (r >= '0' && r <= '9') || r == '.' { + out += string(r) + } + } + + val, err := strconv.ParseFloat(out, 64) + if err != nil { + return 0 + } + + return val +} diff --git a/sites/internal/parse/parse_test.go b/sites/internal/parse/parse_test.go new file mode 100644 index 0000000..12821f5 --- /dev/null +++ b/sites/internal/parse/parse_test.go @@ -0,0 +1,25 @@ +package parse + +import "testing" + +func TestNumericOnly(t *testing.T) { + tests := []struct { + input string + want float64 + }{ + {"$1.5 Billion", 1.5}, + {"$100 Million", 100}, + {"$200,000", 200000}, + {"no numbers", 0}, + {"", 0}, + {"42", 42}, + {"3.14159", 3.14159}, + } + + for _, tt := range tests { + got := NumericOnly(tt.input) + if got != tt.want { + t.Errorf("NumericOnly(%q) = %v, want %v", tt.input, got, tt.want) + } + } +} diff --git a/sites/megamillions/megamillions.go b/sites/megamillions/megamillions.go index 173bb7e..a10aaf3 100644 --- a/sites/megamillions/megamillions.go +++ b/sites/megamillions/megamillions.go @@ -8,6 +8,7 @@ import ( "time" "gitea.stevedudenhoeffer.com/steve/go-extractor" + "gitea.stevedudenhoeffer.com/steve/go-extractor/sites/internal/parse" "golang.org/x/text/currency" ) @@ -166,30 +167,7 @@ func getNextDrawing(_ context.Context, doc extractor.Document) (*NextDrawing, er // jackpot is in the format of "$1.5 billion", "$100 million", or "$200,000" etc - // make one filter to only get the numeric part of the jackpot - - numericOnly := func(in string) float64 { - var out string - for _, r := range in { - if r >= '0' && r <= '9' { - out += string(r) - } - - if r == '.' { - out += string(r) - } - } - - val, err := strconv.ParseFloat(out, 64) - - if err != nil { - return 0 - } - - return val - } - - numeric := numericOnly(txt) + numeric := parse.NumericOnly(txt) if strings.Contains(txt, "Billion") { nextDrawing.Jackpot = currency.USD.Amount(numeric * 1000000000) diff --git a/sites/powerball/powerball.go b/sites/powerball/powerball.go index 4b3f9c7..a32f078 100644 --- a/sites/powerball/powerball.go +++ b/sites/powerball/powerball.go @@ -8,6 +8,7 @@ import ( "time" "gitea.stevedudenhoeffer.com/steve/go-extractor" + "gitea.stevedudenhoeffer.com/steve/go-extractor/sites/internal/parse" ) type Config struct { @@ -144,30 +145,7 @@ func getNextDrawing(_ context.Context, doc extractor.Document) (*NextDrawing, er // jackpot is in the format of "$1.5 billion", "$100 million", or "$200,000" etc - // make one filter to only get the numeric part of the jackpot - - numericOnly := func(in string) float64 { - var out string - for _, r := range in { - if r >= '0' && r <= '9' { - out += string(r) - } - - if r == '.' { - out += string(r) - } - } - - val, err := strconv.ParseFloat(out, 64) - - if err != nil { - return 0 - } - - return val - } - - numeric := numericOnly(txt) + numeric := parse.NumericOnly(txt) if strings.Contains(txt, "Billion") { nextDrawing.JackpotDollars = int(numeric * 1000000000)