refactor: deduplicate numericOnly and DuckDuckGo result extraction
- Extract the identical inline numericOnly functions from powerball and megamillions into a shared sites/internal/parse.NumericOnly helper, with tests.
- Extract the duplicated DuckDuckGo result parsing from Search() and GetResults() into a shared extractResults() helper.

Closes #13, #14

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -95,39 +95,5 @@ func (c Config) Search(ctx context.Context, b extractor.Browser, query string) (
|
||||
}
|
||||
defer extractor.DeferClose(doc)
|
||||
|
||||
var res []Result
|
||||
|
||||
err = doc.ForEach(`article[id^="r1-"]`, func(n extractor.Node) error {
|
||||
var r Result
|
||||
|
||||
links := n.Select(`a[href][target="_self"]`)
|
||||
|
||||
if len(links) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
r.URL, err = links[0].Attr(`href`)
|
||||
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to get link: %w", err)
|
||||
}
|
||||
|
||||
titles := n.Select("h2")
|
||||
|
||||
if len(titles) != 0 {
|
||||
r.Title, _ = titles[0].Text()
|
||||
}
|
||||
|
||||
descriptions := n.Select("span > span")
|
||||
|
||||
if len(descriptions) != 0 {
|
||||
r.Description, _ = descriptions[0].Text()
|
||||
}
|
||||
|
||||
res = append(res, r)
|
||||
|
||||
return nil
|
||||
})
|
||||
|
||||
return res, err
|
||||
return extractResults(doc)
|
||||
}
|
||||
|
||||
@@ -18,9 +18,14 @@ type searchPage struct {
|
||||
}
|
||||
|
||||
// GetResults returns the search results found in this page's document.
// It hands s.doc to extractResults and returns that call's results and error
// unchanged.
func (s searchPage) GetResults() ([]Result, error) {
|
||||
return extractResults(s.doc)
|
||||
}
|
||||
|
||||
// extractResults parses search results from a DuckDuckGo results page.
|
||||
func extractResults(doc extractor.Node) ([]Result, error) {
|
||||
var res []Result
|
||||
|
||||
err := s.doc.ForEach(`article[id^="r1-"]`, func(n extractor.Node) error {
|
||||
err := doc.ForEach(`article[id^="r1-"]`, func(n extractor.Node) error {
|
||||
var r Result
|
||||
|
||||
links := n.Select(`a[href][target="_self"]`)
|
||||
|
||||
21
sites/internal/parse/parse.go
Normal file
21
sites/internal/parse/parse.go
Normal file
@@ -0,0 +1,21 @@
|
||||
package parse
|
||||
|
||||
import "strconv"
|
||||
|
||||
// NumericOnly keeps only the ASCII digits and decimal points found in in and
// parses what is left as a float64.
//
// Every other character (currency symbols, thousands separators, letters,
// whitespace) is dropped, so "$200,000" yields 200000 and "$1.5 Billion"
// yields 1.5; no scaling for unit words is applied here.
//
// It returns 0 when the filtered text is not a valid float, which includes
// input with no digits at all and input containing more than one '.'.
func NumericOnly(in string) float64 {
	// Digits and '.' are single-byte ASCII, and no byte of a multi-byte UTF-8
	// sequence falls in the ASCII range, so scanning bytes selects exactly the
	// characters a rune-wise scan would. Collecting them in one pre-sized
	// buffer avoids re-allocating a string for every appended character.
	buf := make([]byte, 0, len(in))
	for i := 0; i < len(in); i++ {
		if c := in[i]; (c >= '0' && c <= '9') || c == '.' {
			buf = append(buf, c)
		}
	}

	val, err := strconv.ParseFloat(string(buf), 64)
	if err != nil {
		return 0
	}

	return val
}
|
||||
25
sites/internal/parse/parse_test.go
Normal file
25
sites/internal/parse/parse_test.go
Normal file
@@ -0,0 +1,25 @@
|
||||
package parse
|
||||
|
||||
import "testing"
|
||||
|
||||
func TestNumericOnly(t *testing.T) {
|
||||
tests := []struct {
|
||||
input string
|
||||
want float64
|
||||
}{
|
||||
{"$1.5 Billion", 1.5},
|
||||
{"$100 Million", 100},
|
||||
{"$200,000", 200000},
|
||||
{"no numbers", 0},
|
||||
{"", 0},
|
||||
{"42", 42},
|
||||
{"3.14159", 3.14159},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
got := NumericOnly(tt.input)
|
||||
if got != tt.want {
|
||||
t.Errorf("NumericOnly(%q) = %v, want %v", tt.input, got, tt.want)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -8,6 +8,7 @@ import (
|
||||
"time"
|
||||
|
||||
"gitea.stevedudenhoeffer.com/steve/go-extractor"
|
||||
"gitea.stevedudenhoeffer.com/steve/go-extractor/sites/internal/parse"
|
||||
|
||||
"golang.org/x/text/currency"
|
||||
)
|
||||
@@ -166,30 +167,7 @@ func getNextDrawing(_ context.Context, doc extractor.Document) (*NextDrawing, er
|
||||
|
||||
// jackpot is in the format of "$1.5 billion", "$100 million", or "$200,000" etc
|
||||
|
||||
// make one filter to only get the numeric part of the jackpot
|
||||
|
||||
numericOnly := func(in string) float64 {
|
||||
var out string
|
||||
for _, r := range in {
|
||||
if r >= '0' && r <= '9' {
|
||||
out += string(r)
|
||||
}
|
||||
|
||||
if r == '.' {
|
||||
out += string(r)
|
||||
}
|
||||
}
|
||||
|
||||
val, err := strconv.ParseFloat(out, 64)
|
||||
|
||||
if err != nil {
|
||||
return 0
|
||||
}
|
||||
|
||||
return val
|
||||
}
|
||||
|
||||
numeric := numericOnly(txt)
|
||||
numeric := parse.NumericOnly(txt)
|
||||
|
||||
if strings.Contains(txt, "Billion") {
|
||||
nextDrawing.Jackpot = currency.USD.Amount(numeric * 1000000000)
|
||||
|
||||
@@ -8,6 +8,7 @@ import (
|
||||
"time"
|
||||
|
||||
"gitea.stevedudenhoeffer.com/steve/go-extractor"
|
||||
"gitea.stevedudenhoeffer.com/steve/go-extractor/sites/internal/parse"
|
||||
)
|
||||
|
||||
type Config struct {
|
||||
@@ -144,30 +145,7 @@ func getNextDrawing(_ context.Context, doc extractor.Document) (*NextDrawing, er
|
||||
|
||||
// jackpot is in the format of "$1.5 billion", "$100 million", or "$200,000" etc
|
||||
|
||||
// make one filter to only get the numeric part of the jackpot
|
||||
|
||||
numericOnly := func(in string) float64 {
|
||||
var out string
|
||||
for _, r := range in {
|
||||
if r >= '0' && r <= '9' {
|
||||
out += string(r)
|
||||
}
|
||||
|
||||
if r == '.' {
|
||||
out += string(r)
|
||||
}
|
||||
}
|
||||
|
||||
val, err := strconv.ParseFloat(out, 64)
|
||||
|
||||
if err != nil {
|
||||
return 0
|
||||
}
|
||||
|
||||
return val
|
||||
}
|
||||
|
||||
numeric := numericOnly(txt)
|
||||
numeric := parse.NumericOnly(txt)
|
||||
|
||||
if strings.Contains(txt, "Billion") {
|
||||
nextDrawing.JackpotDollars = int(numeric * 1000000000)
|
||||
|
||||
Reference in New Issue
Block a user