Merge pull request 'Deduplicate helpers (#13, #14)' (#40) from refactor/deduplicate-helpers into main
This commit was merged in pull request #40.
This commit is contained in:
@@ -95,39 +95,5 @@ func (c Config) Search(ctx context.Context, b extractor.Browser, query string) (
|
|||||||
}
|
}
|
||||||
defer extractor.DeferClose(doc)
|
defer extractor.DeferClose(doc)
|
||||||
|
|
||||||
var res []Result
|
return extractResults(doc)
|
||||||
|
|
||||||
err = doc.ForEach(`article[id^="r1-"]`, func(n extractor.Node) error {
|
|
||||||
var r Result
|
|
||||||
|
|
||||||
links := n.Select(`a[href][target="_self"]`)
|
|
||||||
|
|
||||||
if len(links) == 0 {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
r.URL, err = links[0].Attr(`href`)
|
|
||||||
|
|
||||||
if err != nil {
|
|
||||||
return fmt.Errorf("failed to get link: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
titles := n.Select("h2")
|
|
||||||
|
|
||||||
if len(titles) != 0 {
|
|
||||||
r.Title, _ = titles[0].Text()
|
|
||||||
}
|
|
||||||
|
|
||||||
descriptions := n.Select("span > span")
|
|
||||||
|
|
||||||
if len(descriptions) != 0 {
|
|
||||||
r.Description, _ = descriptions[0].Text()
|
|
||||||
}
|
|
||||||
|
|
||||||
res = append(res, r)
|
|
||||||
|
|
||||||
return nil
|
|
||||||
})
|
|
||||||
|
|
||||||
return res, err
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -18,9 +18,14 @@ type searchPage struct {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (s searchPage) GetResults() ([]Result, error) {
|
func (s searchPage) GetResults() ([]Result, error) {
|
||||||
|
return extractResults(s.doc)
|
||||||
|
}
|
||||||
|
|
||||||
|
// extractResults parses search results from a DuckDuckGo results page.
|
||||||
|
func extractResults(doc extractor.Node) ([]Result, error) {
|
||||||
var res []Result
|
var res []Result
|
||||||
|
|
||||||
err := s.doc.ForEach(`article[id^="r1-"]`, func(n extractor.Node) error {
|
err := doc.ForEach(`article[id^="r1-"]`, func(n extractor.Node) error {
|
||||||
var r Result
|
var r Result
|
||||||
|
|
||||||
links := n.Select(`a[href][target="_self"]`)
|
links := n.Select(`a[href][target="_self"]`)
|
||||||
|
|||||||
21
sites/internal/parse/parse.go
Normal file
21
sites/internal/parse/parse.go
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
package parse
|
||||||
|
|
||||||
|
import "strconv"
|
||||||
|
|
||||||
|
// NumericOnly strips everything except ASCII digits and '.' from in and
// parses what is left as a float64.
//
// Currency symbols, thousands separators and unit words are dropped, so
// "$200,000" yields 200000 and "$1.5 Billion" yields 1.5; the unit word is
// not applied to the result. It returns 0 when the filtered text is not a
// valid float, which includes input without any digits and input that
// leaves more than one '.' behind (for example "1.2.3").
func NumericOnly(in string) float64 {
	// Collect the kept characters in a byte buffer sized for the worst case
	// instead of growing a string with += on every kept rune.
	buf := make([]byte, 0, len(in))
	for _, r := range in {
		if (r >= '0' && r <= '9') || r == '.' {
			// r is a single ASCII character here, so byte(r) is lossless.
			buf = append(buf, byte(r))
		}
	}

	val, err := strconv.ParseFloat(string(buf), 64)
	if err != nil {
		return 0
	}

	return val
}
|
||||||
25
sites/internal/parse/parse_test.go
Normal file
25
sites/internal/parse/parse_test.go
Normal file
@@ -0,0 +1,25 @@
|
|||||||
|
package parse
|
||||||
|
|
||||||
|
import "testing"
|
||||||
|
|
||||||
|
func TestNumericOnly(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
input string
|
||||||
|
want float64
|
||||||
|
}{
|
||||||
|
{"$1.5 Billion", 1.5},
|
||||||
|
{"$100 Million", 100},
|
||||||
|
{"$200,000", 200000},
|
||||||
|
{"no numbers", 0},
|
||||||
|
{"", 0},
|
||||||
|
{"42", 42},
|
||||||
|
{"3.14159", 3.14159},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range tests {
|
||||||
|
got := NumericOnly(tt.input)
|
||||||
|
if got != tt.want {
|
||||||
|
t.Errorf("NumericOnly(%q) = %v, want %v", tt.input, got, tt.want)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -8,6 +8,7 @@ import (
|
|||||||
"time"
|
"time"
|
||||||
|
|
||||||
"gitea.stevedudenhoeffer.com/steve/go-extractor"
|
"gitea.stevedudenhoeffer.com/steve/go-extractor"
|
||||||
|
"gitea.stevedudenhoeffer.com/steve/go-extractor/sites/internal/parse"
|
||||||
|
|
||||||
"golang.org/x/text/currency"
|
"golang.org/x/text/currency"
|
||||||
)
|
)
|
||||||
@@ -166,30 +167,7 @@ func getNextDrawing(_ context.Context, doc extractor.Document) (*NextDrawing, er
|
|||||||
|
|
||||||
// jackpot is in the format of "$1.5 billion", "$100 million", or "$200,000" etc
|
// jackpot is in the format of "$1.5 billion", "$100 million", or "$200,000" etc
|
||||||
|
|
||||||
// make one filter to only get the numeric part of the jackpot
|
numeric := parse.NumericOnly(txt)
|
||||||
|
|
||||||
numericOnly := func(in string) float64 {
|
|
||||||
var out string
|
|
||||||
for _, r := range in {
|
|
||||||
if r >= '0' && r <= '9' {
|
|
||||||
out += string(r)
|
|
||||||
}
|
|
||||||
|
|
||||||
if r == '.' {
|
|
||||||
out += string(r)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
val, err := strconv.ParseFloat(out, 64)
|
|
||||||
|
|
||||||
if err != nil {
|
|
||||||
return 0
|
|
||||||
}
|
|
||||||
|
|
||||||
return val
|
|
||||||
}
|
|
||||||
|
|
||||||
numeric := numericOnly(txt)
|
|
||||||
|
|
||||||
if strings.Contains(txt, "Billion") {
|
if strings.Contains(txt, "Billion") {
|
||||||
nextDrawing.Jackpot = currency.USD.Amount(numeric * 1000000000)
|
nextDrawing.Jackpot = currency.USD.Amount(numeric * 1000000000)
|
||||||
|
|||||||
@@ -8,6 +8,7 @@ import (
|
|||||||
"time"
|
"time"
|
||||||
|
|
||||||
"gitea.stevedudenhoeffer.com/steve/go-extractor"
|
"gitea.stevedudenhoeffer.com/steve/go-extractor"
|
||||||
|
"gitea.stevedudenhoeffer.com/steve/go-extractor/sites/internal/parse"
|
||||||
)
|
)
|
||||||
|
|
||||||
type Config struct {
|
type Config struct {
|
||||||
@@ -144,30 +145,7 @@ func getNextDrawing(_ context.Context, doc extractor.Document) (*NextDrawing, er
|
|||||||
|
|
||||||
// jackpot is in the format of "$1.5 billion", "$100 million", or "$200,000" etc
|
// jackpot is in the format of "$1.5 billion", "$100 million", or "$200,000" etc
|
||||||
|
|
||||||
// make one filter to only get the numeric part of the jackpot
|
numeric := parse.NumericOnly(txt)
|
||||||
|
|
||||||
numericOnly := func(in string) float64 {
|
|
||||||
var out string
|
|
||||||
for _, r := range in {
|
|
||||||
if r >= '0' && r <= '9' {
|
|
||||||
out += string(r)
|
|
||||||
}
|
|
||||||
|
|
||||||
if r == '.' {
|
|
||||||
out += string(r)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
val, err := strconv.ParseFloat(out, 64)
|
|
||||||
|
|
||||||
if err != nil {
|
|
||||||
return 0
|
|
||||||
}
|
|
||||||
|
|
||||||
return val
|
|
||||||
}
|
|
||||||
|
|
||||||
numeric := numericOnly(txt)
|
|
||||||
|
|
||||||
if strings.Contains(txt, "Billion") {
|
if strings.Contains(txt, "Billion") {
|
||||||
nextDrawing.JackpotDollars = int(numeric * 1000000000)
|
nextDrawing.JackpotDollars = int(numeric * 1000000000)
|
||||||
|
|||||||
Reference in New Issue
Block a user