- Extract identical numericOnly inline functions from powerball and megamillions into shared sites/internal/parse.NumericOnly with tests - Extract duplicated DuckDuckGo result parsing from Search() and GetResults() into shared extractResults() helper Closes #13, #14 Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
74 lines
1.3 KiB
Go
74 lines
1.3 KiB
Go
package duckduckgo
|
|
|
|
import (
|
|
"fmt"
|
|
"gitea.stevedudenhoeffer.com/steve/go-extractor"
|
|
"io"
|
|
"log/slog"
|
|
)
|
|
|
|
type SearchPage interface {
|
|
io.Closer
|
|
GetResults() ([]Result, error)
|
|
LoadMore() error
|
|
}
|
|
|
|
type searchPage struct {
|
|
doc extractor.Document
|
|
}
|
|
|
|
func (s searchPage) GetResults() ([]Result, error) {
|
|
return extractResults(s.doc)
|
|
}
|
|
|
|
// extractResults parses search results from a DuckDuckGo results page.
|
|
func extractResults(doc extractor.Node) ([]Result, error) {
|
|
var res []Result
|
|
|
|
err := doc.ForEach(`article[id^="r1-"]`, func(n extractor.Node) error {
|
|
var r Result
|
|
|
|
links := n.Select(`a[href][target="_self"]`)
|
|
|
|
if len(links) == 0 {
|
|
return nil
|
|
}
|
|
|
|
var err error
|
|
r.URL, err = links[0].Attr(`href`)
|
|
|
|
if err != nil {
|
|
return fmt.Errorf("failed to get link: %w", err)
|
|
}
|
|
|
|
titles := n.Select("h2")
|
|
|
|
if len(titles) != 0 {
|
|
r.Title, _ = titles[0].Text()
|
|
}
|
|
|
|
descriptions := n.Select("span > span")
|
|
|
|
if len(descriptions) != 0 {
|
|
r.Description, _ = descriptions[0].Text()
|
|
}
|
|
|
|
res = append(res, r)
|
|
|
|
return nil
|
|
})
|
|
|
|
return res, err
|
|
}
|
|
|
|
func (s searchPage) LoadMore() error {
|
|
return s.doc.ForEach(`button#more-results`, func(n extractor.Node) error {
|
|
slog.Info("clicking load more", "node", n)
|
|
return n.Click()
|
|
})
|
|
}
|
|
|
|
func (s searchPage) Close() error {
|
|
return s.doc.Close()
|
|
}
|