Files
go-extractor/sites/duckduckgo/page.go
Steve Dudenhoeffer 132817144e
All checks were successful
CI / build (pull_request) Successful in 29s
CI / vet (pull_request) Successful in 1m1s
CI / test (pull_request) Successful in 1m4s
refactor: deduplicate numericOnly and DuckDuckGo result extraction
- Extract identical numericOnly inline functions from powerball and
  megamillions into shared sites/internal/parse.NumericOnly with tests
- Extract duplicated DuckDuckGo result parsing from Search() and
  GetResults() into shared extractResults() helper

Closes #13, #14

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-15 16:26:54 +00:00

74 lines
1.3 KiB
Go

package duckduckgo
import (
"fmt"
"gitea.stevedudenhoeffer.com/steve/go-extractor"
"io"
"log/slog"
)
// SearchPage is a closable handle on a DuckDuckGo results page. Besides
// io.Closer it exposes the results found on the page and a way to request
// that more of them be loaded.
type SearchPage interface {
io.Closer
// GetResults returns the search results parsed from the page.
GetResults() ([]Result, error)
// LoadMore asks the page to load additional results.
LoadMore() error
}
// searchPage implements SearchPage on top of an extractor.Document.
type searchPage struct {
// doc is the underlying document; every searchPage method delegates to it.
doc extractor.Document
}
// GetResults parses the wrapped document with extractResults and hands back
// whatever it produced, results and error alike.
func (s searchPage) GetResults() ([]Result, error) {
	results, err := extractResults(s.doc)
	return results, err
}
// extractResults collects one Result per result article found under doc.
//
// Articles are matched with `article[id^="r1-"]`. An article without an
// `a[href][target="_self"]` link is skipped. The URL comes from the first such
// link's href; the title from the first h2 and the description from the first
// `span > span`, when present. A failure to read the href is returned from the
// visitor wrapped with context; the slice built so far is returned together
// with whatever error ForEach reports.
func extractResults(doc extractor.Node) ([]Result, error) {
	var results []Result

	visit := func(article extractor.Node) error {
		anchors := article.Select(`a[href][target="_self"]`)
		if len(anchors) == 0 {
			// No outbound link: nothing useful to record for this article.
			return nil
		}

		href, err := anchors[0].Attr(`href`)
		if err != nil {
			return fmt.Errorf("failed to get link: %w", err)
		}
		item := Result{URL: href}

		// Title and description are optional; text-extraction errors are
		// deliberately ignored so a result with a valid URL is still kept.
		if headings := article.Select("h2"); len(headings) > 0 {
			item.Title, _ = headings[0].Text()
		}
		if snippets := article.Select("span > span"); len(snippets) > 0 {
			item.Description, _ = snippets[0].Text()
		}

		results = append(results, item)
		return nil
	}

	err := doc.ForEach(`article[id^="r1-"]`, visit)
	return results, err
}
// LoadMore runs a click on every node matching `button#more-results` in the
// wrapped document, logging each node before clicking it. The error reported
// by ForEach is returned unchanged.
func (s searchPage) LoadMore() error {
	clickMore := func(button extractor.Node) error {
		slog.Info("clicking load more", "node", button)
		return button.Click()
	}

	return s.doc.ForEach(`button#more-results`, clickMore)
}
// Close closes the wrapped document and returns its error, satisfying
// io.Closer.
func (s searchPage) Close() error {
	err := s.doc.Close()
	return err
}