go-extractor/sites/duckduckgo/duckduckgo.go

127 lines
2.3 KiB
Go
Raw Normal View History

package duckduckgo
import (
"context"
"fmt"
"io"
"log/slog"
"net/url"
"gitea.stevedudenhoeffer.com/steve/go-extractor"
)
// SafeSearch is the safe-search level applied to a search.
//
// The numeric value of each constant equals the value ToSearchURL sends in
// the "kp" query parameter. The zero value is not a level of its own;
// Config.validate replaces it with SafeSearchOff.
type SafeSearch int

// Safe-search levels recognised by ToSearchURL.
const (
	// SafeSearchOn is sent as kp=1.
	SafeSearchOn SafeSearch = 1
	// SafeSearchModerate is sent as kp=-1.
	SafeSearchModerate SafeSearch = -1
	// SafeSearchOff is sent as kp=-2.
	SafeSearchOff SafeSearch = -2
)
// Config holds the options used to build a DuckDuckGo search URL.
type Config struct {
	// SafeSearch is the safe-search level to use. When left at its zero
	// value, SafeSearchOff is used instead.
	SafeSearch SafeSearch

	// Region is the region to use for the search engine. It is sent as the
	// "kl" query parameter and omitted when empty.
	// See https://duckduckgo.com/duckduckgo-help-pages/settings/params/ for
	// more values.
	Region string
}
// validate returns a copy of c with defaults filled in: a zero SafeSearch
// becomes SafeSearchOff. The receiver is passed by value, so the caller's
// Config is not modified.
func (c Config) validate() Config {
	if c.SafeSearch != 0 {
		return c
	}

	c.SafeSearch = SafeSearchOff
	return c
}
// ToSearchURL builds the https://duckduckgo.com/ URL that searches for query
// with this configuration.
//
// The configuration is passed through validate first. The query string then
// carries:
//   - "q":  the search query;
//   - "kp": "1", "-1" or "-2" for SafeSearchOn, SafeSearchModerate and
//     SafeSearchOff respectively (omitted for any other value);
//   - "kl": the region, only when Region is non-empty.
func (c Config) ToSearchURL(query string) *url.URL {
	cfg := c.validate()

	params := url.Values{}
	params.Set("q", query)

	if cfg.Region != "" {
		params.Set("kl", cfg.Region)
	}

	// Only the three known levels produce a kp parameter.
	var kp string
	switch cfg.SafeSearch {
	case SafeSearchOn:
		kp = "1"
	case SafeSearchModerate:
		kp = "-1"
	case SafeSearchOff:
		kp = "-2"
	}
	if kp != "" {
		params.Set("kp", kp)
	}

	return &url.URL{
		Scheme:   "https",
		Host:     "duckduckgo.com",
		Path:     "/",
		RawQuery: params.Encode(),
	}
}
// DefaultConfig is a Config with safe search set to SafeSearchOff and no
// region.
var DefaultConfig = Config{SafeSearch: SafeSearchOff}
// Result is a single search hit collected by Search.
type Result struct {
	// URL is the href attribute of the hit's first matching link.
	URL string
	// Title is the text of the hit's title element, or empty when none
	// matched.
	Title string
	// Description is the text of the hit's first `span > span` element, or
	// empty when none matched.
	Description string
}
// deferClose closes cl and discards the error returned by Close. A nil cl is
// ignored, so the helper can be deferred before the caller has checked
// whether it actually obtained a closer.
func deferClose(cl io.Closer) {
	if cl == nil {
		return
	}

	// The Close error is deliberately dropped: there is nothing useful to do
	// with it during cleanup.
	_ = cl.Close()
}
// Search runs query against DuckDuckGo through the browser b and returns the
// results scraped from the resulting page.
//
// The page at c.ToSearchURL(query) is opened with default
// extractor.OpenPageOptions. Every element matching `article[id^="r1-"]` is
// treated as one result: elements without a matching link are skipped, and a
// missing title or description leaves that field empty.
//
// An error is returned when the page cannot be opened or when doc.ForEach
// reports an error (for example a failure to read a result link's href,
// assuming ForEach surfaces callback errors; confirm against the extractor
// package). On error the returned slice is nil.
func (c Config) Search(ctx context.Context, b extractor.Browser, query string) ([]Result, error) {
	u := c.ToSearchURL(query)

	slog.Info("searching", "url", u, "query", query, "config", c, "browser", b)

	doc, err := b.Open(ctx, u.String(), extractor.OpenPageOptions{})
	// Registered before the error check; deferClose is a no-op for a nil
	// closer.
	defer deferClose(doc)
	if err != nil {
		return nil, fmt.Errorf("failed to open url: %w", err)
	}

	var res []Result

	err = doc.ForEach(`article[id^="r1-"]`, func(n extractor.Node) error {
		links := n.Select(`a[href][target="_self"]`)
		if len(links) == 0 {
			// No link in this element; skip it.
			return nil
		}

		// Use a callback-local error so the enclosing function's err is not
		// overwritten from inside the closure.
		href, err := links[0].Attr(`href`)
		if err != nil {
			return fmt.Errorf("failed to get link: %w", err)
		}

		r := Result{URL: href}

		// Title and description are best-effort: errors reading their text
		// are deliberately ignored.
		titles := n.Select("div:nth-child(2) > div:nth-child(1) > div:nth-child(2) > p:nth-child(1)")
		if len(titles) != 0 {
			r.Title, _ = titles[0].Text()
		}

		descriptions := n.Select("span > span")
		if len(descriptions) != 0 {
			r.Description, _ = descriptions[0].Text()
		}

		res = append(res, r)

		return nil
	})
	// Previously this error was assigned but never checked, so extraction
	// failures were silently reported as success.
	if err != nil {
		return nil, fmt.Errorf("failed to extract results: %w", err)
	}

	return res, nil
}