Introduced the `OpenSearch` method and `SearchPage` interface to streamline search operations and allow for loading more results dynamically. Updated dependencies and modified the DuckDuckGo CLI to utilize these enhancements.
142 lines
2.6 KiB
Go
package duckduckgo
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"io"
|
|
"log/slog"
|
|
"net/url"
|
|
|
|
"gitea.stevedudenhoeffer.com/steve/go-extractor"
|
|
)
|
|
|
|
// SafeSearch is the safe-search filtering level sent to DuckDuckGo
// via the "kp" query parameter (see Config.ToSearchURL).
type SafeSearch int

const (
	// SafeSearchOn enables strict filtering (sent as kp=1).
	SafeSearchOn SafeSearch = 1
	// SafeSearchModerate enables moderate filtering (sent as kp=-1).
	SafeSearchModerate SafeSearch = -1
	// SafeSearchOff disables filtering (sent as kp=-2).
	SafeSearchOff SafeSearch = -2
)
|
|
|
|
// Config holds the options used to build a DuckDuckGo search URL.
// The zero value is usable; see validate for applied defaults.
type Config struct {
	// SafeSearch is the safe-search level to use. If empty, SafeSearchOff will be used.
	SafeSearch SafeSearch

	// Region is the region to use for the search engine.
	// See: https://duckduckgo.com/duckduckgo-help-pages/settings/params/ for more values
	Region string
}
|
|
|
|
func (c Config) validate() Config {
|
|
if c.SafeSearch == 0 {
|
|
c.SafeSearch = SafeSearchOff
|
|
}
|
|
|
|
return c
|
|
}
|
|
func (c Config) ToSearchURL(query string) *url.URL {
|
|
c = c.validate()
|
|
|
|
res, _ := url.Parse("https://duckduckgo.com/")
|
|
|
|
var vals = res.Query()
|
|
|
|
switch c.SafeSearch {
|
|
case SafeSearchOn:
|
|
vals.Set("kp", "1")
|
|
case SafeSearchModerate:
|
|
vals.Set("kp", "-1")
|
|
case SafeSearchOff:
|
|
vals.Set("kp", "-2")
|
|
}
|
|
|
|
if c.Region != "" {
|
|
vals.Set("kl", c.Region)
|
|
}
|
|
|
|
vals.Set("q", query)
|
|
|
|
res.RawQuery = vals.Encode()
|
|
|
|
return res
|
|
}
|
|
|
|
// DefaultConfig is the configuration used when callers have no specific
// preference; it disables safe-search filtering.
var DefaultConfig = Config{
	SafeSearch: SafeSearchOff,
}
|
|
|
|
// Result is a single search hit scraped from the DuckDuckGo results page.
type Result struct {
	// URL is the destination link of the result.
	URL string
	// Title is the heading text of the result.
	Title string
	// Description is the snippet text shown for the result.
	Description string
}
|
|
|
|
// deferClose closes cl, discarding any error; a nil closer is a no-op.
// It is intended for use with defer on values that may be nil.
func deferClose(cl io.Closer) {
	if cl == nil {
		return
	}
	_ = cl.Close()
}
|
|
|
|
func (c Config) OpenSearch(ctx context.Context, b extractor.Browser, query string) (SearchPage, error) {
|
|
u := c.ToSearchURL(query)
|
|
|
|
slog.Info("searching", "url", u, "query", query, "config", c, "browser", b)
|
|
doc, err := b.Open(ctx, u.String(), extractor.OpenPageOptions{})
|
|
if err != nil {
|
|
if doc != nil {
|
|
_ = doc.Close()
|
|
}
|
|
return nil, fmt.Errorf("failed to open url: %w", err)
|
|
}
|
|
|
|
return searchPage{doc}, nil
|
|
}
|
|
|
|
func (c Config) Search(ctx context.Context, b extractor.Browser, query string) ([]Result, error) {
|
|
u := c.ToSearchURL(query)
|
|
|
|
slog.Info("searching", "url", u, "query", query, "config", c, "browser", b)
|
|
doc, err := b.Open(ctx, u.String(), extractor.OpenPageOptions{})
|
|
defer deferClose(doc)
|
|
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to open url: %w", err)
|
|
}
|
|
|
|
var res []Result
|
|
|
|
err = doc.ForEach(`article[id^="r1-"]`, func(n extractor.Node) error {
|
|
var r Result
|
|
|
|
links := n.Select(`a[href][target="_self"]`)
|
|
|
|
if len(links) == 0 {
|
|
return nil
|
|
}
|
|
|
|
r.URL, err = links[0].Attr(`href`)
|
|
|
|
if err != nil {
|
|
return fmt.Errorf("failed to get link: %w", err)
|
|
}
|
|
|
|
titles := n.Select("h2")
|
|
|
|
if len(titles) != 0 {
|
|
r.Title, _ = titles[0].Text()
|
|
}
|
|
|
|
descriptions := n.Select("span > span")
|
|
|
|
if len(descriptions) != 0 {
|
|
r.Description, _ = descriptions[0].Text()
|
|
}
|
|
|
|
res = append(res, r)
|
|
|
|
return nil
|
|
})
|
|
|
|
return res, nil
|
|
}
|