diff --git a/sites/duckduckgo/cmd/duckduckgo/main.go b/sites/duckduckgo/cmd/duckduckgo/main.go
new file mode 100644
index 0000000..5919a47
--- /dev/null
+++ b/sites/duckduckgo/cmd/duckduckgo/main.go
@@ -0,0 +1,113 @@
+package main
+
+import (
+	"context"
+	"fmt"
+	"io"
+	"os"
+	"strings"
+
+	"gitea.stevedudenhoeffer.com/steve/go-extractor/cmd/browser/pkg/browser"
+
+	"github.com/urfave/cli/v3"
+
+	"gitea.stevedudenhoeffer.com/steve/go-extractor/sites/duckduckgo"
+)
+
+// DuckDuckGoFlags is the set of command-line flags that configure a DuckDuckGo search.
+type DuckDuckGoFlags []cli.Flag
+
+// Flags holds the DuckDuckGo-specific flags registered by this command.
+var Flags = DuckDuckGoFlags{
+	&cli.StringFlag{
+		Name:    "region",
+		Aliases: []string{"r"},
+	},
+	&cli.StringFlag{
+		Name:    "safesearch",
+		Aliases: []string{"s"},
+	},
+}
+
+// ToConfig builds a duckduckgo.Config from the parsed command, starting from
+// duckduckgo.DefaultConfig and overriding only the flags that were given a
+// non-empty value. It panics if safesearch is not "on", "moderate" or "off".
+func (f DuckDuckGoFlags) ToConfig(cmd *cli.Command) duckduckgo.Config {
+	var res = duckduckgo.DefaultConfig
+
+	if r := cmd.String("region"); r != "" {
+		res.Region = r
+	}
+
+	if s := cmd.String("safesearch"); s != "" {
+		switch s {
+		case "on":
+			res.SafeSearch = duckduckgo.SafeSearchOn
+		case "moderate":
+			res.SafeSearch = duckduckgo.SafeSearchModerate
+		case "off":
+			res.SafeSearch = duckduckgo.SafeSearchOff
+		default:
+			panic("invalid safe search value")
+		}
+	}
+
+	return res
+}
+
+// deferClose closes cl if it is non-nil. The Close error is deliberately
+// discarded because this is only used for best-effort cleanup in defers.
+func deferClose(cl io.Closer) {
+	if cl != nil {
+		_ = cl.Close()
+	}
+}
+
+// main builds the duckduckgo command and runs it with the process arguments.
+func main() {
+	var flags []cli.Flag
+
+	flags = append(flags, Flags...)
+
+	// Named app rather than cli so the variable does not shadow the cli package.
+	app := &cli.Command{
+		Name:  "duckduckgo",
+		Usage: "Search DuckDuckGo",
+		Flags: flags,
+		Action: func(ctx context.Context, command *cli.Command) error {
+			c := Flags.ToConfig(command)
+
+			query := strings.TrimSpace(strings.Join(command.Args().Slice(), " "))
+
+			if query == "" {
+				return cli.Exit("usage: duckduckgo [flags] QUERY", 1)
+			}
+
+			b, err := browser.FromCommand(ctx, command)
+
+			if err != nil {
+				return fmt.Errorf("failed to create browser: %w", err)
+			}
+
+			// Register the close only once the browser is known to be valid.
+			defer deferClose(b)
+
+			res, err := c.Search(ctx, b, query)
+
+			if err != nil {
+				return fmt.Errorf("failed to search: %w", err)
+			}
+
+			fmt.Println(res)
+
+			return nil
+		},
+	}
+
+	// Report failures as a plain message and a non-zero exit status instead
+	// of panicking, which would dump a stack trace on ordinary runtime errors.
+	if err := app.Run(context.Background(), os.Args); err != nil {
+		fmt.Fprintln(os.Stderr, err)
+		os.Exit(1)
+	}
+}
diff --git a/sites/duckduckgo/duckduckgo.go b/sites/duckduckgo/duckduckgo.go
new file mode 100644
index 0000000..48a11c8
--- /dev/null
+++ b/sites/duckduckgo/duckduckgo.go
@@ -0,0 +1,156 @@
+// Package duckduckgo scrapes DuckDuckGo search results through an extractor.Browser.
+package duckduckgo
+
+import (
+	"context"
+	"fmt"
+	"io"
+	"log/slog"
+	"net/url"
+
+	"gitea.stevedudenhoeffer.com/steve/go-extractor"
+)
+
+// SafeSearch is the safe-search level sent to DuckDuckGo as the "kp" URL parameter.
+type SafeSearch int
+
+// Safe-search levels. No level uses the zero value, which Config.validate
+// treats as unset.
+const (
+	SafeSearchOn       SafeSearch = 1
+	SafeSearchModerate SafeSearch = -1
+	SafeSearchOff      SafeSearch = -2
+)
+
+// Config holds the options for a DuckDuckGo search.
+type Config struct {
+	// SafeSearch is the safe-search level to use. If zero, SafeSearchOff will be used.
+	SafeSearch SafeSearch
+
+	// Region is the region to use for the search engine. It is sent as the "kl"
+	// URL parameter and omitted when empty.
+	// See: https://duckduckgo.com/duckduckgo-help-pages/settings/params/ for more values
+	Region string
+}
+
+// validate returns a copy of c with an unset SafeSearch replaced by SafeSearchOff.
+func (c Config) validate() Config {
+	if c.SafeSearch == 0 {
+		c.SafeSearch = SafeSearchOff
+	}
+
+	return c
+}
+
+// ToSearchURL returns the DuckDuckGo URL that searches for query with the
+// settings in c. SafeSearch values other than the declared constants add no
+// "kp" parameter.
+func (c Config) ToSearchURL(query string) *url.URL {
+	c = c.validate()
+
+	// The URL is a constant that always parses, so the error is ignored.
+	res, _ := url.Parse("https://duckduckgo.com/")
+
+	var vals = res.Query()
+
+	switch c.SafeSearch {
+	case SafeSearchOn:
+		vals.Set("kp", "1")
+	case SafeSearchModerate:
+		vals.Set("kp", "-1")
+	case SafeSearchOff:
+		vals.Set("kp", "-2")
+	}
+
+	if c.Region != "" {
+		vals.Set("kl", c.Region)
+	}
+
+	vals.Set("q", query)
+
+	res.RawQuery = vals.Encode()
+
+	return res
+}
+
+// DefaultConfig is the baseline configuration: safe search off, no region.
+var DefaultConfig = Config{
+	SafeSearch: SafeSearchOff,
+}
+
+// Result is a single search result scraped from the results page.
+type Result struct {
+	URL         string
+	Title       string
+	Description string
+}
+
+// deferClose closes cl if it is non-nil. The Close error is deliberately
+// discarded because this is only used for best-effort cleanup in defers.
+func deferClose(cl io.Closer) {
+	if cl != nil {
+		_ = cl.Close()
+	}
+}
+
+// Search opens the DuckDuckGo results page for query in b and returns the
+// results found on it. Result articles without a link are skipped; a title or
+// description that cannot be found is left empty. It returns an error if the
+// page cannot be opened or if extracting the results fails.
+func (c Config) Search(ctx context.Context, b extractor.Browser, query string) ([]Result, error) {
+	u := c.ToSearchURL(query)
+
+	slog.Info("searching", "url", u, "query", query, "config", c, "browser", b)
+	doc, err := b.Open(ctx, u.String(), extractor.OpenPageOptions{})
+
+	if err != nil {
+		return nil, fmt.Errorf("failed to open url: %w", err)
+	}
+
+	// Register the close only once the page is known to have opened.
+	defer deferClose(doc)
+
+	var res []Result
+
+	err = doc.ForEach(`article[id^="r1-"]`, func(n extractor.Node) error {
+		links := n.Select(`a[href][target="_self"]`)
+
+		if len(links) == 0 {
+			return nil
+		}
+
+		// Use a local err so the closure does not write to Search's own err.
+		href, err := links[0].Attr(`href`)
+
+		if err != nil {
+			return fmt.Errorf("failed to get link: %w", err)
+		}
+
+		r := Result{URL: href}
+
+		titles := n.Select("div:nth-child(2) > div:nth-child(1) > div:nth-child(2) > p:nth-child(1)")
+
+		if len(titles) != 0 {
+			// Best-effort: an error reading the title text is ignored.
+			r.Title, _ = titles[0].Text()
+		}
+
+		descriptions := n.Select("span > span")
+
+		if len(descriptions) != 0 {
+			// Best-effort: an error reading the description text is ignored.
+			r.Description, _ = descriptions[0].Text()
+		}
+
+		res = append(res, r)
+
+		return nil
+	})
+
+	// Surface extraction failures instead of reporting partial results as success.
+	if err != nil {
+		return nil, fmt.Errorf("failed to extract results: %w", err)
+	}
+
+	return res, nil
+}