From 36707dec17fe454f46aa6bca2cf7b9cfe459a78e Mon Sep 17 00:00:00 2001
From: Steve Dudenhoeffer
Date: Tue, 24 Dec 2024 12:15:48 -0500
Subject: [PATCH] added useragents to go-extractor

---
 sites/useragents/cmd/useragents.go | 62 ++++++++++++++++++++
 sites/useragents/useragents.go     | 94 ++++++++++++++++++++++++++++++
 2 files changed, 156 insertions(+)
 create mode 100644 sites/useragents/cmd/useragents.go
 create mode 100644 sites/useragents/useragents.go

diff --git a/sites/useragents/cmd/useragents.go b/sites/useragents/cmd/useragents.go
new file mode 100644
index 0000000..cd849f6
--- /dev/null
+++ b/sites/useragents/cmd/useragents.go
@@ -0,0 +1,62 @@
+// Command useragents prints the most common desktop user agent listed on
+// useragents.me.
+package main
+
+import (
+	"context"
+	"fmt"
+	"os"
+
+	"github.com/urfave/cli/v3"
+
+	"gitea.stevedudenhoeffer.com/steve/go-extractor/cmd/browser/pkg/browser"
+	"gitea.stevedudenhoeffer.com/steve/go-extractor/sites/useragents"
+)
+
+// UseragentsFlags is the set of command-line flags specific to this command.
+type UseragentsFlags []cli.Flag
+
+// Flags holds the useragents-specific flags. It is currently empty.
+var Flags = UseragentsFlags{}
+
+// ToConfig builds the useragents.Config for a parsed command. No flags are
+// defined yet, so the command is ignored and useragents.DefaultConfig is
+// returned.
+func (f UseragentsFlags) ToConfig(_ *cli.Command) useragents.Config {
+	return useragents.DefaultConfig
+}
+
+// main obtains a browser via browser.FromCommand, looks up the most common
+// desktop user agent and prints it. A failure of the command ends in a panic.
+func main() {
+	var flags []cli.Flag
+
+	flags = append(flags, browser.Flags...)
+	flags = append(flags, Flags...)
+
+	// Named app/cmd rather than cli so the locals do not shadow the cli package.
+	app := &cli.Command{
+		Name:  "useragents",
+		Usage: "Get useragents.me information",
+		Flags: flags,
+
+		Action: func(ctx context.Context, cmd *cli.Command) error {
+			b, err := browser.FromCommand(ctx, cmd)
+			if err != nil {
+				return err
+			}
+
+			mostCommon, err := Flags.ToConfig(cmd).GetMostCommonDesktopUserAgent(ctx, b)
+			if err != nil {
+				return err
+			}
+
+			fmt.Printf("Most Common User Agent: %+v\n", mostCommon)
+			return nil
+		},
+	}
+
+	if err := app.Run(context.Background(), os.Args); err != nil {
+		panic(err)
+	}
+}
diff --git a/sites/useragents/useragents.go b/sites/useragents/useragents.go
new file mode 100644
index 0000000..8613f00
--- /dev/null
+++ b/sites/useragents/useragents.go
@@ -0,0 +1,94 @@
+// Package useragents reads user agent statistics from useragents.me.
+package useragents
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"io"
+	"strings"
+
+	"gitea.stevedudenhoeffer.com/steve/go-extractor"
+)
+
+// Config holds the options for querying useragents.me. It has no fields yet.
+type Config struct{}
+
+// DefaultConfig is the Config used by the package-level helper functions.
+var DefaultConfig = Config{}
+
+// mostCommonDesktopSelector locates the textarea on useragents.me that holds
+// the JSON list of the most common desktop user agents.
+const mostCommonDesktopSelector = "#most-common-desktop-useragents-json-csv > div:nth-child(1) > textarea:nth-child(4)"
+
+// deferClose closes cl and discards any error; it is meant for use in defer
+// statements where a close failure cannot be acted upon.
+func deferClose(cl io.Closer) {
+	if cl != nil {
+		_ = cl.Close()
+	}
+}
+
+// GetMostCommonDesktopUserAgent is shorthand for calling the method of the
+// same name on DefaultConfig.
+func GetMostCommonDesktopUserAgent(ctx context.Context, b extractor.Browser) (string, error) {
+	return DefaultConfig.GetMostCommonDesktopUserAgent(ctx, b)
+}
+
+// GetMostCommonDesktopUserAgent opens https://www.useragents.me/ with b, reads
+// the JSON list of desktop user agents embedded in the page and returns the
+// "ua" value of the entry with the highest "pct".
+//
+// It returns an error if the page cannot be opened, the element content cannot
+// be read, or the content cannot be decoded as a JSON array of objects. If no
+// entry has a positive numeric "pct" and a string "ua", it returns "" and nil.
+func (c Config) GetMostCommonDesktopUserAgent(ctx context.Context, b extractor.Browser) (string, error) {
+	doc, err := b.Open(ctx, "https://www.useragents.me/", extractor.OpenPageOptions{})
+	if err != nil {
+		return "", fmt.Errorf("failed to open useragents.me: %w", err)
+	}
+	defer deferClose(doc)
+
+	var text strings.Builder
+	for _, el := range doc.Select(mostCommonDesktopSelector) {
+		t, err := el.Content()
+		if err != nil {
+			return "", fmt.Errorf("failed to get text: %w", err)
+		}
+		text.WriteString(t)
+	}
+
+	return mostCommonUserAgent(text.String())
+}
+
+// mostCommonUserAgent parses raw as a JSON array of objects and returns the
+// "ua" of the object with the highest "pct". Objects whose "pct" is not a
+// number or whose "ua" is not a string are skipped instead of causing a panic.
+func mostCommonUserAgent(raw string) (string, error) {
+	var data []map[string]any
+	if err := json.Unmarshal([]byte(raw), &data); err != nil {
+		return "", fmt.Errorf("failed to parse user agent list: %w", err)
+	}
+
+	highestAgent := ""
+	highestPct := 0.0
+
+	for _, agent := range data {
+		pct, ok := agent["pct"].(float64)
+		if !ok {
+			continue
+		}
+
+		ua, ok := agent["ua"].(string)
+		if !ok {
+			continue
+		}
+
+		if pct > highestPct {
+			highestPct = pct
+			highestAgent = ua
+		}
+	}
+
+	return highestAgent, nil
+}