added useragents to go-extractor

This commit is contained in:
Steve Dudenhoeffer 2024-12-24 12:15:48 -05:00
parent 276b7ad38d
commit 36707dec17
2 changed files with 130 additions and 0 deletions

View File

@ -0,0 +1,58 @@
package main
import (
"context"
"fmt"
"os"
"github.com/urfave/cli/v3"
"gitea.stevedudenhoeffer.com/steve/go-extractor/cmd/browser/pkg/browser"
"gitea.stevedudenhoeffer.com/steve/go-extractor/sites/useragents"
)
// UseragentsFlags is the list of command-line flags specific to the
// useragents command. It also carries ToConfig, which produces the
// useragents.Config used by the command.
type UseragentsFlags []cli.Flag

// Flags is the set of useragents-specific flags that main appends to the
// command's flag list. It is currently empty.
var Flags = UseragentsFlags{}
// ToConfig produces the useragents.Config to use for a command invocation.
//
// The command argument is currently ignored: the result is always a copy of
// useragents.DefaultConfig.
func (f UseragentsFlags) ToConfig(_ *cli.Command) useragents.Config {
	return useragents.DefaultConfig
}
// main builds the "useragents" command from the shared browser flags plus this
// command's own Flags, runs it against os.Args, and prints the most common
// desktop user agent reported by useragents.me.
//
// Any error returned by the command is written to stderr and the process
// exits with status 1.
func main() {
	var flags []cli.Flag
	flags = append(flags, browser.Flags...)
	flags = append(flags, Flags...)

	// Named cmd (not cli) so the imported cli package is not shadowed.
	cmd := &cli.Command{
		Name:  "useragents",
		Usage: "Get useragents.me information",
		Flags: flags,
		Action: func(ctx context.Context, c *cli.Command) error {
			// NOTE(review): b is never closed here; confirm whether the
			// browser returned by FromCommand needs an explicit Close.
			b, err := browser.FromCommand(ctx, c)
			if err != nil {
				return err
			}

			mostCommon, err := Flags.ToConfig(c).GetMostCommonDesktopUserAgent(ctx, b)
			if err != nil {
				return err
			}

			fmt.Printf("Most Common User Agent: %+v\n", mostCommon)
			return nil
		},
	}

	if err := cmd.Run(context.Background(), os.Args); err != nil {
		// A failed run is an ordinary runtime error, not a programmer bug, so
		// report it plainly instead of panicking with a stack trace.
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
}

View File

@ -0,0 +1,72 @@
package useragents
import (
"context"
"encoding/json"
"fmt"
"io"
"gitea.stevedudenhoeffer.com/steve/go-extractor"
)
// Config holds the settings for useragents.me lookups. It currently has no
// fields.
type Config struct{}

// DefaultConfig is the Config used by the package-level helper functions.
var DefaultConfig Config
// deferClose closes cl when it is non-nil and discards the error returned by
// Close. It is meant to be used in defer statements where a close failure is
// deliberately ignored.
func deferClose(cl io.Closer) {
	if cl == nil {
		return
	}

	// Best-effort cleanup: the close error is intentionally dropped.
	_ = cl.Close()
}
// GetMostCommonDesktopUserAgent is a package-level convenience wrapper that
// performs the lookup using DefaultConfig and returns its result unchanged.
func GetMostCommonDesktopUserAgent(ctx context.Context, b extractor.Browser) (string, error) {
	cfg := DefaultConfig
	return cfg.GetMostCommonDesktopUserAgent(ctx, b)
}
// GetMostCommonDesktopUserAgent opens https://www.useragents.me/ in b, reads
// the content of the elements matched by the desktop user-agent selector,
// parses it as JSON and returns the "ua" value of the entry with the highest
// "pct" value.
//
// An error is returned when the page cannot be opened, when the content of a
// matched element cannot be read, when the content is not a JSON array of
// objects, or when no entry has both a positive numeric "pct" and a non-empty
// string "ua".
func (c Config) GetMostCommonDesktopUserAgent(ctx context.Context, b extractor.Browser) (string, error) {
	doc, err := b.Open(ctx, "https://www.useragents.me/", extractor.OpenPageOptions{})
	if err != nil {
		return "", fmt.Errorf("failed to open useragents.me: %w", err)
	}
	defer deferClose(doc)

	// NOTE(review): this selector is tied to the current page markup and is
	// presumed to match the textarea holding the JSON export; confirm it if
	// the site layout changes.
	nodes := doc.Select("#most-common-desktop-useragents-json-csv > div:nth-child(1) > textarea:nth-child(4)")

	// Accumulate the content directly as bytes so it can be handed to the JSON
	// decoder without repeated string concatenation or an extra conversion.
	var raw []byte
	for _, el := range nodes {
		t, err := el.Content()
		if err != nil {
			return "", fmt.Errorf("failed to get text: %w", err)
		}
		raw = append(raw, t...)
	}

	return mostCommonUserAgent(raw)
}

// mostCommonUserAgent parses raw as a JSON array of objects and returns the
// "ua" string of the object with the largest numeric "pct" field.
//
// Objects whose "pct" is not a number, or whose "ua" is missing, empty or not
// a string, are skipped rather than causing a panic. Because the comparison
// starts from zero, objects with a "pct" of zero or less are never selected.
func mostCommonUserAgent(raw []byte) (string, error) {
	var entries []map[string]any
	if err := json.Unmarshal(raw, &entries); err != nil {
		return "", fmt.Errorf("failed to parse useragents.me data: %w", err)
	}

	best, bestPct := "", 0.0
	for _, entry := range entries {
		pct, ok := entry["pct"].(float64)
		if !ok {
			continue
		}

		ua, ok := entry["ua"].(string)
		if !ok || ua == "" {
			continue
		}

		if pct > bestPct {
			best, bestPct = ua, pct
		}
	}

	if best == "" {
		// Returning an empty user agent with a nil error would hide the
		// failure from callers, so report it explicitly.
		return "", fmt.Errorf("no desktop user agent found in useragents.me data")
	}
	return best, nil
}