added archive, megamillions, and powerball site logic
This commit is contained in:
79
cmd/browser/main.go
Normal file
79
cmd/browser/main.go
Normal file
@@ -0,0 +1,79 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
|
||||
"github.com/urfave/cli/v3"
|
||||
|
||||
"gitea.stevedudenhoeffer.com/steve/go-extractor"
|
||||
"gitea.stevedudenhoeffer.com/steve/go-extractor/cmd/browser/pkg/browser"
|
||||
)
|
||||
|
||||
// deferClose closes cl and discards the returned error. It exists so that
// callers can write `defer deferClose(x)` without tripping unchecked-error
// linters; by the time a deferred close runs there is nothing useful left to
// do with a Close failure.
//
// A nil cl is ignored so the helper never panics inside a defer. Only an
// untyped nil interface is detected; a typed nil pointer wrapped in the
// interface is still passed through to its Close method.
func deferClose(cl io.Closer) {
	if cl == nil {
		return
	}

	// Best-effort cleanup: the error is intentionally dropped (see above).
	_ = cl.Close()
}
|
||||
func main() {
|
||||
cmd := &cli.Command{
|
||||
Name: "browser",
|
||||
Flags: browser.Flags,
|
||||
Usage: "<url>",
|
||||
Action: func(ctx context.Context, cli *cli.Command) error {
|
||||
target := cli.Args().First()
|
||||
if target == "" {
|
||||
return fmt.Errorf("no url specified")
|
||||
}
|
||||
|
||||
b, err := browser.FromCommand(ctx, cli)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
defer deferClose(b)
|
||||
|
||||
// now open the user specified url
|
||||
doc, err := b.Open(ctx, target, extractor.OpenPageOptions{})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
defer deferClose(doc)
|
||||
|
||||
article, err := extractor.Readability(ctx, doc)
|
||||
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
content := ""
|
||||
|
||||
if article.Content != "" {
|
||||
|
||||
if len(article.Content) > 32 {
|
||||
content = article.Content[:32] + "..."
|
||||
} else {
|
||||
content = article.Content
|
||||
}
|
||||
}
|
||||
|
||||
fmt.Println("Title:", article.Title)
|
||||
fmt.Println("Byline:", article.Byline)
|
||||
fmt.Println("Site:", article.SiteName)
|
||||
fmt.Println("Published:", article.PublishedTime)
|
||||
fmt.Println("Excerpt:", article.Excerpt)
|
||||
fmt.Println("Length:", article.Length)
|
||||
fmt.Println("Lang:", article.Lang)
|
||||
fmt.Println("Content:", content)
|
||||
fmt.Println("TextContent:", article.TextContent)
|
||||
return nil
|
||||
},
|
||||
}
|
||||
|
||||
err := cmd.Run(context.Background(), os.Args)
|
||||
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
76
cmd/browser/pkg/browser/flags.go
Normal file
76
cmd/browser/pkg/browser/flags.go
Normal file
@@ -0,0 +1,76 @@
|
||||
package browser
|
||||
|
||||
import (
|
||||
"context"
|
||||
"time"
|
||||
|
||||
"github.com/urfave/cli/v3"
|
||||
|
||||
"gitea.stevedudenhoeffer.com/steve/go-extractor"
|
||||
)
|
||||
|
||||
// BrowserFlags is a named slice of cli.Flag values; it is the type of the
// package-level Flags variable that declares the browser command-line options.
type BrowserFlags []cli.Flag
|
||||
|
||||
var Flags = BrowserFlags{
|
||||
&cli.StringFlag{
|
||||
Name: "user-agent",
|
||||
Aliases: []string{"ua"},
|
||||
Usage: "User-Agent to use for requests",
|
||||
DefaultText: "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:133.0) Gecko/20100101 Firefox/133.0",
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "timeout",
|
||||
Aliases: []string{"t"},
|
||||
Usage: "Timeout for requests",
|
||||
DefaultText: "30s",
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "browser",
|
||||
Aliases: []string{"b"},
|
||||
Usage: "Browser to use, one of: chromium, firefox, webkit",
|
||||
DefaultText: "firefox",
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "cookies-file",
|
||||
Aliases: []string{"c"},
|
||||
Usage: "cookies.txt file to load cookies from",
|
||||
DefaultText: "",
|
||||
},
|
||||
&cli.BoolFlag{
|
||||
Name: "visible",
|
||||
Usage: "If set, the browser will be visible, if not set, the browser will be headless",
|
||||
DefaultText: "false",
|
||||
},
|
||||
}
|
||||
|
||||
func FromCommand(_ context.Context, cmd *cli.Command) (extractor.Browser, error) {
|
||||
var opts extractor.PlayWrightBrowserOptions
|
||||
|
||||
if ua := cmd.String("user-agent"); ua != "" {
|
||||
opts.UserAgent = ua
|
||||
}
|
||||
|
||||
if to := cmd.String("timeout"); to != "" {
|
||||
d, err := time.ParseDuration(to)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
opts.Timeout = &d
|
||||
}
|
||||
|
||||
if b := cmd.String("browser"); b != "" {
|
||||
opts.Browser = extractor.PlayWrightBrowserSelection(b)
|
||||
}
|
||||
|
||||
if cf := cmd.String("cookies-file"); cf != "" {
|
||||
cookies, err := extractor.LoadCookiesFile(cf)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
opts.CookieJar = cookies
|
||||
}
|
||||
|
||||
opts.ShowBrowser = cmd.Bool("visible")
|
||||
|
||||
return extractor.NewPlayWrightBrowser(opts)
|
||||
}
|
Reference in New Issue
Block a user