added archive, megamillions, and powerball site logic
This commit is contained in:
@@ -25,6 +25,7 @@ type PlayWrightBrowserSelection string
|
||||
|
||||
var (
|
||||
ErrInvalidBrowserSelection = errors.New("invalid browser selection")
|
||||
ErrPageNotFound = errors.New("page not found")
|
||||
ErrInvalidStatusCode = errors.New("invalid status code")
|
||||
)
|
||||
|
||||
@@ -35,20 +36,22 @@ const (
|
||||
)
|
||||
|
||||
type PlayWrightBrowserOptions struct {
|
||||
UserAgent string // If empty, defaults to "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.3"
|
||||
UserAgent string // If empty, defaults to "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:133.0) Gecko/20100101 Firefox/133.0"
|
||||
Browser PlayWrightBrowserSelection // If unset defaults to Firefox.
|
||||
Timeout *time.Duration // If unset defaults to 30 seconds timeout. If set to 0, no timeout
|
||||
|
||||
// CookieJar will, if set, load all cookies from the cookie jar into the browser and save all cookies from the
|
||||
// browser into the cookie jar for each request.
|
||||
CookieJar
|
||||
|
||||
ShowBrowser bool // If false, browser will be headless
|
||||
}
|
||||
|
||||
func cookieToPlaywrightOptionalCookie(cookie Cookie) playwright.OptionalCookie {
|
||||
return playwright.OptionalCookie{
|
||||
Name: cookie.Name,
|
||||
Value: cookie.Value,
|
||||
Domain: playwright.String(cookie.Domain),
|
||||
Domain: playwright.String(cookie.Host),
|
||||
Path: playwright.String(cookie.Path),
|
||||
Expires: playwright.Float(float64(cookie.Expires.Unix())),
|
||||
HttpOnly: playwright.Bool(cookie.HttpOnly),
|
||||
@@ -59,7 +62,7 @@ func playwrightCookieToCookie(cookie playwright.Cookie) Cookie {
|
||||
return Cookie{
|
||||
Name: cookie.Name,
|
||||
Value: cookie.Value,
|
||||
Domain: cookie.Domain,
|
||||
Host: cookie.Domain,
|
||||
Path: cookie.Path,
|
||||
Expires: time.Unix(int64(cookie.Expires), 0),
|
||||
HttpOnly: cookie.HttpOnly,
|
||||
@@ -69,7 +72,7 @@ func playwrightCookieToCookie(cookie playwright.Cookie) Cookie {
|
||||
func NewPlayWrightBrowser(opts ...PlayWrightBrowserOptions) (Browser, error) {
|
||||
var thirtySeconds = 30 * time.Second
|
||||
opt := PlayWrightBrowserOptions{
|
||||
UserAgent: "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.3",
|
||||
UserAgent: "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:133.0) Gecko/20100101 Firefox/133.0",
|
||||
Browser: PlayWrightBrowserSelectionFirefox,
|
||||
Timeout: &thirtySeconds,
|
||||
}
|
||||
@@ -87,16 +90,23 @@ func NewPlayWrightBrowser(opts ...PlayWrightBrowserOptions) (Browser, error) {
|
||||
if o.CookieJar != nil {
|
||||
opt.CookieJar = o.CookieJar
|
||||
}
|
||||
}
|
||||
|
||||
err := playwright.Install()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
opt.ShowBrowser = o.ShowBrowser
|
||||
}
|
||||
|
||||
pw, err := playwright.Run()
|
||||
|
||||
if err != nil {
|
||||
return nil, err
|
||||
err = playwright.Install()
|
||||
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
pw, err = playwright.Run()
|
||||
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
var bt playwright.BrowserType
|
||||
@@ -116,7 +126,7 @@ func NewPlayWrightBrowser(opts ...PlayWrightBrowserOptions) (Browser, error) {
|
||||
}
|
||||
|
||||
browser, err := bt.Launch(playwright.BrowserTypeLaunchOptions{
|
||||
Headless: playwright.Bool(true),
|
||||
Headless: playwright.Bool(!opt.ShowBrowser),
|
||||
})
|
||||
if err != nil {
|
||||
return nil, err
|
||||
@@ -175,21 +185,26 @@ func (b playWrightBrowser) updateCookies(_ context.Context, page playwright.Page
|
||||
return nil
|
||||
}
|
||||
|
||||
func (b playWrightBrowser) openPage(_ context.Context, target string) (playwright.Page, error) {
|
||||
func (b playWrightBrowser) openPage(_ context.Context, target string, opts OpenPageOptions) (playwright.Page, error) {
|
||||
page, err := b.ctx.NewPage()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
opts := playwright.PageGotoOptions{
|
||||
pwOpts := playwright.PageGotoOptions{
|
||||
WaitUntil: playwright.WaitUntilStateLoad,
|
||||
}
|
||||
|
||||
if b.timeout > 0 {
|
||||
var ms = float64(b.timeout.Milliseconds())
|
||||
opts.Timeout = &ms
|
||||
pwOpts.Timeout = &ms
|
||||
}
|
||||
resp, err := page.Goto(target, opts)
|
||||
|
||||
if opts.Referer != "" {
|
||||
pwOpts.Referer = playwright.String(opts.Referer)
|
||||
}
|
||||
|
||||
resp, err := page.Goto(target, pwOpts)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@@ -197,6 +212,14 @@ func (b playWrightBrowser) openPage(_ context.Context, target string) (playwrigh
|
||||
slog.Info("opened document", "url", target, "status", resp.Status(), "request", resp.Request())
|
||||
|
||||
if resp.Status() != 200 {
|
||||
time.Sleep(999 * time.Hour * 24)
|
||||
time.Sleep(25 * time.Second)
|
||||
|
||||
_ = page.Close()
|
||||
|
||||
if resp.Status() == 404 {
|
||||
return nil, ErrPageNotFound
|
||||
}
|
||||
slog.Info("invalid status code", "status", resp.Status(), "request", resp.Request())
|
||||
return nil, fmt.Errorf("%w: %d", ErrInvalidStatusCode, resp.Status())
|
||||
}
|
||||
@@ -204,13 +227,12 @@ func (b playWrightBrowser) openPage(_ context.Context, target string) (playwrigh
|
||||
return page, nil
|
||||
}
|
||||
|
||||
func (b playWrightBrowser) Open(ctx context.Context, url string) (Document, error) {
|
||||
func (b playWrightBrowser) Open(ctx context.Context, url string, opts OpenPageOptions) (Document, error) {
|
||||
|
||||
page, err := b.openPage(ctx, url)
|
||||
page, err := b.openPage(ctx, url, opts)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer page.Close()
|
||||
|
||||
err = b.updateCookies(ctx, page)
|
||||
if err != nil {
|
||||
|
Reference in New Issue
Block a user