added screenshots
This commit is contained in:
parent
cbd6682257
commit
a51f8200ea
15
browser.go
15
browser.go
@ -5,8 +5,23 @@ import (
|
|||||||
"io"
|
"io"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// ScreenshotStyle selects which part of a page a screenshot captures.
type ScreenshotStyle string

// Supported screenshot styles.
const (
	// ScreenshotStyleFullPage requests a capture of the full page.
	ScreenshotStyleFullPage ScreenshotStyle = "full"

	// ScreenshotStyleViewport requests a capture limited to the viewport.
	ScreenshotStyleViewport ScreenshotStyle = "viewport"
)
|
||||||
|
|
||||||
|
type ScreenshotOptions struct {
|
||||||
|
Style ScreenshotStyle
|
||||||
|
Width int
|
||||||
|
Height int
|
||||||
|
}
|
||||||
|
|
||||||
type Browser interface {
|
type Browser interface {
|
||||||
io.Closer
|
io.Closer
|
||||||
|
|
||||||
Open(ctx context.Context, url string) (Source, error)
|
Open(ctx context.Context, url string) (Source, error)
|
||||||
|
Screenshot(ctx context.Context, url string, opts ScreenshotOptions) ([]byte, error)
|
||||||
|
OpenAndScreenshot(ctx context.Context, url string, opts ScreenshotOptions) (Source, []byte, error)
|
||||||
}
|
}
|
||||||
|
115
playwright.go
115
playwright.go
@ -35,7 +35,7 @@ const (
|
|||||||
|
|
||||||
type PlayWrightBrowserOptions struct {
|
type PlayWrightBrowserOptions struct {
|
||||||
UserAgent string // If empty, defaults to "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.3"
|
UserAgent string // If empty, defaults to "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.3"
|
||||||
Browser PlayWrightBrowserSelection // If unset defaults to Chromium
|
Browser PlayWrightBrowserSelection // If unset defaults to Firefox.
|
||||||
Timeout *time.Duration // If unset defaults to 30 seconds timeout. If set to 0, no timeout
|
Timeout *time.Duration // If unset defaults to 30 seconds timeout. If set to 0, no timeout
|
||||||
|
|
||||||
// CookieJar will, if set, load all cookies from the cookie jar into the browser and save all cookies from the
|
// CookieJar will, if set, load all cookies from the cookie jar into the browser and save all cookies from the
|
||||||
@ -69,7 +69,7 @@ func NewPlayWrightBrowser(opts ...PlayWrightBrowserOptions) (Browser, error) {
|
|||||||
var thirtySeconds = 30 * time.Second
|
var thirtySeconds = 30 * time.Second
|
||||||
opt := PlayWrightBrowserOptions{
|
opt := PlayWrightBrowserOptions{
|
||||||
UserAgent: "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.3",
|
UserAgent: "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.3",
|
||||||
Browser: PlayWrightBrowserSelectionChromium,
|
Browser: PlayWrightBrowserSelectionFirefox,
|
||||||
Timeout: &thirtySeconds,
|
Timeout: &thirtySeconds,
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -156,11 +156,24 @@ func NewPlayWrightBrowser(opts ...PlayWrightBrowserOptions) (Browser, error) {
|
|||||||
}, nil
|
}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (b playWrightBrowser) Open(_ context.Context, url string) (Source, error) {
|
func (b playWrightBrowser) updateCookies(_ context.Context, page playwright.Page) error {
|
||||||
if b.userAgent == "" {
|
if b.cookieJar != nil {
|
||||||
b.userAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.3"
|
cookies, err := page.Context().Cookies(page.URL())
|
||||||
|
|
||||||
|
for _, cookie := range cookies {
|
||||||
|
// TODO: add support for deleting cookies from the jar which are deleted in the browser
|
||||||
|
err = b.cookieJar.Set(playwrightCookieToCookie(cookie))
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("error setting cookie in cookie jar: %w", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (b playWrightBrowser) openPage(_ context.Context, target string) (playwright.Page, error) {
|
||||||
page, err := b.ctx.NewPage()
|
page, err := b.ctx.NewPage()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
@ -174,34 +187,37 @@ func (b playWrightBrowser) Open(_ context.Context, url string) (Source, error) {
|
|||||||
var ms = float64(b.timeout.Milliseconds())
|
var ms = float64(b.timeout.Milliseconds())
|
||||||
opts.Timeout = &ms
|
opts.Timeout = &ms
|
||||||
}
|
}
|
||||||
resp, err := page.Goto(url, opts)
|
resp, err := page.Goto(target, opts)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
slog.Info("response", "response", resp)
|
slog.Info("opened page", "url", target, "status", resp.Status(), "request", resp.Request())
|
||||||
|
|
||||||
if resp.Status() != 200 {
|
if resp.Status() != 200 {
|
||||||
return nil, fmt.Errorf("%w: %d", ErrInvalidStatusCode, resp.Status)
|
slog.Info("invalid status code", "status", resp.Status(), "request", resp.Request())
|
||||||
|
return nil, fmt.Errorf("%w: %d", ErrInvalidStatusCode, resp.Status())
|
||||||
}
|
}
|
||||||
|
|
||||||
text, err := resp.Text()
|
return page, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (b playWrightBrowser) Open(ctx context.Context, url string) (Source, error) {
|
||||||
|
|
||||||
|
page, err := b.openPage(ctx, url)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
defer page.Close()
|
||||||
|
|
||||||
|
text, err := page.Content()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
if b.cookieJar != nil {
|
err = b.updateCookies(ctx, page)
|
||||||
cookies, err := page.Context().Cookies(page.URL())
|
|
||||||
|
|
||||||
for _, cookie := range cookies {
|
|
||||||
// TODO: add support for deleting cookies from the jar which are deleted in the browser
|
|
||||||
err = b.cookieJar.Set(playwrightCookieToCookie(cookie))
|
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("error setting cookie in cookie jar: %w", err)
|
return nil, err
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return source{
|
return source{
|
||||||
@ -210,6 +226,67 @@ func (b playWrightBrowser) Open(_ context.Context, url string) (Source, error) {
|
|||||||
}, nil
|
}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (b playWrightBrowser) getScreenshot(_ context.Context, page playwright.Page, opts ScreenshotOptions) ([]byte, error) {
|
||||||
|
var pwOpts playwright.PageScreenshotOptions
|
||||||
|
|
||||||
|
if opts.Style == "" {
|
||||||
|
opts.Style = ScreenshotStyleFullPage
|
||||||
|
}
|
||||||
|
|
||||||
|
if opts.Style == ScreenshotStyleFullPage {
|
||||||
|
pwOpts.FullPage = playwright.Bool(true)
|
||||||
|
} else if opts.Style == ScreenshotStyleViewport {
|
||||||
|
pwOpts.FullPage = playwright.Bool(false)
|
||||||
|
|
||||||
|
if opts.Width > 0 || opts.Height > 0 {
|
||||||
|
pwOpts.Clip = &playwright.Rect{
|
||||||
|
Width: float64(opts.Width),
|
||||||
|
Height: float64(opts.Height),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return page.Screenshot(pwOpts)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (b playWrightBrowser) Screenshot(ctx context.Context, url string, opts ScreenshotOptions) ([]byte, error) {
|
||||||
|
page, err := b.openPage(ctx, url)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
defer page.Close()
|
||||||
|
|
||||||
|
return b.getScreenshot(ctx, page, opts)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (b playWrightBrowser) OpenAndScreenshot(ctx context.Context, url string, opts ScreenshotOptions) (Source, []byte, error) {
|
||||||
|
page, err := b.openPage(ctx, url)
|
||||||
|
if err != nil {
|
||||||
|
return nil, nil, err
|
||||||
|
}
|
||||||
|
defer page.Close()
|
||||||
|
|
||||||
|
text, err := page.Content()
|
||||||
|
if err != nil {
|
||||||
|
return nil, nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
screenshot, err := b.getScreenshot(ctx, page, opts)
|
||||||
|
if err != nil {
|
||||||
|
return nil, nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
err = b.updateCookies(ctx, page)
|
||||||
|
if err != nil {
|
||||||
|
return nil, nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
return source{
|
||||||
|
sourceUrl: url,
|
||||||
|
content: text,
|
||||||
|
}, screenshot, nil
|
||||||
|
}
|
||||||
|
|
||||||
func (b playWrightBrowser) Close() error {
|
func (b playWrightBrowser) Close() error {
|
||||||
return errors.Join(
|
return errors.Join(
|
||||||
b.ctx.Close(),
|
b.ctx.Close(),
|
||||||
|
Loading…
x
Reference in New Issue
Block a user