added screenshots

This commit is contained in:
Steve Dudenhoeffer 2024-12-09 13:51:00 -05:00
parent cbd6682257
commit a51f8200ea
2 changed files with 112 additions and 20 deletions

View File

@ -5,8 +5,23 @@ import (
"io"
)
// ScreenshotStyle names how much of a page a screenshot should cover.
type ScreenshotStyle string

// Recognised ScreenshotStyle values.
const (
	// ScreenshotStyleFullPage asks for a full-page capture.
	ScreenshotStyleFullPage = ScreenshotStyle("full")
	// ScreenshotStyleViewport asks for a capture that is not full-page.
	ScreenshotStyleViewport = ScreenshotStyle("viewport")
)
// ScreenshotOptions controls how a screenshot is taken.
type ScreenshotOptions struct {
	// Style selects the capture mode. The PlayWright browser treats an empty
	// Style as ScreenshotStyleFullPage.
	Style ScreenshotStyle
	// Width and Height give the size of the clip rectangle. The PlayWright
	// browser only consults them for ScreenshotStyleViewport, and only when
	// at least one of them is positive.
	Width, Height int
}
// Browser loads web pages and can capture screenshots of them.
type Browser interface {
	// Open loads url and returns the resulting page as a Source.
	Open(ctx context.Context, url string) (Source, error)

	// Screenshot loads url and returns a screenshot of it, taken according
	// to opts, as raw bytes.
	Screenshot(ctx context.Context, url string, opts ScreenshotOptions) ([]byte, error)

	// OpenAndScreenshot loads url and returns both the page as a Source and
	// a screenshot of it taken according to opts.
	OpenAndScreenshot(ctx context.Context, url string, opts ScreenshotOptions) (Source, []byte, error)

	// Close releases the resources held by the browser.
	io.Closer
}

View File

@ -35,7 +35,7 @@ const (
type PlayWrightBrowserOptions struct {
UserAgent string // If empty, defaults to "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.3"
Browser PlayWrightBrowserSelection // If unset defaults to Chromium
Browser PlayWrightBrowserSelection // If unset defaults to Firefox.
Timeout *time.Duration // If unset defaults to 30 seconds timeout. If set to 0, no timeout
// CookieJar will, if set, load all cookies from the cookie jar into the browser and save all cookies from the
@ -69,7 +69,7 @@ func NewPlayWrightBrowser(opts ...PlayWrightBrowserOptions) (Browser, error) {
var thirtySeconds = 30 * time.Second
opt := PlayWrightBrowserOptions{
UserAgent: "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.3",
Browser: PlayWrightBrowserSelectionChromium,
Browser: PlayWrightBrowserSelectionFirefox,
Timeout: &thirtySeconds,
}
@ -156,11 +156,24 @@ func NewPlayWrightBrowser(opts ...PlayWrightBrowserOptions) (Browser, error) {
}, nil
}
func (b playWrightBrowser) Open(_ context.Context, url string) (Source, error) {
if b.userAgent == "" {
b.userAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.3"
// updateCookies copies the cookies that the page's browser context holds for
// the page's current URL into the configured cookie jar. It does nothing when
// no cookie jar is configured.
//
// It returns an error if the cookies cannot be read from the browser context
// or if the jar rejects one of them. It stops at the first rejected cookie;
// cookies already stored in the jar before that point are left in place.
func (b playWrightBrowser) updateCookies(_ context.Context, page playwright.Page) error {
	if b.cookieJar == nil {
		return nil
	}

	cookies, err := page.Context().Cookies(page.URL())
	if err != nil {
		// Previously this error was overwritten by the loop below (or dropped
		// entirely when no cookies were returned), hiding read failures.
		return fmt.Errorf("error getting cookies from browser: %w", err)
	}

	for _, cookie := range cookies {
		// TODO: add support for deleting cookies from the jar which are deleted in the browser
		if err := b.cookieJar.Set(playwrightCookieToCookie(cookie)); err != nil {
			return fmt.Errorf("error setting cookie in cookie jar: %w", err)
		}
	}
	return nil
}
func (b playWrightBrowser) openPage(_ context.Context, target string) (playwright.Page, error) {
page, err := b.ctx.NewPage()
if err != nil {
return nil, err
@ -174,34 +187,37 @@ func (b playWrightBrowser) Open(_ context.Context, url string) (Source, error) {
var ms = float64(b.timeout.Milliseconds())
opts.Timeout = &ms
}
resp, err := page.Goto(url, opts)
resp, err := page.Goto(target, opts)
if err != nil {
return nil, err
}
slog.Info("response", "response", resp)
slog.Info("opened page", "url", target, "status", resp.Status(), "request", resp.Request())
if resp.Status() != 200 {
return nil, fmt.Errorf("%w: %d", ErrInvalidStatusCode, resp.Status)
slog.Info("invalid status code", "status", resp.Status(), "request", resp.Request())
return nil, fmt.Errorf("%w: %d", ErrInvalidStatusCode, resp.Status())
}
text, err := resp.Text()
return page, nil
}
func (b playWrightBrowser) Open(ctx context.Context, url string) (Source, error) {
page, err := b.openPage(ctx, url)
if err != nil {
return nil, err
}
defer page.Close()
text, err := page.Content()
if err != nil {
return nil, err
}
if b.cookieJar != nil {
cookies, err := page.Context().Cookies(page.URL())
for _, cookie := range cookies {
// TODO: add support for deleting cookies from the jar which are deleted in the browser
err = b.cookieJar.Set(playwrightCookieToCookie(cookie))
err = b.updateCookies(ctx, page)
if err != nil {
return nil, fmt.Errorf("error setting cookie in cookie jar: %w", err)
}
}
return nil, err
}
return source{
@ -210,6 +226,67 @@ func (b playWrightBrowser) Open(_ context.Context, url string) (Source, error) {
}, nil
}
// getScreenshot captures the given, already opened page according to opts and
// returns whatever page.Screenshot yields.
//
// An empty opts.Style is treated as ScreenshotStyleFullPage. For
// ScreenshotStyleViewport the full-page flag is switched off and, when either
// opts.Width or opts.Height is positive, a clip rectangle of that size is
// requested; the rectangle's origin is left at its zero value. Any other style
// leaves the playwright screenshot options untouched.
func (b playWrightBrowser) getScreenshot(_ context.Context, page playwright.Page, opts ScreenshotOptions) ([]byte, error) {
	settings := playwright.PageScreenshotOptions{}

	switch opts.Style {
	case "", ScreenshotStyleFullPage:
		settings.FullPage = playwright.Bool(true)

	case ScreenshotStyleViewport:
		settings.FullPage = playwright.Bool(false)

		wantsClip := opts.Width > 0 || opts.Height > 0
		if wantsClip {
			clip := playwright.Rect{
				Width:  float64(opts.Width),
				Height: float64(opts.Height),
			}
			settings.Clip = &clip
		}
	}

	return page.Screenshot(settings)
}
// Screenshot opens url in a new page, captures it according to opts and
// returns the screenshot bytes. The page is closed before returning.
//
// Unlike OpenAndScreenshot, this method does not copy the page's cookies into
// the cookie jar.
func (b playWrightBrowser) Screenshot(ctx context.Context, url string, opts ScreenshotOptions) ([]byte, error) {
	pg, openErr := b.openPage(ctx, url)
	if openErr != nil {
		return nil, openErr
	}
	defer pg.Close()

	return b.getScreenshot(ctx, pg, opts)
}
// OpenAndScreenshot opens url in a single page and returns both the page
// content as a Source and a screenshot taken according to opts. The page is
// closed before returning.
//
// The steps run in a fixed order: read the content, take the screenshot, then
// copy the page's cookies into the cookie jar. The first failing step aborts
// the call and its error is returned with nil results.
func (b playWrightBrowser) OpenAndScreenshot(ctx context.Context, url string, opts ScreenshotOptions) (Source, []byte, error) {
	pg, err := b.openPage(ctx, url)
	if err != nil {
		return nil, nil, err
	}
	defer pg.Close()

	html, err := pg.Content()
	if err != nil {
		return nil, nil, err
	}

	img, err := b.getScreenshot(ctx, pg, opts)
	if err != nil {
		return nil, nil, err
	}

	if err := b.updateCookies(ctx, pg); err != nil {
		return nil, nil, err
	}

	result := source{
		sourceUrl: url,
		content:   html,
	}
	return result, img, nil
}
func (b playWrightBrowser) Close() error {
return errors.Join(
b.ctx.Close(),