From a51f8200ea97de3edca054d492c527ed62c15775 Mon Sep 17 00:00:00 2001 From: Steve Dudenhoeffer Date: Mon, 9 Dec 2024 13:51:00 -0500 Subject: [PATCH] added screenshots --- browser.go | 15 +++++++ playwright.go | 117 +++++++++++++++++++++++++++++++++++++++++--------- 2 files changed, 112 insertions(+), 20 deletions(-) diff --git a/browser.go b/browser.go index 30c61b2..73a91bf 100644 --- a/browser.go +++ b/browser.go @@ -5,8 +5,23 @@ import ( "io" ) +type ScreenshotStyle string + +const ( + ScreenshotStyleFullPage ScreenshotStyle = "full" + ScreenshotStyleViewport ScreenshotStyle = "viewport" +) + +type ScreenshotOptions struct { + Style ScreenshotStyle + Width int + Height int +} + type Browser interface { io.Closer Open(ctx context.Context, url string) (Source, error) + Screenshot(ctx context.Context, url string, opts ScreenshotOptions) ([]byte, error) + OpenAndScreenshot(ctx context.Context, url string, opts ScreenshotOptions) (Source, []byte, error) } diff --git a/playwright.go b/playwright.go index e67db5e..bf4500b 100644 --- a/playwright.go +++ b/playwright.go @@ -35,7 +35,7 @@ const ( type PlayWrightBrowserOptions struct { UserAgent string // If empty, defaults to "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.3" - Browser PlayWrightBrowserSelection // If unset defaults to Chromium + Browser PlayWrightBrowserSelection // If unset defaults to Firefox. Timeout *time.Duration // If unset defaults to 30 seconds timeout. If set to 0, no timeout // CookieJar will, if set, load all cookies from the cookie jar into the browser and save all cookies from the @@ -69,7 +69,7 @@ func NewPlayWrightBrowser(opts ...PlayWrightBrowserOptions) (Browser, error) { var thirtySeconds = 30 * time.Second opt := PlayWrightBrowserOptions{ UserAgent: "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.3", - Browser: PlayWrightBrowserSelectionChromium, + Browser: PlayWrightBrowserSelectionFirefox, Timeout: &thirtySeconds, } @@ -156,11 +156,24 @@ func NewPlayWrightBrowser(opts ...PlayWrightBrowserOptions) (Browser, error) { }, nil } -func (b playWrightBrowser) Open(_ context.Context, url string) (Source, error) { - if b.userAgent == "" { - b.userAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.3" +func (b playWrightBrowser) updateCookies(_ context.Context, page playwright.Page) error { + if b.cookieJar != nil { + cookies, err := page.Context().Cookies(page.URL()) + + for _, cookie := range cookies { + // TODO: add support for deleting cookies from the jar which are deleted in the browser + err = b.cookieJar.Set(playwrightCookieToCookie(cookie)) + + if err != nil { + return fmt.Errorf("error setting cookie in cookie jar: %w", err) + } + } } + return nil +} + +func (b playWrightBrowser) openPage(_ context.Context, target string) (playwright.Page, error) { page, err := b.ctx.NewPage() if err != nil { return nil, err @@ -174,34 +187,37 @@ func (b playWrightBrowser) Open(_ context.Context, url string) (Source, error) { var ms = float64(b.timeout.Milliseconds()) opts.Timeout = &ms } - resp, err := page.Goto(url, opts) + resp, err := page.Goto(target, opts) if err != nil { return nil, err } - slog.Info("response", "response", resp) + slog.Info("opened page", "url", target, "status", resp.Status(), "request", resp.Request()) if resp.Status() != 200 { - return nil, fmt.Errorf("%w: %d", ErrInvalidStatusCode, resp.Status) + slog.Info("invalid status code", "status", resp.Status(), "request", resp.Request()) + return nil, fmt.Errorf("%w: %d", ErrInvalidStatusCode, resp.Status()) } - text, err := resp.Text() + return page, nil +} +func (b playWrightBrowser) Open(ctx context.Context, url string) (Source, error) { + + page, err := b.openPage(ctx, url) + if err != nil { + return nil, err + } + defer page.Close() + + text, err := page.Content() if err != nil { return nil, err } - if b.cookieJar != nil { - cookies, err := page.Context().Cookies(page.URL()) - - for _, cookie := range cookies { - // TODO: add support for deleting cookies from the jar which are deleted in the browser - err = b.cookieJar.Set(playwrightCookieToCookie(cookie)) - - if err != nil { - return nil, fmt.Errorf("error setting cookie in cookie jar: %w", err) - } - } + err = b.updateCookies(ctx, page) + if err != nil { + return nil, err } return source{ @@ -210,6 +226,67 @@ func (b playWrightBrowser) Open(_ context.Context, url string) (Source, error) { }, nil } +func (b playWrightBrowser) getScreenshot(_ context.Context, page playwright.Page, opts ScreenshotOptions) ([]byte, error) { + var pwOpts playwright.PageScreenshotOptions + + if opts.Style == "" { + opts.Style = ScreenshotStyleFullPage + } + + if opts.Style == ScreenshotStyleFullPage { + pwOpts.FullPage = playwright.Bool(true) + } else if opts.Style == ScreenshotStyleViewport { + pwOpts.FullPage = playwright.Bool(false) + + if opts.Width > 0 || opts.Height > 0 { + pwOpts.Clip = &playwright.Rect{ + Width: float64(opts.Width), + Height: float64(opts.Height), + } + } + } + + return page.Screenshot(pwOpts) +} + +func (b playWrightBrowser) Screenshot(ctx context.Context, url string, opts ScreenshotOptions) ([]byte, error) { + page, err := b.openPage(ctx, url) + if err != nil { + return nil, err + } + defer page.Close() + + return b.getScreenshot(ctx, page, opts) +} + +func (b playWrightBrowser) OpenAndScreenshot(ctx context.Context, url string, opts ScreenshotOptions) (Source, []byte, error) { + page, err := b.openPage(ctx, url) + if err != nil { + return nil, nil, err + } + defer page.Close() + + text, err := page.Content() + if err != nil { + return nil, nil, err + } + + screenshot, err := b.getScreenshot(ctx, page, opts) + if err != nil { + return nil, nil, err + } + + err = b.updateCookies(ctx, page) + if err != nil { + return nil, nil, err + } + + return source{ + sourceUrl: url, + content: text, + }, screenshot, nil +} + func (b playWrightBrowser) Close() error { return errors.Join( b.ctx.Close(),