refactor: restructure API, deduplicate code, expand test coverage
- Extract shared DeferClose helper, removing 14 duplicate copies - Rename PlayWright-prefixed types to cleaner names (BrowserOptions, BrowserSelection, NewBrowser, etc.) - Rename fields: ServerAddress, RequireServer (was DontLaunchOnConnectFailure) - Extract shared initBrowser/mergeOptions into browser_init.go, deduplicating ~120 lines between NewBrowser and NewInteractiveBrowser - Remove unused locator field from document struct - Add tests for all previously untested packages (archive, aislegopher, wegmans, useragents, powerball) and expand existing test suites - Add MIGRATION.md documenting all breaking API changes Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
37
MIGRATION.md
Normal file
37
MIGRATION.md
Normal file
@@ -0,0 +1,37 @@
|
|||||||
|
# Migration Guide
|
||||||
|
|
||||||
|
This guide documents all breaking API changes from the restructuring of go-extractor.
|
||||||
|
|
||||||
|
All core interfaces (`Browser`, `Document`, `Node`, `CookieJar`, `InteractiveBrowser`) are **unchanged**.
|
||||||
|
|
||||||
|
## Type and Function Renames
|
||||||
|
|
||||||
|
```
|
||||||
|
extractor.NewPlayWrightBrowser -> extractor.NewBrowser
|
||||||
|
extractor.PlayWrightBrowserOptions -> extractor.BrowserOptions
|
||||||
|
extractor.PlayWrightBrowserSelection -> extractor.BrowserSelection
|
||||||
|
|
||||||
|
extractor.PlayWrightBrowserSelectionChromium -> extractor.BrowserChromium
|
||||||
|
extractor.PlayWrightBrowserSelectionFirefox -> extractor.BrowserFirefox
|
||||||
|
extractor.PlayWrightBrowserSelectionWebKit -> extractor.BrowserWebKit
|
||||||
|
```
|
||||||
|
|
||||||
|
## Field Renames (inside BrowserOptions)
|
||||||
|
|
||||||
|
```
|
||||||
|
.PlayWrightServerAddress -> .ServerAddress
|
||||||
|
.DontLaunchOnConnectFailure -> .RequireServer
|
||||||
|
```
|
||||||
|
|
||||||
|
The `RequireServer` field is semantically identical to `DontLaunchOnConnectFailure`:
|
||||||
|
|
||||||
|
- Old: `DontLaunchOnConnectFailure: true` meant "return an error if the connection to the Playwright server fails, instead of falling back to launching a local browser"
|
||||||
|
- New: `RequireServer: true` means the same thing
|
||||||
|
|
||||||
|
## New Helper
|
||||||
|
|
||||||
|
```go
|
||||||
|
extractor.DeferClose(closer)
|
||||||
|
```
|
||||||
|
|
||||||
|
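Nil-safe helper intended for use in `defer` statements: it calls `Close` on a non-nil `io.Closer` and discards the returned error. It replaces the `deferClose` functions that were previously copy-pasted across packages.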
|
||||||
29
article_test.go
Normal file
29
article_test.go
Normal file
@@ -0,0 +1,29 @@
|
|||||||
|
package extractor
|
||||||
|
|
||||||
|
import "testing"
|
||||||
|
|
||||||
|
func TestArticle_ZeroValue(t *testing.T) {
|
||||||
|
var a Article
|
||||||
|
if a.Title != "" || a.Content != "" || a.Length != 0 {
|
||||||
|
t.Error("zero-value Article should have empty fields")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestArticle_FieldAssignment(t *testing.T) {
|
||||||
|
a := Article{
|
||||||
|
Title: "Test Title",
|
||||||
|
Content: "<p>hello</p>",
|
||||||
|
TextContent: "hello",
|
||||||
|
Length: 5,
|
||||||
|
Excerpt: "hello",
|
||||||
|
Byline: "Author",
|
||||||
|
SiteName: "Example",
|
||||||
|
Lang: "en",
|
||||||
|
}
|
||||||
|
if a.Title != "Test Title" {
|
||||||
|
t.Errorf("Title = %q, want %q", a.Title, "Test Title")
|
||||||
|
}
|
||||||
|
if a.Length != 5 {
|
||||||
|
t.Errorf("Length = %d, want 5", a.Length)
|
||||||
|
}
|
||||||
|
}
|
||||||
160
browser_init.go
Normal file
160
browser_init.go
Normal file
@@ -0,0 +1,160 @@
|
|||||||
|
package extractor
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"log/slog"
|
||||||
|
"os"
|
||||||
|
|
||||||
|
"github.com/playwright-community/playwright-go"
|
||||||
|
)
|
||||||
|
|
||||||
|
// browserInitResult holds the result of shared browser initialization.
|
||||||
|
type browserInitResult struct {
|
||||||
|
pw *playwright.Playwright
|
||||||
|
browser playwright.Browser
|
||||||
|
bctx playwright.BrowserContext
|
||||||
|
opt BrowserOptions
|
||||||
|
}
|
||||||
|
|
||||||
|
// initBrowser performs the shared browser initialization steps:
|
||||||
|
// start Playwright, select browser type, connect or launch, create context, load cookies.
|
||||||
|
func initBrowser(opt BrowserOptions) (*browserInitResult, error) {
|
||||||
|
pw, err := playwright.Run()
|
||||||
|
if err != nil {
|
||||||
|
err = playwright.Install()
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to install playwright: %w", err)
|
||||||
|
}
|
||||||
|
pw, err = playwright.Run()
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to start playwright: %w", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
var bt playwright.BrowserType
|
||||||
|
switch opt.Browser {
|
||||||
|
case BrowserChromium:
|
||||||
|
bt = pw.Chromium
|
||||||
|
if opt.ServerAddress == "" {
|
||||||
|
opt.ServerAddress = os.Getenv("PLAYWRIGHT_SERVER_ADDRESS_CHROMIUM")
|
||||||
|
}
|
||||||
|
case BrowserFirefox:
|
||||||
|
bt = pw.Firefox
|
||||||
|
if opt.ServerAddress == "" {
|
||||||
|
opt.ServerAddress = os.Getenv("PLAYWRIGHT_SERVER_ADDRESS_FIREFOX")
|
||||||
|
}
|
||||||
|
case BrowserWebKit:
|
||||||
|
bt = pw.WebKit
|
||||||
|
if opt.ServerAddress == "" {
|
||||||
|
opt.ServerAddress = os.Getenv("PLAYWRIGHT_SERVER_ADDRESS_WEBKIT")
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
return nil, ErrInvalidBrowserSelection
|
||||||
|
}
|
||||||
|
|
||||||
|
var browser playwright.Browser
|
||||||
|
launch := true
|
||||||
|
|
||||||
|
if opt.ServerAddress != "" && !opt.UseLocalOnly {
|
||||||
|
launch = false
|
||||||
|
slog.Info("connecting to playwright server", "address", opt.ServerAddress)
|
||||||
|
var timeout float64 = 30000
|
||||||
|
browser, err = bt.Connect(opt.ServerAddress, playwright.BrowserTypeConnectOptions{Timeout: &timeout})
|
||||||
|
if err != nil {
|
||||||
|
if opt.RequireServer {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
slog.Warn("failed to connect to playwright server, launching local browser", "err", err)
|
||||||
|
launch = true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if launch {
|
||||||
|
browser, err = bt.Launch(playwright.BrowserTypeLaunchOptions{
|
||||||
|
Headless: playwright.Bool(!opt.ShowBrowser),
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to launch browser: %w", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
var viewport *playwright.Size
|
||||||
|
if opt.Dimensions.Width > 0 && opt.Dimensions.Height > 0 {
|
||||||
|
viewport = &playwright.Size{
|
||||||
|
Width: opt.Dimensions.Width,
|
||||||
|
Height: opt.Dimensions.Height,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
var scheme *playwright.ColorScheme
|
||||||
|
if opt.DarkMode {
|
||||||
|
scheme = playwright.ColorSchemeDark
|
||||||
|
} else {
|
||||||
|
scheme = playwright.ColorSchemeNoPreference
|
||||||
|
}
|
||||||
|
|
||||||
|
bctx, err := browser.NewContext(playwright.BrowserNewContextOptions{
|
||||||
|
UserAgent: playwright.String(opt.UserAgent),
|
||||||
|
Viewport: viewport,
|
||||||
|
ColorScheme: scheme,
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to create browser context: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if opt.CookieJar != nil {
|
||||||
|
cookies, err := opt.CookieJar.GetAll()
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("error getting cookies from cookie jar: %w", err)
|
||||||
|
}
|
||||||
|
pwCookies := make([]playwright.OptionalCookie, len(cookies))
|
||||||
|
for i, c := range cookies {
|
||||||
|
pwCookies[i] = cookieToPlaywrightOptionalCookie(c)
|
||||||
|
}
|
||||||
|
if err := bctx.AddCookies(pwCookies); err != nil {
|
||||||
|
return nil, fmt.Errorf("error adding cookies to browser: %w", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return &browserInitResult{
|
||||||
|
pw: pw,
|
||||||
|
browser: browser,
|
||||||
|
bctx: bctx,
|
||||||
|
opt: opt,
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// mergeOptions merges variadic BrowserOptions into a base set of defaults.
|
||||||
|
func mergeOptions(base BrowserOptions, opts []BrowserOptions) BrowserOptions {
|
||||||
|
for _, o := range opts {
|
||||||
|
if o.UserAgent != "" {
|
||||||
|
base.UserAgent = o.UserAgent
|
||||||
|
}
|
||||||
|
if o.Browser != "" {
|
||||||
|
base.Browser = o.Browser
|
||||||
|
}
|
||||||
|
if o.Timeout != nil {
|
||||||
|
base.Timeout = o.Timeout
|
||||||
|
}
|
||||||
|
if o.CookieJar != nil {
|
||||||
|
base.CookieJar = o.CookieJar
|
||||||
|
}
|
||||||
|
if o.Dimensions.Width > 0 && o.Dimensions.Height > 0 {
|
||||||
|
base.Dimensions = o.Dimensions
|
||||||
|
}
|
||||||
|
if o.DarkMode {
|
||||||
|
base.DarkMode = true
|
||||||
|
}
|
||||||
|
if o.ServerAddress != "" {
|
||||||
|
base.ServerAddress = o.ServerAddress
|
||||||
|
}
|
||||||
|
if o.RequireServer {
|
||||||
|
base.RequireServer = true
|
||||||
|
}
|
||||||
|
if o.UseLocalOnly {
|
||||||
|
base.UseLocalOnly = true
|
||||||
|
}
|
||||||
|
base.ShowBrowser = o.ShowBrowser
|
||||||
|
}
|
||||||
|
return base
|
||||||
|
}
|
||||||
11
close.go
Normal file
11
close.go
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
package extractor
|
||||||
|
|
||||||
|
import (
	"io"
	"reflect"
)
|
||||||
|
|
||||||
|
// DeferClose safely closes an io.Closer, ignoring the error.
// Intended for use in defer statements.
//
// It is a no-op when cl is nil. That includes a nil pointer, map, slice, func
// or channel stored in the interface: such a value compares unequal to nil,
// and calling Close on it could panic inside the method.
func DeferClose(cl io.Closer) {
	if cl == nil {
		return
	}

	// IsNil may only be called on kinds that can be nil, so check the kind first.
	switch v := reflect.ValueOf(cl); v.Kind() {
	case reflect.Ptr, reflect.Map, reflect.Slice, reflect.Func, reflect.Chan:
		if v.IsNil() {
			return
		}
	}

	// Deliberately discarded: a deferred cleanup has nowhere to report it.
	_ = cl.Close()
}
|
||||||
38
close_test.go
Normal file
38
close_test.go
Normal file
@@ -0,0 +1,38 @@
|
|||||||
|
package extractor
|
||||||
|
|
||||||
|
import (
|
||||||
|
"errors"
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
// mockCloser is a test double with a Close method that records whether it was
// called and returns a configurable error.
type mockCloser struct {
	closed bool  // set to true by Close
	err    error // returned by Close
}

// Close marks the closer as closed and returns the configured error.
func (c *mockCloser) Close() error {
	c.closed = true
	return c.err
}
|
||||||
|
|
||||||
|
func TestDeferClose_Nil(t *testing.T) {
|
||||||
|
// Should not panic on nil.
|
||||||
|
DeferClose(nil)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestDeferClose_Valid(t *testing.T) {
|
||||||
|
m := &mockCloser{}
|
||||||
|
DeferClose(m)
|
||||||
|
if !m.closed {
|
||||||
|
t.Error("DeferClose did not call Close()")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestDeferClose_ErrorIgnored(t *testing.T) {
|
||||||
|
m := &mockCloser{err: errors.New("close error")}
|
||||||
|
// Should not panic even when Close returns an error.
|
||||||
|
DeferClose(m)
|
||||||
|
if !m.closed {
|
||||||
|
t.Error("DeferClose did not call Close()")
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -3,7 +3,6 @@ package main
|
|||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
|
||||||
"os"
|
"os"
|
||||||
|
|
||||||
"github.com/urfave/cli/v3"
|
"github.com/urfave/cli/v3"
|
||||||
@@ -12,9 +11,6 @@ import (
|
|||||||
"gitea.stevedudenhoeffer.com/steve/go-extractor/cmd/browser/pkg/browser"
|
"gitea.stevedudenhoeffer.com/steve/go-extractor/cmd/browser/pkg/browser"
|
||||||
)
|
)
|
||||||
|
|
||||||
func deferClose(cl io.Closer) {
|
|
||||||
_ = cl.Close()
|
|
||||||
}
|
|
||||||
func main() {
|
func main() {
|
||||||
cmd := &cli.Command{
|
cmd := &cli.Command{
|
||||||
Name: "browser",
|
Name: "browser",
|
||||||
@@ -31,7 +27,7 @@ func main() {
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
defer deferClose(b)
|
defer extractor.DeferClose(b)
|
||||||
|
|
||||||
// now open the user specified url
|
// now open the user specified url
|
||||||
doc, err := b.Open(ctx, target, extractor.OpenPageOptions{})
|
doc, err := b.Open(ctx, target, extractor.OpenPageOptions{})
|
||||||
@@ -39,7 +35,7 @@ func main() {
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
defer deferClose(doc)
|
defer extractor.DeferClose(doc)
|
||||||
|
|
||||||
article, err := extractor.Readability(ctx, doc)
|
article, err := extractor.Readability(ctx, doc)
|
||||||
|
|
||||||
|
|||||||
@@ -44,7 +44,7 @@ var Flags = BrowserFlags{
|
|||||||
}
|
}
|
||||||
|
|
||||||
func FromCommand(ctx context.Context, cmd *cli.Command) (extractor.Browser, error) {
|
func FromCommand(ctx context.Context, cmd *cli.Command) (extractor.Browser, error) {
|
||||||
var opts extractor.PlayWrightBrowserOptions
|
var opts extractor.BrowserOptions
|
||||||
|
|
||||||
if ua := cmd.String("user-agent"); ua != "" {
|
if ua := cmd.String("user-agent"); ua != "" {
|
||||||
opts.UserAgent = ua
|
opts.UserAgent = ua
|
||||||
@@ -59,7 +59,7 @@ func FromCommand(ctx context.Context, cmd *cli.Command) (extractor.Browser, erro
|
|||||||
}
|
}
|
||||||
|
|
||||||
if b := cmd.String("browser"); b != "" {
|
if b := cmd.String("browser"); b != "" {
|
||||||
opts.Browser = extractor.PlayWrightBrowserSelection(b)
|
opts.Browser = extractor.BrowserSelection(b)
|
||||||
}
|
}
|
||||||
|
|
||||||
if cf := cmd.String("cookies-file"); cf != "" {
|
if cf := cmd.String("cookies-file"); cf != "" {
|
||||||
@@ -72,5 +72,5 @@ func FromCommand(ctx context.Context, cmd *cli.Command) (extractor.Browser, erro
|
|||||||
|
|
||||||
opts.ShowBrowser = cmd.Bool("visible")
|
opts.ShowBrowser = cmd.Bool("visible")
|
||||||
|
|
||||||
return extractor.NewPlayWrightBrowser(ctx, opts)
|
return extractor.NewBrowser(ctx, opts)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -25,7 +25,6 @@ type document struct {
|
|||||||
pw *playwright.Playwright
|
pw *playwright.Playwright
|
||||||
browser playwright.Browser
|
browser playwright.Browser
|
||||||
page playwright.Page
|
page playwright.Page
|
||||||
locator playwright.Locator
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func newDocument(pw *playwright.Playwright, browser playwright.Browser, page playwright.Page) (Document, error) {
|
func newDocument(pw *playwright.Playwright, browser playwright.Browser, page playwright.Page) (Document, error) {
|
||||||
|
|||||||
136
interactive.go
136
interactive.go
@@ -56,48 +56,17 @@ type interactiveBrowser struct {
|
|||||||
|
|
||||||
// NewInteractiveBrowser creates a headless browser with a page ready for interactive control.
|
// NewInteractiveBrowser creates a headless browser with a page ready for interactive control.
|
||||||
// The context is only used for cancellation during setup.
|
// The context is only used for cancellation during setup.
|
||||||
func NewInteractiveBrowser(ctx context.Context, opts ...PlayWrightBrowserOptions) (InteractiveBrowser, error) {
|
func NewInteractiveBrowser(ctx context.Context, opts ...BrowserOptions) (InteractiveBrowser, error) {
|
||||||
var thirtySeconds = 30 * time.Second
|
var thirtySeconds = 30 * time.Second
|
||||||
opt := PlayWrightBrowserOptions{
|
opt := mergeOptions(BrowserOptions{
|
||||||
UserAgent: "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:142.0) Gecko/20100101 Firefox/142.0",
|
UserAgent: "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:142.0) Gecko/20100101 Firefox/142.0",
|
||||||
Browser: PlayWrightBrowserSelectionChromium,
|
Browser: BrowserChromium,
|
||||||
Timeout: &thirtySeconds,
|
Timeout: &thirtySeconds,
|
||||||
Dimensions: Size{
|
Dimensions: Size{
|
||||||
Width: 1280,
|
Width: 1280,
|
||||||
Height: 720,
|
Height: 720,
|
||||||
},
|
},
|
||||||
}
|
}, opts)
|
||||||
|
|
||||||
for _, o := range opts {
|
|
||||||
if o.UserAgent != "" {
|
|
||||||
opt.UserAgent = o.UserAgent
|
|
||||||
}
|
|
||||||
if o.Browser != "" {
|
|
||||||
opt.Browser = o.Browser
|
|
||||||
}
|
|
||||||
if o.Timeout != nil {
|
|
||||||
opt.Timeout = o.Timeout
|
|
||||||
}
|
|
||||||
if o.CookieJar != nil {
|
|
||||||
opt.CookieJar = o.CookieJar
|
|
||||||
}
|
|
||||||
if o.Dimensions.Width > 0 && o.Dimensions.Height > 0 {
|
|
||||||
opt.Dimensions = o.Dimensions
|
|
||||||
}
|
|
||||||
if o.DarkMode {
|
|
||||||
opt.DarkMode = true
|
|
||||||
}
|
|
||||||
if o.PlayWrightServerAddress != "" {
|
|
||||||
opt.PlayWrightServerAddress = o.PlayWrightServerAddress
|
|
||||||
}
|
|
||||||
if o.DontLaunchOnConnectFailure {
|
|
||||||
opt.DontLaunchOnConnectFailure = true
|
|
||||||
}
|
|
||||||
if o.UseLocalOnly {
|
|
||||||
opt.UseLocalOnly = true
|
|
||||||
}
|
|
||||||
opt.ShowBrowser = o.ShowBrowser
|
|
||||||
}
|
|
||||||
|
|
||||||
if err := ctx.Err(); err != nil {
|
if err := ctx.Err(); err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
@@ -111,98 +80,13 @@ func NewInteractiveBrowser(ctx context.Context, opts ...PlayWrightBrowserOptions
|
|||||||
ch := make(chan result, 1)
|
ch := make(chan result, 1)
|
||||||
|
|
||||||
go func() {
|
go func() {
|
||||||
pw, err := playwright.Run()
|
res, err := initBrowser(opt)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
err = playwright.Install()
|
ch <- result{nil, err}
|
||||||
if err != nil {
|
|
||||||
ch <- result{nil, fmt.Errorf("failed to install playwright: %w", err)}
|
|
||||||
return
|
|
||||||
}
|
|
||||||
pw, err = playwright.Run()
|
|
||||||
if err != nil {
|
|
||||||
ch <- result{nil, fmt.Errorf("failed to start playwright: %w", err)}
|
|
||||||
return
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
var bt playwright.BrowserType
|
|
||||||
switch opt.Browser {
|
|
||||||
case PlayWrightBrowserSelectionChromium:
|
|
||||||
bt = pw.Chromium
|
|
||||||
case PlayWrightBrowserSelectionFirefox:
|
|
||||||
bt = pw.Firefox
|
|
||||||
case PlayWrightBrowserSelectionWebKit:
|
|
||||||
bt = pw.WebKit
|
|
||||||
default:
|
|
||||||
ch <- result{nil, ErrInvalidBrowserSelection}
|
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
var browser playwright.Browser
|
page, err := res.bctx.NewPage()
|
||||||
var launch = true
|
|
||||||
|
|
||||||
if opt.PlayWrightServerAddress != "" && !opt.UseLocalOnly {
|
|
||||||
launch = false
|
|
||||||
var timeout float64 = 30000
|
|
||||||
browser, err = bt.Connect(opt.PlayWrightServerAddress, playwright.BrowserTypeConnectOptions{Timeout: &timeout})
|
|
||||||
if err != nil {
|
|
||||||
if opt.DontLaunchOnConnectFailure {
|
|
||||||
ch <- result{nil, err}
|
|
||||||
return
|
|
||||||
}
|
|
||||||
launch = true
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if launch {
|
|
||||||
browser, err = bt.Launch(playwright.BrowserTypeLaunchOptions{
|
|
||||||
Headless: playwright.Bool(!opt.ShowBrowser),
|
|
||||||
})
|
|
||||||
if err != nil {
|
|
||||||
ch <- result{nil, fmt.Errorf("failed to launch browser: %w", err)}
|
|
||||||
return
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
viewport := &playwright.Size{
|
|
||||||
Width: opt.Dimensions.Width,
|
|
||||||
Height: opt.Dimensions.Height,
|
|
||||||
}
|
|
||||||
|
|
||||||
var scheme *playwright.ColorScheme
|
|
||||||
if opt.DarkMode {
|
|
||||||
scheme = playwright.ColorSchemeDark
|
|
||||||
} else {
|
|
||||||
scheme = playwright.ColorSchemeNoPreference
|
|
||||||
}
|
|
||||||
|
|
||||||
bctx, err := browser.NewContext(playwright.BrowserNewContextOptions{
|
|
||||||
UserAgent: playwright.String(opt.UserAgent),
|
|
||||||
Viewport: viewport,
|
|
||||||
ColorScheme: scheme,
|
|
||||||
})
|
|
||||||
if err != nil {
|
|
||||||
ch <- result{nil, fmt.Errorf("failed to create browser context: %w", err)}
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
if opt.CookieJar != nil {
|
|
||||||
cookies, err := opt.CookieJar.GetAll()
|
|
||||||
if err != nil {
|
|
||||||
ch <- result{nil, fmt.Errorf("error getting cookies from cookie jar: %w", err)}
|
|
||||||
return
|
|
||||||
}
|
|
||||||
pwCookies := make([]playwright.OptionalCookie, len(cookies))
|
|
||||||
for i, c := range cookies {
|
|
||||||
pwCookies[i] = cookieToPlaywrightOptionalCookie(c)
|
|
||||||
}
|
|
||||||
if err := bctx.AddCookies(pwCookies); err != nil {
|
|
||||||
ch <- result{nil, fmt.Errorf("error adding cookies: %w", err)}
|
|
||||||
return
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
page, err := bctx.NewPage()
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
ch <- result{nil, fmt.Errorf("failed to create page: %w", err)}
|
ch <- result{nil, fmt.Errorf("failed to create page: %w", err)}
|
||||||
return
|
return
|
||||||
@@ -210,9 +94,9 @@ func NewInteractiveBrowser(ctx context.Context, opts ...PlayWrightBrowserOptions
|
|||||||
|
|
||||||
ch <- result{
|
ch <- result{
|
||||||
ib: &interactiveBrowser{
|
ib: &interactiveBrowser{
|
||||||
pw: pw,
|
pw: res.pw,
|
||||||
browser: browser,
|
browser: res.browser,
|
||||||
ctx: bctx,
|
ctx: res.bctx,
|
||||||
page: page,
|
page: page,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|||||||
16
mock_test.go
Normal file
16
mock_test.go
Normal file
@@ -0,0 +1,16 @@
|
|||||||
|
package extractor
|
||||||
|
|
||||||
|
import "time"
|
||||||
|
|
||||||
|
// mockDocument implements the Document interface for testing without Playwright.
|
||||||
|
type mockDocument struct {
|
||||||
|
mockNode
|
||||||
|
url string
|
||||||
|
content string
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m mockDocument) URL() string { return m.url }
|
||||||
|
func (m mockDocument) Refresh() error { return nil }
|
||||||
|
func (m mockDocument) Content() (string, error) { return m.content, nil }
|
||||||
|
func (m mockDocument) Close() error { return nil }
|
||||||
|
func (m mockDocument) WaitForNetworkIdle(_ *time.Duration) error { return nil }
|
||||||
23
node_test.go
Normal file
23
node_test.go
Normal file
@@ -0,0 +1,23 @@
|
|||||||
|
package extractor
|
||||||
|
|
||||||
|
import "testing"
|
||||||
|
|
||||||
|
func TestEscapeJavaScript(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
input string
|
||||||
|
want string
|
||||||
|
}{
|
||||||
|
{"hello", "hello"},
|
||||||
|
{"it's", "it\\'s"},
|
||||||
|
{`back\slash`, `back\\slash`},
|
||||||
|
{`both\'`, `both\\\'`},
|
||||||
|
{"", ""},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range tests {
|
||||||
|
got := escapeJavaScript(tt.input)
|
||||||
|
if got != tt.want {
|
||||||
|
t.Errorf("escapeJavaScript(%q) = %q, want %q", tt.input, got, tt.want)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
214
playwright.go
214
playwright.go
@@ -4,9 +4,7 @@ import (
|
|||||||
"context"
|
"context"
|
||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
|
||||||
"log/slog"
|
"log/slog"
|
||||||
"os"
|
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/playwright-community/playwright-go"
|
"github.com/playwright-community/playwright-go"
|
||||||
@@ -24,7 +22,7 @@ type playWrightBrowser struct {
|
|||||||
|
|
||||||
var _ Browser = playWrightBrowser{}
|
var _ Browser = playWrightBrowser{}
|
||||||
|
|
||||||
type PlayWrightBrowserSelection string
|
type BrowserSelection string
|
||||||
|
|
||||||
var (
|
var (
|
||||||
ErrInvalidBrowserSelection = errors.New("invalid browser selection")
|
ErrInvalidBrowserSelection = errors.New("invalid browser selection")
|
||||||
@@ -33,18 +31,18 @@ var (
|
|||||||
)
|
)
|
||||||
|
|
||||||
const (
|
const (
|
||||||
PlayWrightBrowserSelectionChromium PlayWrightBrowserSelection = "chromium"
|
BrowserChromium BrowserSelection = "chromium"
|
||||||
PlayWrightBrowserSelectionFirefox PlayWrightBrowserSelection = "firefox"
|
BrowserFirefox BrowserSelection = "firefox"
|
||||||
PlayWrightBrowserSelectionWebKit PlayWrightBrowserSelection = "webkit"
|
BrowserWebKit BrowserSelection = "webkit"
|
||||||
)
|
)
|
||||||
|
|
||||||
type Size struct {
|
type Size struct {
|
||||||
Width int
|
Width int
|
||||||
Height int
|
Height int
|
||||||
}
|
}
|
||||||
type PlayWrightBrowserOptions struct {
|
type BrowserOptions struct {
|
||||||
UserAgent string // If empty, defaults to "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:133.0) Gecko/20100101 Firefox/133.0"
|
UserAgent string // If empty, defaults to "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:133.0) Gecko/20100101 Firefox/133.0"
|
||||||
Browser PlayWrightBrowserSelection // If unset defaults to Firefox.
|
Browser BrowserSelection // If unset defaults to Firefox.
|
||||||
Timeout *time.Duration // If unset defaults to 30 seconds timeout. If set to 0, no timeout
|
Timeout *time.Duration // If unset defaults to 30 seconds timeout. If set to 0, no timeout
|
||||||
|
|
||||||
// CookieJar will, if set, load all cookies from the cookie jar into the browser and save all cookies from the
|
// CookieJar will, if set, load all cookies from the cookie jar into the browser and save all cookies from the
|
||||||
@@ -56,15 +54,15 @@ type PlayWrightBrowserOptions struct {
|
|||||||
Dimensions Size
|
Dimensions Size
|
||||||
DarkMode bool
|
DarkMode bool
|
||||||
|
|
||||||
// PlayWrightServerAddress is the address of a PlayWright server to connect to.
|
// ServerAddress is the address of a Playwright server to connect to.
|
||||||
// Defaults to the value of the environment variable PLAYWRIGHT_SERVER_ADDRESS.
|
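// If empty, defaults to the value of the browser-specific environment variable PLAYWRIGHT_SERVER_ADDRESS_CHROMIUM, PLAYWRIGHT_SERVER_ADDRESS_FIREFOX, or PLAYWRIGHT_SERVER_ADDRESS_WEBKIT.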
|
||||||
PlayWrightServerAddress string
|
ServerAddress string
|
||||||
|
|
||||||
// DontLaunchOnConnectFailure will, if set, not launch the browser if the connection to the PlayWright server,
|
// RequireServer will, if set, return an error if the connection to the
|
||||||
// and return an error if the connection fails.
|
// Playwright server fails instead of falling back to a local browser launch.
|
||||||
DontLaunchOnConnectFailure bool
|
RequireServer bool
|
||||||
|
|
||||||
// UseLocalOnly will, if set, not connect to the PlayWright server, and instead use the local PlayWright server.
|
// UseLocalOnly will, if set, not connect to the Playwright server, and instead launch a local browser.
|
||||||
UseLocalOnly bool
|
UseLocalOnly bool
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -90,48 +88,14 @@ func playwrightCookieToCookie(cookie playwright.Cookie) Cookie {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func NewPlayWrightBrowser(ctx context.Context, opts ...PlayWrightBrowserOptions) (Browser, error) {
|
func NewBrowser(ctx context.Context, opts ...BrowserOptions) (Browser, error) {
|
||||||
var thirtySeconds = 30 * time.Second
|
var thirtySeconds = 30 * time.Second
|
||||||
opt := PlayWrightBrowserOptions{
|
opt := mergeOptions(BrowserOptions{
|
||||||
UserAgent: "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:142.0) Gecko/20100101 Firefox/142.0",
|
UserAgent: "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:142.0) Gecko/20100101 Firefox/142.0",
|
||||||
Browser: PlayWrightBrowserSelectionFirefox,
|
Browser: BrowserFirefox,
|
||||||
Timeout: &thirtySeconds,
|
Timeout: &thirtySeconds,
|
||||||
DarkMode: false,
|
}, opts)
|
||||||
PlayWrightServerAddress: "",
|
|
||||||
}
|
|
||||||
|
|
||||||
for _, o := range opts {
|
|
||||||
if o.UserAgent != "" {
|
|
||||||
opt.UserAgent = o.UserAgent
|
|
||||||
}
|
|
||||||
if o.Browser != "" {
|
|
||||||
opt.Browser = o.Browser
|
|
||||||
}
|
|
||||||
if o.Timeout != nil {
|
|
||||||
opt.Timeout = o.Timeout
|
|
||||||
}
|
|
||||||
if o.CookieJar != nil {
|
|
||||||
opt.CookieJar = o.CookieJar
|
|
||||||
}
|
|
||||||
if o.Dimensions.Width > 0 && o.Dimensions.Height > 0 {
|
|
||||||
opt.Dimensions = o.Dimensions
|
|
||||||
}
|
|
||||||
if o.DarkMode {
|
|
||||||
opt.DarkMode = true
|
|
||||||
}
|
|
||||||
if o.PlayWrightServerAddress != "" {
|
|
||||||
opt.PlayWrightServerAddress = o.PlayWrightServerAddress
|
|
||||||
}
|
|
||||||
if o.DontLaunchOnConnectFailure {
|
|
||||||
opt.DontLaunchOnConnectFailure = true
|
|
||||||
}
|
|
||||||
if o.UseLocalOnly {
|
|
||||||
opt.UseLocalOnly = true
|
|
||||||
}
|
|
||||||
opt.ShowBrowser = o.ShowBrowser
|
|
||||||
}
|
|
||||||
|
|
||||||
// Check if context is already done
|
|
||||||
if err := ctx.Err(); err != nil {
|
if err := ctx.Err(); err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
@@ -141,145 +105,28 @@ func NewPlayWrightBrowser(ctx context.Context, opts ...PlayWrightBrowserOptions)
|
|||||||
err error
|
err error
|
||||||
}
|
}
|
||||||
|
|
||||||
// Create a channel for the result
|
|
||||||
resultCh := make(chan browserResult, 1)
|
resultCh := make(chan browserResult, 1)
|
||||||
|
|
||||||
// Launch browser initialization in a separate goroutine
|
|
||||||
go func() {
|
go func() {
|
||||||
pw, err := playwright.Run()
|
res, err := initBrowser(opt)
|
||||||
|
|
||||||
if err != nil {
|
|
||||||
err = playwright.Install()
|
|
||||||
|
|
||||||
if err != nil {
|
|
||||||
resultCh <- browserResult{nil, err}
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
pw, err = playwright.Run()
|
|
||||||
|
|
||||||
if err != nil {
|
|
||||||
resultCh <- browserResult{nil, err}
|
|
||||||
return
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
var bt playwright.BrowserType
|
|
||||||
|
|
||||||
switch opt.Browser {
|
|
||||||
case PlayWrightBrowserSelectionChromium:
|
|
||||||
bt = pw.Chromium
|
|
||||||
if opt.PlayWrightServerAddress == "" {
|
|
||||||
opt.PlayWrightServerAddress = os.Getenv("PLAYWRIGHT_SERVER_ADDRESS_CHROMIUM")
|
|
||||||
}
|
|
||||||
|
|
||||||
case PlayWrightBrowserSelectionFirefox:
|
|
||||||
bt = pw.Firefox
|
|
||||||
if opt.PlayWrightServerAddress == "" {
|
|
||||||
opt.PlayWrightServerAddress = os.Getenv("PLAYWRIGHT_SERVER_ADDRESS_FIREFOX")
|
|
||||||
}
|
|
||||||
|
|
||||||
case PlayWrightBrowserSelectionWebKit:
|
|
||||||
bt = pw.WebKit
|
|
||||||
if opt.PlayWrightServerAddress == "" {
|
|
||||||
opt.PlayWrightServerAddress = os.Getenv("PLAYWRIGHT_SERVER_ADDRESS_WEBKIT")
|
|
||||||
}
|
|
||||||
|
|
||||||
default:
|
|
||||||
resultCh <- browserResult{nil, ErrInvalidBrowserSelection}
|
|
||||||
return
|
|
||||||
}
|
|
||||||
var browser playwright.Browser
|
|
||||||
|
|
||||||
var launch = true
|
|
||||||
if opt.PlayWrightServerAddress != "" && !opt.UseLocalOnly {
|
|
||||||
launch = false
|
|
||||||
slog.Info("connecting to playwright server", "address", opt.PlayWrightServerAddress)
|
|
||||||
var timeout float64 = 30000
|
|
||||||
browser, err = bt.Connect(opt.PlayWrightServerAddress, playwright.BrowserTypeConnectOptions{Timeout: &timeout})
|
|
||||||
|
|
||||||
if err != nil {
|
|
||||||
if opt.DontLaunchOnConnectFailure {
|
|
||||||
resultCh <- browserResult{nil, err}
|
|
||||||
return
|
|
||||||
}
|
|
||||||
slog.Warn("failed to connect to playwright server, launching local browser", "err", err)
|
|
||||||
launch = true
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if launch {
|
|
||||||
browser, err = bt.Launch(playwright.BrowserTypeLaunchOptions{
|
|
||||||
Headless: playwright.Bool(!opt.ShowBrowser),
|
|
||||||
})
|
|
||||||
if err != nil {
|
|
||||||
resultCh <- browserResult{nil, err}
|
|
||||||
return
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
var viewport *playwright.Size
|
|
||||||
if opt.Dimensions.Width > 0 && opt.Dimensions.Height > 0 {
|
|
||||||
viewport = &playwright.Size{
|
|
||||||
Width: opt.Dimensions.Width,
|
|
||||||
Height: opt.Dimensions.Height,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
var scheme *playwright.ColorScheme
|
|
||||||
|
|
||||||
if opt.DarkMode {
|
|
||||||
scheme = playwright.ColorSchemeDark
|
|
||||||
} else {
|
|
||||||
scheme = playwright.ColorSchemeNoPreference
|
|
||||||
}
|
|
||||||
|
|
||||||
c, err := browser.NewContext(playwright.BrowserNewContextOptions{
|
|
||||||
UserAgent: playwright.String(opt.UserAgent),
|
|
||||||
Viewport: viewport,
|
|
||||||
ColorScheme: scheme,
|
|
||||||
})
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
resultCh <- browserResult{nil, err}
|
resultCh <- browserResult{nil, err}
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
if opt.CookieJar != nil {
|
|
||||||
cookies, err := opt.CookieJar.GetAll()
|
|
||||||
if err != nil {
|
|
||||||
resultCh <- browserResult{nil, fmt.Errorf("error getting cookies from cookie jar: %w", err)}
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
pwCookies := make([]playwright.OptionalCookie, len(cookies))
|
|
||||||
|
|
||||||
for i, cookie := range cookies {
|
|
||||||
pwCookies[i] = cookieToPlaywrightOptionalCookie(cookie)
|
|
||||||
}
|
|
||||||
|
|
||||||
err = c.AddCookies(pwCookies)
|
|
||||||
|
|
||||||
if err != nil {
|
|
||||||
resultCh <- browserResult{nil, fmt.Errorf("error adding cookies to browser: %w", err)}
|
|
||||||
return
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
resultCh <- browserResult{
|
resultCh <- browserResult{
|
||||||
browser: playWrightBrowser{
|
browser: playWrightBrowser{
|
||||||
pw: pw,
|
pw: res.pw,
|
||||||
browser: browser,
|
browser: res.browser,
|
||||||
userAgent: opt.UserAgent,
|
userAgent: res.opt.UserAgent,
|
||||||
timeout: *opt.Timeout,
|
timeout: *res.opt.Timeout,
|
||||||
cookieJar: opt.CookieJar,
|
cookieJar: res.opt.CookieJar,
|
||||||
ctx: c,
|
ctx: res.bctx,
|
||||||
serverAddr: opt.PlayWrightServerAddress,
|
serverAddr: res.opt.ServerAddress,
|
||||||
},
|
},
|
||||||
err: nil,
|
|
||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
|
|
||||||
// Wait for either context cancellation or browser initialization completion
|
|
||||||
select {
|
select {
|
||||||
case <-ctx.Done():
|
case <-ctx.Done():
|
||||||
return nil, ctx.Err()
|
return nil, ctx.Err()
|
||||||
@@ -367,12 +214,9 @@ func (b playWrightBrowser) Close() error {
|
|||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
func deferClose(cl io.Closer) {
|
|
||||||
_ = cl.Close()
|
|
||||||
}
|
|
||||||
|
|
||||||
func Screenshot(ctx context.Context, target string, timeout time.Duration) ([]byte, error) {
|
func Screenshot(ctx context.Context, target string, timeout time.Duration) ([]byte, error) {
|
||||||
browser, err := NewPlayWrightBrowser(ctx, PlayWrightBrowserOptions{
|
browser, err := NewBrowser(ctx, BrowserOptions{
|
||||||
Timeout: &timeout,
|
Timeout: &timeout,
|
||||||
})
|
})
|
||||||
|
|
||||||
@@ -380,14 +224,14 @@ func Screenshot(ctx context.Context, target string, timeout time.Duration) ([]by
|
|||||||
return nil, fmt.Errorf("error creating browser: %w", err)
|
return nil, fmt.Errorf("error creating browser: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
defer deferClose(browser)
|
defer DeferClose(browser)
|
||||||
|
|
||||||
doc, err := browser.Open(ctx, target, OpenPageOptions{})
|
doc, err := browser.Open(ctx, target, OpenPageOptions{})
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("error opening page: %w", err)
|
return nil, fmt.Errorf("error opening page: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
defer deferClose(doc)
|
defer DeferClose(doc)
|
||||||
|
|
||||||
return doc.Screenshot()
|
return doc.Screenshot()
|
||||||
}
|
}
|
||||||
|
|||||||
72
readability_test.go
Normal file
72
readability_test.go
Normal file
@@ -0,0 +1,72 @@
|
|||||||
|
package extractor
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestReadability_ValidHTML(t *testing.T) {
|
||||||
|
html := `<!DOCTYPE html>
|
||||||
|
<html>
|
||||||
|
<head><title>Test Article</title></head>
|
||||||
|
<body>
|
||||||
|
<article>
|
||||||
|
<h1>Test Article</h1>
|
||||||
|
<p>This is a test article with enough content to be parsed by readability.
|
||||||
|
It needs to have a reasonable amount of text so the algorithm considers it
|
||||||
|
a valid article. Let us add several sentences to make sure this works
|
||||||
|
correctly. The readability library requires a minimum amount of content
|
||||||
|
to successfully extract an article from a page.</p>
|
||||||
|
<p>Here is another paragraph to add more content. We want to make sure
|
||||||
|
that the content is substantial enough for the readability algorithm to
|
||||||
|
consider this a valid article and extract the text properly.</p>
|
||||||
|
</article>
|
||||||
|
</body>
|
||||||
|
</html>`
|
||||||
|
|
||||||
|
doc := mockDocument{
|
||||||
|
url: "https://example.com/article",
|
||||||
|
content: html,
|
||||||
|
}
|
||||||
|
|
||||||
|
article, err := Readability(context.Background(), doc)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("Readability() error = %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if article.Title != "Test Article" {
|
||||||
|
t.Errorf("Title = %q, want %q", article.Title, "Test Article")
|
||||||
|
}
|
||||||
|
|
||||||
|
if article.TextContent == "" {
|
||||||
|
t.Error("TextContent should not be empty")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestReadability_EmptyContent(t *testing.T) {
|
||||||
|
doc := mockDocument{
|
||||||
|
url: "https://example.com/empty",
|
||||||
|
content: "",
|
||||||
|
}
|
||||||
|
|
||||||
|
article, err := Readability(context.Background(), doc)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("Readability() unexpected error = %v", err)
|
||||||
|
}
|
||||||
|
// Empty content should produce an empty article.
|
||||||
|
if article.Title != "" && article.TextContent != "" {
|
||||||
|
t.Error("expected empty article from empty content")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestReadability_InvalidURL(t *testing.T) {
|
||||||
|
doc := mockDocument{
|
||||||
|
url: "://invalid",
|
||||||
|
content: "<html><body><p>text</p></body></html>",
|
||||||
|
}
|
||||||
|
|
||||||
|
_, err := Readability(context.Background(), doc)
|
||||||
|
if err == nil {
|
||||||
|
t.Error("Readability() expected error for invalid URL, got nil")
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -4,7 +4,6 @@ import (
|
|||||||
"context"
|
"context"
|
||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
|
||||||
"net/url"
|
"net/url"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
@@ -27,11 +26,6 @@ type Item struct {
|
|||||||
Price float64
|
Price float64
|
||||||
}
|
}
|
||||||
|
|
||||||
func deferClose(cl io.Closer) {
|
|
||||||
if cl != nil {
|
|
||||||
_ = cl.Close()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
func GetItemFromURL(ctx context.Context, b extractor.Browser, u *url.URL) (Item, error) {
|
func GetItemFromURL(ctx context.Context, b extractor.Browser, u *url.URL) (Item, error) {
|
||||||
return DefaultConfig.GetItemFromURL(ctx, b, u)
|
return DefaultConfig.GetItemFromURL(ctx, b, u)
|
||||||
}
|
}
|
||||||
@@ -57,7 +51,7 @@ func (c Config) GetItemFromURL(ctx context.Context, b extractor.Browser, u *url.
|
|||||||
res.ID, _ = strconv.Atoi(a[3])
|
res.ID, _ = strconv.Atoi(a[3])
|
||||||
|
|
||||||
doc, err := b.Open(ctx, u.String(), extractor.OpenPageOptions{})
|
doc, err := b.Open(ctx, u.String(), extractor.OpenPageOptions{})
|
||||||
defer deferClose(doc)
|
defer extractor.DeferClose(doc)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return res, fmt.Errorf("failed to open page: %w", err)
|
return res, fmt.Errorf("failed to open page: %w", err)
|
||||||
}
|
}
|
||||||
|
|||||||
39
sites/aislegopher/aislegopher_test.go
Normal file
39
sites/aislegopher/aislegopher_test.go
Normal file
@@ -0,0 +1,39 @@
|
|||||||
|
package aislegopher
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"net/url"
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestGetItemFromURL_InvalidHost(t *testing.T) {
|
||||||
|
u, _ := url.Parse("https://example.com/p/slug/123")
|
||||||
|
_, err := GetItemFromURL(context.Background(), nil, u)
|
||||||
|
if err != ErrInvalidURL {
|
||||||
|
t.Errorf("expected ErrInvalidURL, got %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestGetItemFromURL_InvalidPath_NoP(t *testing.T) {
|
||||||
|
u, _ := url.Parse("https://aislegopher.com/x/slug/123")
|
||||||
|
_, err := GetItemFromURL(context.Background(), nil, u)
|
||||||
|
if err != ErrInvalidURL {
|
||||||
|
t.Errorf("expected ErrInvalidURL, got %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestGetItemFromURL_InvalidPath_TooShort(t *testing.T) {
|
||||||
|
u, _ := url.Parse("https://aislegopher.com/p/slug")
|
||||||
|
_, err := GetItemFromURL(context.Background(), nil, u)
|
||||||
|
if err != ErrInvalidURL {
|
||||||
|
t.Errorf("expected ErrInvalidURL, got %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestGetItemFromURL_InvalidPath_TooLong(t *testing.T) {
|
||||||
|
u, _ := url.Parse("https://aislegopher.com/p/slug/123/extra")
|
||||||
|
_, err := GetItemFromURL(context.Background(), nil, u)
|
||||||
|
if err != ErrInvalidURL {
|
||||||
|
t.Errorf("expected ErrInvalidURL, got %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -3,10 +3,10 @@ package main
|
|||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
|
||||||
"net/url"
|
"net/url"
|
||||||
"os"
|
"os"
|
||||||
|
|
||||||
|
"gitea.stevedudenhoeffer.com/steve/go-extractor"
|
||||||
"gitea.stevedudenhoeffer.com/steve/go-extractor/cmd/browser/pkg/browser"
|
"gitea.stevedudenhoeffer.com/steve/go-extractor/cmd/browser/pkg/browser"
|
||||||
"gitea.stevedudenhoeffer.com/steve/go-extractor/sites/aislegopher"
|
"gitea.stevedudenhoeffer.com/steve/go-extractor/sites/aislegopher"
|
||||||
"github.com/urfave/cli/v3"
|
"github.com/urfave/cli/v3"
|
||||||
@@ -22,11 +22,6 @@ func (f AisleGopherFlags) ToConfig(_ *cli.Command) aislegopher.Config {
|
|||||||
return res
|
return res
|
||||||
}
|
}
|
||||||
|
|
||||||
func deferClose(cl io.Closer) {
|
|
||||||
if cl != nil {
|
|
||||||
_ = cl.Close()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
func main() {
|
func main() {
|
||||||
var flags []cli.Flag
|
var flags []cli.Flag
|
||||||
flags = append(flags, browser.Flags...)
|
flags = append(flags, browser.Flags...)
|
||||||
@@ -44,7 +39,7 @@ func main() {
|
|||||||
return fmt.Errorf("failed to create browser: %w", err)
|
return fmt.Errorf("failed to create browser: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
defer deferClose(b)
|
defer extractor.DeferClose(b)
|
||||||
|
|
||||||
arg := c.Args().First()
|
arg := c.Args().First()
|
||||||
|
|
||||||
|
|||||||
@@ -4,7 +4,6 @@ import (
|
|||||||
"context"
|
"context"
|
||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
|
||||||
"log/slog"
|
"log/slog"
|
||||||
"net/url"
|
"net/url"
|
||||||
"strings"
|
"strings"
|
||||||
@@ -39,12 +38,6 @@ func (c Config) validate() Config {
|
|||||||
|
|
||||||
var DefaultConfig = Config{}
|
var DefaultConfig = Config{}
|
||||||
|
|
||||||
func deferClose(cl io.Closer) {
|
|
||||||
if cl != nil {
|
|
||||||
_ = cl.Close()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// IsArchived checks if a url is archived. It returns the archived url if it is archived, or an empty string if it is not.
|
// IsArchived checks if a url is archived. It returns the archived url if it is archived, or an empty string if it is not.
|
||||||
func (c Config) IsArchived(ctx context.Context, b extractor.Browser, target string) (extractor.Document, error) {
|
func (c Config) IsArchived(ctx context.Context, b extractor.Browser, target string) (extractor.Document, error) {
|
||||||
c = c.validate()
|
c = c.validate()
|
||||||
|
|||||||
37
sites/archive/archive_test.go
Normal file
37
sites/archive/archive_test.go
Normal file
@@ -0,0 +1,37 @@
|
|||||||
|
package archive
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestConfig_Validate_Defaults(t *testing.T) {
|
||||||
|
c := Config{}
|
||||||
|
c = c.validate()
|
||||||
|
|
||||||
|
if c.Endpoint != "https://archive.ph" {
|
||||||
|
t.Errorf("Endpoint = %q, want %q", c.Endpoint, "https://archive.ph")
|
||||||
|
}
|
||||||
|
if c.Timeout == nil {
|
||||||
|
t.Fatal("Timeout should not be nil after validate")
|
||||||
|
}
|
||||||
|
if *c.Timeout != 1*time.Hour {
|
||||||
|
t.Errorf("Timeout = %v, want %v", *c.Timeout, 1*time.Hour)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestConfig_Validate_Preserves(t *testing.T) {
|
||||||
|
timeout := 5 * time.Minute
|
||||||
|
c := Config{
|
||||||
|
Endpoint: "https://archive.org",
|
||||||
|
Timeout: &timeout,
|
||||||
|
}
|
||||||
|
c = c.validate()
|
||||||
|
|
||||||
|
if c.Endpoint != "https://archive.org" {
|
||||||
|
t.Errorf("Endpoint = %q, want %q", c.Endpoint, "https://archive.org")
|
||||||
|
}
|
||||||
|
if *c.Timeout != 5*time.Minute {
|
||||||
|
t.Errorf("Timeout = %v, want %v", *c.Timeout, 5*time.Minute)
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -3,12 +3,13 @@ package main
|
|||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"fmt"
|
"fmt"
|
||||||
"github.com/urfave/cli/v3"
|
|
||||||
"io"
|
|
||||||
"os"
|
"os"
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"github.com/urfave/cli/v3"
|
||||||
|
|
||||||
|
"gitea.stevedudenhoeffer.com/steve/go-extractor"
|
||||||
"gitea.stevedudenhoeffer.com/steve/go-extractor/cmd/browser/pkg/browser"
|
"gitea.stevedudenhoeffer.com/steve/go-extractor/cmd/browser/pkg/browser"
|
||||||
"gitea.stevedudenhoeffer.com/steve/go-extractor/sites/duckduckgo"
|
"gitea.stevedudenhoeffer.com/steve/go-extractor/sites/duckduckgo"
|
||||||
)
|
)
|
||||||
@@ -49,12 +50,6 @@ func (f DuckDuckGoFlags) ToConfig(cmd *cli.Command) (duckduckgo.Config, error) {
|
|||||||
return res, nil
|
return res, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func deferClose(cl io.Closer) {
|
|
||||||
if cl != nil {
|
|
||||||
_ = cl.Close()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func main() {
|
func main() {
|
||||||
var flags []cli.Flag
|
var flags []cli.Flag
|
||||||
|
|
||||||
@@ -78,7 +73,7 @@ func main() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
b, err := browser.FromCommand(ctx, command)
|
b, err := browser.FromCommand(ctx, command)
|
||||||
defer deferClose(b)
|
defer extractor.DeferClose(b)
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("failed to create browser: %w", err)
|
return fmt.Errorf("failed to create browser: %w", err)
|
||||||
@@ -89,7 +84,7 @@ func main() {
|
|||||||
return fmt.Errorf("failed to open search: %w", err)
|
return fmt.Errorf("failed to open search: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
defer deferClose(search)
|
defer extractor.DeferClose(search)
|
||||||
|
|
||||||
res := search.GetResults()
|
res := search.GetResults()
|
||||||
fmt.Println("Results:", res)
|
fmt.Println("Results:", res)
|
||||||
|
|||||||
@@ -3,7 +3,6 @@ package duckduckgo
|
|||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
|
||||||
"log/slog"
|
"log/slog"
|
||||||
"net/url"
|
"net/url"
|
||||||
|
|
||||||
@@ -71,12 +70,6 @@ type Result struct {
|
|||||||
Description string
|
Description string
|
||||||
}
|
}
|
||||||
|
|
||||||
func deferClose(cl io.Closer) {
|
|
||||||
if cl != nil {
|
|
||||||
_ = cl.Close()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func (c Config) OpenSearch(ctx context.Context, b extractor.Browser, query string) (SearchPage, error) {
|
func (c Config) OpenSearch(ctx context.Context, b extractor.Browser, query string) (SearchPage, error) {
|
||||||
u := c.ToSearchURL(query)
|
u := c.ToSearchURL(query)
|
||||||
|
|
||||||
@@ -97,7 +90,7 @@ func (c Config) Search(ctx context.Context, b extractor.Browser, query string) (
|
|||||||
|
|
||||||
slog.Info("searching", "url", u, "query", query, "config", c, "browser", b)
|
slog.Info("searching", "url", u, "query", query, "config", c, "browser", b)
|
||||||
doc, err := b.Open(ctx, u.String(), extractor.OpenPageOptions{})
|
doc, err := b.Open(ctx, u.String(), extractor.OpenPageOptions{})
|
||||||
defer deferClose(doc)
|
defer extractor.DeferClose(doc)
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("failed to open url: %w", err)
|
return nil, fmt.Errorf("failed to open url: %w", err)
|
||||||
|
|||||||
@@ -83,3 +83,34 @@ func TestConfig_ToSearchURL_NoRegion(t *testing.T) {
|
|||||||
t.Errorf("kl should be empty when no region, got %q", u.Query().Get("kl"))
|
t.Errorf("kl should be empty when no region, got %q", u.Query().Get("kl"))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestConfig_ToSearchURL_Scheme(t *testing.T) {
|
||||||
|
c := Config{SafeSearch: SafeSearchOff}
|
||||||
|
u := c.ToSearchURL("test")
|
||||||
|
|
||||||
|
if u.Scheme != "https" {
|
||||||
|
t.Errorf("Scheme = %q, want %q", u.Scheme, "https")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestConfig_ToSearchURL_SpecialChars(t *testing.T) {
|
||||||
|
c := Config{SafeSearch: SafeSearchOff}
|
||||||
|
u := c.ToSearchURL("go lang & testing")
|
||||||
|
|
||||||
|
if u.Query().Get("q") != "go lang & testing" {
|
||||||
|
t.Errorf("q = %q, want %q", u.Query().Get("q"), "go lang & testing")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestResult_ZeroValue(t *testing.T) {
|
||||||
|
var r Result
|
||||||
|
if r.URL != "" || r.Title != "" || r.Description != "" {
|
||||||
|
t.Error("zero-value Result should have empty fields")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestDefaultConfig_SafeSearch(t *testing.T) {
|
||||||
|
if DefaultConfig.SafeSearch != SafeSearchOff {
|
||||||
|
t.Errorf("DefaultConfig.SafeSearch = %d, want %d", DefaultConfig.SafeSearch, SafeSearchOff)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
@@ -3,12 +3,12 @@ package main
|
|||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
|
||||||
"os"
|
"os"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
"github.com/urfave/cli/v3"
|
"github.com/urfave/cli/v3"
|
||||||
|
|
||||||
|
"gitea.stevedudenhoeffer.com/steve/go-extractor"
|
||||||
"gitea.stevedudenhoeffer.com/steve/go-extractor/cmd/browser/pkg/browser"
|
"gitea.stevedudenhoeffer.com/steve/go-extractor/cmd/browser/pkg/browser"
|
||||||
"gitea.stevedudenhoeffer.com/steve/go-extractor/sites/google"
|
"gitea.stevedudenhoeffer.com/steve/go-extractor/sites/google"
|
||||||
)
|
)
|
||||||
@@ -42,12 +42,6 @@ func (f GoogleFlags) ToConfig(_ context.Context, cmd *cli.Command) google.Config
|
|||||||
return c
|
return c
|
||||||
}
|
}
|
||||||
|
|
||||||
func deferClose(cl io.Closer) {
|
|
||||||
if cl != nil {
|
|
||||||
_ = cl.Close()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func main() {
|
func main() {
|
||||||
var flags []cli.Flag
|
var flags []cli.Flag
|
||||||
|
|
||||||
@@ -67,7 +61,7 @@ func main() {
|
|||||||
|
|
||||||
b, err := browser.FromCommand(ctx, cli)
|
b, err := browser.FromCommand(ctx, cli)
|
||||||
|
|
||||||
defer deferClose(b)
|
defer extractor.DeferClose(b)
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
|
|||||||
@@ -3,7 +3,6 @@ package google
|
|||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
|
||||||
"net/url"
|
"net/url"
|
||||||
|
|
||||||
"gitea.stevedudenhoeffer.com/steve/go-extractor"
|
"gitea.stevedudenhoeffer.com/steve/go-extractor"
|
||||||
@@ -48,12 +47,6 @@ type Result struct {
|
|||||||
Description string
|
Description string
|
||||||
}
|
}
|
||||||
|
|
||||||
func deferClose(cl io.Closer) {
|
|
||||||
if cl != nil {
|
|
||||||
_ = cl.Close()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func (c Config) Search(ctx context.Context, b extractor.Browser, query string) ([]Result, error) {
|
func (c Config) Search(ctx context.Context, b extractor.Browser, query string) ([]Result, error) {
|
||||||
c = c.validate()
|
c = c.validate()
|
||||||
|
|
||||||
@@ -99,7 +92,7 @@ func (c Config) Search(ctx context.Context, b extractor.Browser, query string) (
|
|||||||
return nil, fmt.Errorf("failed to open url: %w", err)
|
return nil, fmt.Errorf("failed to open url: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
defer deferClose(doc)
|
defer extractor.DeferClose(doc)
|
||||||
|
|
||||||
var res []Result
|
var res []Result
|
||||||
|
|
||||||
|
|||||||
@@ -3,7 +3,6 @@ package megamillions
|
|||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
@@ -33,12 +32,6 @@ type NextDrawing struct {
|
|||||||
Jackpot currency.Amount
|
Jackpot currency.Amount
|
||||||
}
|
}
|
||||||
|
|
||||||
func deferClose(cl io.Closer) {
|
|
||||||
if cl != nil {
|
|
||||||
_ = cl.Close()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func netTicksToTime(t int64) time.Time {
|
func netTicksToTime(t int64) time.Time {
|
||||||
return time.Unix(0, t*100).Add(-621355968000000000)
|
return time.Unix(0, t*100).Add(-621355968000000000)
|
||||||
}
|
}
|
||||||
@@ -218,7 +211,7 @@ func (c Config) GetCurrent(ctx context.Context, b extractor.Browser) (*Drawing,
|
|||||||
return nil, nil, err
|
return nil, nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
defer deferClose(doc)
|
defer extractor.DeferClose(doc)
|
||||||
|
|
||||||
d, err := getDrawing(ctx, doc)
|
d, err := getDrawing(ctx, doc)
|
||||||
|
|
||||||
|
|||||||
@@ -41,3 +41,33 @@ func TestNetTicksToTime_DifferenceIsCorrect(t *testing.T) {
|
|||||||
t.Errorf("expected 1 second difference, got %v", diff)
|
t.Errorf("expected 1 second difference, got %v", diff)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestNetTicksToTime_NotZero(t *testing.T) {
|
||||||
|
// Verify the function produces a non-zero time for typical ticks values.
|
||||||
|
ticks := int64(638396256000000000)
|
||||||
|
result := netTicksToTime(ticks)
|
||||||
|
|
||||||
|
if result.IsZero() {
|
||||||
|
t.Error("netTicksToTime should not return zero time for valid ticks")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestConfig_Validate(t *testing.T) {
|
||||||
|
c := Config{}
|
||||||
|
c = c.validate()
|
||||||
|
_ = c // validate is a no-op, just verify no panic
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestDrawing_ZeroValue(t *testing.T) {
|
||||||
|
var d Drawing
|
||||||
|
if d.MegaBall != 0 || d.Megaplier != 0 {
|
||||||
|
t.Error("zero-value Drawing should have zero fields")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestNextDrawing_ZeroValue(t *testing.T) {
|
||||||
|
var nd NextDrawing
|
||||||
|
if nd.Date != "" {
|
||||||
|
t.Error("zero-value NextDrawing should have empty date")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
@@ -3,7 +3,6 @@ package powerball
|
|||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
@@ -32,12 +31,6 @@ type NextDrawing struct {
|
|||||||
JackpotDollars int
|
JackpotDollars int
|
||||||
}
|
}
|
||||||
|
|
||||||
func deferClose(cl io.Closer) {
|
|
||||||
if cl != nil {
|
|
||||||
_ = cl.Close()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func getDrawing(_ context.Context, doc extractor.Document) (*Drawing, error) {
|
func getDrawing(_ context.Context, doc extractor.Document) (*Drawing, error) {
|
||||||
var drawing Drawing
|
var drawing Drawing
|
||||||
|
|
||||||
@@ -196,7 +189,7 @@ func (c Config) GetCurrent(ctx context.Context, b extractor.Browser) (*Drawing,
|
|||||||
return nil, nil, err
|
return nil, nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
defer deferClose(doc)
|
defer extractor.DeferClose(doc)
|
||||||
|
|
||||||
d, err := getDrawing(ctx, doc)
|
d, err := getDrawing(ctx, doc)
|
||||||
|
|
||||||
|
|||||||
34
sites/powerball/powerball_test.go
Normal file
34
sites/powerball/powerball_test.go
Normal file
@@ -0,0 +1,34 @@
|
|||||||
|
package powerball
|
||||||
|
|
||||||
|
import "testing"
|
||||||
|
|
||||||
|
func TestConfig_Validate(t *testing.T) {
|
||||||
|
c := Config{}
|
||||||
|
c = c.validate()
|
||||||
|
// validate is a no-op for powerball Config, just verify it doesn't panic.
|
||||||
|
_ = c
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestDefaultConfig(t *testing.T) {
|
||||||
|
c := DefaultConfig
|
||||||
|
_ = c
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestDrawing_ZeroValue(t *testing.T) {
|
||||||
|
var d Drawing
|
||||||
|
if d.PowerBall != 0 || d.PowerPlay != 0 {
|
||||||
|
t.Error("zero-value Drawing should have zero fields")
|
||||||
|
}
|
||||||
|
for i, n := range d.Numbers {
|
||||||
|
if n != 0 {
|
||||||
|
t.Errorf("Numbers[%d] = %d, want 0", i, n)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestNextDrawing_ZeroValue(t *testing.T) {
|
||||||
|
var nd NextDrawing
|
||||||
|
if nd.Date != "" || nd.JackpotDollars != 0 {
|
||||||
|
t.Error("zero-value NextDrawing should have empty/zero fields")
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -4,8 +4,6 @@ import (
|
|||||||
"context"
|
"context"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
|
||||||
|
|
||||||
"gitea.stevedudenhoeffer.com/steve/go-extractor"
|
"gitea.stevedudenhoeffer.com/steve/go-extractor"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -13,12 +11,6 @@ type Config struct{}
|
|||||||
|
|
||||||
var DefaultConfig = Config{}
|
var DefaultConfig = Config{}
|
||||||
|
|
||||||
func deferClose(cl io.Closer) {
|
|
||||||
if cl != nil {
|
|
||||||
_ = cl.Close()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func GetMostCommonDesktopUserAgent(ctx context.Context, b extractor.Browser) (string, error) {
|
func GetMostCommonDesktopUserAgent(ctx context.Context, b extractor.Browser) (string, error) {
|
||||||
return DefaultConfig.GetMostCommonDesktopUserAgent(ctx, b)
|
return DefaultConfig.GetMostCommonDesktopUserAgent(ctx, b)
|
||||||
}
|
}
|
||||||
@@ -30,7 +22,7 @@ func (c Config) GetMostCommonDesktopUserAgent(ctx context.Context, b extractor.B
|
|||||||
return "", fmt.Errorf("failed to open useragents.me: %w", err)
|
return "", fmt.Errorf("failed to open useragents.me: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
defer deferClose(doc)
|
defer extractor.DeferClose(doc)
|
||||||
s := doc.Select("#most-common-desktop-useragents-json-csv > div:nth-child(1) > textarea:nth-child(4)")
|
s := doc.Select("#most-common-desktop-useragents-json-csv > div:nth-child(1) > textarea:nth-child(4)")
|
||||||
|
|
||||||
text := ""
|
text := ""
|
||||||
|
|||||||
9
sites/useragents/useragents_test.go
Normal file
9
sites/useragents/useragents_test.go
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
package useragents
|
||||||
|
|
||||||
|
import "testing"
|
||||||
|
|
||||||
|
func TestDefaultConfig(t *testing.T) {
|
||||||
|
// DefaultConfig should be a zero-value Config.
|
||||||
|
c := DefaultConfig
|
||||||
|
_ = c // Just verify it exists and is usable.
|
||||||
|
}
|
||||||
@@ -3,10 +3,10 @@ package main
|
|||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
|
||||||
"net/url"
|
"net/url"
|
||||||
"os"
|
"os"
|
||||||
|
|
||||||
|
"gitea.stevedudenhoeffer.com/steve/go-extractor"
|
||||||
"gitea.stevedudenhoeffer.com/steve/go-extractor/cmd/browser/pkg/browser"
|
"gitea.stevedudenhoeffer.com/steve/go-extractor/cmd/browser/pkg/browser"
|
||||||
|
|
||||||
"github.com/urfave/cli/v3"
|
"github.com/urfave/cli/v3"
|
||||||
@@ -14,12 +14,6 @@ import (
|
|||||||
"gitea.stevedudenhoeffer.com/steve/go-extractor/sites/wegmans"
|
"gitea.stevedudenhoeffer.com/steve/go-extractor/sites/wegmans"
|
||||||
)
|
)
|
||||||
|
|
||||||
func deferClose(cl io.Closer) {
|
|
||||||
if cl != nil {
|
|
||||||
_ = cl.Close()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
type WegmansFlags []cli.Flag
|
type WegmansFlags []cli.Flag
|
||||||
|
|
||||||
var Flags = WegmansFlags{}
|
var Flags = WegmansFlags{}
|
||||||
@@ -44,7 +38,7 @@ func main() {
|
|||||||
cfg := Flags.ToConfig(cmd)
|
cfg := Flags.ToConfig(cmd)
|
||||||
|
|
||||||
b, err := browser.FromCommand(ctx, cmd)
|
b, err := browser.FromCommand(ctx, cmd)
|
||||||
defer deferClose(b)
|
defer extractor.DeferClose(b)
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("error creating browser: %w", err)
|
return fmt.Errorf("error creating browser: %w", err)
|
||||||
|
|||||||
@@ -3,7 +3,6 @@ package wegmans
|
|||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"errors"
|
"errors"
|
||||||
"io"
|
|
||||||
"log/slog"
|
"log/slog"
|
||||||
"net/url"
|
"net/url"
|
||||||
"strconv"
|
"strconv"
|
||||||
@@ -30,12 +29,6 @@ type Item struct {
|
|||||||
Unit string
|
Unit string
|
||||||
}
|
}
|
||||||
|
|
||||||
func deferClose(c io.Closer) {
|
|
||||||
if c != nil {
|
|
||||||
_ = c.Close()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func (c Config) GetItemPrice(ctx context.Context, b extractor.Browser, u *url.URL) (Item, error) {
|
func (c Config) GetItemPrice(ctx context.Context, b extractor.Browser, u *url.URL) (Item, error) {
|
||||||
|
|
||||||
if b == nil {
|
if b == nil {
|
||||||
@@ -68,7 +61,7 @@ func (c Config) GetItemPrice(ctx context.Context, b extractor.Browser, u *url.UR
|
|||||||
}
|
}
|
||||||
|
|
||||||
doc, err := b.Open(ctx, u.String(), extractor.OpenPageOptions{})
|
doc, err := b.Open(ctx, u.String(), extractor.OpenPageOptions{})
|
||||||
defer deferClose(doc)
|
defer extractor.DeferClose(doc)
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return Item{}, err
|
return Item{}, err
|
||||||
|
|||||||
39
sites/wegmans/wegmans_test.go
Normal file
39
sites/wegmans/wegmans_test.go
Normal file
@@ -0,0 +1,39 @@
|
|||||||
|
package wegmans
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"net/url"
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestGetItemPrice_NilBrowser(t *testing.T) {
|
||||||
|
u, _ := url.Parse("https://shop.wegmans.com/product/24921")
|
||||||
|
_, err := DefaultConfig.GetItemPrice(context.Background(), nil, u)
|
||||||
|
if err != ErrNilBrowser {
|
||||||
|
t.Errorf("expected ErrNilBrowser, got %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestGetItemPrice_NilURL(t *testing.T) {
|
||||||
|
// NilBrowser check comes before NilURL, so we can't test NilURL
|
||||||
|
// independently without a real browser. Verify the error sentinel exists.
|
||||||
|
if ErrNilURL.Error() != "url is nil" {
|
||||||
|
t.Errorf("ErrNilURL = %q, want %q", ErrNilURL.Error(), "url is nil")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestGetItemPrice_ErrorSentinels(t *testing.T) {
|
||||||
|
if ErrInvalidURL.Error() != "invalid url" {
|
||||||
|
t.Errorf("ErrInvalidURL = %q, want %q", ErrInvalidURL.Error(), "invalid url")
|
||||||
|
}
|
||||||
|
if ErrNilBrowser.Error() != "browser is nil" {
|
||||||
|
t.Errorf("ErrNilBrowser = %q, want %q", ErrNilBrowser.Error(), "browser is nil")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestItem_ZeroValue(t *testing.T) {
|
||||||
|
var item Item
|
||||||
|
if item.ID != 0 || item.Name != "" || item.Price != 0 || item.UnitPrice != 0 || item.Unit != "" {
|
||||||
|
t.Error("zero-value Item should have empty/zero fields")
|
||||||
|
}
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user