diff --git a/MIGRATION.md b/MIGRATION.md
new file mode 100644
index 0000000..b31f9d1
--- /dev/null
+++ b/MIGRATION.md
@@ -0,0 +1,37 @@
+# Migration Guide
+
+This guide documents all breaking API changes from the restructuring of go-extractor.
+
+All core interfaces (`Browser`, `Document`, `Node`, `CookieJar`, `InteractiveBrowser`) are **unchanged**.
+
+## Type and Function Renames
+
+```
+extractor.NewPlayWrightBrowser -> extractor.NewBrowser
+extractor.PlayWrightBrowserOptions -> extractor.BrowserOptions
+extractor.PlayWrightBrowserSelection -> extractor.BrowserSelection
+
+extractor.PlayWrightBrowserSelectionChromium -> extractor.BrowserChromium
+extractor.PlayWrightBrowserSelectionFirefox -> extractor.BrowserFirefox
+extractor.PlayWrightBrowserSelectionWebKit -> extractor.BrowserWebKit
+```
+
+## Field Renames (inside BrowserOptions)
+
+```
+.PlayWrightServerAddress -> .ServerAddress
+.DontLaunchOnConnectFailure -> .RequireServer
+```
+
+`RequireServer` is a pure rename of `DontLaunchOnConnectFailure`; its behavior is unchanged:
+
+- Old: `DontLaunchOnConnectFailure: true` meant "return an error if the connection to the server fails, instead of launching a local browser"
+- New: `RequireServer: true` means exactly the same thing
+
+## New Helper
+
+```go
+extractor.DeferClose(closer)
+```
+
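+`DeferClose` closes an `io.Closer` and discards the returned error; passing `nil` is a no-op, so it is safe to use in a `defer` statement. It replaces the `deferClose` functions that were previously copy-pasted across packages.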
diff --git a/article_test.go b/article_test.go
new file mode 100644
index 0000000..bb7a6b4
--- /dev/null
+++ b/article_test.go
@@ -0,0 +1,29 @@
+package extractor
+
+import "testing"
+
+func TestArticle_ZeroValue(t *testing.T) {
+ var a Article
+ if a.Title != "" || a.Content != "" || a.Length != 0 {
+ t.Error("zero-value Article should have empty fields")
+ }
+}
+
+func TestArticle_FieldAssignment(t *testing.T) {
+ a := Article{
+ Title: "Test Title",
+ Content: "
hello
",
+ TextContent: "hello",
+ Length: 5,
+ Excerpt: "hello",
+ Byline: "Author",
+ SiteName: "Example",
+ Lang: "en",
+ }
+ if a.Title != "Test Title" {
+ t.Errorf("Title = %q, want %q", a.Title, "Test Title")
+ }
+ if a.Length != 5 {
+ t.Errorf("Length = %d, want 5", a.Length)
+ }
+}
diff --git a/browser_init.go b/browser_init.go
new file mode 100644
index 0000000..f8d8f6a
--- /dev/null
+++ b/browser_init.go
@@ -0,0 +1,160 @@
+package extractor
+
+import (
+ "fmt"
+ "log/slog"
+ "os"
+
+ "github.com/playwright-community/playwright-go"
+)
+
+// browserInitResult holds the result of shared browser initialization.
+type browserInitResult struct {
+ pw *playwright.Playwright // Playwright instance obtained from playwright.Run
+ browser playwright.Browser // browser returned by BrowserType.Connect or BrowserType.Launch
+ bctx playwright.BrowserContext // context created on browser; cookies from the cookie jar are added to it
+ opt BrowserOptions // options as used by initBrowser; ServerAddress may have been filled from the environment
+}
+
+// initBrowser performs the setup shared by NewBrowser and NewInteractiveBrowser:
+// start Playwright (installing it if the first start fails), select the browser
+// type, connect to a server or launch locally, create a context, load cookies.
+// An empty opt.ServerAddress is filled from the selected browser's
+// PLAYWRIGHT_SERVER_ADDRESS_* environment variable; the result carries the resolved
+// options. On failure, whatever was already started is shut down before returning.
+func initBrowser(opt BrowserOptions) (_ *browserInitResult, err error) {
+	pw, err := playwright.Run()
+	if err != nil {
+		if err = playwright.Install(); err != nil {
+			return nil, fmt.Errorf("failed to install playwright: %w", err)
+		}
+		if pw, err = playwright.Run(); err != nil {
+			return nil, fmt.Errorf("failed to start playwright: %w", err)
+		}
+	}
+
+	var browser playwright.Browser
+	defer func() {
+		if err == nil {
+			return
+		}
+		// Best-effort teardown; the caller needs the original error, not these.
+		if browser != nil {
+			_ = browser.Close()
+		}
+		_ = pw.Stop()
+	}()
+
+	var bt playwright.BrowserType
+	var envVar string
+	switch opt.Browser {
+	case BrowserChromium:
+		bt, envVar = pw.Chromium, "PLAYWRIGHT_SERVER_ADDRESS_CHROMIUM"
+	case BrowserFirefox:
+		bt, envVar = pw.Firefox, "PLAYWRIGHT_SERVER_ADDRESS_FIREFOX"
+	case BrowserWebKit:
+		bt, envVar = pw.WebKit, "PLAYWRIGHT_SERVER_ADDRESS_WEBKIT"
+	default:
+		return nil, ErrInvalidBrowserSelection
+	}
+	if opt.ServerAddress == "" {
+		opt.ServerAddress = os.Getenv(envVar)
+	}
+
+	launch := true
+	if opt.ServerAddress != "" && !opt.UseLocalOnly {
+		launch = false
+		slog.Info("connecting to playwright server", "address", opt.ServerAddress)
+		var timeout float64 = 30000
+		browser, err = bt.Connect(opt.ServerAddress, playwright.BrowserTypeConnectOptions{Timeout: &timeout})
+		if err != nil {
+			if opt.RequireServer {
+				return nil, err
+			}
+			slog.Warn("failed to connect to playwright server, launching local browser", "err", err)
+			launch = true
+		}
+	}
+
+	if launch {
+		browser, err = bt.Launch(playwright.BrowserTypeLaunchOptions{
+			Headless: playwright.Bool(!opt.ShowBrowser),
+		})
+		if err != nil {
+			return nil, fmt.Errorf("failed to launch browser: %w", err)
+		}
+	}
+
+	var viewport *playwright.Size
+	if opt.Dimensions.Width > 0 && opt.Dimensions.Height > 0 {
+		viewport = &playwright.Size{
+			Width:  opt.Dimensions.Width,
+			Height: opt.Dimensions.Height,
+		}
+	}
+
+	scheme := playwright.ColorSchemeNoPreference
+	if opt.DarkMode {
+		scheme = playwright.ColorSchemeDark
+	}
+
+	bctx, err := browser.NewContext(playwright.BrowserNewContextOptions{
+		UserAgent:   playwright.String(opt.UserAgent),
+		Viewport:    viewport,
+		ColorScheme: scheme,
+	})
+	if err != nil {
+		return nil, fmt.Errorf("failed to create browser context: %w", err)
+	}
+
+	if opt.CookieJar != nil {
+		cookies, cerr := opt.CookieJar.GetAll()
+		if cerr != nil {
+			return nil, fmt.Errorf("error getting cookies from cookie jar: %w", cerr)
+		}
+		pwCookies := make([]playwright.OptionalCookie, len(cookies))
+		for i, c := range cookies {
+			pwCookies[i] = cookieToPlaywrightOptionalCookie(c)
+		}
+		if err = bctx.AddCookies(pwCookies); err != nil {
+			return nil, fmt.Errorf("error adding cookies to browser: %w", err)
+		}
+	}
+
+	return &browserInitResult{pw: pw, browser: browser, bctx: bctx, opt: opt}, nil
+}
+
+// mergeOptions applies each element of opts, in order, on top of base and returns
+// the result. Unset overrides (empty string, nil, non-positive dimension) keep the
+// current value; DarkMode, RequireServer and UseLocalOnly can only be switched on.
+// ShowBrowser is copied from every element, so the last element of opts decides it.
+func mergeOptions(base BrowserOptions, opts []BrowserOptions) BrowserOptions {
+	for _, override := range opts {
+		// Value fields: an unset override leaves the current value alone.
+		if override.UserAgent != "" {
+			base.UserAgent = override.UserAgent
+		}
+		if override.Browser != "" {
+			base.Browser = override.Browser
+		}
+		if override.ServerAddress != "" {
+			base.ServerAddress = override.ServerAddress
+		}
+		if override.Timeout != nil {
+			base.Timeout = override.Timeout
+		}
+		if override.CookieJar != nil {
+			base.CookieJar = override.CookieJar
+		}
+		if dims := override.Dimensions; dims.Width > 0 && dims.Height > 0 {
+			base.Dimensions = dims
+		}
+		// Sticky flags: once true they stay true.
+		base.DarkMode = base.DarkMode || override.DarkMode
+		base.RequireServer = base.RequireServer || override.RequireServer
+		base.UseLocalOnly = base.UseLocalOnly || override.UseLocalOnly
+		// Unconditional copy: a false here replaces an earlier true.
+		base.ShowBrowser = override.ShowBrowser
+	}
+	return base
+}
diff --git a/close.go b/close.go
new file mode 100644
index 0000000..0268efe
--- /dev/null
+++ b/close.go
@@ -0,0 +1,11 @@
+package extractor
+
+import "io"
+
+// DeferClose closes cl and discards any error; a nil cl is a no-op. Intended for defer statements.
+func DeferClose(cl io.Closer) {
+	if cl == nil {
+		return
+	}
+	_ = cl.Close() // error dropped on purpose: this helper has no return value to carry it
+}
diff --git a/close_test.go b/close_test.go
new file mode 100644
index 0000000..50d7fc8
--- /dev/null
+++ b/close_test.go
@@ -0,0 +1,38 @@
+package extractor
+
+import (
+ "errors"
+ "testing"
+)
+
+// mockCloser is a closer test double: Close records the call in closed and
+// returns err.
+type mockCloser struct {
+	closed bool
+	err    error
+}
+
+// Close marks the receiver as closed and reports the configured error.
+func (c *mockCloser) Close() error { c.closed = true; return c.err }
+
+func TestDeferClose_Nil(t *testing.T) {
+ // A nil closer must be accepted; the test has no assertion, so only a panic can fail it.
+ DeferClose(nil)
+}
+
+// TestDeferClose_Valid checks that DeferClose invokes Close on a non-nil closer.
+func TestDeferClose_Valid(t *testing.T) {
+	closer := new(mockCloser)
+	if DeferClose(closer); !closer.closed {
+		t.Error("DeferClose did not call Close()")
+	}
+}
+
+// TestDeferClose_ErrorIgnored checks that Close is still invoked, and nothing
+// panics, when the closer's Close returns an error.
+func TestDeferClose_ErrorIgnored(t *testing.T) {
+	failing := &mockCloser{err: errors.New("close error")}
+	if DeferClose(failing); !failing.closed {
+		t.Error("DeferClose did not call Close()")
+	}
+}
diff --git a/cmd/browser/main.go b/cmd/browser/main.go
index 4f5a8a4..ef1c43f 100644
--- a/cmd/browser/main.go
+++ b/cmd/browser/main.go
@@ -3,7 +3,6 @@ package main
import (
"context"
"fmt"
- "io"
"os"
"github.com/urfave/cli/v3"
@@ -12,9 +11,6 @@ import (
"gitea.stevedudenhoeffer.com/steve/go-extractor/cmd/browser/pkg/browser"
)
-func deferClose(cl io.Closer) {
- _ = cl.Close()
-}
func main() {
cmd := &cli.Command{
Name: "browser",
@@ -31,7 +27,7 @@ func main() {
return err
}
- defer deferClose(b)
+ defer extractor.DeferClose(b)
// now open the user specified url
doc, err := b.Open(ctx, target, extractor.OpenPageOptions{})
@@ -39,7 +35,7 @@ func main() {
return err
}
- defer deferClose(doc)
+ defer extractor.DeferClose(doc)
article, err := extractor.Readability(ctx, doc)
diff --git a/cmd/browser/pkg/browser/flags.go b/cmd/browser/pkg/browser/flags.go
index a30f100..9cba6e4 100644
--- a/cmd/browser/pkg/browser/flags.go
+++ b/cmd/browser/pkg/browser/flags.go
@@ -44,7 +44,7 @@ var Flags = BrowserFlags{
}
func FromCommand(ctx context.Context, cmd *cli.Command) (extractor.Browser, error) {
- var opts extractor.PlayWrightBrowserOptions
+ var opts extractor.BrowserOptions
if ua := cmd.String("user-agent"); ua != "" {
opts.UserAgent = ua
@@ -59,7 +59,7 @@ func FromCommand(ctx context.Context, cmd *cli.Command) (extractor.Browser, erro
}
if b := cmd.String("browser"); b != "" {
- opts.Browser = extractor.PlayWrightBrowserSelection(b)
+ opts.Browser = extractor.BrowserSelection(b)
}
if cf := cmd.String("cookies-file"); cf != "" {
@@ -72,5 +72,5 @@ func FromCommand(ctx context.Context, cmd *cli.Command) (extractor.Browser, erro
opts.ShowBrowser = cmd.Bool("visible")
- return extractor.NewPlayWrightBrowser(ctx, opts)
+ return extractor.NewBrowser(ctx, opts)
}
diff --git a/document.go b/document.go
index ef057f2..0666348 100644
--- a/document.go
+++ b/document.go
@@ -25,7 +25,6 @@ type document struct {
pw *playwright.Playwright
browser playwright.Browser
page playwright.Page
- locator playwright.Locator
}
func newDocument(pw *playwright.Playwright, browser playwright.Browser, page playwright.Page) (Document, error) {
diff --git a/interactive.go b/interactive.go
index aa3c681..cd96b61 100644
--- a/interactive.go
+++ b/interactive.go
@@ -56,48 +56,17 @@ type interactiveBrowser struct {
// NewInteractiveBrowser creates a headless browser with a page ready for interactive control.
// The context is only used for cancellation during setup.
-func NewInteractiveBrowser(ctx context.Context, opts ...PlayWrightBrowserOptions) (InteractiveBrowser, error) {
+func NewInteractiveBrowser(ctx context.Context, opts ...BrowserOptions) (InteractiveBrowser, error) {
var thirtySeconds = 30 * time.Second
- opt := PlayWrightBrowserOptions{
+ opt := mergeOptions(BrowserOptions{
UserAgent: "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:142.0) Gecko/20100101 Firefox/142.0",
- Browser: PlayWrightBrowserSelectionChromium,
+ Browser: BrowserChromium,
Timeout: &thirtySeconds,
Dimensions: Size{
Width: 1280,
Height: 720,
},
- }
-
- for _, o := range opts {
- if o.UserAgent != "" {
- opt.UserAgent = o.UserAgent
- }
- if o.Browser != "" {
- opt.Browser = o.Browser
- }
- if o.Timeout != nil {
- opt.Timeout = o.Timeout
- }
- if o.CookieJar != nil {
- opt.CookieJar = o.CookieJar
- }
- if o.Dimensions.Width > 0 && o.Dimensions.Height > 0 {
- opt.Dimensions = o.Dimensions
- }
- if o.DarkMode {
- opt.DarkMode = true
- }
- if o.PlayWrightServerAddress != "" {
- opt.PlayWrightServerAddress = o.PlayWrightServerAddress
- }
- if o.DontLaunchOnConnectFailure {
- opt.DontLaunchOnConnectFailure = true
- }
- if o.UseLocalOnly {
- opt.UseLocalOnly = true
- }
- opt.ShowBrowser = o.ShowBrowser
- }
+ }, opts)
if err := ctx.Err(); err != nil {
return nil, err
@@ -111,98 +80,13 @@ func NewInteractiveBrowser(ctx context.Context, opts ...PlayWrightBrowserOptions
ch := make(chan result, 1)
go func() {
- pw, err := playwright.Run()
+ res, err := initBrowser(opt)
if err != nil {
- err = playwright.Install()
- if err != nil {
- ch <- result{nil, fmt.Errorf("failed to install playwright: %w", err)}
- return
- }
- pw, err = playwright.Run()
- if err != nil {
- ch <- result{nil, fmt.Errorf("failed to start playwright: %w", err)}
- return
- }
- }
-
- var bt playwright.BrowserType
- switch opt.Browser {
- case PlayWrightBrowserSelectionChromium:
- bt = pw.Chromium
- case PlayWrightBrowserSelectionFirefox:
- bt = pw.Firefox
- case PlayWrightBrowserSelectionWebKit:
- bt = pw.WebKit
- default:
- ch <- result{nil, ErrInvalidBrowserSelection}
+ ch <- result{nil, err}
return
}
- var browser playwright.Browser
- var launch = true
-
- if opt.PlayWrightServerAddress != "" && !opt.UseLocalOnly {
- launch = false
- var timeout float64 = 30000
- browser, err = bt.Connect(opt.PlayWrightServerAddress, playwright.BrowserTypeConnectOptions{Timeout: &timeout})
- if err != nil {
- if opt.DontLaunchOnConnectFailure {
- ch <- result{nil, err}
- return
- }
- launch = true
- }
- }
-
- if launch {
- browser, err = bt.Launch(playwright.BrowserTypeLaunchOptions{
- Headless: playwright.Bool(!opt.ShowBrowser),
- })
- if err != nil {
- ch <- result{nil, fmt.Errorf("failed to launch browser: %w", err)}
- return
- }
- }
-
- viewport := &playwright.Size{
- Width: opt.Dimensions.Width,
- Height: opt.Dimensions.Height,
- }
-
- var scheme *playwright.ColorScheme
- if opt.DarkMode {
- scheme = playwright.ColorSchemeDark
- } else {
- scheme = playwright.ColorSchemeNoPreference
- }
-
- bctx, err := browser.NewContext(playwright.BrowserNewContextOptions{
- UserAgent: playwright.String(opt.UserAgent),
- Viewport: viewport,
- ColorScheme: scheme,
- })
- if err != nil {
- ch <- result{nil, fmt.Errorf("failed to create browser context: %w", err)}
- return
- }
-
- if opt.CookieJar != nil {
- cookies, err := opt.CookieJar.GetAll()
- if err != nil {
- ch <- result{nil, fmt.Errorf("error getting cookies from cookie jar: %w", err)}
- return
- }
- pwCookies := make([]playwright.OptionalCookie, len(cookies))
- for i, c := range cookies {
- pwCookies[i] = cookieToPlaywrightOptionalCookie(c)
- }
- if err := bctx.AddCookies(pwCookies); err != nil {
- ch <- result{nil, fmt.Errorf("error adding cookies: %w", err)}
- return
- }
- }
-
- page, err := bctx.NewPage()
+ page, err := res.bctx.NewPage()
if err != nil {
ch <- result{nil, fmt.Errorf("failed to create page: %w", err)}
return
@@ -210,9 +94,9 @@ func NewInteractiveBrowser(ctx context.Context, opts ...PlayWrightBrowserOptions
ch <- result{
ib: &interactiveBrowser{
- pw: pw,
- browser: browser,
- ctx: bctx,
+ pw: res.pw,
+ browser: res.browser,
+ ctx: res.bctx,
page: page,
},
}
diff --git a/mock_test.go b/mock_test.go
new file mode 100644
index 0000000..6055aef
--- /dev/null
+++ b/mock_test.go
@@ -0,0 +1,16 @@
+package extractor
+
+import "time"
+
+// mockDocument implements Document for tests without Playwright; it serves a fixed url and content.
+type mockDocument struct {
+	mockNode
+	url     string
+	content string
+}
+
+func (d mockDocument) URL() string                             { return d.url }
+func (mockDocument) Refresh() error                            { return nil }
+func (d mockDocument) Content() (string, error)                { return d.content, nil }
+func (mockDocument) Close() error                              { return nil }
+func (mockDocument) WaitForNetworkIdle(_ *time.Duration) error { return nil }
diff --git a/node_test.go b/node_test.go
new file mode 100644
index 0000000..311f4cf
--- /dev/null
+++ b/node_test.go
@@ -0,0 +1,23 @@
+package extractor
+
+import "testing"
+
+// TestEscapeJavaScript runs escapeJavaScript over a table of inputs. The plain
+// and the empty string are expected back unchanged; in the other cases every
+// single quote and every backslash is expected to gain a leading backslash.
+// Each mismatch is reported separately so one failure does not hide the rest.
+func TestEscapeJavaScript(t *testing.T) {
+	cases := []struct{ input, want string }{
+		{input: "hello", want: "hello"},
+		{input: "it's", want: "it\\'s"},
+		{input: `back\slash`, want: `back\\slash`},
+		{input: `both\'`, want: `both\\\'`},
+		{input: "", want: ""},
+	}
+
+	for _, tc := range cases {
+		if got := escapeJavaScript(tc.input); got != tc.want {
+			t.Errorf("escapeJavaScript(%q) = %q, want %q", tc.input, got, tc.want)
+		}
+	}
+}
diff --git a/playwright.go b/playwright.go
index 8dab934..427e0fa 100644
--- a/playwright.go
+++ b/playwright.go
@@ -4,9 +4,7 @@ import (
"context"
"errors"
"fmt"
- "io"
"log/slog"
- "os"
"time"
"github.com/playwright-community/playwright-go"
@@ -24,7 +22,7 @@ type playWrightBrowser struct {
var _ Browser = playWrightBrowser{}
-type PlayWrightBrowserSelection string
+type BrowserSelection string
var (
ErrInvalidBrowserSelection = errors.New("invalid browser selection")
@@ -33,18 +31,18 @@ var (
)
const (
- PlayWrightBrowserSelectionChromium PlayWrightBrowserSelection = "chromium"
- PlayWrightBrowserSelectionFirefox PlayWrightBrowserSelection = "firefox"
- PlayWrightBrowserSelectionWebKit PlayWrightBrowserSelection = "webkit"
+ BrowserChromium BrowserSelection = "chromium"
+ BrowserFirefox BrowserSelection = "firefox"
+ BrowserWebKit BrowserSelection = "webkit"
)
type Size struct {
Width int
Height int
}
-type PlayWrightBrowserOptions struct {
+type BrowserOptions struct {
UserAgent string // If empty, defaults to "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:133.0) Gecko/20100101 Firefox/133.0"
- Browser PlayWrightBrowserSelection // If unset defaults to Firefox.
+ Browser BrowserSelection // If unset defaults to Firefox.
Timeout *time.Duration // If unset defaults to 30 seconds timeout. If set to 0, no timeout
// CookieJar will, if set, load all cookies from the cookie jar into the browser and save all cookies from the
@@ -56,15 +54,15 @@ type PlayWrightBrowserOptions struct {
Dimensions Size
DarkMode bool
- // PlayWrightServerAddress is the address of a PlayWright server to connect to.
+ // ServerAddress is the address of a Playwright server to connect to.
// Defaults to the value of the environment variable PLAYWRIGHT_SERVER_ADDRESS.
- PlayWrightServerAddress string
+ ServerAddress string
- // DontLaunchOnConnectFailure will, if set, not launch the browser if the connection to the PlayWright server,
- // and return an error if the connection fails.
- DontLaunchOnConnectFailure bool
+ // RequireServer will, if set, return an error if the connection to the
+ // Playwright server fails instead of falling back to a local browser launch.
+ RequireServer bool
- // UseLocalOnly will, if set, not connect to the PlayWright server, and instead use the local PlayWright server.
+ // UseLocalOnly will, if set, not connect to the Playwright server, and instead launch a local browser.
UseLocalOnly bool
}
@@ -90,48 +88,14 @@ func playwrightCookieToCookie(cookie playwright.Cookie) Cookie {
}
}
-func NewPlayWrightBrowser(ctx context.Context, opts ...PlayWrightBrowserOptions) (Browser, error) {
+func NewBrowser(ctx context.Context, opts ...BrowserOptions) (Browser, error) {
var thirtySeconds = 30 * time.Second
- opt := PlayWrightBrowserOptions{
- UserAgent: "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:142.0) Gecko/20100101 Firefox/142.0",
- Browser: PlayWrightBrowserSelectionFirefox,
- Timeout: &thirtySeconds,
- DarkMode: false,
- PlayWrightServerAddress: "",
- }
+ opt := mergeOptions(BrowserOptions{
+ UserAgent: "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:142.0) Gecko/20100101 Firefox/142.0",
+ Browser: BrowserFirefox,
+ Timeout: &thirtySeconds,
+ }, opts)
- for _, o := range opts {
- if o.UserAgent != "" {
- opt.UserAgent = o.UserAgent
- }
- if o.Browser != "" {
- opt.Browser = o.Browser
- }
- if o.Timeout != nil {
- opt.Timeout = o.Timeout
- }
- if o.CookieJar != nil {
- opt.CookieJar = o.CookieJar
- }
- if o.Dimensions.Width > 0 && o.Dimensions.Height > 0 {
- opt.Dimensions = o.Dimensions
- }
- if o.DarkMode {
- opt.DarkMode = true
- }
- if o.PlayWrightServerAddress != "" {
- opt.PlayWrightServerAddress = o.PlayWrightServerAddress
- }
- if o.DontLaunchOnConnectFailure {
- opt.DontLaunchOnConnectFailure = true
- }
- if o.UseLocalOnly {
- opt.UseLocalOnly = true
- }
- opt.ShowBrowser = o.ShowBrowser
- }
-
- // Check if context is already done
if err := ctx.Err(); err != nil {
return nil, err
}
@@ -141,145 +105,28 @@ func NewPlayWrightBrowser(ctx context.Context, opts ...PlayWrightBrowserOptions)
err error
}
- // Create a channel for the result
resultCh := make(chan browserResult, 1)
- // Launch browser initialization in a separate goroutine
go func() {
- pw, err := playwright.Run()
-
- if err != nil {
- err = playwright.Install()
-
- if err != nil {
- resultCh <- browserResult{nil, err}
- return
- }
-
- pw, err = playwright.Run()
-
- if err != nil {
- resultCh <- browserResult{nil, err}
- return
- }
- }
-
- var bt playwright.BrowserType
-
- switch opt.Browser {
- case PlayWrightBrowserSelectionChromium:
- bt = pw.Chromium
- if opt.PlayWrightServerAddress == "" {
- opt.PlayWrightServerAddress = os.Getenv("PLAYWRIGHT_SERVER_ADDRESS_CHROMIUM")
- }
-
- case PlayWrightBrowserSelectionFirefox:
- bt = pw.Firefox
- if opt.PlayWrightServerAddress == "" {
- opt.PlayWrightServerAddress = os.Getenv("PLAYWRIGHT_SERVER_ADDRESS_FIREFOX")
- }
-
- case PlayWrightBrowserSelectionWebKit:
- bt = pw.WebKit
- if opt.PlayWrightServerAddress == "" {
- opt.PlayWrightServerAddress = os.Getenv("PLAYWRIGHT_SERVER_ADDRESS_WEBKIT")
- }
-
- default:
- resultCh <- browserResult{nil, ErrInvalidBrowserSelection}
- return
- }
- var browser playwright.Browser
-
- var launch = true
- if opt.PlayWrightServerAddress != "" && !opt.UseLocalOnly {
- launch = false
- slog.Info("connecting to playwright server", "address", opt.PlayWrightServerAddress)
- var timeout float64 = 30000
- browser, err = bt.Connect(opt.PlayWrightServerAddress, playwright.BrowserTypeConnectOptions{Timeout: &timeout})
-
- if err != nil {
- if opt.DontLaunchOnConnectFailure {
- resultCh <- browserResult{nil, err}
- return
- }
- slog.Warn("failed to connect to playwright server, launching local browser", "err", err)
- launch = true
- }
- }
-
- if launch {
- browser, err = bt.Launch(playwright.BrowserTypeLaunchOptions{
- Headless: playwright.Bool(!opt.ShowBrowser),
- })
- if err != nil {
- resultCh <- browserResult{nil, err}
- return
- }
- }
-
- var viewport *playwright.Size
- if opt.Dimensions.Width > 0 && opt.Dimensions.Height > 0 {
- viewport = &playwright.Size{
- Width: opt.Dimensions.Width,
- Height: opt.Dimensions.Height,
- }
- }
-
- var scheme *playwright.ColorScheme
-
- if opt.DarkMode {
- scheme = playwright.ColorSchemeDark
- } else {
- scheme = playwright.ColorSchemeNoPreference
- }
-
- c, err := browser.NewContext(playwright.BrowserNewContextOptions{
- UserAgent: playwright.String(opt.UserAgent),
- Viewport: viewport,
- ColorScheme: scheme,
- })
+ res, err := initBrowser(opt)
if err != nil {
resultCh <- browserResult{nil, err}
return
}
- if opt.CookieJar != nil {
- cookies, err := opt.CookieJar.GetAll()
- if err != nil {
- resultCh <- browserResult{nil, fmt.Errorf("error getting cookies from cookie jar: %w", err)}
- return
- }
-
- pwCookies := make([]playwright.OptionalCookie, len(cookies))
-
- for i, cookie := range cookies {
- pwCookies[i] = cookieToPlaywrightOptionalCookie(cookie)
- }
-
- err = c.AddCookies(pwCookies)
-
- if err != nil {
- resultCh <- browserResult{nil, fmt.Errorf("error adding cookies to browser: %w", err)}
- return
- }
- }
-
resultCh <- browserResult{
browser: playWrightBrowser{
- pw: pw,
- browser: browser,
- userAgent: opt.UserAgent,
- timeout: *opt.Timeout,
- cookieJar: opt.CookieJar,
- ctx: c,
- serverAddr: opt.PlayWrightServerAddress,
+ pw: res.pw,
+ browser: res.browser,
+ userAgent: res.opt.UserAgent,
+ timeout: *res.opt.Timeout,
+ cookieJar: res.opt.CookieJar,
+ ctx: res.bctx,
+ serverAddr: res.opt.ServerAddress,
},
- err: nil,
}
}()
- // Wait for either context cancellation or browser initialization completion
select {
case <-ctx.Done():
return nil, ctx.Err()
@@ -367,12 +214,9 @@ func (b playWrightBrowser) Close() error {
)
}
-func deferClose(cl io.Closer) {
- _ = cl.Close()
-}
func Screenshot(ctx context.Context, target string, timeout time.Duration) ([]byte, error) {
- browser, err := NewPlayWrightBrowser(ctx, PlayWrightBrowserOptions{
+ browser, err := NewBrowser(ctx, BrowserOptions{
Timeout: &timeout,
})
@@ -380,14 +224,14 @@ func Screenshot(ctx context.Context, target string, timeout time.Duration) ([]by
return nil, fmt.Errorf("error creating browser: %w", err)
}
- defer deferClose(browser)
+ defer DeferClose(browser)
doc, err := browser.Open(ctx, target, OpenPageOptions{})
if err != nil {
return nil, fmt.Errorf("error opening page: %w", err)
}
- defer deferClose(doc)
+ defer DeferClose(doc)
return doc.Screenshot()
}
diff --git a/readability_test.go b/readability_test.go
new file mode 100644
index 0000000..6033bfc
--- /dev/null
+++ b/readability_test.go
@@ -0,0 +1,72 @@
+package extractor
+
+import (
+ "context"
+ "testing"
+)
+
+// TestReadability_ValidHTML runs Readability on a small but complete HTML page
+// served by mockDocument and expects the title "Test Article" and non-empty
+// text content. The page carries that title in both <title> and <h1>.
+func TestReadability_ValidHTML(t *testing.T) {
+	html := `<!DOCTYPE html>
+<html>
+<head><title>Test Article</title></head>
+<body>
+<article>
+<h1>Test Article</h1>
+<p>This is a test article with enough content to be parsed by readability.
+It needs to have a reasonable amount of text so the algorithm considers it
+a valid article. Let us add several sentences to make sure this works
+correctly. The readability library requires a minimum amount of content
+to successfully extract an article from a page.</p>
+<p>Here is another paragraph to add more content. We want to make sure
+that the content is substantial enough for the readability algorithm to
+consider this a valid article and extract the text properly.</p>
+</article>
+</body>
+</html>`
+
+	doc := mockDocument{url: "https://example.com/article", content: html}
+
+	article, err := Readability(context.Background(), doc)
+	if err != nil {
+		t.Fatalf("Readability() error = %v", err)
+	}
+
+	if article.Title != "Test Article" {
+		t.Errorf("Title = %q, want %q", article.Title, "Test Article")
+	}
+
+	if article.TextContent == "" {
+		t.Error("TextContent should not be empty")
+	}
+}
+
+// TestReadability_EmptyContent feeds Readability a document whose content is
+// the empty string and requires a nil error. The article itself fails the test
+// only when Title and TextContent are both non-empty.
+func TestReadability_EmptyContent(t *testing.T) {
+	doc := mockDocument{url: "https://example.com/empty", content: ""}
+
+	article, err := Readability(context.Background(), doc)
+	if err != nil {
+		t.Fatalf("Readability() unexpected error = %v", err)
+	}
+	// Empty content should produce an empty article.
+	if !(article.Title == "" || article.TextContent == "") {
+		t.Error("expected empty article from empty content")
+	}
+}
+
+func TestReadability_InvalidURL(t *testing.T) {
+ doc := mockDocument{
+ url: "://invalid",
+ content: "text
",
+ }
+
+ _, err := Readability(context.Background(), doc)
+ if err == nil {
+ t.Error("Readability() expected error for invalid URL, got nil")
+ }
+}
diff --git a/sites/aislegopher/aislegopher.go b/sites/aislegopher/aislegopher.go
index 9756850..4ef7348 100644
--- a/sites/aislegopher/aislegopher.go
+++ b/sites/aislegopher/aislegopher.go
@@ -4,7 +4,6 @@ import (
"context"
"errors"
"fmt"
- "io"
"net/url"
"strconv"
"strings"
@@ -27,11 +26,6 @@ type Item struct {
Price float64
}
-func deferClose(cl io.Closer) {
- if cl != nil {
- _ = cl.Close()
- }
-}
func GetItemFromURL(ctx context.Context, b extractor.Browser, u *url.URL) (Item, error) {
return DefaultConfig.GetItemFromURL(ctx, b, u)
}
@@ -57,7 +51,7 @@ func (c Config) GetItemFromURL(ctx context.Context, b extractor.Browser, u *url.
res.ID, _ = strconv.Atoi(a[3])
doc, err := b.Open(ctx, u.String(), extractor.OpenPageOptions{})
- defer deferClose(doc)
+ defer extractor.DeferClose(doc)
if err != nil {
return res, fmt.Errorf("failed to open page: %w", err)
}
diff --git a/sites/aislegopher/aislegopher_test.go b/sites/aislegopher/aislegopher_test.go
new file mode 100644
index 0000000..41c936c
--- /dev/null
+++ b/sites/aislegopher/aislegopher_test.go
@@ -0,0 +1,39 @@
+package aislegopher
+
+import (
+ "context"
+ "net/url"
+ "testing"
+)
+
+// TestGetItemFromURL_InvalidHost expects ErrInvalidURL for a URL on example.com; no browser is supplied.
+func TestGetItemFromURL_InvalidHost(t *testing.T) {
+	target, _ := url.Parse("https://example.com/p/slug/123")
+	if _, got := GetItemFromURL(context.Background(), nil, target); got != ErrInvalidURL {
+		t.Errorf("expected ErrInvalidURL, got %v", got)
+	}
+}
+
+// TestGetItemFromURL_InvalidPath_NoP expects ErrInvalidURL when the path starts with /x/ rather than /p/.
+func TestGetItemFromURL_InvalidPath_NoP(t *testing.T) {
+	target, _ := url.Parse("https://aislegopher.com/x/slug/123")
+	if _, got := GetItemFromURL(context.Background(), nil, target); got != ErrInvalidURL {
+		t.Errorf("expected ErrInvalidURL, got %v", got)
+	}
+}
+
+// TestGetItemFromURL_InvalidPath_TooShort expects ErrInvalidURL for the path /p/slug, which has no ID segment.
+func TestGetItemFromURL_InvalidPath_TooShort(t *testing.T) {
+	target, _ := url.Parse("https://aislegopher.com/p/slug")
+	if _, got := GetItemFromURL(context.Background(), nil, target); got != ErrInvalidURL {
+		t.Errorf("expected ErrInvalidURL, got %v", got)
+	}
+}
+
+// TestGetItemFromURL_InvalidPath_TooLong expects ErrInvalidURL for the path /p/slug/123/extra.
+func TestGetItemFromURL_InvalidPath_TooLong(t *testing.T) {
+	target, _ := url.Parse("https://aislegopher.com/p/slug/123/extra")
+	if _, got := GetItemFromURL(context.Background(), nil, target); got != ErrInvalidURL {
+		t.Errorf("expected ErrInvalidURL, got %v", got)
+	}
+}
diff --git a/sites/aislegopher/cmd/aislegopher/aislegopher.go b/sites/aislegopher/cmd/aislegopher/aislegopher.go
index c14127b..9724605 100644
--- a/sites/aislegopher/cmd/aislegopher/aislegopher.go
+++ b/sites/aislegopher/cmd/aislegopher/aislegopher.go
@@ -3,10 +3,10 @@ package main
import (
"context"
"fmt"
- "io"
"net/url"
"os"
+ "gitea.stevedudenhoeffer.com/steve/go-extractor"
"gitea.stevedudenhoeffer.com/steve/go-extractor/cmd/browser/pkg/browser"
"gitea.stevedudenhoeffer.com/steve/go-extractor/sites/aislegopher"
"github.com/urfave/cli/v3"
@@ -22,11 +22,6 @@ func (f AisleGopherFlags) ToConfig(_ *cli.Command) aislegopher.Config {
return res
}
-func deferClose(cl io.Closer) {
- if cl != nil {
- _ = cl.Close()
- }
-}
func main() {
var flags []cli.Flag
flags = append(flags, browser.Flags...)
@@ -44,7 +39,7 @@ func main() {
return fmt.Errorf("failed to create browser: %w", err)
}
- defer deferClose(b)
+ defer extractor.DeferClose(b)
arg := c.Args().First()
diff --git a/sites/archive/archive.go b/sites/archive/archive.go
index cdf8c66..5d2ded9 100644
--- a/sites/archive/archive.go
+++ b/sites/archive/archive.go
@@ -4,7 +4,6 @@ import (
"context"
"errors"
"fmt"
- "io"
"log/slog"
"net/url"
"strings"
@@ -39,12 +38,6 @@ func (c Config) validate() Config {
var DefaultConfig = Config{}
-func deferClose(cl io.Closer) {
- if cl != nil {
- _ = cl.Close()
- }
-}
-
// IsArchived checks if a url is archived. It returns the archived url if it is archived, or an empty string if it is not.
func (c Config) IsArchived(ctx context.Context, b extractor.Browser, target string) (extractor.Document, error) {
c = c.validate()
diff --git a/sites/archive/archive_test.go b/sites/archive/archive_test.go
new file mode 100644
index 0000000..d219530
--- /dev/null
+++ b/sites/archive/archive_test.go
@@ -0,0 +1,37 @@
+package archive
+
+import (
+ "testing"
+ "time"
+)
+
+// TestConfig_Validate_Defaults checks that validate on an empty Config yields
+// the endpoint "https://archive.ph" and a non-nil Timeout of one hour.
+func TestConfig_Validate_Defaults(t *testing.T) {
+	got := Config{}.validate()
+	if want := "https://archive.ph"; got.Endpoint != want {
+		t.Errorf("Endpoint = %q, want %q", got.Endpoint, want)
+	}
+	if got.Timeout == nil {
+		t.Fatal("Timeout should not be nil after validate")
+	}
+	if want := 1 * time.Hour; *got.Timeout != want {
+		t.Errorf("Timeout = %v, want %v", *got.Timeout, want)
+	}
+}
+
+func TestConfig_Validate_Preserves(t *testing.T) {
+ timeout := 5 * time.Minute
+ c := Config{
+ Endpoint: "https://archive.org",
+ Timeout: &timeout,
+ }
+ c = c.validate()
+
+ if c.Endpoint != "https://archive.org" {
+ t.Errorf("Endpoint = %q, want %q", c.Endpoint, "https://archive.org")
+ }
+ if *c.Timeout != 5*time.Minute {
+ t.Errorf("Timeout = %v, want %v", *c.Timeout, 5*time.Minute)
+ }
+}
diff --git a/sites/duckduckgo/cmd/duckduckgo/main.go b/sites/duckduckgo/cmd/duckduckgo/main.go
index f332b9f..cd3ca25 100644
--- a/sites/duckduckgo/cmd/duckduckgo/main.go
+++ b/sites/duckduckgo/cmd/duckduckgo/main.go
@@ -3,12 +3,13 @@ package main
import (
"context"
"fmt"
- "github.com/urfave/cli/v3"
- "io"
"os"
"strings"
"time"
+ "github.com/urfave/cli/v3"
+
+ "gitea.stevedudenhoeffer.com/steve/go-extractor"
"gitea.stevedudenhoeffer.com/steve/go-extractor/cmd/browser/pkg/browser"
"gitea.stevedudenhoeffer.com/steve/go-extractor/sites/duckduckgo"
)
@@ -49,12 +50,6 @@ func (f DuckDuckGoFlags) ToConfig(cmd *cli.Command) (duckduckgo.Config, error) {
return res, nil
}
-func deferClose(cl io.Closer) {
- if cl != nil {
- _ = cl.Close()
- }
-}
-
func main() {
var flags []cli.Flag
@@ -78,7 +73,7 @@ func main() {
}
b, err := browser.FromCommand(ctx, command)
- defer deferClose(b)
+ defer extractor.DeferClose(b)
if err != nil {
return fmt.Errorf("failed to create browser: %w", err)
@@ -89,7 +84,7 @@ func main() {
return fmt.Errorf("failed to open search: %w", err)
}
- defer deferClose(search)
+ defer extractor.DeferClose(search)
res := search.GetResults()
fmt.Println("Results:", res)
diff --git a/sites/duckduckgo/duckduckgo.go b/sites/duckduckgo/duckduckgo.go
index bebe820..820bd87 100644
--- a/sites/duckduckgo/duckduckgo.go
+++ b/sites/duckduckgo/duckduckgo.go
@@ -3,7 +3,6 @@ package duckduckgo
import (
"context"
"fmt"
- "io"
"log/slog"
"net/url"
@@ -71,12 +70,6 @@ type Result struct {
Description string
}
-func deferClose(cl io.Closer) {
- if cl != nil {
- _ = cl.Close()
- }
-}
-
func (c Config) OpenSearch(ctx context.Context, b extractor.Browser, query string) (SearchPage, error) {
u := c.ToSearchURL(query)
@@ -97,7 +90,7 @@ func (c Config) Search(ctx context.Context, b extractor.Browser, query string) (
slog.Info("searching", "url", u, "query", query, "config", c, "browser", b)
doc, err := b.Open(ctx, u.String(), extractor.OpenPageOptions{})
- defer deferClose(doc)
+ defer extractor.DeferClose(doc)
if err != nil {
return nil, fmt.Errorf("failed to open url: %w", err)
diff --git a/sites/duckduckgo/duckduckgo_test.go b/sites/duckduckgo/duckduckgo_test.go
index 0733b9e..02e4254 100644
--- a/sites/duckduckgo/duckduckgo_test.go
+++ b/sites/duckduckgo/duckduckgo_test.go
@@ -83,3 +83,34 @@ func TestConfig_ToSearchURL_NoRegion(t *testing.T) {
t.Errorf("kl should be empty when no region, got %q", u.Query().Get("kl"))
}
}
+
+func TestConfig_ToSearchURL_Scheme(t *testing.T) {
+ c := Config{SafeSearch: SafeSearchOff}
+ u := c.ToSearchURL("test")
+
+ if u.Scheme != "https" {
+ t.Errorf("Scheme = %q, want %q", u.Scheme, "https")
+ }
+}
+
+// TestConfig_ToSearchURL_SpecialChars verifies that spaces and '&' in the query round-trip through "q".
+func TestConfig_ToSearchURL_SpecialChars(t *testing.T) {
+	cfg := Config{SafeSearch: SafeSearchOff}
+	u := cfg.ToSearchURL("go lang & testing")
+	if got := u.Query().Get("q"); got != "go lang & testing" {
+		t.Errorf("q = %q, want %q", got, "go lang & testing")
+	}
+}
+
+func TestResult_ZeroValue(t *testing.T) {
+ var r Result
+ if r.URL != "" || r.Title != "" || r.Description != "" {
+ t.Error("zero-value Result should have empty fields")
+ }
+}
+
+func TestDefaultConfig_SafeSearch(t *testing.T) {
+	if got := DefaultConfig.SafeSearch; got != SafeSearchOff {
+		t.Errorf("DefaultConfig.SafeSearch = %d, want %d", got, SafeSearchOff)
+	}
+}
diff --git a/sites/google/cmd/google/main.go b/sites/google/cmd/google/main.go
index d195b53..5df404a 100644
--- a/sites/google/cmd/google/main.go
+++ b/sites/google/cmd/google/main.go
@@ -3,12 +3,12 @@ package main
import (
"context"
"fmt"
- "io"
"os"
"strings"
"github.com/urfave/cli/v3"
+ "gitea.stevedudenhoeffer.com/steve/go-extractor"
"gitea.stevedudenhoeffer.com/steve/go-extractor/cmd/browser/pkg/browser"
"gitea.stevedudenhoeffer.com/steve/go-extractor/sites/google"
)
@@ -42,12 +42,6 @@ func (f GoogleFlags) ToConfig(_ context.Context, cmd *cli.Command) google.Config
return c
}
-func deferClose(cl io.Closer) {
- if cl != nil {
- _ = cl.Close()
- }
-}
-
func main() {
var flags []cli.Flag
@@ -67,7 +61,7 @@ func main() {
b, err := browser.FromCommand(ctx, cli)
- defer deferClose(b)
+ defer extractor.DeferClose(b)
if err != nil {
return err
diff --git a/sites/google/google.go b/sites/google/google.go
index d592e23..d31e688 100644
--- a/sites/google/google.go
+++ b/sites/google/google.go
@@ -3,7 +3,6 @@ package google
import (
"context"
"fmt"
- "io"
"net/url"
"gitea.stevedudenhoeffer.com/steve/go-extractor"
@@ -48,12 +47,6 @@ type Result struct {
Description string
}
-func deferClose(cl io.Closer) {
- if cl != nil {
- _ = cl.Close()
- }
-}
-
func (c Config) Search(ctx context.Context, b extractor.Browser, query string) ([]Result, error) {
c = c.validate()
@@ -99,7 +92,7 @@ func (c Config) Search(ctx context.Context, b extractor.Browser, query string) (
return nil, fmt.Errorf("failed to open url: %w", err)
}
- defer deferClose(doc)
+ defer extractor.DeferClose(doc)
var res []Result
diff --git a/sites/megamillions/megamillions.go b/sites/megamillions/megamillions.go
index fa92cf4..173bb7e 100644
--- a/sites/megamillions/megamillions.go
+++ b/sites/megamillions/megamillions.go
@@ -3,7 +3,6 @@ package megamillions
import (
"context"
"fmt"
- "io"
"strconv"
"strings"
"time"
@@ -33,12 +32,6 @@ type NextDrawing struct {
Jackpot currency.Amount
}
-func deferClose(cl io.Closer) {
- if cl != nil {
- _ = cl.Close()
- }
-}
-
func netTicksToTime(t int64) time.Time {
return time.Unix(0, t*100).Add(-621355968000000000)
}
@@ -218,7 +211,7 @@ func (c Config) GetCurrent(ctx context.Context, b extractor.Browser) (*Drawing,
return nil, nil, err
}
- defer deferClose(doc)
+ defer extractor.DeferClose(doc)
d, err := getDrawing(ctx, doc)
diff --git a/sites/megamillions/megamillions_test.go b/sites/megamillions/megamillions_test.go
index a6d6ba8..673595b 100644
--- a/sites/megamillions/megamillions_test.go
+++ b/sites/megamillions/megamillions_test.go
@@ -41,3 +41,33 @@ func TestNetTicksToTime_DifferenceIsCorrect(t *testing.T) {
t.Errorf("expected 1 second difference, got %v", diff)
}
}
+
+func TestNetTicksToTime_NotZero(t *testing.T) {
+ // Verify the function produces a non-zero time for typical ticks values.
+ ticks := int64(638396256000000000)
+ result := netTicksToTime(ticks)
+
+ if result.IsZero() {
+ t.Error("netTicksToTime should not return zero time for valid ticks")
+ }
+}
+
+func TestConfig_Validate(t *testing.T) {
+ c := Config{}
+ c = c.validate()
+ _ = c // validate is a no-op, just verify no panic
+}
+
+func TestDrawing_ZeroValue(t *testing.T) {
+ var d Drawing
+ if d.MegaBall != 0 || d.Megaplier != 0 {
+ t.Error("zero-value Drawing should have zero fields")
+ }
+}
+
+func TestNextDrawing_ZeroValue(t *testing.T) {
+ var nd NextDrawing
+ if nd.Date != "" {
+ t.Error("zero-value NextDrawing should have empty date")
+ }
+}
diff --git a/sites/powerball/powerball.go b/sites/powerball/powerball.go
index 6951e03..4b3f9c7 100644
--- a/sites/powerball/powerball.go
+++ b/sites/powerball/powerball.go
@@ -3,7 +3,6 @@ package powerball
import (
"context"
"fmt"
- "io"
"strconv"
"strings"
"time"
@@ -32,12 +31,6 @@ type NextDrawing struct {
JackpotDollars int
}
-func deferClose(cl io.Closer) {
- if cl != nil {
- _ = cl.Close()
- }
-}
-
func getDrawing(_ context.Context, doc extractor.Document) (*Drawing, error) {
var drawing Drawing
@@ -196,7 +189,7 @@ func (c Config) GetCurrent(ctx context.Context, b extractor.Browser) (*Drawing,
return nil, nil, err
}
- defer deferClose(doc)
+ defer extractor.DeferClose(doc)
d, err := getDrawing(ctx, doc)
diff --git a/sites/powerball/powerball_test.go b/sites/powerball/powerball_test.go
new file mode 100644
index 0000000..214e046
--- /dev/null
+++ b/sites/powerball/powerball_test.go
@@ -0,0 +1,34 @@
+package powerball
+
+import "testing"
+
+func TestConfig_Validate(t *testing.T) {
+ c := Config{}
+ c = c.validate()
+ // validate is a no-op for powerball Config, just verify it doesn't panic.
+ _ = c
+}
+
+// TestDefaultConfig only checks that DefaultConfig is declared and can be referenced.
+func TestDefaultConfig(t *testing.T) {
+	_ = DefaultConfig
+}
+
+func TestDrawing_ZeroValue(t *testing.T) {
+ var d Drawing
+ if d.PowerBall != 0 || d.PowerPlay != 0 {
+ t.Error("zero-value Drawing should have zero fields")
+ }
+ for i, n := range d.Numbers {
+ if n != 0 {
+ t.Errorf("Numbers[%d] = %d, want 0", i, n)
+ }
+ }
+}
+
+func TestNextDrawing_ZeroValue(t *testing.T) {
+ var nd NextDrawing
+ if nd.Date != "" || nd.JackpotDollars != 0 {
+ t.Error("zero-value NextDrawing should have empty/zero fields")
+ }
+}
diff --git a/sites/useragents/useragents.go b/sites/useragents/useragents.go
index 696b4e7..6d04ae0 100644
--- a/sites/useragents/useragents.go
+++ b/sites/useragents/useragents.go
@@ -4,8 +4,6 @@ import (
"context"
"encoding/json"
"fmt"
- "io"
-
"gitea.stevedudenhoeffer.com/steve/go-extractor"
)
@@ -13,12 +11,6 @@ type Config struct{}
var DefaultConfig = Config{}
-func deferClose(cl io.Closer) {
- if cl != nil {
- _ = cl.Close()
- }
-}
-
func GetMostCommonDesktopUserAgent(ctx context.Context, b extractor.Browser) (string, error) {
return DefaultConfig.GetMostCommonDesktopUserAgent(ctx, b)
}
@@ -30,7 +22,7 @@ func (c Config) GetMostCommonDesktopUserAgent(ctx context.Context, b extractor.B
return "", fmt.Errorf("failed to open useragents.me: %w", err)
}
- defer deferClose(doc)
+ defer extractor.DeferClose(doc)
s := doc.Select("#most-common-desktop-useragents-json-csv > div:nth-child(1) > textarea:nth-child(4)")
text := ""
diff --git a/sites/useragents/useragents_test.go b/sites/useragents/useragents_test.go
new file mode 100644
index 0000000..2d151bd
--- /dev/null
+++ b/sites/useragents/useragents_test.go
@@ -0,0 +1,9 @@
+package useragents
+
+import "testing"
+
+func TestDefaultConfig(t *testing.T) {
+	if DefaultConfig != (Config{}) { // DefaultConfig should be a zero-value Config.
+		t.Errorf("DefaultConfig = %+v, want zero-value Config", DefaultConfig)
+	}
+}
diff --git a/sites/wegmans/cmd/wegmans/main.go b/sites/wegmans/cmd/wegmans/main.go
index 7b26b26..d9a2a19 100644
--- a/sites/wegmans/cmd/wegmans/main.go
+++ b/sites/wegmans/cmd/wegmans/main.go
@@ -3,10 +3,10 @@ package main
import (
"context"
"fmt"
- "io"
"net/url"
"os"
+ "gitea.stevedudenhoeffer.com/steve/go-extractor"
"gitea.stevedudenhoeffer.com/steve/go-extractor/cmd/browser/pkg/browser"
"github.com/urfave/cli/v3"
@@ -14,12 +14,6 @@ import (
"gitea.stevedudenhoeffer.com/steve/go-extractor/sites/wegmans"
)
-func deferClose(cl io.Closer) {
- if cl != nil {
- _ = cl.Close()
- }
-}
-
type WegmansFlags []cli.Flag
var Flags = WegmansFlags{}
@@ -44,7 +38,7 @@ func main() {
cfg := Flags.ToConfig(cmd)
b, err := browser.FromCommand(ctx, cmd)
- defer deferClose(b)
+ defer extractor.DeferClose(b)
if err != nil {
return fmt.Errorf("error creating browser: %w", err)
diff --git a/sites/wegmans/wegmans.go b/sites/wegmans/wegmans.go
index e70c63b..8959233 100644
--- a/sites/wegmans/wegmans.go
+++ b/sites/wegmans/wegmans.go
@@ -3,7 +3,6 @@ package wegmans
import (
"context"
"errors"
- "io"
"log/slog"
"net/url"
"strconv"
@@ -30,12 +29,6 @@ type Item struct {
Unit string
}
-func deferClose(c io.Closer) {
- if c != nil {
- _ = c.Close()
- }
-}
-
func (c Config) GetItemPrice(ctx context.Context, b extractor.Browser, u *url.URL) (Item, error) {
if b == nil {
@@ -68,7 +61,7 @@ func (c Config) GetItemPrice(ctx context.Context, b extractor.Browser, u *url.UR
}
doc, err := b.Open(ctx, u.String(), extractor.OpenPageOptions{})
- defer deferClose(doc)
+ defer extractor.DeferClose(doc)
if err != nil {
return Item{}, err
diff --git a/sites/wegmans/wegmans_test.go b/sites/wegmans/wegmans_test.go
new file mode 100644
index 0000000..9a8ee05
--- /dev/null
+++ b/sites/wegmans/wegmans_test.go
@@ -0,0 +1,39 @@
+package wegmans
+
+import (
+ "context"
+ "net/url"
+ "testing"
+)
+
+// TestGetItemPrice_NilBrowser verifies that a nil browser is rejected with ErrNilBrowser.
+func TestGetItemPrice_NilBrowser(t *testing.T) {
+	target, _ := url.Parse("https://shop.wegmans.com/product/24921")
+	if _, err := DefaultConfig.GetItemPrice(context.Background(), nil, target); err != ErrNilBrowser {
+		t.Errorf("expected ErrNilBrowser, got %v", err)
+	}
+}
+
+// TestGetItemPrice_NilURL only pins the ErrNilURL message: the nil-browser check comes
+// before the nil-URL check, so that path cannot be exercised without a real browser.
+func TestGetItemPrice_NilURL(t *testing.T) {
+	if got := ErrNilURL.Error(); got != "url is nil" {
+		t.Errorf("ErrNilURL = %q, want %q", got, "url is nil")
+	}
+}
+
+func TestGetItemPrice_ErrorSentinels(t *testing.T) {
+	if got := ErrInvalidURL.Error(); got != "invalid url" {
+		t.Errorf("ErrInvalidURL = %q, want %q", got, "invalid url")
+	}
+	if got := ErrNilBrowser.Error(); got != "browser is nil" {
+		t.Errorf("ErrNilBrowser = %q, want %q", got, "browser is nil")
+	}
+}
+
+func TestItem_ZeroValue(t *testing.T) {
+ var item Item
+ if item.ID != 0 || item.Name != "" || item.Price != 0 || item.UnitPrice != 0 || item.Unit != "" {
+ t.Error("zero-value Item should have empty/zero fields")
+ }
+}