refactor: restructure API, deduplicate code, expand test coverage
Some checks failed
CI / build (push) Failing after 2m4s
CI / test (push) Failing after 2m6s
CI / vet (push) Failing after 2m19s

- Extract shared DeferClose helper, removing 14 duplicate copies
- Rename PlayWright-prefixed types to cleaner names (BrowserOptions,
  BrowserSelection, NewBrowser, etc.)
- Rename fields: ServerAddress, RequireServer (was DontLaunchOnConnectFailure)
- Extract shared initBrowser/mergeOptions into browser_init.go,
  deduplicating ~120 lines between NewBrowser and NewInteractiveBrowser
- Remove unused locator field from document struct
- Add tests for all previously untested packages (archive, aislegopher,
  wegmans, useragents, powerball) and expand existing test suites
- Add MIGRATION.md documenting all breaking API changes

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-09 13:59:47 -05:00
parent e7b7e78796
commit cb2ed10cfd
32 changed files with 667 additions and 417 deletions

View File

@@ -4,7 +4,6 @@ import (
"context"
"errors"
"fmt"
"io"
"net/url"
"strconv"
"strings"
@@ -27,11 +26,6 @@ type Item struct {
Price float64
}
// deferClose closes cl when it is non-nil and discards whatever error Close
// returns, which makes it convenient to use directly in a defer statement.
func deferClose(cl io.Closer) {
	if cl == nil {
		return
	}
	_ = cl.Close()
}
// GetItemFromURL is a package-level convenience wrapper: it forwards ctx, b
// and u unchanged to DefaultConfig.GetItemFromURL and returns its results.
func GetItemFromURL(ctx context.Context, b extractor.Browser, u *url.URL) (Item, error) {
	item, err := DefaultConfig.GetItemFromURL(ctx, b, u)
	return item, err
}
@@ -57,7 +51,7 @@ func (c Config) GetItemFromURL(ctx context.Context, b extractor.Browser, u *url.
res.ID, _ = strconv.Atoi(a[3])
doc, err := b.Open(ctx, u.String(), extractor.OpenPageOptions{})
defer deferClose(doc)
defer extractor.DeferClose(doc)
if err != nil {
return res, fmt.Errorf("failed to open page: %w", err)
}

View File

@@ -0,0 +1,39 @@
package aislegopher
import (
"context"
"net/url"
"testing"
)
// TestGetItemFromURL_InvalidHost verifies that a URL on example.com is
// rejected with ErrInvalidURL. A nil browser is passed; presumably the URL is
// validated before the browser is touched.
func TestGetItemFromURL_InvalidHost(t *testing.T) {
	u, err := url.Parse("https://example.com/p/slug/123")
	if err != nil {
		// A broken fixture must not masquerade as a validation result.
		t.Fatalf("failed to parse test url: %v", err)
	}

	_, err = GetItemFromURL(context.Background(), nil, u)
	if err != ErrInvalidURL {
		t.Errorf("expected ErrInvalidURL, got %v", err)
	}
}
// TestGetItemFromURL_InvalidPath_NoP verifies that an aislegopher.com URL
// whose first path segment is "x" instead of "p" is rejected with
// ErrInvalidURL.
func TestGetItemFromURL_InvalidPath_NoP(t *testing.T) {
	u, err := url.Parse("https://aislegopher.com/x/slug/123")
	if err != nil {
		// A broken fixture must not masquerade as a validation result.
		t.Fatalf("failed to parse test url: %v", err)
	}

	_, err = GetItemFromURL(context.Background(), nil, u)
	if err != ErrInvalidURL {
		t.Errorf("expected ErrInvalidURL, got %v", err)
	}
}
// TestGetItemFromURL_InvalidPath_TooShort verifies that an aislegopher.com
// URL with only two path segments ("/p/slug") is rejected with ErrInvalidURL.
func TestGetItemFromURL_InvalidPath_TooShort(t *testing.T) {
	u, err := url.Parse("https://aislegopher.com/p/slug")
	if err != nil {
		// A broken fixture must not masquerade as a validation result.
		t.Fatalf("failed to parse test url: %v", err)
	}

	_, err = GetItemFromURL(context.Background(), nil, u)
	if err != ErrInvalidURL {
		t.Errorf("expected ErrInvalidURL, got %v", err)
	}
}
// TestGetItemFromURL_InvalidPath_TooLong verifies that an aislegopher.com URL
// with an extra trailing path segment ("/p/slug/123/extra") is rejected with
// ErrInvalidURL.
func TestGetItemFromURL_InvalidPath_TooLong(t *testing.T) {
	u, err := url.Parse("https://aislegopher.com/p/slug/123/extra")
	if err != nil {
		// A broken fixture must not masquerade as a validation result.
		t.Fatalf("failed to parse test url: %v", err)
	}

	_, err = GetItemFromURL(context.Background(), nil, u)
	if err != ErrInvalidURL {
		t.Errorf("expected ErrInvalidURL, got %v", err)
	}
}

View File

@@ -3,10 +3,10 @@ package main
import (
"context"
"fmt"
"io"
"net/url"
"os"
"gitea.stevedudenhoeffer.com/steve/go-extractor"
"gitea.stevedudenhoeffer.com/steve/go-extractor/cmd/browser/pkg/browser"
"gitea.stevedudenhoeffer.com/steve/go-extractor/sites/aislegopher"
"github.com/urfave/cli/v3"
@@ -22,11 +22,6 @@ func (f AisleGopherFlags) ToConfig(_ *cli.Command) aislegopher.Config {
return res
}
// deferClose closes cl when it is non-nil and discards whatever error Close
// returns, which makes it convenient to use directly in a defer statement.
func deferClose(cl io.Closer) {
	if cl == nil {
		return
	}
	_ = cl.Close()
}
func main() {
var flags []cli.Flag
flags = append(flags, browser.Flags...)
@@ -44,7 +39,7 @@ func main() {
return fmt.Errorf("failed to create browser: %w", err)
}
defer deferClose(b)
defer extractor.DeferClose(b)
arg := c.Args().First()

View File

@@ -4,7 +4,6 @@ import (
"context"
"errors"
"fmt"
"io"
"log/slog"
"net/url"
"strings"
@@ -39,12 +38,6 @@ func (c Config) validate() Config {
var DefaultConfig = Config{}
// deferClose closes cl when it is non-nil and discards whatever error Close
// returns, which makes it convenient to use directly in a defer statement.
func deferClose(cl io.Closer) {
	if cl == nil {
		return
	}
	_ = cl.Close()
}
// IsArchived checks if a url is archived. It returns the archived page as an extractor.Document if it is archived. NOTE(review): confirm what is returned (nil document vs. error) when the url is not archived.
func (c Config) IsArchived(ctx context.Context, b extractor.Browser, target string) (extractor.Document, error) {
c = c.validate()

View File

@@ -0,0 +1,37 @@
package archive
import (
"testing"
"time"
)
// TestConfig_Validate_Defaults checks that validate fills a zero-value Config
// with the https://archive.ph endpoint and a one-hour timeout.
func TestConfig_Validate_Defaults(t *testing.T) {
	const wantEndpoint = "https://archive.ph"
	const wantTimeout = 1 * time.Hour

	got := Config{}.validate()

	if got.Endpoint != wantEndpoint {
		t.Errorf("Endpoint = %q, want %q", got.Endpoint, wantEndpoint)
	}
	// Stop here on nil so the dereference below cannot panic.
	if got.Timeout == nil {
		t.Fatal("Timeout should not be nil after validate")
	}
	if *got.Timeout != wantTimeout {
		t.Errorf("Timeout = %v, want %v", *got.Timeout, wantTimeout)
	}
}
// TestConfig_Validate_Preserves checks that validate leaves an explicitly
// configured Endpoint and Timeout untouched.
func TestConfig_Validate_Preserves(t *testing.T) {
	timeout := 5 * time.Minute
	c := Config{
		Endpoint: "https://archive.org",
		Timeout:  &timeout,
	}
	c = c.validate()

	if c.Endpoint != "https://archive.org" {
		t.Errorf("Endpoint = %q, want %q", c.Endpoint, "https://archive.org")
	}
	// Guard the dereference: if validate ever dropped the pointer, the test
	// should report a failure rather than panic.
	if c.Timeout == nil {
		t.Fatal("Timeout should not be nil after validate")
	}
	if *c.Timeout != 5*time.Minute {
		t.Errorf("Timeout = %v, want %v", *c.Timeout, 5*time.Minute)
	}
}

View File

@@ -3,12 +3,13 @@ package main
import (
"context"
"fmt"
"github.com/urfave/cli/v3"
"io"
"os"
"strings"
"time"
"github.com/urfave/cli/v3"
"gitea.stevedudenhoeffer.com/steve/go-extractor"
"gitea.stevedudenhoeffer.com/steve/go-extractor/cmd/browser/pkg/browser"
"gitea.stevedudenhoeffer.com/steve/go-extractor/sites/duckduckgo"
)
@@ -49,12 +50,6 @@ func (f DuckDuckGoFlags) ToConfig(cmd *cli.Command) (duckduckgo.Config, error) {
return res, nil
}
// deferClose closes cl when it is non-nil and discards whatever error Close
// returns, which makes it convenient to use directly in a defer statement.
func deferClose(cl io.Closer) {
	if cl == nil {
		return
	}
	_ = cl.Close()
}
func main() {
var flags []cli.Flag
@@ -78,7 +73,7 @@ func main() {
}
b, err := browser.FromCommand(ctx, command)
defer deferClose(b)
defer extractor.DeferClose(b)
if err != nil {
return fmt.Errorf("failed to create browser: %w", err)
@@ -89,7 +84,7 @@ func main() {
return fmt.Errorf("failed to open search: %w", err)
}
defer deferClose(search)
defer extractor.DeferClose(search)
res := search.GetResults()
fmt.Println("Results:", res)

View File

@@ -3,7 +3,6 @@ package duckduckgo
import (
"context"
"fmt"
"io"
"log/slog"
"net/url"
@@ -71,12 +70,6 @@ type Result struct {
Description string
}
// deferClose closes cl when it is non-nil and discards whatever error Close
// returns, which makes it convenient to use directly in a defer statement.
func deferClose(cl io.Closer) {
	if cl == nil {
		return
	}
	_ = cl.Close()
}
func (c Config) OpenSearch(ctx context.Context, b extractor.Browser, query string) (SearchPage, error) {
u := c.ToSearchURL(query)
@@ -97,7 +90,7 @@ func (c Config) Search(ctx context.Context, b extractor.Browser, query string) (
slog.Info("searching", "url", u, "query", query, "config", c, "browser", b)
doc, err := b.Open(ctx, u.String(), extractor.OpenPageOptions{})
defer deferClose(doc)
defer extractor.DeferClose(doc)
if err != nil {
return nil, fmt.Errorf("failed to open url: %w", err)

View File

@@ -83,3 +83,34 @@ func TestConfig_ToSearchURL_NoRegion(t *testing.T) {
t.Errorf("kl should be empty when no region, got %q", u.Query().Get("kl"))
}
}
// TestConfig_ToSearchURL_Scheme checks that the URL built by ToSearchURL uses
// the https scheme.
func TestConfig_ToSearchURL_Scheme(t *testing.T) {
	const want = "https"

	cfg := Config{SafeSearch: SafeSearchOff}
	if got := cfg.ToSearchURL("test").Scheme; got != want {
		t.Errorf("Scheme = %q, want %q", got, want)
	}
}
// TestConfig_ToSearchURL_SpecialChars checks that a query containing spaces
// and an ampersand round-trips through the "q" query parameter unchanged.
func TestConfig_ToSearchURL_SpecialChars(t *testing.T) {
	const query = "go lang & testing"

	cfg := Config{SafeSearch: SafeSearchOff}
	searchURL := cfg.ToSearchURL(query)

	if got := searchURL.Query().Get("q"); got != query {
		t.Errorf("q = %q, want %q", got, query)
	}
}
// TestResult_ZeroValue checks that a zero-value Result has empty URL, Title
// and Description fields.
func TestResult_ZeroValue(t *testing.T) {
	var zero Result

	allEmpty := zero.URL == "" && zero.Title == "" && zero.Description == ""
	if !allEmpty {
		t.Error("zero-value Result should have empty fields")
	}
}
// TestDefaultConfig_SafeSearch checks that DefaultConfig has SafeSearch set to
// SafeSearchOff.
func TestDefaultConfig_SafeSearch(t *testing.T) {
	if got := DefaultConfig.SafeSearch; got != SafeSearchOff {
		t.Errorf("DefaultConfig.SafeSearch = %d, want %d", got, SafeSearchOff)
	}
}

View File

@@ -3,12 +3,12 @@ package main
import (
"context"
"fmt"
"io"
"os"
"strings"
"github.com/urfave/cli/v3"
"gitea.stevedudenhoeffer.com/steve/go-extractor"
"gitea.stevedudenhoeffer.com/steve/go-extractor/cmd/browser/pkg/browser"
"gitea.stevedudenhoeffer.com/steve/go-extractor/sites/google"
)
@@ -42,12 +42,6 @@ func (f GoogleFlags) ToConfig(_ context.Context, cmd *cli.Command) google.Config
return c
}
// deferClose closes cl when it is non-nil and discards whatever error Close
// returns, which makes it convenient to use directly in a defer statement.
func deferClose(cl io.Closer) {
	if cl == nil {
		return
	}
	_ = cl.Close()
}
func main() {
var flags []cli.Flag
@@ -67,7 +61,7 @@ func main() {
b, err := browser.FromCommand(ctx, cli)
defer deferClose(b)
defer extractor.DeferClose(b)
if err != nil {
return err

View File

@@ -3,7 +3,6 @@ package google
import (
"context"
"fmt"
"io"
"net/url"
"gitea.stevedudenhoeffer.com/steve/go-extractor"
@@ -48,12 +47,6 @@ type Result struct {
Description string
}
// deferClose closes cl when it is non-nil and discards whatever error Close
// returns, which makes it convenient to use directly in a defer statement.
func deferClose(cl io.Closer) {
	if cl == nil {
		return
	}
	_ = cl.Close()
}
func (c Config) Search(ctx context.Context, b extractor.Browser, query string) ([]Result, error) {
c = c.validate()
@@ -99,7 +92,7 @@ func (c Config) Search(ctx context.Context, b extractor.Browser, query string) (
return nil, fmt.Errorf("failed to open url: %w", err)
}
defer deferClose(doc)
defer extractor.DeferClose(doc)
var res []Result

View File

@@ -3,7 +3,6 @@ package megamillions
import (
"context"
"fmt"
"io"
"strconv"
"strings"
"time"
@@ -33,12 +32,6 @@ type NextDrawing struct {
Jackpot currency.Amount
}
// deferClose closes cl when it is non-nil and discards whatever error Close
// returns, which makes it convenient to use directly in a defer statement.
func deferClose(cl io.Closer) {
	if cl == nil {
		return
	}
	_ = cl.Close()
}
// netTicksToTime converts t, a count of 100-nanosecond ticks, into a
// time.Time. The tick epoch is assumed to be the .NET DateTime epoch
// (0001-01-01T00:00:00), based on the function name and the
// 621355968000000000 offset, which is the tick count at the Unix epoch.
//
// The offset is subtracted while the value is still in ticks, and the result
// is split into whole seconds and a nanosecond remainder. Multiplying the raw
// tick count by 100 first would overflow int64 for any present-day date, and
// subtracting the tick offset from a nanosecond value mixes units.
func netTicksToTime(t int64) time.Time {
	const (
		unixEpochTicks = 621355968000000000 // ticks elapsed at 1970-01-01T00:00:00Z
		ticksPerSecond = 10000000           // one tick is 100ns
		nanosPerTick   = 100
	)

	sinceUnix := t - unixEpochTicks
	// time.Unix accepts a negative nsec remainder, so ticks before 1970 work too.
	return time.Unix(sinceUnix/ticksPerSecond, (sinceUnix%ticksPerSecond)*nanosPerTick)
}
@@ -218,7 +211,7 @@ func (c Config) GetCurrent(ctx context.Context, b extractor.Browser) (*Drawing,
return nil, nil, err
}
defer deferClose(doc)
defer extractor.DeferClose(doc)
d, err := getDrawing(ctx, doc)

View File

@@ -41,3 +41,33 @@ func TestNetTicksToTime_DifferenceIsCorrect(t *testing.T) {
t.Errorf("expected 1 second difference, got %v", diff)
}
}
// TestNetTicksToTime_NotZero is a sanity check that netTicksToTime yields a
// non-zero time.Time for a typical tick count.
func TestNetTicksToTime_NotZero(t *testing.T) {
	const ticks int64 = 638396256000000000

	if netTicksToTime(ticks).IsZero() {
		t.Error("netTicksToTime should not return zero time for valid ticks")
	}
}
// TestConfig_Validate is a smoke test: it only confirms that calling validate
// on a zero-value Config completes without panicking.
func TestConfig_Validate(t *testing.T) {
	var cfg Config
	cfg = cfg.validate()
	// Nothing to assert; the original author notes validate is a no-op here.
	_ = cfg
}
// TestDrawing_ZeroValue checks that MegaBall and Megaplier are both zero on a
// zero-value Drawing.
func TestDrawing_ZeroValue(t *testing.T) {
	var zero Drawing

	if zero.MegaBall == 0 && zero.Megaplier == 0 {
		return
	}
	t.Error("zero-value Drawing should have zero fields")
}
// TestNextDrawing_ZeroValue checks that Date is empty on a zero-value
// NextDrawing.
func TestNextDrawing_ZeroValue(t *testing.T) {
	var zero NextDrawing

	if zero.Date == "" {
		return
	}
	t.Error("zero-value NextDrawing should have empty date")
}

View File

@@ -3,7 +3,6 @@ package powerball
import (
"context"
"fmt"
"io"
"strconv"
"strings"
"time"
@@ -32,12 +31,6 @@ type NextDrawing struct {
JackpotDollars int
}
// deferClose closes cl when it is non-nil and discards whatever error Close
// returns, which makes it convenient to use directly in a defer statement.
func deferClose(cl io.Closer) {
	if cl == nil {
		return
	}
	_ = cl.Close()
}
func getDrawing(_ context.Context, doc extractor.Document) (*Drawing, error) {
var drawing Drawing
@@ -196,7 +189,7 @@ func (c Config) GetCurrent(ctx context.Context, b extractor.Browser) (*Drawing,
return nil, nil, err
}
defer deferClose(doc)
defer extractor.DeferClose(doc)
d, err := getDrawing(ctx, doc)

View File

@@ -0,0 +1,34 @@
package powerball
import "testing"
// TestConfig_Validate is a smoke test: it only confirms that calling validate
// on a zero-value Config completes without panicking.
func TestConfig_Validate(t *testing.T) {
	var cfg Config
	cfg = cfg.validate()
	// Nothing to assert; the original author notes validate is a no-op for
	// the powerball Config.
	_ = cfg
}
// TestDefaultConfig only confirms that the package-level DefaultConfig exists
// and can be read; it makes no assertion about its contents.
func TestDefaultConfig(t *testing.T) {
	_ = DefaultConfig
}
// TestDrawing_ZeroValue checks that PowerBall, PowerPlay and every element of
// Numbers are zero on a zero-value Drawing.
func TestDrawing_ZeroValue(t *testing.T) {
	var zero Drawing

	scalarsZero := zero.PowerBall == 0 && zero.PowerPlay == 0
	if !scalarsZero {
		t.Error("zero-value Drawing should have zero fields")
	}

	for idx := 0; idx < len(zero.Numbers); idx++ {
		if got := zero.Numbers[idx]; got != 0 {
			t.Errorf("Numbers[%d] = %d, want 0", idx, got)
		}
	}
}
// TestNextDrawing_ZeroValue checks that Date is empty and JackpotDollars is
// zero on a zero-value NextDrawing.
func TestNextDrawing_ZeroValue(t *testing.T) {
	var zero NextDrawing

	if zero.Date == "" && zero.JackpotDollars == 0 {
		return
	}
	t.Error("zero-value NextDrawing should have empty/zero fields")
}

View File

@@ -4,8 +4,6 @@ import (
"context"
"encoding/json"
"fmt"
"io"
"gitea.stevedudenhoeffer.com/steve/go-extractor"
)
@@ -13,12 +11,6 @@ type Config struct{}
var DefaultConfig = Config{}
// deferClose closes cl when it is non-nil and discards whatever error Close
// returns, which makes it convenient to use directly in a defer statement.
func deferClose(cl io.Closer) {
	if cl == nil {
		return
	}
	_ = cl.Close()
}
// GetMostCommonDesktopUserAgent is a package-level convenience wrapper: it
// forwards ctx and b to DefaultConfig.GetMostCommonDesktopUserAgent and
// returns its results unchanged.
func GetMostCommonDesktopUserAgent(ctx context.Context, b extractor.Browser) (string, error) {
	agent, err := DefaultConfig.GetMostCommonDesktopUserAgent(ctx, b)
	return agent, err
}
@@ -30,7 +22,7 @@ func (c Config) GetMostCommonDesktopUserAgent(ctx context.Context, b extractor.B
return "", fmt.Errorf("failed to open useragents.me: %w", err)
}
defer deferClose(doc)
defer extractor.DeferClose(doc)
s := doc.Select("#most-common-desktop-useragents-json-csv > div:nth-child(1) > textarea:nth-child(4)")
text := ""

View File

@@ -0,0 +1,9 @@
package useragents
import "testing"
// TestDefaultConfig checks that DefaultConfig equals the zero-value Config.
// Previously the test only assigned DefaultConfig to a throwaway variable and
// therefore could never fail.
func TestDefaultConfig(t *testing.T) {
	if DefaultConfig != (Config{}) {
		t.Errorf("DefaultConfig = %+v, want zero-value Config", DefaultConfig)
	}
}

View File

@@ -3,10 +3,10 @@ package main
import (
"context"
"fmt"
"io"
"net/url"
"os"
"gitea.stevedudenhoeffer.com/steve/go-extractor"
"gitea.stevedudenhoeffer.com/steve/go-extractor/cmd/browser/pkg/browser"
"github.com/urfave/cli/v3"
@@ -14,12 +14,6 @@ import (
"gitea.stevedudenhoeffer.com/steve/go-extractor/sites/wegmans"
)
// deferClose closes cl when it is non-nil and discards whatever error Close
// returns, which makes it convenient to use directly in a defer statement.
func deferClose(cl io.Closer) {
	if cl == nil {
		return
	}
	_ = cl.Close()
}
type WegmansFlags []cli.Flag
var Flags = WegmansFlags{}
@@ -44,7 +38,7 @@ func main() {
cfg := Flags.ToConfig(cmd)
b, err := browser.FromCommand(ctx, cmd)
defer deferClose(b)
defer extractor.DeferClose(b)
if err != nil {
return fmt.Errorf("error creating browser: %w", err)

View File

@@ -3,7 +3,6 @@ package wegmans
import (
"context"
"errors"
"io"
"log/slog"
"net/url"
"strconv"
@@ -30,12 +29,6 @@ type Item struct {
Unit string
}
// deferClose closes c when it is non-nil and discards whatever error Close
// returns, which makes it convenient to use directly in a defer statement.
func deferClose(c io.Closer) {
	if c == nil {
		return
	}
	_ = c.Close()
}
func (c Config) GetItemPrice(ctx context.Context, b extractor.Browser, u *url.URL) (Item, error) {
if b == nil {
@@ -68,7 +61,7 @@ func (c Config) GetItemPrice(ctx context.Context, b extractor.Browser, u *url.UR
}
doc, err := b.Open(ctx, u.String(), extractor.OpenPageOptions{})
defer deferClose(doc)
defer extractor.DeferClose(doc)
if err != nil {
return Item{}, err

View File

@@ -0,0 +1,39 @@
package wegmans
import (
"context"
"net/url"
"testing"
)
// TestGetItemPrice_NilBrowser verifies that GetItemPrice returns ErrNilBrowser
// when called with a nil browser and an otherwise well-formed URL.
func TestGetItemPrice_NilBrowser(t *testing.T) {
	u, err := url.Parse("https://shop.wegmans.com/product/24921")
	if err != nil {
		// A broken fixture must not masquerade as a validation result.
		t.Fatalf("failed to parse test url: %v", err)
	}

	_, err = DefaultConfig.GetItemPrice(context.Background(), nil, u)
	if err != ErrNilBrowser {
		t.Errorf("expected ErrNilBrowser, got %v", err)
	}
}
// TestGetItemPrice_NilURL pins the message of the ErrNilURL sentinel. It does
// not call GetItemPrice.
func TestGetItemPrice_NilURL(t *testing.T) {
	// The nil-browser check runs before the nil-URL check, so the nil-URL
	// path cannot be reached without a real browser. Only the sentinel's
	// message is verified here.
	const want = "url is nil"

	if got := ErrNilURL.Error(); got != want {
		t.Errorf("ErrNilURL = %q, want %q", got, want)
	}
}
// TestGetItemPrice_ErrorSentinels pins the messages of the ErrInvalidURL and
// ErrNilBrowser sentinel errors.
func TestGetItemPrice_ErrorSentinels(t *testing.T) {
	sentinels := []struct {
		name string
		err  error
		want string
	}{
		{name: "ErrInvalidURL", err: ErrInvalidURL, want: "invalid url"},
		{name: "ErrNilBrowser", err: ErrNilBrowser, want: "browser is nil"},
	}

	for _, s := range sentinels {
		if got := s.err.Error(); got != s.want {
			t.Errorf("%s = %q, want %q", s.name, got, s.want)
		}
	}
}
// TestItem_ZeroValue checks that ID, Name, Price, UnitPrice and Unit all hold
// their zero values on a zero-value Item.
func TestItem_ZeroValue(t *testing.T) {
	var zero Item

	numbersZero := zero.ID == 0 && zero.Price == 0 && zero.UnitPrice == 0
	stringsEmpty := zero.Name == "" && zero.Unit == ""
	if numbersZero && stringsEmpty {
		return
	}
	t.Error("zero-value Item should have empty/zero fields")
}