Files
go-extractor/sites/wegmans/wegmans.go
Steve Dudenhoeffer cb2ed10cfd
Some checks failed
CI / build (push) Failing after 2m4s
CI / test (push) Failing after 2m6s
CI / vet (push) Failing after 2m19s
refactor: restructure API, deduplicate code, expand test coverage
- Extract shared DeferClose helper, removing 14 duplicate copies
- Rename PlayWright-prefixed types to cleaner names (BrowserOptions,
  BrowserSelection, NewBrowser, etc.)
- Rename fields: ServerAddress, RequireServer (was DontLaunchOnConnectFailure)
- Extract shared initBrowser/mergeOptions into browser_init.go,
  deduplicating ~120 lines between NewBrowser and NewInteractiveBrowser
- Remove unused locator field from document struct
- Add tests for all previously untested packages (archive, aislegopher,
  wegmans, useragents, powerball) and expand existing test suites
- Add MIGRATION.md documenting all breaking API changes

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-09 13:59:47 -05:00

125 lines
2.8 KiB
Go

package wegmans
import (
"context"
"errors"
"log/slog"
"net/url"
"strconv"
"strings"
"time"
"gitea.stevedudenhoeffer.com/steve/go-extractor"
)
// Config is the receiver type for the Wegmans scraping methods in this
// package. It currently carries no settings.
type Config struct{}

// DefaultConfig is the zero-value Config.
var DefaultConfig Config
// Sentinel errors returned by Config.GetItemPrice when its arguments fail
// validation.
var (
	// ErrNilBrowser is returned when the browser argument is nil.
	ErrNilBrowser = errors.New("browser is nil")

	// ErrNilURL is returned when the URL argument is nil.
	ErrNilURL = errors.New("url is nil")

	// ErrInvalidURL is returned when the URL path is not of the form
	// /product/<id>[/<slug>].
	ErrInvalidURL = errors.New("invalid url")
)
// Item is the product information scraped from a single Wegmans product page
// by Config.GetItemPrice. Any field that could not be found or parsed is left
// at its zero value.
type Item struct {
	// ID is the numeric product ID taken from the URL path.
	ID int

	// Name is the text of the page's title heading.
	Name string

	// Price is the displayed price with "$" and "," removed.
	Price float64

	// UnitPrice is the numeric part of the displayed per-unit price.
	UnitPrice float64

	// Unit is the unit that UnitPrice refers to, with any trailing period
	// removed (so "lb." is stored as "lb").
	Unit string
}
// GetItemPrice opens the Wegmans product page at u in browser b and scrapes
// the product's name, price and per-unit price from it.
//
// u must have a path of the form /product/<id>[/<slug>], for example
// https://shop.wegmans.com/product/24921/wegmans-frozen-thin-crust-uncured-pepperoni-pizza
// (the slug is optional). <id> must be a positive integer.
//
// It returns ErrNilBrowser if b is nil, ErrNilURL if u is nil, ErrInvalidURL
// if the path does not have the expected form, and the error from b.Open if
// the page cannot be opened. Scraping itself is best-effort: a field whose
// element is not found is left at its zero value, and a value whose text
// cannot be parsed is logged as a warning rather than returned as an error.
func (c Config) GetItemPrice(ctx context.Context, b extractor.Browser, u *url.URL) (Item, error) {
	if b == nil {
		return Item{}, ErrNilBrowser
	}
	if u == nil {
		return Item{}, ErrNilURL
	}

	id, err := productIDFromPath(u.Path)
	if err != nil {
		return Item{}, err
	}

	doc, err := b.Open(ctx, u.String(), extractor.OpenPageOptions{})
	if err != nil {
		return Item{}, err
	}
	// Register the close only after Open has succeeded, so a failed Open never
	// hands its document result to DeferClose.
	defer extractor.DeferClose(doc)

	// Wait up to 15s for network activity to stop. The result is deliberately
	// ignored: extraction is attempted on whatever has loaded by then.
	timeout := 15 * time.Second
	_ = doc.WaitForNetworkIdle(&timeout)

	res := Item{ID: id}

	if titles := doc.Select("h1[data-testid]"); len(titles) != 0 {
		// Best-effort: on error the name is whatever Text returned.
		res.Name, _ = titles[0].Text()
	}

	prices := doc.Select("div.component--product-price:nth-child(1) > div:nth-child(1) > span:nth-child(1) > span:nth-child(2)")
	slog.Info("prices", "len", len(prices))
	if len(prices) != 0 {
		// A Text error is not handled separately; unusable text surfaces as
		// the parse warning below.
		priceStr, _ := prices[0].Text()
		slog.Info("price", "0", prices[0], "text", priceStr)

		// if there's a "/" in the price, then it's in the format of like
		// "1.99/ea", so keep only the part before it
		priceStr, _, _ = strings.Cut(priceStr, "/")
		priceStr = stripCurrency(priceStr)

		price, err := strconv.ParseFloat(priceStr, 64)
		if err != nil {
			slog.Warn("wegmans: could not parse price", "text", priceStr, "err", err)
		}
		slog.Info("price", "0", prices[0], "text", priceStr, "price", price)
		res.Price = price
	}

	unitPrices := doc.Select(`div.component--product-price:nth-child(1) span.price-per-unit`)
	if len(unitPrices) != 0 {
		unitPriceStr, _ := unitPrices[0].Text()
		unitPriceStr = strings.ReplaceAll(unitPriceStr, "(", "")
		unitPriceStr = strings.ReplaceAll(unitPriceStr, ")", "")
		unitPriceStr = stripCurrency(unitPriceStr)

		// Expect "<amount>/<unit>"; anything without a "/" is left unparsed.
		units := strings.Split(unitPriceStr, "/")
		if len(units) > 1 {
			// Trim the amount as well, so "1.99 / lb" parses.
			amount := strings.TrimSpace(units[0])
			unitPrice, err := strconv.ParseFloat(amount, 64)
			if err != nil {
				slog.Warn("wegmans: could not parse unit price", "text", amount, "err", err)
			}
			res.UnitPrice = unitPrice

			// the unit might be like "lb.", so strip a trailing period
			res.Unit = strings.TrimSuffix(strings.TrimSpace(units[1]), ".")
		}
	}

	slog.Info("res", "res", res)
	return res, nil
}

// productIDFromPath extracts the product ID from a URL path of the form
// /product/<id>[/<slug>]. It returns ErrInvalidURL if the path has fewer than
// three "/"-separated segments, the second segment is not "product", or the
// third is not a positive integer.
func productIDFromPath(path string) (int, error) {
	// A leading "/" yields an empty first segment, so the ID is segment 2.
	segs := strings.Split(path, "/")
	if len(segs) < 3 || segs[1] != "product" {
		return 0, ErrInvalidURL
	}
	id, err := strconv.Atoi(segs[2])
	if err != nil || id <= 0 {
		return 0, ErrInvalidURL
	}
	return id, nil
}

// stripCurrency removes "$", "," and surrounding whitespace from a displayed
// amount so the remainder can be passed to strconv.ParseFloat.
func stripCurrency(s string) string {
	s = strings.ReplaceAll(s, "$", "")
	s = strings.ReplaceAll(s, ",", "")
	return strings.TrimSpace(s)
}