Files
go-extractor/sites/wegmans/wegmans.go
Steve Dudenhoeffer a9711ce904
All checks were successful
CI / vet (pull_request) Successful in 1m10s
CI / build (pull_request) Successful in 1m21s
CI / test (pull_request) Successful in 1m28s
fix: surface parsing errors instead of silently discarding them
Return errors for required fields (ID, price) and log warnings for
optional fields (title, description, unit price) across all site
extractors instead of silently discarding them with _ =.

Closes #24
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-15 16:31:56 +00:00

146 lines
3.5 KiB
Go

package wegmans
import (
"context"
"errors"
"fmt"
"log/slog"
"net/url"
"strconv"
"strings"
"time"
"gitea.stevedudenhoeffer.com/steve/go-extractor"
)
// Config carries the settings for the Wegmans extractor. It has no fields at
// present, so the zero value is a complete configuration.
type Config struct{}
// DefaultConfig is the zero-valued Config.
var DefaultConfig Config
// Sentinel errors returned by GetItemPrice for unusable arguments.
var (
	// ErrNilBrowser reports that no browser was supplied.
	ErrNilBrowser = errors.New("browser is nil")
	// ErrNilURL reports that no URL was supplied.
	ErrNilURL = errors.New("url is nil")
	// ErrInvalidURL reports that the URL path is not a valid /product/<id> path.
	ErrInvalidURL = errors.New("invalid url")
)
// Item is the product data that GetItemPrice scrapes from a Wegmans product
// page.
type Item struct {
	// ID is the numeric product ID parsed from the product URL.
	ID int
	// Name is the text of the product's title heading.
	Name string
	// Price is the listed price and UnitPrice is the price per Unit; both are
	// parsed from page text with "$" and "," removed.
	Price, UnitPrice float64
	// Unit is the unit of measure UnitPrice refers to (for example "lb"),
	// without a trailing period.
	Unit string
}
// GetItemPrice opens the Wegmans product page at u with b and scrapes the
// product's name, price, and unit price from it.
//
// u is expected to have a path of the form /product/<id>[/<slug>], for example
// https://shop.wegmans.com/product/24921/wegmans-frozen-thin-crust-uncured-pepperoni-pizza
// (the slug is optional).
//
// Errors:
//   - ErrNilBrowser or ErrNilURL when b or u is nil.
//   - ErrInvalidURL when the path is not a product path or the ID is not
//     positive.
//   - a wrapped strconv error when the ID or the price text is not numeric.
//   - the error from b.Open when the page cannot be opened.
//
// When reading or parsing the price fails, the partially filled Item is
// returned alongside the error. The name and unit price are optional: problems
// reading them are logged as warnings and do not fail the call. If the page
// has no price element, Price stays zero and no error is returned.
func (c Config) GetItemPrice(ctx context.Context, b extractor.Browser, u *url.URL) (Item, error) {
	if b == nil {
		return Item{}, ErrNilBrowser
	}
	if u == nil {
		return Item{}, ErrNilURL
	}

	id, err := parseProductID(u.Path)
	if err != nil {
		return Item{}, err
	}

	doc, err := b.Open(ctx, u.String(), extractor.OpenPageOptions{})
	if err != nil {
		return Item{}, err
	}
	defer extractor.DeferClose(doc)

	// Waiting for the network to go idle is best effort: on failure, scrape
	// whatever the page contains at this point.
	timeout := 15 * time.Second
	if err := doc.WaitForNetworkIdle(&timeout); err != nil {
		slog.Warn("WaitForNetworkIdle failed", "err", err)
	}

	res := Item{ID: id}

	if titles := doc.Select("h1[data-testid]"); len(titles) != 0 {
		res.Name, err = titles[0].Text()
		if err != nil {
			slog.Warn("failed to get product name", "err", err)
		}
	}

	prices := doc.Select("div.component--product-price:nth-child(1) > div:nth-child(1) > span:nth-child(1) > span:nth-child(2)")
	slog.Info("prices", "len", len(prices))
	if len(prices) != 0 {
		priceStr, err := prices[0].Text()
		if err != nil {
			return res, fmt.Errorf("failed to get price text: %w", err)
		}
		slog.Info("price", "0", prices[0], "text", priceStr)

		price, err := parsePrice(priceStr)
		if err != nil {
			return res, err
		}
		slog.Info("price", "0", prices[0], "text", priceStr, "price", price)
		res.Price = price
	}

	unitPrices := doc.Select(`div.component--product-price:nth-child(1) span.price-per-unit`)
	if len(unitPrices) != 0 {
		unitPriceStr, err := unitPrices[0].Text()
		if err != nil {
			slog.Warn("failed to get unit price text", "err", err)
		} else {
			res.UnitPrice, res.Unit, err = parseUnitPrice(unitPriceStr)
			if err != nil {
				slog.Warn("failed to parse unit price", "text", unitPriceStr, "err", err)
			}
		}
	}

	slog.Info("res", "res", res)
	return res, nil
}

// parseProductID extracts the numeric product ID from a URL path of the form
// /product/<id>[/<slug>]. It returns ErrInvalidURL when the path has another
// shape or the ID is not positive, and a wrapped strconv error when the ID
// segment is not an integer.
func parseProductID(path string) (int, error) {
	parts := strings.Split(path, "/")
	if len(parts) < 3 || parts[1] != "product" {
		return 0, ErrInvalidURL
	}

	id, err := strconv.Atoi(parts[2])
	if err != nil {
		return 0, fmt.Errorf("failed to parse product ID %q: %w", parts[2], err)
	}
	if id <= 0 {
		return 0, ErrInvalidURL
	}
	return id, nil
}

// parsePrice converts price text such as "$1,299.00" or "$1.99/ea" into a
// number. Anything from the first "/" onward is discarded, "$" and "," are
// removed, and surrounding whitespace is trimmed because strconv.ParseFloat
// does not accept it.
func parsePrice(text string) (float64, error) {
	// A "/" means the text looks like "1.99/ea"; only the amount is wanted.
	amount, _, _ := strings.Cut(text, "/")
	amount = strings.ReplaceAll(amount, "$", "")
	amount = strings.ReplaceAll(amount, ",", "")
	amount = strings.TrimSpace(amount)

	price, err := strconv.ParseFloat(amount, 64)
	if err != nil {
		return 0, fmt.Errorf("failed to parse price %q: %w", amount, err)
	}
	return price, nil
}

// parseUnitPrice splits unit-price text such as "($4.99/lb.)" into the numeric
// price and the unit name ("lb"). Text without a "/" yields zero values and a
// nil error. When the amount is not numeric, the unit is still returned
// together with the strconv error.
func parseUnitPrice(text string) (float64, string, error) {
	cleaned := text
	for _, junk := range []string{"(", ")", "$", ","} {
		cleaned = strings.ReplaceAll(cleaned, junk, "")
	}

	parts := strings.Split(cleaned, "/")
	if len(parts) < 2 {
		return 0, "", nil
	}

	// The unit might be written like "lb.", so strip one trailing period.
	unit := strings.TrimSuffix(strings.TrimSpace(parts[1]), ".")
	price, err := strconv.ParseFloat(strings.TrimSpace(parts[0]), 64)
	return price, unit, err
}