Adjusted HTML selectors for improved compatibility and updated price parsing logic to handle additional formats. Added logging to provide better debugging insights during price extraction.
125 lines
2.7 KiB
Go
125 lines
2.7 KiB
Go
package wegmans
|
|
|
|
import (
|
|
"context"
|
|
"errors"
|
|
"io"
|
|
"log/slog"
|
|
"net/url"
|
|
"strconv"
|
|
"strings"
|
|
"time"
|
|
|
|
"gitea.stevedudenhoeffer.com/steve/go-extractor"
|
|
)
|
|
|
|
// Config holds the settings used when scraping Wegmans product pages.
// It currently has no fields; it exists as the receiver for GetItemPrice.
type Config struct{}

// DefaultConfig is the zero-value Config.
var DefaultConfig = Config{}
|
|
|
|
// Sentinel errors returned by GetItemPrice before any page is opened.
// Compare against them with errors.Is.
var (
	// ErrNilBrowser is returned when the browser argument is nil.
	ErrNilBrowser = errors.New("browser is nil")

	// ErrNilURL is returned when the URL argument is nil.
	ErrNilURL = errors.New("url is nil")

	// ErrInvalidURL is returned when the URL path does not identify a
	// product page, i.e. it is not of the form /product/<numeric id>.
	ErrInvalidURL = errors.New("invalid url")
)
|
|
|
|
// Item is the data scraped from a single Wegmans product page.
// Fields whose page element was not found keep their zero value.
type Item struct {
	// ID is the numeric product ID taken from the URL path.
	ID int

	// Name is the text of the product page's title heading.
	Name string

	// Price is the displayed price with "$" and "," stripped.
	Price float64

	// UnitPrice is the displayed price per Unit with "$" and "," stripped.
	UnitPrice float64

	// Unit is the text following the "/" in the per-unit price.
	Unit string
}
|
|
|
|
// deferClose closes c if it is non-nil and discards the error from Close.
// It is meant to be used in a defer statement, where a value may be nil
// because the call that produced it failed.
func deferClose(c io.Closer) {
	if c == nil {
		return
	}
	// The error is dropped on purpose: this runs during cleanup, where the
	// caller has no way to act on it.
	_ = c.Close()
}
|
|
|
|
func (c Config) GetItemPrice(ctx context.Context, b extractor.Browser, u *url.URL) (Item, error) {
|
|
|
|
if b == nil {
|
|
return Item{}, ErrNilBrowser
|
|
}
|
|
|
|
if u == nil {
|
|
return Item{}, ErrNilURL
|
|
}
|
|
|
|
// urls in the format of:
|
|
// https://shop.wegmans.com/product/24921[/wegmans-frozen-thin-crust-uncured-pepperoni-pizza]
|
|
// (the slug is optional)
|
|
|
|
// get the product ID
|
|
a := strings.Split(u.Path, "/")
|
|
|
|
if len(a) < 3 {
|
|
return Item{}, ErrInvalidURL
|
|
}
|
|
|
|
if a[1] != "product" {
|
|
return Item{}, ErrInvalidURL
|
|
}
|
|
|
|
id, _ := strconv.Atoi(a[2])
|
|
|
|
if id == 0 {
|
|
return Item{}, ErrInvalidURL
|
|
}
|
|
|
|
doc, err := b.Open(ctx, u.String(), extractor.OpenPageOptions{})
|
|
defer deferClose(doc)
|
|
|
|
if err != nil {
|
|
return Item{}, err
|
|
}
|
|
|
|
timeout := 15 * time.Second
|
|
_ = doc.WaitForNetworkIdle(&timeout)
|
|
|
|
res := Item{
|
|
ID: id,
|
|
}
|
|
|
|
titles := doc.Select("h1[data-testid]")
|
|
|
|
if len(titles) != 0 {
|
|
res.Name, _ = titles[0].Text()
|
|
}
|
|
|
|
prices := doc.Select("div.component--product-price:nth-child(1) > div:nth-child(1) > span:nth-child(1) > span:nth-child(2)")
|
|
|
|
slog.Info("prices", "len", len(prices))
|
|
if len(prices) != 0 {
|
|
priceStr, _ := prices[0].Text()
|
|
slog.Info("price", "0", prices[0], "text", priceStr)
|
|
priceStr = strings.ReplaceAll(priceStr, "$", "")
|
|
priceStr = strings.ReplaceAll(priceStr, ",", "")
|
|
// if there's a "/" in the price, then it's in the format of like "1.99/ea", so split it off
|
|
priceStr = strings.Split(priceStr, "/")[0]
|
|
price, _ := strconv.ParseFloat(priceStr, 64)
|
|
slog.Info("price", "0", prices[0], "text", priceStr, "price", price)
|
|
res.Price = price
|
|
}
|
|
|
|
unitPrices := doc.Select(`div.component--product-price:nth-child(1) span.price-per-unit`)
|
|
|
|
if len(unitPrices) != 0 {
|
|
unitPriceStr, _ := unitPrices[0].Text()
|
|
unitPriceStr = strings.TrimSpace(unitPriceStr)
|
|
unitPriceStr = strings.ReplaceAll(unitPriceStr, "(", "")
|
|
unitPriceStr = strings.ReplaceAll(unitPriceStr, ")", "")
|
|
unitPriceStr = strings.ReplaceAll(unitPriceStr, "$", "")
|
|
unitPriceStr = strings.ReplaceAll(unitPriceStr, ",", "")
|
|
|
|
units := strings.Split(unitPriceStr, "/")
|
|
|
|
if len(units) > 1 {
|
|
res.Unit = strings.TrimSpace(units[1])
|
|
res.UnitPrice, _ = strconv.ParseFloat(units[0], 64)
|
|
}
|
|
}
|
|
|
|
return res, nil
|
|
}
|