Steve Dudenhoeffer
81ea656332
This update enhances the `Item` structure to include `UnitPrice` and `Unit` fields. Additional logic is implemented to extract and parse unit pricing details from the HTML, improving data accuracy and granularity.
118 lines
2.3 KiB
Go
118 lines
2.3 KiB
Go
package wegmans
|
|
|
|
import (
|
|
"context"
|
|
"errors"
|
|
"io"
|
|
"net/url"
|
|
"strconv"
|
|
"strings"
|
|
"time"
|
|
|
|
"gitea.stevedudenhoeffer.com/steve/go-extractor"
|
|
)
|
|
|
|
// Config carries the settings for the Wegmans scraper. It has no fields at
// present; it exists as the receiver for the package's operations, such as
// GetItemPrice.
type Config struct{}
|
|
|
|
// DefaultConfig is the zero-value Config.
var DefaultConfig Config
|
|
|
|
// Sentinel errors returned by GetItemPrice when its arguments are unusable.
var (
	// ErrNilBrowser is returned when the browser argument is nil.
	ErrNilBrowser = errors.New("browser is nil")

	// ErrNilURL is returned when the URL argument is nil.
	ErrNilURL = errors.New("url is nil")

	// ErrInvalidURL is returned when the URL path is not of the form
	// /product/<id>[/<slug>].
	ErrInvalidURL = errors.New("invalid url")
)
|
|
|
|
// Item is the product information GetItemPrice scrapes from a product page.
// A field that could not be found or parsed on the page is left at its zero
// value.
type Item struct {
	// ID is the numeric product ID taken from the URL path.
	ID int

	// Name is the text of the page's title heading.
	Name string

	// Price is the displayed price with the "$" sign and "," separators
	// stripped.
	Price float64

	// UnitPrice is the amount in front of the "/" in the page's per-unit
	// price text.
	UnitPrice float64

	// Unit is the text after the "/" in the page's per-unit price text,
	// i.e. what UnitPrice is measured per.
	Unit string
}
|
|
|
|
// deferClose closes c if it is non-nil and discards the error from Close.
// It is meant to be used in a defer statement, where the Close error has
// nowhere useful to go.
func deferClose(c io.Closer) {
	if c == nil {
		return
	}
	_ = c.Close()
}
|
|
|
|
func (c Config) GetItemPrice(ctx context.Context, b extractor.Browser, u *url.URL) (Item, error) {
|
|
if b == nil {
|
|
return Item{}, ErrNilBrowser
|
|
}
|
|
|
|
if u == nil {
|
|
return Item{}, ErrNilURL
|
|
}
|
|
|
|
// urls in the format of:
|
|
// https://shop.wegmans.com/product/24921[/wegmans-frozen-thin-crust-uncured-pepperoni-pizza]
|
|
// (the slug is optional)
|
|
|
|
// get the product ID
|
|
a := strings.Split(u.Path, "/")
|
|
|
|
if len(a) < 3 {
|
|
return Item{}, ErrInvalidURL
|
|
}
|
|
|
|
if a[1] != "product" {
|
|
return Item{}, ErrInvalidURL
|
|
}
|
|
|
|
id, _ := strconv.Atoi(a[2])
|
|
|
|
if id == 0 {
|
|
return Item{}, ErrInvalidURL
|
|
}
|
|
|
|
doc, err := b.Open(ctx, u.String(), extractor.OpenPageOptions{})
|
|
defer deferClose(doc)
|
|
|
|
if err != nil {
|
|
return Item{}, err
|
|
}
|
|
|
|
timeout := 15 * time.Second
|
|
_ = doc.WaitForNetworkIdle(&timeout)
|
|
|
|
res := Item{
|
|
ID: id,
|
|
}
|
|
|
|
titles := doc.Select("h1[data-test]")
|
|
|
|
if len(titles) != 0 {
|
|
res.Name, _ = titles[0].Text()
|
|
}
|
|
|
|
prices := doc.Select("span[data-test=\"amount\"] span:nth-child(1)")
|
|
|
|
if len(prices) != 0 {
|
|
priceStr, _ := prices[0].Text()
|
|
priceStr = strings.ReplaceAll(priceStr, "$", "")
|
|
priceStr = strings.ReplaceAll(priceStr, ",", "")
|
|
price, _ := strconv.ParseFloat(priceStr, 64)
|
|
res.Price = price
|
|
}
|
|
|
|
unitPrices := doc.Select(`span[data-test="per-unit-price"]`)
|
|
|
|
if len(unitPrices) != 0 {
|
|
unitPriceStr, _ := unitPrices[0].Text()
|
|
unitPriceStr = strings.TrimSpace(unitPriceStr)
|
|
unitPriceStr = strings.ReplaceAll(unitPriceStr, "(", "")
|
|
unitPriceStr = strings.ReplaceAll(unitPriceStr, ")", "")
|
|
unitPriceStr = strings.ReplaceAll(unitPriceStr, "$", "")
|
|
unitPriceStr = strings.ReplaceAll(unitPriceStr, ",", "")
|
|
|
|
units := strings.Split(unitPriceStr, "/")
|
|
|
|
if len(units) > 1 {
|
|
res.Unit = strings.TrimSpace(units[1])
|
|
res.UnitPrice, _ = strconv.ParseFloat(units[0], 64)
|
|
}
|
|
}
|
|
|
|
return res, nil
|
|
}
|