fix: surface parsing errors instead of silently discarding them
All checks were successful
CI / vet (pull_request) Successful in 1m10s
CI / build (pull_request) Successful in 1m21s
CI / test (pull_request) Successful in 1m28s

Return an error when a required field (ID, price) fails to parse, and
log a warning when an optional field (title, description, unit price)
fails to parse, across all site extractors, instead of silently
discarding those errors with `_ =`.

Closes #24
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-15 16:31:56 +00:00
parent 7f24e97131
commit a9711ce904
4 changed files with 77 additions and 27 deletions

View File

@@ -3,6 +3,7 @@ package wegmans
import (
"context"
"errors"
"fmt"
"log/slog"
"net/url"
"strconv"
@@ -54,7 +55,10 @@ func (c Config) GetItemPrice(ctx context.Context, b extractor.Browser, u *url.UR
return Item{}, ErrInvalidURL
}
id, _ := strconv.Atoi(a[2])
id, err := strconv.Atoi(a[2])
if err != nil {
return Item{}, fmt.Errorf("failed to parse product ID %q: %w", a[2], err)
}
if id == 0 {
return Item{}, ErrInvalidURL
@@ -67,7 +71,9 @@ func (c Config) GetItemPrice(ctx context.Context, b extractor.Browser, u *url.UR
defer extractor.DeferClose(doc)
timeout := 15 * time.Second
_ = doc.WaitForNetworkIdle(&timeout)
if err := doc.WaitForNetworkIdle(&timeout); err != nil {
slog.Warn("WaitForNetworkIdle failed", "err", err)
}
res := Item{
ID: id,
@@ -76,20 +82,29 @@ func (c Config) GetItemPrice(ctx context.Context, b extractor.Browser, u *url.UR
titles := doc.Select("h1[data-testid]")
if len(titles) != 0 {
res.Name, _ = titles[0].Text()
res.Name, err = titles[0].Text()
if err != nil {
slog.Warn("failed to get product name", "err", err)
}
}
prices := doc.Select("div.component--product-price:nth-child(1) > div:nth-child(1) > span:nth-child(1) > span:nth-child(2)")
slog.Info("prices", "len", len(prices))
if len(prices) != 0 {
priceStr, _ := prices[0].Text()
priceStr, err := prices[0].Text()
if err != nil {
return res, fmt.Errorf("failed to get price text: %w", err)
}
slog.Info("price", "0", prices[0], "text", priceStr)
priceStr = strings.ReplaceAll(priceStr, "$", "")
priceStr = strings.ReplaceAll(priceStr, ",", "")
// if there's a "/" in the price, then it's in the format of like "1.99/ea", so split it off
priceStr = strings.Split(priceStr, "/")[0]
price, _ := strconv.ParseFloat(priceStr, 64)
price, err := strconv.ParseFloat(priceStr, 64)
if err != nil {
return res, fmt.Errorf("failed to parse price %q: %w", priceStr, err)
}
slog.Info("price", "0", prices[0], "text", priceStr, "price", price)
res.Price = price
}
@@ -97,22 +112,29 @@ func (c Config) GetItemPrice(ctx context.Context, b extractor.Browser, u *url.UR
unitPrices := doc.Select(`div.component--product-price:nth-child(1) span.price-per-unit`)
if len(unitPrices) != 0 {
unitPriceStr, _ := unitPrices[0].Text()
unitPriceStr = strings.TrimSpace(unitPriceStr)
unitPriceStr = strings.ReplaceAll(unitPriceStr, "(", "")
unitPriceStr = strings.ReplaceAll(unitPriceStr, ")", "")
unitPriceStr = strings.ReplaceAll(unitPriceStr, "$", "")
unitPriceStr = strings.ReplaceAll(unitPriceStr, ",", "")
unitPriceStr, err := unitPrices[0].Text()
if err != nil {
slog.Warn("failed to get unit price text", "err", err)
} else {
unitPriceStr = strings.TrimSpace(unitPriceStr)
unitPriceStr = strings.ReplaceAll(unitPriceStr, "(", "")
unitPriceStr = strings.ReplaceAll(unitPriceStr, ")", "")
unitPriceStr = strings.ReplaceAll(unitPriceStr, "$", "")
unitPriceStr = strings.ReplaceAll(unitPriceStr, ",", "")
units := strings.Split(unitPriceStr, "/")
units := strings.Split(unitPriceStr, "/")
if len(units) > 1 {
res.Unit = strings.TrimSpace(units[1])
res.UnitPrice, _ = strconv.ParseFloat(units[0], 64)
if len(units) > 1 {
res.Unit = strings.TrimSpace(units[1])
res.UnitPrice, err = strconv.ParseFloat(units[0], 64)
if err != nil {
slog.Warn("failed to parse unit price", "text", units[0], "err", err)
}
// the unit might be like "lb.", so if it ends in a period, then just strip it off
if strings.HasSuffix(res.Unit, ".") {
res.Unit = strings.TrimSuffix(res.Unit, ".")
// the unit might be like "lb.", so if it ends in a period, then just strip it off
if strings.HasSuffix(res.Unit, ".") {
res.Unit = strings.TrimSuffix(res.Unit, ".")
}
}
}
}