fix: surface parsing errors instead of silently discarding them
All checks were successful
CI / vet (pull_request) Successful in 1m10s
CI / build (pull_request) Successful in 1m21s
CI / test (pull_request) Successful in 1m28s

Return an error when a required field (ID, price) fails to parse, and
log a warning when an optional field (title, description, unit price)
fails, across all site extractors, instead of silently discarding the
error with a blank-identifier assignment (`_ =`).

Closes #24
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-15 16:31:56 +00:00
parent 7f24e97131
commit a9711ce904
4 changed files with 77 additions and 27 deletions

View File

@@ -4,6 +4,7 @@ import (
"context"
"errors"
"fmt"
"log/slog"
"net/url"
"strconv"
"strings"
@@ -48,7 +49,11 @@ func (c Config) GetItemFromURL(ctx context.Context, b extractor.Browser, u *url.
return res, ErrInvalidURL
}
res.ID, _ = strconv.Atoi(a[3])
var err error
res.ID, err = strconv.Atoi(a[3])
if err != nil {
return res, fmt.Errorf("failed to parse product ID %q: %w", a[3], err)
}
doc, err := b.Open(ctx, u.String(), extractor.OpenPageOptions{})
if err != nil {
@@ -59,16 +64,25 @@ func (c Config) GetItemFromURL(ctx context.Context, b extractor.Browser, u *url.
names := doc.Select(".h4")
if len(names) > 0 {
res.Name, _ = names[0].Text()
res.Name, err = names[0].Text()
if err != nil {
slog.Warn("failed to get product name", "err", err)
}
}
prices := doc.Select(".h2")
if len(prices) > 0 {
priceStr, _ := prices[0].Text()
priceStr, err := prices[0].Text()
if err != nil {
return res, fmt.Errorf("failed to get price text: %w", err)
}
priceStr = strings.ReplaceAll(priceStr, "$", "")
priceStr = strings.TrimSpace(priceStr)
res.Price, _ = strconv.ParseFloat(priceStr, 64)
res.Price, err = strconv.ParseFloat(priceStr, 64)
if err != nil {
return res, fmt.Errorf("failed to parse price %q: %w", priceStr, err)
}
}
return res, nil