From a9711ce904c5b641852828be821510b05e8fd797 Mon Sep 17 00:00:00 2001 From: Steve Dudenhoeffer Date: Sun, 15 Feb 2026 16:31:56 +0000 Subject: [PATCH] fix: surface parsing errors instead of silently discarding them Return errors for required fields (ID, price) and log warnings for optional fields (title, description, unit price) across all site extractors instead of silently discarding them with _ =. Closes #24 Co-Authored-By: Claude Opus 4.6 --- sites/aislegopher/aislegopher.go | 22 +++++++++--- sites/duckduckgo/page.go | 13 +++++-- sites/google/google.go | 11 ++++-- sites/wegmans/wegmans.go | 58 ++++++++++++++++++++++---------- 4 files changed, 77 insertions(+), 27 deletions(-) diff --git a/sites/aislegopher/aislegopher.go b/sites/aislegopher/aislegopher.go index aa128af..4b07390 100644 --- a/sites/aislegopher/aislegopher.go +++ b/sites/aislegopher/aislegopher.go @@ -4,6 +4,7 @@ import ( "context" "errors" "fmt" + "log/slog" "net/url" "strconv" "strings" @@ -48,7 +49,11 @@ func (c Config) GetItemFromURL(ctx context.Context, b extractor.Browser, u *url. return res, ErrInvalidURL } - res.ID, _ = strconv.Atoi(a[3]) + var err error + res.ID, err = strconv.Atoi(a[3]) + if err != nil { + return res, fmt.Errorf("failed to parse product ID %q: %w", a[3], err) + } doc, err := b.Open(ctx, u.String(), extractor.OpenPageOptions{}) if err != nil { @@ -59,16 +64,25 @@ func (c Config) GetItemFromURL(ctx context.Context, b extractor.Browser, u *url. names := doc.Select(".h4") if len(names) > 0 { - res.Name, _ = names[0].Text() + res.Name, err = names[0].Text() + if err != nil { + slog.Warn("failed to get product name", "err", err) + } } prices := doc.Select(".h2") if len(prices) > 0 { - priceStr, _ := prices[0].Text() + priceStr, err := prices[0].Text() + if err != nil { + return res, fmt.Errorf("failed to get price text: %w", err) + } priceStr = strings.ReplaceAll(priceStr, "$", "") priceStr = strings.TrimSpace(priceStr) - res.Price, _ = strconv.ParseFloat(priceStr, 64) + res.Price, err = strconv.ParseFloat(priceStr, 64) + if err != nil { + return res, fmt.Errorf("failed to parse price %q: %w", priceStr, err) + } } return res, nil diff --git a/sites/duckduckgo/page.go b/sites/duckduckgo/page.go index a1e0df4..88f2bae 100644 --- a/sites/duckduckgo/page.go +++ b/sites/duckduckgo/page.go @@ -2,9 +2,10 @@ package duckduckgo import ( "fmt" - "gitea.stevedudenhoeffer.com/steve/go-extractor" "io" "log/slog" + + "gitea.stevedudenhoeffer.com/steve/go-extractor" ) type SearchPage interface { @@ -44,13 +45,19 @@ func extractResults(doc extractor.Node) ([]Result, error) { titles := n.Select("h2") if len(titles) != 0 { - r.Title, _ = titles[0].Text() + r.Title, err = titles[0].Text() + if err != nil { + slog.Warn("failed to get result title", "err", err) + } } descriptions := n.Select("span > span") if len(descriptions) != 0 { - r.Description, _ = descriptions[0].Text() + r.Description, err = descriptions[0].Text() + if err != nil { + slog.Warn("failed to get result description", "err", err) + } } res = append(res, r) diff --git a/sites/google/google.go b/sites/google/google.go index d31e688..d49a9e2 100644 --- a/sites/google/google.go +++ b/sites/google/google.go @@ -3,6 +3,7 @@ package google import ( "context" "fmt" + "log/slog" "net/url" "gitea.stevedudenhoeffer.com/steve/go-extractor" @@ -117,13 +118,19 @@ func (c Config) Search(ctx context.Context, b extractor.Browser, query string) ( titles := s.Select("div > div > div a > h3") if len(titles) != 0 { - title, _ = titles[0].Text() + title, err = titles[0].Text() + if err != nil { + slog.Warn("failed to get result title", "err", err) + } } descs := s.Select("div:nth-child(1) > div:nth-child(2) > div:nth-child(1) > span:not([class])") if len(descs) != 0 { - desc, _ = descs[0].Text() + desc, err = descs[0].Text() + if err != nil { + slog.Warn("failed to get result description", "err", err) + } } res = append(res, Result{ diff --git a/sites/wegmans/wegmans.go b/sites/wegmans/wegmans.go index 95a6900..2401b15 100644 --- a/sites/wegmans/wegmans.go +++ b/sites/wegmans/wegmans.go @@ -3,6 +3,7 @@ package wegmans import ( "context" "errors" + "fmt" "log/slog" "net/url" "strconv" @@ -54,7 +55,10 @@ func (c Config) GetItemPrice(ctx context.Context, b extractor.Browser, u *url.UR return Item{}, ErrInvalidURL } - id, _ := strconv.Atoi(a[2]) + id, err := strconv.Atoi(a[2]) + if err != nil { + return Item{}, fmt.Errorf("failed to parse product ID %q: %w", a[2], err) + } if id == 0 { return Item{}, ErrInvalidURL @@ -67,7 +71,9 @@ func (c Config) GetItemPrice(ctx context.Context, b extractor.Browser, u *url.UR defer extractor.DeferClose(doc) timeout := 15 * time.Second - _ = doc.WaitForNetworkIdle(&timeout) + if err := doc.WaitForNetworkIdle(&timeout); err != nil { + slog.Warn("WaitForNetworkIdle failed", "err", err) + } res := Item{ ID: id, @@ -76,20 +82,29 @@ func (c Config) GetItemPrice(ctx context.Context, b extractor.Browser, u *url.UR titles := doc.Select("h1[data-testid]") if len(titles) != 0 { - res.Name, _ = titles[0].Text() + res.Name, err = titles[0].Text() + if err != nil { + slog.Warn("failed to get product name", "err", err) + } } prices := doc.Select("div.component--product-price:nth-child(1) > div:nth-child(1) > span:nth-child(1) > span:nth-child(2)") slog.Info("prices", "len", len(prices)) if len(prices) != 0 { - priceStr, _ := prices[0].Text() + priceStr, err := prices[0].Text() + if err != nil { + return res, fmt.Errorf("failed to get price text: %w", err) + } slog.Info("price", "0", prices[0], "text", priceStr) priceStr = strings.ReplaceAll(priceStr, "$", "") priceStr = strings.ReplaceAll(priceStr, ",", "") // if there's a "/" in the price, then it's in the format of like "1.99/ea", so split it off priceStr = strings.Split(priceStr, "/")[0] - price, _ := strconv.ParseFloat(priceStr, 64) + price, err := strconv.ParseFloat(priceStr, 64) + if err != nil { + return res, fmt.Errorf("failed to parse price %q: %w", priceStr, err) + } slog.Info("price", "0", prices[0], "text", priceStr, "price", price) res.Price = price } @@ -97,22 +112,29 @@ func (c Config) GetItemPrice(ctx context.Context, b extractor.Browser, u *url.UR unitPrices := doc.Select(`div.component--product-price:nth-child(1) span.price-per-unit`) if len(unitPrices) != 0 { - unitPriceStr, _ := unitPrices[0].Text() - unitPriceStr = strings.TrimSpace(unitPriceStr) - unitPriceStr = strings.ReplaceAll(unitPriceStr, "(", "") - unitPriceStr = strings.ReplaceAll(unitPriceStr, ")", "") - unitPriceStr = strings.ReplaceAll(unitPriceStr, "$", "") - unitPriceStr = strings.ReplaceAll(unitPriceStr, ",", "") + unitPriceStr, err := unitPrices[0].Text() + if err != nil { + slog.Warn("failed to get unit price text", "err", err) + } else { + unitPriceStr = strings.TrimSpace(unitPriceStr) + unitPriceStr = strings.ReplaceAll(unitPriceStr, "(", "") + unitPriceStr = strings.ReplaceAll(unitPriceStr, ")", "") + unitPriceStr = strings.ReplaceAll(unitPriceStr, "$", "") + unitPriceStr = strings.ReplaceAll(unitPriceStr, ",", "") - units := strings.Split(unitPriceStr, "/") + units := strings.Split(unitPriceStr, "/") - if len(units) > 1 { - res.Unit = strings.TrimSpace(units[1]) - res.UnitPrice, _ = strconv.ParseFloat(units[0], 64) + if len(units) > 1 { + res.Unit = strings.TrimSpace(units[1]) + res.UnitPrice, err = strconv.ParseFloat(units[0], 64) + if err != nil { + slog.Warn("failed to parse unit price", "text", units[0], "err", err) + } - // the unit might be like "lb.", so if it ends in a period, then just strip it off - if strings.HasSuffix(res.Unit, ".") { - res.Unit = strings.TrimSuffix(res.Unit, ".") + // the unit might be like "lb.", so if it ends in a period, then just strip it off + if strings.HasSuffix(res.Unit, ".") { + res.Unit = strings.TrimSuffix(res.Unit, ".") + } } } } -- 2.49.1