Fix silently ignored parsing errors (#24) #41
@@ -4,6 +4,7 @@ import (
|
|||||||
"context"
|
"context"
|
||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"log/slog"
|
||||||
"net/url"
|
"net/url"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
@@ -48,7 +49,11 @@ func (c Config) GetItemFromURL(ctx context.Context, b extractor.Browser, u *url.
|
|||||||
return res, ErrInvalidURL
|
return res, ErrInvalidURL
|
||||||
}
|
}
|
||||||
|
|
||||||
res.ID, _ = strconv.Atoi(a[3])
|
var err error
|
||||||
|
res.ID, err = strconv.Atoi(a[3])
|
||||||
|
if err != nil {
|
||||||
|
return res, fmt.Errorf("failed to parse product ID %q: %w", a[3], err)
|
||||||
|
}
|
||||||
|
|
||||||
doc, err := b.Open(ctx, u.String(), extractor.OpenPageOptions{})
|
doc, err := b.Open(ctx, u.String(), extractor.OpenPageOptions{})
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@@ -59,16 +64,25 @@ func (c Config) GetItemFromURL(ctx context.Context, b extractor.Browser, u *url.
|
|||||||
names := doc.Select(".h4")
|
names := doc.Select(".h4")
|
||||||
|
|
||||||
if len(names) > 0 {
|
if len(names) > 0 {
|
||||||
res.Name, _ = names[0].Text()
|
res.Name, err = names[0].Text()
|
||||||
|
if err != nil {
|
||||||
|
slog.Warn("failed to get product name", "err", err)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
prices := doc.Select(".h2")
|
prices := doc.Select(".h2")
|
||||||
|
|
||||||
if len(prices) > 0 {
|
if len(prices) > 0 {
|
||||||
priceStr, _ := prices[0].Text()
|
priceStr, err := prices[0].Text()
|
||||||
|
if err != nil {
|
||||||
|
return res, fmt.Errorf("failed to get price text: %w", err)
|
||||||
|
}
|
||||||
priceStr = strings.ReplaceAll(priceStr, "$", "")
|
priceStr = strings.ReplaceAll(priceStr, "$", "")
|
||||||
priceStr = strings.TrimSpace(priceStr)
|
priceStr = strings.TrimSpace(priceStr)
|
||||||
res.Price, _ = strconv.ParseFloat(priceStr, 64)
|
res.Price, err = strconv.ParseFloat(priceStr, 64)
|
||||||
|
if err != nil {
|
||||||
|
return res, fmt.Errorf("failed to parse price %q: %w", priceStr, err)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return res, nil
|
return res, nil
|
||||||
|
|||||||
@@ -2,9 +2,10 @@ package duckduckgo
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"gitea.stevedudenhoeffer.com/steve/go-extractor"
|
|
||||||
"io"
|
"io"
|
||||||
"log/slog"
|
"log/slog"
|
||||||
|
|
||||||
|
"gitea.stevedudenhoeffer.com/steve/go-extractor"
|
||||||
)
|
)
|
||||||
|
|
||||||
type SearchPage interface {
|
type SearchPage interface {
|
||||||
@@ -44,13 +45,19 @@ func extractResults(doc extractor.Node) ([]Result, error) {
|
|||||||
titles := n.Select("h2")
|
titles := n.Select("h2")
|
||||||
|
|
||||||
if len(titles) != 0 {
|
if len(titles) != 0 {
|
||||||
r.Title, _ = titles[0].Text()
|
r.Title, err = titles[0].Text()
|
||||||
|
if err != nil {
|
||||||
|
slog.Warn("failed to get result title", "err", err)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
descriptions := n.Select("span > span")
|
descriptions := n.Select("span > span")
|
||||||
|
|
||||||
if len(descriptions) != 0 {
|
if len(descriptions) != 0 {
|
||||||
r.Description, _ = descriptions[0].Text()
|
r.Description, err = descriptions[0].Text()
|
||||||
|
if err != nil {
|
||||||
|
slog.Warn("failed to get result description", "err", err)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
res = append(res, r)
|
res = append(res, r)
|
||||||
|
|||||||
@@ -3,6 +3,7 @@ package google
|
|||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"log/slog"
|
||||||
"net/url"
|
"net/url"
|
||||||
|
|
||||||
"gitea.stevedudenhoeffer.com/steve/go-extractor"
|
"gitea.stevedudenhoeffer.com/steve/go-extractor"
|
||||||
@@ -117,13 +118,19 @@ func (c Config) Search(ctx context.Context, b extractor.Browser, query string) (
|
|||||||
titles := s.Select("div > div > div a > h3")
|
titles := s.Select("div > div > div a > h3")
|
||||||
|
|
||||||
if len(titles) != 0 {
|
if len(titles) != 0 {
|
||||||
title, _ = titles[0].Text()
|
title, err = titles[0].Text()
|
||||||
|
if err != nil {
|
||||||
|
slog.Warn("failed to get result title", "err", err)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
descs := s.Select("div:nth-child(1) > div:nth-child(2) > div:nth-child(1) > span:not([class])")
|
descs := s.Select("div:nth-child(1) > div:nth-child(2) > div:nth-child(1) > span:not([class])")
|
||||||
|
|
||||||
if len(descs) != 0 {
|
if len(descs) != 0 {
|
||||||
desc, _ = descs[0].Text()
|
desc, err = descs[0].Text()
|
||||||
|
if err != nil {
|
||||||
|
slog.Warn("failed to get result description", "err", err)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
res = append(res, Result{
|
res = append(res, Result{
|
||||||
|
|||||||
@@ -3,6 +3,7 @@ package wegmans
|
|||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"errors"
|
"errors"
|
||||||
|
"fmt"
|
||||||
"log/slog"
|
"log/slog"
|
||||||
"net/url"
|
"net/url"
|
||||||
"strconv"
|
"strconv"
|
||||||
@@ -54,7 +55,10 @@ func (c Config) GetItemPrice(ctx context.Context, b extractor.Browser, u *url.UR
|
|||||||
return Item{}, ErrInvalidURL
|
return Item{}, ErrInvalidURL
|
||||||
}
|
}
|
||||||
|
|
||||||
id, _ := strconv.Atoi(a[2])
|
id, err := strconv.Atoi(a[2])
|
||||||
|
if err != nil {
|
||||||
|
return Item{}, fmt.Errorf("failed to parse product ID %q: %w", a[2], err)
|
||||||
|
}
|
||||||
|
|
||||||
if id == 0 {
|
if id == 0 {
|
||||||
return Item{}, ErrInvalidURL
|
return Item{}, ErrInvalidURL
|
||||||
@@ -67,7 +71,9 @@ func (c Config) GetItemPrice(ctx context.Context, b extractor.Browser, u *url.UR
|
|||||||
defer extractor.DeferClose(doc)
|
defer extractor.DeferClose(doc)
|
||||||
|
|
||||||
timeout := 15 * time.Second
|
timeout := 15 * time.Second
|
||||||
_ = doc.WaitForNetworkIdle(&timeout)
|
if err := doc.WaitForNetworkIdle(&timeout); err != nil {
|
||||||
|
slog.Warn("WaitForNetworkIdle failed", "err", err)
|
||||||
|
}
|
||||||
|
|
||||||
res := Item{
|
res := Item{
|
||||||
ID: id,
|
ID: id,
|
||||||
@@ -76,20 +82,29 @@ func (c Config) GetItemPrice(ctx context.Context, b extractor.Browser, u *url.UR
|
|||||||
titles := doc.Select("h1[data-testid]")
|
titles := doc.Select("h1[data-testid]")
|
||||||
|
|
||||||
if len(titles) != 0 {
|
if len(titles) != 0 {
|
||||||
res.Name, _ = titles[0].Text()
|
res.Name, err = titles[0].Text()
|
||||||
|
if err != nil {
|
||||||
|
slog.Warn("failed to get product name", "err", err)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
prices := doc.Select("div.component--product-price:nth-child(1) > div:nth-child(1) > span:nth-child(1) > span:nth-child(2)")
|
prices := doc.Select("div.component--product-price:nth-child(1) > div:nth-child(1) > span:nth-child(1) > span:nth-child(2)")
|
||||||
|
|
||||||
slog.Info("prices", "len", len(prices))
|
slog.Info("prices", "len", len(prices))
|
||||||
if len(prices) != 0 {
|
if len(prices) != 0 {
|
||||||
priceStr, _ := prices[0].Text()
|
priceStr, err := prices[0].Text()
|
||||||
|
if err != nil {
|
||||||
|
return res, fmt.Errorf("failed to get price text: %w", err)
|
||||||
|
}
|
||||||
slog.Info("price", "0", prices[0], "text", priceStr)
|
slog.Info("price", "0", prices[0], "text", priceStr)
|
||||||
priceStr = strings.ReplaceAll(priceStr, "$", "")
|
priceStr = strings.ReplaceAll(priceStr, "$", "")
|
||||||
priceStr = strings.ReplaceAll(priceStr, ",", "")
|
priceStr = strings.ReplaceAll(priceStr, ",", "")
|
||||||
// if there's a "/" in the price, then it's in the format of like "1.99/ea", so split it off
|
// if there's a "/" in the price, then it's in the format of like "1.99/ea", so split it off
|
||||||
priceStr = strings.Split(priceStr, "/")[0]
|
priceStr = strings.Split(priceStr, "/")[0]
|
||||||
price, _ := strconv.ParseFloat(priceStr, 64)
|
price, err := strconv.ParseFloat(priceStr, 64)
|
||||||
|
if err != nil {
|
||||||
|
return res, fmt.Errorf("failed to parse price %q: %w", priceStr, err)
|
||||||
|
}
|
||||||
slog.Info("price", "0", prices[0], "text", priceStr, "price", price)
|
slog.Info("price", "0", prices[0], "text", priceStr, "price", price)
|
||||||
res.Price = price
|
res.Price = price
|
||||||
}
|
}
|
||||||
@@ -97,7 +112,10 @@ func (c Config) GetItemPrice(ctx context.Context, b extractor.Browser, u *url.UR
|
|||||||
unitPrices := doc.Select(`div.component--product-price:nth-child(1) span.price-per-unit`)
|
unitPrices := doc.Select(`div.component--product-price:nth-child(1) span.price-per-unit`)
|
||||||
|
|
||||||
if len(unitPrices) != 0 {
|
if len(unitPrices) != 0 {
|
||||||
unitPriceStr, _ := unitPrices[0].Text()
|
unitPriceStr, err := unitPrices[0].Text()
|
||||||
|
if err != nil {
|
||||||
|
slog.Warn("failed to get unit price text", "err", err)
|
||||||
|
} else {
|
||||||
unitPriceStr = strings.TrimSpace(unitPriceStr)
|
unitPriceStr = strings.TrimSpace(unitPriceStr)
|
||||||
unitPriceStr = strings.ReplaceAll(unitPriceStr, "(", "")
|
unitPriceStr = strings.ReplaceAll(unitPriceStr, "(", "")
|
||||||
unitPriceStr = strings.ReplaceAll(unitPriceStr, ")", "")
|
unitPriceStr = strings.ReplaceAll(unitPriceStr, ")", "")
|
||||||
@@ -108,7 +126,10 @@ func (c Config) GetItemPrice(ctx context.Context, b extractor.Browser, u *url.UR
|
|||||||
|
|
||||||
if len(units) > 1 {
|
if len(units) > 1 {
|
||||||
res.Unit = strings.TrimSpace(units[1])
|
res.Unit = strings.TrimSpace(units[1])
|
||||||
res.UnitPrice, _ = strconv.ParseFloat(units[0], 64)
|
res.UnitPrice, err = strconv.ParseFloat(units[0], 64)
|
||||||
|
if err != nil {
|
||||||
|
slog.Warn("failed to parse unit price", "text", units[0], "err", err)
|
||||||
|
}
|
||||||
|
|
||||||
// the unit might be like "lb.", so if it ends in a period, then just strip it off
|
// the unit might be like "lb.", so if it ends in a period, then just strip it off
|
||||||
if strings.HasSuffix(res.Unit, ".") {
|
if strings.HasSuffix(res.Unit, ".") {
|
||||||
@@ -116,6 +137,7 @@ func (c Config) GetItemPrice(ctx context.Context, b extractor.Browser, u *url.UR
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
slog.Info("res", "res", res)
|
slog.Info("res", "res", res)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user