Steve Dudenhoeffer
6de455b1bd
Added price field to Item struct in AisleGopher and implemented logic to extract price data. Updated Wegmans parser to validate URL structure by ensuring the second segment is "product". These changes improve data accuracy and error handling.
82 lines
1.5 KiB
Go
82 lines
1.5 KiB
Go
package aislegopher
|
|
|
|
import (
|
|
"context"
|
|
"errors"
|
|
"fmt"
|
|
"io"
|
|
"net/url"
|
|
"strconv"
|
|
"strings"
|
|
|
|
"gitea.stevedudenhoeffer.com/steve/go-extractor"
|
|
)
|
|
|
|
// Config holds the options used when extracting items from AisleGopher.
// It currently has no fields; the zero value is ready to use.
type Config struct{}
|
|
|
|
// DefaultConfig is the zero-value Config used by the package-level
// GetItemFromURL function.
var DefaultConfig Config
|
|
|
|
var (
	// ErrInvalidURL is returned when a URL does not have the shape of an
	// AisleGopher product page.
	ErrInvalidURL = errors.New("invalid url")
)
|
|
|
|
// Item describes a single product read from an AisleGopher product page.
type Item struct {
	// ID is the numeric identifier taken from the last segment of the
	// product URL.
	ID int
	// Name is the product name as read from the page.
	Name string
	// Price is the price shown on the page with the "$" sign removed
	// (presumably US dollars; confirm against the site).
	Price float64
}
|
|
|
|
// deferClose closes cl when it is non-nil and discards the error returned by
// Close. It exists so that a Close call can be deferred without handling an
// error that cannot be acted upon.
//
// Note that an interface holding a typed nil pointer is not equal to nil, so
// Close is still invoked in that case.
func deferClose(cl io.Closer) {
	if cl == nil {
		return
	}
	_ = cl.Close()
}
|
|
func GetItemFromURL(ctx context.Context, b extractor.Browser, u *url.URL) (Item, error) {
|
|
return DefaultConfig.GetItemFromURL(ctx, b, u)
|
|
}
|
|
|
|
func (c Config) GetItemFromURL(ctx context.Context, b extractor.Browser, u *url.URL) (Item, error) {
|
|
res := Item{}
|
|
|
|
// the url will be in the format of aislegopher.com/p/slug/id
|
|
// we need to parse the slug and id from the url
|
|
a := strings.Split(u.Path, "/")
|
|
if len(a) != 4 {
|
|
return res, ErrInvalidURL
|
|
}
|
|
|
|
if a[1] != "p" {
|
|
return res, ErrInvalidURL
|
|
}
|
|
|
|
if u.Host != "aislegopher.com" && u.Host != "www.aislegopher.com" {
|
|
return res, ErrInvalidURL
|
|
}
|
|
|
|
res.ID, _ = strconv.Atoi(a[3])
|
|
|
|
doc, err := b.Open(ctx, u.String(), extractor.OpenPageOptions{})
|
|
defer deferClose(doc)
|
|
if err != nil {
|
|
return res, fmt.Errorf("failed to open page: %w", err)
|
|
}
|
|
|
|
names := doc.Select("h2.h4")
|
|
|
|
if len(names) > 0 {
|
|
res.Name, _ = names[0].Text()
|
|
}
|
|
|
|
prices := doc.Select("h4.h2")
|
|
|
|
if len(prices) > 0 {
|
|
priceStr, _ := prices[0].Text()
|
|
priceStr = strings.ReplaceAll(priceStr, "$", "")
|
|
priceStr = strings.TrimSpace(priceStr)
|
|
res.Price, _ = strconv.ParseFloat(priceStr, 64)
|
|
}
|
|
|
|
return res, nil
|
|
}
|