Add Wegmans module to fetch item details and prices
Introduce functionality to retrieve item details, including name and price, from Wegmans using a browser-based scraper. This includes a CLI tool to execute searches and robust error handling for URL validation and browser interactions.
This commit is contained in:
parent
654976de82
commit
f37e60dddc
81
sites/wegmans/cmd/wegmans/main.go
Normal file
81
sites/wegmans/cmd/wegmans/main.go
Normal file
@ -0,0 +1,81 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/url"
|
||||
"os"
|
||||
|
||||
"gitea.stevedudenhoeffer.com/steve/go-extractor/cmd/browser/pkg/browser"
|
||||
|
||||
"github.com/urfave/cli/v3"
|
||||
|
||||
"gitea.stevedudenhoeffer.com/steve/go-extractor/sites/wegmans"
|
||||
)
|
||||
|
||||
// deferClose closes cl when it is non-nil and discards the error returned by
// Close. It is meant to be used in defer statements where a close failure
// cannot be acted on.
func deferClose(cl io.Closer) {
	if cl == nil {
		return
	}

	_ = cl.Close()
}
|
||||
|
||||
// WegmansFlags is the list of CLI flags specific to the wegmans command.
type WegmansFlags []cli.Flag
|
||||
|
||||
// Flags holds the wegmans-specific CLI flags; it is currently empty.
var Flags = WegmansFlags{}
|
||||
|
||||
func (f WegmansFlags) ToConfig(_ *cli.Command) wegmans.Config {
|
||||
var res = wegmans.DefaultConfig
|
||||
|
||||
return res
|
||||
}
|
||||
|
||||
func main() {
|
||||
var flags []cli.Flag
|
||||
|
||||
flags = append(flags, browser.Flags...)
|
||||
flags = append(flags, Flags...)
|
||||
|
||||
app := &cli.Command{
|
||||
Name: "wegmans",
|
||||
Usage: "Search Wegmans",
|
||||
Flags: flags,
|
||||
Action: func(ctx context.Context, cmd *cli.Command) error {
|
||||
cfg := Flags.ToConfig(cmd)
|
||||
|
||||
b, err := browser.FromCommand(ctx, cmd)
|
||||
defer deferClose(b)
|
||||
|
||||
if err != nil {
|
||||
return fmt.Errorf("error creating browser: %w", err)
|
||||
}
|
||||
arg := cmd.Args().First()
|
||||
|
||||
if arg == "" {
|
||||
return fmt.Errorf("url is required")
|
||||
}
|
||||
|
||||
u, err := url.Parse(arg)
|
||||
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to parse url: %w", err)
|
||||
}
|
||||
|
||||
item, err := cfg.GetItemPrice(ctx, b, u)
|
||||
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to get item price: %w", err)
|
||||
}
|
||||
|
||||
fmt.Println(item)
|
||||
|
||||
return nil
|
||||
},
|
||||
}
|
||||
|
||||
err := app.Run(context.Background(), os.Args)
|
||||
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
93
sites/wegmans/wegmans.go
Normal file
93
sites/wegmans/wegmans.go
Normal file
@ -0,0 +1,93 @@
|
||||
package wegmans
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"io"
|
||||
"net/url"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"gitea.stevedudenhoeffer.com/steve/go-extractor"
|
||||
)
|
||||
|
||||
// Config carries the settings used when querying Wegmans. It has no fields
// yet; it exists as the receiver for the package's lookup methods.
type Config struct{}
|
||||
|
||||
// DefaultConfig is the zero-value Config, exported as a ready-made
// configuration for callers.
var DefaultConfig = Config{}
|
||||
|
||||
// Sentinel errors returned by GetItemPrice when its arguments are unusable.
var (
	// ErrNilBrowser is returned when the supplied browser is nil.
	ErrNilBrowser = errors.New("browser is nil")

	// ErrNilURL is returned when the supplied URL is nil.
	ErrNilURL = errors.New("url is nil")

	// ErrInvalidURL is returned when no product ID can be read from the URL
	// path.
	ErrInvalidURL = errors.New("invalid url")
)
|
||||
|
||||
// Item is the result of a product lookup.
type Item struct {
	// ID is the numeric product ID taken from the product URL.
	ID int
	// Name is the product title read from the page; empty if none was found.
	Name string
	// Price is the parsed price read from the page; zero if none was found.
	Price float64
}
|
||||
|
||||
// deferClose closes c when it is non-nil and discards the error returned by
// Close. It is meant to be used in defer statements where a close failure
// cannot be acted on.
func deferClose(c io.Closer) {
	if c == nil {
		return
	}

	_ = c.Close()
}
|
||||
|
||||
func (c Config) GetItemPrice(ctx context.Context, b extractor.Browser, u *url.URL) (Item, error) {
|
||||
if b == nil {
|
||||
return Item{}, ErrNilBrowser
|
||||
}
|
||||
|
||||
if u == nil {
|
||||
return Item{}, ErrNilURL
|
||||
}
|
||||
|
||||
// urls in the format of:
|
||||
// https://shop.wegmans.com/product/24921[/wegmans-frozen-thin-crust-uncured-pepperoni-pizza]
|
||||
// (the slug is optional)
|
||||
|
||||
// get the product ID
|
||||
a := strings.Split(u.Path, "/")
|
||||
|
||||
if len(a) < 3 {
|
||||
return Item{}, ErrInvalidURL
|
||||
}
|
||||
|
||||
id, _ := strconv.Atoi(a[2])
|
||||
|
||||
if id == 0 {
|
||||
return Item{}, ErrInvalidURL
|
||||
}
|
||||
|
||||
doc, err := b.Open(ctx, u.String(), extractor.OpenPageOptions{})
|
||||
defer deferClose(doc)
|
||||
|
||||
if err != nil {
|
||||
return Item{}, err
|
||||
}
|
||||
|
||||
timeout := 15 * time.Second
|
||||
_ = doc.WaitForNetworkIdle(&timeout)
|
||||
|
||||
res := Item{
|
||||
ID: id,
|
||||
}
|
||||
|
||||
titles := doc.Select("h1[data-test]")
|
||||
|
||||
if len(titles) != 0 {
|
||||
res.Name, _ = titles[0].Text()
|
||||
}
|
||||
|
||||
prices := doc.Select("span[data-test=\"amount\"] span:nth-child(1)")
|
||||
|
||||
if len(prices) != 0 {
|
||||
priceStr, _ := prices[0].Text()
|
||||
priceStr = strings.ReplaceAll(priceStr, "$", "")
|
||||
priceStr = strings.ReplaceAll(priceStr, ",", "")
|
||||
price, _ := strconv.ParseFloat(priceStr, 64)
|
||||
res.Price = price
|
||||
}
|
||||
|
||||
return res, nil
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user