18 Commits

Author SHA1 Message Date
5fe7313fa4 Refine status check logic when handling document requests in Playwright 2025-10-12 20:17:04 -04:00
39c2c7d37a Add UseLocalOnly flag to connection options in Playwright logic 2025-10-12 00:17:11 -04:00
e32a6fa791 Add UseLocalOnly option to Playwright connection logic
Introduced the `UseLocalOnly` option to prevent connections to a remote Playwright server and enforce usage of the local server. Updated relevant connection logic to respect this new option.
2025-10-12 00:10:58 -04:00
afa0238758 Restrict Price assignment to unit price with "lb" only 2025-10-11 23:48:09 -04:00
9ae8619f93 Enhance price parsing to handle non-zero unit price
Updated price extraction logic to set `Price` from `UnitPrice` when it is non-zero, ensuring more accurate parsing.
2025-10-11 23:34:41 -04:00
f4caef22b0 Add timeout option to Playwright server connection
Introduced a 30-second timeout for connecting to the Playwright server. Added logging for connection attempts to improve debugging and enhance connection reliability.
2025-10-10 20:25:27 -04:00
9947cae947 Refine selectors and enhance price parsing with logging
Adjusted HTML selectors for improved compatibility and updated price parsing logic to handle additional formats. Added logging to provide better debugging insights during price extraction.
2025-10-10 14:42:06 -04:00
dc43d1626a Parse drawing date from Powerball numbers page 2025-09-16 11:17:04 -04:00
2d60940001 Refactored jackpot handling and updated dependencies
Replaced `currency.Amount` with `int` for jackpot values to simplify representation. Adjusted parsing logic accordingly. Updated Go version to 1.24.0 and refreshed dependencies in go.mod for compatibility.
2025-09-16 10:52:49 -04:00
d0fffb0411 Add warning log for Playwright server connection failure
Introduce a warning log to notify when failing to connect to the Playwright server, followed by a fallback to launching a local browser.
2025-08-29 01:39:40 -04:00
8b4e43c40f Add DontLaunchOnConnectFailure and refine server connection logic
Introduced the `DontLaunchOnConnectFailure` option to prevent browser launch on connection failure when connecting to a Playwright server. Enhanced environment variable handling for server address based on browser type, improving flexibility and reliability in connection scenarios.
2025-08-29 01:38:00 -04:00
6f4ca22b6a Update Firefox user agent version in PlayWrightBrowserOptions
Adjusted the default `UserAgent` in `PlayWrightBrowserOptions` to reflect the latest Firefox version (142.0) for improved compatibility and accuracy.
2025-08-29 00:25:45 -04:00
8aee8f0502 Add support for connecting to a remote Playwright server
Introduced `PlayWrightServerAddress` to `PlayWrightBrowserOptions`, allowing the browser to connect to a remote Playwright server if specified. Defaults to the `PLAYWRIGHT_SERVER_ADDRESS` environment variable. Updated initialization logic to handle both local launches and remote connections seamlessly.
2025-08-29 00:25:23 -04:00
203b97d957 Update default UserAgent string in PlayWrightBrowser
Changed the UserAgent to represent a macOS system using Firefox 137.0. This ensures the browser identification aligns with updated standards and improves compatibility.
2025-05-27 01:46:06 -04:00
39453288ce Add OpenSearch and SearchPage functionality for DuckDuckGo
Introduced the `OpenSearch` method and `SearchPage` interface to streamline search operations and allow for loading more results dynamically. Updated dependencies and modified the DuckDuckGo CLI to utilize these enhancements.
2025-03-18 02:42:50 -04:00
7c0e44a22f Add viewport dimensions and dark mode support
This commit introduces optional viewport dimensions and dark mode support to the PlayWrightBrowserOptions struct and its usage. It ensures more control over browser display settings and improves flexibility when configuring browser contexts. Additionally, visibility checking logic in SetHidden was refined to avoid redundant operations.
2025-03-15 00:46:02 -04:00
0f9f6c776d Rename SetVisible to SetHidden for clearer semantic meaning
The method and its implementation now align with setting an element's "hidden" property instead of "visible." This change improves code clarity and consistency with expected behavior.
2025-03-03 23:39:37 -05:00
62cb6958fa Add SetVisible and SetAttribute methods to Node interface
This commit introduces two new methods, SetVisible and SetAttribute, to the Node interface. These methods allow toggling element visibility and setting attributes dynamically. Additionally, a helper function, escapeJavaScript, was added to ensure proper escaping of JavaScript strings.
2025-03-03 23:31:51 -05:00
9 changed files with 275 additions and 54 deletions

18
go.mod
View File

@@ -1,21 +1,23 @@
module gitea.stevedudenhoeffer.com/steve/go-extractor module gitea.stevedudenhoeffer.com/steve/go-extractor
go 1.23.2 go 1.24.0
toolchain go1.24.1
require ( require (
github.com/go-shiori/go-readability v0.0.0-20241012063810-92284fa8a71f github.com/go-shiori/go-readability v0.0.0-20250217085726-9f5bf5ca7612
github.com/playwright-community/playwright-go v0.4802.0 github.com/playwright-community/playwright-go v0.5200.0
github.com/urfave/cli/v3 v3.0.0-beta1 github.com/urfave/cli/v3 v3.0.0-beta1
golang.org/x/text v0.21.0 golang.org/x/text v0.29.0
) )
require ( require (
github.com/andybalholm/cascadia v1.3.2 // indirect github.com/andybalholm/cascadia v1.3.3 // indirect
github.com/araddon/dateparse v0.0.0-20210429162001-6b43995a97de // indirect github.com/araddon/dateparse v0.0.0-20210429162001-6b43995a97de // indirect
github.com/deckarep/golang-set/v2 v2.6.0 // indirect github.com/deckarep/golang-set/v2 v2.8.0 // indirect
github.com/go-jose/go-jose/v3 v3.0.3 // indirect github.com/go-jose/go-jose/v3 v3.0.4 // indirect
github.com/go-shiori/dom v0.0.0-20230515143342-73569d674e1c // indirect github.com/go-shiori/dom v0.0.0-20230515143342-73569d674e1c // indirect
github.com/go-stack/stack v1.8.1 // indirect github.com/go-stack/stack v1.8.1 // indirect
github.com/gogs/chardet v0.0.0-20211120154057-b7413eaefb8f // indirect github.com/gogs/chardet v0.0.0-20211120154057-b7413eaefb8f // indirect
golang.org/x/net v0.32.0 // indirect golang.org/x/net v0.44.0 // indirect
) )

33
node.go
View File

@@ -1,6 +1,9 @@
package extractor package extractor
import ( import (
"fmt"
"strings"
"github.com/playwright-community/playwright-go" "github.com/playwright-community/playwright-go"
) )
@@ -17,6 +20,9 @@ type Node interface {
SelectFirst(selector string) Node SelectFirst(selector string) Node
ForEach(selector string, fn func(Node) error) error ForEach(selector string, fn func(Node) error) error
SetHidden(val bool) error
SetAttribute(name, value string) error
} }
type node struct { type node struct {
@@ -79,3 +85,30 @@ func (n node) ForEach(selector string, fn func(Node) error) error {
return nil return nil
} }
// SetHidden sets the element's DOM "hidden" property to val.
//
// The locator's current visibility is queried first; when it already agrees
// with the request (visible while val is false, or not visible while val is
// true) nothing is evaluated in the page and nil is returned.
//
// An error is returned if the visibility check or the script evaluation fails.
func (n node) SetHidden(val bool) error {
	isVisible, err := n.locator.IsVisible()
	if err != nil {
		return fmt.Errorf("error checking visibility: %w", err)
	}

	// For booleans "visible == !val" is the same as "visible != val": the
	// element is already in the requested state, so there is nothing to do.
	if isVisible != val {
		return nil
	}

	// Assign the hidden property from inside the page.
	script := fmt.Sprintf(`(element) => element.hidden = %t;`, val)
	if _, err := n.locator.Evaluate(script, nil); err != nil {
		return fmt.Errorf("error setting hidden property: %w", err)
	}

	return nil
}
// jsStringEscaper rewrites, in a single pass, every character that cannot
// appear verbatim inside a single-quoted JavaScript string literal.
var jsStringEscaper = strings.NewReplacer(
	`\`, `\\`,
	`'`, `\'`,
	"\n", `\n`,
	"\r", `\r`,
	"\u2028", `\u2028`,
	"\u2029", `\u2029`,
)

// escapeJavaScript returns s escaped so that it can be embedded between single
// quotes in generated JavaScript source.
//
// Backslashes and single quotes are escaped as before. Line terminators
// (LF, CR, U+2028, U+2029) are now escaped as well: left raw they would end
// the string literal early and make the generated script fail to parse.
func escapeJavaScript(s string) string {
	return jsStringEscaper.Replace(s)
}
// SetAttribute sets the attribute name to value on the element by calling
// element.setAttribute in the page.
//
// The name and value are handed to the page as evaluation arguments instead of
// being spliced into the script text, so arbitrary content (quotes,
// backslashes, newlines) is passed through unchanged and can never alter the
// JavaScript that is executed.
//
// An error wrapping the underlying evaluation failure is returned if the
// script cannot be run.
func (n node) SetAttribute(name, value string) error {
	_, err := n.locator.Evaluate(
		`(element, [name, value]) => element.setAttribute(name, value)`,
		[]string{name, value},
	)
	if err != nil {
		return fmt.Errorf("error setting attribute: %w", err)
	}

	return nil
}

View File

@@ -6,18 +6,20 @@ import (
"fmt" "fmt"
"io" "io"
"log/slog" "log/slog"
"os"
"time" "time"
"github.com/playwright-community/playwright-go" "github.com/playwright-community/playwright-go"
) )
type playWrightBrowser struct { type playWrightBrowser struct {
pw *playwright.Playwright pw *playwright.Playwright
browser playwright.Browser browser playwright.Browser
ctx playwright.BrowserContext ctx playwright.BrowserContext
userAgent string userAgent string
timeout time.Duration timeout time.Duration
cookieJar CookieJar cookieJar CookieJar
serverAddr string
} }
var _ Browser = playWrightBrowser{} var _ Browser = playWrightBrowser{}
@@ -36,6 +38,10 @@ const (
PlayWrightBrowserSelectionWebKit PlayWrightBrowserSelection = "webkit" PlayWrightBrowserSelectionWebKit PlayWrightBrowserSelection = "webkit"
) )
// Size is a width/height pair used to request browser viewport dimensions.
// Both values must be greater than zero for the dimensions to be applied.
type Size struct {
	Width, Height int
}
type PlayWrightBrowserOptions struct { type PlayWrightBrowserOptions struct {
UserAgent string // If empty, defaults to "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:133.0) Gecko/20100101 Firefox/133.0" UserAgent string // If empty, defaults to "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:133.0) Gecko/20100101 Firefox/133.0"
Browser PlayWrightBrowserSelection // If unset defaults to Firefox. Browser PlayWrightBrowserSelection // If unset defaults to Firefox.
@@ -46,6 +52,20 @@ type PlayWrightBrowserOptions struct {
CookieJar CookieJar
ShowBrowser bool // If false, browser will be headless ShowBrowser bool // If false, browser will be headless
Dimensions Size
DarkMode bool
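// PlayWrightServerAddress is the address of a PlayWright server to connect to.
// If empty, it defaults to the value of the environment variable for the selected browser:
// PLAYWRIGHT_SERVER_ADDRESS_CHROMIUM, PLAYWRIGHT_SERVER_ADDRESS_FIREFOX, or PLAYWRIGHT_SERVER_ADDRESS_WEBKIT.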
PlayWrightServerAddress string
// DontLaunchOnConnectFailure will, if set, return an error when the connection to the PlayWright server fails,
// instead of falling back to launching a local browser.
DontLaunchOnConnectFailure bool
// UseLocalOnly will, if set, skip connecting to the PlayWright server and always launch a local browser instead.
UseLocalOnly bool
} }
func cookieToPlaywrightOptionalCookie(cookie Cookie) playwright.OptionalCookie { func cookieToPlaywrightOptionalCookie(cookie Cookie) playwright.OptionalCookie {
@@ -73,9 +93,11 @@ func playwrightCookieToCookie(cookie playwright.Cookie) Cookie {
func NewPlayWrightBrowser(opts ...PlayWrightBrowserOptions) (Browser, error) { func NewPlayWrightBrowser(opts ...PlayWrightBrowserOptions) (Browser, error) {
var thirtySeconds = 30 * time.Second var thirtySeconds = 30 * time.Second
opt := PlayWrightBrowserOptions{ opt := PlayWrightBrowserOptions{
UserAgent: "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:133.0) Gecko/20100101 Firefox/133.0", UserAgent: "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:142.0) Gecko/20100101 Firefox/142.0",
Browser: PlayWrightBrowserSelectionFirefox, Browser: PlayWrightBrowserSelectionFirefox,
Timeout: &thirtySeconds, Timeout: &thirtySeconds,
DarkMode: false,
PlayWrightServerAddress: "",
} }
for _, o := range opts { for _, o := range opts {
@@ -91,6 +113,21 @@ func NewPlayWrightBrowser(opts ...PlayWrightBrowserOptions) (Browser, error) {
if o.CookieJar != nil { if o.CookieJar != nil {
opt.CookieJar = o.CookieJar opt.CookieJar = o.CookieJar
} }
if o.Dimensions.Width > 0 && o.Dimensions.Height > 0 {
opt.Dimensions = o.Dimensions
}
if o.DarkMode {
opt.DarkMode = true
}
if o.PlayWrightServerAddress != "" {
opt.PlayWrightServerAddress = o.PlayWrightServerAddress
}
if o.DontLaunchOnConnectFailure {
opt.DontLaunchOnConnectFailure = true
}
if o.UseLocalOnly {
opt.UseLocalOnly = true
}
opt.ShowBrowser = o.ShowBrowser opt.ShowBrowser = o.ShowBrowser
} }
@@ -115,26 +152,72 @@ func NewPlayWrightBrowser(opts ...PlayWrightBrowserOptions) (Browser, error) {
switch opt.Browser { switch opt.Browser {
case PlayWrightBrowserSelectionChromium: case PlayWrightBrowserSelectionChromium:
bt = pw.Chromium bt = pw.Chromium
if opt.PlayWrightServerAddress == "" {
opt.PlayWrightServerAddress = os.Getenv("PLAYWRIGHT_SERVER_ADDRESS_CHROMIUM")
}
case PlayWrightBrowserSelectionFirefox: case PlayWrightBrowserSelectionFirefox:
bt = pw.Firefox bt = pw.Firefox
if opt.PlayWrightServerAddress == "" {
opt.PlayWrightServerAddress = os.Getenv("PLAYWRIGHT_SERVER_ADDRESS_FIREFOX")
}
case PlayWrightBrowserSelectionWebKit: case PlayWrightBrowserSelectionWebKit:
bt = pw.WebKit bt = pw.WebKit
if opt.PlayWrightServerAddress == "" {
opt.PlayWrightServerAddress = os.Getenv("PLAYWRIGHT_SERVER_ADDRESS_WEBKIT")
}
default: default:
return nil, ErrInvalidBrowserSelection return nil, ErrInvalidBrowserSelection
} }
var browser playwright.Browser
browser, err := bt.Launch(playwright.BrowserTypeLaunchOptions{ var launch = true
Headless: playwright.Bool(!opt.ShowBrowser), if opt.PlayWrightServerAddress != "" && !opt.UseLocalOnly {
}) launch = false
if err != nil { slog.Info("connecting to playwright server", "address", opt.PlayWrightServerAddress)
return nil, err var timeout float64 = 30000
browser, err = bt.Connect(opt.PlayWrightServerAddress, playwright.BrowserTypeConnectOptions{Timeout: &timeout})
if err != nil {
if opt.DontLaunchOnConnectFailure {
return nil, err
}
slog.Warn("failed to connect to playwright server, launching local browser", "err", err)
launch = true
}
}
if launch {
browser, err = bt.Launch(playwright.BrowserTypeLaunchOptions{
Headless: playwright.Bool(!opt.ShowBrowser),
})
if err != nil {
return nil, err
}
}
var viewport *playwright.Size
if opt.Dimensions.Width > 0 && opt.Dimensions.Height > 0 {
viewport = &playwright.Size{
Width: opt.Dimensions.Width,
Height: opt.Dimensions.Height,
}
}
var scheme *playwright.ColorScheme
if opt.DarkMode {
scheme = playwright.ColorSchemeDark
} else {
scheme = playwright.ColorSchemeNoPreference
} }
c, err := browser.NewContext(playwright.BrowserNewContextOptions{ c, err := browser.NewContext(playwright.BrowserNewContextOptions{
UserAgent: playwright.String(opt.UserAgent), UserAgent: playwright.String(opt.UserAgent),
Viewport: viewport,
ColorScheme: scheme,
}) })
if err != nil { if err != nil {
return nil, err return nil, err
@@ -160,12 +243,13 @@ func NewPlayWrightBrowser(opts ...PlayWrightBrowserOptions) (Browser, error) {
} }
return playWrightBrowser{ return playWrightBrowser{
pw: pw, pw: pw,
browser: browser, browser: browser,
userAgent: opt.UserAgent, userAgent: opt.UserAgent,
timeout: *opt.Timeout, timeout: *opt.Timeout,
cookieJar: opt.CookieJar, cookieJar: opt.CookieJar,
ctx: c, ctx: c,
serverAddr: opt.PlayWrightServerAddress,
}, nil }, nil
} }
@@ -212,10 +296,7 @@ func (b playWrightBrowser) openPage(_ context.Context, target string, opts OpenP
slog.Info("opened document", "url", target, "status", resp.Status(), "request", resp.Request()) slog.Info("opened document", "url", target, "status", resp.Status(), "request", resp.Request())
if resp.Status() != 200 { if resp.Status() < 200 || resp.Status() >= 300 {
time.Sleep(999 * time.Hour * 24)
time.Sleep(25 * time.Second)
_ = page.Close() _ = page.Close()
if resp.Status() == 404 { if resp.Status() == 404 {

View File

@@ -62,13 +62,13 @@ func (c Config) GetItemFromURL(ctx context.Context, b extractor.Browser, u *url.
return res, fmt.Errorf("failed to open page: %w", err) return res, fmt.Errorf("failed to open page: %w", err)
} }
names := doc.Select("h2.h4") names := doc.Select(".h4")
if len(names) > 0 { if len(names) > 0 {
res.Name, _ = names[0].Text() res.Name, _ = names[0].Text()
} }
prices := doc.Select("h4.h2") prices := doc.Select(".h2")
if len(prices) > 0 { if len(prices) > 0 {
priceStr, _ := prices[0].Text() priceStr, _ := prices[0].Text()

View File

@@ -3,11 +3,11 @@ package main
import ( import (
"context" "context"
"fmt" "fmt"
"github.com/urfave/cli/v3"
"io" "io"
"os" "os"
"strings" "strings"
"time"
"github.com/urfave/cli/v3"
"gitea.stevedudenhoeffer.com/steve/go-extractor/cmd/browser/pkg/browser" "gitea.stevedudenhoeffer.com/steve/go-extractor/cmd/browser/pkg/browser"
"gitea.stevedudenhoeffer.com/steve/go-extractor/sites/duckduckgo" "gitea.stevedudenhoeffer.com/steve/go-extractor/sites/duckduckgo"
@@ -58,6 +58,7 @@ func deferClose(cl io.Closer) {
func main() { func main() {
var flags []cli.Flag var flags []cli.Flag
flags = append(flags, browser.Flags...)
flags = append(flags, Flags...) flags = append(flags, Flags...)
cli := &cli.Command{ cli := &cli.Command{
@@ -81,13 +82,24 @@ func main() {
return fmt.Errorf("failed to create browser: %w", err) return fmt.Errorf("failed to create browser: %w", err)
} }
res, err := c.Search(ctx, b, query) search, err := c.OpenSearch(ctx, b, query)
if err != nil { if err != nil {
return fmt.Errorf("failed to search: %w", err) return fmt.Errorf("failed to open search: %w", err)
} }
fmt.Println(res) defer deferClose(search)
res := search.GetResults()
fmt.Println("Results:", res)
err = search.LoadMore()
if err != nil {
return fmt.Errorf("failed to load more: %w", err)
}
time.Sleep(2 * time.Second)
res = search.GetResults()
fmt.Println("Results:", res)
return nil return nil
}, },

View File

@@ -77,6 +77,21 @@ func deferClose(cl io.Closer) {
} }
} }
// OpenSearch opens the search results page for query in browser b and returns
// it as a SearchPage that stays open until the caller closes it.
//
// If the page cannot be opened, any document the browser handed back is
// closed (best effort) and the wrapped error is returned.
func (c Config) OpenSearch(ctx context.Context, b extractor.Browser, query string) (SearchPage, error) {
	searchURL := c.ToSearchURL(query)

	slog.Info("searching", "url", searchURL, "query", query, "config", c, "browser", b)

	page, openErr := b.Open(ctx, searchURL.String(), extractor.OpenPageOptions{})
	if openErr == nil {
		return searchPage{page}, nil
	}

	// Opening failed: release whatever was handed back before reporting.
	if page != nil {
		_ = page.Close()
	}

	return nil, fmt.Errorf("failed to open url: %w", openErr)
}
func (c Config) Search(ctx context.Context, b extractor.Browser, query string) ([]Result, error) { func (c Config) Search(ctx context.Context, b extractor.Browser, query string) ([]Result, error) {
u := c.ToSearchURL(query) u := c.ToSearchURL(query)

68
sites/duckduckgo/page.go Normal file
View File

@@ -0,0 +1,68 @@
package duckduckgo
import (
"fmt"
"gitea.stevedudenhoeffer.com/steve/go-extractor"
"io"
"log/slog"
)
// SearchPage is an open search results page. It must be closed by the caller
// once it is no longer needed.
type SearchPage interface {
	// GetResults returns the results found on the page at the time of the call.
	GetResults() []Result

	// LoadMore asks the page to load additional results.
	LoadMore() error

	// Close releases the underlying page.
	io.Closer
}
// searchPage is the SearchPage implementation backed by an open
// extractor.Document.
type searchPage struct {
// doc is the opened results document that every method queries and that
// Close closes.
doc extractor.Document
}
// GetResults scans the page for result articles and returns one Result per
// article that contains a link.
//
// Articles without a matching link are skipped. Title and description are
// best effort: when they are missing or their text cannot be read, the
// corresponding field is left empty.
//
// The method cannot return an error, so a failure reported while walking the
// articles is logged as a warning rather than silently discarded, and whatever
// results were collected up to that point are still returned.
func (s searchPage) GetResults() []Result {
	var res []Result

	err := s.doc.ForEach(`article[id^="r1-"]`, func(n extractor.Node) error {
		links := n.Select(`a[href][target="_self"]`)
		if len(links) == 0 {
			return nil
		}

		// Keep the error local to the callback instead of writing to a
		// variable captured from the enclosing function.
		href, err := links[0].Attr(`href`)
		if err != nil {
			return fmt.Errorf("failed to get link: %w", err)
		}

		r := Result{URL: href}

		titles := n.Select("h2")
		if len(titles) != 0 {
			// Deliberately ignored: a result without a readable title is still useful.
			r.Title, _ = titles[0].Text()
		}

		descriptions := n.Select("span > span")
		if len(descriptions) != 0 {
			// Deliberately ignored: the description is optional.
			r.Description, _ = descriptions[0].Text()
		}

		res = append(res, r)

		return nil
	})
	if err != nil {
		slog.Warn("failed to collect search results", "err", err)
	}

	return res
}
// LoadMore clicks every "more results" button (button#more-results) found on
// the page, logging each click, and returns the error reported by ForEach.
func (s searchPage) LoadMore() error {
	clickMore := func(button extractor.Node) error {
		slog.Info("clicking load more", "node", button)
		return button.Click()
	}

	return s.doc.ForEach(`button#more-results`, clickMore)
}
// Close closes the underlying document and returns the error it reports.
func (s searchPage) Close() error {
	closeErr := s.doc.Close()
	return closeErr
}

View File

@@ -9,8 +9,6 @@ import (
"time" "time"
"gitea.stevedudenhoeffer.com/steve/go-extractor" "gitea.stevedudenhoeffer.com/steve/go-extractor"
"golang.org/x/text/currency"
) )
type Config struct { type Config struct {
@@ -30,8 +28,8 @@ type Drawing struct {
} }
type NextDrawing struct { type NextDrawing struct {
Date string Date string
Jackpot currency.Amount JackpotDollars int
} }
func deferClose(cl io.Closer) { func deferClose(cl io.Closer) {
@@ -43,6 +41,10 @@ func deferClose(cl io.Closer) {
func getDrawing(_ context.Context, doc extractor.Document) (*Drawing, error) { func getDrawing(_ context.Context, doc extractor.Document) (*Drawing, error) {
var drawing Drawing var drawing Drawing
dateStr, err := doc.SelectFirst("#numbers .title-date").Text()
drawing.Date, err = time.Parse("Mon, Jan 2, 2006", dateStr)
nums := doc.Select("div.game-ball-group div.white-balls") nums := doc.Select("div.game-ball-group div.white-balls")
if len(nums) != 5 { if len(nums) != 5 {
@@ -165,16 +167,15 @@ func getNextDrawing(_ context.Context, doc extractor.Document) (*NextDrawing, er
set := false set := false
if strings.Contains(txt, "Billion") { if strings.Contains(txt, "Billion") {
amt := currency.USD.Amount(numeric * 1000000000) amt := numeric * 1000000000
nextDrawing.Jackpot = amt nextDrawing.JackpotDollars = int(amt)
set = true set = true
} else if strings.Contains(txt, "Million") { } else if strings.Contains(txt, "Million") {
amt := currency.USD.Amount(numeric * 1000000) amt := numeric * 1000000
nextDrawing.Jackpot = amt nextDrawing.JackpotDollars = int(amt)
set = true set = true
} else { } else {
amt := currency.USD.Amount(numeric) nextDrawing.JackpotDollars = int(numeric)
nextDrawing.Jackpot = amt
set = true set = true
} }

View File

@@ -4,6 +4,7 @@ import (
"context" "context"
"errors" "errors"
"io" "io"
"log/slog"
"net/url" "net/url"
"strconv" "strconv"
"strings" "strings"
@@ -80,23 +81,28 @@ func (c Config) GetItemPrice(ctx context.Context, b extractor.Browser, u *url.UR
ID: id, ID: id,
} }
titles := doc.Select("h1[data-test]") titles := doc.Select("h1[data-testid]")
if len(titles) != 0 { if len(titles) != 0 {
res.Name, _ = titles[0].Text() res.Name, _ = titles[0].Text()
} }
prices := doc.Select("span[data-test=\"amount\"] span:nth-child(1)") prices := doc.Select("div.component--product-price:nth-child(1) > div:nth-child(1) > span:nth-child(1) > span:nth-child(2)")
slog.Info("prices", "len", len(prices))
if len(prices) != 0 { if len(prices) != 0 {
priceStr, _ := prices[0].Text() priceStr, _ := prices[0].Text()
slog.Info("price", "0", prices[0], "text", priceStr)
priceStr = strings.ReplaceAll(priceStr, "$", "") priceStr = strings.ReplaceAll(priceStr, "$", "")
priceStr = strings.ReplaceAll(priceStr, ",", "") priceStr = strings.ReplaceAll(priceStr, ",", "")
// if there's a "/" in the price, then it's in the format of like "1.99/ea", so split it off
priceStr = strings.Split(priceStr, "/")[0]
price, _ := strconv.ParseFloat(priceStr, 64) price, _ := strconv.ParseFloat(priceStr, 64)
slog.Info("price", "0", prices[0], "text", priceStr, "price", price)
res.Price = price res.Price = price
} }
unitPrices := doc.Select(`span[data-test="per-unit-price"]`) unitPrices := doc.Select(`div.component--product-price:nth-child(1) span.price-per-unit`)
if len(unitPrices) != 0 { if len(unitPrices) != 0 {
unitPriceStr, _ := unitPrices[0].Text() unitPriceStr, _ := unitPrices[0].Text()
@@ -111,6 +117,9 @@ func (c Config) GetItemPrice(ctx context.Context, b extractor.Browser, u *url.UR
if len(units) > 1 { if len(units) > 1 {
res.Unit = strings.TrimSpace(units[1]) res.Unit = strings.TrimSpace(units[1])
res.UnitPrice, _ = strconv.ParseFloat(units[0], 64) res.UnitPrice, _ = strconv.ParseFloat(units[0], 64)
if res.UnitPrice != 0 && res.Unit == "lb" {
res.Price = res.UnitPrice
}
} }
} }