Files
go-extractor/sites/aislegopher/aislegopher.go
Steve Dudenhoeffer a9711ce904
All checks were successful
CI / vet (pull_request) Successful in 1m10s
CI / build (pull_request) Successful in 1m21s
CI / test (pull_request) Successful in 1m28s
fix: surface parsing errors instead of silently discarding them
Return errors for required fields (ID, price) and log warnings for
optional fields (title, description, unit price) across all site
extractors instead of silently discarding them with _ =.

Closes #24
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-15 16:31:56 +00:00

90 lines
1.8 KiB
Go

package aislegopher
import (
"context"
"errors"
"fmt"
"log/slog"
"net/url"
"strconv"
"strings"
"gitea.stevedudenhoeffer.com/steve/go-extractor"
)
// Config carries the settings used when extracting items from
// aislegopher.com. It currently declares no fields, so its zero value is
// ready to use.
type Config struct{}
// DefaultConfig is the Config used by the package-level GetItemFromURL.
// It is the zero value of Config.
var DefaultConfig Config
// ErrInvalidURL is returned by GetItemFromURL when the supplied URL is not an
// aislegopher.com product URL of the form aislegopher.com/p/slug/id.
var ErrInvalidURL = errors.New("invalid url")
// Item is the product data that GetItemFromURL extracts from a product page.
type Item struct {
	// ID is the numeric product ID parsed from the last segment of the
	// product URL path.
	ID int

	// Name is the text of the first ".h4" node selected on the page. It is
	// left empty when no such node is found.
	Name string

	// Price is the number parsed from the text of the first ".h2" node
	// selected on the page, after "$" and surrounding whitespace are removed.
	// It is left at zero when no such node is found.
	Price float64
}
// GetItemFromURL extracts the product at u using DefaultConfig. It forwards
// ctx, b and u unchanged to Config.GetItemFromURL and returns that method's
// result as-is.
func GetItemFromURL(ctx context.Context, b extractor.Browser, u *url.URL) (Item, error) {
return DefaultConfig.GetItemFromURL(ctx, b, u)
}
// GetItemFromURL opens the aislegopher.com product page at u through b and
// extracts the product's ID, name and price.
//
// u must look like aislegopher.com/p/slug/id; the "www.aislegopher.com" host
// is accepted as well. The host is compared case-insensitively and without
// any port, so "AisleGopher.com:443" is treated like "aislegopher.com".
// A nil u, a different host, or a path of any other shape yields
// ErrInvalidURL, and an id segment that is not an integer yields a wrapped
// strconv error; in all of those cases the page is never opened.
//
// The name is taken from the first node returned by doc.Select(".h4") and
// the price from the first node returned by doc.Select(".h2"). A failure to
// read the name is only logged as a warning, while a failure to read or
// parse the price is returned as an error. On error the returned Item holds
// whatever fields had been filled in before the failure.
func (c Config) GetItemFromURL(ctx context.Context, b extractor.Browser, u *url.URL) (Item, error) {
	res := Item{}

	// Guard against a nil URL so the field accesses below cannot panic.
	if u == nil {
		return res, ErrInvalidURL
	}

	// The path is expected to be /p/slug/id, which splits on "/" into
	// ["", "p", slug, id].
	a := strings.Split(u.Path, "/")
	if len(a) != 4 {
		return res, ErrInvalidURL
	}
	if a[1] != "p" {
		return res, ErrInvalidURL
	}

	// u.Host may carry a ":port" suffix and keeps the caller's letter case,
	// so compare the lower-cased Hostname instead; otherwise valid product
	// URLs with an explicit port or an upper-case host would be rejected.
	host := strings.ToLower(u.Hostname())
	if host != "aislegopher.com" && host != "www.aislegopher.com" {
		return res, ErrInvalidURL
	}

	var err error
	res.ID, err = strconv.Atoi(a[3])
	if err != nil {
		return res, fmt.Errorf("failed to parse product ID %q: %w", a[3], err)
	}

	doc, err := b.Open(ctx, u.String(), extractor.OpenPageOptions{})
	if err != nil {
		return res, fmt.Errorf("failed to open page: %w", err)
	}
	defer extractor.DeferClose(doc)

	// The name is optional: a read failure is logged and extraction goes on.
	names := doc.Select(".h4")
	if len(names) > 0 {
		res.Name, err = names[0].Text()
		if err != nil {
			slog.Warn("failed to get product name", "err", err)
		}
	}

	// NOTE(review): when no ".h2" node is found the price stays 0 and no
	// error is returned — confirm that this is intended.
	prices := doc.Select(".h2")
	if len(prices) > 0 {
		priceStr, err := prices[0].Text()
		if err != nil {
			return res, fmt.Errorf("failed to get price text: %w", err)
		}

		// Drop the currency sign and surrounding whitespace before parsing.
		priceStr = strings.ReplaceAll(priceStr, "$", "")
		priceStr = strings.TrimSpace(priceStr)

		res.Price, err = strconv.ParseFloat(priceStr, 64)
		if err != nil {
			return res, fmt.Errorf("failed to parse price %q: %w", priceStr, err)
		}
	}

	return res, nil
}