Steve Dudenhoeffer
654976de82
Introduced a new package and command for extracting data from aislegopher.com, including URL parsing and item retrieval. Updated dependencies in go.mod to support the new functionality. Additionally, refined import structure in the DuckDuckGo integration.
72 lines
1.3 KiB
Go
72 lines
1.3 KiB
Go
package aislegopher
|
|
|
|
import (
|
|
"context"
|
|
"errors"
|
|
"fmt"
|
|
"io"
|
|
"net/url"
|
|
"strconv"
|
|
"strings"
|
|
|
|
"gitea.stevedudenhoeffer.com/steve/go-extractor"
|
|
)
|
|
|
|
// Config carries the settings used by this package's extraction methods.
// It currently has no fields, so the zero value is ready to use.
type Config struct{}
|
|
|
|
// DefaultConfig is the zero-value Config that the package-level helper
// functions delegate to.
var DefaultConfig Config
|
|
|
|
// Sentinel errors returned by this package.
var (
	// ErrInvalidURL is returned when a URL is not an aislegopher.com product
	// URL of the form /p/<slug>/<id>.
	ErrInvalidURL = errors.New("invalid url")
)
|
|
|
|
// Item is a product read from an aislegopher.com product page.
type Item struct {
	// ID is the numeric identifier taken from the last path segment of the
	// product URL.
	ID int
	// Name is the text of the first "h2.h4" element on the product page.
	// It is empty when the page has no such element.
	Name string
}
|
|
|
|
// deferClose closes cl when it is non-nil and discards the error from Close.
// It lets callers write `defer deferClose(x)` before they know whether x was
// actually obtained.
//
// Only a nil interface value is skipped; a nil pointer stored in a non-nil
// io.Closer still has its Close method invoked.
func deferClose(cl io.Closer) {
	if cl == nil {
		return
	}
	// The close error is intentionally dropped: this runs from defer, where
	// there is no caller left to report it to.
	_ = cl.Close()
}
|
|
func GetItemFromURL(ctx context.Context, b extractor.Browser, u *url.URL) (Item, error) {
|
|
return DefaultConfig.GetItemFromURL(ctx, b, u)
|
|
}
|
|
|
|
func (c Config) GetItemFromURL(ctx context.Context, b extractor.Browser, u *url.URL) (Item, error) {
|
|
res := Item{}
|
|
|
|
// the url will be in the format of aislegopher.com/p/slug/id
|
|
// we need to parse the slug and id from the url
|
|
a := strings.Split(u.Path, "/")
|
|
if len(a) != 4 {
|
|
return res, ErrInvalidURL
|
|
}
|
|
|
|
if a[1] != "p" {
|
|
return res, ErrInvalidURL
|
|
}
|
|
|
|
if u.Host != "aislegopher.com" && u.Host != "www.aislegopher.com" {
|
|
return res, ErrInvalidURL
|
|
}
|
|
|
|
res.ID, _ = strconv.Atoi(a[3])
|
|
|
|
doc, err := b.Open(ctx, u.String(), extractor.OpenPageOptions{})
|
|
defer deferClose(doc)
|
|
if err != nil {
|
|
return res, fmt.Errorf("failed to open page: %w", err)
|
|
}
|
|
|
|
names := doc.Select("h2.h4")
|
|
|
|
if len(names) > 0 {
|
|
res.Name, _ = names[0].Text()
|
|
}
|
|
|
|
return res, nil
|
|
}
|