answer/pkg/extractor/extractor.go

88 lines
1.6 KiB
Go

package extractor
import (
"answer/pkg/cache"
"context"
"errors"
)
// ErrFailedToExtract is the sentinel error for an extraction that yielded no
// article. multiExtractor.Extract skips delegate errors that match it (via
// errors.Is) and returns it when no delegate reported any other error.
var ErrFailedToExtract = errors.New("failed to extract")
// Article is the value returned by Extractor.Extract for a URL. CacheExtractor
// also stores and loads it through the cache's SetJSON/GetJSON methods.
type Article struct {
// URL is presumably the address the article was extracted from — confirm
// against the concrete extractors, which are not defined in this file.
URL string
// Title and Body presumably hold the page's headline and main text —
// confirm against the concrete extractors.
Title string
Body string
}
// Extractor is an interface of systems that can extract the contents of a URL
// and return them as an Article.
type Extractor interface {
// Extract returns the Article for url, or a non-nil error on failure.
// multiExtractor skips errors that match ErrFailedToExtract and collects
// every other error, so implementations can use that sentinel to signal
// "nothing extracted" as opposed to an unexpected failure.
Extract(ctx context.Context, url string) (Article, error)
}
// multiExtractor is an Extractor backed by a list of other extractors; its
// Extract method tries them one after another.
type multiExtractor struct {
// extractors are the delegates, consulted in slice order by Extract.
extractors []Extractor
}
// Compile-time check that multiExtractor satisfies Extractor.
var _ Extractor = multiExtractor{}
// Extract asks each configured extractor, in order, for the contents of url
// and returns the first article obtained without error.
//
// Errors matching ErrFailedToExtract are ignored. Any other error is
// remembered, and the remaining extractors are still tried. If nothing
// succeeds, the remembered errors are returned joined together; if there are
// none (including when no extractors are configured), ErrFailedToExtract is
// returned.
func (m multiExtractor) Extract(ctx context.Context, url string) (Article, error) {
	var failures []error
	for i := range m.extractors {
		result, err := m.extractors[i].Extract(ctx, url)
		switch {
		case err == nil:
			return result, nil
		case errors.Is(err, ErrFailedToExtract):
			// This extractor simply had nothing to offer; not worth reporting.
		default:
			failures = append(failures, err)
		}
	}
	if len(failures) == 0 {
		return Article{}, ErrFailedToExtract
	}
	return Article{}, errors.Join(failures...)
}
// MultiExtractor combines the given extractors into a single Extractor that
// tries them in the order supplied and returns the first successful result.
//
// The argument slice is copied, so a caller that passes an existing slice
// (MultiExtractor(list...)) can keep modifying it afterwards without changing
// which extractors the returned value uses.
func MultiExtractor(e ...Extractor) Extractor {
	return multiExtractor{extractors: append([]Extractor(nil), e...)}
}
// CacheExtractor is an Extractor that puts a cache in front of another
// Extractor: its Extract method looks the URL up in Cache first and only calls
// the wrapped Extractor when that lookup returns an error.
type CacheExtractor struct {
// Cache is the store read with GetJSON and written with SetJSON.
Cache cache.Cache
// Tag is prefixed to the URL (followed by ":") to form the cache key. When
// empty, Extract substitutes "defaultextractor:".
Tag string
// Extractor is the wrapped extractor consulted when the cache lookup fails.
Extractor Extractor
}
// Compile-time check that CacheExtractor satisfies Extractor.
var _ Extractor = CacheExtractor{}
// Extract returns the article for url, preferring a cached copy.
//
// The cache key is "<Tag>:<url>". When Tag is empty the prefix
// "defaultextractor:" is used, so default keys contain a double colon
// ("defaultextractor::<url>").
//
// Any error from the cache lookup is treated as a miss, in which case the
// wrapped Extractor is called. Its error, if any, is returned unchanged. A
// freshly extracted article is written back to the cache; if that write fails
// the write error is returned and the article is discarded.
func (c CacheExtractor) Extract(ctx context.Context, url string) (Article, error) {
	prefix := "defaultextractor:"
	if c.Tag != "" {
		prefix = c.Tag
	}
	cacheKey := prefix + ":" + url

	var cached Article
	if lookupErr := c.Cache.GetJSON(cacheKey, &cached); lookupErr == nil {
		return cached, nil
	}

	fresh, err := c.Extractor.Extract(ctx, url)
	if err != nil {
		return Article{}, err
	}
	if storeErr := c.Cache.SetJSON(cacheKey, fresh); storeErr != nil {
		return Article{}, storeErr
	}
	return fresh, nil
}