// Package extractor defines the Extractor abstraction together with two
// combinators: one that tries several extractors in order and one that puts a
// cache in front of another extractor.
package extractor

import (
	"answer/pkg/cache"
	"context"
	"errors"
)

// ErrFailedToExtract is the sentinel an Extractor returns to signal that it
// could not extract the given URL. The multi-extractor skips errors matching
// it and returns it itself when no extractor succeeded and no other error was
// recorded.
var ErrFailedToExtract = errors.New("failed to extract")

// Article is the value produced by an Extractor for a URL.
type Article struct {
	URL   string
	Title string
	Body  string
}

// Extractor is an interface of systems that can extract the contents of a URL
// as an Article.
type Extractor interface {
	// Extract returns the Article for url, or an error.
	Extract(ctx context.Context, url string) (Article, error)
}

// multiExtractor is an Extractor that delegates to a list of extractors.
type multiExtractor struct {
	extractors []Extractor
}

var _ Extractor = multiExtractor{}

// Extract calls each configured extractor in order and returns the first
// Article obtained without error.
//
// Errors matching ErrFailedToExtract are skipped. Every other error is
// collected, and if no extractor succeeds the collected errors are returned
// combined with errors.Join. When nothing was collected (including when there
// are no extractors at all) ErrFailedToExtract is returned.
func (m multiExtractor) Extract(ctx context.Context, url string) (Article, error) {
	var failures []error
	for i := range m.extractors {
		result, extractErr := m.extractors[i].Extract(ctx, url)
		switch {
		case extractErr == nil:
			return result, nil
		case errors.Is(extractErr, ErrFailedToExtract):
			// This extractor simply could not handle the URL; try the next one.
		default:
			failures = append(failures, extractErr)
		}
	}

	if len(failures) == 0 {
		return Article{}, ErrFailedToExtract
	}
	return Article{}, errors.Join(failures...)
}

// MultiExtractor returns an Extractor that tries the given extractors in the
// order they were passed and yields the first successful result.
func MultiExtractor(e ...Extractor) Extractor {
	return multiExtractor{extractors: e}
}

// CacheExtractor is an Extractor that consults Cache before delegating to the
// wrapped Extractor, and stores freshly extracted articles back into Cache.
type CacheExtractor struct {
	// Cache is read with GetJSON and written with SetJSON.
	Cache cache.Cache
	// Tag prefixes every cache key. When empty, "defaultextractor:" is used.
	Tag string
	// Extractor is called when the cache lookup does not succeed.
	Extractor Extractor
}

var _ Extractor = CacheExtractor{}

// Extract returns the Article stored in the cache for url when the lookup
// succeeds. Any error from the cache lookup is treated as a miss: the wrapped
// Extractor is called instead and its result is written to the cache.
//
// An error from the wrapped Extractor is returned unchanged. An error from the
// cache write is also returned, in which case the extracted Article is
// discarded.
func (c CacheExtractor) Extract(ctx context.Context, url string) (Article, error) {
	prefix := c.Tag
	if prefix == "" {
		// The default already ends in a colon, so default keys contain "::"
		// once the separator below is appended.
		prefix = "defaultextractor:"
	}
	cacheKey := prefix + ":" + url

	var cached Article
	if err := c.Cache.GetJSON(cacheKey, &cached); err == nil {
		return cached, nil
	}

	fresh, err := c.Extractor.Extract(ctx, url)
	if err != nil {
		return Article{}, err
	}

	if err := c.Cache.SetJSON(cacheKey, fresh); err != nil {
		return Article{}, err
	}
	return fresh, nil
}