Renamed the Go module to align with the updated repository path for better clarity and organization. This ensures consistency across the project and prevents potential import conflicts.
		
			
				
	
	
		
			89 lines
		
	
	
		
			1.7 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
			
		
		
	
	
			89 lines
		
	
	
		
			1.7 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
| package extractor
 | |
| 
 | |
| import (
 | |
| 	"context"
 | |
| 	"errors"
 | |
| 
 | |
| 	"gitea.stevedudenhoeffer.com/steve/answer/pkg/cache"
 | |
| )
 | |
| 
 | |
// ErrFailedToExtract is the sentinel error reporting that no article could be
// extracted. multiExtractor treats it as "try the next extractor" and returns
// it when every extractor gave up without a more specific error. Match it with
// errors.Is.
var ErrFailedToExtract = errors.New("failed to extract")
 | |
| 
 | |
// Article is the value an Extractor returns for a URL. This file only passes
// Article values along and, in CacheExtractor, stores them in the cache as
// JSON; the fields are filled in by the concrete extractors.
type Article struct {
	URL   string // address of the page (presumably the URL that was extracted; confirm against the extractors)
	Title string // title of the article
	Body  string // extracted body content
}
 | |
| 
 | |
| // Extractor is an interface of systems that can extract the contents of
 | |
| type Extractor interface {
 | |
| 	Extract(ctx context.Context, url string) (Article, error)
 | |
| }
 | |
| 
 | |
| type multiExtractor struct {
 | |
| 	extractors []Extractor
 | |
| }
 | |
| 
 | |
| var _ Extractor = multiExtractor{}
 | |
| 
 | |
| // Extract will try to extract the contents of a URL using all the extractors, and return the first successful result.
 | |
| func (m multiExtractor) Extract(ctx context.Context, url string) (Article, error) {
 | |
| 	var errs []error
 | |
| 	for _, e := range m.extractors {
 | |
| 		article, err := e.Extract(ctx, url)
 | |
| 		if err == nil {
 | |
| 			return article, nil
 | |
| 		}
 | |
| 
 | |
| 		if errors.Is(err, ErrFailedToExtract) {
 | |
| 			continue
 | |
| 		}
 | |
| 
 | |
| 		errs = append(errs, err)
 | |
| 	}
 | |
| 
 | |
| 	if len(errs) > 0 {
 | |
| 		return Article{}, errors.Join(errs...)
 | |
| 	}
 | |
| 	return Article{}, ErrFailedToExtract
 | |
| }
 | |
| 
 | |
| func MultiExtractor(e ...Extractor) Extractor {
 | |
| 	return multiExtractor{extractors: e}
 | |
| }
 | |
| 
 | |
| type CacheExtractor struct {
 | |
| 	Cache     cache.Cache
 | |
| 	Tag       string
 | |
| 	Extractor Extractor
 | |
| }
 | |
| 
 | |
| var _ Extractor = CacheExtractor{}
 | |
| 
 | |
| func (c CacheExtractor) Extract(ctx context.Context, url string) (Article, error) {
 | |
| 	tag := c.Tag
 | |
| 	if tag == "" {
 | |
| 		tag = "defaultextractor:"
 | |
| 	}
 | |
| 	key := tag + ":" + url
 | |
| 
 | |
| 	var article Article
 | |
| 
 | |
| 	err := c.Cache.GetJSON(key, &article)
 | |
| 	if err == nil {
 | |
| 		return article, nil
 | |
| 	}
 | |
| 
 | |
| 	article, err = c.Extractor.Extract(ctx, url)
 | |
| 	if err != nil {
 | |
| 		return Article{}, err
 | |
| 	}
 | |
| 
 | |
| 	err = c.Cache.SetJSON(key, article)
 | |
| 	if err != nil {
 | |
| 		return Article{}, err
 | |
| 	}
 | |
| 
 | |
| 	return article, nil
 | |
| }
 |