sync of changes

This commit is contained in:
2024-11-09 19:50:14 -05:00
parent cc7b03c614
commit a83d5f9822
9 changed files with 491 additions and 95 deletions

25
pkg/extractor/goose.go Normal file
View File

@@ -0,0 +1,25 @@
package extractor
import (
"context"
goose "github.com/advancedlogic/GoOse"
)
// GooseExtractor extracts article content through the GoOse library.
// It has no fields, so the zero value is ready to use.
type GooseExtractor struct{}
// Extract fetches the page at url with GoOse and returns its title and
// cleaned body text as an Article.
//
// The GoOse call used here takes only the URL, so ctx cannot be handed down
// to the fetch itself; it is checked once before the fetch starts. When ctx
// is already cancelled or past its deadline, Extract returns ctx.Err()
// without invoking GoOse.
//
// On any error the returned Article has only its URL field set.
func (GooseExtractor) Extract(ctx context.Context, url string) (Article, error) {
	res := Article{URL: url}

	// A nil ctx was tolerated before this check existed, so keep accepting it.
	if ctx != nil {
		if err := ctx.Err(); err != nil {
			return res, err
		}
	}

	g := goose.New()
	article, err := g.ExtractFromURL(url)
	if err != nil {
		return res, err
	}

	res.Title = article.Title
	res.Body = article.CleanedText
	return res, nil
}