26 lines
406 B
Go
26 lines
406 B
Go
package extractor
|
|
|
|
import (
|
|
"context"
|
|
goose "github.com/advancedlogic/GoOse"
|
|
)
|
|
|
|
// GooseExtractor extracts article content from web pages using the GoOse
// library. It carries no state, so the zero value is ready to use.
type GooseExtractor struct{}
|
|
|
|
func (GooseExtractor) Extract(ctx context.Context, url string) (Article, error) {
|
|
var res = Article{
|
|
URL: url,
|
|
}
|
|
g := goose.New()
|
|
|
|
article, err := g.ExtractFromURL(url)
|
|
if err != nil {
|
|
return res, err
|
|
}
|
|
|
|
res.Body = article.CleanedText
|
|
res.Title = article.Title
|
|
return res, nil
|
|
}
|