46 lines
806 B
Go
46 lines
806 B
Go
package extractor
|
|
|
|
import (
|
|
"context"
|
|
"net/url"
|
|
|
|
"github.com/go-shiori/go-readability"
|
|
)
|
|
|
|
type Readability struct {
|
|
Extractor
|
|
}
|
|
|
|
// Compile-time assertion that a Readability value satisfies Extractor.
var _ Extractor = Readability{}
|
|
|
|
func (r Readability) Extract(_ context.Context, src Source) (Article, error) {
|
|
u, err := url.Parse(src.URL())
|
|
|
|
if err != nil {
|
|
return Article{}, err
|
|
}
|
|
a, err := readability.FromReader(src.Reader(), u)
|
|
|
|
if err != nil {
|
|
return Article{}, err
|
|
}
|
|
|
|
pubTime := ""
|
|
|
|
if a.PublishedTime != nil {
|
|
pubTime = a.PublishedTime.Format("2006-01-02T15:04:05Z")
|
|
}
|
|
return Article{
|
|
Title: a.Title,
|
|
Content: a.Content,
|
|
TextContent: a.TextContent,
|
|
Length: a.Length,
|
|
Excerpt: a.Excerpt,
|
|
Byline: a.Byline,
|
|
SiteName: a.SiteName,
|
|
Lang: a.Language,
|
|
PublishedTime: pubTime,
|
|
}, nil
|
|
|
|
}
|