initial commit
This commit is contained in:
45
readability.go
Normal file
45
readability.go
Normal file
@@ -0,0 +1,45 @@
|
||||
package extractor
|
||||
|
||||
import (
|
||||
"context"
|
||||
"net/url"
|
||||
|
||||
"github.com/go-shiori/go-readability"
|
||||
)
|
||||
|
||||
type Readability struct {
|
||||
Extractor
|
||||
}
|
||||
|
||||
// Compile-time assertion that a Readability value can be used as an Extractor.
// NOTE(review): Readability embeds Extractor, so this holds through method
// promotion no matter which methods Readability defines itself; the check
// would only become meaningful if the embedding were removed.
var _ Extractor = Readability{}
|
||||
|
||||
func (r Readability) Extract(_ context.Context, src Source) (Article, error) {
|
||||
u, err := url.Parse(src.URL())
|
||||
|
||||
if err != nil {
|
||||
return Article{}, err
|
||||
}
|
||||
a, err := readability.FromReader(src.Reader(), u)
|
||||
|
||||
if err != nil {
|
||||
return Article{}, err
|
||||
}
|
||||
|
||||
pubTime := ""
|
||||
|
||||
if a.PublishedTime != nil {
|
||||
pubTime = a.PublishedTime.Format("2006-01-02T15:04:05Z")
|
||||
}
|
||||
return Article{
|
||||
Title: a.Title,
|
||||
Content: a.Content,
|
||||
TextContent: a.TextContent,
|
||||
Length: a.Length,
|
||||
Excerpt: a.Excerpt,
|
||||
Byline: a.Byline,
|
||||
SiteName: a.SiteName,
|
||||
Lang: a.Language,
|
||||
PublishedTime: pubTime,
|
||||
}, nil
|
||||
|
||||
}
|
Reference in New Issue
Block a user