package extractor import ( "context" "net/url" "github.com/go-shiori/go-readability" ) type Readability struct { Extractor } var _ Extractor = Readability{} func (r Readability) Extract(_ context.Context, src Source) (Article, error) { u, err := url.Parse(src.URL()) if err != nil { return Article{}, err } a, err := readability.FromReader(src.Reader(), u) if err != nil { return Article{}, err } pubTime := "" if a.PublishedTime != nil { pubTime = a.PublishedTime.Format("2006-01-02T15:04:05Z") } return Article{ Title: a.Title, Content: a.Content, TextContent: a.TextContent, Length: a.Length, Excerpt: a.Excerpt, Byline: a.Byline, SiteName: a.SiteName, Lang: a.Language, PublishedTime: pubTime, }, nil }