99 lines
1.7 KiB
Go
99 lines
1.7 KiB
Go
package extractor
|
|
|
|
import (
|
|
"io"
|
|
|
|
"github.com/playwright-community/playwright-go"
|
|
)
|
|
|
|
type Document interface {
|
|
io.Closer
|
|
|
|
Content() (string, error)
|
|
Text() (string, error)
|
|
Screenshot() ([]byte, error)
|
|
|
|
Select(selector string) Documents
|
|
SelectFirst(selector string) Document
|
|
|
|
ForEach(selector string, fn func(Document) error) error
|
|
}
|
|
|
|
type document struct {
|
|
pw *playwright.Playwright
|
|
browser playwright.Browser
|
|
page playwright.Page
|
|
root playwright.ElementHandle
|
|
locator playwright.Locator
|
|
}
|
|
|
|
func newDocument(pw *playwright.Playwright, browser playwright.Browser, page playwright.Page) (Document, error) {
|
|
root, err := page.QuerySelector("html")
|
|
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
root2 := page.Locator("html")
|
|
return document{
|
|
pw: pw,
|
|
browser: browser,
|
|
page: page,
|
|
locator: root2,
|
|
root: root,
|
|
}, nil
|
|
}
|
|
func (p document) Close() error {
|
|
return p.page.Close()
|
|
}
|
|
|
|
func (p document) Content() (string, error) {
|
|
return p.locator.TextContent()
|
|
}
|
|
|
|
func (p document) Text() (string, error) {
|
|
return p.locator.InnerText()
|
|
}
|
|
|
|
func (p document) Screenshot() ([]byte, error) {
|
|
return p.locator.Screenshot()
|
|
}
|
|
|
|
func (d document) Select(selector string) Documents {
|
|
|
|
elements, err := d.locator.Locator(selector).All()
|
|
if err != nil {
|
|
return nil
|
|
}
|
|
|
|
res := make(Documents, len(elements))
|
|
for i, el := range elements {
|
|
res[i] = document{
|
|
pw: d.pw,
|
|
browser: d.browser,
|
|
page: d.page,
|
|
locator: el,
|
|
}
|
|
}
|
|
|
|
return res
|
|
}
|
|
|
|
func (d document) SelectFirst(selector string) Document {
|
|
return d.Select(selector)[0]
|
|
}
|
|
|
|
func (d document) ForEach(selector string, fn func(Document) error) error {
|
|
|
|
e := d.Select(selector)
|
|
|
|
for _, el := range e {
|
|
err := fn(el)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
}
|
|
|
|
return nil
|
|
}
|