go-extractor/document.go

99 lines
1.7 KiB
Go

package extractor
import (
"io"
"github.com/playwright-community/playwright-go"
)
// Document is a readable, capturable view that can be narrowed further with
// selectors. The implementation in this file (document) backs it with a
// Playwright page and locator.
type Document interface {
	// Close releases whatever the document holds open.
	io.Closer

	// Content returns the document's content as a string.
	Content() (string, error)

	// Text returns the document's text as a string.
	Text() (string, error)

	// Screenshot returns a capture of the document as raw bytes.
	Screenshot() ([]byte, error)

	// Select returns the documents that match selector within this document.
	Select(selector string) Documents

	// SelectFirst returns a single document that matches selector within
	// this document.
	SelectFirst(selector string) Document

	// ForEach calls fn for the documents that match selector and reports an
	// error returned by fn.
	ForEach(selector string, fn func(Document) error) error
}
// document is the Playwright-backed Document used by this package. Every read
// and selection goes through locator; the remaining fields are carried along
// so that derived documents can share them.
type document struct {
	// pw is the Playwright instance passed to newDocument.
	pw *playwright.Playwright
	// browser is the browser passed to newDocument.
	browser playwright.Browser
	// page is the page this document belongs to; Close closes it.
	page playwright.Page
	// root is the element handle for "html". Only newDocument sets it;
	// documents built by Select leave it at its zero value.
	root playwright.ElementHandle
	// locator is the scope that Content, Text, Screenshot and Select act on.
	locator playwright.Locator
}
// newDocument builds the top-level Document for page, scoped to its "html"
// element.
//
// The "html" element is resolved twice: first as an element handle through
// QuerySelector, then as a locator. If QuerySelector fails, that error is
// returned unchanged together with a nil Document.
func newDocument(pw *playwright.Playwright, browser playwright.Browser, page playwright.Page) (Document, error) {
	htmlHandle, err := page.QuerySelector("html")
	if err != nil {
		return nil, err
	}

	doc := document{
		pw:      pw,
		browser: browser,
		page:    page,
		root:    htmlHandle,
		locator: page.Locator("html"),
	}
	return doc, nil
}
// Close closes the page behind this document and returns the page's error.
//
// Documents produced by Select carry the same page value, so closing any one
// of them closes that shared page.
func (d document) Close() error {
	err := d.page.Close()
	return err
}
// Content returns the result of TextContent on this document's locator.
//
// NOTE(review): despite the name, this yields text content rather than
// markup — confirm that is what callers expect.
func (d document) Content() (string, error) {
	content, err := d.locator.TextContent()
	return content, err
}
// Text returns the result of InnerText on this document's locator.
func (d document) Text() (string, error) {
	text, err := d.locator.InnerText()
	return text, err
}
// Screenshot captures this document's locator and returns the image bytes
// produced by Playwright.
func (d document) Screenshot() ([]byte, error) {
	image, err := d.locator.Screenshot()
	return image, err
}
// Select returns one document per element that selector matches inside this
// document's locator. Each result shares this document's Playwright instance,
// browser and page, and is scoped to its own matched locator.
//
// The signature has no error result, so a failed lookup is reported as a nil
// Documents and the underlying error is dropped. A successful lookup with no
// matches yields an empty, non-nil Documents.
func (d document) Select(selector string) Documents {
	matches, err := d.locator.Locator(selector).All()
	if err != nil {
		return nil
	}

	docs := make(Documents, 0, len(matches))
	for _, match := range matches {
		child := document{
			pw:      d.pw,
			browser: d.browser,
			page:    d.page,
			locator: match,
		}
		docs = append(docs, child)
	}
	return docs
}
// SelectFirst returns the first document that selector matches inside this
// document, or nil when there is none.
//
// Select yields an empty result when nothing matches and a nil result when
// the lookup itself fails; indexing either one would panic, so both cases are
// reported as a nil Document instead. Callers must check for nil before use.
func (d document) SelectFirst(selector string) Document {
	matches := d.Select(selector)
	if len(matches) == 0 {
		// Return the literal nil so the caller sees a nil interface value.
		return nil
	}
	return matches[0]
}
// ForEach calls fn once for every element that selector matches inside this
// document's locator, in the order Playwright returns them.
//
// Iteration stops at the first non-nil error from fn, and that error is
// returned. If resolving the matches fails, that error is returned and fn is
// never called. It returns nil when every call succeeds or nothing matches.
func (d document) ForEach(selector string, fn func(Document) error) error {
	// Resolve the matches here rather than through Select: Select has no
	// error result and would turn a failed lookup into a silent no-op.
	matches, err := d.locator.Locator(selector).All()
	if err != nil {
		return err
	}

	for _, match := range matches {
		// Build the child the same way Select does: shared Playwright
		// instance, browser and page, scoped to the matched locator.
		child := document{
			pw:      d.pw,
			browser: d.browser,
			page:    d.page,
			locator: match,
		}
		if err := fn(child); err != nil {
			return err
		}
	}
	return nil
}