Changed browser API to return pages that can be acted on, not strictly contents
This commit is contained in:
98
document.go
Normal file
98
document.go
Normal file
@@ -0,0 +1,98 @@
|
||||
package extractor
|
||||
|
||||
import (
|
||||
"io"
|
||||
|
||||
"github.com/playwright-community/playwright-go"
|
||||
)
|
||||
|
||||
type Document interface {
|
||||
io.Closer
|
||||
|
||||
Content() (string, error)
|
||||
Text() (string, error)
|
||||
Screenshot() ([]byte, error)
|
||||
|
||||
Select(selector string) Documents
|
||||
SelectFirst(selector string) Document
|
||||
|
||||
ForEach(selector string, fn func(Document) error) error
|
||||
}
|
||||
|
||||
type document struct {
|
||||
pw *playwright.Playwright
|
||||
browser playwright.Browser
|
||||
page playwright.Page
|
||||
root playwright.ElementHandle
|
||||
locator playwright.Locator
|
||||
}
|
||||
|
||||
func newDocument(pw *playwright.Playwright, browser playwright.Browser, page playwright.Page) (Document, error) {
|
||||
root, err := page.QuerySelector("html")
|
||||
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
root2 := page.Locator("html")
|
||||
return document{
|
||||
pw: pw,
|
||||
browser: browser,
|
||||
page: page,
|
||||
locator: root2,
|
||||
root: root,
|
||||
}, nil
|
||||
}
|
||||
func (p document) Close() error {
|
||||
return p.page.Close()
|
||||
}
|
||||
|
||||
func (p document) Content() (string, error) {
|
||||
return p.locator.TextContent()
|
||||
}
|
||||
|
||||
func (p document) Text() (string, error) {
|
||||
return p.locator.InnerText()
|
||||
}
|
||||
|
||||
func (p document) Screenshot() ([]byte, error) {
|
||||
return p.locator.Screenshot()
|
||||
}
|
||||
|
||||
func (d document) Select(selector string) Documents {
|
||||
|
||||
elements, err := d.locator.Locator(selector).All()
|
||||
if err != nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
res := make(Documents, len(elements))
|
||||
for i, el := range elements {
|
||||
res[i] = document{
|
||||
pw: d.pw,
|
||||
browser: d.browser,
|
||||
page: d.page,
|
||||
locator: el,
|
||||
}
|
||||
}
|
||||
|
||||
return res
|
||||
}
|
||||
|
||||
func (d document) SelectFirst(selector string) Document {
|
||||
return d.Select(selector)[0]
|
||||
}
|
||||
|
||||
func (d document) ForEach(selector string, fn func(Document) error) error {
|
||||
|
||||
e := d.Select(selector)
|
||||
|
||||
for _, el := range e {
|
||||
err := fn(el)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
Reference in New Issue
Block a user