Consolidated a bunch of reused code to agents
This commit is contained in:
72
pkg/agents/read_page.go
Normal file
72
pkg/agents/read_page.go
Normal file
@@ -0,0 +1,72 @@
|
||||
package agents
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"gitea.stevedudenhoeffer.com/steve/answer/pkg/cache"
|
||||
"gitea.stevedudenhoeffer.com/steve/answer/pkg/extractor"
|
||||
"net/url"
|
||||
)
|
||||
|
||||
func (a Agent) ReadPage(ctx context.Context, u *url.URL, questions []string) (Knowledge, error) {
|
||||
ar, err := extractArticle(ctx, u)
|
||||
if err != nil {
|
||||
return Knowledge{}, err
|
||||
}
|
||||
|
||||
if ar.Body == "" {
|
||||
return Knowledge{}, fmt.Errorf("could not extract body from page")
|
||||
}
|
||||
|
||||
return a.ExtractKnowledge(ctx, ar.Body, u.String(), questions)
|
||||
|
||||
}
|
||||
|
||||
// article is the package-local view of an extracted page: the URL, title and
// body text copied from the extractor's result by extractArticle.
type article struct {
	URL, Title, Body string
}
|
||||
|
||||
func extractArticle(ctx context.Context, u *url.URL) (res article, err error) {
|
||||
defer func() {
|
||||
e := recover()
|
||||
|
||||
if e != nil {
|
||||
if e, ok := e.(error); ok {
|
||||
err = fmt.Errorf("panic: %w", e)
|
||||
} else {
|
||||
err = fmt.Errorf("panic: %v", e)
|
||||
}
|
||||
}
|
||||
}()
|
||||
|
||||
extractors := extractor.MultiExtractor(
|
||||
extractor.CacheExtractor{
|
||||
Cache: cache.Nop{},
|
||||
Tag: "goose",
|
||||
Extractor: extractor.GooseExtractor{},
|
||||
},
|
||||
extractor.CacheExtractor{
|
||||
Cache: cache.Nop{},
|
||||
Tag: "playwright",
|
||||
Extractor: extractor.PlaywrightExtractor{},
|
||||
},
|
||||
)
|
||||
|
||||
a, err := extractors.Extract(ctx, u.String())
|
||||
|
||||
if err != nil {
|
||||
return article{
|
||||
URL: "",
|
||||
Title: "",
|
||||
Body: "",
|
||||
}, err
|
||||
}
|
||||
|
||||
return article{
|
||||
URL: a.URL,
|
||||
Title: a.Title,
|
||||
Body: a.Body,
|
||||
}, nil
|
||||
}
|
Reference in New Issue
Block a user