answer/pkg/agents/read_page.go
Steve Dudenhoeffer 1c3ea7d1f1 Refactor Knowledge struct into shared package
Moved the Knowledge struct and related types to the shared package, updating all references across the codebase. This improves modularity and enables better reuse of the Knowledge type across different components.
2025-05-03 22:09:02 -04:00

75 lines
1.4 KiB
Go

package agents
import (
"context"
"fmt"
"net/url"
"gitea.stevedudenhoeffer.com/steve/answer/pkg/agents/shared"
"gitea.stevedudenhoeffer.com/steve/answer/pkg/cache"
"gitea.stevedudenhoeffer.com/steve/answer/pkg/extractor"
)
func (a Agent) ReadPage(ctx context.Context, u *url.URL, questions []string) (shared.Knowledge, error) {
ar, err := extractArticle(ctx, u)
if err != nil {
return shared.Knowledge{}, err
}
if ar.Body == "" {
return shared.Knowledge{}, fmt.Errorf("could not extract body from page")
}
return a.ExtractKnowledge(ctx, ar.Body, u.String(), questions)
}
type article struct {
URL string
Title string
Body string
}
func extractArticle(ctx context.Context, u *url.URL) (res article, err error) {
defer func() {
e := recover()
if e != nil {
if e, ok := e.(error); ok {
err = fmt.Errorf("panic: %w", e)
} else {
err = fmt.Errorf("panic: %v", e)
}
}
}()
extractors := extractor.MultiExtractor(
extractor.CacheExtractor{
Cache: cache.Nop{},
Tag: "goose",
Extractor: extractor.GooseExtractor{},
},
extractor.CacheExtractor{
Cache: cache.Nop{},
Tag: "playwright",
Extractor: extractor.PlaywrightExtractor{},
},
)
a, err := extractors.Extract(ctx, u.String())
if err != nil {
return article{
URL: "",
Title: "",
Body: "",
}, err
}
return article{
URL: a.URL,
Title: a.Title,
Body: a.Body,
}, nil
}