Moved the Knowledge struct and related types to the shared package, updating all references across the codebase. This improves modularity and enables better reuse of the Knowledge type across different components.
75 lines
1.4 KiB
Go
75 lines
1.4 KiB
Go
package agents
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"net/url"
|
|
|
|
"gitea.stevedudenhoeffer.com/steve/answer/pkg/agents/shared"
|
|
"gitea.stevedudenhoeffer.com/steve/answer/pkg/cache"
|
|
"gitea.stevedudenhoeffer.com/steve/answer/pkg/extractor"
|
|
)
|
|
|
|
func (a Agent) ReadPage(ctx context.Context, u *url.URL, questions []string) (shared.Knowledge, error) {
|
|
ar, err := extractArticle(ctx, u)
|
|
if err != nil {
|
|
return shared.Knowledge{}, err
|
|
}
|
|
|
|
if ar.Body == "" {
|
|
return shared.Knowledge{}, fmt.Errorf("could not extract body from page")
|
|
}
|
|
|
|
return a.ExtractKnowledge(ctx, ar.Body, u.String(), questions)
|
|
|
|
}
|
|
|
|
// article is the package-private result of extractArticle: the URL, title and
// body text copied from the value the extractor returned. ReadPage treats an
// empty Body as a failed extraction.
type article struct {
	URL, Title, Body string
}
|
|
|
|
func extractArticle(ctx context.Context, u *url.URL) (res article, err error) {
|
|
defer func() {
|
|
e := recover()
|
|
|
|
if e != nil {
|
|
if e, ok := e.(error); ok {
|
|
err = fmt.Errorf("panic: %w", e)
|
|
} else {
|
|
err = fmt.Errorf("panic: %v", e)
|
|
}
|
|
}
|
|
}()
|
|
|
|
extractors := extractor.MultiExtractor(
|
|
extractor.CacheExtractor{
|
|
Cache: cache.Nop{},
|
|
Tag: "goose",
|
|
Extractor: extractor.GooseExtractor{},
|
|
},
|
|
extractor.CacheExtractor{
|
|
Cache: cache.Nop{},
|
|
Tag: "playwright",
|
|
Extractor: extractor.PlaywrightExtractor{},
|
|
},
|
|
)
|
|
|
|
a, err := extractors.Extract(ctx, u.String())
|
|
|
|
if err != nil {
|
|
return article{
|
|
URL: "",
|
|
Title: "",
|
|
Body: "",
|
|
}, err
|
|
}
|
|
|
|
return article{
|
|
URL: a.URL,
|
|
Title: a.Title,
|
|
Body: a.Body,
|
|
}, nil
|
|
}
|