package agents

import (
	"context"
	"fmt"
	"net/url"

	"gitea.stevedudenhoeffer.com/steve/answer/pkg/cache"
	"gitea.stevedudenhoeffer.com/steve/answer/pkg/extractor"
)

// ReadPage extracts the article found at u and hands its body, the page URL
// and the supplied questions to a.ExtractKnowledge.
//
// It returns an error when the article cannot be extracted (including a nil
// u) or when the extracted body is empty; otherwise it returns whatever
// a.ExtractKnowledge returns.
func (a Agent) ReadPage(ctx context.Context, u *url.URL, questions []string) (Knowledge, error) {
	ar, err := extractArticle(ctx, u)
	if err != nil {
		return Knowledge{}, err
	}

	// An empty body gives ExtractKnowledge nothing to work with.
	if ar.Body == "" {
		return Knowledge{}, fmt.Errorf("could not extract body from page")
	}

	return a.ExtractKnowledge(ctx, ar.Body, u.String(), questions)
}

// article is the package-local copy of the fields taken from an extractor
// result.
type article struct {
	URL   string
	Title string
	Body  string
}

// extractArticle runs the goose and playwright extractors (combined through
// extractor.MultiExtractor) against u and copies the URL, title and body of
// the result into an article.
//
// A nil u is rejected with an error. Errors from the extractor are wrapped
// with the URL being processed, so errors.Is / errors.As still see the
// underlying error. Any panic raised while extracting is recovered and
// reported as an error instead of propagating to the caller.
func extractArticle(ctx context.Context, u *url.URL) (res article, err error) {
	// The named results exist so this deferred handler can turn a panic
	// into an ordinary error return.
	defer func() {
		r := recover()
		if r == nil {
			return
		}

		res = article{}
		if cause, ok := r.(error); ok {
			// Keep the original error in the chain when the panic value is one.
			err = fmt.Errorf("panic: %w", cause)
			return
		}
		err = fmt.Errorf("panic: %v", r)
	}()

	// Fail explicitly rather than relying on the recover above to catch the
	// nil-pointer dereference that u.String() would otherwise trigger.
	if u == nil {
		return article{}, fmt.Errorf("extracting article: nil URL")
	}
	rawURL := u.String()

	// NOTE(review): MultiExtractor presumably tries these in the listed
	// order and cache.Nop presumably disables caching — confirm against the
	// extractor and cache packages.
	extractors := extractor.MultiExtractor(
		extractor.CacheExtractor{
			Cache:     cache.Nop{},
			Tag:       "goose",
			Extractor: extractor.GooseExtractor{},
		},
		extractor.CacheExtractor{
			Cache:     cache.Nop{},
			Tag:       "playwright",
			Extractor: extractor.PlaywrightExtractor{},
		},
	)

	extracted, err := extractors.Extract(ctx, rawURL)
	if err != nil {
		return article{}, fmt.Errorf("extracting article from %q: %w", rawURL, err)
	}

	return article{
		URL:   extracted.URL,
		Title: extracted.Title,
		Body:  extracted.Body,
	}, nil
}