package extractor
import (
"context"
"testing"
)
// TestReadability_ValidHTML runs Readability over a fixture document that
// contains a title and several sentences of body text. It requires the call
// to succeed, the extracted title to equal "Test Article", and the extracted
// text content to be non-empty.
func TestReadability_ValidHTML(t *testing.T) {
	const wantTitle = "Test Article"

	// Fixture body; the text itself explains that it is deliberately long
	// so that extraction has enough content to work with.
	const page = `
Test Article
This is a test article with enough content to be parsed by readability.
It needs to have a reasonable amount of text so the algorithm considers it
a valid article. Let us add several sentences to make sure this works
correctly. The readability library requires a minimum amount of content
to successfully extract an article from a page.
Here is another paragraph to add more content. We want to make sure
that the content is substantial enough for the readability algorithm to
consider this a valid article and extract the text properly.
`

	input := mockDocument{
		url:     "https://example.com/article",
		content: page,
	}

	ctx := context.Background()
	article, err := Readability(ctx, input)
	if err != nil {
		// Nothing else can be checked without an article.
		t.Fatalf("Readability() error = %v", err)
	}

	if article.Title != wantTitle {
		t.Errorf("Title = %q, want %q", article.Title, wantTitle)
	}
	if len(article.TextContent) == 0 {
		t.Error("TextContent should not be empty")
	}
}
// TestReadability_EmptyContent checks that Readability accepts a document
// whose content is the empty string without returning an error, and that the
// resulting article has neither a title nor text content.
func TestReadability_EmptyContent(t *testing.T) {
	doc := mockDocument{
		url:     "https://example.com/empty",
		content: "",
	}

	article, err := Readability(context.Background(), doc)
	if err != nil {
		t.Fatalf("Readability() unexpected error = %v", err)
	}

	// Empty content should produce an empty article. Each field is checked
	// independently so that a single populated field is enough to fail the
	// test; combining the checks with && would only fail when both fields
	// were non-empty.
	if article.Title != "" {
		t.Errorf("Title = %q, want empty", article.Title)
	}
	if article.TextContent != "" {
		t.Errorf("TextContent = %q, want empty", article.TextContent)
	}
}
func TestReadability_InvalidURL(t *testing.T) {
doc := mockDocument{
url: "://invalid",
content: "