package extractor

import (
	"context"
	"testing"
)

// TestReadability_ValidHTML feeds Readability a small but complete HTML page
// and checks that it returns no error, reports the page title, and yields
// non-empty text content.
func TestReadability_ValidHTML(t *testing.T) {
	// The fixture must be real markup: the Title assertion below needs a
	// title/heading to come from somewhere. The paragraphs are deliberately
	// long because, as the fixture text itself says, the extractor is expected
	// to need a minimum amount of content.
	// NOTE(review): the exact tags are a minimal reconstruction around the
	// original text — confirm against the intended fixture.
	html := `<!DOCTYPE html>
<html>
<head><title>Test Article</title></head>
<body>
<article>
<h1>Test Article</h1>
<p>This is a test article with enough content to be parsed by readability. It needs to have a reasonable amount of text so the algorithm considers it a valid article. Let us add several sentences to make sure this works correctly. The readability library requires a minimum amount of content to successfully extract an article from a page.</p>
<p>Here is another paragraph to add more content. We want to make sure that the content is substantial enough for the readability algorithm to consider this a valid article and extract the text properly.</p>
</article>
</body>
</html>`

	doc := mockDocument{
		url:     "https://example.com/article",
		content: html,
	}

	article, err := Readability(context.Background(), doc)
	if err != nil {
		t.Fatalf("Readability() error = %v", err)
	}

	if article.Title != "Test Article" {
		t.Errorf("Title = %q, want %q", article.Title, "Test Article")
	}
	if article.TextContent == "" {
		t.Error("TextContent should not be empty")
	}
}

// TestReadability_EmptyContent checks that a document with empty content is
// accepted without error and produces an article whose Title and TextContent
// are both empty.
func TestReadability_EmptyContent(t *testing.T) {
	doc := mockDocument{
		url:     "https://example.com/empty",
		content: "",
	}

	article, err := Readability(context.Background(), doc)
	if err != nil {
		t.Fatalf("Readability() unexpected error = %v", err)
	}

	// Empty content should produce an empty article. Either field being
	// non-empty is a failure, hence || rather than &&.
	if article.Title != "" || article.TextContent != "" {
		t.Error("expected empty article from empty content")
	}
}

// TestReadability_InvalidURL checks that Readability reports an error when the
// document URL ("://invalid") is malformed, even though the content itself is
// well-formed.
func TestReadability_InvalidURL(t *testing.T) {
	doc := mockDocument{
		url: "://invalid",
		// Kept on a single line: an interpreted string literal may not span
		// lines. NOTE(review): markup reconstructed around the original
		// "text" payload — confirm against the intended fixture.
		content: "<html><body><p>text</p></body></html>",
	}

	_, err := Readability(context.Background(), doc)
	if err == nil {
		t.Error("Readability() expected error for invalid URL, got nil")
	}
}