- Extract shared DeferClose helper, removing 14 duplicate copies - Rename PlayWright-prefixed types to cleaner names (BrowserOptions, BrowserSelection, NewBrowser, etc.) - Rename fields: ServerAddress, RequireServer (was DontLaunchOnConnectFailure) - Extract shared initBrowser/mergeOptions into browser_init.go, deduplicating ~120 lines between NewBrowser and NewInteractiveBrowser - Remove unused locator field from document struct - Add tests for all previously untested packages (archive, aislegopher, wegmans, useragents, powerball) and expand existing test suites - Add MIGRATION.md documenting all breaking API changes Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
73 lines
1.9 KiB
Go
73 lines
1.9 KiB
Go
package extractor
|
|
|
|
import (
|
|
"context"
|
|
"testing"
|
|
)
|
|
|
|
func TestReadability_ValidHTML(t *testing.T) {
|
|
html := `<!DOCTYPE html>
|
|
<html>
|
|
<head><title>Test Article</title></head>
|
|
<body>
|
|
<article>
|
|
<h1>Test Article</h1>
|
|
<p>This is a test article with enough content to be parsed by readability.
|
|
It needs to have a reasonable amount of text so the algorithm considers it
|
|
a valid article. Let us add several sentences to make sure this works
|
|
correctly. The readability library requires a minimum amount of content
|
|
to successfully extract an article from a page.</p>
|
|
<p>Here is another paragraph to add more content. We want to make sure
|
|
that the content is substantial enough for the readability algorithm to
|
|
consider this a valid article and extract the text properly.</p>
|
|
</article>
|
|
</body>
|
|
</html>`
|
|
|
|
doc := mockDocument{
|
|
url: "https://example.com/article",
|
|
content: html,
|
|
}
|
|
|
|
article, err := Readability(context.Background(), doc)
|
|
if err != nil {
|
|
t.Fatalf("Readability() error = %v", err)
|
|
}
|
|
|
|
if article.Title != "Test Article" {
|
|
t.Errorf("Title = %q, want %q", article.Title, "Test Article")
|
|
}
|
|
|
|
if article.TextContent == "" {
|
|
t.Error("TextContent should not be empty")
|
|
}
|
|
}
|
|
|
|
func TestReadability_EmptyContent(t *testing.T) {
|
|
doc := mockDocument{
|
|
url: "https://example.com/empty",
|
|
content: "",
|
|
}
|
|
|
|
article, err := Readability(context.Background(), doc)
|
|
if err != nil {
|
|
t.Fatalf("Readability() unexpected error = %v", err)
|
|
}
|
|
// Empty content should produce an empty article.
|
|
if article.Title != "" && article.TextContent != "" {
|
|
t.Error("expected empty article from empty content")
|
|
}
|
|
}
|
|
|
|
func TestReadability_InvalidURL(t *testing.T) {
|
|
doc := mockDocument{
|
|
url: "://invalid",
|
|
content: "<html><body><p>text</p></body></html>",
|
|
}
|
|
|
|
_, err := Readability(context.Background(), doc)
|
|
if err == nil {
|
|
t.Error("Readability() expected error for invalid URL, got nil")
|
|
}
|
|
}
|