refactor: restructure API, deduplicate code, expand test coverage
- Extract shared DeferClose helper, removing 14 duplicate copies
- Rename PlayWright-prefixed types to cleaner names (BrowserOptions, BrowserSelection, NewBrowser, etc.)
- Rename fields: ServerAddress, RequireServer (was DontLaunchOnConnectFailure)
- Extract shared initBrowser/mergeOptions into browser_init.go, deduplicating ~120 lines between NewBrowser and NewInteractiveBrowser
- Remove unused locator field from document struct
- Add tests for all previously untested packages (archive, aislegopher, wegmans, useragents, powerball) and expand existing test suites
- Add MIGRATION.md documenting all breaking API changes

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
72
readability_test.go
Normal file
72
readability_test.go
Normal file
@@ -0,0 +1,72 @@
|
||||
package extractor
|
||||
|
||||
import (
|
||||
"context"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestReadability_ValidHTML(t *testing.T) {
|
||||
html := `<!DOCTYPE html>
|
||||
<html>
|
||||
<head><title>Test Article</title></head>
|
||||
<body>
|
||||
<article>
|
||||
<h1>Test Article</h1>
|
||||
<p>This is a test article with enough content to be parsed by readability.
|
||||
It needs to have a reasonable amount of text so the algorithm considers it
|
||||
a valid article. Let us add several sentences to make sure this works
|
||||
correctly. The readability library requires a minimum amount of content
|
||||
to successfully extract an article from a page.</p>
|
||||
<p>Here is another paragraph to add more content. We want to make sure
|
||||
that the content is substantial enough for the readability algorithm to
|
||||
consider this a valid article and extract the text properly.</p>
|
||||
</article>
|
||||
</body>
|
||||
</html>`
|
||||
|
||||
doc := mockDocument{
|
||||
url: "https://example.com/article",
|
||||
content: html,
|
||||
}
|
||||
|
||||
article, err := Readability(context.Background(), doc)
|
||||
if err != nil {
|
||||
t.Fatalf("Readability() error = %v", err)
|
||||
}
|
||||
|
||||
if article.Title != "Test Article" {
|
||||
t.Errorf("Title = %q, want %q", article.Title, "Test Article")
|
||||
}
|
||||
|
||||
if article.TextContent == "" {
|
||||
t.Error("TextContent should not be empty")
|
||||
}
|
||||
}
|
||||
|
||||
func TestReadability_EmptyContent(t *testing.T) {
|
||||
doc := mockDocument{
|
||||
url: "https://example.com/empty",
|
||||
content: "",
|
||||
}
|
||||
|
||||
article, err := Readability(context.Background(), doc)
|
||||
if err != nil {
|
||||
t.Fatalf("Readability() unexpected error = %v", err)
|
||||
}
|
||||
// Empty content should produce an empty article.
|
||||
if article.Title != "" && article.TextContent != "" {
|
||||
t.Error("expected empty article from empty content")
|
||||
}
|
||||
}
|
||||
|
||||
func TestReadability_InvalidURL(t *testing.T) {
|
||||
doc := mockDocument{
|
||||
url: "://invalid",
|
||||
content: "<html><body><p>text</p></body></html>",
|
||||
}
|
||||
|
||||
_, err := Readability(context.Background(), doc)
|
||||
if err == nil {
|
||||
t.Error("Readability() expected error for invalid URL, got nil")
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user