Files
go-extractor/readability_test.go
Steve Dudenhoeffer cb2ed10cfd
Some checks failed
CI / build (push) Failing after 2m4s
CI / test (push) Failing after 2m6s
CI / vet (push) Failing after 2m19s
refactor: restructure API, deduplicate code, expand test coverage
- Extract shared DeferClose helper, removing 14 duplicate copies
- Rename PlayWright-prefixed types to cleaner names (BrowserOptions,
  BrowserSelection, NewBrowser, etc.)
- Rename fields: ServerAddress, RequireServer (was DontLaunchOnConnectFailure)
- Extract shared initBrowser/mergeOptions into browser_init.go,
  deduplicating ~120 lines between NewBrowser and NewInteractiveBrowser
- Remove unused locator field from document struct
- Add tests for all previously untested packages (archive, aislegopher,
  wegmans, useragents, powerball) and expand existing test suites
- Add MIGRATION.md documenting all breaking API changes

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-09 13:59:47 -05:00

73 lines
1.9 KiB
Go

package extractor
import (
"context"
"testing"
)
func TestReadability_ValidHTML(t *testing.T) {
html := `<!DOCTYPE html>
<html>
<head><title>Test Article</title></head>
<body>
<article>
<h1>Test Article</h1>
<p>This is a test article with enough content to be parsed by readability.
It needs to have a reasonable amount of text so the algorithm considers it
a valid article. Let us add several sentences to make sure this works
correctly. The readability library requires a minimum amount of content
to successfully extract an article from a page.</p>
<p>Here is another paragraph to add more content. We want to make sure
that the content is substantial enough for the readability algorithm to
consider this a valid article and extract the text properly.</p>
</article>
</body>
</html>`
doc := mockDocument{
url: "https://example.com/article",
content: html,
}
article, err := Readability(context.Background(), doc)
if err != nil {
t.Fatalf("Readability() error = %v", err)
}
if article.Title != "Test Article" {
t.Errorf("Title = %q, want %q", article.Title, "Test Article")
}
if article.TextContent == "" {
t.Error("TextContent should not be empty")
}
}
// TestReadability_EmptyContent feeds Readability a document whose content is
// the empty string and expects it to succeed while returning an article with
// neither a title nor any text content.
func TestReadability_EmptyContent(t *testing.T) {
	doc := mockDocument{
		url:     "https://example.com/empty",
		content: "",
	}

	article, err := Readability(context.Background(), doc)
	if err != nil {
		t.Fatalf("Readability() unexpected error = %v", err)
	}

	// Empty content should produce an empty article. The fields are checked
	// independently: combining them with && would only fail when BOTH were
	// non-empty, letting a stray title or stray text slip through unnoticed.
	if article.Title != "" {
		t.Errorf("expected empty article from empty content: Title = %q, want empty", article.Title)
	}
	if article.TextContent != "" {
		t.Errorf("expected empty article from empty content: TextContent = %q, want empty", article.TextContent)
	}
}
// TestReadability_InvalidURL passes Readability a document whose URL is the
// malformed string "://invalid" and expects the call to report an error.
func TestReadability_InvalidURL(t *testing.T) {
	input := mockDocument{
		content: "<html><body><p>text</p></body></html>",
		url:     "://invalid",
	}

	// Only the error matters here; the returned article is discarded.
	if _, err := Readability(context.Background(), input); err == nil {
		t.Error("Readability() expected error for invalid URL, got nil")
	}
}