diff --git a/.gitea/workflows/ci.yml b/.gitea/workflows/ci.yml new file mode 100644 index 0000000..ba419e2 --- /dev/null +++ b/.gitea/workflows/ci.yml @@ -0,0 +1,35 @@ +name: CI + +on: + push: + branches: [main] + pull_request: + branches: [main] + +jobs: + build: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-go@v5 + with: + go-version-file: go.mod + - run: go build ./... + + test: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-go@v5 + with: + go-version-file: go.mod + - run: go test ./... + + vet: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-go@v5 + with: + go-version-file: go.mod + - run: go vet ./... diff --git a/cmd/browser/main.go b/cmd/browser/main.go index 3eb6ebc..4f5a8a4 100644 --- a/cmd/browser/main.go +++ b/cmd/browser/main.go @@ -74,6 +74,7 @@ func main() { err := cmd.Run(context.Background(), os.Args) if err != nil { - panic(err) + fmt.Fprintf(os.Stderr, "error: %v\n", err) + os.Exit(1) } } diff --git a/cookiejar.go b/cookiejar.go index f08aa0f..849e012 100644 --- a/cookiejar.go +++ b/cookiejar.go @@ -25,26 +25,28 @@ func (c Cookie) IsTargetMatch(target string) (bool, error) { // the host of the cookie is the same as the host of the target // if the cookie host starts with a dot, that means it matches any subdomain if c.Host == u.Host || strings.HasPrefix(c.Host, ".") && strings.HasSuffix(u.Host, c.Host) { - if c.Path != "" { - if !strings.HasPrefix(u.Path, c.Path) { - return false, nil - } - - // if the cookie path is a prefix of the target path, then it's a match - // so now these would both match: - // cookie path: /foo - // target path: /foo/bar - // cookie path: /foo - // target path: /foosball - // because foseball is not an actual match, we need to check to see that either the path is an exact match - // or that the next character in the target path is a slash - - if len(u.Path) > len(c.Path) && u.Path[len(c.Path)] != '/' 
{ - return false, nil - } - + if c.Path == "" { return true, nil } + + if !strings.HasPrefix(u.Path, c.Path) { + return false, nil + } + + // if the cookie path is a prefix of the target path, then it's a match + // so now these would both match: + // cookie path: /foo + // target path: /foo/bar + // cookie path: /foo + // target path: /foosball + // because foosball is not an actual match, we need to check to see that either the path is an exact match + // or that the next character in the target path is a slash + + if len(u.Path) > len(c.Path) && !strings.HasSuffix(c.Path, "/") && u.Path[len(c.Path)] != '/' { + return false, nil + } + + return true, nil } return false, nil diff --git a/cookiejar_test.go b/cookiejar_test.go new file mode 100644 index 0000000..48b4aab --- /dev/null +++ b/cookiejar_test.go @@ -0,0 +1,266 @@ +package extractor + +import ( + "testing" +) + +func TestCookie_IsTargetMatch_ExactHost(t *testing.T) { + c := Cookie{Host: "example.com", Path: "/"} + match, err := c.IsTargetMatch("https://example.com/page") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if !match { + t.Error("expected match for exact host") + } +} + +func TestCookie_IsTargetMatch_DotPrefix(t *testing.T) { + c := Cookie{Host: ".example.com", Path: "/"} + match, err := c.IsTargetMatch("https://sub.example.com/page") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if !match { + t.Error("expected match for .example.com against sub.example.com") + } +} + +func TestCookie_IsTargetMatch_DotPrefix_NoFalsePositive(t *testing.T) { + c := Cookie{Host: ".example.com", Path: "/"} + match, err := c.IsTargetMatch("https://notexample.com/page") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if match { + t.Error("did not expect .example.com to match notexample.com") + } +} + +func TestCookie_IsTargetMatch_PathExact(t *testing.T) { + c := Cookie{Host: "example.com", Path: "/foo"} + match, err := c.IsTargetMatch("https://example.com/foo") + if 
err != nil { + t.Fatalf("unexpected error: %v", err) + } + if !match { + t.Error("expected match for exact path /foo") + } +} + +func TestCookie_IsTargetMatch_PathPrefix(t *testing.T) { + c := Cookie{Host: "example.com", Path: "/foo"} + match, err := c.IsTargetMatch("https://example.com/foo/bar") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if !match { + t.Error("expected match for /foo prefix with /foo/bar") + } +} + +func TestCookie_IsTargetMatch_PathBoundary(t *testing.T) { + c := Cookie{Host: "example.com", Path: "/foo"} + match, err := c.IsTargetMatch("https://example.com/foosball") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if match { + t.Error("did not expect /foo to match /foosball") + } +} + +func TestCookie_IsTargetMatch_EmptyPath(t *testing.T) { + c := Cookie{Host: "example.com", Path: ""} + match, err := c.IsTargetMatch("https://example.com/anything") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if !match { + t.Error("expected empty path cookie to match any path") + } +} + +func TestCookie_IsTargetMatch_NoMatch(t *testing.T) { + c := Cookie{Host: "other.com", Path: "/"} + match, err := c.IsTargetMatch("https://example.com/page") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if match { + t.Error("did not expect other.com to match example.com") + } +} + +func TestCookie_IsTargetMatch_InvalidURL(t *testing.T) { + c := Cookie{Host: "example.com", Path: "/"} + _, err := c.IsTargetMatch("://invalid") + if err == nil { + t.Error("expected error for invalid URL") + } +} + +func TestStaticCookieJar_GetAll(t *testing.T) { + jar := &staticCookieJar{ + Cookie{Host: "a.com", Name: "a", Value: "1"}, + Cookie{Host: "b.com", Name: "b", Value: "2"}, + } + + cookies, err := jar.GetAll() + if err != nil { + t.Fatalf("GetAll() error: %v", err) + } + if len(cookies) != 2 { + t.Errorf("GetAll() returned %d cookies, want 2", len(cookies)) + } +} + +func TestStaticCookieJar_Get(t *testing.T) { + jar 
:= &staticCookieJar{ + Cookie{Host: "example.com", Path: "/", Name: "a", Value: "1"}, + Cookie{Host: "other.com", Path: "/", Name: "b", Value: "2"}, + } + + cookies, err := jar.Get("https://example.com/page") + if err != nil { + t.Fatalf("Get() error: %v", err) + } + if len(cookies) != 1 { + t.Fatalf("Get() returned %d cookies, want 1", len(cookies)) + } + if cookies[0].Name != "a" { + t.Errorf("Get() cookie name = %q, want %q", cookies[0].Name, "a") + } +} + +func TestStaticCookieJar_Set_New(t *testing.T) { + jar := &staticCookieJar{} + err := jar.Set(Cookie{Host: "example.com", Path: "/", Name: "a", Value: "1"}) + if err != nil { + t.Fatalf("Set() error: %v", err) + } + + cookies, _ := jar.GetAll() + if len(cookies) != 1 { + t.Fatalf("after Set, GetAll() returned %d cookies, want 1", len(cookies)) + } + if cookies[0].Value != "1" { + t.Errorf("cookie value = %q, want %q", cookies[0].Value, "1") + } +} + +func TestStaticCookieJar_Set_Update(t *testing.T) { + jar := &staticCookieJar{ + Cookie{Host: "example.com", Path: "/", Name: "a", Value: "1"}, + } + err := jar.Set(Cookie{Host: "example.com", Path: "/", Name: "a", Value: "2"}) + if err != nil { + t.Fatalf("Set() error: %v", err) + } + + cookies, _ := jar.GetAll() + if len(cookies) != 1 { + t.Fatalf("after update Set, GetAll() returned %d cookies, want 1", len(cookies)) + } + if cookies[0].Value != "2" { + t.Errorf("cookie value = %q, want %q", cookies[0].Value, "2") + } +} + +func TestStaticCookieJar_Delete(t *testing.T) { + jar := &staticCookieJar{ + Cookie{Host: "example.com", Path: "/", Name: "a", Value: "1"}, + Cookie{Host: "other.com", Path: "/", Name: "b", Value: "2"}, + } + err := jar.Delete(Cookie{Host: "example.com", Path: "/", Name: "a"}) + if err != nil { + t.Fatalf("Delete() error: %v", err) + } + + cookies, _ := jar.GetAll() + if len(cookies) != 1 { + t.Fatalf("after Delete, GetAll() returned %d cookies, want 1", len(cookies)) + } + if cookies[0].Name != "b" { + t.Errorf("remaining cookie name = %q, 
want %q", cookies[0].Name, "b") + } +} + +func TestStaticCookieJar_Delete_NotFound(t *testing.T) { + jar := &staticCookieJar{ + Cookie{Host: "example.com", Path: "/", Name: "a", Value: "1"}, + } + err := jar.Delete(Cookie{Host: "nonexistent.com", Path: "/", Name: "x"}) + if err != nil { + t.Fatalf("Delete() error: %v", err) + } + + cookies, _ := jar.GetAll() + if len(cookies) != 1 { + t.Fatalf("after no-op Delete, GetAll() returned %d cookies, want 1", len(cookies)) + } +} + +func TestReadOnlyCookieJar_SetIsNoop(t *testing.T) { + inner := &staticCookieJar{ + Cookie{Host: "example.com", Path: "/", Name: "a", Value: "1"}, + } + ro := ReadOnlyCookieJar{Jar: inner} + + err := ro.Set(Cookie{Host: "example.com", Path: "/", Name: "new", Value: "val"}) + if err != nil { + t.Fatalf("Set() error: %v", err) + } + + cookies, _ := inner.GetAll() + if len(cookies) != 1 { + t.Errorf("ReadOnlyCookieJar.Set should be noop, but inner jar has %d cookies", len(cookies)) + } +} + +func TestReadOnlyCookieJar_DeleteIsNoop(t *testing.T) { + inner := &staticCookieJar{ + Cookie{Host: "example.com", Path: "/", Name: "a", Value: "1"}, + } + ro := ReadOnlyCookieJar{Jar: inner} + + err := ro.Delete(Cookie{Host: "example.com", Path: "/", Name: "a"}) + if err != nil { + t.Fatalf("Delete() error: %v", err) + } + + cookies, _ := inner.GetAll() + if len(cookies) != 1 { + t.Errorf("ReadOnlyCookieJar.Delete should be noop, but inner jar has %d cookies", len(cookies)) + } +} + +func TestReadOnlyCookieJar_GetAll(t *testing.T) { + inner := &staticCookieJar{ + Cookie{Host: "example.com", Path: "/", Name: "a", Value: "1"}, + } + ro := ReadOnlyCookieJar{Jar: inner} + + cookies, err := ro.GetAll() + if err != nil { + t.Fatalf("GetAll() error: %v", err) + } + if len(cookies) != 1 { + t.Errorf("ReadOnlyCookieJar.GetAll() returned %d cookies, want 1", len(cookies)) + } +} + +func TestReadOnlyCookieJar_Get(t *testing.T) { + inner := &staticCookieJar{ + Cookie{Host: "example.com", Path: "/", Name: "a", Value: 
"1"}, + } + ro := ReadOnlyCookieJar{Jar: inner} + + cookies, err := ro.Get("https://example.com/page") + if err != nil { + t.Fatalf("Get() error: %v", err) + } + if len(cookies) != 1 { + t.Errorf("ReadOnlyCookieJar.Get() returned %d cookies, want 1", len(cookies)) + } +} diff --git a/cookies_txt_test.go b/cookies_txt_test.go new file mode 100644 index 0000000..dc6b922 --- /dev/null +++ b/cookies_txt_test.go @@ -0,0 +1,189 @@ +package extractor + +import ( + "os" + "path/filepath" + "testing" + "time" +) + +func writeTempCookieFile(t *testing.T, content string) string { + t.Helper() + dir := t.TempDir() + path := filepath.Join(dir, "cookies.txt") + if err := os.WriteFile(path, []byte(content), 0644); err != nil { + t.Fatalf("failed to write temp cookie file: %v", err) + } + return path +} + +func TestLoadCookiesFile_Valid(t *testing.T) { + content := ".example.com\tTRUE\t/\tFALSE\t1700000000\tsession\tabc123\n" + path := writeTempCookieFile(t, content) + + jar, err := LoadCookiesFile(path) + if err != nil { + t.Fatalf("LoadCookiesFile() error: %v", err) + } + + cookies, _ := jar.GetAll() + if len(cookies) != 1 { + t.Fatalf("expected 1 cookie, got %d", len(cookies)) + } + + c := cookies[0] + if c.Host != ".example.com" { + t.Errorf("Host = %q, want %q", c.Host, ".example.com") + } + if !c.HttpOnly { + t.Error("HttpOnly = false, want true") + } + if c.Path != "/" { + t.Errorf("Path = %q, want %q", c.Path, "/") + } + if c.Secure { + t.Error("Secure = true, want false") + } + if c.Name != "session" { + t.Errorf("Name = %q, want %q", c.Name, "session") + } + if c.Value != "abc123" { + t.Errorf("Value = %q, want %q", c.Value, "abc123") + } + if c.Expires.Unix() != 1700000000 { + t.Errorf("Expires = %d, want 1700000000", c.Expires.Unix()) + } +} + +func TestLoadCookiesFile_Comments(t *testing.T) { + content := "# This is a comment\n.example.com\tTRUE\t/\tFALSE\t1700000000\tsession\tabc123\n" + path := writeTempCookieFile(t, content) + + jar, err := LoadCookiesFile(path) + 
if err != nil { + t.Fatalf("LoadCookiesFile() error: %v", err) + } + + cookies, _ := jar.GetAll() + if len(cookies) != 1 { + t.Errorf("expected 1 cookie (comment skipped), got %d", len(cookies)) + } +} + +func TestLoadCookiesFile_EmptyLines(t *testing.T) { + content := "\n\n.example.com\tTRUE\t/\tFALSE\t1700000000\tsession\tabc123\n\n" + path := writeTempCookieFile(t, content) + + jar, err := LoadCookiesFile(path) + if err != nil { + t.Fatalf("LoadCookiesFile() error: %v", err) + } + + cookies, _ := jar.GetAll() + if len(cookies) != 1 { + t.Errorf("expected 1 cookie (empty lines skipped), got %d", len(cookies)) + } +} + +func TestLoadCookiesFile_ShortLines(t *testing.T) { + content := "too\tfew\tfields\n.example.com\tTRUE\t/\tFALSE\t1700000000\tsession\tabc123\n" + path := writeTempCookieFile(t, content) + + jar, err := LoadCookiesFile(path) + if err != nil { + t.Fatalf("LoadCookiesFile() error: %v", err) + } + + cookies, _ := jar.GetAll() + if len(cookies) != 1 { + t.Errorf("expected 1 cookie (short line skipped), got %d", len(cookies)) + } +} + +func TestLoadCookiesFile_InvalidExpiry(t *testing.T) { + content := ".example.com\tTRUE\t/\tFALSE\tnotanumber\tsession\tabc123\n" + path := writeTempCookieFile(t, content) + + jar, err := LoadCookiesFile(path) + if err != nil { + t.Fatalf("LoadCookiesFile() error: %v", err) + } + + cookies, _ := jar.GetAll() + if len(cookies) != 1 { + t.Fatalf("expected 1 cookie, got %d", len(cookies)) + } + + // Should have a default expiry ~180 days from now + now := time.Now() + expected := now.Add(180 * 24 * time.Hour) + diff := cookies[0].Expires.Sub(expected) + if diff < -time.Minute || diff > time.Minute { + t.Errorf("invalid expiry default: got %v, expected ~%v", cookies[0].Expires, expected) + } +} + +func TestLoadCookiesFile_HttpOnly(t *testing.T) { + content := ".example.com\tTRUE\t/\tFALSE\t1700000000\ta\t1\n.other.com\tFALSE\t/\tFALSE\t1700000000\tb\t2\n" + path := writeTempCookieFile(t, content) + + jar, err := 
LoadCookiesFile(path) + if err != nil { + t.Fatalf("LoadCookiesFile() error: %v", err) + } + + cookies, _ := jar.GetAll() + if len(cookies) != 2 { + t.Fatalf("expected 2 cookies, got %d", len(cookies)) + } + + if !cookies[0].HttpOnly { + t.Error("first cookie HttpOnly = false, want true") + } + if cookies[1].HttpOnly { + t.Error("second cookie HttpOnly = true, want false") + } +} + +func TestLoadCookiesFile_Secure(t *testing.T) { + content := ".example.com\tFALSE\t/\tTRUE\t1700000000\ta\t1\n.other.com\tFALSE\t/\tFALSE\t1700000000\tb\t2\n" + path := writeTempCookieFile(t, content) + + jar, err := LoadCookiesFile(path) + if err != nil { + t.Fatalf("LoadCookiesFile() error: %v", err) + } + + cookies, _ := jar.GetAll() + if len(cookies) != 2 { + t.Fatalf("expected 2 cookies, got %d", len(cookies)) + } + + if !cookies[0].Secure { + t.Error("first cookie Secure = false, want true") + } + if cookies[1].Secure { + t.Error("second cookie Secure = true, want false") + } +} + +func TestLoadCookiesFile_NonexistentFile(t *testing.T) { + _, err := LoadCookiesFile("/nonexistent/path/cookies.txt") + if err == nil { + t.Error("expected error for nonexistent file") + } +} + +func TestLoadCookiesFile_Empty(t *testing.T) { + path := writeTempCookieFile(t, "") + + jar, err := LoadCookiesFile(path) + if err != nil { + t.Fatalf("LoadCookiesFile() error: %v", err) + } + + cookies, _ := jar.GetAll() + if len(cookies) != 0 { + t.Errorf("expected 0 cookies from empty file, got %d", len(cookies)) + } +} diff --git a/document.go b/document.go index 48f6b3a..ef057f2 100644 --- a/document.go +++ b/document.go @@ -25,30 +25,22 @@ type document struct { pw *playwright.Playwright browser playwright.Browser page playwright.Page - root playwright.ElementHandle locator playwright.Locator } func newDocument(pw *playwright.Playwright, browser playwright.Browser, page playwright.Page) (Document, error) { - root, err := page.QuerySelector("html") - - if err != nil { - return nil, err - } - - root2 := 
page.Locator("html") + locator := page.Locator("html") res := &document{ node: node{ - locator: root2, + locator: locator, }, pw: pw, browser: browser, page: page, - root: root, } - slog.Info("new document", "url", page.URL(), "root", root, "locator", root2) + slog.Info("new document", "url", page.URL(), "locator", locator) return res, nil } @@ -78,21 +70,14 @@ func (d *document) Refresh() error { } func (d *document) WaitForNetworkIdle(timeout *time.Duration) error { - - var f *float64 = nil if timeout == nil { t := 30 * time.Second timeout = &t } - if timeout != nil { - ms := float64(timeout.Milliseconds()) - f = &ms - } - - err := d.page.WaitForLoadState(playwright.PageWaitForLoadStateOptions{ + ms := float64(timeout.Milliseconds()) + return d.page.WaitForLoadState(playwright.PageWaitForLoadStateOptions{ State: playwright.LoadStateNetworkidle, - Timeout: f, + Timeout: &ms, }) - return err } diff --git a/interactive.go b/interactive.go index 8a0b60d..aa3c681 100644 --- a/interactive.go +++ b/interactive.go @@ -227,13 +227,12 @@ func NewInteractiveBrowser(ctx context.Context, opts ...PlayWrightBrowserOptions } func (ib *interactiveBrowser) Navigate(url string) (string, error) { - resp, err := ib.page.Goto(url, playwright.PageGotoOptions{ + _, err := ib.page.Goto(url, playwright.PageGotoOptions{ WaitUntil: playwright.WaitUntilStateLoad, }) if err != nil { return "", fmt.Errorf("navigation failed: %w", err) } - _ = resp return ib.page.URL(), nil } diff --git a/nodes.go b/nodes.go index b88b5ec..103d9cb 100644 --- a/nodes.go +++ b/nodes.go @@ -13,6 +13,9 @@ func (n Nodes) Select(selector string) Nodes { } func (d Nodes) First() Node { + if len(d) == 0 { + return nil + } return d[0] } diff --git a/nodes_test.go b/nodes_test.go new file mode 100644 index 0000000..18623ed --- /dev/null +++ b/nodes_test.go @@ -0,0 +1,111 @@ +package extractor + +import ( + "fmt" + "testing" +) + +// mockNode implements the Node interface for testing. 
+type mockNode struct { + text string + textErr error + content string + children Nodes +} + +func (m mockNode) Content() (string, error) { return m.content, nil } +func (m mockNode) Text() (string, error) { return m.text, m.textErr } +func (m mockNode) Attr(_ string) (string, error) { return "", nil } +func (m mockNode) Screenshot() ([]byte, error) { return nil, nil } +func (m mockNode) Type(_ string) error { return nil } +func (m mockNode) Click() error { return nil } +func (m mockNode) Select(_ string) Nodes { return m.children } +func (m mockNode) SelectFirst(_ string) Node { return m.children.First() } +func (m mockNode) ForEach(_ string, _ func(Node) error) error { return nil } +func (m mockNode) SetHidden(_ bool) error { return nil } +func (m mockNode) SetAttribute(_, _ string) error { return nil } + +func TestNodes_First_Empty(t *testing.T) { + var nodes Nodes + got := nodes.First() + if got != nil { + t.Errorf("First() on empty Nodes = %v, want nil", got) + } +} + +func TestNodes_First_NonEmpty(t *testing.T) { + n1 := mockNode{text: "first"} + n2 := mockNode{text: "second"} + nodes := Nodes{n1, n2} + + got := nodes.First() + if got == nil { + t.Fatal("First() on non-empty Nodes returned nil") + } + + text, _ := got.Text() + if text != "first" { + t.Errorf("First().Text() = %q, want %q", text, "first") + } +} + +func TestNodes_Select(t *testing.T) { + child1 := mockNode{text: "child1"} + child2 := mockNode{text: "child2"} + child3 := mockNode{text: "child3"} + + n1 := mockNode{children: Nodes{child1, child2}} + n2 := mockNode{children: Nodes{child3}} + + nodes := Nodes{n1, n2} + result := nodes.Select("anything") + + if len(result) != 3 { + t.Errorf("Select() returned %d nodes, want 3", len(result)) + } +} + +func TestNodes_Select_Empty(t *testing.T) { + var nodes Nodes + result := nodes.Select("anything") + if len(result) != 0 { + t.Errorf("Select() on empty Nodes returned %d nodes, want 0", len(result)) + } +} + +func TestNodes_ExtractText(t *testing.T) { 
+ n1 := mockNode{text: "hello"} + n2 := mockNode{text: "world"} + nodes := Nodes{n1, n2} + + texts, err := nodes.ExtractText() + if err != nil { + t.Fatalf("ExtractText() error = %v", err) + } + + if len(texts) != 2 || texts[0] != "hello" || texts[1] != "world" { + t.Errorf("ExtractText() = %v, want [hello world]", texts) + } +} + +func TestNodes_ExtractText_Error(t *testing.T) { + n1 := mockNode{text: "hello"} + n2 := mockNode{textErr: fmt.Errorf("text error")} + nodes := Nodes{n1, n2} + + _, err := nodes.ExtractText() + if err == nil { + t.Fatal("ExtractText() expected error, got nil") + } +} + +func TestNodes_ExtractText_Empty(t *testing.T) { + var nodes Nodes + texts, err := nodes.ExtractText() + if err != nil { + t.Fatalf("ExtractText() error = %v", err) + } + if len(texts) != 0 { + t.Errorf("ExtractText() on empty = %v, want empty", texts) + } +} diff --git a/sites/aislegopher/cmd/aislegopher/aislegopher.go b/sites/aislegopher/cmd/aislegopher/aislegopher.go index 5e599c3..c14127b 100644 --- a/sites/aislegopher/cmd/aislegopher/aislegopher.go +++ b/sites/aislegopher/cmd/aislegopher/aislegopher.go @@ -72,6 +72,7 @@ func main() { err := cli.Run(context.Background(), os.Args) if err != nil { - panic(err) + fmt.Fprintf(os.Stderr, "error: %v\n", err) + os.Exit(1) } } diff --git a/sites/archive/archive.go b/sites/archive/archive.go index e7f4cd3..cdf8c66 100644 --- a/sites/archive/archive.go +++ b/sites/archive/archive.go @@ -130,10 +130,9 @@ func (c Config) Archive(ctx context.Context, b extractor.Browser, target string) select { case <-ctx.Done(): - fmt.Println("context already done before entering the loop:", ctx.Err()) + slog.Debug("context already done before entering the loop", "err", ctx.Err()) return nil, ctx.Err() default: - fmt.Println("context not done yet") // Proceed with the loop } // now we are waiting for archive.ph to archive the page and redirect us to the archived page @@ -141,6 +140,9 @@ func (c Config) Archive(ctx context.Context, b 
extractor.Browser, target string) // if the page path starts with /wip/ then we are still waiting // also periodically refresh the page just in case + ticker := time.NewTicker(5 * time.Second) + defer ticker.Stop() + keepGoing := true for keepGoing { select { @@ -148,14 +150,14 @@ func (c Config) Archive(ctx context.Context, b extractor.Browser, target string) slog.Info("context done") keepGoing = false - case <-time.NewTicker(5 * time.Second).C: + case <-ticker.C: archivedUrl, err := url.Parse(doc.URL()) if err != nil { continue } - fmt.Println("checking url:", archivedUrl.String()) + slog.Debug("checking url", "url", archivedUrl.String()) // if the url is not the same as the endpoint, or the path does not start with /wip/ or /submit then we are done if archivedUrl.Hostname() != endpoint.Hostname() || (!strings.HasPrefix(archivedUrl.Path, "/wip/") && !strings.HasPrefix(archivedUrl.Path, "/submit")) { keepGoing = false diff --git a/sites/archive/cmd/archive/main.go b/sites/archive/cmd/archive/main.go index 2467659..b1278db 100644 --- a/sites/archive/cmd/archive/main.go +++ b/sites/archive/cmd/archive/main.go @@ -28,7 +28,7 @@ var Flags = ArchiveFlags{ }, } -func (f ArchiveFlags) ToConfig(_ context.Context, cmd *cli.Command) archive.Config { +func (f ArchiveFlags) ToConfig(_ context.Context, cmd *cli.Command) (archive.Config, error) { c := archive.DefaultConfig if e := cmd.String("endpoint"); e != "" { @@ -38,12 +38,12 @@ func (f ArchiveFlags) ToConfig(_ context.Context, cmd *cli.Command) archive.Conf if t := cmd.String("timeout"); t != "" { d, err := time.ParseDuration(t) if err != nil { - panic(err) + return c, fmt.Errorf("invalid timeout duration: %w", err) } c.Timeout = &d } - return c + return c, nil } func main() { @@ -122,7 +122,8 @@ func main() { err := cli.Run(context.Background(), os.Args) if err != nil { - panic(err) + fmt.Fprintf(os.Stderr, "error: %v\n", err) + os.Exit(1) } } diff --git a/sites/duckduckgo/cmd/duckduckgo/main.go 
b/sites/duckduckgo/cmd/duckduckgo/main.go index 196e911..f332b9f 100644 --- a/sites/duckduckgo/cmd/duckduckgo/main.go +++ b/sites/duckduckgo/cmd/duckduckgo/main.go @@ -26,7 +26,7 @@ var Flags = DuckDuckGoFlags{ }, } -func (f DuckDuckGoFlags) ToConfig(cmd *cli.Command) duckduckgo.Config { +func (f DuckDuckGoFlags) ToConfig(cmd *cli.Command) (duckduckgo.Config, error) { var res = duckduckgo.DefaultConfig if r := cmd.String("region"); r != "" { @@ -42,11 +42,11 @@ func (f DuckDuckGoFlags) ToConfig(cmd *cli.Command) duckduckgo.Config { case "off": res.SafeSearch = duckduckgo.SafeSearchOff default: - panic("invalid safe search value") + return res, fmt.Errorf("invalid safe search value: %s", s) } } - return res + return res, nil } func deferClose(cl io.Closer) { @@ -66,8 +66,10 @@ func main() { Usage: "Search DuckDuckGo", Flags: flags, Action: func(ctx context.Context, command *cli.Command) error { - c := Flags.ToConfig(command) - defer deferClose(nil) + c, err := Flags.ToConfig(command) + if err != nil { + return err + } query := strings.TrimSpace(strings.Join(command.Args().Slice(), " ")) @@ -105,9 +107,8 @@ func main() { }, } - err := cli.Run(context.Background(), os.Args) - - if err != nil { - panic(err) + if err := cli.Run(context.Background(), os.Args); err != nil { + fmt.Fprintf(os.Stderr, "error: %v\n", err) + os.Exit(1) } } diff --git a/sites/duckduckgo/duckduckgo_test.go b/sites/duckduckgo/duckduckgo_test.go new file mode 100644 index 0000000..0733b9e --- /dev/null +++ b/sites/duckduckgo/duckduckgo_test.go @@ -0,0 +1,85 @@ +package duckduckgo + +import ( + "testing" +) + +func TestConfig_ToSearchURL_Default(t *testing.T) { + c := Config{SafeSearch: SafeSearchOff} + u := c.ToSearchURL("test query") + + if u.Host != "duckduckgo.com" { + t.Errorf("Host = %q, want %q", u.Host, "duckduckgo.com") + } + + if u.Query().Get("q") != "test query" { + t.Errorf("q = %q, want %q", u.Query().Get("q"), "test query") + } + + if u.Query().Get("kp") != "-2" { + t.Errorf("kp = 
%q, want %q", u.Query().Get("kp"), "-2") + } +} + +func TestConfig_ToSearchURL_SafeSearchOn(t *testing.T) { + c := Config{SafeSearch: SafeSearchOn} + u := c.ToSearchURL("test") + + if u.Query().Get("kp") != "1" { + t.Errorf("kp = %q, want %q", u.Query().Get("kp"), "1") + } +} + +func TestConfig_ToSearchURL_SafeSearchModerate(t *testing.T) { + c := Config{SafeSearch: SafeSearchModerate} + u := c.ToSearchURL("test") + + if u.Query().Get("kp") != "-1" { + t.Errorf("kp = %q, want %q", u.Query().Get("kp"), "-1") + } +} + +func TestConfig_ToSearchURL_SafeSearchOff(t *testing.T) { + c := Config{SafeSearch: SafeSearchOff} + u := c.ToSearchURL("test") + + if u.Query().Get("kp") != "-2" { + t.Errorf("kp = %q, want %q", u.Query().Get("kp"), "-2") + } +} + +func TestConfig_ToSearchURL_WithRegion(t *testing.T) { + c := Config{SafeSearch: SafeSearchOff, Region: "us-en"} + u := c.ToSearchURL("test") + + if u.Query().Get("kl") != "us-en" { + t.Errorf("kl = %q, want %q", u.Query().Get("kl"), "us-en") + } +} + +func TestConfig_ToSearchURL_WithQuery(t *testing.T) { + c := Config{SafeSearch: SafeSearchOff} + u := c.ToSearchURL("golang testing") + + if u.Query().Get("q") != "golang testing" { + t.Errorf("q = %q, want %q", u.Query().Get("q"), "golang testing") + } +} + +func TestConfig_Validate_DefaultsSafeSearch(t *testing.T) { + c := Config{SafeSearch: 0} + c = c.validate() + + if c.SafeSearch != SafeSearchOff { + t.Errorf("validate() SafeSearch = %d, want %d (SafeSearchOff)", c.SafeSearch, SafeSearchOff) + } +} + +func TestConfig_ToSearchURL_NoRegion(t *testing.T) { + c := Config{SafeSearch: SafeSearchOff} + u := c.ToSearchURL("test") + + if u.Query().Get("kl") != "" { + t.Errorf("kl should be empty when no region, got %q", u.Query().Get("kl")) + } +} diff --git a/sites/google/cmd/google/main.go b/sites/google/cmd/google/main.go index 2575815..d195b53 100644 --- a/sites/google/cmd/google/main.go +++ b/sites/google/cmd/google/main.go @@ -87,9 +87,8 @@ func main() { }, } - err := 
cli.Run(context.Background(), os.Args) - - if err != nil { - panic(err) + if err := cli.Run(context.Background(), os.Args); err != nil { + fmt.Fprintf(os.Stderr, "error: %v\n", err) + os.Exit(1) } } diff --git a/sites/google/google.go b/sites/google/google.go index dad0330..d592e23 100644 --- a/sites/google/google.go +++ b/sites/google/google.go @@ -57,14 +57,17 @@ func deferClose(cl io.Closer) { func (c Config) Search(ctx context.Context, b extractor.Browser, query string) ([]Result, error) { c = c.validate() - u, err := url.Parse(fmt.Sprintf("https://%s/search?q=%s", c.BaseURL, query)) + u, err := url.Parse(fmt.Sprintf("https://%s/search", c.BaseURL)) if err != nil { return nil, fmt.Errorf("invalid url: %w", err) } + vals := u.Query() + vals.Set("q", query) + if c.Language != "" { - u.Query().Set("hl", c.Language) + vals.Set("hl", c.Language) } if c.Country != "" { @@ -84,10 +87,12 @@ func (c Config) Search(ctx context.Context, b extractor.Browser, query string) ( } if country != "" { - u.Query().Set("cr", country) + vals.Set("cr", country) } } + u.RawQuery = vals.Encode() + doc, err := b.Open(ctx, u.String(), extractor.OpenPageOptions{}) if err != nil { diff --git a/sites/google/google_test.go b/sites/google/google_test.go new file mode 100644 index 0000000..a25ced4 --- /dev/null +++ b/sites/google/google_test.go @@ -0,0 +1,39 @@ +package google + +import ( + "testing" +) + +func TestConfig_Validate_Defaults(t *testing.T) { + c := Config{} + c = c.validate() + + if c.BaseURL != "google.com" { + t.Errorf("BaseURL = %q, want %q", c.BaseURL, "google.com") + } + if c.Language != "en" { + t.Errorf("Language = %q, want %q", c.Language, "en") + } + if c.Country != "us" { + t.Errorf("Country = %q, want %q", c.Country, "us") + } +} + +func TestConfig_Validate_Preserves(t *testing.T) { + c := Config{ + BaseURL: "google.co.uk", + Language: "fr", + Country: "uk", + } + c = c.validate() + + if c.BaseURL != "google.co.uk" { + t.Errorf("BaseURL = %q, want %q", c.BaseURL, 
"google.co.uk") + } + if c.Language != "fr" { + t.Errorf("Language = %q, want %q", c.Language, "fr") + } + if c.Country != "uk" { + t.Errorf("Country = %q, want %q", c.Country, "uk") + } +} diff --git a/sites/megamillions/cmd/megamillions.go b/sites/megamillions/cmd/megamillions.go index 81cde2b..5d1f82a 100644 --- a/sites/megamillions/cmd/megamillions.go +++ b/sites/megamillions/cmd/megamillions.go @@ -51,10 +51,8 @@ func main() { }, } - err := cli.Run(context.Background(), os.Args) - - if err != nil { - panic(err) + if err := cli.Run(context.Background(), os.Args); err != nil { + fmt.Fprintf(os.Stderr, "error: %v\n", err) + os.Exit(1) } - } diff --git a/sites/megamillions/megamillions.go b/sites/megamillions/megamillions.go index 4e9d414..fa92cf4 100644 --- a/sites/megamillions/megamillions.go +++ b/sites/megamillions/megamillions.go @@ -64,7 +64,6 @@ func getDrawing(_ context.Context, doc extractor.Document) (*Drawing, error) { return nil, fmt.Errorf("failed to parse date: %w", err) } - fmt.Println("ticks", ticks) drawing.Date = netTicksToTime(ticks) err = doc.ForEach("ul.numbers li.ball", func(n extractor.Node) error { @@ -199,23 +198,12 @@ func getNextDrawing(_ context.Context, doc extractor.Document) (*NextDrawing, er numeric := numericOnly(txt) - set := false if strings.Contains(txt, "Billion") { - amt := currency.USD.Amount(numeric * 1000000000) - nextDrawing.Jackpot = amt - set = true + nextDrawing.Jackpot = currency.USD.Amount(numeric * 1000000000) } else if strings.Contains(txt, "Million") { - amt := currency.USD.Amount(numeric * 1000000) - nextDrawing.Jackpot = amt - set = true + nextDrawing.Jackpot = currency.USD.Amount(numeric * 1000000) } else { - amt := currency.USD.Amount(numeric) - nextDrawing.Jackpot = amt - set = true - } - - if !set { - return nil, fmt.Errorf("failed to convert jackpot to currency: %w", err) + nextDrawing.Jackpot = currency.USD.Amount(numeric) } return &nextDrawing, nil diff --git a/sites/megamillions/megamillions_test.go 
b/sites/megamillions/megamillions_test.go new file mode 100644 index 0000000..a6d6ba8 --- /dev/null +++ b/sites/megamillions/megamillions_test.go @@ -0,0 +1,43 @@ +package megamillions + +import ( + "testing" + "time" +) + +func TestNetTicksToTime_Consistency(t *testing.T) { + // netTicksToTime converts .NET ticks to Go time. + // Verify it produces consistent results for the same input. + ticks := int64(638396256000000000) + t1 := netTicksToTime(ticks) + t2 := netTicksToTime(ticks) + + if !t1.Equal(t2) { + t.Errorf("netTicksToTime is not consistent: %v != %v", t1, t2) + } +} + +func TestNetTicksToTime_Ordering(t *testing.T) { + // A larger ticks value should produce a later time. + earlier := netTicksToTime(638396256000000000) + later := netTicksToTime(638396256100000000) // 10 seconds later in ticks + + if !later.After(earlier) { + t.Errorf("expected later ticks to produce later time: %v vs %v", earlier, later) + } +} + +func TestNetTicksToTime_DifferenceIsCorrect(t *testing.T) { + // .NET ticks are 100-nanosecond intervals. + // 10,000,000 ticks = 1 second. 
+ ticks1 := int64(638396256000000000) + ticks2 := ticks1 + 10000000 // 1 second later + + t1 := netTicksToTime(ticks1) + t2 := netTicksToTime(ticks2) + + diff := t2.Sub(t1) + if diff != time.Second { + t.Errorf("expected 1 second difference, got %v", diff) + } +} diff --git a/sites/powerball/cmd/powerball.go b/sites/powerball/cmd/powerball.go index 5801add..a736605 100644 --- a/sites/powerball/cmd/powerball.go +++ b/sites/powerball/cmd/powerball.go @@ -51,10 +51,8 @@ func main() { }, } - err := cli.Run(context.Background(), os.Args) - - if err != nil { - panic(err) + if err := cli.Run(context.Background(), os.Args); err != nil { + fmt.Fprintf(os.Stderr, "error: %v\n", err) + os.Exit(1) } - } diff --git a/sites/powerball/powerball.go b/sites/powerball/powerball.go index d9f5f51..6951e03 100644 --- a/sites/powerball/powerball.go +++ b/sites/powerball/powerball.go @@ -41,9 +41,20 @@ func deferClose(cl io.Closer) { func getDrawing(_ context.Context, doc extractor.Document) (*Drawing, error) { var drawing Drawing - dateStr, err := doc.SelectFirst("#numbers .title-date").Text() + dateNode := doc.SelectFirst("#numbers .title-date") + if dateNode == nil { + return nil, fmt.Errorf("failed to find date element") + } + + dateStr, err := dateNode.Text() + if err != nil { + return nil, fmt.Errorf("failed to get date text: %w", err) + } drawing.Date, err = time.Parse("Mon, Jan 2, 2006", dateStr) + if err != nil { + return nil, fmt.Errorf("failed to parse date %q: %w", dateStr, err) + } nums := doc.Select("div.game-ball-group div.white-balls") @@ -165,22 +176,12 @@ func getNextDrawing(_ context.Context, doc extractor.Document) (*NextDrawing, er numeric := numericOnly(txt) - set := false if strings.Contains(txt, "Billion") { - amt := numeric * 1000000000 - nextDrawing.JackpotDollars = int(amt) - set = true + nextDrawing.JackpotDollars = int(numeric * 1000000000) } else if strings.Contains(txt, "Million") { - amt := numeric * 1000000 - nextDrawing.JackpotDollars = int(amt) - set = 
true + nextDrawing.JackpotDollars = int(numeric * 1000000) } else { nextDrawing.JackpotDollars = int(numeric) - set = true - } - - if !set { - return nil, fmt.Errorf("failed to convert jackpot to currency: %w", err) } return &nextDrawing, nil diff --git a/sites/useragents/cmd/useragents.go b/sites/useragents/cmd/useragents.go index cd849f6..73991df 100644 --- a/sites/useragents/cmd/useragents.go +++ b/sites/useragents/cmd/useragents.go @@ -49,10 +49,8 @@ func main() { }, } - err := cli.Run(context.Background(), os.Args) - - if err != nil { - panic(err) + if err := cli.Run(context.Background(), os.Args); err != nil { + fmt.Fprintf(os.Stderr, "error: %v\n", err) + os.Exit(1) } - } diff --git a/sites/useragents/useragents.go b/sites/useragents/useragents.go index 8613f00..696b4e7 100644 --- a/sites/useragents/useragents.go +++ b/sites/useragents/useragents.go @@ -44,8 +44,6 @@ func (c Config) GetMostCommonDesktopUserAgent(ctx context.Context, b extractor.B } data := []map[string]any{} - fmt.Println("text", text) - err = json.Unmarshal([]byte(text), &data) if err != nil { @@ -63,8 +61,12 @@ func (c Config) GetMostCommonDesktopUserAgent(ctx context.Context, b extractor.B } if pct > highestPct { + ua, ok := agent["ua"].(string) + if !ok { + continue + } highestPct = pct - highestAgent = agent["ua"].(string) + highestAgent = ua } } diff --git a/sites/wegmans/cmd/wegmans/main.go b/sites/wegmans/cmd/wegmans/main.go index e9237d0..7b26b26 100644 --- a/sites/wegmans/cmd/wegmans/main.go +++ b/sites/wegmans/cmd/wegmans/main.go @@ -73,9 +73,8 @@ func main() { }, } - err := app.Run(context.Background(), os.Args) - - if err != nil { - panic(err) + if err := app.Run(context.Background(), os.Args); err != nil { + fmt.Fprintf(os.Stderr, "error: %v\n", err) + os.Exit(1) } }