- Fix Nodes.First() panic on empty slice (return nil) - Fix ticker leak in archive.go (create once, defer Stop) - Fix cookie path matching for empty and root paths - Fix lost query params in google.go (u.Query().Set was discarded) - Fix type assertion panic in useragents.go - Fix dropped date parse error in powerball.go - Remove unreachable dead code in megamillions.go and powerball.go - Simplify document.go WaitForNetworkIdle, remove unused root field - Remove debug fmt.Println calls across codebase - Replace panic(err) with stderr+exit in all cmd/ programs - Fix duckduckgo cmd: remove useless defer, return error on bad safesearch - Fix archive cmd: ToConfig returns error instead of panicking - Add 39+ unit tests across 6 new test files - Add Gitea Actions CI workflow (build, test, vet in parallel) Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
83 lines
1.8 KiB
Go
83 lines
1.8 KiB
Go
package extractor
|
|
|
|
import (
|
|
"net/url"
|
|
"strings"
|
|
"time"
|
|
)
|
|
|
|
// Cookie is a single cookie together with the host and path scope used to
// decide which URLs it applies to.
type Cookie struct {
	// Host is the domain the cookie belongs to. A leading dot (".example.com")
	// marks a cookie that also applies to subdomains.
	Host string
	// Path is the URL path prefix the cookie is limited to. An empty Path
	// places no restriction on the target path.
	Path string
	// Expires is not consulted by IsTargetMatch; presumably the expiry time.
	Expires time.Time
	// Secure is not consulted by IsTargetMatch.
	Secure bool
	// HttpOnly is not consulted by IsTargetMatch.
	HttpOnly bool
	// Name is the cookie name.
	Name string
	// Value is the cookie value.
	Value string
}

// IsTargetMatch reports whether the cookie applies to the URL in target.
//
// The host matches when Host equals the target host (compared
// case-insensitively, with or without the target's port), or when Host starts
// with a dot and the target host is that domain or one of its subdomains.
// The path matches when Path is empty, equals the target path, or is a prefix
// of it that ends on a "/" boundary. A target without a path is treated as
// "/".
//
// Only Host and Path are examined. The returned error is non-nil only when
// target cannot be parsed as a URL.
func (c Cookie) IsTargetMatch(target string) (bool, error) {
	u, err := url.Parse(target)
	if err != nil {
		return false, err
	}

	cookieHost := strings.ToLower(c.Host)
	hostMatches := func(host string) bool {
		host = strings.ToLower(host)
		if host == cookieHost {
			return true
		}
		if !strings.HasPrefix(cookieHost, ".") {
			return false
		}
		if strings.HasSuffix(host, cookieHost) {
			return true
		}
		// ".example.com" also covers the bare domain "example.com".
		return len(cookieHost) > 1 && host == cookieHost[1:]
	}

	// u.Host may carry a port (and brackets around an IPv6 literal). Cookies
	// are not scoped by port, so the bare hostname is accepted as well.
	if !hostMatches(u.Host) && !hostMatches(u.Hostname()) {
		return false, nil
	}

	if c.Path == "" {
		return true, nil
	}

	// A URL such as "https://example.com" has an empty path; treat it as the
	// root so that it matches cookies scoped to "/".
	targetPath := u.Path
	if targetPath == "" {
		targetPath = "/"
	}

	if !strings.HasPrefix(targetPath, c.Path) {
		return false, nil
	}

	// A bare prefix is not enough: cookie path "/foo" must match "/foo" and
	// "/foo/bar" but not "/foosball". Accept an exact match, a cookie path
	// that already ends in "/", or a target whose next character is "/".
	if len(targetPath) == len(c.Path) || strings.HasSuffix(c.Path, "/") {
		return true, nil
	}
	return targetPath[len(c.Path)] == '/', nil
}
|
|
|
|
type CookieJar interface {
|
|
GetAll() ([]Cookie, error)
|
|
Get(url string) ([]Cookie, error)
|
|
Set(cookie Cookie) error
|
|
Delete(cookie Cookie) error
|
|
}
|
|
|
|
// ReadOnlyCookieJar is a wrapper for CookieJar that allows only read operations on cookies, but all
|
|
// write operations are no-ops.
|
|
type ReadOnlyCookieJar struct {
|
|
Jar CookieJar
|
|
}
|
|
|
|
func (r ReadOnlyCookieJar) GetAll() ([]Cookie, error) {
|
|
return r.Jar.GetAll()
|
|
}
|
|
|
|
func (r ReadOnlyCookieJar) Get(url string) ([]Cookie, error) {
|
|
return r.Jar.Get(url)
|
|
}
|
|
|
|
func (r ReadOnlyCookieJar) Set(_ Cookie) error {
|
|
return nil
|
|
}
|
|
|
|
func (r ReadOnlyCookieJar) Delete(_ Cookie) error {
|
|
return nil
|
|
}
|