Files
go-extractor/sites/google/google.go
Steve Dudenhoeffer 963696cd62
All checks were successful
CI / vet (pull_request) Successful in 40s
CI / build (pull_request) Successful in 1m22s
CI / test (pull_request) Successful in 1m28s
enhance: thread-safe CookieJar, SameSite cookie attr, dynamic Google countries
- Wrap staticCookieJar in struct with sync.RWMutex for thread safety
- Add SameSite field to Cookie struct with Strict/Lax/None constants
- Update Playwright cookie conversion functions for SameSite
- Replace hardcoded 4-country switch with dynamic country code generation

Closes #20, #22, #23
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-15 16:34:54 +00:00

134 lines
2.4 KiB
Go

package google
import (
"context"
"fmt"
"log/slog"
"net/url"
"strings"
"gitea.stevedudenhoeffer.com/steve/go-extractor"
)
// Config holds the options used to build a Google search request.
// The zero value is usable: validate() substitutes the documented
// default for any field left empty before a search runs.
type Config struct {
	// BaseURL is the base URL for the search engine, if empty "google.com" is used
	BaseURL string
	// Language is the language to use for the search engine, if empty "en" is used
	Language string
	// Country is the country to use for the search engine, if empty "us" is used
	Country string
}
// DefaultConfig is the configuration used by the package-level Search
// function: google.com, English ("en"), United States ("us").
var DefaultConfig = Config{
	BaseURL:  "google.com",
	Language: "en",
	Country:  "us",
}
// validate returns a copy of the receiver with package defaults filled
// in for every field that was left empty. The receiver is a value, so
// the caller's Config is never mutated.
func (c Config) validate() Config {
	// fallback picks def when value is empty, otherwise keeps value.
	fallback := func(value, def string) string {
		if value == "" {
			return def
		}
		return value
	}

	c.BaseURL = fallback(c.BaseURL, "google.com")
	c.Language = fallback(c.Language, "en")
	c.Country = fallback(c.Country, "us")
	return c
}
// Result is one organic search result scraped from the results page.
type Result struct {
	// URL is the href of the first link found inside the result container.
	URL string
	// Title is the heading (h3) text of the result; empty if none was found.
	Title string
	// Description is the snippet text of the result; empty if none was found.
	Description string
}
// Search runs a Google search for query using browser b and returns the
// organic results scraped from the page. Empty Config fields fall back
// to the package defaults (see Config). A failure to read a result's
// title or description is logged and tolerated; a failure to read its
// link aborts the scrape with an error. On error the results gathered
// so far are returned alongside it.
func (c Config) Search(ctx context.Context, b extractor.Browser, query string) ([]Result, error) {
	c = c.validate()

	target, err := url.Parse(fmt.Sprintf("https://%s/search", c.BaseURL))
	if err != nil {
		return nil, fmt.Errorf("invalid url: %w", err)
	}

	// Build the query string: q is required, hl/cr carry language/country.
	params := target.Query()
	params.Set("q", query)
	if c.Language != "" {
		params.Set("hl", c.Language)
	}
	if c.Country != "" {
		// Google expects the country restrict as e.g. "countryUS".
		params.Set("cr", "country"+strings.ToUpper(c.Country))
	}
	target.RawQuery = params.Encode()

	doc, err := b.Open(ctx, target.String(), extractor.OpenPageOptions{})
	if err != nil {
		return nil, fmt.Errorf("failed to open url: %w", err)
	}
	defer extractor.DeferClose(doc)

	var results []Result
	err = doc.ForEach("div.g", func(node extractor.Node) error {
		// A container without a link is not a result entry; skip it.
		anchors := node.Select("a")
		if len(anchors) == 0 {
			return nil
		}

		href, err := anchors[0].Attr("href")
		if err != nil {
			return fmt.Errorf("failed to get link: %w", err)
		}

		var title, desc string
		if headings := node.Select("div > div > div a > h3"); len(headings) != 0 {
			if title, err = headings[0].Text(); err != nil {
				slog.Warn("failed to get result title", "err", err)
			}
		}
		if snippets := node.Select("div:nth-child(1) > div:nth-child(2) > div:nth-child(1) > span:not([class])"); len(snippets) != 0 {
			if desc, err = snippets[0].Text(); err != nil {
				slog.Warn("failed to get result description", "err", err)
			}
		}

		results = append(results, Result{
			URL:         href,
			Title:       title,
			Description: desc,
		})
		return nil
	})
	return results, err
}
// Search performs a Google search for query using DefaultConfig.
// It is a convenience wrapper around Config.Search.
func Search(ctx context.Context, b extractor.Browser, query string) ([]Result, error) {
	cfg := DefaultConfig
	return cfg.Search(ctx, b, query)
}