// Package googlesearch scrapes Google Search result pages and returns the
// results as structured data.
package googlesearch
|
|
|
|
import (
	"context"
	"fmt"
	// Aliased because this package declares its own unexported func named url.
	neturl "net/url"
	"strings"

	"github.com/gocolly/colly/v2"
)
|
|
|
|
// Result is one entry scraped from a Google Search results page.
//
// Every field carries a JSON tag so a slice of results can be marshalled
// directly.
type Result struct {
	// Rank is the 1-based position of this entry among the results that
	// were collected.
	Rank int `json:"rank"`

	// URL is the target address the result links to.
	URL string `json:"url"`

	// Title is the headline text of the result.
	Title string `json:"title"`

	// Description is the snippet text shown for the result.
	Description string `json:"description"`
}
|
|
|
|
// GoogleDomains maps a lower-case country key to the search URL prefix of the
// matching localized Google homepage. Each value ends in "/search?q=" so the
// query term can be appended directly.
//
// Keys are two-letter ISO 3166-1 alpha-2 codes, with two additions kept for
// convenience: "cat" (google.cat) and "uk", which points at the same host as
// "gb".
//
// See: https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2
var GoogleDomains = map[string]string{
	// Default entry: the global google.com homepage.
	"us": "https://www.google.com/search?q=",

	// Remaining entries, in alphabetical key order.
	"ac":  "https://www.google.ac/search?q=",
	"ad":  "https://www.google.ad/search?q=",
	"ae":  "https://www.google.ae/search?q=",
	"af":  "https://www.google.com.af/search?q=",
	"ag":  "https://www.google.com.ag/search?q=",
	"ai":  "https://www.google.com.ai/search?q=",
	"al":  "https://www.google.al/search?q=",
	"am":  "https://www.google.am/search?q=",
	"ao":  "https://www.google.co.ao/search?q=",
	"ar":  "https://www.google.com.ar/search?q=",
	"as":  "https://www.google.as/search?q=",
	"at":  "https://www.google.at/search?q=",
	"au":  "https://www.google.com.au/search?q=",
	"az":  "https://www.google.az/search?q=",
	"ba":  "https://www.google.ba/search?q=",
	"bd":  "https://www.google.com.bd/search?q=",
	"be":  "https://www.google.be/search?q=",
	"bf":  "https://www.google.bf/search?q=",
	"bg":  "https://www.google.bg/search?q=",
	"bh":  "https://www.google.com.bh/search?q=",
	"bi":  "https://www.google.bi/search?q=",
	"bj":  "https://www.google.bj/search?q=",
	"bn":  "https://www.google.com.bn/search?q=",
	"bo":  "https://www.google.com.bo/search?q=",
	"br":  "https://www.google.com.br/search?q=",
	"bs":  "https://www.google.bs/search?q=",
	"bt":  "https://www.google.bt/search?q=",
	"bw":  "https://www.google.co.bw/search?q=",
	"by":  "https://www.google.by/search?q=",
	"bz":  "https://www.google.com.bz/search?q=",
	"ca":  "https://www.google.ca/search?q=",
	"cat": "https://www.google.cat/search?q=",
	"cc":  "https://www.google.cc/search?q=",
	"cd":  "https://www.google.cd/search?q=",
	"cf":  "https://www.google.cf/search?q=",
	"cg":  "https://www.google.cg/search?q=",
	"ch":  "https://www.google.ch/search?q=",
	"ci":  "https://www.google.ci/search?q=",
	"ck":  "https://www.google.co.ck/search?q=",
	"cl":  "https://www.google.cl/search?q=",
	"cm":  "https://www.google.cm/search?q=",
	"cn":  "https://www.google.cn/search?q=",
	"co":  "https://www.google.com.co/search?q=",
	"cr":  "https://www.google.co.cr/search?q=",
	"cu":  "https://www.google.com.cu/search?q=",
	"cv":  "https://www.google.cv/search?q=",
	"cy":  "https://www.google.com.cy/search?q=",
	"cz":  "https://www.google.cz/search?q=",
	"de":  "https://www.google.de/search?q=",
	"dj":  "https://www.google.dj/search?q=",
	"dk":  "https://www.google.dk/search?q=",
	"dm":  "https://www.google.dm/search?q=",
	"do":  "https://www.google.com.do/search?q=",
	"dz":  "https://www.google.dz/search?q=",
	"ec":  "https://www.google.com.ec/search?q=",
	"ee":  "https://www.google.ee/search?q=",
	"eg":  "https://www.google.com.eg/search?q=",
	"es":  "https://www.google.es/search?q=",
	"et":  "https://www.google.com.et/search?q=",
	"fi":  "https://www.google.fi/search?q=",
	"fj":  "https://www.google.com.fj/search?q=",
	"fm":  "https://www.google.fm/search?q=",
	"fr":  "https://www.google.fr/search?q=",
	"ga":  "https://www.google.ga/search?q=",
	"gb":  "https://www.google.co.uk/search?q=",
	"ge":  "https://www.google.ge/search?q=",
	"gf":  "https://www.google.gf/search?q=",
	"gg":  "https://www.google.gg/search?q=",
	"gh":  "https://www.google.com.gh/search?q=",
	"gi":  "https://www.google.com.gi/search?q=",
	"gl":  "https://www.google.gl/search?q=",
	"gm":  "https://www.google.gm/search?q=",
	"gp":  "https://www.google.gp/search?q=",
	"gr":  "https://www.google.gr/search?q=",
	"gt":  "https://www.google.com.gt/search?q=",
	"gy":  "https://www.google.gy/search?q=",
	"hk":  "https://www.google.com.hk/search?q=",
	"hn":  "https://www.google.hn/search?q=",
	"hr":  "https://www.google.hr/search?q=",
	"ht":  "https://www.google.ht/search?q=",
	"hu":  "https://www.google.hu/search?q=",
	"id":  "https://www.google.co.id/search?q=",
	"ie":  "https://www.google.ie/search?q=",
	"il":  "https://www.google.co.il/search?q=",
	"im":  "https://www.google.im/search?q=",
	"in":  "https://www.google.co.in/search?q=",
	"io":  "https://www.google.io/search?q=",
	"iq":  "https://www.google.iq/search?q=",
	"is":  "https://www.google.is/search?q=",
	"it":  "https://www.google.it/search?q=",
	"je":  "https://www.google.je/search?q=",
	"jm":  "https://www.google.com.jm/search?q=",
	"jo":  "https://www.google.jo/search?q=",
	"jp":  "https://www.google.co.jp/search?q=",
	"ke":  "https://www.google.co.ke/search?q=",
	"kg":  "https://www.google.kg/search?q=",
	"kh":  "https://www.google.com.kh/search?q=",
	"ki":  "https://www.google.ki/search?q=",
	"kr":  "https://www.google.co.kr/search?q=",
	"kw":  "https://www.google.com.kw/search?q=",
	"kz":  "https://www.google.kz/search?q=",
	"la":  "https://www.google.la/search?q=",
	"lb":  "https://www.google.com.lb/search?q=",
	"lc":  "https://www.google.com.lc/search?q=",
	"li":  "https://www.google.li/search?q=",
	"lk":  "https://www.google.lk/search?q=",
	"ls":  "https://www.google.co.ls/search?q=",
	"lt":  "https://www.google.lt/search?q=",
	"lu":  "https://www.google.lu/search?q=",
	"lv":  "https://www.google.lv/search?q=",
	"ly":  "https://www.google.com.ly/search?q=",
	"ma":  "https://www.google.co.ma/search?q=",
	"md":  "https://www.google.md/search?q=",
	"me":  "https://www.google.me/search?q=",
	"mg":  "https://www.google.mg/search?q=",
	"mk":  "https://www.google.mk/search?q=",
	"ml":  "https://www.google.ml/search?q=",
	"mm":  "https://www.google.com.mm/search?q=",
	"mn":  "https://www.google.mn/search?q=",
	"ms":  "https://www.google.ms/search?q=",
	"mt":  "https://www.google.com.mt/search?q=",
	"mu":  "https://www.google.mu/search?q=",
	"mv":  "https://www.google.mv/search?q=",
	"mw":  "https://www.google.mw/search?q=",
	"mx":  "https://www.google.com.mx/search?q=",
	"my":  "https://www.google.com.my/search?q=",
	"mz":  "https://www.google.co.mz/search?q=",
	"na":  "https://www.google.com.na/search?q=",
	"ne":  "https://www.google.ne/search?q=",
	"nf":  "https://www.google.com.nf/search?q=",
	"ng":  "https://www.google.com.ng/search?q=",
	"ni":  "https://www.google.com.ni/search?q=",
	"nl":  "https://www.google.nl/search?q=",
	"no":  "https://www.google.no/search?q=",
	"np":  "https://www.google.com.np/search?q=",
	"nr":  "https://www.google.nr/search?q=",
	"nu":  "https://www.google.nu/search?q=",
	"nz":  "https://www.google.co.nz/search?q=",
	"om":  "https://www.google.com.om/search?q=",
	"pa":  "https://www.google.com.pa/search?q=",
	"pe":  "https://www.google.com.pe/search?q=",
	"pg":  "https://www.google.com.pg/search?q=",
	"ph":  "https://www.google.com.ph/search?q=",
	"pk":  "https://www.google.com.pk/search?q=",
	"pl":  "https://www.google.pl/search?q=",
	"pn":  "https://www.google.pn/search?q=",
	"pr":  "https://www.google.com.pr/search?q=",
	"ps":  "https://www.google.ps/search?q=",
	"pt":  "https://www.google.pt/search?q=",
	"py":  "https://www.google.com.py/search?q=",
	"qa":  "https://www.google.com.qa/search?q=",
	"ro":  "https://www.google.ro/search?q=",
	"rs":  "https://www.google.rs/search?q=",
	"ru":  "https://www.google.ru/search?q=",
	"rw":  "https://www.google.rw/search?q=",
	"sa":  "https://www.google.com.sa/search?q=",
	"sb":  "https://www.google.com.sb/search?q=",
	"sc":  "https://www.google.sc/search?q=",
	"se":  "https://www.google.se/search?q=",
	"sg":  "https://www.google.com.sg/search?q=",
	"sh":  "https://www.google.sh/search?q=",
	"si":  "https://www.google.si/search?q=",
	"sk":  "https://www.google.sk/search?q=",
	"sl":  "https://www.google.com.sl/search?q=",
	"sm":  "https://www.google.sm/search?q=",
	"sn":  "https://www.google.sn/search?q=",
	"so":  "https://www.google.so/search?q=",
	"st":  "https://www.google.st/search?q=",
	"sv":  "https://www.google.com.sv/search?q=",
	"td":  "https://www.google.td/search?q=",
	"tg":  "https://www.google.tg/search?q=",
	"th":  "https://www.google.co.th/search?q=",
	"tj":  "https://www.google.com.tj/search?q=",
	"tk":  "https://www.google.tk/search?q=",
	"tl":  "https://www.google.tl/search?q=",
	"tm":  "https://www.google.tm/search?q=",
	"tn":  "https://www.google.tn/search?q=",
	"to":  "https://www.google.to/search?q=",
	"tr":  "https://www.google.com.tr/search?q=",
	"tt":  "https://www.google.tt/search?q=",
	"tw":  "https://www.google.com.tw/search?q=",
	"tz":  "https://www.google.co.tz/search?q=",
	"ua":  "https://www.google.com.ua/search?q=",
	"ug":  "https://www.google.co.ug/search?q=",
	"uk":  "https://www.google.co.uk/search?q=",
	"uy":  "https://www.google.com.uy/search?q=",
	"uz":  "https://www.google.co.uz/search?q=",
	"vc":  "https://www.google.com.vc/search?q=",
	"ve":  "https://www.google.co.ve/search?q=",
	"vg":  "https://www.google.vg/search?q=",
	"vi":  "https://www.google.co.vi/search?q=",
	"vn":  "https://www.google.com.vn/search?q=",
	"vu":  "https://www.google.vu/search?q=",
	"ws":  "https://www.google.ws/search?q=",
	"za":  "https://www.google.co.za/search?q=",
	"zm":  "https://www.google.co.zm/search?q=",
	"zw":  "https://www.google.co.zw/search?q=",
}
|
|
|
|
// SearchOptions tunes a single call to Search. The zero value is valid and
// selects every default described below.
type SearchOptions struct {
	// CountryCode is the ISO 3166-1 alpha-2 code selecting which localized
	// Google Search homepage is queried.
	// Default: "us", i.e. results from https://www.google.com.
	CountryCode string

	// LanguageCode is the language code to use for the search.
	// Default: en
	LanguageCode string

	// Limit is the maximum number of results to return.
	Limit int

	// Start is the rank from which the returned result set should begin.
	Start int

	// UserAgent is the User-Agent header value sent with the HTTP request.
	// Default: "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36"
	UserAgent string
}
|
|
|
|
// Search returns a list of search results from Google.
|
|
func Search(ctx context.Context, searchTerm string, opts ...SearchOptions) ([]Result, error) {
|
|
|
|
c := colly.NewCollector(colly.MaxDepth(1))
|
|
if len(opts) == 0 {
|
|
opts = append(opts, SearchOptions{})
|
|
}
|
|
|
|
if opts[0].UserAgent == "" {
|
|
c.UserAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36"
|
|
} else {
|
|
c.UserAgent = opts[0].UserAgent
|
|
}
|
|
|
|
var lc string
|
|
if opts[0].LanguageCode == "" {
|
|
lc = "en"
|
|
} else {
|
|
lc = opts[0].LanguageCode
|
|
}
|
|
|
|
results := []Result{}
|
|
var rErr error
|
|
rank := 1
|
|
|
|
c.OnRequest(func(r *colly.Request) {
|
|
if err := ctx.Err(); err != nil {
|
|
r.Abort()
|
|
rErr = err
|
|
return
|
|
}
|
|
})
|
|
|
|
c.OnError(func(r *colly.Response, err error) {
|
|
rErr = err
|
|
})
|
|
|
|
c.OnHTML("div.g", func(e *colly.HTMLElement) {
|
|
|
|
sel := e.DOM
|
|
|
|
for i := range sel.Nodes {
|
|
if err := ctx.Err(); err != nil {
|
|
rErr = err
|
|
return
|
|
}
|
|
|
|
item := sel.Eq(i)
|
|
|
|
rDiv := item.Find("div.rc")
|
|
|
|
linkHref, _ := rDiv.Find("a").Attr("href")
|
|
linkText := strings.TrimSpace(linkHref)
|
|
titleText := strings.TrimSpace(rDiv.Find("h3").Text())
|
|
|
|
descText := strings.TrimSpace(rDiv.Find("div > div > span > span").Text())
|
|
|
|
if linkText != "" && linkText != "#" {
|
|
result := Result{
|
|
Rank: rank,
|
|
URL: linkText,
|
|
Title: titleText,
|
|
Description: descText,
|
|
}
|
|
results = append(results, result)
|
|
rank += 1
|
|
}
|
|
}
|
|
})
|
|
|
|
url := url(searchTerm, opts[0].CountryCode, lc, opts[0].Limit, opts[0].Start)
|
|
c.Visit(url)
|
|
|
|
if rErr != nil {
|
|
return nil, rErr
|
|
}
|
|
|
|
// Reduce results to max limit
|
|
if opts[0].Limit != 0 && len(results) > opts[0].Limit {
|
|
return results[:opts[0].Limit], nil
|
|
}
|
|
|
|
return results, nil
|
|
}
|
|
|
|
func url(searchTerm string, countryCode string, languageCode string, limit int, start int) string {
|
|
searchTerm = strings.Trim(searchTerm, " ")
|
|
searchTerm = strings.Replace(searchTerm, " ", "+", -1)
|
|
countryCode = strings.ToLower(countryCode)
|
|
|
|
var url string
|
|
|
|
if googleBase, found := GoogleDomains[countryCode]; found {
|
|
if start == 0 {
|
|
url = fmt.Sprintf("%s%s&hl=%s", googleBase, searchTerm, languageCode)
|
|
} else {
|
|
url = fmt.Sprintf("%s%s&hl=%s&start=%d", googleBase, searchTerm, languageCode, start)
|
|
}
|
|
} else {
|
|
if start == 0 {
|
|
url = fmt.Sprintf("%s%s&hl=%s", GoogleDomains["us"], searchTerm, languageCode)
|
|
} else {
|
|
url = fmt.Sprintf("%s%s&hl=%s&start=%d", GoogleDomains["us"], searchTerm, languageCode, start)
|
|
}
|
|
}
|
|
|
|
if limit != 0 {
|
|
url = fmt.Sprintf("%s&num=%d", url, int(float64(limit)*1.5)) // Factor in ads etc
|
|
}
|
|
|
|
return url
|
|
}
|