diff --git a/search.go b/search.go index 4298e14..985234a 100644 --- a/search.go +++ b/search.go @@ -27,208 +27,210 @@ type Result struct { Description string `json:"description"` } +const stdGoogleBase = "https://www.google." + // GoogleDomains represents localized Google homepages. The 2 letter country code is based on ISO 3166-1 alpha-2. // // See: https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2 var GoogleDomains = map[string]string{ - "us": "https://www.google.com/search?q=", - "ac": "https://www.google.ac/search?q=", - "ad": "https://www.google.ad/search?q=", - "ae": "https://www.google.ae/search?q=", - "af": "https://www.google.com.af/search?q=", - "ag": "https://www.google.com.ag/search?q=", - "ai": "https://www.google.com.ai/search?q=", - "al": "https://www.google.al/search?q=", - "am": "https://www.google.am/search?q=", - "ao": "https://www.google.co.ao/search?q=", - "ar": "https://www.google.com.ar/search?q=", - "as": "https://www.google.as/search?q=", - "at": "https://www.google.at/search?q=", - "au": "https://www.google.com.au/search?q=", - "az": "https://www.google.az/search?q=", - "ba": "https://www.google.ba/search?q=", - "bd": "https://www.google.com.bd/search?q=", - "be": "https://www.google.be/search?q=", - "bf": "https://www.google.bf/search?q=", - "bg": "https://www.google.bg/search?q=", - "bh": "https://www.google.com.bh/search?q=", - "bi": "https://www.google.bi/search?q=", - "bj": "https://www.google.bj/search?q=", - "bn": "https://www.google.com.bn/search?q=", - "bo": "https://www.google.com.bo/search?q=", - "br": "https://www.google.com.br/search?q=", - "bs": "https://www.google.bs/search?q=", - "bt": "https://www.google.bt/search?q=", - "bw": "https://www.google.co.bw/search?q=", - "by": "https://www.google.by/search?q=", - "bz": "https://www.google.com.bz/search?q=", - "ca": "https://www.google.ca/search?q=", - "kh": "https://www.google.com.kh/search?q=", - "cc": "https://www.google.cc/search?q=", - "cd": "https://www.google.cd/search?q=", - "cf": "https://www.google.cf/search?q=", - "cat": "https://www.google.cat/search?q=", - "cg": "https://www.google.cg/search?q=", - "ch": "https://www.google.ch/search?q=", - "ci": "https://www.google.ci/search?q=", - "ck": "https://www.google.co.ck/search?q=", - "cl": "https://www.google.cl/search?q=", - "cm": "https://www.google.cm/search?q=", - "cn": "https://www.google.cn/search?q=", - "co": "https://www.google.com.co/search?q=", - "cr": "https://www.google.co.cr/search?q=", - "cu": "https://www.google.com.cu/search?q=", - "cv": "https://www.google.cv/search?q=", - "cy": "https://www.google.com.cy/search?q=", - "cz": "https://www.google.cz/search?q=", - "de": "https://www.google.de/search?q=", - "dj": "https://www.google.dj/search?q=", - "dk": "https://www.google.dk/search?q=", - "dm": "https://www.google.dm/search?q=", - "do": "https://www.google.com.do/search?q=", - "dz": "https://www.google.dz/search?q=", - "ec": "https://www.google.com.ec/search?q=", - "ee": "https://www.google.ee/search?q=", - "eg": "https://www.google.com.eg/search?q=", - "es": "https://www.google.es/search?q=", - "et": "https://www.google.com.et/search?q=", - "fi": "https://www.google.fi/search?q=", - "fj": "https://www.google.com.fj/search?q=", - "fm": "https://www.google.fm/search?q=", - "fr": "https://www.google.fr/search?q=", - "ga": "https://www.google.ga/search?q=", - "gb": "https://www.google.co.uk/search?q=", - "ge": "https://www.google.ge/search?q=", - "gf": "https://www.google.gf/search?q=", - "gg": "https://www.google.gg/search?q=", - "gh": "https://www.google.com.gh/search?q=", - "gi": "https://www.google.com.gi/search?q=", - "gl": "https://www.google.gl/search?q=", - "gm": "https://www.google.gm/search?q=", - "gp": "https://www.google.gp/search?q=", - "gr": "https://www.google.gr/search?q=", - "gt": "https://www.google.com.gt/search?q=", - "gy": "https://www.google.gy/search?q=", - "hk": "https://www.google.com.hk/search?q=", - "hn": "https://www.google.hn/search?q=", - "hr": "https://www.google.hr/search?q=", - "ht": "https://www.google.ht/search?q=", - "hu": "https://www.google.hu/search?q=", - "id": "https://www.google.co.id/search?q=", - "iq": "https://www.google.iq/search?q=", - "ie": "https://www.google.ie/search?q=", - "il": "https://www.google.co.il/search?q=", - "im": "https://www.google.im/search?q=", - "in": "https://www.google.co.in/search?q=", - "io": "https://www.google.io/search?q=", - "is": "https://www.google.is/search?q=", - "it": "https://www.google.it/search?q=", - "je": "https://www.google.je/search?q=", - "jm": "https://www.google.com.jm/search?q=", - "jo": "https://www.google.jo/search?q=", - "jp": "https://www.google.co.jp/search?q=", - "ke": "https://www.google.co.ke/search?q=", - "ki": "https://www.google.ki/search?q=", - "kg": "https://www.google.kg/search?q=", - "kr": "https://www.google.co.kr/search?q=", - "kw": "https://www.google.com.kw/search?q=", - "kz": "https://www.google.kz/search?q=", - "la": "https://www.google.la/search?q=", - "lb": "https://www.google.com.lb/search?q=", - "lc": "https://www.google.com.lc/search?q=", - "li": "https://www.google.li/search?q=", - "lk": "https://www.google.lk/search?q=", - "ls": "https://www.google.co.ls/search?q=", - "lt": "https://www.google.lt/search?q=", - "lu": "https://www.google.lu/search?q=", - "lv": "https://www.google.lv/search?q=", - "ly": "https://www.google.com.ly/search?q=", - "ma": "https://www.google.co.ma/search?q=", - "md": "https://www.google.md/search?q=", - "me": "https://www.google.me/search?q=", - "mg": "https://www.google.mg/search?q=", - "mk": "https://www.google.mk/search?q=", - "ml": "https://www.google.ml/search?q=", - "mm": "https://www.google.com.mm/search?q=", - "mn": "https://www.google.mn/search?q=", - "ms": "https://www.google.ms/search?q=", - "mt": "https://www.google.com.mt/search?q=", - "mu": "https://www.google.mu/search?q=", - "mv": "https://www.google.mv/search?q=", - "mw": "https://www.google.mw/search?q=", - "mx": "https://www.google.com.mx/search?q=", - "my": "https://www.google.com.my/search?q=", - "mz": "https://www.google.co.mz/search?q=", - "na": "https://www.google.com.na/search?q=", - "ne": "https://www.google.ne/search?q=", - "nf": "https://www.google.com.nf/search?q=", - "ng": "https://www.google.com.ng/search?q=", - "ni": "https://www.google.com.ni/search?q=", - "nl": "https://www.google.nl/search?q=", - "no": "https://www.google.no/search?q=", - "np": "https://www.google.com.np/search?q=", - "nr": "https://www.google.nr/search?q=", - "nu": "https://www.google.nu/search?q=", - "nz": "https://www.google.co.nz/search?q=", - "om": "https://www.google.com.om/search?q=", - "pa": "https://www.google.com.pa/search?q=", - "pe": "https://www.google.com.pe/search?q=", - "ph": "https://www.google.com.ph/search?q=", - "pk": "https://www.google.com.pk/search?q=", - "pl": "https://www.google.pl/search?q=", - "pg": "https://www.google.com.pg/search?q=", - "pn": "https://www.google.pn/search?q=", - "pr": "https://www.google.com.pr/search?q=", - "ps": "https://www.google.ps/search?q=", - "pt": "https://www.google.pt/search?q=", - "py": "https://www.google.com.py/search?q=", - "qa": "https://www.google.com.qa/search?q=", - "ro": "https://www.google.ro/search?q=", - "rs": "https://www.google.rs/search?q=", - "ru": "https://www.google.ru/search?q=", - "rw": "https://www.google.rw/search?q=", - "sa": "https://www.google.com.sa/search?q=", - "sb": "https://www.google.com.sb/search?q=", - "sc": "https://www.google.sc/search?q=", - "se": "https://www.google.se/search?q=", - "sg": "https://www.google.com.sg/search?q=", - "sh": "https://www.google.sh/search?q=", - "si": "https://www.google.si/search?q=", - "sk": "https://www.google.sk/search?q=", - "sl": "https://www.google.com.sl/search?q=", - "sn": "https://www.google.sn/search?q=", - "sm": "https://www.google.sm/search?q=", - "so": "https://www.google.so/search?q=", - "st": "https://www.google.st/search?q=", - "sv": "https://www.google.com.sv/search?q=", - "td": "https://www.google.td/search?q=", - "tg": "https://www.google.tg/search?q=", - "th": "https://www.google.co.th/search?q=", - "tj": "https://www.google.com.tj/search?q=", - "tk": "https://www.google.tk/search?q=", - "tl": "https://www.google.tl/search?q=", - "tm": "https://www.google.tm/search?q=", - "to": "https://www.google.to/search?q=", - "tn": "https://www.google.tn/search?q=", - "tr": "https://www.google.com.tr/search?q=", - "tt": "https://www.google.tt/search?q=", - "tw": "https://www.google.com.tw/search?q=", - "tz": "https://www.google.co.tz/search?q=", - "ua": "https://www.google.com.ua/search?q=", - "ug": "https://www.google.co.ug/search?q=", - "uk": "https://www.google.co.uk/search?q=", - "uy": "https://www.google.com.uy/search?q=", - "uz": "https://www.google.co.uz/search?q=", - "vc": "https://www.google.com.vc/search?q=", - "ve": "https://www.google.co.ve/search?q=", - "vg": "https://www.google.vg/search?q=", - "vi": "https://www.google.co.vi/search?q=", - "vn": "https://www.google.com.vn/search?q=", - "vu": "https://www.google.vu/search?q=", - "ws": "https://www.google.ws/search?q=", - "za": "https://www.google.co.za/search?q=", - "zm": "https://www.google.co.zm/search?q=", - "zw": "https://www.google.co.zw/search?q=", + "us": "com/search?q=", + "ac": "ac/search?q=", + "ad": "ad/search?q=", + "ae": "ae/search?q=", + "af": "com.af/search?q=", + "ag": "com.ag/search?q=", + "ai": "com.ai/search?q=", + "al": "al/search?q=", + "am": "am/search?q=", + "ao": "co.ao/search?q=", + "ar": "com.ar/search?q=", + "as": "as/search?q=", + "at": "at/search?q=", + "au": "com.au/search?q=", + "az": "az/search?q=", + "ba": "ba/search?q=", + "bd": "com.bd/search?q=", + "be": "be/search?q=", + "bf": "bf/search?q=", + "bg": "bg/search?q=", + "bh": "com.bh/search?q=", + "bi": "bi/search?q=", + "bj": "bj/search?q=", + "bn": "com.bn/search?q=", + "bo": "com.bo/search?q=", + "br": "com.br/search?q=", + "bs": "bs/search?q=", + "bt": "bt/search?q=", + "bw": "co.bw/search?q=", + "by": "by/search?q=", + "bz": "com.bz/search?q=", + "ca": "ca/search?q=", + "kh": "com.kh/search?q=", + "cc": "cc/search?q=", + "cd": "cd/search?q=", + "cf": "cf/search?q=", + "cat": "cat/search?q=", + "cg": "cg/search?q=", + "ch": "ch/search?q=", + "ci": "ci/search?q=", + "ck": "co.ck/search?q=", + "cl": "cl/search?q=", + "cm": "cm/search?q=", + "cn": "cn/search?q=", + "co": "com.co/search?q=", + "cr": "co.cr/search?q=", + "cu": "com.cu/search?q=", + "cv": "cv/search?q=", + "cy": "com.cy/search?q=", + "cz": "cz/search?q=", + "de": "de/search?q=", + "dj": "dj/search?q=", + "dk": "dk/search?q=", + "dm": "dm/search?q=", + "do": "com.do/search?q=", + "dz": "dz/search?q=", + "ec": "com.ec/search?q=", + "ee": "ee/search?q=", + "eg": "com.eg/search?q=", + "es": "es/search?q=", + "et": "com.et/search?q=", + "fi": "fi/search?q=", + "fj": "com.fj/search?q=", + "fm": "fm/search?q=", + "fr": "fr/search?q=", + "ga": "ga/search?q=", + "gb": "co.uk/search?q=", + "ge": "ge/search?q=", + "gf": "gf/search?q=", + "gg": "gg/search?q=", + "gh": "com.gh/search?q=", + "gi": "com.gi/search?q=", + "gl": "gl/search?q=", + "gm": "gm/search?q=", + "gp": "gp/search?q=", + "gr": "gr/search?q=", + "gt": "com.gt/search?q=", + "gy": "gy/search?q=", + "hk": "com.hk/search?q=", + "hn": "hn/search?q=", + "hr": "hr/search?q=", + "ht": "ht/search?q=", + "hu": "hu/search?q=", + "id": "co.id/search?q=", + "iq": "iq/search?q=", + "ie": "ie/search?q=", + "il": "co.il/search?q=", + "im": "im/search?q=", + "in": "co.in/search?q=", + "io": "io/search?q=", + "is": "is/search?q=", + "it": "it/search?q=", + "je": "je/search?q=", + "jm": "com.jm/search?q=", + "jo": "jo/search?q=", + "jp": "co.jp/search?q=", + "ke": "co.ke/search?q=", + "ki": "ki/search?q=", + "kg": "kg/search?q=", + "kr": "co.kr/search?q=", + "kw": "com.kw/search?q=", + "kz": "kz/search?q=", + "la": "la/search?q=", + "lb": "com.lb/search?q=", + "lc": "com.lc/search?q=", + "li": "li/search?q=", + "lk": "lk/search?q=", + "ls": "co.ls/search?q=", + "lt": "lt/search?q=", + "lu": "lu/search?q=", + "lv": "lv/search?q=", + "ly": "com.ly/search?q=", + "ma": "co.ma/search?q=", + "md": "md/search?q=", + "me": "me/search?q=", + "mg": "mg/search?q=", + "mk": "mk/search?q=", + "ml": "ml/search?q=", + "mm": "com.mm/search?q=", + "mn": "mn/search?q=", + "ms": "ms/search?q=", + "mt": "com.mt/search?q=", + "mu": "mu/search?q=", + "mv": "mv/search?q=", + "mw": "mw/search?q=", + "mx": "com.mx/search?q=", + "my": "com.my/search?q=", + "mz": "co.mz/search?q=", + "na": "com.na/search?q=", + "ne": "ne/search?q=", + "nf": "com.nf/search?q=", + "ng": "com.ng/search?q=", + "ni": "com.ni/search?q=", + "nl": "nl/search?q=", + "no": "no/search?q=", + "np": "com.np/search?q=", + "nr": "nr/search?q=", + "nu": "nu/search?q=", + "nz": "co.nz/search?q=", + "om": "com.om/search?q=", + "pa": "com.pa/search?q=", + "pe": "com.pe/search?q=", + "ph": "com.ph/search?q=", + "pk": "com.pk/search?q=", + "pl": "pl/search?q=", + "pg": "com.pg/search?q=", + "pn": "pn/search?q=", + "pr": "com.pr/search?q=", + "ps": "ps/search?q=", + "pt": "pt/search?q=", + "py": "com.py/search?q=", + "qa": "com.qa/search?q=", + "ro": "ro/search?q=", + "rs": "rs/search?q=", + "ru": "ru/search?q=", + "rw": "rw/search?q=", + "sa": "com.sa/search?q=", + "sb": "com.sb/search?q=", + "sc": "sc/search?q=", + "se": "se/search?q=", + "sg": "com.sg/search?q=", + "sh": "sh/search?q=", + "si": "si/search?q=", + "sk": "sk/search?q=", + "sl": "com.sl/search?q=", + "sn": "sn/search?q=", + "sm": "sm/search?q=", + "so": "so/search?q=", + "st": "st/search?q=", + "sv": "com.sv/search?q=", + "td": "td/search?q=", + "tg": "tg/search?q=", + "th": "co.th/search?q=", + "tj": "com.tj/search?q=", + "tk": "tk/search?q=", + "tl": "tl/search?q=", + "tm": "tm/search?q=", + "to": "to/search?q=", + "tn": "tn/search?q=", + "tr": "com.tr/search?q=", + "tt": "tt/search?q=", + "tw": "com.tw/search?q=", + "tz": "co.tz/search?q=", + "ua": "com.ua/search?q=", + "ug": "co.ug/search?q=", + "uk": "co.uk/search?q=", + "uy": "com.uy/search?q=", + "uz": "co.uz/search?q=", + "vc": "com.vc/search?q=", + "ve": "co.ve/search?q=", + "vg": "vg/search?q=", + "vi": "co.vi/search?q=", + "vn": "com.vn/search?q=", + "vu": "vu/search?q=", + "ws": "ws/search?q=", + "za": "co.za/search?q=", + "zm": "co.zm/search?q=", + "zw": "co.zw/search?q=", } // SearchOptions modifies how the Search function behaves. @@ -311,11 +313,10 @@ func Search(ctx context.Context, searchTerm string, opts ...SearchOptions) ([]Re linkHref, _ := sel.Find("a").Attr("href") linkText := strings.TrimSpace(linkHref) - titleText := strings.TrimSpace(sel.Find("div > div > a > h3").Text()) + titleText := strings.TrimSpace(sel.Find("div > div > div > a > h3").Text()) + descText := strings.TrimSpace(sel.Find("div > div > div > div:first-child > span:first-child").Text()) - descText := strings.TrimSpace(sel.Find("div > div > div > span:last-child").Text()) - - if linkText != "" && linkText != "#" { + if linkText != "" && linkText != "#" && titleText != "" { result := Result{ Rank: rank, URL: linkText, @@ -359,6 +360,14 @@ func Search(ctx context.Context, searchTerm string, opts ...SearchOptions) ([]Re return results, nil } +func base(url string) string { + if strings.HasPrefix(url, "http") { + return url + } else { + return stdGoogleBase + url + } +} + func url(searchTerm string, countryCode string, languageCode string, limit int, start int) string { searchTerm = strings.Trim(searchTerm, " ") searchTerm = strings.Replace(searchTerm, " ", "+", -1) @@ -368,15 +377,15 @@ func url(searchTerm string, countryCode string, languageCode string, limit int, if googleBase, found := GoogleDomains[countryCode]; found { if start == 0 { - url = fmt.Sprintf("%s%s&hl=%s", googleBase, searchTerm, languageCode) + url = fmt.Sprintf("%s%s&hl=%s", base(googleBase), searchTerm, languageCode) } else { - url = fmt.Sprintf("%s%s&hl=%s&start=%d", googleBase, searchTerm, languageCode, start) + url = fmt.Sprintf("%s%s&hl=%s&start=%d", base(googleBase), searchTerm, languageCode, start) } } else { if start == 0 { - url = fmt.Sprintf("%s%s&hl=%s", GoogleDomains["us"], searchTerm, languageCode) + url = fmt.Sprintf("%s%s&hl=%s", stdGoogleBase+GoogleDomains["us"], searchTerm, languageCode) } else { - url = fmt.Sprintf("%s%s&hl=%s&start=%d", GoogleDomains["us"], searchTerm, languageCode, start) + url = fmt.Sprintf("%s%s&hl=%s&start=%d", stdGoogleBase+GoogleDomains["us"], searchTerm, languageCode, start) } }