diff --git a/README.md b/README.md index 657a93a..b035c37 100644 --- a/README.md +++ b/README.md @@ -17,14 +17,12 @@ Quickly scrape Google Search Results. package main import ( - "context" "fmt" "github.com/rocketlaunchr/google-search" ) func main() { - ctx := context.Background() - fmt.Println(googlesearch.Search(ctx, "cars for sale in Toronto, Canada")) + fmt.Println(googlesearch.Search(nil, "cars for sale in Toronto, Canada")) } ``` @@ -53,14 +51,44 @@ func main() { } ``` -## Warning +## :warning: Warning The implementation relies on Google's search page DOM being constant. From time to time, Google changes their DOM and thus breaks the implementation. In the event it changes, this package will be updated as soon as possible. -Also note, that if you call this function too quickly, Google detects that it is being scraped and produces a [recaptcha](https://www.google.com/recaptcha/intro/v3.html) which interferes with the scraping. **Don't call it in quick succession.** +Also note that if you call this function too quickly, Google detects that it is being scraped and produces a [recaptcha](https://www.google.com/recaptcha/intro/v3.html) which interferes with the scraping. **Don't call it in quick succession. It may take some time before Google unlocks you.** +You can try the built-in [rate-limiter](https://godoc.org/github.com/rocketlaunchr/google-search#RateLimit). + +
+ Further Details + + + +
+
+
+

+
+ +
+ About this page

+ + Our systems have detected unusual traffic from your computer network. This page checks to see if it's really you sending the requests, and not a robot. Why did this happen?

+ + + + IP address: xxx.xx.xxx.xx
Time: 2021-01-13T05:27:34Z
URL: https://www.google.com/search?q=Hello+World&hl=en&num=20
+
+
+
+
+
+
+
## Credits @@ -71,6 +99,7 @@ Special thanks to [Edmund Martin](https://edmundmartin.com/scraping-google-with- Other useful packages ------------ +- [awesome-svelte](https://github.com/rocketlaunchr/awesome-svelte) - Resources for killing react - [dataframe-go](https://github.com/rocketlaunchr/dataframe-go) - Statistics and data manipulation - [dbq](https://github.com/rocketlaunchr/dbq) - Zero boilerplate database operations for Go - [electron-alert](https://github.com/rocketlaunchr/electron-alert) - SweetAlert2 for Electron Applications @@ -78,3 +107,4 @@ Other useful packages - [mysql-go](https://github.com/rocketlaunchr/mysql-go) - Properly cancel slow MySQL queries - [react](https://github.com/rocketlaunchr/react) - Build front end applications using Go - [remember-go](https://github.com/rocketlaunchr/remember-go) - Cache slow database queries +- [testing-go](https://github.com/rocketlaunchr/testing-go) - Testing framework for unit testing diff --git a/go.mod b/go.mod index d20431f..64e39ba 100644 --- a/go.mod +++ b/go.mod @@ -2,4 +2,7 @@ module github.com/rocketlaunchr/google-search go 1.12 -require github.com/gocolly/colly/v2 v2.0.1 +require ( + github.com/gocolly/colly/v2 v2.0.1 + golang.org/x/time v0.0.0-20201208040808-7e3f01d25324 +) diff --git a/limit.go b/limit.go new file mode 100644 index 0000000..f91ad5e --- /dev/null +++ b/limit.go @@ -0,0 +1,10 @@ +package googlesearch + +import "golang.org/x/time/rate" + +// RateLimit sets a global limit on how many requests to Google Search can be made in a given time interval. +// The default is unlimited (but obviously Google Search will block you temporarily if you make too many +// calls too quickly). 
+// +// See: https://godoc.org/golang.org/x/time/rate#NewLimiter +var RateLimit = rate.NewLimiter(rate.Inf, 0) diff --git a/screenshot.png b/screenshot.png index 3bd40cc..ae3f620 100644 Binary files a/screenshot.png and b/screenshot.png differ diff --git a/search.go b/search.go index 04cfcc3..8f37b45 100644 --- a/search.go +++ b/search.go @@ -256,6 +256,13 @@ type SearchOptions struct { // Search returns a list of search results from Google. func Search(ctx context.Context, searchTerm string, opts ...SearchOptions) ([]Result, error) { + if ctx == nil { + ctx = context.Background() + } + + if err := RateLimit.Wait(ctx); err != nil { + return nil, err + } c := colly.NewCollector(colly.MaxDepth(1)) if len(opts) == 0 {