- add rate limiting
- update readme
This commit is contained in:
		
							
								
								
									
										40
									
								
								README.md
									
									
									
									
									
								
							
							
						
						
									
										40
									
								
								README.md
									
									
									
									
									
								
							| @@ -17,14 +17,12 @@ Quickly scrape Google Search Results. | ||||
| package main | ||||
|  | ||||
| import ( | ||||
| 	"context" | ||||
| 	"fmt" | ||||
| 	"github.com/rocketlaunchr/google-search" | ||||
| ) | ||||
|  | ||||
| func main() { | ||||
| 	ctx := context.Background() | ||||
| 	fmt.Println(googlesearch.Search(ctx, "cars for sale in Toronto, Canada")) | ||||
| 	fmt.Println(googlesearch.Search(nil, "cars for sale in Toronto, Canada")) | ||||
| } | ||||
| ``` | ||||
|  | ||||
| @@ -53,14 +51,44 @@ func main() { | ||||
| } | ||||
| ``` | ||||
|  | ||||
| ## Warning | ||||
| ## :warning: Warning | ||||
|  | ||||
| The implementation relies on Google's search page DOM being constant. From time to time, Google changes their DOM and thus breaks the implementation. | ||||
|  | ||||
| In the event it changes, this package will be updated as soon as possible. | ||||
|  | ||||
| Also note, that if you call this function too quickly, Google detects that it is being scraped and produces a [recaptcha](https://www.google.com/recaptcha/intro/v3.html) which interferes with the scraping. **Don't call it in quick succession.** | ||||
| Also note that if you call this function too quickly, Google detects that it is being scraped and produces a [reCAPTCHA](https://www.google.com/recaptcha/intro/v3.html) which interferes with the scraping. **Don't call it in quick succession. It may take some time before Google unblocks you.** | ||||
|  | ||||
| You can try the built-in [rate-limiter](https://godoc.org/github.com/rocketlaunchr/google-search#RateLimit). | ||||
|  | ||||
| <details> | ||||
|   <summary>Further Details</summary> | ||||
|    | ||||
| <svg width="100" height="100" xmlns="http://www.w3.org/2000/svg"> | ||||
| <foreignObject width="100" height="100"> | ||||
|     <div xmlns="http://www.w3.org/1999/xhtml"> | ||||
|     <div style="font-family: arial, sans-serif; background-color: #fff; color: #000; padding:20px; font-size:18px;" onload="e=document.getElementById('captcha');if(e){e.focus();}"> | ||||
|     <div style="max-width:400px;"> | ||||
|     <hr noshade size="1" style="color:#ccc; background-color:#ccc;"><br> | ||||
|     <hr noshade size="1" style="color:#ccc; background-color:#ccc;"> | ||||
|  | ||||
|     <div style="font-size:13px;"> | ||||
|     <b>About this page</b><br><br> | ||||
|  | ||||
|     Our systems have detected unusual traffic from your computer network.  This page checks to see if it's really you sending the requests, and not a robot.  <a href="#" onclick="document.getElementById('infoDiv').style.display='block';">Why did this happen?</a><br><br> | ||||
|  | ||||
|     <div id="infoDiv" style="display:none; background-color:#eee; padding:10px; margin:0 0 15px 0; line-height:1.4em;"> | ||||
|     This page appears when Google automatically detects requests coming from your computer network which appear to be in violation of the <a href="//www.google.com/policies/terms/">Terms of Service</a>. The block will expire shortly after those requests stop.  In the meantime, solving the above CAPTCHA will let you continue to use our services.<br><br>This traffic may have been sent by malicious software, a browser plug-in, or a script that sends automated requests.  If you share your network connection, ask your administrator for help — a different computer using the same IP address may be responsible.  <a href="//support.google.com/websearch/answer/86640">Learn more</a><br><br>Sometimes you may be asked to solve the CAPTCHA if you are using advanced terms that robots are known to use, or sending requests very quickly. | ||||
|     </div> | ||||
|  | ||||
|     IP address: xxx.xx.xxx.xx<br>Time: 2021-01-13T05:27:34Z<br>URL: https://www.google.com/search?q=Hello+World&hl=en&num=20<br> | ||||
|     </div> | ||||
|     </div> | ||||
|     </div> | ||||
|     </div> | ||||
| </foreignObject> | ||||
| </svg> | ||||
| </details> | ||||
|  | ||||
|  | ||||
| ## Credits | ||||
| @@ -71,6 +99,7 @@ Special thanks to [Edmund Martin](https://edmundmartin.com/scraping-google-with- | ||||
| Other useful packages | ||||
| ------------ | ||||
|  | ||||
| - [awesome-svelte](https://github.com/rocketlaunchr/awesome-svelte) - Resources for killing react | ||||
| - [dataframe-go](https://github.com/rocketlaunchr/dataframe-go) - Statistics and data manipulation | ||||
| - [dbq](https://github.com/rocketlaunchr/dbq) - Zero boilerplate database operations for Go | ||||
| - [electron-alert](https://github.com/rocketlaunchr/electron-alert) - SweetAlert2 for Electron Applications | ||||
| @@ -78,3 +107,4 @@ Other useful packages | ||||
| - [mysql-go](https://github.com/rocketlaunchr/mysql-go) - Properly cancel slow MySQL queries | ||||
| - [react](https://github.com/rocketlaunchr/react) - Build front end applications using Go | ||||
| - [remember-go](https://github.com/rocketlaunchr/remember-go) - Cache slow database queries | ||||
| - [testing-go](https://github.com/rocketlaunchr/testing-go) - Testing framework for unit testing | ||||
|   | ||||
							
								
								
									
										5
									
								
								go.mod
									
									
									
									
									
								
							
							
						
						
									
										5
									
								
								go.mod
									
									
									
									
									
								
							| @@ -2,4 +2,7 @@ module github.com/rocketlaunchr/google-search | ||||
|  | ||||
| go 1.12 | ||||
|  | ||||
| require github.com/gocolly/colly/v2 v2.0.1 | ||||
| require ( | ||||
| 	github.com/gocolly/colly/v2 v2.0.1 | ||||
| 	golang.org/x/time v0.0.0-20201208040808-7e3f01d25324 | ||||
| ) | ||||
|   | ||||
							
								
								
									
										10
									
								
								limit.go
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										10
									
								
								limit.go
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,10 @@ | ||||
| package googlesearch | ||||
|  | ||||
| import "golang.org/x/time/rate" | ||||
|  | ||||
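| // RateLimit sets a global limit on how many requests to Google Search can be made in a given time interval. | ||||
| // The default is unlimited (but Google Search will temporarily block you if you make too many | ||||
| // calls too quickly). | ||||
| // | ||||
| // To enable limiting, assign a new limiter with a burst of at least 1; with a finite limit and a | ||||
| // burst of 0, the Wait call made by Search always returns an error. | ||||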
| // | ||||
| // See: https://godoc.org/golang.org/x/time/rate#NewLimiter | ||||
| var RateLimit = rate.NewLimiter(rate.Inf, 0) | ||||
							
								
								
									
										
											BIN
										
									
								
								screenshot.png
									
									
									
									
									
								
							
							
						
						
									
										
											BIN
										
									
								
								screenshot.png
									
									
									
									
									
								
							
										
											Binary file not shown.
										
									
								
							| Before Width: | Height: | Size: 16 KiB After Width: | Height: | Size: 21 KiB | 
| @@ -256,6 +256,13 @@ type SearchOptions struct { | ||||
|  | ||||
| // Search returns a list of search results from Google. | ||||
| func Search(ctx context.Context, searchTerm string, opts ...SearchOptions) ([]Result, error) { | ||||
| 	if ctx == nil { | ||||
| 		ctx = context.Background() | ||||
| 	} | ||||
|  | ||||
| 	if err := RateLimit.Wait(ctx); err != nil { | ||||
| 		return nil, err | ||||
| 	} | ||||
|  | ||||
| 	c := colly.NewCollector(colly.MaxDepth(1)) | ||||
| 	if len(opts) == 0 { | ||||
|   | ||||
		Reference in New Issue
	
	Block a user