Added support for following the link to the next results page
This commit is contained in:
		
							
								
								
									
										68
									
								
								search.go
									
									
									
									
									
								
							
							
						
						
									
										68
									
								
								search.go
									
									
									
									
									
								
							@@ -6,9 +6,13 @@ import (
 | 
				
			|||||||
	"context"
 | 
						"context"
 | 
				
			||||||
	"fmt"
 | 
						"fmt"
 | 
				
			||||||
	"strings"
 | 
						"strings"
 | 
				
			||||||
 | 
						"strconv"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	"github.com/gocolly/colly/v2"
 | 
						"github.com/gocolly/colly/v2"
 | 
				
			||||||
	"github.com/gocolly/colly/v2/proxy"
 | 
						"github.com/gocolly/colly/v2/proxy"
 | 
				
			||||||
 | 
						"github.com/gocolly/colly/v2/queue"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						"net/url"
 | 
				
			||||||
)
 | 
					)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
// Result represents a single result from Google Search.
 | 
					// Result represents a single result from Google Search.
 | 
				
			||||||
@@ -260,6 +264,9 @@ type SearchOptions struct {
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
	// ProxyAddr sets a proxy address to avoid IP blocking.
 | 
						// ProxyAddr sets a proxy address to avoid IP blocking.
 | 
				
			||||||
	ProxyAddr string
 | 
						ProxyAddr string
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						// follow links
 | 
				
			||||||
 | 
						FollowLinks bool 
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
// Search returns a list of search results from Google.
 | 
					// Search returns a list of search results from Google.
 | 
				
			||||||
@@ -290,7 +297,18 @@ func Search(ctx context.Context, searchTerm string, opts ...SearchOptions) ([]Re
 | 
				
			|||||||
		lc = opts[0].LanguageCode
 | 
							lc = opts[0].LanguageCode
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						q, _ := queue.New(
 | 
				
			||||||
 | 
							2,
 | 
				
			||||||
 | 
							&queue.InMemoryQueueStorage{MaxSize: 10000},
 | 
				
			||||||
 | 
						)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						limit := opts[0].Limit
 | 
				
			||||||
 | 
						if opts[0].OverLimit {
 | 
				
			||||||
 | 
							limit = int(float64(opts[0].Limit) * 1.5)
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	results := []Result{}
 | 
						results := []Result{}
 | 
				
			||||||
 | 
						nextPageLink := ""
 | 
				
			||||||
	var rErr error
 | 
						var rErr error
 | 
				
			||||||
	rank := 1
 | 
						rank := 1
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -300,6 +318,12 @@ func Search(ctx context.Context, searchTerm string, opts ...SearchOptions) ([]Re
 | 
				
			|||||||
			rErr = err
 | 
								rErr = err
 | 
				
			||||||
			return
 | 
								return
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
 | 
							if opts[0].FollowLinks == true && nextPageLink != "" {
 | 
				
			||||||
 | 
								req, err := r.New("GET", nextPageLink, nil)
 | 
				
			||||||
 | 
								if err == nil {
 | 
				
			||||||
 | 
									q.AddRequest(req)
 | 
				
			||||||
 | 
								}
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
	})
 | 
						})
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	c.OnError(func(r *colly.Response, err error) {
 | 
						c.OnError(func(r *colly.Response, err error) {
 | 
				
			||||||
@@ -326,14 +350,31 @@ func Search(ctx context.Context, searchTerm string, opts ...SearchOptions) ([]Re
 | 
				
			|||||||
			results = append(results, result)
 | 
								results = append(results, result)
 | 
				
			||||||
			rank += 1
 | 
								rank += 1
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							// check if there is a next button at the end.
 | 
				
			||||||
 | 
							// Added this selector as the Id is the same for every language checked on google.com .pt and .es the text changes but the id remains the same
 | 
				
			||||||
 | 
							nextPageHref, _ := sel.Find("a #pnnext").Attr("href")
 | 
				
			||||||
 | 
							nextPageLink = strings.TrimSpace(nextPageHref)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	})
 | 
						})
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	limit := opts[0].Limit
 | 
						c.OnHTML("div.g", func(e *colly.HTMLElement) {
 | 
				
			||||||
	if opts[0].OverLimit {
 | 
					 | 
				
			||||||
		limit = int(float64(opts[0].Limit) * 1.5)
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
	url := url(searchTerm, opts[0].CountryCode, lc, limit, opts[0].Start)
 | 
							sel := e.DOM
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							// check if there is a next button at the end.
 | 
				
			||||||
 | 
							// Added this selector as the Id is the same for every language checked on google.com .pt and .es the text changes but the id remains the same
 | 
				
			||||||
 | 
							nextPageHref, exists := sel.Attr("href")
 | 
				
			||||||
 | 
							if exists == true {
 | 
				
			||||||
 | 
								start := getStart(strings.TrimSpace(nextPageHref))
 | 
				
			||||||
 | 
								nextPageLink = buildUrl(searchTerm, opts[0].CountryCode, lc, limit, start)
 | 
				
			||||||
 | 
								q.AddURL(nextPageLink)
 | 
				
			||||||
 | 
							} else {
 | 
				
			||||||
 | 
								nextPageLink = ""
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
						})
 | 
				
			||||||
 | 
						
 | 
				
			||||||
 | 
						url := buildUrl(searchTerm, opts[0].CountryCode, lc, limit, opts[0].Start)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if opts[0].ProxyAddr != "" {
 | 
						if opts[0].ProxyAddr != "" {
 | 
				
			||||||
		rp, err := proxy.RoundRobinProxySwitcher(opts[0].ProxyAddr)
 | 
							rp, err := proxy.RoundRobinProxySwitcher(opts[0].ProxyAddr)
 | 
				
			||||||
@@ -343,7 +384,8 @@ func Search(ctx context.Context, searchTerm string, opts ...SearchOptions) ([]Re
 | 
				
			|||||||
		c.SetProxyFunc(rp)
 | 
							c.SetProxyFunc(rp)
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	c.Visit(url)
 | 
						q.AddURL(url)
 | 
				
			||||||
 | 
						q.Run(c)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if rErr != nil {
 | 
						if rErr != nil {
 | 
				
			||||||
		if strings.Contains(rErr.Error(), "Too Many Requests") {
 | 
							if strings.Contains(rErr.Error(), "Too Many Requests") {
 | 
				
			||||||
@@ -360,6 +402,18 @@ func Search(ctx context.Context, searchTerm string, opts ...SearchOptions) ([]Re
 | 
				
			|||||||
	return results, nil
 | 
						return results, nil
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// getStart extracts the integer value of the "start" query parameter
					// from uri (for example the href of a "next page" link).
					//
					// It returns 0 when uri cannot be parsed, when the parameter is absent,
					// or when its value is not a valid int. Returning 0 instead of failing
					// keeps the caller's single-value contract while avoiding a nil
					// dereference on an unparsable link.
					func getStart(uri string) int {
						parsed, err := url.Parse(uri)
						if err != nil {
							// url.Parse returns a nil *url.URL on error; calling Query on
							// it would panic, so bail out with the zero offset.
							return 0
						}

						start, err := strconv.Atoi(parsed.Query().Get("start"))
						if err != nil {
							// Covers a missing/non-numeric value and out-of-range numbers
							// (for which Atoi would otherwise hand back a clamped value).
							return 0
						}
						return start
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
func base(url string) string {
 | 
					func base(url string) string {
 | 
				
			||||||
	if strings.HasPrefix(url, "http") {
 | 
						if strings.HasPrefix(url, "http") {
 | 
				
			||||||
		return url
 | 
							return url
 | 
				
			||||||
@@ -368,7 +422,7 @@ func base(url string) string {
 | 
				
			|||||||
	}
 | 
						}
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
func url(searchTerm string, countryCode string, languageCode string, limit int, start int) string {
 | 
					func buildUrl(searchTerm string, countryCode string, languageCode string, limit int, start int) string {
 | 
				
			||||||
	searchTerm = strings.Trim(searchTerm, " ")
 | 
						searchTerm = strings.Trim(searchTerm, " ")
 | 
				
			||||||
	searchTerm = strings.Replace(searchTerm, " ", "+", -1)
 | 
						searchTerm = strings.Replace(searchTerm, " ", "+", -1)
 | 
				
			||||||
	countryCode = strings.ToLower(countryCode)
 | 
						countryCode = strings.ToLower(countryCode)
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user