- a few cosmetic changes
This commit is contained in:
		
							
								
								
									
										29
									
								
								search.go
									
									
									
									
									
								
							
							
						
						
									
										29
									
								
								search.go
									
									
									
									
									
								
							| @@ -5,14 +5,13 @@ package googlesearch | |||||||
| import ( | import ( | ||||||
| 	"context" | 	"context" | ||||||
| 	"fmt" | 	"fmt" | ||||||
| 	"strings" | 	"net/url" | ||||||
| 	"strconv" | 	"strconv" | ||||||
|  | 	"strings" | ||||||
|  |  | ||||||
| 	"github.com/gocolly/colly/v2" | 	"github.com/gocolly/colly/v2" | ||||||
| 	"github.com/gocolly/colly/v2/proxy" | 	"github.com/gocolly/colly/v2/proxy" | ||||||
| 	"github.com/gocolly/colly/v2/queue" | 	"github.com/gocolly/colly/v2/queue" | ||||||
|  |  | ||||||
| 	"net/url" |  | ||||||
| ) | ) | ||||||
|  |  | ||||||
| // Result represents a single result from Google Search. | // Result represents a single result from Google Search. | ||||||
| @@ -265,8 +264,8 @@ type SearchOptions struct { | |||||||
| 	// ProxyAddr sets a proxy address to avoid IP blocking. | 	// ProxyAddr sets a proxy address to avoid IP blocking. | ||||||
| 	ProxyAddr string | 	ProxyAddr string | ||||||
|  |  | ||||||
| 	// follow links | 	// FollowNextPage, when set, scrapes subsequent result pages. | ||||||
| 	FollowLinks bool  | 	FollowNextPage bool | ||||||
| } | } | ||||||
|  |  | ||||||
| // Search returns a list of search results from Google. | // Search returns a list of search results from Google. | ||||||
| @@ -297,10 +296,7 @@ func Search(ctx context.Context, searchTerm string, opts ...SearchOptions) ([]Re | |||||||
| 		lc = opts[0].LanguageCode | 		lc = opts[0].LanguageCode | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
| 	q, _ := queue.New( | 	q, _ := queue.New(1, &queue.InMemoryQueueStorage{MaxSize: 10000}) | ||||||
| 		2, |  | ||||||
| 		&queue.InMemoryQueueStorage{MaxSize: 10000}, |  | ||||||
| 	) |  | ||||||
|  |  | ||||||
| 	limit := opts[0].Limit | 	limit := opts[0].Limit | ||||||
| 	if opts[0].OverLimit { | 	if opts[0].OverLimit { | ||||||
| @@ -310,6 +306,7 @@ func Search(ctx context.Context, searchTerm string, opts ...SearchOptions) ([]Re | |||||||
| 	results := []Result{} | 	results := []Result{} | ||||||
| 	nextPageLink := "" | 	nextPageLink := "" | ||||||
| 	var rErr error | 	var rErr error | ||||||
|  | 	filteredRank := 1 | ||||||
| 	rank := 1 | 	rank := 1 | ||||||
|  |  | ||||||
| 	c.OnRequest(func(r *colly.Request) { | 	c.OnRequest(func(r *colly.Request) { | ||||||
| @@ -318,7 +315,7 @@ func Search(ctx context.Context, searchTerm string, opts ...SearchOptions) ([]Re | |||||||
| 			rErr = err | 			rErr = err | ||||||
| 			return | 			return | ||||||
| 		} | 		} | ||||||
| 		if opts[0].FollowLinks == true && nextPageLink != "" { | 		if opts[0].FollowNextPage && nextPageLink != "" { | ||||||
| 			req, err := r.New("GET", nextPageLink, nil) | 			req, err := r.New("GET", nextPageLink, nil) | ||||||
| 			if err == nil { | 			if err == nil { | ||||||
| 				q.AddRequest(req) | 				q.AddRequest(req) | ||||||
| @@ -340,15 +337,16 @@ func Search(ctx context.Context, searchTerm string, opts ...SearchOptions) ([]Re | |||||||
| 		titleText := strings.TrimSpace(sel.Find("div > div > div > a > h3").Text()) | 		titleText := strings.TrimSpace(sel.Find("div > div > div > a > h3").Text()) | ||||||
| 		descText := strings.TrimSpace(sel.Find("div > div > div > div:first-child > span:first-child").Text()) | 		descText := strings.TrimSpace(sel.Find("div > div > div > div:first-child > span:first-child").Text()) | ||||||
|  |  | ||||||
|  | 		rank += 1 | ||||||
| 		if linkText != "" && linkText != "#" && titleText != "" { | 		if linkText != "" && linkText != "#" && titleText != "" { | ||||||
| 			result := Result{ | 			result := Result{ | ||||||
| 				Rank:        rank, | 				Rank:        filteredRank, | ||||||
| 				URL:         linkText, | 				URL:         linkText, | ||||||
| 				Title:       titleText, | 				Title:       titleText, | ||||||
| 				Description: descText, | 				Description: descText, | ||||||
| 			} | 			} | ||||||
| 			results = append(results, result) | 			results = append(results, result) | ||||||
| 			rank += 1 | 			filteredRank += 1 | ||||||
| 		} | 		} | ||||||
|  |  | ||||||
| 		// check if there is a next button at the end. | 		// check if there is a next button at the end. | ||||||
| @@ -364,8 +362,7 @@ func Search(ctx context.Context, searchTerm string, opts ...SearchOptions) ([]Re | |||||||
|  |  | ||||||
| 		// check if there is a next button at the end. | 		// check if there is a next button at the end. | ||||||
| 		// Added this selector as the Id is the same for every language checked on google.com .pt and .es the text changes but the id remains the same | 		// Added this selector as the Id is the same for every language checked on google.com .pt and .es the text changes but the id remains the same | ||||||
| 		nextPageHref, exists := sel.Attr("href") | 		if nextPageHref, exists := sel.Attr("href"); exists { | ||||||
| 		if exists == true { |  | ||||||
| 			start := getStart(strings.TrimSpace(nextPageHref)) | 			start := getStart(strings.TrimSpace(nextPageHref)) | ||||||
| 			nextPageLink = buildUrl(searchTerm, opts[0].CountryCode, lc, limit, start) | 			nextPageLink = buildUrl(searchTerm, opts[0].CountryCode, lc, limit, start) | ||||||
| 			q.AddURL(nextPageLink) | 			q.AddURL(nextPageLink) | ||||||
| @@ -373,7 +370,7 @@ func Search(ctx context.Context, searchTerm string, opts ...SearchOptions) ([]Re | |||||||
| 			nextPageLink = "" | 			nextPageLink = "" | ||||||
| 		} | 		} | ||||||
| 	}) | 	}) | ||||||
| 	 |  | ||||||
| 	url := buildUrl(searchTerm, opts[0].CountryCode, lc, limit, opts[0].Start) | 	url := buildUrl(searchTerm, opts[0].CountryCode, lc, limit, opts[0].Start) | ||||||
|  |  | ||||||
| 	if opts[0].ProxyAddr != "" { | 	if opts[0].ProxyAddr != "" { | ||||||
| @@ -398,7 +395,7 @@ func Search(ctx context.Context, searchTerm string, opts ...SearchOptions) ([]Re | |||||||
| 	if opts[0].Limit != 0 && len(results) > opts[0].Limit { | 	if opts[0].Limit != 0 && len(results) > opts[0].Limit { | ||||||
| 		return results[:opts[0].Limit], nil | 		return results[:opts[0].Limit], nil | ||||||
| 	} | 	} | ||||||
| 	 |  | ||||||
| 	return results, nil | 	return results, nil | ||||||
| } | } | ||||||
|  |  | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user