Skip to content

Commit 0a3c07b

Browse files
committed
Collects google ads/answers + captcha solver test
1 parent 482f725 commit 0a3c07b

File tree

13 files changed

+283
-92
lines changed

13 files changed

+283
-92
lines changed

README.md

Lines changed: 7 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ docker-compose up --build
3030
| file | File extension to search (e.g. `PDF`, `DOC`) |
3131
| site | Search within a specific website |
3232
| limit | Limit the number of results |
33+
| answers | Include Google answers as negative rank indexes (e.g. `true`, `false`) |
3334

3435
### **Search**
3536
### *Example request*
@@ -47,27 +48,18 @@ You can replace `google` with `yandex` or `baidu` in the query to change the search engine
4748
"rank": 1,
4849
"url": "https://en.wikipedia.org/wiki/%22Hello,_World!%22_program",
4950
"title": "\"Hello, World!\" program",
50-
"description": "A \"Hello, World!\" program is generally a computer program that ignores any input, and outputs or displays a message similar to \"Hello, World!\"."
51+
"description": "A \"Hello, World!\" program is generally a computer program that ignores any input, and outputs or displays a message similar to \"Hello, World!\".",
52+
"ad": false
5153
},
5254
]
5355
```
54-
### **Images**
56+
### **Images** **[WIP]**
5557
### *Example request*
5658
Get 100 **Google** results for `golden puppy`:
5759
```
5860
GET http://127.0.0.1:7000/google/image?text=golden puppy&limit=100
5961
```
60-
### *Example response*
61-
```JSON
62-
[
63-
{
64-
"rank": 1,
65-
"url": "https://en.wikipedia.org/wiki/%22Hello,_World!%22_program",
66-
"title": "\"Hello, World!\" program",
67-
"description": "A \"Hello, World!\" program is generally a computer program that ignores any input, and outputs or displays a message similar to \"Hello, World!\"."
68-
},
69-
]
70-
```
62+
7163

7264
## CLI <a name="cli"></a> ⌨️
7365
* Use `-h` flag to see commands.
@@ -86,7 +78,8 @@ As a result you should get JSON output containing search results:
8678
"rank": 1,
8779
"url": "https://www.cyberoptik.net/blog/6-sure-fire-ways-to-get-banned-from-google/",
8880
"title": "11 Sure-Fire Ways to Get Banned From Google | CyberOptik",
89-
"description": "How To Get Banned From Google · 1. Cloaking: The Art of Deception · 2. Plagiarism: Because Originality is Overrated · 3. Keyword Stuffing: More is Always Better · 4 ..."
81+
"description": "How To Get Banned From Google · 1. Cloaking: The Art of Deception · 2. Plagiarism: Because Originality is Overrated · 3. Keyword Stuffing: More is Always Better · 4 ...",
82+
"ad": false
9083
},
9184
]
9285
```

cmd/root.go

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
package cmd
22

33
import (
4-
"errors"
54
"fmt"
65
"strings"
76

@@ -13,16 +12,21 @@ import (
1312
)
1413

1514
const (
16-
version = "0.2.1"
15+
version = "0.3"
1716
defaultConfigFilename = "config"
1817
envPrefix = "OPENSERP"
1918
)
2019

2120
type Config struct {
22-
App AppConfig `mapstructure:"app"`
23-
GoogleConfig core.SearchEngineOptions `mapstructure:"google"`
24-
YandexConfig core.SearchEngineOptions `mapstructure:"yandex"`
25-
BaiduConfig core.SearchEngineOptions `mapstructure:"baidu"`
21+
App AppConfig `mapstructure:"app"`
22+
Config2Capcha Config2Captcha `mapstructure:"2captcha"`
23+
GoogleConfig core.SearchEngineOptions `mapstructure:"google"`
24+
YandexConfig core.SearchEngineOptions `mapstructure:"yandex"`
25+
BaiduConfig core.SearchEngineOptions `mapstructure:"baidu"`
26+
}
27+
28+
type Config2Captcha struct {
29+
ApiKey string `mapstructure:"apikey"`
2630
}
2731

2832
type AppConfig struct {
@@ -88,7 +92,7 @@ func initializeConfig(cmd *cobra.Command) error {
8892
// 1. Config. Return an error if we cannot parse the config file.
8993
err := v.ReadInConfig()
9094
if err != nil {
91-
err = errors.New(fmt.Sprintf("Cannot read config: %v", err))
95+
err = fmt.Errorf("cannot read config: %v", err)
9296
logrus.Warn(err)
9397
}
9498

@@ -107,7 +111,7 @@ func initializeConfig(cmd *cobra.Command) error {
107111
// Dump Viper values to config struct
108112
err = v.Unmarshal(&config)
109113
if err != nil {
110-
return errors.New(fmt.Sprintf("Cannot unmarshall config: %v", err))
114+
return fmt.Errorf("cannot unmarshall config: %v", err)
111115
}
112116

113117
if config.App.IsDebug {
@@ -128,4 +132,5 @@ func init() {
128132
RootCmd.PersistentFlags().BoolVarP(&config.App.IsLeakless, "leakless", "l", false, "Use leakless mode to insure browser instances are closed after search")
129133
RootCmd.PersistentFlags().BoolVarP(&config.App.IsRawRequests, "raw", "r", false, "Disable browser usage, use HTTP requests")
130134
RootCmd.PersistentFlags().BoolVarP(&config.App.IsLeaveHead, "leave", "", false, "Leave browser and tabs opened after search is made")
135+
RootCmd.PersistentFlags().StringVarP(&config.Config2Capcha.ApiKey, "2captcha_key", "", "", "2 captcha api key")
131136
}

cmd/search.go

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -54,10 +54,11 @@ func searchBrowser(engineType string, query core.Query) ([]core.SearchResult, er
5454
var engine core.SearchEngine
5555

5656
opts := core.BrowserOpts{
57-
IsHeadless: !config.App.IsBrowserHead, // Disable headless if browser head mode is set
58-
IsLeakless: config.App.IsLeakless,
59-
Timeout: time.Second * time.Duration(config.App.Timeout),
60-
LeavePageOpen: config.App.IsLeaveHead,
57+
IsHeadless: !config.App.IsBrowserHead, // Disable headless if browser head mode is set
58+
IsLeakless: config.App.IsLeakless,
59+
Timeout: time.Second * time.Duration(config.App.Timeout),
60+
LeavePageOpen: config.App.IsLeaveHead,
61+
CaptchaSolverApiKey: config.Config2Capcha.ApiKey,
6162
}
6263

6364
if config.App.IsDebug {

cmd/serve.go

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,10 +21,11 @@ var serveCMD = &cobra.Command{
2121

2222
func serve(cmd *cobra.Command, args []string) {
2323
opts := core.BrowserOpts{
24-
IsHeadless: !config.App.IsBrowserHead, // Disable headless if browser head mode is set
25-
IsLeakless: config.App.IsLeakless,
26-
Timeout: time.Second * time.Duration(config.App.Timeout),
27-
LeavePageOpen: config.App.IsLeaveHead,
24+
IsHeadless: !config.App.IsBrowserHead, // Disable headless if browser head mode is set
25+
IsLeakless: config.App.IsLeakless,
26+
Timeout: time.Second * time.Duration(config.App.Timeout),
27+
LeavePageOpen: config.App.IsLeaveHead,
28+
CaptchaSolverApiKey: config.Config2Capcha.ApiKey,
2829
}
2930

3031
if config.App.IsDebug {

config.yaml

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6,15 +6,20 @@ app:
66
timeout: 15
77
head: false
88
leakless: false
9+
leave_head: false
10+
11+
2captcha:
12+
apikey: "123123123123123"
913

1014
google:
11-
rate_requests: 4 # Number of requests per Minute
12-
rate_burst: 2 # Number of non-ratelimited requests per Minute
15+
rate_requests: 4 # Number of requests per Minute
16+
rate_burst: 2 # Number of non-ratelimited requests per Minute
17+
captcha: true
1318

1419
yandex:
15-
rate_requests: 4
16-
rate_burst: 2
20+
rate_requests: 4
21+
rate_burst: 2
1722

1823
baidu:
19-
rate_requests: 4
20-
rate_burst: 2
24+
rate_requests: 4
25+
rate_burst: 2

core/captcha.go

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
package core
2+
3+
import (
4+
api2captcha "github.com/2captcha/2captcha-go"
5+
)
6+
7+
type CaptchaSolver struct {
8+
client *api2captcha.Client
9+
}
10+
11+
func NewSolver(apikey string) *CaptchaSolver {
12+
cs := CaptchaSolver{}
13+
cs.client = api2captcha.NewClient(apikey)
14+
return &cs
15+
}
16+
17+
func (cs *CaptchaSolver) SolveReCaptcha2(sitekey, pageUrl, dataS string) (string, error) {
18+
cap := api2captcha.ReCaptcha{
19+
SiteKey: sitekey,
20+
Url: pageUrl,
21+
DataS: dataS,
22+
Invisible: false,
23+
Action: "verify",
24+
}
25+
req := cap.ToRequest()
26+
req.SetProxy("HTTPS", "login:password@IP_address:PORT")
27+
return cs.client.Solve(req)
28+
}

core/captcha_test.go

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
package core
2+
3+
import (
4+
"testing"
5+
)
6+
7+
var (
8+
API_KEY = ""
9+
)
10+
11+
func Test2Captcha(t *testing.T) {
12+
solver := NewSolver(API_KEY)
13+
sitekey := "6LfwuyUTAAAAAOAmoS0fdqijC2PbbdH4kjq62Y1b"
14+
url := "https://www.google.com/sorry/index?continue=https://www.google.de/search%3Fhl%3DDE%26lr%3Dlang_de%26nfpr%3D1%26num%3D500%26pws%3D0%26q%3Dwhere%2Bwhy%2Beach&hl=DE&q=EgRegw55GObHiq4GIjDqmzFKayGXrS2-s9ooWfcskhpK8-6tIjWSaSvhxd3f5eAyUXj7lYq2DYLDXB8ASz0yAXJaAUM"
15+
datas := "Ghk0n7ZQNDS0c7ES53eef_YBfSdfeXnyRD0p2OR0R4Dg91CUXKS_hio5Do6TpJ8sHhhOat_NymTASZGe1gqAjP7w9dSvhvRT7QXsrdziO3JPngLDSRzDdjT42GDcSbO0kzInlDPxe1yy2t4yifo9xHpMnlZU7pTVNTQUIXqOMLHAR-iERi6aoSQDQ4d-88-jW3LEinquxEut0OhHG2l2stwG9AnCmNvCsUNJda-H24saFlOh5csK9KNXeeQmpr6at52_skMIMiLXSlY56vYFVCRMkXLQdAM"
16+
resp, err := solver.SolveReCaptcha2(sitekey, url, datas)
17+
if err != nil || resp == "" {
18+
t.Fatalf("Failed to solve recaptchaV2: %s", err)
19+
}
20+
}

core/common.go

Lines changed: 19 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -9,14 +9,15 @@ import (
99
"github.com/gofiber/fiber/v2"
1010
)
1111

12-
var ErrCaptcha = errors.New("Captcha detected")
13-
var ErrSearchTimeout = errors.New("Timeout. Cannot find element on page")
12+
var ErrCaptcha = errors.New("captcha detected")
13+
var ErrSearchTimeout = errors.New("timeout. Cannot find element on page")
1414

1515
type SearchResult struct {
1616
Rank int `json:"rank"`
1717
URL string `json:"url"`
1818
Title string `json:"title"`
1919
Description string `json:"description"`
20+
Ad bool `json:"ad"`
2021
}
2122

2223
func ConvertSearchResultsMap(searchResultsMap map[string]SearchResult) *[]SearchResult {
@@ -39,6 +40,7 @@ type Query struct {
3940
Filetype string // File extension to search.
4041
Site string // Search site
4142
Limit int // Limit the number of results
43+
Answers bool // Include question and answers from SERP page to results with negative indexes
4244
}
4345

4446
func (q Query) IsEmpty() bool {
@@ -48,23 +50,27 @@ func (q Query) IsEmpty() bool {
4850
return false
4951
}
5052

51-
func (q *Query) InitFromContext(c *fiber.Ctx) error {
52-
q.Text = c.Query("text")
53-
q.LangCode = c.Query("lang")
54-
q.DateInterval = c.Query("date")
55-
q.Filetype = c.Query("file")
56-
q.Site = c.Query("site")
53+
func (searchQuery *Query) InitFromContext(reqCtx *fiber.Ctx) error {
54+
searchQuery.Text = reqCtx.Query("text")
55+
searchQuery.LangCode = reqCtx.Query("lang")
56+
searchQuery.DateInterval = reqCtx.Query("date")
57+
searchQuery.Filetype = reqCtx.Query("file")
58+
searchQuery.Site = reqCtx.Query("site")
5759

58-
limit, err := strconv.Atoi(c.Query("limit", "25"))
60+
limit, err := strconv.Atoi(reqCtx.Query("limit", "25"))
5961
if err != nil {
6062
return err
6163
}
62-
q.Limit = limit
64+
searchQuery.Limit = limit
6365

64-
if q.IsEmpty() {
65-
return errors.New("Query cannot be empty")
66+
searchQuery.Answers, err = strconv.ParseBool(reqCtx.Query("answers", "0"))
67+
if err != nil {
68+
return err
6669
}
6770

71+
if searchQuery.IsEmpty() {
72+
return errors.New("Query cannot be empty")
73+
}
6874
return nil
6975
}
7076

@@ -73,6 +79,7 @@ type SearchEngineOptions struct {
7379
RateTime int64 `mapstructure:"rate_seconds"`
7480
RateBurst int `mapstructure:"rate_burst"`
7581
SelectorTimeout int64 `mapstructure:"selector_timeout"` // CSS selector timeout in seconds
82+
IsSolveCaptcha bool `mapstructure:"captcha"`
7683
}
7784

7885
func (o *SearchEngineOptions) Init() {

core/server.go

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@ package core
22

33
import (
44
"context"
5-
"errors"
65
"fmt"
76
"strings"
87

@@ -52,9 +51,9 @@ func NewServer(host string, port int, searchEngines ...SearchEngine) *Server {
5251
if err != nil {
5352
switch err {
5453
case ErrCaptcha:
55-
err = errors.New(fmt.Sprintf("Captcha found, please stop sending requests for a while\n%s", err))
54+
err = fmt.Errorf("captcha found, please stop sending requests for a while\n%s", err)
5655
case ErrSearchTimeout:
57-
err = errors.New(fmt.Sprintf("%s", err))
56+
err = fmt.Errorf("%s", err)
5857
}
5958

6059
logrus.Errorf("Error during %s search: %s", locEngine.Name(), err)
@@ -87,9 +86,9 @@ func NewServer(host string, port int, searchEngines ...SearchEngine) *Server {
8786
if err != nil {
8887
switch err {
8988
case ErrCaptcha:
90-
err = errors.New(fmt.Sprintf("Captcha found, please stop sending requests for a while\n%s", err))
89+
err = fmt.Errorf("captcha found, please stop sending requests for a while: %s", err)
9190
case ErrSearchTimeout:
92-
err = errors.New(fmt.Sprintf("%s", err))
91+
err = fmt.Errorf("%s", err)
9392
}
9493

9594
logrus.Errorf("Error during %s search: %s", locEngine.Name(), err)

go.mod

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ module github.com/karust/openserp
33
go 1.20
44

55
require (
6+
github.com/2captcha/2captcha-go v1.1.4
67
github.com/PuerkitoBio/goquery v1.8.1
78
github.com/corpix/uarand v0.2.0
89
github.com/go-rod/rod v0.113.3

0 commit comments

Comments
 (0)