From 83476c787ccab52ac00f39d5fe6e6b70a666cb9b Mon Sep 17 00:00:00 2001 From: pigfaces Date: Thu, 7 Jul 2022 11:45:26 +0800 Subject: [PATCH] chore(readme): rm incognito-context flag & make chrome path flag not required --- README.md | 1 - README_zh-cn.md | 1 - cmd/crawlergo/flag.go | 1 - pkg/engine/browser.go | 8 ++++++-- 4 files changed, 6 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 5fc4f5e..7b41223 100644 --- a/README.md +++ b/README.md @@ -182,7 +182,6 @@ https://intoli.com/blog/not-possible-to-block-chrome-headless/chrome-headless-te * `--form-keyword-values, -fkv` Customize the value of the form fill, set by keyword fuzzy match. The keyword matches the four attribute values of `id`, `name`, `class`, `type` of the input box label. For example, fuzzy match the pass keyword to fill 123456 and the user keyword to fill admin, `-fkv user=admin -fkv pass=123456`. (Default: Cralwergo) ### Advanced settings for the crawling process -* `--incognito-context, -i` Browser start incognito mode. (Default: true) * `--max-tab-count Number, -t Number` The maximum number of tabs the crawler can open at the same time. (Default: 8) * `--tab-run-timeout Timeout` Maximum runtime for a single tab page. (Default: 20s) * `--wait-dom-content-loaded-timeout Timeout` The maximum timeout to wait for the page to finish loading. 
(Default: 5s) diff --git a/README_zh-cn.md b/README_zh-cn.md index a503095..f8196d0 100644 --- a/README_zh-cn.md +++ b/README_zh-cn.md @@ -114,7 +114,6 @@ crawlergo 拥有灵活的参数配置,以下是详细的选项说明: * `--filter-mode Mode, -f Mode` 过滤模式,简单:只过滤静态资源和完全重复的请求。智能:拥有过滤伪静态的能力。严格:更加严格的伪静态过滤规则。 * `--output-mode value, -o value` 结果输出模式,`console`:打印当前域名结果。`json`:打印所有结果的json序列化字符串,可直接被反序列化解析。`none`:不打印输出。 * `--output-json filepath` 将爬虫结果JSON序列化之后写入到json文件。 -* `--incognito-context, -i` 浏览器启动隐身模式 * `--max-tab-count Number, -t Number` 爬虫同时开启最大标签页,即同时爬取的页面数量。 * `--fuzz-path` 使用常见路径Fuzz目标,获取更多入口。 * `--fuzz-path-dict` 通过字典文件自定义Fuzz目录,传入字典文件路径,如:`/home/user/fuzz_dir.txt`,文件每行代表一个要fuzz的目录。 diff --git a/cmd/crawlergo/flag.go b/cmd/crawlergo/flag.go index 39afc61..bf01f8e 100644 --- a/cmd/crawlergo/flag.go +++ b/cmd/crawlergo/flag.go @@ -40,7 +40,6 @@ func SetChromePath() *cli.PathFlag { Name: "chromium-path", Aliases: []string{"c"}, Usage: "`Path` of chromium executable. Such as \"/home/test/chrome-linux/chrome\"", - Required: true, Destination: &taskConfig.ChromiumPath, EnvVars: []string{"CRAWLERGO_CHROMIUM_PATH"}, } diff --git a/pkg/engine/browser.go b/pkg/engine/browser.go index a16abe5..d8703ae 100755 --- a/pkg/engine/browser.go +++ b/pkg/engine/browser.go @@ -25,8 +25,6 @@ func InitBrowser(chromiumPath string, extraHeaders map[string]interface{}, proxy var bro Browser opts := append(chromedp.DefaultExecAllocatorOptions[:], - // 执行路径 - chromedp.ExecPath(chromiumPath), // 无头模式 chromedp.Flag("headless", !noHeadless), // https://github.com/chromedp/chromedp/issues/997#issuecomment-1030596050 @@ -59,6 +57,12 @@ func InitBrowser(chromiumPath string, extraHeaders map[string]interface{}, proxy opts = append(opts, chromedp.ProxyServer(proxy)) } + if len(chromiumPath) > 0 { + + // 指定执行路径 + opts = append(opts, chromedp.ExecPath(chromiumPath)) + } + allocCtx, cancel := chromedp.NewExecAllocator(context.Background(), opts...) bctx, _ := chromedp.NewContext(allocCtx, chromedp.WithLogf(log.Printf),