1.序

批量从天眼查采集公司的邮箱,用于广告营销。

2.分析

天眼查有反爬,尽量模拟人工查询。具体反爬技术分析:

| 反爬技术 | 是否开启 |
| --- | --- |
| IP | No |
| User-Agent | No |
| Cookie | No |
| 访问时间间隔 | Yes |

3.源码实现

from selenium import webdriver

import time
from selenium.webdriver.chrome.options import Options
import re
# Search-result page to scrape (keyword and session parameters are baked in).
SEARCH_URL = 'https://www.tianyancha.com/search?key=%E4%BF%9D%E7%A8%8E%E7%89%A9%E6%B5%81&sessionNo=1723362304.11208604&base=sd&cacheCode=00370000V2020'

# Location of the chromedriver binary used to drive Chrome.
CHROMEDRIVER_PATH = '/usr/bin/chromedriver'

# Captures the text of the <span> that directly follows the "邮箱:" label span.
# Compiled once so repeated calls to extract_emails() reuse the same pattern.
EMAIL_PATTERN = re.compile(r'<span class="label">邮箱:</span><span>(.*?)</span>')


def build_options():
    """Return the ChromeOptions used for the scraping session."""
    option = webdriver.ChromeOptions()
    # Uncomment to run without a visible browser window.
    # option.add_argument('--headless')
    # Drop the "enable-automation" switch and the automation extension;
    # presumably to look less like an automated browser to the site.
    option.add_experimental_option('excludeSwitches', ['enable-automation'])
    option.add_experimental_option('useAutomationExtension', False)
    option.add_argument("--disable-gpu")
    # Skip image loading; only the page HTML is needed.
    option.add_argument("blink-settings=imagesEnabled=false")
    option.add_argument('--no-sandbox')
    return option


def extract_emails(page_src):
    """Return every e-mail value found in *page_src*, in document order.

    An e-mail value is the content of the ``<span>`` immediately following
    ``<span class="label">邮箱:</span>``. Returns an empty list when the
    markup does not occur.
    """
    return EMAIL_PATTERN.findall(page_src)


def main():
    """Open the search page in Chrome and print each e-mail found on it."""
    # NOTE(review): the `executable_path` and `chrome_options` keywords were
    # removed in Selenium 4.10; on newer versions pass
    # `service=Service(CHROMEDRIVER_PATH)` and `options=...` instead.
    # Confirm against the installed Selenium version.
    driver = webdriver.Chrome(
        executable_path=CHROMEDRIVER_PATH,
        chrome_options=build_options(),
    )
    try:
        driver.get(SEARCH_URL)
        page_src = driver.page_source
    finally:
        # Always shut the browser down so chrome/chromedriver processes
        # are not leaked, even when the page load raises.
        driver.quit()

    for email in extract_emails(page_src):
        print(email)


if __name__ == '__main__':
    main()