批量从天眼查采集公司的邮箱,用于广告营销。
天眼查有反爬,尽量模拟人工查询。具体反爬技术分析:
反爬技术 | 开启与否 |
---|---|
IP | No |
User-Agent | No |
cookie | No |
访问时间间隔 | Yes |
# Collects the e-mail addresses shown on a single Tianyancha search-result
# page: the page is loaded in Chrome through Selenium, and every address that
# follows an "邮箱:" label in the rendered HTML is printed, one per line.
import re
import time

from selenium import webdriver
from selenium.webdriver.chrome.options import Options

# Location of the chromedriver binary used to launch Chrome.
CHROMEDRIVER_PATH = '/usr/bin/chromedriver'

# Search-result page to read; the `key` parameter is the URL-encoded search
# term "保税物流".
SEARCH_URL = 'https://www.tianyancha.com/search?key=%E4%BF%9D%E7%A8%8E%E7%89%A9%E6%B5%81&sessionNo=1723362304.11208604&base=sd&cacheCode=00370000V2020'

# Captures the text of the <span> that directly follows the "邮箱:" label.
EMAIL_PATTERN = re.compile(r'<span class="label">邮箱:</span><span>(.*?)</span>')

option = webdriver.ChromeOptions()
# Headless mode is currently switched off; uncomment to run without a window.
# option.add_argument('--headless')
option.add_experimental_option('excludeSwitches', ['enable-automation'])
option.add_experimental_option('useAutomationExtension', False)
option.add_argument("--disable-gpu")
option.add_argument("blink-settings=imagesEnabled=false")  # do not load images
option.add_argument('--no-sandbox')
# option.add_argument('--hide-scrollbars')

# `options=` is the supported replacement for the deprecated `chrome_options=`.
# NOTE(review): newer Selenium 4 releases appear to reject `executable_path=`
# in favour of `service=Service(path)` -- confirm against the installed version.
driver = webdriver.Chrome(executable_path=CHROMEDRIVER_PATH, options=option)
try:
    driver.get(SEARCH_URL)
    page_src = driver.page_source
finally:
    # Always shut the browser down so Chrome/chromedriver processes are not
    # left running after the script ends or fails.
    driver.quit()

all_emails = EMAIL_PATTERN.findall(page_src)
for email in all_emails:
    print(email)