"""Visit LetPub journal detail pages in headless Chrome and log page sizes.

For every journal id in the configured range the script loads the journal's
detail page, reads the rendered page source, and prints the id together with
the length of that source. Nothing is persisted; the only output is stdout.
"""
from selenium import webdriver
import time

# Detail-page URL; ``{}`` is replaced by the numeric journal id.
URL_TEMPLATE = 'https://www.letpub.com.cn/index.php?journalid={}&page=journalapp&view=detail#tonglytougjy'
# Journal ids to visit: FIRST_JOURNAL_ID .. STOP_JOURNAL_ID - 1.
FIRST_JOURNAL_ID = 1
STOP_JOURNAL_ID = 223000
# Pauses (seconds) before each request and after reading the page source.
# Presumably throttling so the site's anti-scraping measures are not
# triggered -- confirm before tuning.
DELAY_BEFORE_REQUEST = 20
DELAY_AFTER_REQUEST = 30

option = webdriver.ChromeOptions()
option.add_argument('headless')
# NOTE(review): `executable_path` / `chrome_options` are the Selenium 3 style
# keywords; newer Selenium releases expect `service=` / `options=` instead.
# Kept as-is so the script keeps working with the version it was written for.
driver = webdriver.Chrome(executable_path='/usr/bin/chromedriver', chrome_options=option)

try:
    for i in range(FIRST_JOURNAL_ID, STOP_JOURNAL_ID):
        url = URL_TEMPLATE.format(i)
        time.sleep(DELAY_BEFORE_REQUEST)
        driver.get(url)
        # NOTE(review): the source is captured *before* the second pause, so
        # that pause does not give the page extra time to render -- confirm
        # this ordering is intended.
        page_src = driver.page_source
        time.sleep(DELAY_AFTER_REQUEST)
        print('{}: strlen{}\n'.format(i, len(page_src)))
finally:
    # Always shut the browser down, even when a page load raises; otherwise
    # the headless Chrome / chromedriver processes are left running.
    driver.quit()
selenium采集,综合应用反爬
阅读:292 输入:2024-08-04 20:05:35