主页 M

Selenium 采集,综合应用反爬

2024-08-04 网页编程网 网页编程网
from selenium import webdriver
import time



# Crawl LetPub journal detail pages one at a time with a single headless
# Chrome session and print the length of each page's HTML source.
option = webdriver.ChromeOptions()
option.add_argument('headless')
# NOTE(review): `executable_path=` / `chrome_options=` are the older Selenium
# keyword names; newer releases expect `service=` / `options=` -- confirm
# against the installed Selenium version before upgrading.
driver = webdriver.Chrome(executable_path='/usr/bin/chromedriver', chrome_options=option)

# One template for every journal id; `{}` is filled with the loop counter.
url_template = 'https://www.letpub.com.cn/index.php?journalid={}&page=journalapp&view=detail#tonglytougjy'

try:
    for i in range(1, 223000):
        url = url_template.format(i)
        # Long pauses before and after each request throttle the crawl.
        time.sleep(20)
        driver.get(url)
        page_src = driver.page_source
        time.sleep(30)
        print(str(i)+': strlen'+str(len(page_src))+'\n')
finally:
    # Quit exactly once, after the whole crawl. Calling quit() inside the loop
    # destroyed the session after the first page, so every later
    # driver.get() failed. `finally` also releases the browser process if the
    # crawl is interrupted or a page load raises.
    driver.quit()

阅读原文
阅读 412
123 显示电脑版