主页 M

selenium采集,综合应用反爬

2024-08-04 网页编程网 网页编程网
from selenium import webdriver
import time
import re


# Scrape journal detail pages from letpub.com.cn with a headless Chrome
# session and print the fields pulled out of each page's HTML source.

option = webdriver.ChromeOptions()
option.add_argument('headless')
# NOTE(review): `executable_path` and `chrome_options` are deprecated keyword
# arguments in Selenium 4 and are rejected by recent 4.x releases; on those
# versions pass `service=Service('/usr/bin/chromedriver')` and
# `options=option` instead. Left unchanged here because the installed
# Selenium version is not visible from this file.
driver = webdriver.Chrome(executable_path='/usr/bin/chromedriver', chrome_options=option)

# Detail-page URL; the journal id is substituted per iteration.
url_template = 'https://www.letpub.com.cn/index.php?journalid={}&page=journalapp&view=detail#tonglytougjy'

# Module-level result holders, overwritten on every iteration.
title = ''
short = ''
ISSN = ''
EISSN = ''  # never populated below; prints as the initial empty string
tds = ''    # reserved for the 'table_yjfx' table text; collection is disabled

try:
    for i in range(1, 2):
        url = url_template.format(i)
        time.sleep(2)  # pause before each request
        driver.get(url)
        page_src = driver.page_source

        # NOTE(review): these patterns appear to have lost their HTML-tag
        # anchors when the script was copied from a rendered web page (the
        # first one contained two raw line breaks, written here as \n\n).
        # As written, the second and third patterns can only capture empty
        # strings. Restore the original tags against the live page markup.
        title = re.findall(r'\n\n(.*?)期刊基本', page_src)
        short = re.findall(r'(.*?)', page_src)
        ISSN = re.findall(r'期刊ISSN(.*?)(.*?)', page_src)

        print(title)
        print(short, ISSN, EISSN)
        time.sleep(3)
        print(str(i) + ': strlen' + str(len(page_src)) + ' ')
finally:
    # Always release the browser, even if a page fetch raises; quitting
    # after the loop also keeps the session alive for a wider id range.
    driver.quit()

阅读原文
阅读 1021
123 显示电脑版