from selenium import webdriver
import time
import re
# Scrape journal detail pages from letpub.com.cn with headless Chrome and
# print the regex matches extracted from each page's HTML source.

option = webdriver.ChromeOptions()
option.add_argument('headless')  # run Chrome without opening a window
# NOTE(review): newer Selenium releases deprecate `executable_path` and
# `chrome_options`; the call is kept as-is to match the version this script
# was written for -- confirm before upgrading Selenium.
driver = webdriver.Chrome(executable_path='/usr/bin/chromedriver', chrome_options=option)

# Defaults so the names exist even if the loop below never runs.
url = 'https://www.letpub.com.cn/index.php?journalid=10&page=journalapp&view=detail#tonglytougjy'
title = ''
short = ''
ISSN = ''
EISSN = ''  # never assigned below; printed as an empty placeholder
tds = ''    # only referenced by the commented-out table scrape in the loop

try:
    # `journalid` values to fetch; range(1, 2) visits only journal 1.
    for i in range(1, 2):
        url = 'https://www.letpub.com.cn/index.php?journalid={}&page=journalapp&view=detail#tonglytougjy'.format(i)
        time.sleep(2)  # pause before each request
        driver.get(url)
        page_src = driver.page_source
        # print(page_src)

        # NOTE(review): the patterns below look as if HTML tags were stripped
        # from them; they are reproduced unchanged -- verify against the real
        # page markup before relying on the results.
        title = re.findall(r'(.*?)期刊基本', page_src)
        # As written, this lazy group matches the empty string at every
        # position, so `short` is a list of empty strings.
        short = re.findall(r'(.*?)', page_src)
        # Two alternatives with one group each: findall returns 2-tuples.
        ISSN = re.findall(r'期刊ISSN(.*?) | (.*?)', page_src)
        # tds += driver.find_element_by_class_name('table_yjfx').text

        print(title)
        print(short, ISSN, EISSN)
        time.sleep(3)
        # The original literal was split across two physical lines; the
        # intended trailing newline escape is restored here.
        print(str(i) + ': strlen' + str(len(page_src)) + '\n')
finally:
    # Always shut the browser down, even if a page load or regex step raises,
    # so no orphaned headless Chrome process is left behind.
    driver.quit()