主页 M

Python 应用 XPath 爬取小说网站并保存到本地

2020-03-28 网页编程网 网页编程网
# -*- coding: utf-8 -*-
from urllib.parse import urljoin

import requests
from lxml import etree
# Index page of the novel; chapter links found on it are resolved against it.
url='https://www.qqxsnew.com/43/43475/'

# Seconds to wait for each HTTP request so a stalled server cannot hang the run.
REQUEST_TIMEOUT = 10

# Characters that are rejected in file names on common filesystems; each one
# is replaced with "_" before a chapter title is used as a file name.
_UNSAFE_FILENAME_CHARS = str.maketrans({ch: '_' for ch in '\\/:*?"<>|'})


def _fetch_dom(page_url):
    """Download *page_url* and return it parsed as an lxml HTML tree.

    Raises a ``requests.RequestException`` subclass when the request fails,
    times out, or the server answers with a 4xx/5xx status.
    """
    page = requests.get(page_url, timeout=REQUEST_TIMEOUT)
    page.raise_for_status()
    # requests falls back to ISO-8859-1 when the Content-Type header carries
    # no charset, which garbles non-Latin pages; use the encoding detected
    # from the body in that case.
    if page.encoding is None or page.encoding.lower() == 'iso-8859-1':
        page.encoding = page.apparent_encoding
    return etree.HTML(page.text)


dom = _fetch_dom(url)
# Only <dd> entries after the 12th inside each <dl> are used.
# NOTE(review): presumably the first 12 are a "latest chapters" teaser that
# duplicates the full list -- confirm against the live page.
nodes = dom.xpath('//dl/dd[position()>12]')
#print(nodes)

# One {'title': ..., 'url': ...} dict per chapter, in page order.
url_list = []
for node in nodes:
    titles = node.xpath('./a/text()')
    hrefs = node.xpath('./a/@href')
    if not titles or not hrefs:
        # A <dd> without a usable link cannot be downloaded; skip it instead
        # of failing with IndexError.
        continue
    url_list.append({
        'title': titles[0],
        # urljoin handles root-relative, page-relative and absolute hrefs.
        'url': urljoin(url, hrefs[0]),
    })
# Print the collected chapters once, not the growing list on every iteration.
print(url_list)

for dic in url_list:
    try:
        chapter_dom = _fetch_dom(dic['url'])
    except requests.RequestException as exc:
        # One unreachable chapter should not abort the whole download.
        print('skipped {}: {}'.format(dic['url'], exc))
        continue
    con = chapter_dom.xpath('//div[@id="content"]/text()')
    print(con)
    if not con:
        # Nothing to write; do not create an empty file.
        continue
    filename = dic['title'].strip().translate(_UNSAFE_FILENAME_CHARS) + '.txt'
    # Open the file once per chapter rather than once per text node. Append
    # mode is kept, so re-running the script adds to existing files.
    with open(filename, 'a', encoding='utf-8') as f:
        f.writelines(c + '\n' for c in con)
        
阅读原文
阅读 3912
123 显示电脑版