# -*- coding: utf-8 -*-
"""Download the chapters of one novel from qqxsnew.com into local text files.

The script fetches the novel's index page, collects every chapter link found
under ``//dl/dd`` (skipping the first ``SKIPPED_ENTRIES`` entries), then
downloads each chapter page and appends the text nodes of
``<div id="content">`` to ``<chapter title>.txt`` in the current directory.
"""
import re
from urllib.parse import urljoin

import requests
from lxml import etree

# Index page of the novel to download.
INDEX_URL = 'https://www.qqxsnew.com/43/43475/'

# Number of leading <dd> entries of the index that are ignored.
# NOTE(review): presumably these are a "latest chapters" block that duplicates
# entries from the full list -- confirm against the live page.
SKIPPED_ENTRIES = 12

# Seconds to wait for the server before giving up; without a timeout
# ``requests`` can block forever on a stalled connection.
REQUEST_TIMEOUT = 10

# Characters that are not allowed in file names on common file systems.
_UNSAFE_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|]')


def fetch_dom(session, url):
    """Fetch *url* and return its parsed HTML tree.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status.
        requests.RequestException: on connection problems or timeout.
    """
    response = session.get(url, timeout=REQUEST_TIMEOUT)
    # Fail loudly instead of parsing an error page as if it were content.
    response.raise_for_status()
    # NOTE(review): ``response.text`` relies on the charset requests infers
    # from the HTTP headers; if saved text comes out garbled, check the
    # site's real encoding.
    return etree.HTML(response.text)


def list_chapters(session, index_url):
    """Return ``[{'title': ..., 'url': ...}, ...]`` for the chapters on the index page."""
    dom = fetch_dom(session, index_url)
    chapters = []
    for node in dom.xpath('//dl/dd[position()>%d]' % SKIPPED_ENTRIES):
        titles = node.xpath('./a/text()')
        hrefs = node.xpath('./a/@href')
        # A <dd> without a usable link (e.g. an empty placeholder) is skipped
        # rather than crashing the whole run with an IndexError.
        if not titles or not hrefs:
            continue
        chapters.append({
            'title': titles[0],
            # urljoin resolves root-relative, page-relative and absolute hrefs.
            'url': urljoin(index_url, hrefs[0]),
        })
    return chapters


def save_chapter(session, chapter):
    """Download one chapter and append its text to ``<title>.txt``.

    Nothing is written when the page has no text under ``div#content``.
    """
    dom = fetch_dom(session, chapter['url'])
    lines = dom.xpath('//div[@id="content"]/text()')
    print(lines)
    if not lines:
        return

    # Titles may contain characters that are illegal in file names.
    filename = _UNSAFE_FILENAME_CHARS.sub('_', chapter['title']) + '.txt'
    # Open the file once per chapter instead of once per text fragment.
    # Append mode is kept so chapters sharing a title accumulate in one file.
    with open(filename, 'a', encoding='utf-8') as f:
        f.writelines(line + '\n' for line in lines)


def main():
    """Download every chapter listed on ``INDEX_URL``."""
    # A session reuses the underlying connection across chapter requests.
    with requests.Session() as session:
        chapters = list_chapters(session, INDEX_URL)
        print(chapters)
        for chapter in chapters:
            save_chapter(session, chapter)


if __name__ == '__main__':
    main()
Python 应用 XPath 爬取小说网站并保存到本地
阅读:3911 输入:2020-03-28 21:29:31