# -*- coding: utf-8 -*-
from urllib.parse import urljoin

from lxml import etree
import requests
# Index page that lists every chapter of the book to download.
url = 'https://www.qqxsnew.com/43/43475/'

# Number of leading <dd> entries on the index page that are ignored.
# NOTE(review): presumably these are a "latest chapters" preview that
# duplicates entries further down the list -- confirm against the live page.
SKIPPED_ENTRIES = 12

# Seconds to wait for the server before giving up on a request; without a
# timeout a stalled connection would block the script forever.
REQUEST_TIMEOUT = 10

# Characters that are path separators or reserved in file names on common
# platforms; each is replaced with an underscore.
UNSAFE_FILENAME_CHARS = str.maketrans(
    {char: '_' for char in '\\/:*?"<>|\r\n\t'}
)


def chapter_filename(title):
    """Return a filesystem-safe ``.txt`` file name for a chapter *title*.

    Surrounding whitespace is stripped and reserved characters are replaced
    with ``_``. An empty result falls back to ``untitled``.
    """
    cleaned = title.strip().translate(UNSAFE_FILENAME_CHARS)
    return (cleaned or 'untitled') + '.txt'


def render_chapter(fragments):
    """Join text *fragments* into one string, each followed by a newline."""
    return ''.join(fragment + '\n' for fragment in fragments)


def fetch_dom(session, page_url):
    """Download *page_url* with *session* and return the parsed HTML tree.

    Raises:
        requests.RequestException: on connection errors, timeouts or a
            non-success HTTP status.
    """
    response = session.get(page_url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    return etree.HTML(response.text)


def parse_chapter_links(dom, base_url, skip=SKIPPED_ENTRIES):
    """Extract chapter titles and absolute URLs from the index page *dom*.

    Args:
        dom: parsed index page.
        base_url: URL the index page was fetched from; hrefs are resolved
            against it.
        skip: number of leading ``<dd>`` entries to ignore.

    Returns:
        A list of ``{'title': ..., 'url': ...}`` dicts in page order.
    """
    chapters = []
    for node in dom.xpath('//dl/dd[position()>{}]'.format(skip)):
        titles = node.xpath('./a/text()')
        hrefs = node.xpath('./a/@href')
        # Entries without a usable link cannot be downloaded; skip them
        # instead of failing with IndexError.
        if not titles or not hrefs:
            continue
        chapters.append({
            'title': titles[0],
            # urljoin handles root-relative, relative and absolute hrefs.
            'url': urljoin(base_url, hrefs[0]),
        })
    return chapters


def main():
    """Download every listed chapter into ``<title>.txt`` in the current dir."""
    # One session is shared by all requests instead of a fresh call each time.
    with requests.Session() as session:
        url_list = parse_chapter_links(fetch_dom(session, url), url)
        print(url_list)
        for dic in url_list:
            try:
                chapter_dom = fetch_dom(session, dic['url'])
            except requests.RequestException as exc:
                # One unreachable chapter should not abort the whole run.
                print('skipping', dic['url'], '-', exc)
                continue
            con = chapter_dom.xpath('//div[@id="content"]/text()')
            print(con)
            if not con:
                # Nothing to save; do not create an empty file.
                continue
            # Opened once per chapter in 'w' mode so that re-running the
            # script replaces a chapter file instead of appending a copy.
            with open(chapter_filename(dic['title']), 'w',
                      encoding='utf-8') as f:
                f.write(render_chapter(con))


if __name__ == '__main__':
    main()