# -*- coding: utf-8 -*-
"""Scrape listing pages from cs.lianjia.com/ershoufang and append them to a CSV.

For every listing block on a page, the title text, the total-price text and
the unit-price text are extracted with XPath and appended as one CSV row.
"""
import csv

import requests
from lxml import etree

# Output file; opened in append mode so repeated runs keep adding rows.
OUTPUT_PATH = '你是一只狗.csv'
# Page URLs are built as BASE_URL + <page number> + '/'.
BASE_URL = 'https://cs.lianjia.com/ershoufang/pg'
# Seconds to wait for the server before giving up, so a stalled
# connection cannot hang the script forever.
REQUEST_TIMEOUT = 10


def _first_text(node, path):
    """Return the first result of XPath *path* on *node*, or '' if none."""
    matches = node.xpath(path)
    return matches[0] if matches else ''


def get_data(data):
    """Parse one listing page and append its rows to ``OUTPUT_PATH``.

    Args:
        data: HTML text of a listing page.

    Each ``<div class="info clear">`` element yields one CSV row of
    (title, total price, unit price). A field whose node is missing is
    written as an empty string instead of aborting the whole run. Nothing
    is written when the page is empty or contains no listing blocks.
    """
    if not data or not data.strip():
        return
    tree = etree.HTML(data)
    if tree is None:
        return

    rows = []
    for div in tree.xpath('//div[@class="info clear"]'):
        # In XPath, '.' is the current node, a leading '/' selects from the
        # document root and '//' selects matching nodes at any depth.
        title = _first_text(div, './div/a/text()')
        total_price = _first_text(
            div, './/div[@class="totalPrice"]/span/text()')
        unit_price = _first_text(
            div, './/div[@class="unitPrice"]/span/text()')
        rows.append((title, total_price, unit_price))

    if not rows:
        return

    # 'a' is append mode. newline='' lets the csv module control line
    # endings; csv.writer terminates every record and quotes fields that
    # contain commas, so rows no longer run together on a single line.
    with open(OUTPUT_PATH, 'a', encoding="utf-8-sig", newline='') as f:
        csv.writer(f).writerows(rows)


def main():
    """Ask for a page count, then fetch and store that many listing pages."""
    num = int(input("页数"))
    # NOTE(review): result pages appear to be numbered from 1 (pg1 is the
    # first page), so iterate 1..num rather than 0..num-1 - confirm
    # against the site.
    for page in range(1, num + 1):
        url = BASE_URL + str(page) + '/'
        response = requests.get(url, timeout=REQUEST_TIMEOUT)
        # Fail loudly on HTTP errors instead of parsing an error page.
        response.raise_for_status()
        get_data(response.text)


if __name__ == '__main__':
    main()
Python 应用 XPath 实现爬取链家网数据并保存
阅读:3240 输入:2020-03-16 16:06:07