# -*- coding: utf-8 -*-
import csv

import requests
from lxml import etree
def get_data(data):
    """Extract listings from one HTML page and append them to the CSV file.

    Every ``<div class="info clear">`` element in the page becomes one CSV
    row of three columns: title, total price, unit price. Rows are appended
    to ``你是一只狗.csv`` (UTF-8 with BOM), one record per line, with
    standard CSV quoting so that commas inside a field do not break columns.

    Args:
        data: HTML text of a listing results page.

    Returns:
        None. Nothing is written (and the file is not created) when the page
        contains no matching listing elements.
    """
    tree = etree.HTML(data)
    if tree is None:
        # Nothing parseable (e.g. empty response body): no rows to write.
        return

    # "." anchors a query at the current listing node, "/" selects direct
    # children and "//" matches descendants at any depth.
    field_paths = (
        './div/a/text()',                            # listing title
        './/div[@class="totalPrice"]/span/text()',   # total price
        './/div[@class="unitPrice"]/span/text()',    # unit price
    )

    rows = []
    for div in tree.xpath('//div[@class="info clear"]'):
        row = []
        for path in field_paths:
            found = div.xpath(path)
            # A listing may lack a field; keep the row with an empty cell
            # instead of failing with IndexError on found[0].
            row.append(found[0] if found else '')
        rows.append(row)

    if not rows:
        return

    # Mode 'a' appends, so repeated calls accumulate rows in the same file.
    # newline='' lets the csv module control line endings itself.
    with open('你是一只狗.csv', 'a', encoding="utf-8-sig", newline='') as f:
        csv.writer(f).writerows(rows)
# Ask how many result pages to fetch (the prompt text means "number of pages").
num = int(input("页数"))
# Result pages are addressed as .../pg1/, .../pg2/, ... so count from 1;
# starting at 0 would request "pg0" and never reach page `num`.
for i in range(1, num + 1):
    url = 'https://cs.lianjia.com/ershoufang/pg' + str(i) + '/'
    try:
        # A timeout keeps one stalled connection from hanging the whole run.
        html_data = requests.get(url, timeout=10)
        # Treat HTTP error statuses as failures rather than parsing an error page.
        html_data.raise_for_status()
    except requests.RequestException as exc:
        # Report the failed page and carry on with the remaining ones.
        print("skipping {}: {}".format(url, exc))
        continue
    get_data(html_data.text)
# Using XPath in Python to scrape Lianjia listing data and save it
# Views: 3793  Posted: 2020-03-16 16:06:07