主页 M

Python 应用 XPath 实现爬取链家网数据并保存

2020-03-16 网页编程网 网页编程网
# -*- coding: utf-8 -*-
import requests
from lxml import etree

def _first_text(node, path):
    """Return the first result of XPath *path* under *node*, stripped, or ''."""
    hits = node.xpath(path)
    return hits[0].strip() if hits else ""


def get_data(data):
    """Parse one listing page and append its rows to the output CSV.

    For every ``<div class="info clear">`` element in the HTML text *data*,
    three text fields are extracted (title, total price, unit price) and
    appended as one CSV record to ``你是一只狗.csv``.

    Args:
        data: HTML source of a listing page, as text.

    Returns:
        None. The only effect is appending rows to the CSV file; nothing is
        written when the page is empty or contains no listing blocks.
    """
    # Local import keeps this function self-contained.
    import csv

    if not data or not data.strip():
        return
    tree = etree.HTML(data)
    if tree is None:
        return

    rows = []
    for div in tree.xpath('//div[@class="info clear"]'):
        # A leading "." makes the path relative to the current node; "/"
        # selects direct children while "//" searches at any depth below it.
        title = _first_text(div, './div/a/text()')
        total_price = _first_text(div, './/div[@class="totalPrice"]/span/text()')
        unit_price = _first_text(div, './/div[@class="unitPrice"]/span/text()')
        rows.append((title, total_price, unit_price))

    if not rows:
        return

    # Mode 'a' appends, so successive pages accumulate in the same file.
    # newline="" lets the csv module control line endings; csv.writer also
    # quotes fields that contain commas so columns stay aligned.
    with open('你是一只狗.csv', 'a', encoding="utf-8-sig", newline="") as f:
        csv.writer(f).writerows(rows)

# Ask how many listing pages to fetch, then download and parse each one.
num = int(input("页数"))
# The site's pagination appears to be 1-based (pg1 is the first page), so
# iterate 1..num instead of 0..num-1 to fetch exactly the requested pages.
for i in range(1, num + 1):
    url = 'https://cs.lianjia.com/ershoufang/pg' + str(i) + '/'
    # A timeout prevents the script from hanging forever on a stalled connection.
    html_data = requests.get(url, timeout=10)
    # Fail loudly on HTTP errors instead of silently parsing an error page.
    html_data.raise_for_status()
    get_data(html_data.text)
阅读原文
阅读 3242
123 显示电脑版