# -*- coding: utf-8 -*-
"""Scrape the Ganji Beijing rental listing page and append rows to a CSV file.

For every listing card found on the page, one row of
``[title, address, price, url]`` is appended to ``godd.csv``.
"""
import csv
from urllib.parse import urljoin

import requests
from lxml import etree

headers = {
    'Referer': 'https://image.baidu.com/search/index?',
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; ) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/82.0.4068.4 Safari/537.36',
}
url = 'http://bj.ganji.com/zufang/'


def _first(results, default=''):
    """Return the first XPath result, or *default* when nothing matched.

    Indexing ``[0]`` directly raises ``IndexError`` as soon as one listing
    lacks a field, which would abort the whole run.
    """
    return results[0] if results else default


def fetch_listing_page(page_url=url, request_headers=headers, timeout=10):
    """Download *page_url* and return it parsed as an lxml HTML tree.

    Raises:
        requests.RequestException: on connection errors, timeouts, or a
            non-2xx HTTP status.
    """
    response = requests.get(page_url, headers=request_headers, timeout=timeout)
    # Fail loudly on an error page instead of silently writing no rows.
    response.raise_for_status()
    # response.text is the decoded body; response.content (raw bytes) is what
    # you would use for binary payloads such as images or video.
    return etree.HTML(response.text)


def extract_rows(html, base_url=url):
    """Yield ``[title, address, price, url]`` for each listing in *html*.

    Fields that are missing from a listing are emitted as empty strings.
    Cards that have neither a title nor a link are skipped.
    """
    # The first child div is skipped, as in the original script
    # (presumably it is not a listing -- confirm against the live page).
    house_list = html.xpath('//div[@class="f-list js-tips-list"]/div')[1:]
    for house in house_list:
        house_title = _first(house.xpath('.//dd[@class="dd-item title"]/a/text()'))
        house_url = _first(house.xpath('.//dd[@class="dd-item title"]/a/@href'))
        if not house_title and not house_url:
            continue
        if house_url:
            # Resolves scheme-relative ("//host/path") and path-relative links
            # against the page URL; absolute links are returned unchanged.
            house_url = urljoin(base_url, house_url)
        house_addr = _first(house.xpath('.//span[@class="area"]/a[2]/span/text()'))
        # Must be relative (".//"): an absolute "//" path searches the whole
        # document and returns the first price on the page for every listing.
        house_price = _first(house.xpath('.//div[@class="price"]/span[1]/text()'))
        yield [house_title, house_addr, house_price, house_url]


def main():
    """Fetch the listing page and append all extracted rows to ``godd.csv``."""
    html = fetch_listing_page()
    # newline='' lets the csv module control line endings; without it every
    # row is followed by a blank line on Windows.
    with open('godd.csv', 'a', encoding='utf-8', newline='') as f:
        csv_writer = csv.writer(f, delimiter=',')
        csv_writer.writerows(extract_rows(html))


if __name__ == '__main__':
    main()
python用xpath爬赶集网租房数据并保存成csv
阅读:3546 输入:2020-05-27 15:57:04
- 上一篇:纯js实现幻灯,学习幻灯原理
- 下一篇:css3实现炫酷侧边栏,fixed应用