# -*- coding: utf-8 -*-
"""
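Web crawler for the Maoyan movie board.

Steps:
1. Build the URL of each board page.
2. Send the request with a third-party library (requests).

Note: the site may detect the crawler and require a login.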
"""
import requests
from lxml import etree
import csv
# HTTP headers sent with every request: a desktop-browser User-Agent and a
# Referer pointing at a page of the same board.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 6.1; ) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/82.0.4068.4 Safari/537.36'
    ),
    'Referer': 'https://maoyan.com/board/4?offset=10',
}
def parse_page(res, csv_path='file.csv'):
    """Extract the movie entries of one board page and append them to a CSV file.

    Args:
        res: HTML text of a board page.
        csv_path: CSV file the rows are appended to. Defaults to
            ``'file.csv'``.

    Each written row is ``(rank, title, cast, release date, cover URL)``.
    The columns are combined with ``zip``, so if the XPath queries return
    lists of different lengths only as many rows as the shortest list are
    written. Nothing is written (and the file is not created) when no rows
    are found.
    """
    tree = etree.HTML(res)
    # Rank on the board
    top = tree.xpath('//dd/i/text()')
    # Movie title
    movie = tree.xpath('//p[@class="name"]/a/text()')
    # Leading actors; the text nodes carry surrounding whitespace
    performer = [text.strip() for text in tree.xpath('//p[@class="star"]/text()')]
    # Release date
    releasetime = tree.xpath('//p[@class="releasetime"]/text()')
    # Cover image: only the link is extracted
    movie_img = tree.xpath('//img[@class="board-img"]/@data-src')

    # Pack the columns into one tuple per movie
    rows = list(zip(top, movie, performer, releasetime, movie_img))
    if not rows:
        return

    # Open the file once per page instead of once per row.
    # newline='' is required by the csv module to avoid blank lines on Windows.
    # utf-8-sig makes the Chinese text independent of the locale encoding and
    # lets Excel detect UTF-8; the BOM is only emitted at the start of an
    # empty file, not on later appends.
    with open(csv_path, 'a', newline='', encoding='utf-8-sig') as f:
        csv.writer(f).writerows(rows)
# Walk the ten pages of the board; the ``offset`` query parameter advances in
# steps of 10.
for page in range(10):
    url = 'https://maoyan.com/board/4?offset={}'.format(page * 10)
    # A timeout keeps a stalled connection from hanging the script forever.
    response = requests.get(url, headers=headers, timeout=10)
    if not response.ok:
        # Do not feed an error page to the parser; report it and move on.
        print('Skipping {}: HTTP {}'.format(url, response.status_code))
        continue
    parse_page(response.text)

# Source article: "Using XPath in Python to scrape the Maoyan movie ranking
# and save it to Excel"
# Views: 4358  Posted: 2020-03-13 21:26:32