主页 M

Python应用XPath爬取猫眼电影排行并保存成Excel

2020-03-13 网页编程网 网页编程网
# -*- coding: utf-8 -*-
"""
爬虫
获取url
发送请求,第三方库
检测出爬虫,要登录
"""
import requests
from lxml import etree
import csv

# HTTP request headers sent with every page fetch: a desktop Chrome
# User-Agent string plus a Referer pointing at a page of the ranking board.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 6.1; ) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/82.0.4068.4 Safari/537.36'
    ),
    'Referer': 'https://maoyan.com/board/4?offset=10',
}

def parse_page(res, path='file.csv'):
    """Extract the board entries from one page and append them to a CSV file.

    Args:
        res: HTML source of a ranking-board page, as text.
        path: CSV file the rows are appended to. Defaults to ``'file.csv'``.

    Each row written is ``(rank, title, cast, release time, cover URL)``.
    The five columns are paired positionally with ``zip``, so the number of
    rows equals the length of the shortest XPath result list. Nothing is
    written (and no file is created) when the page yields no rows.
    """
    tree = etree.HTML(res)
    # Rank on the board
    top = tree.xpath('//dd/i/text()')
    # Movie title
    movie = tree.xpath('//p[@class="name"]/a/text()')
    # Starring cast; the text node carries surrounding whitespace
    performer = [text.strip() for text in tree.xpath('//p[@class="star"]/text()')]
    # Release time
    releasetime = tree.xpath('//p[@class="releasetime"]/text()')
    # Cover image: only the link is taken
    movie_img = tree.xpath('//img[@class="board-img"]/@data-src')

    # Bundle the parallel columns into one tuple per movie.
    rows = list(zip(top, movie, performer, releasetime, movie_img))
    if not rows:
        return

    # Open the file once per page instead of once per row.
    # newline='' is what the csv module requires; without it Windows output
    # gets a blank line after every row.
    # An explicit encoding keeps the Chinese text from depending on the
    # platform default; utf-8-sig puts a BOM at the start of a new file so
    # spreadsheet applications recognise it as UTF-8.
    with open(path, 'a', newline='', encoding='utf-8-sig') as f:
        csv.writer(f).writerows(rows)
            
# Walk the ten pages of the board (offset 0, 10, ..., 90), fetch each one and
# hand its HTML to parse_page(), which appends the entries to the CSV file.
for page in range(10):
    url = 'https://maoyan.com/board/4?offset={}'.format(page * 10)

    # requests has no default timeout; without one a stalled connection
    # would block the script indefinitely.
    response = requests.get(url, headers=headers, timeout=10)
    if not response.ok:
        # A 4xx/5xx response is not a board page, so report it and move on
        # rather than silently parsing it.
        print('skip {}: HTTP {}'.format(url, response.status_code))
        continue
    parse_page(response.text)
阅读原文
阅读 3530
123 显示电脑版