# -*- coding: utf-8 -*-
"""
Web crawler for the Maoyan Top 100 board:
build the page URLs, send the requests with a third-party library (requests),
and parse the results. The site may detect the crawler and require a login.
"""
import csv

import requests
from lxml import etree

headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; ) AppleWebKit/537.36 '
                  '(KHTML, like Gecko) Chrome/82.0.4068.4 Safari/537.36',
    'Referer': 'https://maoyan.com/board/4?offset=10',
}


def parse_page(res):
    tree = etree.HTML(res)
    # ranking number
    top = tree.xpath('//dd/i/text()')
    # movie title
    movie = tree.xpath('//p[@class="name"]/a/text()')
    # leading actors
    performer = tree.xpath('//p[@class="star"]/text()')
    performer = [i.strip() for i in performer]
    # release date
    releasetime = tree.xpath('//p[@class="releasetime"]/text()')
    # cover image: the link lives in the data-src attribute, so take it separately
    movie_img = tree.xpath('//img[@class="board-img"]/@data-src')
    # bundle the fields into one row per movie
    results = zip(top, movie, performer, releasetime, movie_img)
    # open the CSV once per page; newline='' avoids blank rows on Windows
    with open('file.csv', 'a', newline='', encoding='utf-8') as f:
        wr = csv.writer(f)
        for row in results:
            wr.writerow(row)


for i in range(0, 10):
    url = 'https://maoyan.com/board/4?offset={}'.format(i * 10)
    response = requests.get(url, headers=headers)
    parse_page(response.text)
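
# Note: the module docstring mentions that the site may detect the crawler and
# demand a login. A common workaround (an assumption, not something this script
# already does) is to reuse the Cookie from a logged-in browser session and send
# it along with the other headers. The lines below are a minimal sketch of that
# idea; the cookie value is a placeholder that would have to be copied from the
# browser's developer tools before the requests are made.
#
# headers['Cookie'] = '<cookie string copied from a logged-in browser session>'
# response = requests.get(url, headers=headers)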