# -*- coding: utf-8 -*-
"""
第1页,就是0,方便合起来写。每页的url找其规律
有一部电影没有,用三目运算符处理
列表换个位置
"""
import requests
from bs4 import BeautifulSoup
import openpyxl
# Request headers sent with every page fetch; only a browser-like
# User-Agent string is supplied.
headers = {
    'user-agent': (
        'Mozilla/5.0 (Windows NT 6.1; ) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/82.0.4068.4 Safari/537.36'
    ),
}
# Header row first; one row per movie is appended while scraping.
lists = [['编号', '名称', '推荐', '评分', '链接']]

# Ten pages are requested; page N is addressed with start = N * 25.
for page in range(10):
    url = 'https://movie.douban.com/top250?start={}&filter='.format(page * 25)
    print(url)
    # A timeout keeps a stalled connection from hanging the script forever.
    response = requests.get(url, headers=headers, timeout=10)
    # Fail loudly on an HTTP error status instead of parsing an error page.
    response.raise_for_status()
    bs = BeautifulSoup(response.text, 'html.parser')

    # The movie entries are the <li> children of <ol class="grid_view">.
    grid_view = bs.find('ol', class_='grid_view')
    if grid_view is None:
        raise RuntimeError('movie list not found on page: {}'.format(url))

    # Several <li> elements match, hence find_all rather than find.
    for entry in grid_view.find_all('li'):
        number = entry.find('em').text
        title = entry.find('span', class_='title').text
        # Not every movie carries a recommendation quote, so this may be None.
        quote = entry.find('span', class_='inq')
        score = entry.find('span', class_='rating_num').text
        # Kept under its own name so the page URL above is not overwritten.
        link = entry.find('a')['href']
        lists.append([
            number,
            title,
            quote.text if quote is not None else '',
            score,
            link,
        ])
# Write every collected row into the active worksheet of a new workbook
# and save it to disk.
workbook = openpyxl.Workbook()
worksheet = workbook.active
worksheet.title = '我喜欢的电影'
for row in lists:
    worksheet.append(row)
workbook.save('films.xlsx')