# -*- coding: utf-8 -*-
"""Scrape the Douban Top 250 movie list and save it to ``films.xlsx``.

The list is spread over 10 pages of 25 movies each. A page is selected with
the ``start`` query parameter, which is the zero-based offset of its first
movie (page 1 -> ``start=0``, page 2 -> ``start=25``, ...), so every page,
including the first one, is fetched through the same URL template.

Some movies have no one-line recommendation quote; an empty string is stored
for them instead.
"""
import requests
from bs4 import BeautifulSoup
import openpyxl

# Browser-like User-Agent sent with every request (presumably needed because
# the site rejects the default ``python-requests`` one -- verify if changed).
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 6.1; ) AppleWebKit/537.36 '
                  '(KHTML, like Gecko) Chrome/82.0.4068.4 Safari/537.36'
}

# Seconds to wait for the server before giving up; without a timeout
# ``requests.get`` may block forever on a stalled connection.
REQUEST_TIMEOUT = 10


def _parse_movies(html):
    """Return one ``[number, title, quote, score, link]`` row per movie.

    Args:
        html: Markup of a single Top 250 listing page.

    Returns:
        A list of rows, in page order. All fields are strings taken verbatim
        from the page; ``quote`` is ``''`` when the movie has none.

    Raises:
        RuntimeError: If the page does not contain the ``ol.grid_view`` movie
            list (for example when an error or verification page was served).
    """
    soup = BeautifulSoup(html, 'html.parser')
    # Every movie is an <li> inside the single <ol class="grid_view">.
    grid_view = soup.find('ol', class_='grid_view')
    if grid_view is None:
        raise RuntimeError(
            'movie list (ol.grid_view) not found; the page layout changed '
            'or the request was blocked'
        )

    rows = []
    for entry in grid_view.find_all('li'):
        # The recommendation quote is optional, so look it up before use.
        quote = entry.find('span', class_='inq')
        rows.append([
            entry.find('em').text,
            # An entry may hold several "title" spans; the first one is used.
            entry.find('span', class_='title').text,
            quote.text if quote is not None else '',
            entry.find('span', class_='rating_num').text,
            entry.find('a')['href'],
        ])
    return rows


# The first row is the spreadsheet header; scraped rows are appended below it.
lists = [['编号', '名称', '推荐', '评分', '链接']]

for page in range(10):
    url = 'https://movie.douban.com/top250?start={}&filter='.format(page * 25)
    print(url)
    response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page.
    response.raise_for_status()
    lists.extend(_parse_movies(response.text))

wb = openpyxl.Workbook()
sheet = wb.active
sheet.title = '我喜欢的电影'
for item in lists:
    sheet.append(item)
wb.save('films.xlsx')