# -*- coding: utf-8 -*-
import os
import re
import sys
import urllib.request  # provides urlretrieve, used to download the video files
from importlib import reload

import requests
# Original author's note: sets the encoding; this line can also be deleted.
# NOTE(review): looks like a leftover of the Python 2 reload(sys) /
# setdefaultencoding idiom -- confirm whether it is still needed.
reload(sys)
# Module-level list of [name, url] pairs; get() appends to it and then
# downloads every entry it contains.
url_name=[]
# Define a function that fetches the data
def get():
    """Scrape the budejie.com video listing and download every video found.

    The listing page is fetched with a browser-like User-Agent, each
    ``j-r-list-c`` item is searched for a ``data-mp4`` URL and a title link,
    and every (title, URL) pair is appended to the module-level ``url_name``
    list as ``[name, url]``.  Afterwards every entry in ``url_name`` is
    downloaded to ``video/<name>.mp4`` and a progress line is printed.

    Returns:
        None.  Results are recorded in ``url_name`` and written to disk.

    Raises:
        requests.RequestException: if the listing page cannot be fetched
            (including when the request times out).
        OSError: if a download fails or the target file cannot be written
            (``urllib.error.URLError`` is a subclass of ``OSError``).
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; ) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/82.0.4068.4 Safari/537.36',
    }
    url = 'http://www.budejie.com/video/'
    # A timeout keeps the script from hanging forever on a stalled connection.
    html = requests.get(url, headers=headers, timeout=30).text

    # Patterns are compiled once, outside the loops.  re.S lets "." span
    # newlines, because list items and titles stretch over several lines.
    item_pattern = re.compile(
        r'(<div class="j-r-list-c">.*?</div>.*?</div>)', re.S)
    # Non-greedy, with the closing quote outside the group, so only the URL
    # itself is captured (no trailing quote or following attributes).
    video_pattern = re.compile(r'data-mp4="(.*?)"')
    title_pattern = re.compile(
        r'<a href="/detail-.{8}\.html">(.*?)</a>', re.S)

    for item in item_pattern.findall(html):
        video_urls = video_pattern.findall(item)
        if not video_urls:
            # Some items have no playable video, hence no data-mp4 attribute.
            continue
        titles = title_pattern.findall(item)
        # zip pairs each title with the video URL at the same position.
        for title, video_url in zip(titles, video_urls):
            url_name.append([title, video_url])

    if not url_name:
        return

    # urlretrieve does not create missing directories.
    os.makedirs('video', exist_ok=True)
    # Characters that are not allowed in file names on common file systems,
    # plus line breaks/tabs that the multi-line title capture can contain.
    unsafe_chars = re.compile(r'[\\/:*?"<>|\r\n\t]')
    for index, entry in enumerate(url_name):
        # entry[0] is the title, entry[1] is the video URL.
        safe_name = unsafe_chars.sub('_', str(entry[0])).strip()
        if not safe_name:
            # Fall back to a positional name so empty titles do not collide.
            safe_name = 'video_%d' % index
        target = os.path.join('video', '%s.mp4' % safe_name)
        urllib.request.urlretrieve(entry[1], target)
        print('down...' + entry[0])
# Run the scraper only when executed as a script, not when imported.
if __name__ == '__main__':
    get()