import requests
from bs4 import BeautifulSoup
import smtplib
import time
def _sql_quote(value):
    """Return *value* made safe for a double-quoted SQL string literal.

    Embedded double quotes are doubled so scraped text cannot terminate the
    literal early.
    NOTE(review): backslashes are left untouched because their meaning is
    dialect-dependent — confirm against the target database.
    """
    return str(value).replace('"', '""')


def weather(url, timeout=10, out_path='afs6.txt'):
    """Scrape one site-detail page and append it to a file as an SQL INSERT.

    The page is fetched, decoded as gb18030, and the site name, site URL,
    two-level category and description are extracted. One
    ``insert into table_temp ...`` statement is appended to *out_path*.

    Args:
        url: Page address. The text after the first ``=`` is used as the
            record number, so it must contain an ``=``.
        timeout: Seconds to wait for the server before ``requests`` gives up.
        out_path: File the INSERT statement is appended to.

    Returns:
        ``'null'`` when no record could be produced: the URL has no ``=``,
        the response is shorter than 200 characters, or an expected element
        is missing from the page. ``None`` after a record has been written.

    Raises:
        requests.RequestException: If the HTTP request fails or times out.
    """
    # Validate the URL first so a malformed address costs no network round-trip.
    url_parts = url.split("=")
    if len(url_parts) < 2:
        return 'null'
    record_id = url_parts[1]

    # Without a timeout a stalled server would block the caller forever.
    response = requests.get(url, timeout=timeout)
    response.encoding = 'gb18030'
    html = response.text
    # Very short bodies are treated as "no such record"; skip parsing them.
    if len(html) < 200:
        return 'null'

    soup = BeautifulSoup(html, 'html.parser')
    name_tag = soup.find(class_='name_r r')
    print(name_tag)
    url_tag = soup.find(class_='url_r r')
    category_tag = soup.find(class_='leibie_r r')
    desc_tag = soup.find(class_='jianjie_r r')
    # find() returns None for absent elements; bail out instead of crashing.
    if any(tag is None for tag in (name_tag, url_tag, category_tag, desc_tag)):
        return 'null'

    link_tag = name_tag.find('a')
    desc_para = desc_tag.find('p')
    if link_tag is None or desc_para is None:
        return 'null'

    # Category text looks like "<level 1> >> <level 2>"; either part may be absent.
    category_text = category_tag.text.strip()
    level1 = ''
    level2 = ''
    if len(category_text) > 4:
        levels = category_text.split(' >> ')
        level1 = levels[0]
        level2 = levels[1] if len(levels) > 1 else ''

    fields = (
        record_id,
        link_tag.text,
        url_tag.get_text(),
        level1,
        level2,
        desc_para.get_text(),
    )
    statement = 'insert into table_temp (number,site,siteHref,list1,list2,desc) values ("{}","{}","{}","{}","{}","{}");'.format(
        *(_sql_quote(field) for field in fields)
    )
    with open(out_path, 'a', encoding='utf-8-sig') as f:
        f.write(statement)
def main(first_id=24280, last_id=90000, delay=5):
    """Crawl the detail pages with ids in ``[first_id, last_id)``.

    Sleeps *delay* seconds before each request, prints the URL, and passes it
    to ``weather``. A page whose HTTP request fails is reported and skipped
    so one transient network error does not abort the whole run.
    """
    # Original note next to the range: 24956 (presumably a resume point — confirm).
    for page_id in range(first_id, last_id):
        time.sleep(delay)
        # NOTE(review): the host contains '*' and '+', which are not valid
        # host-name characters — it looks obfuscated; confirm the real host.
        url = 'http://www.w*o-r+l*d-6*8.com/show.asp?id={}'.format(page_id)
        # Single-page example kept from the original: ...show.asp?id=10376
        print(url)
        try:
            weather(url)
        except requests.RequestException as exc:
            print('skipped {}: {}'.format(url, exc))


if __name__ == '__main__':
    main()

# Source article title: "Python: using BeautifulSoup to crawl the World URL
# directory and generate a text file"
# Views: 4255  Entered: 2021-03-27 21:15:33