"""Crawl numbered ``show.asp?id=N`` pages and emit SQL INSERT statements.

For every page id in a fixed range the script downloads the page, pulls the
site name, site URL, category breadcrumb and description out of the HTML,
and appends one ``insert into table_temp ...`` statement to ``afs6.txt``.
"""
import requests
from bs4 import BeautifulSoup
import smtplib
import time

# Seconds to wait for the server before giving up. Without an explicit
# timeout ``requests`` may block forever on a stalled connection.
REQUEST_TIMEOUT = 30


def _sql_quote(value):
    """Escape *value* for embedding inside a double-quoted SQL string literal.

    Only the quote character is doubled, so scraped text containing ``"``
    can no longer terminate the literal early.
    NOTE(review): the target SQL dialect is not visible here; if it is MySQL
    with backslash escapes enabled, backslashes may need escaping as well.
    """
    return value.replace('"', '""')


def _split_category(text):
    """Return the first two levels of a ``'parent >> child'`` breadcrumb.

    Text of four characters or fewer is treated as "no category" and yields
    two empty strings; a breadcrumb without a separator yields an empty
    second level instead of raising ``IndexError``.
    """
    if len(text) <= 4:
        return '', ''
    parts = text.split(' >> ')
    return parts[0], (parts[1] if len(parts) > 1 else '')


def weather(url):
    """Scrape one detail page and append its INSERT statement to afs6.txt.

    Args:
        url: Page address. The text between the first and second ``=`` is
            written as the record number (empty if the URL has no ``=``).

    Returns:
        The string ``'null'`` when the response body is shorter than 200
        characters or an expected page element is missing; ``None`` after
        a record has been written.

    Raises:
        requests.RequestException: if the HTTP request fails or times out.
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    response.encoding = 'gb18030'
    html = response.text

    # Very short bodies are skipped; check before paying for a full parse.
    if len(html) < 200:
        return 'null'

    bs = BeautifulSoup(html, 'html.parser')

    site_name = bs.find(class_='name_r r')
    print(site_name)
    site_link = site_name.find('a') if site_name is not None else None
    site_url = bs.find(class_='url_r r')
    category = bs.find(class_='leibie_r r')
    desc = bs.find(class_='jianjie_r r')
    desc_para = desc.find('p') if desc is not None else None

    # ``find`` returns None for a missing element; treat an unexpected page
    # layout like a skipped page instead of crashing with AttributeError.
    if None in (site_link, site_url, category, desc_para):
        return 'null'

    l1, l2 = _split_category(category.text.strip())

    url_parts = url.split("=")
    number = url_parts[1] if len(url_parts) > 1 else ''

    values = (
        number,
        site_link.text,
        site_url.get_text(),
        l1,
        l2,
        desc_para.get_text(),
    )
    with open('afs6.txt', 'a', encoding='utf-8-sig') as f:
        f.write('insert into table_temp (number,site,siteHref,list1,list2,desc) values ("{}","{}","{}","{}","{}","{}");'.format(*[_sql_quote(v) for v in values]))


if __name__ == '__main__':
    # NOTE(review): the host name below contains '*', '+' and '-' characters
    # and looks deliberately obfuscated; substitute the real host before use.
    for i in range(24280, 90000):  # 24956
        time.sleep(5)  # pause between requests
        url = 'http://www.w*o-r+l*d-6*8.com/show.asp?id={}'.format(i)
        print(url)
        try:
            weather(url)
        except requests.RequestException as exc:
            # One failed page must not abort the whole crawl.
            print('request failed for {}: {}'.format(url, exc))
Python 应用 BeautifulSoup 爬取世界网址并生成文本
阅读:3668 输入:2021-03-27 21:15:33