主页 M

Python应用BeautifulSoup爬取世界网址并生成文本

2021-03-27 网页编程网 网页编程网
import requests
from bs4 import BeautifulSoup
import smtplib
import time
def _sql_quote(value):
    """Return *value* with embedded double quotes doubled for a "..." SQL literal."""
    # Doubling the delimiter keeps scraped text from terminating the literal early.
    # NOTE(review): some dialects (e.g. MySQL in its default mode) also treat a
    # backslash as an escape character -- confirm the target database and extend
    # this helper if needed.
    return value.replace('"', '""')


def weather(url, out_path='afs6.txt', timeout=30):
    """Fetch one site-detail page and append its fields as an SQL INSERT statement.

    The page is decoded as gb18030 and parsed with BeautifulSoup.  The site
    name, site URL, category path and description are read from the elements
    with classes ``name_r r``, ``url_r r``, ``leibie_r r`` and ``jianjie_r r``
    and appended to *out_path* as a single ``insert into table_temp ...;``
    statement.

    Args:
        url: Address of the page to fetch.  The text between the first and
            second ``=`` (the ``id`` value for the URLs built in this file)
            is stored as the record number.
        out_path: File the INSERT statement is appended to (UTF-8 with BOM).
        timeout: Seconds to wait for the server before giving up.

    Returns:
        ``'null'`` when the page could not be fetched, is shorter than 200
        characters, or lacks one of the expected elements; ``None`` otherwise
        (including when the category text is too short to be recorded).
    """
    try:
        # Without a timeout a stalled server would block the crawl forever.
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as exc:
        print('request failed for {}: {}'.format(url, exc))
        return 'null'
    response.encoding = 'gb18030'

    # Cheap length check first: very short bodies are treated as "no record".
    if len(response.text) < 200:
        return 'null'

    bs = BeautifulSoup(response.text, 'html.parser')
    site_name = bs.find(class_='name_r r')
    print(site_name)
    site_link = site_name.find('a') if site_name is not None else None
    site_url = bs.find(class_='url_r r')
    category = bs.find(class_='leibie_r r')
    desc = bs.find(class_='jianjie_r r')
    desc_para = desc.find('p') if desc is not None else None

    # A page missing any expected element is skipped instead of raising
    # AttributeError and ending the caller's loop.
    required = (site_link, site_url, category, desc_para)
    if any(node is None for node in required):
        return 'null'

    category_text = category.text.strip()
    if len(category_text) <= 4:
        return None

    # Category path looks like "level1 >> level2"; tolerate a missing level 2.
    levels = category_text.split(' >> ')
    list1 = levels[0]
    list2 = levels[1] if len(levels) > 1 else ''

    url_parts = url.split('=')
    number = url_parts[1] if len(url_parts) > 1 else ''

    values = (
        number,
        site_link.text,
        site_url.get_text(),
        list1,
        list2,
        desc_para.get_text(),
    )
    quoted = ','.join('"{}"'.format(_sql_quote(value)) for value in values)
    # NOTE(review): `desc` is a reserved word in several SQL dialects; confirm
    # the target database accepts it unquoted as a column name.
    statement = (
        'insert into table_temp '
        '(number,site,siteHref,list1,list2,desc) values ({});'.format(quoted)
    )
    with open(out_path, 'a', encoding='utf-8-sig') as f:
        f.write(statement)
    return None

if __name__ == '__main__':
    # Walk the detail pages by sequential numeric id and hand each URL to
    # weather(), which appends one INSERT statement per usable page.
    # NOTE(review): the host below contains '*', '+' and '-' between letters;
    # '*' and '+' are not valid hostname characters, so the address looks
    # deliberately masked -- substitute the real host before running.
    for i in range(24280, 90000):  # 24956
        # Pause between requests so the target server is not hammered.
        time.sleep(5)
        url = 'http://www.w*o-r+l*d-6*8.com/show.asp?id={}'.format(i)
        print(url)
        try:
            weather(url)
        except Exception as exc:
            # Top-level boundary: report the failure and keep crawling so one
            # bad page does not abort the remaining ids.
            print('failed to process {}: {!r}'.format(url, exc))
阅读原文
阅读 3672
123 显示电脑版