主页 M

Python 应用 parsel 与 XPath 采集并构建代理 IP 池

2020-06-29 网页编程网 网页编程网
# -*- coding: utf-8 -*-
import requests
import parsel
# Scrape the proxy table on the landing page of ip3366.net and collect every
# row into `proxies_list` as a single-entry dict of the form {scheme: "ip:port"}.

# Page that is fetched and parsed below.
base_url = 'http://www.ip3366.net/'
# Browser-like User-Agent header sent with the request.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.113 Safari/537.36"
}
proxies_list = []

# A timeout keeps the script from hanging forever on an unresponsive server,
# and raise_for_status() prevents an HTTP error page from being parsed as if
# it were the proxy listing.
response = requests.get(url=base_url, headers=headers, timeout=10)
response.raise_for_status()
# Let requests detect the charset from the body instead of trusting the header.
response.encoding = response.apparent_encoding
html_data = response.text

parse_data = parsel.Selector(html_data)
tr_list = parse_data.xpath('//table[@class="table table-bordered table-striped"]/tbody/tr')

for tr in tr_list:
    # Cells are read by position: 4th -> proxy type, 1st -> IP, 2nd -> port.
    http_type = tr.xpath('./td[4]/text()').get()
    ip = tr.xpath('./td[1]/text()').get()
    port = tr.xpath('./td[2]/text()').get()

    # .get() returns None when a cell has no text node (e.g. a header or
    # malformed row); skip such rows instead of crashing on None + str.
    if http_type is None or ip is None or port is None:
        continue

    http_type = http_type.strip().lower()
    ip = ip.strip()
    port = port.strip()
    if not (http_type and ip and port):
        continue

    # The key is lower-cased because requests looks up `proxies` entries by
    # the lower-case URL scheme ("http"/"https"); a key such as "HTTP" would
    # be silently ignored when the dict is passed as `proxies=`.
    proxies_dict = {http_type: ip + ':' + port}
    proxies_list.append(proxies_dict)

print(proxies_list)
阅读原文
阅读 3941
123 显示电脑版