# -*- coding: utf-8 -*-
"""Scrape the proxy table on ip3366.net and print the collected proxies.

The script downloads the landing page, selects every row of the
``table table-bordered table-striped`` table and, for each row, reads the
IP (column 1), port (column 2) and protocol type (column 4).  Each row
becomes a one-entry dict ``{type: "ip:port"}`` appended to
``proxies_list``, which is printed at the end.
"""
import requests
import parsel

base_url = 'http://www.ip3366.net/'
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.113 Safari/537.36"
}
proxies_list = []

# Without a timeout a stalled server would block this script forever.
response = requests.get(url=base_url, headers=headers, timeout=10)
# Fail loudly on 4xx/5xx instead of silently parsing an error page.
response.raise_for_status()
# Let requests guess the charset from the body rather than trusting headers.
response.encoding = response.apparent_encoding
html_data = response.text

parse_data = parsel.Selector(html_data)
tr_list = parse_data.xpath('//table[@class="table table-bordered table-striped"]/tbody/tr')
for tr in tr_list:
    http_type = tr.xpath('./td[4]/text()').get()
    ip = tr.xpath('./td[1]/text()').get()
    port = tr.xpath('./td[2]/text()').get()
    # .get() yields None when a cell has no text node; concatenating None
    # would raise TypeError, so rows with a missing field are skipped.
    if http_type is None or ip is None or port is None:
        continue
    # NOTE(review): the key is the protocol text exactly as scraped. If these
    # dicts are meant to be passed as requests' `proxies=` argument, the key
    # probably needs to be a lowercase scheme ("http"/"https") -- confirm.
    proxies_dict = {http_type: ip + ':' + port}
    proxies_list.append(proxies_dict)

print(proxies_list)
python应用parsel xpath采集构建代理ip池
阅读:3936 输入:2020-06-29 21:12:16
- 上一篇:带内容提示幻灯轮播图
- 下一篇:css3设计伪元素实现吃豆豆动画特效