# -*- coding: utf-8 -*-
"""Scrape the Sina military news roll page and print every article's text.

The list page is fetched first to collect the article links; each linked
article page is then fetched and the text of its paragraphs is printed.
"""
from urllib.parse import urljoin

import requests
from pyquery import PyQuery as pq

# Browser-like User-Agent sent with every request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; ) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/82.0.4068.4 Safari/537.36'
}

# List ("roll") page whose entries link to the individual articles.
INDEX_URL = 'http://mil.news.sina.com.cn/roll/index.d.html?cid=57918'

# Seconds to wait for the server before giving up; without a timeout a
# stalled connection would block the crawl forever.
REQUEST_TIMEOUT = 10


def _fetch_html(url):
    """Download *url* and return its body decoded as UTF-8 text.

    Raises:
        requests.RequestException: on connection failure or timeout.
        UnicodeDecodeError: if the body is not valid UTF-8.
    """
    response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    # Decode the raw bytes explicitly instead of relying on the encoding
    # that requests guesses for ``response.text``.
    return response.content.decode()


def Index():
    """Fetch the list page and process every article it links to.

    Each anchor matched by ``.linkNews li a`` is passed to :func:`Detail`.
    Anchors without an ``href`` are skipped, and relative links are
    resolved against the list page URL.
    """
    document = pq(_fetch_html(INDEX_URL))
    # Class selector: anchors inside the list items of ``.linkNews``.
    for anchor in document('.linkNews li a').items():
        href = anchor.attr('href')
        if not href:
            # attr() yields None when the attribute is absent; requesting
            # that would raise instead of fetching anything.
            continue
        # Absolute URLs pass through urljoin unchanged; relative and
        # protocol-relative ones become fetchable.
        Detail(urljoin(INDEX_URL, href))


def Detail(urlDetail):
    """Fetch one article page and print the text of its paragraphs.

    Args:
        urlDetail: Absolute URL of the article page.
    """
    document = pq(_fetch_html(urlDetail))
    content = document('.article p').text()
    print(content)


if __name__ == '__main__':
    Index()
Python 使用 pyquery 采集新浪新闻列表页和内容页(需对响应数据进行解码)
阅读:3203 输入:2020-03-19 05:28:40
- 上一篇:java枚举与可变参数
- 下一篇:[gkk]传智-适配器设计模式,如同电源适配器