# -*- coding: utf-8 -*-
import sys
from urllib.parse import urljoin

import requests
from pyquery import PyQuery as pq
# Request headers passed to requests.get(); the User-Agent string identifies
# the client as a desktop Chrome browser.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 6.1; ) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/82.0.4068.4 Safari/537.36'
    ),
}
def Index(url='http://mil.news.sina.com.cn/roll/index.d.html?cid=57918',
          timeout=10):
    """Fetch the index page and hand every article link on it to Detail().

    Args:
        url: Address of the index (roll) page to download. Defaults to the
            page this script was originally written for.
        timeout: Seconds to wait for the index request before giving up.

    Raises:
        requests.RequestException: If the index page itself cannot be
            downloaded or answers with an HTTP error status. Failures of
            individual article pages are reported on stderr and skipped.
        UnicodeDecodeError: If the index page body is not valid UTF-8.
    """
    # Without a timeout requests may block forever on a stalled connection.
    response = requests.get(url, headers=headers, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of parsing an error page as the index.
    response.raise_for_status()

    # bytes.decode() defaults to UTF-8.
    index_page = pq(response.content.decode())

    # Class selector: <a> elements inside <li> elements under ".linkNews".
    for link in index_page('.linkNews li a').items():
        href = link.attr('href')
        if not href:
            # An anchor without an href has nothing to download.
            continue
        # Resolve relative and protocol-relative links against the index
        # URL; absolute links pass through unchanged.
        article_url = urljoin(url, href)
        try:
            Detail(article_url)
        except requests.RequestException as exc:
            # One unreachable article should not abort the whole crawl.
            print('skipping %s: %s' % (article_url, exc), file=sys.stderr)
def Detail(urlDetail, timeout=10):
    """Download one article page and print the text of its paragraphs.

    Args:
        urlDetail: Absolute URL of the article page to download.
        timeout: Seconds to wait for the request before giving up.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
        UnicodeDecodeError: If the page body is not valid UTF-8.
    """
    # Without a timeout requests may block forever on a stalled connection.
    response = requests.get(urlDetail, headers=headers, timeout=timeout)
    # Do not scrape and print an error page as if it were article text.
    response.raise_for_status()

    # bytes.decode() defaults to UTF-8.
    article_page = pq(response.content.decode())
    # Text of all <p> elements under elements with class "article".
    content = article_page('.article p').text()
    print(content)
# Run the crawl only when this file is executed as a script, not on import.
if __name__ == "__main__":
    Index()