# -*- coding: utf-8 -*-
"""Print the body text of every article linked from a Sina military news roll page.

The index page is fetched first to collect the article URLs; each article
page is then fetched and the text of its ``.article p`` elements is printed.
"""
import sys
from urllib.parse import urljoin

import requests
from pyquery import PyQuery as pq

# Request headers sent with every HTTP request (a desktop Chrome User-Agent).
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; ) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/82.0.4068.4 Safari/537.36'
}

# Roll/index page whose links point at the individual article pages.
INDEX_URL = 'http://mil.news.sina.com.cn/roll/index.d.html?cid=57918'

# Seconds to wait for the server; requests waits indefinitely when no
# timeout is given, which would stall the whole crawl on one dead host.
REQUEST_TIMEOUT = 10


def _fetch_html(url):
    """Download *url* and return its body decoded as UTF-8 text.

    Raises:
        requests.RequestException: on connection errors, timeouts, or an
            HTTP error status (4xx/5xx).
    """
    response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    # Do not hand an error page to the parser as if it were an article.
    response.raise_for_status()
    # A stray invalid byte should not abort the crawl, so replace it.
    return response.content.decode('utf-8', errors='replace')


def Index():
    """Fetch the index page and print every article it links to.

    Anchors are selected with the ``.linkNews li a`` class selector. Anchors
    without an ``href`` are skipped, and an article whose request fails is
    reported on stderr and skipped so the remaining articles are still
    processed.

    Raises:
        requests.RequestException: if the index page itself cannot be fetched.
    """
    listing = pq(_fetch_html(INDEX_URL))
    for anchor in listing('.linkNews li a').items():
        href = anchor.attr('href')
        if not href:
            # attr() yields None when the anchor has no href attribute.
            continue
        # Resolve relative and protocol-relative links against the index URL;
        # absolute URLs pass through unchanged.
        article_url = urljoin(INDEX_URL, href)
        try:
            Detail(article_url)
        except requests.RequestException as exc:
            print('skipped {}: {}'.format(article_url, exc), file=sys.stderr)


def Detail(urlDetail):
    """Fetch one article page and print the text of its ``.article p`` elements.

    Args:
        urlDetail: Absolute URL of the article page.

    Raises:
        requests.RequestException: if the page cannot be fetched.
    """
    page = pq(_fetch_html(urlDetail))
    content = page('.article p').text()
    print(content)


if __name__ == '__main__':
    Index()