主页 M

Python 面向对象 + XPath 爬取起点中文网

2020-03-30 网页编程网 网页编程网
# -*- coding: utf-8 -*-
import requests
from lxml import etree#抽取数据,只写了爬前页,章节页没写
# Inert module-level string literal used as a block comment; it is never
# executed. It holds a minimal OOP example: a class named Pig (class names are
# capitalised) with one method, eat(), that prints a greeting, followed by
# creating an instance and calling the method.
'''oop例子
class Pig:#大写
    def eat(self):
      print('Hello World')
           
t=Pig()
t.eat()
'''
class Spider(object):
    """Minimal crawler for the Qidian "all books" listing page.

    ``start_request`` downloads the listing page and prints every book title
    next to its link. ``start_chapter`` downloads an arbitrary page and prints
    the parsed root element; no chapter data is extracted yet.
    """

    # Listing page fetched by start_request().
    LIST_URL = 'https://www.qidian.com/all'
    # Seconds to wait for the server. Without an explicit timeout, requests
    # never times out and a stalled connection would hang the script.
    REQUEST_TIMEOUT = 10

    def _fetch_dom(self, url):
        """Download *url* and return the lxml tree parsed from its HTML.

        Raises:
            requests.RequestException: on connection failure, timeout, or an
                HTTP 4xx/5xx response status.
        """
        response = requests.get(url, timeout=self.REQUEST_TIMEOUT)
        # Fail loudly on an error status instead of silently parsing an
        # error page that would yield no matches.
        response.raise_for_status()
        # bytes.decode() with no argument decodes as UTF-8.
        html = response.content.decode()
        return etree.HTML(html)

    def start_request(self):
        """Fetch the listing page and print one "title href" line per book."""
        dom = self._fetch_dom(self.LIST_URL)

        # Book titles: text of the <a> inside each book's <h4> heading.
        titles = dom.xpath('//div[@class="book-mid-info"]/h4/a/text()')
        # Book links: href attribute of the same <a> elements.
        hrefs = dom.xpath('//div[@class="book-mid-info"]/h4/a/@href')

        # Pair titles with links positionally. Distinct loop names keep the
        # two lists from being rebound to single strings during iteration.
        for title, href in zip(titles, hrefs):
            print(title, href)

    def start_chapter(self, url):
        """Fetch the page at *url* and print its parsed root element.

        Chapter extraction is not implemented; this only confirms the page
        can be downloaded and parsed.
        """
        dom = self._fetch_dom(url)
        print(dom)
        
      
# Script entry: runs as soon as the module is executed or imported. Creates a
# crawler and prints the titles and links found on the listing page.
spider=Spider()
spider.start_request()
# Disabled example: fetch a single book page and print its parsed root element.
#spider.start_chapter('https://book.qidian.com/info/1009480992')
阅读原文
阅读 3583
123 显示电脑版