# -*- coding: utf-8 -*- import requests from lxml import etree from fake_useragent import UserAgent import time class kitchen(object): u = 0 def __init__(self): self.url = "https://www.xiachufang.com/category/40076/" ua = UserAgent(verify_ssl=False) for i in range(1, 50): self.headers = { 'User-Agent': ua.random, } '''发送请求 获取响应''' def get_page(self, url): res = requests.get(url=url, headers=self.headers) html = res.content.decode("utf-8") time.sleep(2) return html def parse_page(self, html): parse_html = etree.HTML(html) image_src_list = parse_html.xpath('//li/div/a/@href') for i in image_src_list: try: url = "https://www.xiachufang.com" + i # print(url) html1 = self.get_page(url) # 第二个发生请求 parse_html1 = etree.HTML(html1) # print(parse_html1) num = parse_html1.xpath('.//h2[@id="steps"]/text()')[0].strip() name = parse_html1.xpath('.//li[@class="container"]/p/text()') ingredients = parse_html1.xpath('.//td//a/text()') self.u += 1 # print(self.u) # print(str(self.u)+"."+house_dict["名 称 :"]+":") # da=tuple(house_dict["材 料:"]) food_info = ''' 第 %s 种 菜 名 : %s 原 料 : %s 下 载 链 接 : %s, ====== ''' % (str(self.u), num, ingredients, url) # print(food_info) f = open('下厨房菜谱.txt', 'a', encoding='utf-8') f.write(str(food_info)) print(str(food_info)) f.close() except: print('xpath没获取到内容!') def main(self): startPage = int(input("起始页:")) endPage = int(input("终止页:")) for page in range(startPage, endPage + 1): url = self.url.format(page) html = self.get_page(url) self.parse_page(html) time.sleep(2.4) print("=第 %s 页爬取成功=" % page) if __name__ == '__main__': imageSpider = kitchen() imageSpider.main()