# -*- coding: utf-8 -*-
"""
Scrape Tianyancha (tianyancha.com) search results for a company keyword.

The site blocks anonymous requests (anti-crawling); the workaround that made
this script succeed was to log in via a browser and copy the session Cookie
into the request headers.
"""
import requests
from lxml import etree
# Request headers: the Cookie (pasted from a logged-in browser session) is
# what gets past the site's anti-crawling check; Origin and User-Agent make
# the request look like an ordinary browser.
headers = {
    'Cookie': 'your long cookie',
    'Origin': 'https://www.tianyancha.com',
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; ) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/82.0.4068.4 Safari/537.36',
}

# Search URL; "key" is the URL-encoded query keyword (%E4%BA%AC%E4%B8%9C = "京东").
url = 'https://www.tianyancha.com/search?key=%E4%BA%AC%E4%B8%9C'
# Fetch the search page. Without the logged-in Cookie the site detects the
# request as a crawler (inspect with requests.get(url).request.headers).
# A timeout is set so a stalled connection cannot hang the script forever.
response = requests.get(url, headers=headers, timeout=10).text
html = etree.HTML(response)

# XPath note: '//' selects from the document root at any depth, so '//div'
# matches every div element in the page.
# Company names: anchor text inside the result header divs.
name = html.xpath('//div[@class="header"]/a/text()')
# Second column of each result row — presumably the legal representative's
# name; verify against the current page markup.
nickName = html.xpath('//div[@class="title -wider text-ellipsis"]/a/text()')
# Third column — presumably the registered-capital figure; verify likewise.
money = html.xpath('//div[@class="title -narrow text-ellipsis"]/span/text()')
#拼在一起
# Write one CSV-style line per result (name, representative, capital).
# Open the file ONCE with a context manager — the original reopened and
# closed it on every loop iteration, which is wasteful and leak-prone.
# Append mode 'a' writes to an existing file or creates a new one.
# encoding='utf-8' makes the Chinese output portable regardless of OS locale.
with open('天眼查数据.txt', 'a', encoding='utf-8') as out:
    for n, N, m in zip(name, nickName, money):
        out.write('{},{},{}\n'.format(n, N, m))