AllCity.txt
{ "北京": ["bj|北京"], "天津": ["tj|天津"], "上海": ["sh|上海"], "台湾": ["tw|台湾"], "香港": ["hk|香港"], "澳门": ["am|澳门"], "河北": ["bd|保定", "cangzhou|沧州", "chengde|承德", "dingzhou|定州", "gt|馆陶", "hd|邯郸", "hs|衡水", "lf|廊坊", "qhd|秦皇岛", "sjz|石家庄", "ts|唐山", "xt|邢台", "zjk|张家口", "zd|正定", "zx|赵县", "zhangbei|张北"], "河南": ["ay|安阳", "changge|长葛", "hb|鹤壁", "jiaozuo|焦作", "jiyuan|济源", "kaifeng|开封", "luoyang|洛阳", "luohe|漯河", "mg|明港", "ny|南阳", "pds|平顶山", "puyang|濮阳", "sq|商丘", "smx|三门峡", "xx|新乡", "xc|许昌", "xy|信阳", "yuzhou|禹州", "yanling|鄢陵", "zz|郑州", "zk|周口", "zmd|驻马店"], "黑龙江": ["dq|大庆","dxal|大兴安岭", "hrb|哈尔滨", "hegang|鹤岗", "heihe|黑河", "jms|佳木斯", "jixi|鸡西", "mdj|牡丹江", "qqhr|齐齐哈尔", "qth|七台河", "suihua|绥化", "sys|双鸭山", "yich|伊春"], "吉林": ["bc|白城", "baishan|白山", "cc|长春", "jl|吉林", "liaoyuan|辽源", "songyuan|松原", "sp|四平", "th|通化", "yanbian|延边"], "辽宁" : ["as|鞍山", "benxi|本溪", "cy|朝阳", "dl|大连", "dandong|丹东", "fushun|抚顺", "fx|阜新", "hld|葫芦岛", "jinzhou|锦州", "liaoyang|辽阳", "pj|盘锦", "sy|沈阳", "tl|铁岭", "wfd|瓦房店", "yk|营口", "pld|庄河"], "山东": ["bz|滨州", "dz|德州", "dy|东营", "heze|菏泽", "jn|济南", "jining|济宁", "kl|垦利", "linyi|临沂", "lc|聊城", "lw|莱芜", "qd|青岛", "rizhao|日照", "shouguang|寿光", "longkou|龙口", "ta|泰安", "wf|潍坊", "weihai|威海", "yt|烟台", "zb|淄博", "zaozhuang|枣庄", "zhangqiu|章丘", "zc|诸城"], "内蒙古": ["alsm|阿拉善盟", "bt|包头", "bycem|巴彦淖尔", "chifeng|赤峰", "erds|鄂尔多斯", "hu|呼和浩特", "hlbe|呼伦贝尔", "hlr|海拉尔", "tongliao|通辽", "wuhai|乌海", "wlcb|乌兰察布", "xl|锡林郭勒", "xam|兴安盟"], "江苏": ["cz|常州", "dafeng|大丰", "danyang|丹阳", "dongtai|东台", "donghai|东海", "ha|淮安", "haimen|海门", "haian|海安", "jingjiang|靖江", "jianhu|建湖", "liyang|溧阳", "lyg|连云港", "nj|南京", "nt|南通", "pizhou|邳州", "qidong|启东", "rugao|如皋", "rudong|如东", "su|苏州", "shuyang|沭阳", "suqian|宿迁", "taizhou|泰州", "taixing|泰兴", "wx|无锡", "xinghuashi|兴化", "xinyishi|新沂", "xz|徐州", "xzpeixian|沛县", "yangzhong|扬中", "yz|扬州", "yancheng|盐城", "zj|镇江"], "安徽": ["anqing|安庆", "bengbu|蚌埠", "bozhou|亳州", "ch|巢湖", "chizhou|池州", "chuzhou|滁州", "fy|阜阳", "hf|合肥", "hn|淮南", "huaibei|淮北", "huangshan|黄山", "hexian|和县", "hq|霍邱", "la|六安", "mas|马鞍山", "ningguo|宁国", "suzhou|宿州", "tianchang|天长", "tongling|铜陵", "tongcheng|桐城", "wuhu|芜湖", "xuancheng|宣城"], "山西": ["changzhi|长治", "dt|大同", "jincheng|晋城", "jz|晋中", "lvliang|吕梁", "linfen|临汾", "linyixian|临猗", "qingxu|清徐", "shuozhou|朔州", "ty|太原", "xinzhou|忻州", "yuncheng|运城", "yq|阳泉"], "陕西": ["ankang|安康", "baoji|宝鸡", "hanzhong|汉中", "sl|商洛", "tc|铜川", "wn|渭南", "xa|西安", "xianyang|咸阳", "yanan|延安", "yl|榆林"], "甘肃": ["by|白银", "dx|定西", "gn|甘南", "jinchang|金昌", "jyg|嘉峪关", "jq|酒泉", "lz|兰州", "linxia|临夏", "ln|陇南", "pl|平凉", "qingyang|庆阳", "tianshui|天水", "wuwei|武威", "zhangye|张掖"], "浙江": ["hz|杭州", "cixi|慈溪", "changxing|长兴", "deqing|德清", "dongyang|东阳", "haining|海宁", "huzhou|湖州", "jiashanx|嘉善", "jx|嘉兴", "jh|金华", "lishui|丽水", "nb|宁波", "quzhou|衢州", "ruiancity|瑞安", "sx|绍兴", "tongxiang|桐乡", "tz|台州", "wenling|温岭", "wz|温州", "xiangshanxian|象山", "yiwu|义乌", "yueqingcity|乐清", "yuyao|余姚", "zhoushan|舟山", "zhuji|诸暨"], "江西": ["fuzhou|抚州", "ganzhou|赣州", "jj|九江", "ja|吉安", "jdz|景德镇", "nc|南昌", "px|萍乡", "sr|上饶", "xinyu|新余", "yingtan|鹰潭", "yichun|宜春", "yxx|永新"], "湖北": ["es|恩施", "ez|鄂州", "hshi|黄石", "hg|黄冈", "jingzhou|荆州", "jingmen|荆门", "qianjiang|潜江", "shiyan|十堰", "snj|神农架", "suizhou|随州", "tm|天门", "wh|武汉", "xf|襄阳", "xiaogan|孝感", "xiantao|仙桃", "xianning|咸宁", "yc|宜昌", "yidou|宜都"], "湖南": ["cs|长沙", "changde|常德", "chenzhou|郴州", "hy|衡阳", "hh|怀化", "ld|娄底", "shaoyang|邵阳", "xiangtan|湘潭", "xiangxi|湘西", "yy|岳阳", "yongzhou|永州", "yiyang|益阳", "zhuzhou|株洲", "zjj|张家界"], "贵州": ["anshun|安顺", "bijie|毕节", "gy|贵阳", "lps|六盘水", "qdn|黔东南", "qn|黔南", "qxn|黔西南", "tr|铜仁", "zunyi|遵义"], "四川": ["ab|阿坝", "bazhong|巴中", "cd|成都", "deyang|德阳", "dazhou|达州", "ga|广安", "guangyuan|广元", "ganzi|甘孜", "ls|乐山", "luzhou|泸州", "liangshan|凉山", "mianyang|绵阳", "ms|眉山", "scnj|内江", "nanchong|南充", "panzhihua|攀枝花", "suining|遂宁", "yb|宜宾", "ya|雅安", "zg|自贡", "zy|资阳"], "云南": ["bs|保山", "cx|楚雄", "dali|大理", "diqing|迪庆", "dh|德宏", "honghe|红河", "km|昆明", "lj|丽江", "lincang|临沧", "nujiang|怒江", "pe|普洱", "qj|曲靖", "ws|文山", "bn|西双版纳", "yx|玉溪", "zt|昭通"], "新疆": ["aks|阿克苏", "ale|阿拉尔", "bygl|巴音郭楞", "betl|博尔塔拉", "changji|昌吉", "hami|哈密", "ht|和田", "klmy|克拉玛依", "kel|库尔勒", "ks|喀什", "kzls|克孜勒苏", "shz|石河子", "tlf|吐鲁番", "tmsk|图木舒克", "xj|乌鲁木齐", "wjq|五家渠", "yili|伊犁", "alt|阿勒泰", "tac|塔城"], "宁夏": ["guyuan|固原", "szs|石嘴山", "wuzhong|吴忠", "yinchuan|银川", "zw|中卫"], "青海": ["guoluo|果洛", "huangnan|黄南", "hx|海西", "haidong|海东", "haibei|海北", "hainan|海南", "xn|西宁", "ys|玉树"], "西藏": ["al|阿里", "changdu|昌都", "lasa|拉萨", "linzhi|林芝", "nq|那曲", "rkz|日喀则", "sn|山南", "rituxian|日土", "gaizexian|改则"], "广西": ["baise|百色", "bh|北海", "chongzuo|崇左", "fcg|防城港", "gl|桂林", "gg|贵港", "hc|河池", "hezhou|贺州", "liuzhou|柳州", "lb|来宾", "nn|南宁", "qinzhou|钦州", "wuzhou|梧州", "yulin|玉林"], "广东": ["chaozhou|潮州", "dg|东莞", "fs|佛山", "gz|广州", "huidong|惠东", "huizhou|惠州", "heyuan|河源", "jm|江门", "jy|揭阳", "mm|茂名", "mz|梅州", "qingyuan|清远", "sd|顺德", "sz|深圳", "st|汕头", "sg|韶关", "sw|汕尾", "taishan|台山", "yj|阳江", "yangchun|阳春", "yf|云浮", "zh|珠海", "zs|中山", "zhanjiang|湛江", "zq|肇庆", "boluo|博罗"], "福建": ["fz|福州", "jinjiangshi|晋江", "ly|龙岩", "nd|宁德", "np|南平", "nananshi|南安", "pt|莆田", "qz|泉州", "sm|三明", "shishi|石狮", "wuyishan|武夷山", "xm|厦门", "zhangzhou|漳州"], "海南": ["haikou|海口", "sansha|三沙", "sanya|三亚", "wzs|五指山", "qh|琼海", "wenchang|文昌", "wanning|万宁", "tunchang|屯昌", "qiongzhong|琼中", "lingshui|陵水", "df|东方", "da|定安", "cm|澄迈", "baoting|保亭", "baish|白沙", "tanzhou|儋州"] }
hotel.py
import json from selenium import webdriver from selenium.webdriver.common.action_chains import ActionChains import time from bs4 import BeautifulSoup import re import pymysql # 获取谷歌驱动 driver = webdriver.Chrome("chromedriver.exe") # 访问途牛网 driver.get("https://hotel.tuniu.com/list/602p0s0b0?cityName=%E5%B9%BF%E5%B7%9E") # 最大化 driver.maximize_window() # 睡眠3秒等待页面加载 time.sleep(3) # 判断一下数据为不为空 为空就将字符串"null"返回去 def judgeLen(temp): if len(temp) > 0: data = temp[0] else: data = "null" return data def getData(): # 连接数据库 connect = pymysql.connect(host="xxxxx", port=12345, user="xxx", passwd="xxxx",database="mydata",charset="utf8") # 获取游标 cursor = connect.cursor() # 建表操作在可视化提前建好即可,或者自行写代码创建 # 打开准备好的全部城市名字的文本文件 with open("AllCity.txt",mode="r",encoding="utf-8") as file: # 将文本读取进来 text = file.read() # 用json解析文本文件 jsondata = json.loads(text) # 遍历解析出来的字典 pro就是key 省份 for pro in jsondata: tempList = jsondata[pro] # 通过key遍历values 这里遍历出来的就是city for city in tempList: # 通过切割得到后面中文的城市名 place = (str(city).split("|")[1]) # -----------------自动修改城市名进行跳转----------------------- # 清空一下输入城市那个标签的内容 driver.find_element_by_css_selector(".city-div > input:nth-child(1)").clear() # 将遍历出来的中文城市名填进去 driver.find_element_by_css_selector(".city-div > input:nth-child(1)").send_keys(place) time.sleep(2) # 点击一下提示框的第一个地点 就会自动跳转到那个城市 ActionChains(driver).move_by_offset(226, 263).click().perform() # 回到原点 ActionChains(driver).move_by_offset(-226, -263).perform() time.sleep(5) # 对驱动返回的页面内容进行解析 bs = BeautifulSoup(driver.page_source, "html.parser") # 获取每个酒店div标签 data = bs.find_all("div", class_="hotel-item") # 遍历div标签 for div in data: # 正则表达式获取每个数据 # 酒店名 namepatt = re.compile(r'span.*?hotel-name f-m.*?>(.*?)') # 钻石图标,当做星星用了 diapatt = re.compile(r'(icon icon-diamond)') # 星星 starpatt = re.compile(r'(icon icon-star)') # 评分 ratingpatt = re.compile( r'"hotel-score f-b f-DINA" data-v-74d0f10f="" style="background: rgb.*?;">(.*?)