主页 M

python应用re正则、字体工具采集数据,实现字体替换

2020-10-09 网页编程网 网页编程网
import re
import base64
import io
from fontTools.ttLib import TTFont
import requests


# Page to fetch; the replacement step further down rewrites hexadecimal
# character references found in its text.
url = 'http://tatungmotor.b2b.huangye88.com/'

# Send a desktop-browser User-Agent with the request.  requests applies no
# timeout unless one is given, so without it an unresponsive server would
# hang the script forever; 10 seconds is an arbitrary but generous bound.
response = requests.get(
    url,
    headers={
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36',
    },
    timeout=10,
)
# Working copy of the page text; it is rewritten in place further down.
response_ = response.text

# Capture every run of text between '8;base64,' and the next double quote.
# NOTE(review): presumably this is the payload of an inline
# `...charset=utf-8;base64,...` font data URL -- confirm against the page.
font_data = re.findall(r'8;base64,(.*?)"', response_)


# Lookup table from glyph name (the values of the font's character map) to
# the plain character that should appear in the page text.
convert_dict = dict(
    hyphen='-',
    one='1',
    two='2',
    three='3',
    four='4',
    five='5',
    six='6',
    seven='7',
    eight='8',
    nine='9',
    period='.',
    zero='0',
)
#print(font_data[0])
#font_face='AAEAAAAKAIAAAwAgT1MvMla194sAAACsAAAAYGNtYXALMAPOAAABDAAAAa5nbHlmZrwdwAAAArwAAAakaGVhZBQx4JoAAAlgAAAANmhoZWEFswFxAAAJmAAAACRobXR4DVYBYgAACbwAAAAubG9jYQwQCnYAAAnsAAAAIm1heHAAFABOAAAKEAAAACBuYW1lUuodRwAACjAAAAGecG9zdDHgxUkAAAvQAAAAdAAEAgsBkAAFAAACmQLMAAAAjwKZAswAAAHrADMBCQAAAgAGAwAAAAAAAAAAAAEQAAAAAAAAAAAAAABQZkVkAMAAI4N8Ayz/LABcAywA1AAAAAEAAAAAAxgAAAAAACAAAQAAAAQAAAADAAAAJAABAAAAAABcAAMAAQAAACQAAwAKAAABYgAEADgAAAAKAAgAAgACACMAKwAtAC///wAAACMAKgAtAC/////e/9j/1//WAAEAAAAAAAAAAAAAAAABBgAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABAAAAAAAAAgMABAAFAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAMAAAAAABMAAAAAAAAAAUAAAAjAAAAIwAAAAEAAAAqAAAAKwAAAAIAAAAtAAAALQAAAAQAAAAvAAAALwAAAAUACINzAAiDfAAAAAYAAAACACIAAAEyAqoAAwAHAAA3ESERJzMRIyIBEO7MzAACqv1WIgJmAAAAAgAdAAACIALbABsAHwAAARUjByM3IwcjNyM1MzcjNTM3MwczNzMHMxUjByMzNyMB/4AmSCZrJ0knZnQjdoQkSSVrJkkmYnAitWwkbAEUR83Nzc1HuUjGxsbGSLm5AAAAAQAkAKQB3gI2ABEAABM3FyczBzcXBxcHJxcjNwcnNyQumSJzJZkun58umSRyIZguoAGXZ26mpGpmKClma6anbWYqAAABAEMAkwH6AkoACwAAARUjNSM1MzUzFTMVAUNKtrZKtwFKt7dJt7dJAAAAAAEAGgFCASQBrQADAAATNSEVGgEKAUJrawAAAAABAAD/gwEnAwoAAwAAFycTM0pK30h9AQOGAAAAAgAj//YCGgLmABMAJwAAARQOAiMiLgI1ND4CMzIeAgUUHgIzMj4CNTQuAiMiDgICGhw9X0NGYDwaGjxgR0JfPRz+qAgUJB0cJBUHBxQkHB0kFQgBb1WLYzY2Y4xVVYpiNTVii1VKc08qKk9zSklzTykpT3MAAAAAAQArAAACCgLfACEAADc1MzI+AjURDgMjIi4CNT4DPwEzERQeAjsBFWRUDRMNBhQiIB8PDRUQChAiJiwaSHIFCxUQUgA3Bg8aEwIBGCccDwoUHBEEDBIbEjX9mhAZEQg3AAAAAAEAJAAAAg4C5gArAAABFA4EDwEzMjY/ATMHITU3PgM1NCYjIgYVIi4CNTQ+AjMyHgIB9AsYKDtPM2fvHy0JCD0G/hyYLz0jDiomNCodMCMTHThUODpXPB4CPBgtMDZATjFhJCMf12qaMU5HRSg6NllYCxgnGxwyJhcYLD8AAAAAAQAd//YCDgLmAEQAABciLgI1ND4CMxQeAjMyPgI1NC4CKwE1MzI+AjU0JiMiDgIVIiY1ND4CMzIeAhUUDgIHHgMVFA4C+TpTNhkOGB8SEiEvHBktIxUVKDsnP0MhMSAQKyobIxMHQEUdOVQ4N1c+IRgqOSIfQTUiL01kChQiLRgTHhUKITEhEA4i
OiweMSMUQBUoOCE4PxstOR4tLxsvJBQWKz4oIzouIgwFGSo/LD5VNBYAAgAOAAACKQLbABgAIwAAJRUUHgI7ARUhNTMyPgI9ASE1ATMRMxUlNDY3DgMPATMBvw0XHxEN/pkcEh4XDf7lASKPav8AAwQFFhkXBorUvz8YHQ8FNzcFDx0YPz4B3v4nQ/YtaDAMKiwoCeUAAQAp//YCBgLbADoAADcyPgI1NCYjIg4CBycTIRcjJy4DKwEUDgIPAT4DMzIeAhUUDgIjIi4CNTQ2MxQeAuwZLiIVSUMTIBsYCy8gAYQFOwgCBgsQDNUCAgMBCAgZHiIPPGBFJTBNXy85UDIXLSUMGis+ECVAL0xLAwUHAxIBYrojCQ4KBgEQGyISXgMGBAMcNlI3Q1o3GBUiLRgkIxYsIxYAAAACAC7/9gIZAuYALAA8AAABIg4CBz4DMzIeAhUUDgIjIi4CNTQ+AjMyHgIVFA4CIzQuAgMiDgIHFB4CMzI2NTQmAUkeMSMVAwobIysaL0s2HR48WDs5XUMlJEhuSjJFKxMNHS4iBg8bNw4fHBgGEh4pFygtMgKpJEVkQQcNCwcdN04yN1tBJCpWg1lVk20/EyAoFhAdFg0XLyYY/tkIDhIJSWpEIFBZU0wAAAAAAQAtAAACGwLbAAsAADcBISIGDwEjNyEVAakBEf7yHBwDBj4FAen+5QACbBsZNNcy/VcAAAMAH//2Ah4C5gAlADkATQAANzQ+AjcuATU0PgIzMh4CFRQOAgceAxUUDgIjIi4CFzI+AjU0LgInDgMVFB4CEzQuAiMiDgIVFB4CFz4DHxUoOCE9QRg4W0I2UjcbEyQzIC5BKBMkQ2E+QF4+Hf4aKx4QESU4KBEeFQ0RHit6DBgkFxUhFgsOHCkbExsSCLshNSslESNaPCRDNCAbMEInHi8nIRAXLTI2HzFLNBwfNUhiEyIvHBkpIyISCx0jLBseMiMUAgQWKyEUER8qGBsoIBkNCxkgKAAAAAIAJP/2Ag8C5gAoADYAABciLgI1NDY3HgMzMjY3DgMjIi4CNTQ+AjMyHgIVFA4CAzI2NzQuAiMiBhUUFukvQCgRGBoHFR4nGkVKBQwdJS0aLEo1HiA9Vzc3XkUmIUdvHyU1DxEcKBgsMDAKFCAqFhYfBRcoHRGVkw8ZEwobNk80N1tCJChUglpVlG9AAW4lH0JePB1WV0dJAAAAAAEAAAABAACkAILFXw889QALBAAAAAAA2XTOiAAAAADZdM6IAAD/gwIpAwoAAAAIAAIAAAAAAAAAAQAAAyz/LABcAj0AAAAAAikAAQAAAAAAAAAAAAAAAAAAAAcBdgAiAj0AHQICACQCPQBDAT4AGgEnAAACPQAjACsAJAAdAA4AKQAuAC0AHwAkAAAAAAAUAEQAZgB8AIoAmADUAQYBRgGgAdYCKAJ+ApgDBANSAAAAAQAAABAATgADAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAwAlgABAAAAAAABAA0AAAABAAAAAAACAAYADQABAAAAAAADAA0AEwABAAAAAAAEAA0AIAABAAAAAAAFAB4ALQABAAAAAAAGAA0ASwADAAEECQABABoAWAADAAEECQACAAwAcgADAAEECQADABoAfgADAAEECQAEABoAmAADAAEECQAFADwAsgADAAEECQAGABoA7kxlZVRyZWVzaGFkb3dNZWRpdW1MZWVUcmVlc2hhZG93TGVlVHJlZXNoYWRvd1ZlcnNpb24gMS4wOyBGb250RWRpdG9yICh2MS4wKUxlZVRyZWVzaGFkb3cATABlAGUAVAByAGUAZQBzAGgAYQBkAG8AdwBNAGUAZABpAHUAbQBMAGUAZQBUAHIAZQBlAHMAaABhAGQAbwB3AEwAZQBlAFQAcgBlAGUAcwBoAGEAZABvAHcAVgBlAHIAcwBpAG8AbgAgADEALgAwADsAIABGAG8AbgB0AEUAZABpAHQAbwByACAAKAB2ADEA
LgAwACkATABlAGUAVAByAGUAZQBzAGgAYQBkAG8AdwAAAAIAAAAAAAAAMgAAAAAAAAAAAAAAAAAAAAAAAAAAABAAEAAAAAYADQAOABAAEgECAQMBBAEFAQYBBwEIAQkBCgELBHplcm8Db25lA3R3bwV0aHJlZQRmb3VyBGZpdmUDc2l4BXNldmVuBWVpZ2h0BG5pbmU='
# Fail with a readable message when the page carried no embedded font.
# IndexError is kept because that is what indexing the empty list raised.
if not font_data:
    raise IndexError('no base64-encoded font payload found in the page')
base64_str = font_data[0]  # first base64 font payload captured from the page
b = base64.b64decode(base64_str)  # decode to the raw font bytes
font = TTFont(io.BytesIO(b))  # parse the font from an in-memory buffer
bestcmap = font['cmap'].getBestCmap()  # cmap table data: code point -> glyph name

#print(bestcmap)

# Re-key the character map by the hex string of each code point
# (hex() yields strings such as '0x8373'); the glyph name stays the value.
newmap = {
    hex(code_point): glyph_name
    for code_point, glyph_name in bestcmap.items()
}
    
#print(newmap)

# Swap each custom-font character reference in the page text for the plain
# character its glyph stands for.
for key, value in newmap.items():
    # A glyph name missing from convert_dict has no known plain-text
    # equivalent; leave its references untouched instead of raising KeyError.
    plain_char = convert_dict.get(value)
    if plain_char is None:
        continue
    # '0x8373' -> '&#x8373;'.  hex() emits lowercase digits, so only
    # lowercase hexadecimal references are matched.
    key_ = key.replace('0x', '&#x') + ';'
    # str.replace returns the text unchanged when the reference is absent,
    # so no separate membership check is needed.
    response_ = response_.replace(key_, plain_char)

print(response_)
阅读原文
阅读 3481
123 显示电脑版