Use raw string literals (r'') for regex patterns
This commit is contained in:
parent
414cb00115
commit
bb13747023
1 changed file with 81 additions and 81 deletions
162
ox/text.py
162
ox/text.py
|
@ -133,86 +133,86 @@ UA_NAMES = {
|
|||
}
|
||||
UA_REGEXPS = {
|
||||
'browser': [
|
||||
'(Camino)\/(\d+)',
|
||||
'(Chimera)\/(\d+)',
|
||||
'(chromeframe)\/(\d+)',
|
||||
'(Edge)\/(\d+)',
|
||||
'(Epiphany)\/(\d+)', # before Chrome, Chromium and Safari
|
||||
'(Chromium)\/(\d+)', # before Chrome
|
||||
'(Chrome)\/(\d+)',
|
||||
'(FBForIPhone)',
|
||||
'(Firefox)\/(\d+)',
|
||||
'(Galeon)\/(\d+)',
|
||||
'(IEMobile)\/(\d+)',
|
||||
'(iCab) (\d+)',
|
||||
'(iCab)\/(\d+)',
|
||||
'(konqueror)\/(\d+)',
|
||||
'(Konqueror)\/(\d+)',
|
||||
'(Lynx)\/(\d+)',
|
||||
'(Netscape)\d?\/(\d+)',
|
||||
'(NokiaBrowser)\/(\d+)',
|
||||
'(OmniWeb)\/(\d+)',
|
||||
'(Opera)\/.+Version\/(\d+)',
|
||||
'(OviBrowser)\/(\d+)',
|
||||
'Version\/(\d+).+(Safari)',
|
||||
'(WebKit)\/(\d+)',
|
||||
'(MSIE) (\d\d?(?!\d))', # last, since Opera used to mask as MSIE
|
||||
'(Trident)\/.*?rv:(\d+)',
|
||||
'(Gecko)',
|
||||
'(Mozilla)\/(3|4)'
|
||||
r'(Camino)\/(\d+)',
|
||||
r'(Chimera)\/(\d+)',
|
||||
r'(chromeframe)\/(\d+)',
|
||||
r'(Edge)\/(\d+)',
|
||||
r'(Epiphany)\/(\d+)', # before Chrome, Chromium and Safari
|
||||
r'(Chromium)\/(\d+)', # before Chrome
|
||||
r'(Chrome)\/(\d+)',
|
||||
r'(FBForIPhone)',
|
||||
r'(Firefox)\/(\d+)',
|
||||
r'(Galeon)\/(\d+)',
|
||||
r'(IEMobile)\/(\d+)',
|
||||
r'(iCab) (\d+)',
|
||||
r'(iCab)\/(\d+)',
|
||||
r'(konqueror)\/(\d+)',
|
||||
r'(Konqueror)\/(\d+)',
|
||||
r'(Lynx)\/(\d+)',
|
||||
r'(Netscape)\d?\/(\d+)',
|
||||
r'(NokiaBrowser)\/(\d+)',
|
||||
r'(OmniWeb)\/(\d+)',
|
||||
r'(Opera)\/.+Version\/(\d+)',
|
||||
r'(OviBrowser)\/(\d+)',
|
||||
r'Version\/(\d+).+(Safari)',
|
||||
r'(WebKit)\/(\d+)',
|
||||
r'(MSIE) (\d\d?(?!\d))', # last, since Opera used to mask as MSIE
|
||||
r'(Trident)\/.*?rv:(\d+)',
|
||||
r'(Gecko)',
|
||||
r'(Mozilla)\/(3|4)'
|
||||
],
|
||||
'robot': [
|
||||
'(BingPreview)\/(\d+)',
|
||||
'(Google Web Preview).+Chrome\/(\d+)',
|
||||
'(Googlebot)\/(\d+)',
|
||||
'(WebCrawler)\/(\d+)',
|
||||
'(Yahoo! Slurp)\/(\d+)',
|
||||
'(YandexBot)\/([\d\.]+)',
|
||||
'(YandexMobileBot)\/([\d\.]+)',
|
||||
r'(BingPreview)\/(\d+)',
|
||||
r'(Google Web Preview).+Chrome\/(\d+)',
|
||||
r'(Googlebot)\/(\d+)',
|
||||
r'(WebCrawler)\/(\d+)',
|
||||
r'(Yahoo! Slurp)\/(\d+)',
|
||||
r'(YandexBot)\/([\d\.]+)',
|
||||
r'(YandexMobileBot)\/([\d\.]+)',
|
||||
],
|
||||
'system': [
|
||||
'(Android) (\d+)',
|
||||
'(Android)',
|
||||
'(BB)(\d+)',
|
||||
'(BeOS)',
|
||||
'(BlackBerry) (\d+)',
|
||||
'(BlackBerry)',
|
||||
'(Darwin)',
|
||||
'(BSD) (FreeBSD|NetBSD|OpenBSD)',
|
||||
'(CPU OS) (\d+)',
|
||||
'(iPhone OS) (\d+)',
|
||||
'(iPhone)', # Opera
|
||||
'(J2ME\/MIDP)',
|
||||
'(Linux).+(CentOS|CrOS|Debian|Fedora|Gentoo|Mandriva|MeeGo|Mint|Red Hat|SUSE|Ubuntu|webOS)',
|
||||
'(CentOS|CrOS|Debian|Fedora|Gentoo|Mandriva|MeeGo|Mint|Red Hat|SUSE|Ubuntu|webOS).+(Linux)',
|
||||
'(Linux)',
|
||||
'(Mac OS X) (10.\d+)',
|
||||
'(Mac OS X)',
|
||||
'(Mac_PowerPC)',
|
||||
'(Mac_PPC)',
|
||||
'(Macintosh)',
|
||||
'Nintendo (Wii).+NX\/(\d+)',
|
||||
'(PLAYSTATION) (\d+)',
|
||||
'(PlayStation) Vita (\d+)',
|
||||
'(RIM Tablet OS) (\d+)',
|
||||
'(S)(60);',
|
||||
'(Series) ?(40|60)',
|
||||
'(Symbian OS)',
|
||||
'(SymbianOS)\/(\d+)',
|
||||
'(SymbOS)',
|
||||
'(OS\/2)',
|
||||
'(Unix) (AIX|HP-UX|IRIX|SunOS)',
|
||||
'(Unix)',
|
||||
'(Windows) (NT \d\.\d)',
|
||||
'(Windows Phone) (\d+)',
|
||||
'(Windows Phone OS) (\d+)',
|
||||
'(Windows) (3\.1|95|98|2000|2003|CE|ME|Mobile|NT|XP)', # Opera
|
||||
'(Win) (9x 4\.90)', # Firefox
|
||||
'(Win)(16)', # Firefox
|
||||
'(Win)(9\d)', # Firefox
|
||||
'(Win)(NT)', # Firefox
|
||||
'(Win)(NT4\.0)', # Firefox
|
||||
'(X11)'
|
||||
r'(Android) (\d+)',
|
||||
r'(Android)',
|
||||
r'(BB)(\d+)',
|
||||
r'(BeOS)',
|
||||
r'(BlackBerry) (\d+)',
|
||||
r'(BlackBerry)',
|
||||
r'(Darwin)',
|
||||
r'(BSD) (FreeBSD|NetBSD|OpenBSD)',
|
||||
r'(CPU OS) (\d+)',
|
||||
r'(iPhone OS) (\d+)',
|
||||
r'(iPhone)', # Opera
|
||||
r'(J2ME\/MIDP)',
|
||||
r'(Linux).+(CentOS|CrOS|Debian|Fedora|Gentoo|Mandriva|MeeGo|Mint|Red Hat|SUSE|Ubuntu|webOS)',
|
||||
r'(CentOS|CrOS|Debian|Fedora|Gentoo|Mandriva|MeeGo|Mint|Red Hat|SUSE|Ubuntu|webOS).+(Linux)',
|
||||
r'(Linux)',
|
||||
r'(Mac OS X) (10.\d+)',
|
||||
r'(Mac OS X)',
|
||||
r'(Mac_PowerPC)',
|
||||
r'(Mac_PPC)',
|
||||
r'(Macintosh)',
|
||||
r'Nintendo (Wii).+NX\/(\d+)',
|
||||
r'(PLAYSTATION) (\d+)',
|
||||
r'(PlayStation) Vita (\d+)',
|
||||
r'(RIM Tablet OS) (\d+)',
|
||||
r'(S)(60);',
|
||||
r'(Series) ?(40|60)',
|
||||
r'(Symbian OS)',
|
||||
r'(SymbianOS)\/(\d+)',
|
||||
r'(SymbOS)',
|
||||
r'(OS\/2)',
|
||||
r'(Unix) (AIX|HP-UX|IRIX|SunOS)',
|
||||
r'(Unix)',
|
||||
r'(Windows) (NT \d\.\d)',
|
||||
r'(Windows Phone) (\d+)',
|
||||
r'(Windows Phone OS) (\d+)',
|
||||
r'(Windows) (3\.1|95|98|2000|2003|CE|ME|Mobile|NT|XP)', # Opera
|
||||
r'(Win) (9x 4\.90)', # Firefox
|
||||
r'(Win)(16)', # Firefox
|
||||
r'(Win)(9\d)', # Firefox
|
||||
r'(Win)(NT)', # Firefox
|
||||
r'(Win)(NT4\.0)', # Firefox
|
||||
r'(X11)'
|
||||
]
|
||||
}
|
||||
UA_VERSIONS = {
|
||||
|
@ -332,9 +332,9 @@ def get_sort_name(name):
|
|||
|
||||
first_names = name.split(' ')
|
||||
last_names = []
|
||||
if re.search('^[0-9]+$', first_names[-1]):
|
||||
if re.search(r'^[0-9]+$', first_names[-1]):
|
||||
add_name()
|
||||
if re.search('[(\[].+?[)\]]$', first_names[-1]):
|
||||
if re.search(r'[(\[].+?[)\]]$', first_names[-1]):
|
||||
add_name()
|
||||
if find_name(SUFFIXES):
|
||||
add_name()
|
||||
|
@ -425,7 +425,7 @@ def parse_useragent(useragent):
|
|||
matches = list(match.groups())
|
||||
if len(matches) == 1:
|
||||
matches.append('')
|
||||
swap = re.match('^\d', matches[0]) or matches[1] == 'Linux'
|
||||
swap = re.match(r'^\d', matches[0]) or matches[1] == 'Linux'
|
||||
name = matches[1 if swap else 0]
|
||||
version = matches[0 if swap else 1].replace('_', '.')
|
||||
name = UA_NAMES[key][name] if name in UA_NAMES[key] else name
|
||||
|
@ -685,8 +685,8 @@ def sort_string(string):
|
|||
string = string.replace('Æ', 'AE').replace('Ø', 'O').replace('Þ', 'Th')
|
||||
|
||||
# pad numbered titles
|
||||
string = re.sub('(\d),(\d{3})', '\\1\\2', string)
|
||||
string = re.sub('(\d+)', lambda x: '%010d' % int(x.group(0)), string)
|
||||
string = re.sub(r'(\d),(\d{3})', '\\1\\2', string)
|
||||
string = re.sub(r'(\d+)', lambda x: '%010d' % int(x.group(0)), string)
|
||||
return unicodedata.normalize('NFKD', string)
|
||||
|
||||
def sorted_strings(strings, key=None):
|
||||
|
|
Loading…
Reference in a new issue