use r'' for regex strings
This commit is contained in:
parent
414cb00115
commit
bb13747023
1 changed file with 81 additions and 81 deletions
162
ox/text.py
162
ox/text.py
|
@@ -133,86 +133,86 @@ UA_NAMES = {
|
||||||
}
|
}
|
||||||
UA_REGEXPS = {
|
UA_REGEXPS = {
|
||||||
'browser': [
|
'browser': [
|
||||||
'(Camino)\/(\d+)',
|
r'(Camino)\/(\d+)',
|
||||||
'(Chimera)\/(\d+)',
|
r'(Chimera)\/(\d+)',
|
||||||
'(chromeframe)\/(\d+)',
|
r'(chromeframe)\/(\d+)',
|
||||||
'(Edge)\/(\d+)',
|
r'(Edge)\/(\d+)',
|
||||||
'(Epiphany)\/(\d+)', # before Chrome, Chromium and Safari
|
r'(Epiphany)\/(\d+)', # before Chrome, Chromium and Safari
|
||||||
'(Chromium)\/(\d+)', # before Chrome
|
r'(Chromium)\/(\d+)', # before Chrome
|
||||||
'(Chrome)\/(\d+)',
|
r'(Chrome)\/(\d+)',
|
||||||
'(FBForIPhone)',
|
r'(FBForIPhone)',
|
||||||
'(Firefox)\/(\d+)',
|
r'(Firefox)\/(\d+)',
|
||||||
'(Galeon)\/(\d+)',
|
r'(Galeon)\/(\d+)',
|
||||||
'(IEMobile)\/(\d+)',
|
r'(IEMobile)\/(\d+)',
|
||||||
'(iCab) (\d+)',
|
r'(iCab) (\d+)',
|
||||||
'(iCab)\/(\d+)',
|
r'(iCab)\/(\d+)',
|
||||||
'(konqueror)\/(\d+)',
|
r'(konqueror)\/(\d+)',
|
||||||
'(Konqueror)\/(\d+)',
|
r'(Konqueror)\/(\d+)',
|
||||||
'(Lynx)\/(\d+)',
|
r'(Lynx)\/(\d+)',
|
||||||
'(Netscape)\d?\/(\d+)',
|
r'(Netscape)\d?\/(\d+)',
|
||||||
'(NokiaBrowser)\/(\d+)',
|
r'(NokiaBrowser)\/(\d+)',
|
||||||
'(OmniWeb)\/(\d+)',
|
r'(OmniWeb)\/(\d+)',
|
||||||
'(Opera)\/.+Version\/(\d+)',
|
r'(Opera)\/.+Version\/(\d+)',
|
||||||
'(OviBrowser)\/(\d+)',
|
r'(OviBrowser)\/(\d+)',
|
||||||
'Version\/(\d+).+(Safari)',
|
r'Version\/(\d+).+(Safari)',
|
||||||
'(WebKit)\/(\d+)',
|
r'(WebKit)\/(\d+)',
|
||||||
'(MSIE) (\d\d?(?!\d))', # last, since Opera used to mask as MSIE
|
r'(MSIE) (\d\d?(?!\d))', # last, since Opera used to mask as MSIE
|
||||||
'(Trident)\/.*?rv:(\d+)',
|
r'(Trident)\/.*?rv:(\d+)',
|
||||||
'(Gecko)',
|
r'(Gecko)',
|
||||||
'(Mozilla)\/(3|4)'
|
r'(Mozilla)\/(3|4)'
|
||||||
],
|
],
|
||||||
'robot': [
|
'robot': [
|
||||||
'(BingPreview)\/(\d+)',
|
r'(BingPreview)\/(\d+)',
|
||||||
'(Google Web Preview).+Chrome\/(\d+)',
|
r'(Google Web Preview).+Chrome\/(\d+)',
|
||||||
'(Googlebot)\/(\d+)',
|
r'(Googlebot)\/(\d+)',
|
||||||
'(WebCrawler)\/(\d+)',
|
r'(WebCrawler)\/(\d+)',
|
||||||
'(Yahoo! Slurp)\/(\d+)',
|
r'(Yahoo! Slurp)\/(\d+)',
|
||||||
'(YandexBot)\/([\d\.]+)',
|
r'(YandexBot)\/([\d\.]+)',
|
||||||
'(YandexMobileBot)\/([\d\.]+)',
|
r'(YandexMobileBot)\/([\d\.]+)',
|
||||||
],
|
],
|
||||||
'system': [
|
'system': [
|
||||||
'(Android) (\d+)',
|
r'(Android) (\d+)',
|
||||||
'(Android)',
|
r'(Android)',
|
||||||
'(BB)(\d+)',
|
r'(BB)(\d+)',
|
||||||
'(BeOS)',
|
r'(BeOS)',
|
||||||
'(BlackBerry) (\d+)',
|
r'(BlackBerry) (\d+)',
|
||||||
'(BlackBerry)',
|
r'(BlackBerry)',
|
||||||
'(Darwin)',
|
r'(Darwin)',
|
||||||
'(BSD) (FreeBSD|NetBSD|OpenBSD)',
|
r'(BSD) (FreeBSD|NetBSD|OpenBSD)',
|
||||||
'(CPU OS) (\d+)',
|
r'(CPU OS) (\d+)',
|
||||||
'(iPhone OS) (\d+)',
|
r'(iPhone OS) (\d+)',
|
||||||
'(iPhone)', # Opera
|
r'(iPhone)', # Opera
|
||||||
'(J2ME\/MIDP)',
|
r'(J2ME\/MIDP)',
|
||||||
'(Linux).+(CentOS|CrOS|Debian|Fedora|Gentoo|Mandriva|MeeGo|Mint|Red Hat|SUSE|Ubuntu|webOS)',
|
r'(Linux).+(CentOS|CrOS|Debian|Fedora|Gentoo|Mandriva|MeeGo|Mint|Red Hat|SUSE|Ubuntu|webOS)',
|
||||||
'(CentOS|CrOS|Debian|Fedora|Gentoo|Mandriva|MeeGo|Mint|Red Hat|SUSE|Ubuntu|webOS).+(Linux)',
|
r'(CentOS|CrOS|Debian|Fedora|Gentoo|Mandriva|MeeGo|Mint|Red Hat|SUSE|Ubuntu|webOS).+(Linux)',
|
||||||
'(Linux)',
|
r'(Linux)',
|
||||||
'(Mac OS X) (10.\d+)',
|
r'(Mac OS X) (10.\d+)',
|
||||||
'(Mac OS X)',
|
r'(Mac OS X)',
|
||||||
'(Mac_PowerPC)',
|
r'(Mac_PowerPC)',
|
||||||
'(Mac_PPC)',
|
r'(Mac_PPC)',
|
||||||
'(Macintosh)',
|
r'(Macintosh)',
|
||||||
'Nintendo (Wii).+NX\/(\d+)',
|
r'Nintendo (Wii).+NX\/(\d+)',
|
||||||
'(PLAYSTATION) (\d+)',
|
r'(PLAYSTATION) (\d+)',
|
||||||
'(PlayStation) Vita (\d+)',
|
r'(PlayStation) Vita (\d+)',
|
||||||
'(RIM Tablet OS) (\d+)',
|
r'(RIM Tablet OS) (\d+)',
|
||||||
'(S)(60);',
|
r'(S)(60);',
|
||||||
'(Series) ?(40|60)',
|
r'(Series) ?(40|60)',
|
||||||
'(Symbian OS)',
|
r'(Symbian OS)',
|
||||||
'(SymbianOS)\/(\d+)',
|
r'(SymbianOS)\/(\d+)',
|
||||||
'(SymbOS)',
|
r'(SymbOS)',
|
||||||
'(OS\/2)',
|
r'(OS\/2)',
|
||||||
'(Unix) (AIX|HP-UX|IRIX|SunOS)',
|
r'(Unix) (AIX|HP-UX|IRIX|SunOS)',
|
||||||
'(Unix)',
|
r'(Unix)',
|
||||||
'(Windows) (NT \d\.\d)',
|
r'(Windows) (NT \d\.\d)',
|
||||||
'(Windows Phone) (\d+)',
|
r'(Windows Phone) (\d+)',
|
||||||
'(Windows Phone OS) (\d+)',
|
r'(Windows Phone OS) (\d+)',
|
||||||
'(Windows) (3\.1|95|98|2000|2003|CE|ME|Mobile|NT|XP)', # Opera
|
r'(Windows) (3\.1|95|98|2000|2003|CE|ME|Mobile|NT|XP)', # Opera
|
||||||
'(Win) (9x 4\.90)', # Firefox
|
r'(Win) (9x 4\.90)', # Firefox
|
||||||
'(Win)(16)', # Firefox
|
r'(Win)(16)', # Firefox
|
||||||
'(Win)(9\d)', # Firefox
|
r'(Win)(9\d)', # Firefox
|
||||||
'(Win)(NT)', # Firefox
|
r'(Win)(NT)', # Firefox
|
||||||
'(Win)(NT4\.0)', # Firefox
|
r'(Win)(NT4\.0)', # Firefox
|
||||||
'(X11)'
|
r'(X11)'
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
UA_VERSIONS = {
|
UA_VERSIONS = {
|
||||||
|
@@ -332,9 +332,9 @@ def get_sort_name(name):
|
||||||
|
|
||||||
first_names = name.split(' ')
|
first_names = name.split(' ')
|
||||||
last_names = []
|
last_names = []
|
||||||
if re.search('^[0-9]+$', first_names[-1]):
|
if re.search(r'^[0-9]+$', first_names[-1]):
|
||||||
add_name()
|
add_name()
|
||||||
if re.search('[(\[].+?[)\]]$', first_names[-1]):
|
if re.search(r'[(\[].+?[)\]]$', first_names[-1]):
|
||||||
add_name()
|
add_name()
|
||||||
if find_name(SUFFIXES):
|
if find_name(SUFFIXES):
|
||||||
add_name()
|
add_name()
|
||||||
|
@@ -425,7 +425,7 @@ def parse_useragent(useragent):
|
||||||
matches = list(match.groups())
|
matches = list(match.groups())
|
||||||
if len(matches) == 1:
|
if len(matches) == 1:
|
||||||
matches.append('')
|
matches.append('')
|
||||||
swap = re.match('^\d', matches[0]) or matches[1] == 'Linux'
|
swap = re.match(r'^\d', matches[0]) or matches[1] == 'Linux'
|
||||||
name = matches[1 if swap else 0]
|
name = matches[1 if swap else 0]
|
||||||
version = matches[0 if swap else 1].replace('_', '.')
|
version = matches[0 if swap else 1].replace('_', '.')
|
||||||
name = UA_NAMES[key][name] if name in UA_NAMES[key] else name
|
name = UA_NAMES[key][name] if name in UA_NAMES[key] else name
|
||||||
|
@@ -685,8 +685,8 @@ def sort_string(string):
|
||||||
string = string.replace('Æ', 'AE').replace('Ø', 'O').replace('Þ', 'Th')
|
string = string.replace('Æ', 'AE').replace('Ø', 'O').replace('Þ', 'Th')
|
||||||
|
|
||||||
# pad numbered titles
|
# pad numbered titles
|
||||||
string = re.sub('(\d),(\d{3})', '\\1\\2', string)
|
string = re.sub(r'(\d),(\d{3})', '\\1\\2', string)
|
||||||
string = re.sub('(\d+)', lambda x: '%010d' % int(x.group(0)), string)
|
string = re.sub(r'(\d+)', lambda x: '%010d' % int(x.group(0)), string)
|
||||||
return unicodedata.normalize('NFKD', string)
|
return unicodedata.normalize('NFKD', string)
|
||||||
|
|
||||||
def sorted_strings(strings, key=None):
|
def sorted_strings(strings, key=None):
|
||||||
|
|
Loading…
Reference in a new issue