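Use raw string literals (r'...') for regex patterns, so that backslash sequences such as \d and \/ are not treated as invalid string escapes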

This commit is contained in:
j 2024-06-30 09:52:59 +01:00
parent 414cb00115
commit bb13747023

View file

@@ -133,86 +133,86 @@ UA_NAMES = {
} }
UA_REGEXPS = { UA_REGEXPS = {
'browser': [ 'browser': [
'(Camino)\/(\d+)', r'(Camino)\/(\d+)',
'(Chimera)\/(\d+)', r'(Chimera)\/(\d+)',
'(chromeframe)\/(\d+)', r'(chromeframe)\/(\d+)',
'(Edge)\/(\d+)', r'(Edge)\/(\d+)',
'(Epiphany)\/(\d+)', # before Chrome, Chromium and Safari r'(Epiphany)\/(\d+)', # before Chrome, Chromium and Safari
'(Chromium)\/(\d+)', # before Chrome r'(Chromium)\/(\d+)', # before Chrome
'(Chrome)\/(\d+)', r'(Chrome)\/(\d+)',
'(FBForIPhone)', r'(FBForIPhone)',
'(Firefox)\/(\d+)', r'(Firefox)\/(\d+)',
'(Galeon)\/(\d+)', r'(Galeon)\/(\d+)',
'(IEMobile)\/(\d+)', r'(IEMobile)\/(\d+)',
'(iCab) (\d+)', r'(iCab) (\d+)',
'(iCab)\/(\d+)', r'(iCab)\/(\d+)',
'(konqueror)\/(\d+)', r'(konqueror)\/(\d+)',
'(Konqueror)\/(\d+)', r'(Konqueror)\/(\d+)',
'(Lynx)\/(\d+)', r'(Lynx)\/(\d+)',
'(Netscape)\d?\/(\d+)', r'(Netscape)\d?\/(\d+)',
'(NokiaBrowser)\/(\d+)', r'(NokiaBrowser)\/(\d+)',
'(OmniWeb)\/(\d+)', r'(OmniWeb)\/(\d+)',
'(Opera)\/.+Version\/(\d+)', r'(Opera)\/.+Version\/(\d+)',
'(OviBrowser)\/(\d+)', r'(OviBrowser)\/(\d+)',
'Version\/(\d+).+(Safari)', r'Version\/(\d+).+(Safari)',
'(WebKit)\/(\d+)', r'(WebKit)\/(\d+)',
'(MSIE) (\d\d?(?!\d))', # last, since Opera used to mask as MSIE r'(MSIE) (\d\d?(?!\d))', # last, since Opera used to mask as MSIE
'(Trident)\/.*?rv:(\d+)', r'(Trident)\/.*?rv:(\d+)',
'(Gecko)', r'(Gecko)',
'(Mozilla)\/(3|4)' r'(Mozilla)\/(3|4)'
], ],
'robot': [ 'robot': [
'(BingPreview)\/(\d+)', r'(BingPreview)\/(\d+)',
'(Google Web Preview).+Chrome\/(\d+)', r'(Google Web Preview).+Chrome\/(\d+)',
'(Googlebot)\/(\d+)', r'(Googlebot)\/(\d+)',
'(WebCrawler)\/(\d+)', r'(WebCrawler)\/(\d+)',
'(Yahoo! Slurp)\/(\d+)', r'(Yahoo! Slurp)\/(\d+)',
'(YandexBot)\/([\d\.]+)', r'(YandexBot)\/([\d\.]+)',
'(YandexMobileBot)\/([\d\.]+)', r'(YandexMobileBot)\/([\d\.]+)',
], ],
'system': [ 'system': [
'(Android) (\d+)', r'(Android) (\d+)',
'(Android)', r'(Android)',
'(BB)(\d+)', r'(BB)(\d+)',
'(BeOS)', r'(BeOS)',
'(BlackBerry) (\d+)', r'(BlackBerry) (\d+)',
'(BlackBerry)', r'(BlackBerry)',
'(Darwin)', r'(Darwin)',
'(BSD) (FreeBSD|NetBSD|OpenBSD)', r'(BSD) (FreeBSD|NetBSD|OpenBSD)',
'(CPU OS) (\d+)', r'(CPU OS) (\d+)',
'(iPhone OS) (\d+)', r'(iPhone OS) (\d+)',
'(iPhone)', # Opera r'(iPhone)', # Opera
'(J2ME\/MIDP)', r'(J2ME\/MIDP)',
'(Linux).+(CentOS|CrOS|Debian|Fedora|Gentoo|Mandriva|MeeGo|Mint|Red Hat|SUSE|Ubuntu|webOS)', r'(Linux).+(CentOS|CrOS|Debian|Fedora|Gentoo|Mandriva|MeeGo|Mint|Red Hat|SUSE|Ubuntu|webOS)',
'(CentOS|CrOS|Debian|Fedora|Gentoo|Mandriva|MeeGo|Mint|Red Hat|SUSE|Ubuntu|webOS).+(Linux)', r'(CentOS|CrOS|Debian|Fedora|Gentoo|Mandriva|MeeGo|Mint|Red Hat|SUSE|Ubuntu|webOS).+(Linux)',
'(Linux)', r'(Linux)',
'(Mac OS X) (10.\d+)', r'(Mac OS X) (10.\d+)',
'(Mac OS X)', r'(Mac OS X)',
'(Mac_PowerPC)', r'(Mac_PowerPC)',
'(Mac_PPC)', r'(Mac_PPC)',
'(Macintosh)', r'(Macintosh)',
'Nintendo (Wii).+NX\/(\d+)', r'Nintendo (Wii).+NX\/(\d+)',
'(PLAYSTATION) (\d+)', r'(PLAYSTATION) (\d+)',
'(PlayStation) Vita (\d+)', r'(PlayStation) Vita (\d+)',
'(RIM Tablet OS) (\d+)', r'(RIM Tablet OS) (\d+)',
'(S)(60);', r'(S)(60);',
'(Series) ?(40|60)', r'(Series) ?(40|60)',
'(Symbian OS)', r'(Symbian OS)',
'(SymbianOS)\/(\d+)', r'(SymbianOS)\/(\d+)',
'(SymbOS)', r'(SymbOS)',
'(OS\/2)', r'(OS\/2)',
'(Unix) (AIX|HP-UX|IRIX|SunOS)', r'(Unix) (AIX|HP-UX|IRIX|SunOS)',
'(Unix)', r'(Unix)',
'(Windows) (NT \d\.\d)', r'(Windows) (NT \d\.\d)',
'(Windows Phone) (\d+)', r'(Windows Phone) (\d+)',
'(Windows Phone OS) (\d+)', r'(Windows Phone OS) (\d+)',
'(Windows) (3\.1|95|98|2000|2003|CE|ME|Mobile|NT|XP)', # Opera r'(Windows) (3\.1|95|98|2000|2003|CE|ME|Mobile|NT|XP)', # Opera
'(Win) (9x 4\.90)', # Firefox r'(Win) (9x 4\.90)', # Firefox
'(Win)(16)', # Firefox r'(Win)(16)', # Firefox
'(Win)(9\d)', # Firefox r'(Win)(9\d)', # Firefox
'(Win)(NT)', # Firefox r'(Win)(NT)', # Firefox
'(Win)(NT4\.0)', # Firefox r'(Win)(NT4\.0)', # Firefox
'(X11)' r'(X11)'
] ]
} }
UA_VERSIONS = { UA_VERSIONS = {
@@ -332,9 +332,9 @@ def get_sort_name(name):
first_names = name.split(' ') first_names = name.split(' ')
last_names = [] last_names = []
if re.search('^[0-9]+$', first_names[-1]): if re.search(r'^[0-9]+$', first_names[-1]):
add_name() add_name()
if re.search('[(\[].+?[)\]]$', first_names[-1]): if re.search(r'[(\[].+?[)\]]$', first_names[-1]):
add_name() add_name()
if find_name(SUFFIXES): if find_name(SUFFIXES):
add_name() add_name()
@@ -425,7 +425,7 @@ def parse_useragent(useragent):
matches = list(match.groups()) matches = list(match.groups())
if len(matches) == 1: if len(matches) == 1:
matches.append('') matches.append('')
swap = re.match('^\d', matches[0]) or matches[1] == 'Linux' swap = re.match(r'^\d', matches[0]) or matches[1] == 'Linux'
name = matches[1 if swap else 0] name = matches[1 if swap else 0]
version = matches[0 if swap else 1].replace('_', '.') version = matches[0 if swap else 1].replace('_', '.')
name = UA_NAMES[key][name] if name in UA_NAMES[key] else name name = UA_NAMES[key][name] if name in UA_NAMES[key] else name
@@ -685,8 +685,8 @@ def sort_string(string):
string = string.replace('Æ', 'AE').replace('Ø', 'O').replace('Þ', 'Th') string = string.replace('Æ', 'AE').replace('Ø', 'O').replace('Þ', 'Th')
# pad numbered titles # pad numbered titles
string = re.sub('(\d),(\d{3})', '\\1\\2', string) string = re.sub(r'(\d),(\d{3})', '\\1\\2', string)
string = re.sub('(\d+)', lambda x: '%010d' % int(x.group(0)), string) string = re.sub(r'(\d+)', lambda x: '%010d' % int(x.group(0)), string)
return unicodedata.normalize('NFKD', string) return unicodedata.normalize('NFKD', string)
def sorted_strings(strings, key=None): def sorted_strings(strings, key=None):