Use raw string literals (r'') for regex patterns

This commit is contained in:
j 2024-06-30 09:52:59 +01:00
parent 414cb00115
commit bb13747023

View file

@ -133,86 +133,86 @@ UA_NAMES = {
}
# Regular expressions for recognising user agent strings, grouped under the
# keys 'browser', 'robot' and 'system'. Every pattern has one or two capture
# groups.
# Presumably consumed by parse_useragent, which reads the groups as name and
# version and swaps them when the first group starts with a digit or the
# second group is 'Linux' -- confirm against the full function.
# The order of entries matters where the inline comments say so (for example
# Chromium is listed before Chrome, and MSIE near the end).
# NOTE: this is a commit diff view. Inside each list the removed plain-string
# patterns are shown first, followed by the added raw-string (r'') versions of
# the same patterns.
UA_REGEXPS = {
'browser': [
# removed by this commit: plain (non-raw) string patterns
'(Camino)\/(\d+)',
'(Chimera)\/(\d+)',
'(chromeframe)\/(\d+)',
'(Edge)\/(\d+)',
'(Epiphany)\/(\d+)', # before Chrome, Chromium and Safari
'(Chromium)\/(\d+)', # before Chrome
'(Chrome)\/(\d+)',
'(FBForIPhone)',
'(Firefox)\/(\d+)',
'(Galeon)\/(\d+)',
'(IEMobile)\/(\d+)',
'(iCab) (\d+)',
'(iCab)\/(\d+)',
'(konqueror)\/(\d+)',
'(Konqueror)\/(\d+)',
'(Lynx)\/(\d+)',
'(Netscape)\d?\/(\d+)',
'(NokiaBrowser)\/(\d+)',
'(OmniWeb)\/(\d+)',
'(Opera)\/.+Version\/(\d+)',
'(OviBrowser)\/(\d+)',
'Version\/(\d+).+(Safari)',
'(WebKit)\/(\d+)',
'(MSIE) (\d\d?(?!\d))', # last, since Opera used to mask as MSIE
'(Trident)\/.*?rv:(\d+)',
'(Gecko)',
'(Mozilla)\/(3|4)'
# added by this commit: the same patterns as raw strings
r'(Camino)\/(\d+)',
r'(Chimera)\/(\d+)',
r'(chromeframe)\/(\d+)',
r'(Edge)\/(\d+)',
r'(Epiphany)\/(\d+)', # before Chrome, Chromium and Safari
r'(Chromium)\/(\d+)', # before Chrome
r'(Chrome)\/(\d+)',
r'(FBForIPhone)',
r'(Firefox)\/(\d+)',
r'(Galeon)\/(\d+)',
r'(IEMobile)\/(\d+)',
r'(iCab) (\d+)',
r'(iCab)\/(\d+)',
r'(konqueror)\/(\d+)',
r'(Konqueror)\/(\d+)',
r'(Lynx)\/(\d+)',
r'(Netscape)\d?\/(\d+)',
r'(NokiaBrowser)\/(\d+)',
r'(OmniWeb)\/(\d+)',
r'(Opera)\/.+Version\/(\d+)',
r'(OviBrowser)\/(\d+)',
r'Version\/(\d+).+(Safari)',
r'(WebKit)\/(\d+)',
r'(MSIE) (\d\d?(?!\d))', # last, since Opera used to mask as MSIE
r'(Trident)\/.*?rv:(\d+)',
r'(Gecko)',
r'(Mozilla)\/(3|4)'
],
'robot': [
# removed by this commit: plain (non-raw) string patterns
'(BingPreview)\/(\d+)',
'(Google Web Preview).+Chrome\/(\d+)',
'(Googlebot)\/(\d+)',
'(WebCrawler)\/(\d+)',
'(Yahoo! Slurp)\/(\d+)',
'(YandexBot)\/([\d\.]+)',
'(YandexMobileBot)\/([\d\.]+)',
# added by this commit: the same patterns as raw strings
r'(BingPreview)\/(\d+)',
r'(Google Web Preview).+Chrome\/(\d+)',
r'(Googlebot)\/(\d+)',
r'(WebCrawler)\/(\d+)',
r'(Yahoo! Slurp)\/(\d+)',
r'(YandexBot)\/([\d\.]+)',
r'(YandexMobileBot)\/([\d\.]+)',
],
'system': [
# removed by this commit: plain (non-raw) string patterns
'(Android) (\d+)',
'(Android)',
'(BB)(\d+)',
'(BeOS)',
'(BlackBerry) (\d+)',
'(BlackBerry)',
'(Darwin)',
'(BSD) (FreeBSD|NetBSD|OpenBSD)',
'(CPU OS) (\d+)',
'(iPhone OS) (\d+)',
'(iPhone)', # Opera
'(J2ME\/MIDP)',
'(Linux).+(CentOS|CrOS|Debian|Fedora|Gentoo|Mandriva|MeeGo|Mint|Red Hat|SUSE|Ubuntu|webOS)',
'(CentOS|CrOS|Debian|Fedora|Gentoo|Mandriva|MeeGo|Mint|Red Hat|SUSE|Ubuntu|webOS).+(Linux)',
'(Linux)',
'(Mac OS X) (10.\d+)',
'(Mac OS X)',
'(Mac_PowerPC)',
'(Mac_PPC)',
'(Macintosh)',
'Nintendo (Wii).+NX\/(\d+)',
'(PLAYSTATION) (\d+)',
'(PlayStation) Vita (\d+)',
'(RIM Tablet OS) (\d+)',
'(S)(60);',
'(Series) ?(40|60)',
'(Symbian OS)',
'(SymbianOS)\/(\d+)',
'(SymbOS)',
'(OS\/2)',
'(Unix) (AIX|HP-UX|IRIX|SunOS)',
'(Unix)',
'(Windows) (NT \d\.\d)',
'(Windows Phone) (\d+)',
'(Windows Phone OS) (\d+)',
'(Windows) (3\.1|95|98|2000|2003|CE|ME|Mobile|NT|XP)', # Opera
'(Win) (9x 4\.90)', # Firefox
'(Win)(16)', # Firefox
'(Win)(9\d)', # Firefox
'(Win)(NT)', # Firefox
'(Win)(NT4\.0)', # Firefox
'(X11)'
# added by this commit: the same patterns as raw strings
r'(Android) (\d+)',
r'(Android)',
r'(BB)(\d+)',
r'(BeOS)',
r'(BlackBerry) (\d+)',
r'(BlackBerry)',
r'(Darwin)',
r'(BSD) (FreeBSD|NetBSD|OpenBSD)',
r'(CPU OS) (\d+)',
r'(iPhone OS) (\d+)',
r'(iPhone)', # Opera
r'(J2ME\/MIDP)',
r'(Linux).+(CentOS|CrOS|Debian|Fedora|Gentoo|Mandriva|MeeGo|Mint|Red Hat|SUSE|Ubuntu|webOS)',
r'(CentOS|CrOS|Debian|Fedora|Gentoo|Mandriva|MeeGo|Mint|Red Hat|SUSE|Ubuntu|webOS).+(Linux)',
r'(Linux)',
# NOTE(review): the '.' after 10 is unescaped and matches any character;
# presumably intentional so that both '10.6' and '10_6' match (the
# parse_useragent hunk replaces '_' with '.' in the version) -- confirm.
r'(Mac OS X) (10.\d+)',
r'(Mac OS X)',
r'(Mac_PowerPC)',
r'(Mac_PPC)',
r'(Macintosh)',
r'Nintendo (Wii).+NX\/(\d+)',
r'(PLAYSTATION) (\d+)',
r'(PlayStation) Vita (\d+)',
r'(RIM Tablet OS) (\d+)',
r'(S)(60);',
r'(Series) ?(40|60)',
r'(Symbian OS)',
r'(SymbianOS)\/(\d+)',
r'(SymbOS)',
r'(OS\/2)',
r'(Unix) (AIX|HP-UX|IRIX|SunOS)',
r'(Unix)',
r'(Windows) (NT \d\.\d)',
r'(Windows Phone) (\d+)',
r'(Windows Phone OS) (\d+)',
r'(Windows) (3\.1|95|98|2000|2003|CE|ME|Mobile|NT|XP)', # Opera
r'(Win) (9x 4\.90)', # Firefox
r'(Win)(16)', # Firefox
r'(Win)(9\d)', # Firefox
r'(Win)(NT)', # Firefox
r'(Win)(NT4\.0)', # Firefox
r'(X11)'
]
}
UA_VERSIONS = {
@ -332,9 +332,9 @@ def get_sort_name(name):
first_names = name.split(' ')
last_names = []
if re.search('^[0-9]+$', first_names[-1]):
if re.search(r'^[0-9]+$', first_names[-1]):
add_name()
if re.search('[(\[].+?[)\]]$', first_names[-1]):
if re.search(r'[(\[].+?[)\]]$', first_names[-1]):
add_name()
if find_name(SUFFIXES):
add_name()
@ -425,7 +425,7 @@ def parse_useragent(useragent):
matches = list(match.groups())
if len(matches) == 1:
matches.append('')
swap = re.match('^\d', matches[0]) or matches[1] == 'Linux'
swap = re.match(r'^\d', matches[0]) or matches[1] == 'Linux'
name = matches[1 if swap else 0]
version = matches[0 if swap else 1].replace('_', '.')
name = UA_NAMES[key][name] if name in UA_NAMES[key] else name
@ -685,8 +685,8 @@ def sort_string(string):
string = string.replace('Æ', 'AE').replace('Ø', 'O').replace('Þ', 'Th')
# pad numbered titles
string = re.sub('(\d),(\d{3})', '\\1\\2', string)
string = re.sub('(\d+)', lambda x: '%010d' % int(x.group(0)), string)
string = re.sub(r'(\d),(\d{3})', '\\1\\2', string)
string = re.sub(r'(\d+)', lambda x: '%010d' % int(x.group(0)), string)
return unicodedata.normalize('NFKD', string)
def sorted_strings(strings, key=None):