add sort_string, sorted_strings

This commit is contained in:
j 2012-05-16 12:29:52 +02:00
parent 45488de06f
commit 7af3c5ffcb

View file

@ -3,6 +3,7 @@
# GPL 2008
import math
import re
import unicodedata
ARTICLES = list(set([
# def sg, def pl, indef sg, indef pl (each m/f/n)
@ -520,3 +521,13 @@ def words(text):
"""
text = text.split()
return map(lambda x: re.sub("(([.!?:-_]|'s)$)", '', x), text)
def sort_string(string):
    """
    Return a key string that makes `string` sort in a natural order.

    The capital letters Æ, Ø and Þ are replaced by 'AE', 'O' and 'Th',
    every run of digits is zero-padded to ten places so that numbered
    titles compare numerically ('Part 2' before 'Part 10'), and the
    result is returned in Unicode NFKD normalization form.
    """
    string = string.replace(u'Æ', 'AE').replace(u'Ø', 'O').replace(u'Þ', 'Th')
    # pad numbered titles; raw string so that \d is not an invalid str escape
    string = re.sub(r'(\d+)', lambda x: '%010d' % int(x.group(0)), string)
    return unicodedata.normalize('NFKD', string)
def sorted_strings(strings):
    """
    Return a new list with `strings` ordered by their sort_string() form.
    """
    # key= calls sort_string() once per item and, unlike the cmp= argument
    # and the cmp() builtin, is also available on Python 3
    return sorted(strings, key=sort_string)