# Origin: commit 82b7ff413c0a34191aa941ac87853827db3ae389
# Author: j <0x006A@0x2620.org>, Wed, 1 Sep 2010 14:55:52 +0200
# Subject: stripAccents (6 lines added to ox/normalize.py)
import unicodedata


def stripAccents(s):
    """Return *s* with combining accent marks removed.

    The text is decomposed with Unicode normalization form NFD, which
    splits a precomposed character such as U+00E9 into its base letter
    and a separate combining mark. Every character whose Unicode
    category is 'Mn' (Mark, Nonspacing) is then dropped.

    Characters that have no canonical decomposition (for example
    U+00F8, LATIN SMALL LETTER O WITH STROKE) are returned unchanged.

    Parameters:
        s: text to process. A byte string is decoded as UTF-8 first.

    Returns:
        The text with all nonspacing marks removed.

    Raises:
        UnicodeDecodeError: if *s* is a byte string that is not valid
            UTF-8.
    """
    # `bytes` is an alias of `str` on Python 2.6+ and a distinct type on
    # Python 3, so this single check covers byte strings on both. The
    # explicit codec replaces the implicit ASCII decode of `unicode(s)`,
    # which failed on exactly the accented input this function targets
    # (and `unicode` does not exist at all on Python 3).
    if isinstance(s, bytes):
        s = s.decode('utf-8')
    decomposed = unicodedata.normalize('NFD', s)
    return ''.join(c for c in decomposed if unicodedata.category(c) != 'Mn')