# Recovered from the patch "words" against ox/text.py
# (commit 4cfd74b4b989e823cb71b34cfd96e42466d82eef, 2011-10-30).
import re

# Matches a single trailing punctuation mark (. ! ? : _ -) or a trailing
# possessive "'s". The hyphen is deliberately placed last in the character
# class so that it is a literal: written as ":-_" it forms a range from ':'
# to '_' that also covers ';<=>@', every uppercase letter and '[\]^', and
# does not include '-' itself.
_TRAILING_PUNCTUATION = re.compile(r"(?:[.!?:_-]|'s)$")


def words(text):
    """
    Return the whitespace-separated words of text without trailing punctuation.

    text is split on runs of whitespace. From the end of each token, at most
    one suffix is removed: either a single character out of ``. ! ? : - _``
    or a possessive ``'s``. Punctuation elsewhere in the token is kept.

    Returns a list with one entry per token, in order. A token that consists
    only of a removable suffix (for example ``"-"``) yields an empty string.
    """
    # A list comprehension (rather than map) returns a real list on both
    # Python 2 and Python 3.
    return [_TRAILING_PUNCTUATION.sub('', token) for token in text.split()]