def words(text):
    """
    Return the whitespace-separated words of ``text`` with one trailing
    punctuation mark removed from each.

    Each word loses at most one trailing suffix: a single character out of
    ``. ! ? : - _`` or a possessive ``'s``. Punctuation inside or in front
    of a word is kept, and a token that consists only of such a character
    becomes an empty string.

    :param text: the string to split on whitespace
    :returns: list of cleaned words, in their original order
    """
    # The hyphen is placed last in the class so it is a literal '-'.
    # Written as ':-_' it forms a range from ':' to '_' that also contains
    # every uppercase ASCII letter, which cut the last letter off words
    # such as "NASA".
    trailing = re.compile(r"(?:[.!?:_-]|'s)$")
    return [trailing.sub('', word) for word in text.split()]