openmedialibrary/oml/meta/utils.py

27 lines
637 B
Python
Raw Normal View History

2014-08-12 08:16:57 +00:00
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
2014-09-02 22:32:44 +00:00
2014-08-12 08:16:57 +00:00
2014-05-16 08:06:11 +00:00
import re
import stdnum.isbn
2014-05-14 09:57:11 +00:00
import ox
2014-08-12 08:16:57 +00:00
2014-05-14 09:57:11 +00:00
def normalize_isbn(value):
    """Reduce *value* to the characters that can appear in a bare ISBN.

    Keeps, in their original order, every character that is a digit
    (according to ``str.isdigit``) or is exactly the uppercase letter
    ``'X'``; everything else (hyphens, spaces, lowercase letters, ...)
    is dropped.

    Returns the kept characters joined into a single string.
    """
    def is_isbn_char(char):
        # Only an uppercase 'X' is kept; a lowercase 'x' is discarded.
        return char == 'X' or char.isdigit()

    return ''.join(filter(is_isbn_char, value))
2014-05-16 08:06:11 +00:00
def find_isbns(text):
    """Return the valid ISBN-10 / ISBN-13 numbers found in *text*.

    *text* may be ``str`` or ``bytes``; bytes are decoded as UTF-8 first.
    Candidate runs (a digit followed by digits, hyphens, spaces or ``X``)
    are extracted, normalized with ``normalize_isbn`` and kept only when
    they

    * are 10 or 13 characters long,
    * are not an all-zero placeholder, and
    * are accepted by ``stdnum.isbn.is_valid``.

    Returns a list of normalized ISBN strings in order of appearance;
    duplicates are not removed.
    """
    if isinstance(text, bytes):
        # Every character an ISBN can contain is ASCII, so bytes that are
        # not valid UTF-8 are replaced instead of aborting the whole scan.
        # The replacement character is not a digit and therefore cannot
        # create or extend a match.
        text = text.decode(errors='replace')

    # Raw string: '\d', '\-' and '\ ' are regex escapes, not string escapes.
    candidates = re.findall(r'\d[\d\-X\ ]+', text)

    # All-zero strings are rejected explicitly.
    placeholders = ('0' * 10, '0' * 13)

    return [
        isbn
        for isbn in (normalize_isbn(candidate) for candidate in candidates)
        # Cheap checks first; the checksum validation runs last.
        if len(isbn) in (10, 13)
        and isbn not in placeholders
        and stdnum.isbn.is_valid(isbn)
    ]
def get_language(lang):
    """Resolve a language code via ``ox.iso.codeToLang``.

    Only the part of *lang* before the first ``'-'`` is looked up
    (``'en-US'`` is looked up as ``'en'``). When the lookup yields a
    falsy result, *lang* itself is returned unchanged.
    """
    primary_code = lang.partition('-')[0]
    resolved = ox.iso.codeToLang(primary_code)
    if resolved:
        return resolved
    # Unknown code: hand the caller back exactly what was passed in.
    return lang