openmedialibrary/oml/meta/utils.py

# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4


import re
import stdnum.isbn

import ox
import ox.iso

def to_isbn13(isbn):
    """Return a validated ISBN for *isbn*, or None when it is not usable.

    The value is passed to ``stdnum.isbn.validate`` with ``True`` as the
    second argument (python-stdnum's ``convert`` flag, which is documented
    to turn ISBN-10 input into ISBN-13 -- confirm against the installed
    version).  Results that do not start with ``'97'`` are rejected.

    Any ordinary error raised during validation is treated as "not an
    ISBN" and yields None.
    """
    try:
        isbn = stdnum.isbn.validate(isbn, True)
        if isbn[:2] != '97':
            isbn = None
    except Exception:
        # Deliberately broad: invalid input of any shape simply means
        # "no ISBN".  Unlike a bare ``except:``, this lets
        # KeyboardInterrupt and SystemExit propagate.
        isbn = None
    return isbn

def normalize_isbn(value):
    """Return *value* reduced to its digit characters and uppercase 'X'.

    Every character for which ``str.isdigit()`` is true is kept, as is a
    literal ``'X'``; all other characters (hyphens, spaces, ...) are dropped.
    """
    kept = []
    for char in value:
        if char.isdigit() or char == 'X':
            kept.append(char)
    return ''.join(kept)

# Candidate runs: one digit followed by digits, hyphens, 'X', en dashes
# (U+2013) or spaces.  Written as a raw string so the regex escapes reach
# the `re` module unchanged instead of being invalid str escapes.
_ISBN_CANDIDATE_RE = re.compile(r'\d[\d\-X\u2013\ ]+')

def find_isbns(text):
    """Return the distinct valid ISBNs found in *text*.

    *text* may be ``str`` or ``bytes``; bytes are decoded with the default
    codec of ``bytes.decode()`` first.  Each candidate run matched by the
    pattern is cleaned with ``normalize_isbn`` and checked with
    ``to_isbn13``; candidates for which ``to_isbn13`` returns a falsy
    value are discarded.

    The result is a list without duplicates, in order of first appearance.
    """
    if isinstance(text, bytes):
        text = text.decode()
    # A dict is used as an insertion-ordered set so the output order is
    # deterministic rather than depending on set hashing.
    found = {}
    for candidate in _ISBN_CANDIDATE_RE.findall(text):
        isbn = to_isbn13(normalize_isbn(candidate))
        if isbn:
            found[isbn] = None
    return list(found)

def get_language(lang):
    """Map a language code to the value ``ox.iso.codeToLang`` gives for it.

    Only the part of *lang* before the first ``'-'`` is looked up.  When the
    lookup result is falsy, *lang* itself is returned unchanged.
    """
    primary_code = lang.split('-')[0]
    language = ox.iso.codeToLang(primary_code)
    if language:
        return language
    return lang

def decode_html_data(data):
    """Recursively apply ``ox.decode_html`` to every string inside *data*.

    - ``str``: returns the result of ``ox.decode_html``.
    - ``list``: returns a new list with each element processed.
    - ``dict``: values are processed and written back into the same dict,
      which is then returned (keys are left untouched).
    - anything else is returned as is.
    """
    if isinstance(data, str):
        return ox.decode_html(data)
    if isinstance(data, list):
        return [decode_html_data(item) for item in data]
    if isinstance(data, dict):
        # Only existing keys are reassigned, so the dict does not change
        # size while it is being iterated.
        for key, value in data.items():
            data[key] = decode_html_data(value)
    return data
cleanup imports 2014-08-12 08:16:57 +00:00			`# -- coding: utf-8 --`
			`# vi:si:et:sw=4:sts=4:ts=4`
port to python3 2014-09-02 22:32:44 +00:00
cleanup imports 2014-08-12 08:16:57 +00:00
find 2014-05-16 08:06:11 +00:00			`import re`
			`import stdnum.isbn`
meta 2014-05-14 09:57:11 +00:00
implement quit api. indicate if backend is offline 2016-01-08 04:32:24 +00:00			`import ox`
store metadata per user. remove primaryid. only store isbn13 2016-01-11 13:43:54 +00:00			`import ox.iso`

			`def to_isbn13(isbn):`
			`try:`
			`isbn = stdnum.isbn.validate(isbn, True)`
			`if isbn[:2] != '97':`
			`isbn = None`
			`except:`
			`isbn = None`
			`return isbn`
cleanup imports 2014-08-12 08:16:57 +00:00
meta 2014-05-14 09:57:11 +00:00			`def normalize_isbn(value):`
			`return ''.join([s for s in value if s.isdigit() or s == 'X'])`

find 2014-05-16 08:06:11 +00:00			`def find_isbns(text):`
port to python3 2014-09-02 22:32:44 +00:00			`if isinstance(text, bytes):`
			`text = text.decode()`
store metadata per user. remove primaryid. only store isbn13 2016-01-11 13:43:54 +00:00			`matches = re.compile('\d[\d\-X\u2013\ ]+').findall(text)`
find 2014-05-16 08:06:11 +00:00			`matches = [normalize_isbn(value) for value in matches]`
store metadata per user. remove primaryid. only store isbn13 2016-01-11 13:43:54 +00:00			`matches = [to_isbn13(value) for value in matches]`
			`matches = list(set([value for value in matches if value]))`
			`return matches`
find 2014-05-16 08:06:11 +00:00
implement quit api. indicate if backend is offline 2016-01-08 04:32:24 +00:00			`def get_language(lang):`
			`return ox.iso.codeToLang(lang.split('-')[0]) or lang`
import text not html 2016-01-08 10:22:07 +00:00
			`def decode_html_data(data):`
			`if isinstance(data, dict):`
			`for key in data:`
			`data[key] = decode_html_data(data[key])`
			`elif isinstance(data, list):`
			`data = [decode_html_data(v) for v in data]`
			`elif isinstance(data, str):`
			`data = ox.decode_html(data)`
			`return data`