store metadata per user. remove primaryid. only store isbn13

This commit is contained in:
j 2016-01-11 19:13:54 +05:30
commit 02e040d9f5
16 changed files with 245 additions and 192 deletions

View file

@ -46,11 +46,10 @@ def info(key, value):
info['publisher'], info['edition'] = info['publisher'].split('; ', 1)
if 'ISBN-13' in content_info:
if not 'isbn' in info: info['isbn'] = []
info['isbn'] = content_info['ISBN-13'].replace('-', '')
info['isbn'].append(content_info['ISBN-13'].replace('-', ''))
if 'ISBN-10' in content_info:
if not 'isbn' in info: info['isbn'] = []
info['isbn'].append(content_info['ISBN-10'])
elif 'ISBN-10' in content_info:
info['isbn'] = stdnum.isbn.to_isbn13(content_info['ISBN-10'])
a = doc.xpath('//span[@class="a-size-medium"]')
if a:

View file

@ -21,14 +21,13 @@ def find(query):
done = set()
for isbn in isbns:
if isbn not in done:
isbn = stdnum.isbn.to_isbn13(isbn)
r = {
'isbn': [isbn],
'primaryid': ['isbn', isbn]
}
results.append(r)
done.add(isbn)
if len(isbn) == 10:
done.add(stdnum.isbn.to_isbn13(isbn))
if len(isbn) == 13 and isbn.startswith('978'):
done.add(stdnum.isbn.to_isbn10(isbn))
return results

View file

@ -6,7 +6,7 @@ from ox.cache import get_json, store
import ox.web.google
import stdnum.isbn
from .utils import find_isbns, get_language, decode_html_data
from .utils import find_isbns, get_language, decode_html_data, to_isbn13
import logging
logger = logging.getLogger(__name__)
@ -51,6 +51,7 @@ def info(key, value):
data = {}
for key in [
'authors',
'categories',
'description',
'pageCount',
'publishedDate',
@ -83,6 +84,9 @@ def info(key, value):
data['isbn'].append(k['identifier'])
else:
print('unknown identifier', k)
if 'isbn' in data:
data['isbn'] = [to_isbn13(i) for i in data['isbn']][0]
if 'publisher' in data and isinstance(data['publisher'], str):
data['publisher'] = [data['publisher']]
if 'language' in _data:

View file

@ -6,6 +6,16 @@ import re
import stdnum.isbn
import ox
import ox.iso
def to_isbn13(isbn):
    """Return *isbn* as an ISBN-13 string, or None if it cannot be used.

    The value is handed to ``stdnum.isbn.validate`` with conversion
    enabled, so a valid ISBN-10 comes back in its ISBN-13 form. Results
    that do not start with '97' are rejected.

    Returns None instead of raising for anything that fails validation,
    including values that are not strings at all.
    """
    try:
        isbn13 = stdnum.isbn.validate(isbn, True)
    except Exception:
        # Best-effort conversion: bad input simply means "no ISBN here".
        # Catching Exception (rather than a bare except) keeps
        # KeyboardInterrupt and SystemExit propagating as they should.
        return None
    if isbn13[:2] != '97':
        return None
    return isbn13
def normalize_isbn(value):
    """Reduce *value* to digits and upper-case 'X' characters.

    Every other character (hyphens, spaces, dashes, letters, ...) is
    dropped; the relative order of the kept characters is unchanged.
    """
    kept = filter(lambda ch: ch == 'X' or ch.isdigit(), value)
    return ''.join(kept)
@ -13,14 +23,11 @@ def normalize_isbn(value):
def find_isbns(text):
    """Return the distinct ISBN-13 values found in *text*.

    *text* may be ``str`` or ``bytes``; bytes are decoded with the default
    codec first. Candidate runs start with a digit and continue with
    digits, '-', 'X', en dashes or spaces. Each run is stripped with
    ``normalize_isbn`` and converted with ``to_isbn13``; runs that do not
    yield an ISBN-13 are dropped.

    The result is a list without duplicates, in order of first appearance.
    """
    if isinstance(text, bytes):
        text = text.decode()
    # Raw string so the regex escapes reach `re` untouched; \u2013 is the
    # en dash, which some sources use in place of a hyphen.
    candidates = re.findall(r'\d[\d\-X\u2013 ]+', text)
    converted = (to_isbn13(normalize_isbn(candidate)) for candidate in candidates)
    # dict.fromkeys de-duplicates while keeping a deterministic order,
    # which list(set(...)) does not guarantee.
    return list(dict.fromkeys(isbn for isbn in converted if isbn))
def get_language(lang):
    """Map a language code to whatever ``ox.iso.codeToLang`` returns for it.

    Only the part of *lang* before the first '-' is looked up. When the
    lookup gives a falsy result, *lang* itself is returned unchanged.
    """
    base_code = lang.partition('-')[0]
    resolved = ox.iso.codeToLang(base_code)
    return resolved if resolved else lang