store metadata per user. remove primaryid. only store isbn13
This commit is contained in:
parent
90648f9e65
commit
02e040d9f5
16 changed files with 245 additions and 192 deletions
|
|
@ -46,11 +46,10 @@ def info(key, value):
|
|||
info['publisher'], info['edition'] = info['publisher'].split('; ', 1)
|
||||
|
||||
if 'ISBN-13' in content_info:
|
||||
if not 'isbn' in info: info['isbn'] = []
|
||||
info['isbn'] = content_info['ISBN-13'].replace('-', '')
|
||||
info['isbn'].append(content_info['ISBN-13'].replace('-', ''))
|
||||
if 'ISBN-10' in content_info:
|
||||
if not 'isbn' in info: info['isbn'] = []
|
||||
info['isbn'].append(content_info['ISBN-10'])
|
||||
elif 'ISBN-10' in content_info:
|
||||
info['isbn'] = stdnum.isbn.to_isbn13(content_info['ISBN-10'])
|
||||
|
||||
a = doc.xpath('//span[@class="a-size-medium"]')
|
||||
if a:
|
||||
|
|
|
|||
|
|
@ -21,14 +21,13 @@ def find(query):
|
|||
done = set()
|
||||
for isbn in isbns:
|
||||
if isbn not in done:
|
||||
isbn = stdnum.isbn.to_isbn13(isbn)
|
||||
r = {
|
||||
'isbn': [isbn],
|
||||
'primaryid': ['isbn', isbn]
|
||||
}
|
||||
results.append(r)
|
||||
done.add(isbn)
|
||||
if len(isbn) == 10:
|
||||
done.add(stdnum.isbn.to_isbn13(isbn))
|
||||
if len(isbn) == 13 and isbn.startswith('978'):
|
||||
done.add(stdnum.isbn.to_isbn10(isbn))
|
||||
return results
|
||||
|
|
|
|||
|
|
@ -6,7 +6,7 @@ from ox.cache import get_json, store
|
|||
import ox.web.google
|
||||
import stdnum.isbn
|
||||
|
||||
from .utils import find_isbns, get_language, decode_html_data
|
||||
from .utils import find_isbns, get_language, decode_html_data, to_isbn13
|
||||
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
|
|
@ -51,6 +51,7 @@ def info(key, value):
|
|||
data = {}
|
||||
for key in [
|
||||
'authors',
|
||||
'categories',
|
||||
'description',
|
||||
'pageCount',
|
||||
'publishedDate',
|
||||
|
|
@ -83,6 +84,9 @@ def info(key, value):
|
|||
data['isbn'].append(k['identifier'])
|
||||
else:
|
||||
print('unknown identifier', k)
|
||||
if 'isbn' in data:
|
||||
data['isbn'] = [to_isbn13(i) for i in data['isbn']][0]
|
||||
|
||||
if 'publisher' in data and isinstance(data['publisher'], str):
|
||||
data['publisher'] = [data['publisher']]
|
||||
if 'language' in _data:
|
||||
|
|
|
|||
|
|
@ -6,6 +6,16 @@ import re
|
|||
import stdnum.isbn
|
||||
|
||||
import ox
|
||||
import ox.iso
|
||||
|
||||
def to_isbn13(isbn):
    """Return ``isbn`` as a validated ISBN-13 string, or ``None``.

    The value is handed to ``stdnum.isbn.validate`` with its second
    positional argument set to ``True`` (the conversion-to-ISBN-13 switch,
    see the python-stdnum documentation). A result that does not start
    with ``97`` is rejected.

    Returns ``None`` whenever validation raises or the prefix check fails,
    so callers can simply filter on truthiness.
    """
    try:
        isbn = stdnum.isbn.validate(isbn, True)
        if isbn[:2] != '97':
            isbn = None
    except Exception:
        # Best effort: any validation failure means "no usable ISBN".
        # ``Exception`` (instead of a bare ``except``) keeps
        # KeyboardInterrupt and SystemExit propagating to the caller.
        isbn = None
    return isbn
|
||||
|
||||
def normalize_isbn(value):
    """Return ``value`` reduced to its digits and upper-case ``X`` characters.

    Every other character (hyphens, spaces, letters, a lower-case ``x``)
    is dropped; the relative order of the kept characters is unchanged.
    """
    kept = (char for char in value if char == 'X' or char.isdigit())
    return ''.join(kept)
|
||||
|
|
@ -13,14 +23,11 @@ def normalize_isbn(value):
|
|||
def find_isbns(text):
    """Return the unique ISBN-13 strings found in ``text``.

    ``text`` may be ``str`` or ``bytes``; bytes are decoded with the
    default codec first. Candidate runs start with a digit and continue
    with digits, ``-``, ``X``, an en dash (U+2013) or spaces. Each run is
    cleaned with ``normalize_isbn`` and converted with ``to_isbn13``;
    candidates for which ``to_isbn13`` returns a falsy value are dropped.

    The result is de-duplicated and keeps the order in which the ISBNs
    first appear in ``text``.
    """
    if isinstance(text, bytes):
        text = text.decode()
    # Raw string: avoids the invalid-escape warnings of '\d', '\-' and '\ '
    # in a normal literal; the ``re`` module itself resolves \u2013.
    candidates = re.findall(r'\d[\d\-X\u2013 ]+', text)
    converted = (to_isbn13(normalize_isbn(candidate)) for candidate in candidates)
    # dict.fromkeys removes duplicates while keeping first-seen order,
    # unlike list(set(...)) whose ordering is arbitrary.
    return list(dict.fromkeys(isbn for isbn in converted if isbn))
|
||||
|
||||
def get_language(lang):
    """Look up a language via ``ox.iso.codeToLang`` for a code such as ``en-US``.

    Only the part of ``lang`` before the first ``-`` is passed to
    ``ox.iso.codeToLang``. When that lookup yields a falsy result, ``lang``
    is returned unchanged.
    """
    primary_code = lang.split('-')[0]
    language = ox.iso.codeToLang(primary_code)
    return language if language else lang
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue