import/lists/autocompleteFolder

This commit is contained in:
j 2014-05-19 01:24:04 +02:00
commit d6f350e5a1
42 changed files with 955 additions and 436 deletions

View file

@ -2,8 +2,7 @@
# vi:si:et:sw=4:sts=4:ts=4
from __future__ import division
import logging
logger = logging.getLogger('meta')
import stdnum.isbn
import abebooks
import loc
@ -13,6 +12,10 @@ import worldcat
import google
import duckduckgo
import logging
logger = logging.getLogger('meta')
providers = [
('openlibrary', 'olid'),
('loc', 'lccn'),
@ -32,6 +35,8 @@ def find(title, author=None, publisher=None, date=None):
return results
def lookup(key, value):
if not isvalid_id(key, value):
return {}
data = {key: value}
ids = [(key, value)]
provider_data = {}
@ -59,4 +64,13 @@ def lookup(key, value):
data[k_] = v_
return data
def isvalid_id(key, value):
    """Return True if ``value`` is a plausible identifier of type ``key``.

    Only a few identifier types are inspected:

    * ``isbn10`` / ``isbn13``: the length of ``value`` must match the key
      (10 or 13 characters) and ``stdnum.isbn.is_valid`` must accept it.
    * ``asin``: ``value`` must be exactly 10 characters long.
    * ``olid``: ``value`` must start with ``OL`` and end with ``M``.

    Every other key is accepted without looking at ``value``.

    For the inspected keys, a value that is not string-like (for example
    ``None`` or a number) is reported as invalid instead of raising
    ``TypeError`` or ``AttributeError``.
    """
    if key in ('isbn10', 'isbn13', 'asin', 'olid'):
        # The checks below rely on len() and str methods; anything without
        # string behaviour can never be a valid id of these types.
        if not hasattr(value, 'startswith'):
            return False
    if key in ('isbn10', 'isbn13'):
        # Test the length first so that a 10-character value is never
        # accepted for isbn13 (or the reverse); only then ask stdnum.
        if 'isbn%d' % len(value) != key or not stdnum.isbn.is_valid(value):
            return False
    if key == 'asin' and len(value) != 10:
        return False
    if key == 'olid' and not (value.startswith('OL') and value.endswith('M')):
        return False
    return True

View file

@ -9,10 +9,11 @@ import lxml.html
import logging
logger = logging.getLogger('meta.abebooks')
base = 'http://www.abebooks.com'
def get_ids(key, value):
ids = []
if key in ('isbn10', 'isbn13'):
base = 'http://www.abebooks.com'
url = '%s/servlet/SearchResults?isbn=%s&sts=t' % (base, id)
data = read_url(url)
urls = re.compile('href="(/servlet/BookDetailsPL[^"]+)"').findall(data)
@ -24,21 +25,20 @@ def get_ids(key, value):
def lookup(id):
    """Log the requested ``id`` at debug level and return an empty dict.

    No data is fetched here; the result is always ``{}``.
    """
    logger.debug('lookup %s', id)
    return {}
def get_data(id):
    """Fetch bibliographic fields for ``id`` from the AbeBooks website.

    ``id`` is sent as the ``isbn`` parameter of an AbeBooks search. The
    first ``BookDetailsPL`` link found on the result page is followed, and
    the text of every element whose ``id`` attribute contains ``biblio`` is
    collected.

    Returns a dict mapping field names to their text. A field name is the
    element id with ``biblio-`` removed, except ``pubdate`` which is stored
    as ``date``. Empty values and the ``bookcondition``, ``binding`` and
    ``edition-amz`` fields are left out. If the search page has no details
    link, an empty dict is returned.
    """
    data = {}
    # Field names from the page that are stored under a different key.
    keys = {
        'pubdate': 'date'
    }
    ignored = ('bookcondition', 'binding', 'edition-amz')

    url = '%s/servlet/SearchResults?isbn=%s&sts=t' % (base, id)
    html = read_url(url)
    urls = re.compile('href="(/servlet/BookDetailsPL[^"]+)"').findall(html)
    if not urls:
        return data

    # Only the first search hit is used.
    details = '%s%s' % (base, urls[0])
    html = read_url(details)
    doc = lxml.html.document_fromstring(html)
    for e in doc.xpath("//*[contains(@id, 'biblio')]"):
        key = e.attrib['id'].replace('biblio-', '')
        value = e.text_content()
        if value and key not in ignored:
            data[keys.get(key, key)] = value
    return data

View file

@ -37,6 +37,6 @@ def find(title, author=None, publisher=None, date=None):
done.add(isbn)
if len(isbn) == 10:
done.add(stdnum.isbn.to_isbn13(isbn))
if len(isbn) == 13:
if len(isbn) == 13 and isbn.startswith('978'):
done.add(stdnum.isbn.to_isbn10(isbn))
return results

View file

@ -45,9 +45,9 @@ def lookup(id):
}
for key in keys:
r[key] = find_re(data, '<span class="title">%s:</span>(.*?)</li>'% re.escape(keys[key]))
if r[key] == '--':
r[key] = ''
if key == 'pages' and r[key]:
if r[key] == '--' or not r[key]:
del r[key]
if key == 'pages' and key in r:
r[key] = int(r[key])
desc = find_re(data, '<h2>Description:<\/h2>(.*?)<div ')
desc = desc.replace('<br /><br />', ' ').replace('<br /> ', ' ').replace('<br />', ' ')