openmedialibrary/oml/meta/google.py

44 lines
1.1 KiB
Python
Raw Normal View History

2014-05-16 08:06:11 +00:00
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
from __future__ import division
import ox.web.google
import stdnum.isbn
from .utils import find_isbns
2014-05-17 14:26:59 +00:00
import logging
logger = logging.getLogger('meta.google')
2014-05-16 08:06:11 +00:00
def find(title, author=None, publisher=None, date=None):
    """Search Google for ISBNs matching a title and, optionally, an author.

    The query sent to ``ox.web.google.find`` is ``"<title> [<author>] isbn"``.
    The fields of every search result are joined with spaces and scanned
    with ``find_isbns``; each distinct ISBN found becomes one result entry.

    Args:
        title: Title to search for (used verbatim as the start of the query).
        author: Optional author. A string is appended as-is; a list or
            tuple of strings is joined with spaces first.
        publisher: Only logged; not part of the query.
        date: Only logged; not part of the query.

    Returns:
        A list of dicts of the form ``{key: isbn, 'mainid': key}`` where
        ``key`` is ``'isbn%d' % len(isbn)`` (presumably ``'isbn10'`` or
        ``'isbn13'`` -- depends on what ``find_isbns`` yields). An ISBN
        whose ISBN-10/ISBN-13 counterpart was already emitted is skipped.
    """
    logger.debug('find %s %s %s %s', title, author, publisher, date)

    query = title
    if author:
        if isinstance(author, (list, tuple)):
            author = ' '.join(author)
        query += ' ' + author
    query += ' isbn'

    isbns = []
    for r in ox.web.google.find(query):
        # assumes each search result is an iterable of strings -- TODO confirm
        isbns += find_isbns(' '.join(r))
    # The original call had no '%s' placeholder, so logging failed to
    # format the record and the list was never shown.
    logger.debug('isbns %s', isbns)

    results = []
    done = set()
    for isbn in isbns:
        if isbn in done:
            continue
        key = 'isbn%d' % len(isbn)
        # Only the identifier is returned; no metadata lookup happens here.
        results.append({
            key: isbn,
            'mainid': key,
        })
        done.add(isbn)
        # Also mark the other representation of the same ISBN as seen so
        # the same book is not reported twice.
        if len(isbn) == 10:
            done.add(stdnum.isbn.to_isbn13(isbn))
        elif len(isbn) == 13 and isbn.startswith('978'):
            # Only 978-prefixed ISBN-13s have an ISBN-10 form; to_isbn10
            # raises for other prefixes (e.g. 979), which would abort the
            # whole search.
            done.add(stdnum.isbn.to_isbn10(isbn))
    return results