openmedialibrary/oml/meta/duckduckgo.py

30 lines
681 B
Python
Raw Normal View History

2014-05-17 11:19:32 +02:00
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
2014-09-03 00:32:44 +02:00
2014-05-17 11:19:32 +02:00
import ox.web.duckduckgo
import stdnum.isbn
from .utils import find_isbns
2014-05-17 16:26:59 +02:00
import logging
2015-11-29 15:56:38 +01:00
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
2014-05-17 16:26:59 +02:00
2014-05-17 11:19:32 +02:00
2014-05-21 02:02:21 +02:00
def find(query):
    """Look up ISBNs for *query* via ox.web.duckduckgo.

    ' isbn' is appended to the query before it is handed to
    ox.web.duckduckgo.find.  The strings of every search result are
    joined with spaces and scanned with find_isbns; each value found is
    converted with stdnum.isbn.to_isbn13.

    Returns a list of the converted values in first-seen order; values
    that were already recorded (in either ISBN-13 or ISBN-10 form) are
    skipped.
    """
    logger.debug('find %s', query)
    query += ' isbn'

    # Gather every candidate from every result before converting anything.
    candidates = [
        found
        for hit in ox.web.duckduckgo.find(query)
        for found in find_isbns(' '.join(hit))
    ]

    unique = []
    seen = set()
    for raw in candidates:
        if raw in seen:
            continue
        isbn13 = stdnum.isbn.to_isbn13(raw)
        unique.append(isbn13)
        seen.add(isbn13)
        # The membership test above runs on the raw value, so also record
        # the ISBN-10 form; a later ISBN-10 hit for the same number is
        # then skipped as well.
        if len(isbn13) == 13 and isbn13.startswith('978'):
            seen.add(stdnum.isbn.to_isbn10(isbn13))
    return unique