meta
This commit is contained in:
parent
edd42dfd76
commit
d385853186
48 changed files with 1344 additions and 488 deletions
69
oml/meta/worldcat.py
Normal file
69
oml/meta/worldcat.py
Normal file
|
|
@ -0,0 +1,69 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
# vi:si:et:sw=4:sts=4:ts=4
|
||||
from __future__ import division
|
||||
|
||||
from ox.cache import read_url
|
||||
import lxml.html
|
||||
import re
|
||||
from utils import normalize_isbn
|
||||
import stdnum.isbn
|
||||
|
||||
# Root of the WorldCat site; the search and record URLs used in this module
# are built by appending a path to it.
base_url = 'http://www.worldcat.org'
|
||||
|
||||
def get_ids(key, value):
    """Return the other identifiers WorldCat has for one identifier.

    ``key`` names the kind of identifier and ``value`` is the identifier
    itself.  Supported keys are ``'isbn10'``, ``'isbn13'`` and ``'oclc'``;
    any other key yields an empty list without touching the network.

    For an ISBN the WorldCat search page is fetched, the OCLC number of the
    first matching result link is looked up, and the result contains
    ``('oclc', number)`` followed by the ISBN form that was *not* searched
    for (when the record has one).  For an OCLC number the record is looked
    up directly and every ISBN form it has is returned.

    Returns a list of ``(key, value)`` tuples, possibly empty.
    """
    isbn_keys = ['isbn10', 'isbn13']
    ids = []
    if key in isbn_keys:
        url = '%s/search?qt=worldcat_org_bks&q=%s' % (base_url, value)
        html = read_url(url)
        # Raw string so that \d reaches the regex engine untouched.
        matches = re.compile(r'/title.*?oclc/(\d+).*?"').findall(html)
        if matches:
            # Only the first search hit is trusted.
            record = lookup(matches[0])
            ids.append(('oclc', matches[0]))
            ids.extend(
                (k, record[k]) for k in isbn_keys
                if k in record and k != key
            )
    elif key == 'oclc':
        record = lookup(value)
        ids.extend((k, record[k]) for k in isbn_keys if k in record)
    if ids:
        # Debug trace on stdout; single-argument form prints the same text
        # whether print is a statement or a function.
        print('worldcat.get_ids %s %s' % (key, value))
        print(ids)
    return ids
|
||||
|
||||
def lookup(id):
    """Scrape the WorldCat record page for an OCLC number.

    ``id`` is the OCLC number; it is interpolated into the record URL and
    stored under ``'oclc'`` in the result.

    The returned dict is assembled in this order:

    * one entry per element whose ``id`` attribute contains ``bibtip``,
      keyed by that attribute with ``bibtip_`` removed;
    * the ``Key: value`` lines of the ``util-em-note`` textarea, with keys
      lower-cased and values stripped (these override the entries above);
    * the keys ``id``, ``instance``, ``mediatype``, ``reclist`` and
      ``shorttitle`` are dropped;
    * ``isxn`` is replaced by ``isbn10`` / ``isbn13`` for each valid ISBN it
      contains;
    * ``author``, when present, is wrapped in a one-element list.

    A page without a usable note textarea no longer raises; the fields
    gathered from the other elements are returned instead.
    """
    data = {
        'oclc': id
    }
    url = '%s/oclc/%s' % (base_url, id)
    html = read_url(url).decode('utf-8')
    doc = lxml.html.document_fromstring(html)
    for e in doc.xpath("//*[contains(@id, 'bibtip')]"):
        data[e.attrib['id'].replace('bibtip_', '')] = e.text_content()
    data.update(_parse_note(doc))
    for key in ('id', 'instance', 'mediatype', 'reclist', 'shorttitle'):
        data.pop(key, None)
    if 'isxn' in data:
        # 'isxn' holds space-separated numbers; keep only valid ISBNs.
        # If several share a length the last one wins.
        for isbn in data.pop('isxn').split(' '):
            # presumably strips separators -- verify against utils.normalize_isbn
            isbn = normalize_isbn(isbn)
            if stdnum.isbn.is_valid(isbn):
                data['isbn%d' % len(isbn)] = isbn
    if 'author' in data:
        data['author'] = [data['author']]
    # Debug trace on stdout; single-argument form prints the same text
    # whether print is a statement or a function.
    print('worldcat.lookup %s' % (id,))
    print(data.keys())
    return data


def _parse_note(doc):
    """Return the fields of the page's ``util-em-note`` textarea as a dict.

    The second blank-line-separated paragraph of the textarea is read as
    ``Key: value`` lines.  Keys are lower-cased, values stripped.  A missing
    or empty textarea, a missing second paragraph, and lines without a colon
    are skipped instead of raising.
    """
    notes = doc.xpath('//textarea[@id="util-em-note"]')
    text = notes[0].text if notes else None
    if not text:
        return {}
    paragraphs = text.split('\n\n')
    if len(paragraphs) < 2:
        return {}
    fields = {}
    for line in paragraphs[1].split('\n'):
        name, sep, value = line.partition(':')
        if sep:
            fields[name.lower()] = value.strip()
    return fields
|
||||
|
||||
# Alias: the record lookup is also exposed under the name ``info``.
info = lookup
|
||||
|
||||
def find(title, author, year):
    """Stub for a title/author/year search.

    The arguments are ignored and a new empty list is returned on every call.
    """
    results = []
    return results
|
||||
|
||||
Loading…
Add table
Add a link
Reference in a new issue