52 lines
2 KiB
Python
52 lines
2 KiB
Python
# -*- coding: utf-8 -*-
|
|
# vi:si:et:sw=4:sts=4:ts=4
|
|
from __future__ import division
|
|
|
|
import ox
|
|
from ox.cache import read_url
|
|
import xml.etree.ElementTree as ET
|
|
|
|
from utils import normalize_isbn
|
|
from marc_countries import COUNTRIES
|
|
|
|
def _join_text(elements):
    """Concatenate the text of *elements*, treating missing text as empty."""
    return ''.join([e.text or '' for e in elements])


def info(id):
    """Fetch the MODS record for *id* from lccn.loc.gov and return its metadata.

    The record is downloaded from ``http://lccn.loc.gov/<id>/mods`` via
    ``read_url`` and parsed as MODS v3 XML.

    Returns a dict that may contain the following keys (keys whose value
    would be empty are omitted):

    - ``title``: concatenated text of the children of the first titleInfo
    - ``place``: list of textual place terms from the first originInfo
    - ``country``: coded place term, mapped through ``COUNTRIES`` when known
    - ``publisher``, ``date``: from the first originInfo
    - ``oclc``, ``lccn``: identifiers (``ocn`` is removed from the OCLC value)
    - ``isbn<N>``: normalized ISBN, where N is the length of the normalized
      value
    - ``classification``: classification whose authority is ``ddc``
    - ``author``: list of normalized names whose usage is ``primary``

    Raises whatever ``read_url`` or ``ET.fromstring`` raise for an
    unreachable or malformed record.
    """
    ns = '{http://www.loc.gov/mods/v3}'
    url = 'http://lccn.loc.gov/%s/mods' % id
    data = read_url(url)
    mods = ET.fromstring(data)

    # Named `result` so the local dict does not shadow this function.
    result = {}

    # A record without <titleInfo> used to raise IndexError; skip it instead.
    title_info = mods.find(ns + 'titleInfo')
    if title_info is not None:
        result['title'] = _join_text(list(title_info))

    # Compare against None: the truth value of an Element depends on whether
    # it has children, not on whether it was found.
    origin = mods.find(ns + 'originInfo')
    if origin is not None:
        places = []
        for place in origin.findall(ns + 'place'):
            # Only the first placeTerm of each place is considered.
            term = place.find(ns + 'placeTerm')
            if term is None or not term.text:
                continue
            # .get() tolerates a placeTerm without a type attribute.
            kind = term.get('type')
            if kind == 'text':
                places.append(term.text)
            elif kind == 'code':
                # Fall back to the raw code when it is not in COUNTRIES.
                result['country'] = COUNTRIES.get(term.text, term.text)
        result['place'] = places
        result['publisher'] = _join_text(origin.findall(ns + 'publisher'))
        result['date'] = _join_text(origin.findall(ns + 'dateIssued'))

    for identifier in mods.findall(ns + 'identifier'):
        value = identifier.text
        if not value:
            # Empty identifiers carry no information and would otherwise
            # break the string operations below.
            continue
        kind = identifier.get('type')
        if kind == 'oclc':
            result['oclc'] = value.replace('ocn', '')
        elif kind == 'lccn':
            result['lccn'] = value
        elif kind == 'isbn':
            isbn = normalize_isbn(value)
            # Key is derived from the normalized length (e.g. isbn10/isbn13).
            result['isbn%s' % len(isbn)] = isbn

    for classification in mods.findall(ns + 'classification'):
        if classification.get('authority') == 'ddc':
            result['classification'] = classification.text

    # Names without a usage attribute are simply not primary; .get() avoids
    # the KeyError that attrib['usage'] raised for them.
    authors = [
        _join_text(name.findall(ns + 'namePart'))
        for name in mods.findall(ns + 'name')
        if name.get('usage') == 'primary'
    ]
    # The last character of each joined name is dropped before normalizing.
    # NOTE(review): presumably trailing punctuation in the record - confirm.
    result['author'] = [ox.normalize_name(a[:-1]) for a in authors]

    # Build a filtered copy rather than deleting keys while iterating, which
    # raises RuntimeError on Python 3.
    return {key: value for key, value in result.items() if value}
|