remove unused scrapers
This commit is contained in:
parent
47647a7b86
commit
a172e7b4b7
8 changed files with 0 additions and 1951 deletions
|
@ -5,11 +5,6 @@
|
||||||
import stdnum.isbn
|
import stdnum.isbn
|
||||||
import ox
|
import ox
|
||||||
|
|
||||||
from . import abebooks
|
|
||||||
from . import loc
|
|
||||||
from . import lookupbyisbn
|
|
||||||
from . import openlibrary
|
|
||||||
from . import worldcat
|
|
||||||
from . import google
|
from . import google
|
||||||
from . import duckduckgo
|
from . import duckduckgo
|
||||||
|
|
||||||
|
|
|
@ -1,50 +0,0 @@
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
# vi:si:et:sw=4:sts=4:ts=4
|
|
||||||
|
|
||||||
|
|
||||||
import re
|
|
||||||
|
|
||||||
from ox.cache import read_url
|
|
||||||
import lxml.html
|
|
||||||
|
|
||||||
import logging
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
base = 'http://www.abebooks.com'
|
|
||||||
|
|
||||||
def get_ids(key, value):
    """Check whether AbeBooks lists a book for the given identifier.

    Only ``key == 'isbn'`` triggers a request: the AbeBooks ISBN search
    page for ``value`` is fetched through ``read_url`` and scanned for a
    link to a book details page.

    Returns ``[(key, value)]`` when such a link is present, otherwise an
    empty list.
    """
    if key != 'isbn':
        return []

    search_url = '%s/servlet/SearchResults?isbn=%s&sts=t' % (base, value)
    page = read_url(search_url, unicode=True)
    # A single link to a details page is enough to treat the ISBN as known.
    if not re.search('href="(/servlet/BookDetailsPL[^"]+)"', page):
        return []

    found = [(key, value)]
    logger.debug('get_ids %s %s => %s', key, value, found)
    return found
|
|
||||||
|
|
||||||
def lookup(id):
    """Collect bibliographic fields for ``id`` from AbeBooks.

    The ISBN search page is fetched, the first book details link on it is
    followed, and the text of every element whose ``id`` attribute contains
    ``biblio`` is gathered.

    Returns a dict keyed by the element id with ``biblio-`` removed
    (``pubdate`` is stored as ``date``). The dict is empty when the search
    page has no details link.
    """
    logger.debug('lookup %s', id)

    renamed = {'pubdate': 'date'}
    ignored = ('bookcondition', 'binding', 'edition-amz')
    # Fields whose text is only the label carry no actual value.
    label_only = {
        'date': 'Publication Date:',
        'publisher': 'Publisher:',
    }

    result = {}
    search_url = '%s/servlet/SearchResults?isbn=%s&sts=t' % (base, id)
    listing = read_url(search_url, unicode=True)
    first_link = re.search('href="(/servlet/BookDetailsPL[^"]+)"', listing)
    if first_link is None:
        return result

    details_url = '%s%s' % (base, first_link.group(1))
    tree = lxml.html.document_fromstring(read_url(details_url, unicode=True))
    for node in tree.xpath("//*[contains(@id, 'biblio')]"):
        field = node.attrib['id'].replace('biblio-', '')
        text = node.text_content().strip()
        target = renamed.get(field, field)
        if label_only.get(target) == text:
            continue
        if text and field not in ignored:
            result[target] = text
    return result
|
|
|
@ -1,962 +0,0 @@
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
# vi:si:et:sw=4:sts=4:ts=4
|
|
||||||
|
|
||||||
def get_classification(id):
    """Return ``id``, followed by its Dewey caption when one is known.

    The lookup key is built from the digits that precede the first ``/``
    and the first ``.`` in ``id``. If that key is in ``DEWEY`` the result is
    ``'<id> <caption>'``; otherwise it is just ``id`` formatted as a string.
    """
    name = '%s' % id
    integer_part = id.split('/')[0].split('.')[0]
    digits = ''.join(ch for ch in integer_part if ch.isdigit())
    # Numbers written with a leading zero (000-099) belong to the 000 main
    # class, which the visible part of the table lists under the key "0".
    # Stripping the zeros instead would turn "000" into "" (never matched)
    # and "004" into "4", the key of an unrelated class.
    key = '0' if digits.startswith('0') else digits
    if key in DEWEY:
        name = '%s %s' % (name, DEWEY[key])
    return name
|
|
||||||
|
|
||||||
DEWEY = {
|
|
||||||
"0": "Computer science, information & general works",
|
|
||||||
"1": "Philosophy & psychology",
|
|
||||||
"10": "Philosophy",
|
|
||||||
"100": "Philosophy, parapsychology and occultism, psychology",
|
|
||||||
"101": "Theory of philosophy",
|
|
||||||
"102": "Miscellany of philosophy",
|
|
||||||
"103": "Dictionaries, encyclopedias, concordances of philosophy",
|
|
||||||
"105": "Serial publications",
|
|
||||||
"106": "Organizations and management of philosophy",
|
|
||||||
"107": "Education, research, related topics of philosophy",
|
|
||||||
"108": "Groups of people",
|
|
||||||
"109": "Historical and collected persons treatment of philosophy",
|
|
||||||
"11": "Metaphysics",
|
|
||||||
"110": "Metaphysics",
|
|
||||||
"111": "Ontology",
|
|
||||||
"113": "Cosmology (Philosophy of nature)",
|
|
||||||
"114": "Space",
|
|
||||||
"115": "Time",
|
|
||||||
"116": "Change",
|
|
||||||
"117": "Structure",
|
|
||||||
"118": "Force and energy",
|
|
||||||
"119": "Number and quantity",
|
|
||||||
"12": "Epistemology",
|
|
||||||
"120": "Epistemology, causation & humankind",
|
|
||||||
"121": "Epistemology (Theory of knowledge)",
|
|
||||||
"122": "Causation",
|
|
||||||
"123": "Determinism and indeterminism",
|
|
||||||
"124": "Teleology",
|
|
||||||
"126": "The self",
|
|
||||||
"127": "The unconscious and the subconscious",
|
|
||||||
"128": "Humankind",
|
|
||||||
"129": "Origin and destiny of individual souls",
|
|
||||||
"13": "Parapsychology & occultism",
|
|
||||||
"130": "Parapsychology and occultism",
|
|
||||||
"131": "Parapsychological and occult techniques for achieving well-being, happiness, success",
|
|
||||||
"133": "Specific topics in parapsychology & occultism",
|
|
||||||
"135": "Dreams and mysteries",
|
|
||||||
"137": "Divinatory graphology",
|
|
||||||
"138": "Physiognomy",
|
|
||||||
"139": "Phrenology",
|
|
||||||
"14": "Philosophical schools of thought",
|
|
||||||
"140": "Specific philosophical schools",
|
|
||||||
"141": "Idealism & related systems",
|
|
||||||
"142": "Critical philosophy",
|
|
||||||
"143": "Bergsonism and intuitionism",
|
|
||||||
"144": "Humanism and related systems and doctrines",
|
|
||||||
"145": "Sensationalism",
|
|
||||||
"146": "Naturalism and related systems and doctrines",
|
|
||||||
"147": "Pantheism and related systems and doctrines",
|
|
||||||
"148": "Dogmatism, eclecticism, liberalism, syncretism, traditionalism",
|
|
||||||
"149": "Other philosophical systems",
|
|
||||||
"15": "Psychology",
|
|
||||||
"150": "Psychology",
|
|
||||||
"152": "Sensory perception, movement, emotions, physiological drives",
|
|
||||||
"153": "Conscious mental processes and intelligence",
|
|
||||||
"154": "Subconscious and altered states and processes",
|
|
||||||
"155": "Differential and developmental psychology",
|
|
||||||
"156": "Comparative psychology",
|
|
||||||
"158": "Applied psychology",
|
|
||||||
"16": "Philosophical logic",
|
|
||||||
"160": "Logic",
|
|
||||||
"161": "Induction",
|
|
||||||
"162": "Deduction",
|
|
||||||
"165": "Fallacies and sources of error",
|
|
||||||
"166": "Syllogisms",
|
|
||||||
"167": "Hypotheses",
|
|
||||||
"168": "Argument and persuasion",
|
|
||||||
"169": "Analogy",
|
|
||||||
"17": "Ethics",
|
|
||||||
"170": "Ethics",
|
|
||||||
"171": "Ethical systems",
|
|
||||||
"172": "Political ethics",
|
|
||||||
"173": "Ethics of family relationships",
|
|
||||||
"174": "Occupational ethics",
|
|
||||||
"175": "Ethics of recreation, leisure, public performances, communication",
|
|
||||||
"176": "Ethics of sex and reproduction",
|
|
||||||
"177": "Ethics of social relations",
|
|
||||||
"178": "Ethics of consumption",
|
|
||||||
"179": "Other ethical norms",
|
|
||||||
"18": "Ancient, medieval & eastern philosophy",
|
|
||||||
"180": "Ancient, medieval, eastern philosophy",
|
|
||||||
"181": "Eastern philosophy",
|
|
||||||
"182": "Pre-Socratic Greek philosophies",
|
|
||||||
"183": "Sophistic, Socratic, related Greek philosophies",
|
|
||||||
"184": "Platonic philosophy",
|
|
||||||
"185": "Aristotelian philosophy",
|
|
||||||
"186": "Skeptic and Neoplatonic philosophies",
|
|
||||||
"187": "Epicurean philosophy",
|
|
||||||
"188": "Stoic philosophy",
|
|
||||||
"189": "Medieval western philosophy",
|
|
||||||
"19": "Modern western philosophy",
|
|
||||||
"190": "Modern western and other noneastern philosophy",
|
|
||||||
"191": "United States and Canada",
|
|
||||||
"192": "Philosophy of British Isles",
|
|
||||||
"193": "Philosophy of Germany and Austria",
|
|
||||||
"194": "Philosophy of France",
|
|
||||||
"195": "Philosophy of Italy",
|
|
||||||
"196": "Philosophy of Spain and Portugal",
|
|
||||||
"197": "Philosophy of Russia",
|
|
||||||
"198": "Philosophy of Scandinavia and Finland",
|
|
||||||
"199": "Philosophy in other geographic areas",
|
|
||||||
"2": "Religion",
|
|
||||||
"20": "Religion",
|
|
||||||
"200": "Religion",
|
|
||||||
"201": "Religious mythology, general classes of religion, interreligious relations and attitudes, social theology",
|
|
||||||
"202": "Doctrines",
|
|
||||||
"203": "Public worship and other practices",
|
|
||||||
"204": "Religious experience, life, practice",
|
|
||||||
"205": "Religious ethics",
|
|
||||||
"206": "Leaders & organization",
|
|
||||||
"207": "Missions & religious education",
|
|
||||||
"208": "Sources",
|
|
||||||
"209": "Sects and reform movements",
|
|
||||||
"21": "Philosophy & theory of religion",
|
|
||||||
"210": "Philosophy & theory of religion",
|
|
||||||
"211": "Concepts of God",
|
|
||||||
"212": "Existence of God, ways of knowing God, attributes of God",
|
|
||||||
"213": "Creation",
|
|
||||||
"214": "Theodicy",
|
|
||||||
"215": "Science and religion",
|
|
||||||
"218": "Humankind",
|
|
||||||
"22": "The Bible",
|
|
||||||
"220": "Bible",
|
|
||||||
"221": "Old Testament (Tanakh)",
|
|
||||||
"222": "Historical books of Old Testament",
|
|
||||||
"223": "Poetic books of Old Testament",
|
|
||||||
"224": "Prophetic books of Old Testament",
|
|
||||||
"225": "New Testament",
|
|
||||||
"226": "Gospels and Acts",
|
|
||||||
"227": "Epistles",
|
|
||||||
"228": "Revelation (Apocalypse)",
|
|
||||||
"229": "Apocrypha & pseudepigrapha",
|
|
||||||
"23": "Christianity",
|
|
||||||
"230": "Christianity Christian theology",
|
|
||||||
"231": "God",
|
|
||||||
"232": "Jesus Christ and his family",
|
|
||||||
"233": "Humankind",
|
|
||||||
"234": "Salvation and grace",
|
|
||||||
"235": "Spiritual beings",
|
|
||||||
"236": "Eschatology",
|
|
||||||
"238": "Creeds, confessions of faith, covenants, catechisms",
|
|
||||||
"239": "Apologetics and polemics",
|
|
||||||
"24": "Christian practice & observance",
|
|
||||||
"240": "Christian moral & devotional theology",
|
|
||||||
"241": "Christian ethics",
|
|
||||||
"242": "Devotional literature",
|
|
||||||
"243": "Evangelistic writings for individuals and families",
|
|
||||||
"246": "Use of art in Christianity",
|
|
||||||
"247": "Church furnishings and related articles",
|
|
||||||
"248": "Christian experience, practice, life",
|
|
||||||
"249": "Christian observances in family life",
|
|
||||||
"25": "Christian pastoral practice & religious orders",
|
|
||||||
"250": "Local Christian church and Christian religious orders",
|
|
||||||
"251": "Preaching (Homiletics)",
|
|
||||||
"252": "Texts of sermons",
|
|
||||||
"253": "Pastoral office and work (Pastoral theology)",
|
|
||||||
"254": "Parish administration",
|
|
||||||
"255": "Religious congregations & orders",
|
|
||||||
"259": "Pastoral care of specific kinds of persons",
|
|
||||||
"26": "Christian organization, social work & worship",
|
|
||||||
"260": "Christian social and ecclesiastical theology",
|
|
||||||
"261": "Social theology and interreligious relations and attitudes",
|
|
||||||
"262": "Ecclesiology",
|
|
||||||
"263": "Days, times & places of observance",
|
|
||||||
"264": "Public worship",
|
|
||||||
"265": "Sacraments, other rites and acts",
|
|
||||||
"266": "Missions",
|
|
||||||
"267": "Associations for religious work",
|
|
||||||
"268": "Religious education",
|
|
||||||
"269": "Spiritual renewal",
|
|
||||||
"27": "History of Christianity",
|
|
||||||
"270": "History of Christianity & Christian church",
|
|
||||||
"271": "Religious congregations and orders in church history",
|
|
||||||
"272": "Persecutions in general church history",
|
|
||||||
"273": "Doctrinal controversies and heresies in general church history",
|
|
||||||
"274": "Christianity in Europe",
|
|
||||||
"275": "History of Christianity in Asia",
|
|
||||||
"276": "Christianity in Africa",
|
|
||||||
"277": "Christianity in North America",
|
|
||||||
"278": "Christianity in South America",
|
|
||||||
"279": "Christianity in Australasia, Pacific Ocean islands, Atlantic Ocean islands, Arctic islands, Antarctica",
|
|
||||||
"28": "Christian denominations",
|
|
||||||
"280": "Denominations and sects of Christian church",
|
|
||||||
"281": "Early church and Eastern churches",
|
|
||||||
"282": "Roman Catholic Church",
|
|
||||||
"283": "Anglican churches",
|
|
||||||
"284": "Protestant denominations of Continental origin and related bodies",
|
|
||||||
"285": "Presbyterian churches, Reformed churches centered in America, Congregational churches, Puritanism",
|
|
||||||
"286": "Baptist, Restoration movement, Adventist churches",
|
|
||||||
"287": "Methodist churches; churches related to Methodism",
|
|
||||||
"289": "Other denominations & sects",
|
|
||||||
"29": "Other religions",
|
|
||||||
"290": "Other religions",
|
|
||||||
"292": "Classical religion (Greek and Roman religion)",
|
|
||||||
"293": "Germanic religion",
|
|
||||||
"294": "Religions of Indic origin",
|
|
||||||
"295": "Zoroastrianism (Mazdaism, Parseeism)",
|
|
||||||
"296": "Judaism",
|
|
||||||
"297": "Islam, Babism, Bahai Faith",
|
|
||||||
"299": "Religions not provided for elsewhere",
|
|
||||||
"3": "Social sciences",
|
|
||||||
"30": "Social sciences, sociology & anthropology",
|
|
||||||
"300": "Social sciences",
|
|
||||||
"301": "Sociology and anthropology",
|
|
||||||
"302": "Social interaction",
|
|
||||||
"303": "Social processes",
|
|
||||||
"304": "Factors affecting social behavior",
|
|
||||||
"305": "Groups of people",
|
|
||||||
"306": "Culture and institutions",
|
|
||||||
"307": "Communities",
|
|
||||||
"31": "Statistics",
|
|
||||||
"310": "Collections of general statistics",
|
|
||||||
"314": "General statistics of Europe",
|
|
||||||
"315": "General statistics of Asia",
|
|
||||||
"316": "General statistics of Africa",
|
|
||||||
"317": "General statistics of North America",
|
|
||||||
"318": "General statistics of South America",
|
|
||||||
"319": "General statistics of other parts of the world Of Pacific Ocean islands",
|
|
||||||
"32": "Political science",
|
|
||||||
"320": "Political science (Politics and government)",
|
|
||||||
"321": "Systems of governments and states",
|
|
||||||
"322": "Relation of state to organized groups",
|
|
||||||
"323": "Civil and political rights",
|
|
||||||
"324": "The political process",
|
|
||||||
"325": "International migration and colonization",
|
|
||||||
"326": "Slavery and emancipation",
|
|
||||||
"327": "International relations",
|
|
||||||
"328": "The legislative process",
|
|
||||||
"33": "Economics",
|
|
||||||
"330": "Economics",
|
|
||||||
"331": "Labor economics",
|
|
||||||
"332": "Financial economics",
|
|
||||||
"333": "Economics of land and energy",
|
|
||||||
"334": "Cooperatives",
|
|
||||||
"335": "Socialism and related systems",
|
|
||||||
"336": "Public finance",
|
|
||||||
"337": "International economics",
|
|
||||||
"338": "Production",
|
|
||||||
"339": "Macroeconomics and related topics",
|
|
||||||
"34": "Law",
|
|
||||||
"340": "Law",
|
|
||||||
"341": "Law of nations",
|
|
||||||
"342": "Constitutional and administrative law",
|
|
||||||
"343": "Military, defense, public property, public finance, tax, commerce (trade), industrial law",
|
|
||||||
"344": "Labor, social, education & cultural law",
|
|
||||||
"345": "Criminal law",
|
|
||||||
"346": "Private law",
|
|
||||||
"347": "Procedure and courts",
|
|
||||||
"348": "Laws, regulations, cases",
|
|
||||||
"349": "Law of specific jurisdictions, areas, socioeconomic regions, regional intergovernmental organizations",
|
|
||||||
"35": "Public administration & military science",
|
|
||||||
"350": "Public administration and military science",
|
|
||||||
"351": "Public administration",
|
|
||||||
"352": "General considerations of public administration",
|
|
||||||
"353": "Specific fields of public administration",
|
|
||||||
"354": "Public administration of economy and environment",
|
|
||||||
"355": "Military science",
|
|
||||||
"356": "Foot forces and warfare",
|
|
||||||
"357": "Mounted forces & warfare",
|
|
||||||
"358": "Air and other specialized forces and warfare; engineering and related services",
|
|
||||||
"359": "Sea forces and warfare",
|
|
||||||
"36": "Social problems & social services",
|
|
||||||
"360": "Social problems & social services",
|
|
||||||
"361": "Social problems & social welfare in general",
|
|
||||||
"362": "Social welfare problems and services",
|
|
||||||
"363": "Other social problems and services",
|
|
||||||
"364": "Criminology",
|
|
||||||
"365": "Penal and related institutions",
|
|
||||||
"366": "Secret associations and societies",
|
|
||||||
"367": "General clubs",
|
|
||||||
"368": "Insurance",
|
|
||||||
"369": "Miscellaneous kinds of associations",
|
|
||||||
"37": "Education",
|
|
||||||
"370": "Education",
|
|
||||||
"371": "Schools and their activities; special education",
|
|
||||||
"372": "Primary education (Elementary education)",
|
|
||||||
"373": "Secondary education",
|
|
||||||
"374": "Adult education",
|
|
||||||
"375": "Curricula",
|
|
||||||
"378": "Higher education (Tertiary education)",
|
|
||||||
"379": "Public policy issues in education",
|
|
||||||
"38": "Commerce, communications & transportation",
|
|
||||||
"380": "Commerce, communications, transportation",
|
|
||||||
"381": "Commerce (Trade)",
|
|
||||||
"382": "International commerce (Foreign trade)",
|
|
||||||
"383": "Postal communication",
|
|
||||||
"384": "Communications",
|
|
||||||
"385": "Railroad transportation",
|
|
||||||
"386": "Inland waterway & ferry transportation",
|
|
||||||
"387": "Water, air & space transportation",
|
|
||||||
"388": "Transportation",
|
|
||||||
"389": "Metrology and standardization",
|
|
||||||
"39": "Customs, etiquette & folklore",
|
|
||||||
"390": "Customs, etiquette, folklore",
|
|
||||||
"391": "Costume and personal appearance",
|
|
||||||
"392": "Customs of life cycle and domestic life",
|
|
||||||
"393": "Death customs",
|
|
||||||
"394": "General customs",
|
|
||||||
"395": "Etiquette (Manners)",
|
|
||||||
"398": "Folklore",
|
|
||||||
"399": "Customs of war and diplomacy",
|
|
||||||
"4": "Language",
|
|
||||||
"40": "Language",
|
|
||||||
"400": "Language",
|
|
||||||
"401": "Philosophy and theory; international languages",
|
|
||||||
"402": "Miscellany",
|
|
||||||
"403": "Dictionaries, encyclopedias, concordances",
|
|
||||||
"404": "Special topics of language",
|
|
||||||
"405": "Serial publications",
|
|
||||||
"406": "Organizations and management",
|
|
||||||
"407": "Education, research & related topics",
|
|
||||||
"408": "Groups of people",
|
|
||||||
"409": "Geographic treatment and biography",
|
|
||||||
"41": "Linguistics",
|
|
||||||
"410": "Linguistics",
|
|
||||||
"411": "Writing systems",
|
|
||||||
"412": "Etymology of standard forms of languages",
|
|
||||||
"413": "Dictionaries of standard forms of languages",
|
|
||||||
"414": "Phonology & phonetics",
|
|
||||||
"415": "Grammar of standard forms of languages",
|
|
||||||
"417": "Dialectology and historical linguistics",
|
|
||||||
"418": "Standard usage (Prescriptive linguistics)",
|
|
||||||
"419": "Sign languages",
|
|
||||||
"42": "English & Old English languages",
|
|
||||||
"420": "English & Old English languages",
|
|
||||||
"421": "Writing system, phonology, phonetics of standard English",
|
|
||||||
"422": "Etymology of standard English",
|
|
||||||
"423": "Dictionaries of standard English",
|
|
||||||
"425": "Grammar of standard English",
|
|
||||||
"427": "Historical and geographic variations, modern nongeographic variations of English",
|
|
||||||
"428": "Standard English usage (Prescriptive linguistics)",
|
|
||||||
"429": "Old English (Anglo-Saxon)",
|
|
||||||
"43": "German & related languages",
|
|
||||||
"430": "German & related languages",
|
|
||||||
"431": "German writing systems & phonology",
|
|
||||||
"432": "Etymology of standard German",
|
|
||||||
"433": "Dictionaries of standard German",
|
|
||||||
"435": "Grammar of standard German",
|
|
||||||
"437": "Historical and geographic variations, modern nongeographic variations of German",
|
|
||||||
"438": "Standard German usage",
|
|
||||||
"439": "Other Germanic languages",
|
|
||||||
"44": "French & related languages",
|
|
||||||
"440": "Romance languages French",
|
|
||||||
"441": "Writing systems, phonology, phonetics of standard French",
|
|
||||||
"442": "Etymology of standard French",
|
|
||||||
"443": "Dictionaries of standard French",
|
|
||||||
"445": "Grammar of standard French",
|
|
||||||
"447": "Historical and geographic variations, modern nongeographic variations of French",
|
|
||||||
"448": "Standard French usage (Prescriptive linguistics)",
|
|
||||||
"449": "Occitan, Catalan, Franco-Provençal",
|
|
||||||
"45": "Italian, Romanian & related languages",
|
|
||||||
"450": "Italian, Dalmatian, Romanian, Rhaetian, Sardinian, Corsican",
|
|
||||||
"451": "Writing systems, phonology, phonetics of standard Italian",
|
|
||||||
"452": "Etymology of standard Italian",
|
|
||||||
"453": "Dictionaries of standard Italian",
|
|
||||||
"455": "Grammar of standard Italian",
|
|
||||||
"457": "Historical and geographic variations, modern nongeographic variations of Italian",
|
|
||||||
"458": "Standard Italian usage",
|
|
||||||
"459": "Sardinian",
|
|
||||||
"46": "Spanish, Portuguese, Galician",
|
|
||||||
"460": "Spanish, Portuguese, Galician",
|
|
||||||
"461": "Writing systems, phonology, phonetics of standard Spanish",
|
|
||||||
"462": "Etymology of standard Spanish",
|
|
||||||
"463": "Dictionaries of standard Spanish",
|
|
||||||
"465": "Grammar of standard Spanish",
|
|
||||||
"467": "Historical and geographic variations, modern nongeographic variations of Spanish",
|
|
||||||
"468": "Standard Spanish usage",
|
|
||||||
"469": "Portuguese",
|
|
||||||
"47": "Latin & Italic languages",
|
|
||||||
"470": "Italic languages Latin",
|
|
||||||
"471": "Writing systems, phonology, phonetics of classical Latin",
|
|
||||||
"472": "Classical Latin etymology",
|
|
||||||
"473": "Dictionaries of classical Latin",
|
|
||||||
"475": "Grammar of classical Latin",
|
|
||||||
"477": "Old, postclassical & Vulgar Latin",
|
|
||||||
"478": "Classical Latin usage (Prescriptive linguistics)",
|
|
||||||
"479": "Other Italic languages",
|
|
||||||
"48": "Classical & modern Greek languages",
|
|
||||||
"480": "Classical Greek and related Hellenic languages",
|
|
||||||
"481": "Writing systems, phonology, phonetics of classical Greek",
|
|
||||||
"482": "Etymology of classical Greek",
|
|
||||||
"483": "Dictionaries of classical Greek",
|
|
||||||
"485": "Grammar of classical Greek",
|
|
||||||
"487": "Preclassical and postclassical Greek",
|
|
||||||
"488": "Classical Greek usage (Prescriptive linguistics)",
|
|
||||||
"489": "Other Hellenic languages",
|
|
||||||
"49": "Other languages",
|
|
||||||
"490": "Other languages",
|
|
||||||
"491": "East Indo-European and Celtic languages",
|
|
||||||
"492": "Afro-Asiatic languages",
|
|
||||||
"493": "Non-Semitic Afro-Asiatic languages",
|
|
||||||
"494": "Altaic, Uralic, Hyperborean, Dravidian languages, miscellaneous languages of south Asia",
|
|
||||||
"495": "Languages of east and southeast Asia",
|
|
||||||
"496": "African languages",
|
|
||||||
"497": "North American native languages",
|
|
||||||
"498": "South American native languages",
|
|
||||||
"499": "Austronesian & other languages",
|
|
||||||
"5": "Science",
|
|
||||||
"50": "Science",
|
|
||||||
"500": "Science",
|
|
||||||
"501": "Philosophy & theory",
|
|
||||||
"502": "Miscellany",
|
|
||||||
"503": "Dictionaries, encyclopedias, concordances",
|
|
||||||
"505": "Serial publications",
|
|
||||||
"506": "Organizations and management",
|
|
||||||
"507": "Education, research, related topics",
|
|
||||||
"508": "Natural history",
|
|
||||||
"509": "Historical, geographic & persons treatment",
|
|
||||||
"51": "Mathematics",
|
|
||||||
"510": "Mathematics",
|
|
||||||
"511": "General principles of mathematics",
|
|
||||||
"512": "Algebra",
|
|
||||||
"513": "Arithmetic",
|
|
||||||
"514": "Topology",
|
|
||||||
"515": "Analysis",
|
|
||||||
"516": "Geometry",
|
|
||||||
"518": "Numerical analysis",
|
|
||||||
"519": "Probabilities and applied mathematics",
|
|
||||||
"52": "Astronomy",
|
|
||||||
"520": "Astronomy and allied sciences",
|
|
||||||
"521": "Celestial mechanics",
|
|
||||||
"522": "Techniques, procedures, apparatus, equipment, materials",
|
|
||||||
"523": "Specific celestial bodies and phenomena",
|
|
||||||
"525": "Earth (Astronomical geography)",
|
|
||||||
"526": "Mathematical geography",
|
|
||||||
"527": "Celestial navigation",
|
|
||||||
"528": "Ephemerides",
|
|
||||||
"529": "Chronology",
|
|
||||||
"53": "Physics",
|
|
||||||
"530": "Physics",
|
|
||||||
"531": "Classical mechanics",
|
|
||||||
"532": "Fluid mechanics; liquid mechanics",
|
|
||||||
"533": "Pneumatics (Gas mechanics)",
|
|
||||||
"534": "Sound and related vibrations",
|
|
||||||
"535": "Light and infrared and ultraviolet phenomena",
|
|
||||||
"536": "Heat",
|
|
||||||
"537": "Electricity & electronics",
|
|
||||||
"538": "Magnetism",
|
|
||||||
"539": "Modern physics",
|
|
||||||
"54": "Chemistry",
|
|
||||||
"540": "Chemistry and allied sciences",
|
|
||||||
"541": "Physical chemistry",
|
|
||||||
"542": "Techniques, equipment & materials",
|
|
||||||
"543": "Analytical chemistry",
|
|
||||||
"546": "Inorganic chemistry",
|
|
||||||
"547": "Organic chemistry",
|
|
||||||
"548": "Crystallography",
|
|
||||||
"549": "Mineralogy",
|
|
||||||
"55": "Earth sciences & geology",
|
|
||||||
"550": "Earth sciences",
|
|
||||||
"551": "Geology, hydrology, meteorology",
|
|
||||||
"552": "Petrology",
|
|
||||||
"553": "Economic geology",
|
|
||||||
"554": "Earth sciences of Europe",
|
|
||||||
"555": "Earth sciences of Asia",
|
|
||||||
"556": "Earth sciences of Africa",
|
|
||||||
"557": "Earth sciences of North America",
|
|
||||||
"558": "Earth sciences of South America",
|
|
||||||
"559": "Earth sciences of Australasia, Pacific Ocean islands, Atlantic Ocean islands, Arctic islands, Antarctica, extraterrestrial worlds",
|
|
||||||
"56": "Fossils & prehistoric life",
|
|
||||||
"560": "Paleontology",
|
|
||||||
"561": "Paleobotany; fossil microorganisms",
|
|
||||||
"562": "Fossil invertebrates",
|
|
||||||
"563": "Miscellaneous fossil marine and seashore invertebrates",
|
|
||||||
"564": "Fossil Mollusca and Molluscoidea",
|
|
||||||
"565": "Fossil Arthropoda",
|
|
||||||
"566": "Fossil Chordata",
|
|
||||||
"567": "Fossil cold-blooded vertebrates",
|
|
||||||
"568": "Fossil birds",
|
|
||||||
"569": "Fossil mammals",
|
|
||||||
"57": "Biology",
|
|
||||||
"570": "Life sciences Biology",
|
|
||||||
"571": "Physiology and related subjects",
|
|
||||||
"572": "Biochemistry",
|
|
||||||
"573": "Specific physiological systems in animals, regional histology and physiology in animals",
|
|
||||||
"575": "Specific parts of and physiological systems in plants",
|
|
||||||
"576": "Genetics and evolution",
|
|
||||||
"577": "Ecology",
|
|
||||||
"578": "Natural history of organisms and related subjects",
|
|
||||||
"579": "Microorganisms, fungi, algae",
|
|
||||||
"58": "Plants (Botany)",
|
|
||||||
"580": "Plants",
|
|
||||||
"581": "Specific topics in natural history of plants",
|
|
||||||
"582": "Plants noted for specific vegetative characteristics and flowers",
|
|
||||||
"583": "Dicotyledons",
|
|
||||||
"584": "Monocotyledons",
|
|
||||||
"585": "Pinophyta (Gymnosperms)",
|
|
||||||
"586": "Seedless plants",
|
|
||||||
"587": "Vascular seedless plants",
|
|
||||||
"588": "Bryophyta",
|
|
||||||
"59": "Animals (Zoology)",
|
|
||||||
"590": "Animals",
|
|
||||||
"591": "Specific topics in natural history",
|
|
||||||
"592": "Invertebrates",
|
|
||||||
"593": "Miscellaneous marine and seashore invertebrates",
|
|
||||||
"594": "Mollusks & molluscoids",
|
|
||||||
"595": "Arthropoda",
|
|
||||||
"596": "Chordata",
|
|
||||||
"597": "Cold-blooded vertebrates",
|
|
||||||
"598": "Aves (Birds)",
|
|
||||||
"599": "Mammalia (Mammals)",
|
|
||||||
"6": "Technology",
|
|
||||||
"60": "Technology",
|
|
||||||
"600": "Technology",
|
|
||||||
"601": "Philosophy and theory",
|
|
||||||
"602": "Miscellany",
|
|
||||||
"603": "Dictionaries & encyclopedias",
|
|
||||||
"604": "Technical drawing, hazardous materials technology; groups of people",
|
|
||||||
"605": "Serial publications",
|
|
||||||
"606": "Organizations",
|
|
||||||
"607": "Education, research, related topics",
|
|
||||||
"608": "Patents",
|
|
||||||
"609": "Historical, geographic, persons treatment",
|
|
||||||
"61": "Medicine & health",
|
|
||||||
"610": "Medicine and health",
|
|
||||||
"611": "Human anatomy, cytology, histology",
|
|
||||||
"612": "Human physiology",
|
|
||||||
"613": "Personal health and safety",
|
|
||||||
"614": "Forensic medicine; incidence of injuries, wounds, disease; public preventive medicine",
|
|
||||||
"615": "Pharmacology and therapeutics",
|
|
||||||
"616": "Diseases",
|
|
||||||
"617": "Surgery, regional medicine, dentistry, ophthalmology, otology, audiology",
|
|
||||||
"618": "Other branches of medicine Gynecology and obstetrics",
|
|
||||||
"62": "Engineering",
|
|
||||||
"620": "Engineering and allied operations",
|
|
||||||
"621": "Applied physics",
|
|
||||||
"622": "Mining and related operations",
|
|
||||||
"623": "Military and nautical engineering",
|
|
||||||
"624": "Civil engineering",
|
|
||||||
"625": "Engineering of railroads & roads",
|
|
||||||
"627": "Hydraulic engineering",
|
|
||||||
"628": "Sanitary engineering",
|
|
||||||
"629": "Other branches of engineering",
|
|
||||||
"63": "Agriculture",
|
|
||||||
"630": "Agriculture and related technologies",
|
|
||||||
"631": "Specific techniques; apparatus, equipment, materials",
|
|
||||||
"632": "Plant injuries, diseases, pests",
|
|
||||||
"633": "Field and plantation crops",
|
|
||||||
"634": "Orchards, fruits, forestry",
|
|
||||||
"635": "Garden crops (Horticulture)",
|
|
||||||
"636": "Animal husbandry",
|
|
||||||
"637": "Processing dairy & related products",
|
|
||||||
"638": "Insect culture",
|
|
||||||
"639": "Hunting, fishing, conservation, related technologies",
|
|
||||||
"64": "Home & family management",
|
|
||||||
"640": "Home and family management",
|
|
||||||
"641": "Food & drink",
|
|
||||||
"642": "Meals and table service",
|
|
||||||
"643": "Housing and household equipment",
|
|
||||||
"644": "Household utilities",
|
|
||||||
"645": "Household furnishings",
|
|
||||||
"646": "Sewing, clothing, management of personal and family life",
|
|
||||||
"647": "Management of public households (Institutional housekeeping)",
|
|
||||||
"648": "Housekeeping",
|
|
||||||
"649": "Child rearing; home care of people with disabilities and illnesses",
|
|
||||||
"65": "Management & public relations",
|
|
||||||
"650": "Management and auxiliary services",
|
|
||||||
"651": "Office services",
|
|
||||||
"652": "Processes of written communication",
|
|
||||||
"653": "Shorthand",
|
|
||||||
"657": "Accounting",
|
|
||||||
"658": "General management",
|
|
||||||
"659": "Advertising and public relations",
|
|
||||||
"66": "Chemical engineering",
|
|
||||||
"660": "Chemical engineering and related technologies",
|
|
||||||
"661": "Technology of industrial chemicals",
|
|
||||||
"662": "Technology of explosives, fuels, related products",
|
|
||||||
"663": "Beverage technology",
|
|
||||||
"664": "Food technology",
|
|
||||||
"665": "Technology of industrial oils, fats, waxes, gases",
|
|
||||||
"666": "Ceramic and allied technologies",
|
|
||||||
"667": "Cleaning, color, coating, related technologies",
|
|
||||||
"668": "Technology of other organic products",
|
|
||||||
"669": "Metallurgy",
|
|
||||||
"67": "Manufacturing",
|
|
||||||
"670": "Manufacturing",
|
|
||||||
"671": "Metalworking processes and primary metal products",
|
|
||||||
"672": "Iron, steel, other iron alloys",
|
|
||||||
"673": "Nonferrous metals",
|
|
||||||
"674": "Lumber processing, wood products, cork",
|
|
||||||
"675": "Leather and fur processing",
|
|
||||||
"676": "Pulp and paper technology",
|
|
||||||
"677": "Textiles",
|
|
||||||
"678": "Elastomers and elastomer products",
|
|
||||||
"679": "Other products of specific materials",
|
|
||||||
"68": "Manufacture for specific uses",
|
|
||||||
"680": "Manufacture of products for specific uses",
|
|
||||||
"681": "Precision instruments and other devices",
|
|
||||||
"682": "Small forge work (Blacksmithing)",
|
|
||||||
"683": "Hardware and household appliances",
|
|
||||||
"684": "Furnishings and home workshops",
|
|
||||||
"685": "Leather and fur goods, and related products",
|
|
||||||
"686": "Printing and related activities",
|
|
||||||
"687": "Clothing and accessories",
|
|
||||||
"688": "Other final products & packaging",
|
|
||||||
"69": "Construction of buildings",
|
|
||||||
"690": "Buildings",
|
|
||||||
"691": "Building materials",
|
|
||||||
"692": "Auxiliary construction practices",
|
|
||||||
"693": "Construction in specific types of materials and for specific purposes",
|
|
||||||
"694": "Wood construction",
|
|
||||||
"695": "Roof covering",
|
|
||||||
"696": "Utilities",
|
|
||||||
"697": "Heating, ventilating & air-conditioning",
|
|
||||||
"698": "Detail finishing",
|
|
||||||
"7": "Arts & recreation",
|
|
||||||
"70": "Arts",
|
|
||||||
"700": "Arts",
|
|
||||||
"701": "Philosophy and theory of fine and decorative arts",
|
|
||||||
"702": "Miscellany of fine and decorative arts",
|
|
||||||
"703": "Dictionaries, encyclopedias, concordances of fine and decorative arts",
|
|
||||||
"704": "Special topics in fine and decorative arts",
|
|
||||||
"705": "Serial publications of fine and decorative arts",
|
|
||||||
"706": "Organizations and management of fine and decorative arts",
|
|
||||||
"707": "Education, research, related topics of fine and decorative arts",
|
|
||||||
"708": "Galleries, museums, private collections of fine and decorative arts",
|
|
||||||
"709": "Historical, geographic & persons treatment",
|
|
||||||
"71": "Area planning & landscape architecture",
|
|
||||||
"710": "Area planning and landscape architecture",
|
|
||||||
"711": "Area planning (Civic art)",
|
|
||||||
"712": "Landscape architecture (Landscape design)",
|
|
||||||
"713": "Landscape architecture of trafficways",
|
|
||||||
"714": "Water features in landscape architecture",
|
|
||||||
"715": "Woody plants in landscape architecture",
|
|
||||||
"716": "Herbaceous plants in landscape architecture",
|
|
||||||
"717": "Structures in landscape architecture",
|
|
||||||
"718": "Landscape design of cemeteries",
|
|
||||||
"719": "Natural landscapes",
|
|
||||||
"72": "Architecture",
|
|
||||||
"720": "Architecture",
|
|
||||||
"721": "Architectural materials and structural elements",
|
|
||||||
"722": "Architecture from earliest times to ca. 300",
|
|
||||||
"723": "Architecture from ca. 300 to 1399",
|
|
||||||
"724": "Architecture from 1400",
|
|
||||||
"725": "Public structures",
|
|
||||||
"726": "Buildings for religious purposes",
|
|
||||||
"727": "Buildings for educational and research purposes",
|
|
||||||
"728": "Residential and related buildings",
|
|
||||||
"729": "Design and decoration of structures and accessories",
|
|
||||||
"73": "Sculpture, ceramics & metalwork",
|
|
||||||
"730": "Plastic arts Sculpture",
|
|
||||||
"731": "Processes, forms & subjects of sculpture",
|
|
||||||
"732": "Sculpture from earliest times to ca. 500, sculpture of nonliterate peoples",
|
|
||||||
"733": "Greek, Etruscan, Roman sculpture",
|
|
||||||
"734": "Sculpture from ca. 500 to 1399",
|
|
||||||
"735": "Sculpture from 1400",
|
|
||||||
"736": "Carving and carvings",
|
|
||||||
"737": "Numismatics and sigillography",
|
|
||||||
"738": "Ceramic arts",
|
|
||||||
"739": "Art metalwork",
|
|
||||||
"74": "Graphic arts & decorative arts",
|
|
||||||
"740": "Graphic arts",
|
|
||||||
"741": "Drawing and drawings",
|
|
||||||
"742": "Perspective in drawing",
|
|
||||||
"743": "Drawing and drawings by subject",
|
|
||||||
"745": "Decorative arts",
|
|
||||||
"746": "Textile arts",
|
|
||||||
"747": "Interior decoration",
|
|
||||||
"748": "Glass",
|
|
||||||
"749": "Furniture and accessories",
|
|
||||||
"75": "Painting",
|
|
||||||
"750": "Painting and paintings",
|
|
||||||
"751": "Techniques, procedures, apparatus, equipment, materials, forms",
|
|
||||||
"752": "Color",
|
|
||||||
"753": "Symbolism, allegory, mythology, legend",
|
|
||||||
"754": "Genre paintings",
|
|
||||||
"755": "Religion",
|
|
||||||
"757": "Human figures",
|
|
||||||
"758": "Nature, architectural subjects and cityscapes, other specific subjects",
|
|
||||||
"759": "History, geographic treatment, biography",
|
|
||||||
"76": "Printmaking & prints",
|
|
||||||
"760": "Printmaking and prints",
|
|
||||||
"761": "Relief processes (Block printing)",
|
|
||||||
"763": "Lithographic processes (Planographic processes)",
|
|
||||||
"764": "Chromolithography and serigraphy",
|
|
||||||
"765": "Metal engraving",
|
|
||||||
"766": "Mezzotinting, aquatinting, related processes",
|
|
||||||
"767": "Etching and drypoint",
|
|
||||||
"769": "Prints",
|
|
||||||
"77": "Photography, computer art, film, video",
|
|
||||||
"770": "Photography, computer art, cinematography, videography",
|
|
||||||
"771": "Techniques, procedures, apparatus, equipment, materials",
|
|
||||||
"772": "Metallic salt processes",
|
|
||||||
"773": "Pigment processes of printing",
|
|
||||||
"774": "Holography",
|
|
||||||
"775": "Digital photography",
|
|
||||||
"776": "Computer art (Digital art)",
|
|
||||||
"777": "Cinematography and videography",
|
|
||||||
"778": "Specific fields and special kinds of photography",
|
|
||||||
"779": "Photographs",
|
|
||||||
"78": "Music",
|
|
||||||
"780": "Music",
|
|
||||||
"781": "General principles & musical forms",
|
|
||||||
"782": "Vocal music",
|
|
||||||
"783": "Music for single voices",
|
|
||||||
"784": "Instruments & instrumental ensembles",
|
|
||||||
"785": "Ensembles with only one instrument per part",
|
|
||||||
"786": "Keyboard, mechanical, electrophonic, percussion instruments",
|
|
||||||
"787": "Stringed instruments (Chordophones)",
|
|
||||||
"788": "Wind instruments (Aerophones)",
|
|
||||||
"79": "Sports, games & entertainment",
|
|
||||||
"790": "Recreational and performing arts",
|
|
||||||
"791": "Public performances",
|
|
||||||
"792": "Stage presentations",
|
|
||||||
"793": "Indoor games and amusements",
|
|
||||||
"794": "Indoor games of skill",
|
|
||||||
"795": "Games of chance",
|
|
||||||
"796": "Athletic and outdoor sports and games",
|
|
||||||
"797": "Aquatic & air sports",
|
|
||||||
"798": "Equestrian sports and animal racing",
|
|
||||||
"799": "Fishing, hunting, shooting",
|
|
||||||
"8": "Literature",
|
|
||||||
"80": "Literature, rhetoric & criticism",
|
|
||||||
"800": "Literature (Belles-lettres) and rhetoric",
|
|
||||||
"801": "Philosophy and theory",
|
|
||||||
"802": "Miscellany",
|
|
||||||
"803": "Dictionaries, encyclopedias, concordances",
|
|
||||||
"805": "Serial publications",
|
|
||||||
"806": "Organizations and management",
|
|
||||||
"807": "Education, research, related topics",
|
|
||||||
"808": "Rhetoric and collections of literary texts from more than two literatures",
|
|
||||||
"809": "History, description, critical appraisal of more than two literatures",
|
|
||||||
"81": "American literature in English",
|
|
||||||
"810": "American literature in English",
|
|
||||||
"811": "American poetry in English",
|
|
||||||
"812": "American drama in English",
|
|
||||||
"813": "American fiction in English",
|
|
||||||
"814": "American essays in English",
|
|
||||||
"815": "American speeches in English",
|
|
||||||
"816": "American letters in English",
|
|
||||||
"817": "American humor and satire in English",
|
|
||||||
"818": "American miscellaneous writings",
|
|
||||||
"82": "English & Old English literatures",
|
|
||||||
"820": "English and Old English (Anglo-Saxon) literatures",
|
|
||||||
"821": "English poetry",
|
|
||||||
"822": "English drama",
|
|
||||||
"823": "English fiction",
|
|
||||||
"824": "English essays",
|
|
||||||
"825": "English speeches",
|
|
||||||
"826": "English letters",
|
|
||||||
"827": "English humor and satire",
|
|
||||||
"828": "English miscellaneous writings",
|
|
||||||
"829": "Old English (Anglo-Saxon) literature",
|
|
||||||
"83": "German & related literatures",
|
|
||||||
"830": "Literatures of Germanic languages German literature",
|
|
||||||
"831": "German poetry",
|
|
||||||
"832": "German drama",
|
|
||||||
"833": "German fiction",
|
|
||||||
"834": "German essays",
|
|
||||||
"835": "German speeches",
|
|
||||||
"836": "German letters",
|
|
||||||
"837": "German humor & satire",
|
|
||||||
"838": "German miscellaneous writings",
|
|
||||||
"839": "Other Germanic literatures",
|
|
||||||
"84": "French & related literatures",
|
|
||||||
"840": "French literature and literatures of related Romance languages",
|
|
||||||
"841": "French poetry",
|
|
||||||
"842": "French drama",
|
|
||||||
"843": "French fiction",
|
|
||||||
"844": "French essays",
|
|
||||||
"845": "French speeches",
|
|
||||||
"846": "French letters",
|
|
||||||
"847": "French humor & satire",
|
|
||||||
"848": "French miscellaneous writings",
|
|
||||||
"849": "Occitan, Catalan, Franco-Provençal literatures",
|
|
||||||
"85": "Italian, Romanian & related literatures",
|
|
||||||
"850": "Literatures of Italian, Dalmatian, Romanian, Rhaetian, Sardinian, Corsican languages",
|
|
||||||
"851": "Italian poetry",
|
|
||||||
"852": "Italian drama",
|
|
||||||
"853": "Italian fiction",
|
|
||||||
"854": "Italian essays",
|
|
||||||
"855": "Italian speeches",
|
|
||||||
"856": "Italian letters",
|
|
||||||
"857": "Italian humor and satire",
|
|
||||||
"858": "Italian miscellaneous writings",
|
|
||||||
"859": "Literatures of Romanian, Rhaetian, Sardinian, Corsican languages",
|
|
||||||
"86": "Spanish, Portuguese, Galician literatures",
|
|
||||||
"860": "Spanish & Portuguese literatures",
|
|
||||||
"861": "Spanish poetry",
|
|
||||||
"862": "Spanish drama",
|
|
||||||
"863": "Spanish fiction",
|
|
||||||
"864": "Spanish essays",
|
|
||||||
"865": "Spanish speeches",
|
|
||||||
"866": "Spanish letters",
|
|
||||||
"867": "Spanish humor and satire",
|
|
||||||
"868": "Spanish miscellaneous writings",
|
|
||||||
"869": "Literatures of Portuguese and Galician languages",
|
|
||||||
"87": "Latin & Italic literatures",
|
|
||||||
"870": "Latin & Italic literatures",
|
|
||||||
"871": "Latin poetry",
|
|
||||||
"872": "Latin dramatic poetry and drama",
|
|
||||||
"873": "Latin epic poetry and fiction",
|
|
||||||
"874": "Latin lyric poetry",
|
|
||||||
"875": "Latin speeches",
|
|
||||||
"876": "Latin letters",
|
|
||||||
"877": "Latin humor and satire",
|
|
||||||
"878": "Latin miscellaneous writings",
|
|
||||||
"879": "Literatures of other Italic languages",
|
|
||||||
"88": "Classical & modern Greek literatures",
|
|
||||||
"880": "Literatures of Hellenic languages Classical Greek literature",
|
|
||||||
"881": "Classical Greek poetry",
|
|
||||||
"882": "Classical Greek dramatic poetry and drama",
|
|
||||||
"883": "Classical Greek epic poetry and fiction",
|
|
||||||
"884": "Classical Greek lyric poetry",
|
|
||||||
"885": "Classical Greek speeches",
|
|
||||||
"886": "Classical Greek letters",
|
|
||||||
"887": "Classical Greek humor and satire",
|
|
||||||
"888": "Classical Greek miscellaneous writings",
|
|
||||||
"889": "Modern Greek literature",
|
|
||||||
"89": "Other literatures",
|
|
||||||
"890": "Literatures of other specific languages and language families",
|
|
||||||
"891": "East Indo-European and Celtic literatures",
|
|
||||||
"892": "Afro-Asiatic literatures",
|
|
||||||
"893": "Non-Semitic Afro-Asiatic literatures",
|
|
||||||
"894": "Literatures of Altaic, Uralic, Hyperborean, Dravidian languages; literatures of miscellaneous languages of south Asia",
|
|
||||||
"895": "Literatures of East and Southeast Asia",
|
|
||||||
"896": "African literatures",
|
|
||||||
"897": "North American native literatures",
|
|
||||||
"898": "Literatures of South American native languages",
|
|
||||||
"899": "Literatures of non-Austronesian languages of Oceania, of Austronesian languages, of miscellaneous languages",
|
|
||||||
"9": "History & geography",
|
|
||||||
"90": "History",
|
|
||||||
"900": "History, geography, and auxiliary disciplines",
|
|
||||||
"901": "Philosophy and theory of history",
|
|
||||||
"902": "Miscellany",
|
|
||||||
"903": "Dictionaries, encyclopedias, concordances of history",
|
|
||||||
"904": "Collected accounts of events",
|
|
||||||
"905": "Serial publications of history",
|
|
||||||
"906": "Organizations and management of history",
|
|
||||||
"907": "Education, research & related topics",
|
|
||||||
"908": "History with respect to groups of people",
|
|
||||||
"909": "World history",
|
|
||||||
"91": "Geography & travel",
|
|
||||||
"910": "Geography and travel",
|
|
||||||
"911": "Historical geography",
|
|
||||||
"912": "Graphic representations of surface of earth and of extraterrestrial worlds",
|
|
||||||
"913": "Geography of and travel in ancient world",
|
|
||||||
"914": "Geography of and travel in Europe",
|
|
||||||
"915": "Geography of and travel in Asia",
|
|
||||||
"916": "Geography of and travel in Africa",
|
|
||||||
"917": "Geography of and travel in North America",
|
|
||||||
"918": "Geography of & travel in South America",
|
|
||||||
"919": "Geography of and travel in Australasia, Pacific Ocean islands, Atlantic Ocean islands, Arctic islands, Antarctica and on extraterrestrial worlds",
|
|
||||||
"92": "Biography & genealogy",
|
|
||||||
"920": "Biography, genealogy, insignia",
|
|
||||||
"929": "Genealogy, names, insignia",
|
|
||||||
"93": "History of ancient world (to ca. 499)",
|
|
||||||
"930": "History of ancient world to ca. 499",
|
|
||||||
"931": "China to 420",
|
|
||||||
"932": "Egypt to 640",
|
|
||||||
"933": "Palestine to 70",
|
|
||||||
"934": "South Asia to 647",
|
|
||||||
"935": "Mesopotamia to 637 and Iranian Plateau to 637",
|
|
||||||
"936": "Europe north and west of Italian Peninsula to ca. 499",
|
|
||||||
"937": "Italian Peninsula to 476 and adjacent territories to 476",
|
|
||||||
"938": "Greece to 323",
|
|
||||||
"939": "Other parts of ancient world to ca. 640",
|
|
||||||
"94": "History of Europe",
|
|
||||||
"940": "History of Europe",
|
|
||||||
"941": "British Isles",
|
|
||||||
"942": "England and Wales",
|
|
||||||
"943": "Germany and neighboring central European countries",
|
|
||||||
"944": "France and Monaco",
|
|
||||||
"945": "Italy, San Marino, Vatican City, Malta",
|
|
||||||
"946": "Spain, Andorra, Gibraltar, Portugal",
|
|
||||||
"947": "Russia and neighboring east European countries",
|
|
||||||
"948": "Scandinavia",
|
|
||||||
"949": "Other parts of Europe",
|
|
||||||
"95": "History of Asia",
|
|
||||||
"950": "History of Asia",
|
|
||||||
"951": "China and adjacent areas",
|
|
||||||
"952": "Japan",
|
|
||||||
"953": "Arabian Peninsula and adjacent areas",
|
|
||||||
"954": "India and neighboring south Asian countries",
|
|
||||||
"955": "Iran",
|
|
||||||
"956": "Middle East (Near East)",
|
|
||||||
"957": "Siberia (Asiatic Russia)",
|
|
||||||
"958": "Central Asia",
|
|
||||||
"959": "Southeast Asia",
|
|
||||||
"96": "History of Africa",
|
|
||||||
"960": "History of Africa",
|
|
||||||
"961": "Tunisia & Libya",
|
|
||||||
"962": "Egypt, Sudan, South Sudan",
|
|
||||||
"963": "Ethiopia and Eritrea",
|
|
||||||
"964": "Northwest African coast & offshore islands",
|
|
||||||
"965": "Algeria",
|
|
||||||
"966": "West Africa and offshore islands",
|
|
||||||
"967": "Central Africa and offshore islands",
|
|
||||||
"968": "Republic of South Africa and neighboring southern African countries",
|
|
||||||
"969": "South Indian Ocean islands",
|
|
||||||
"97": "History of North America",
|
|
||||||
"970": "History of North America",
|
|
||||||
"971": "Canada",
|
|
||||||
"972": "Middle America; Mexico",
|
|
||||||
"973": "United States",
|
|
||||||
"974": "Northeastern United States (New England and Middle Atlantic states)",
|
|
||||||
"975": "Southeastern United States (South Atlantic states)",
|
|
||||||
"976": "South central United States Gulf Coast states",
|
|
||||||
"977": "North central United States",
|
|
||||||
"978": "Western United States",
|
|
||||||
"979": "Great Basin and Pacific Slope region of United States",
|
|
||||||
"98": "History of South America",
|
|
||||||
"980": "History of South America",
|
|
||||||
"981": "Brazil",
|
|
||||||
"982": "Argentina",
|
|
||||||
"983": "Chile",
|
|
||||||
"984": "Bolivia",
|
|
||||||
"985": "Peru",
|
|
||||||
"986": "Colombia and Ecuador",
|
|
||||||
"987": "Venezuela",
|
|
||||||
"988": "Guiana",
|
|
||||||
"989": "Paraguay and Uruguay",
|
|
||||||
"99": "History of other areas",
|
|
||||||
"990": "History of Australasia, Pacific Ocean islands, Atlantic Ocean islands, Arctic islands, Antarctica, extraterrestrial worlds",
|
|
||||||
"993": "New Zealand",
|
|
||||||
"994": "Australia",
|
|
||||||
"995": "New Guinea and neighboring countries of Melanesia",
|
|
||||||
"996": "Other parts of Pacific Polynesia",
|
|
||||||
"997": "Atlantic Ocean islands",
|
|
||||||
"998": "Arctic islands and Antarctica",
|
|
||||||
"999": "Extraterrestrial worlds"
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
    # Maintenance script: download the preferred label of every class
    # 0..999 from dewey.info and rewrite the DEWEY literal in this file.
    import json
    import re
    from ox.cache import read_url

    label_key = 'http://www.w3.org/2004/02/skos/core#prefLabel'
    dewey = {}
    for i in range(0, 1000):
        url = 'http://dewey.info/class/%s/about.en.json' % i
        print(url)
        data = json.loads(read_url(url).decode('utf-8'))
        # the first entry carrying a prefLabel provides the label for class i
        for d in data.values():
            if label_key in d:
                dewey[str(i)] = d[label_key][0]['value']
                break

    # Keep the dump as str: formatting bytes with %s would write the
    # b'...' repr into the source file.
    table = json.dumps(dewey, indent=4, ensure_ascii=False, sort_keys=True)
    # ensure_ascii=False emits non-ASCII labels verbatim, so read and write
    # the file as UTF-8 regardless of the platform default encoding.
    with open(__file__, encoding='utf-8') as f:
        pydata = f.read()
    # A callable replacement is inserted literally; a replacement string
    # would have its backslash escapes interpreted by re.sub.
    pydata = re.sub(
        re.compile('\nDEWEY = {.*?}\n\n', re.DOTALL),
        lambda match: '\nDEWEY = %s\n\n' % table, pydata)

    with open(__file__, 'w', encoding='utf-8') as f:
        f.write(pydata)
|
|
102
oml/meta/loc.py
102
oml/meta/loc.py
|
@ -1,102 +0,0 @@
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
# vi:si:et:sw=4:sts=4:ts=4
|
|
||||||
|
|
||||||
|
|
||||||
from ox.cache import read_url
|
|
||||||
import ox
|
|
||||||
import re
|
|
||||||
import xml.etree.ElementTree as ET
|
|
||||||
|
|
||||||
from .dewey import get_classification
|
|
||||||
from .marc_countries import COUNTRIES
|
|
||||||
from .utils import normalize_isbn
|
|
||||||
|
|
||||||
import logging
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
|
|
||||||
def get_ids(key, value):
    """Return identifiers related to ``(key, value)`` as ``(key, value)`` tuples.

    For ``key == 'isbn'`` the loc.gov search page for ``value`` is fetched
    and the first linked LCCN, if any, is returned. For ``key == 'lccn'``
    the record is looked up and its ``oclc`` and ``isbn`` values are
    returned. Any other key yields an empty list without network access.
    """
    ids = []
    if key == 'isbn':
        url = 'http://www.loc.gov/search/?q=%s&all=true' % value
        html = ox.cache.read_url(url).decode('utf-8', 'ignore')
        match = re.search(r'"http://lccn.loc.gov/(\d+)"', html)
        if match:
            ids.append(('lccn', match.group(1)))
    elif key == 'lccn':
        info = lookup(value)
        # Separate loop names keep the caller's key/value intact, so the
        # debug line below reports the actual query.
        for other_key in ('oclc', 'isbn'):
            for other_value in info.get(other_key, []):
                ids.append((other_key, other_value))
    if ids:
        logger.debug('get_ids %s %s => %s', key, value, ids)
    return ids
|
|
||||||
|
|
||||||
def lookup(id):
    """Fetch the MODS record of LCCN ``id`` from lccn.loc.gov and extract metadata.

    Returns a dict that may contain ``lccn``, ``oclc``, ``isbn`` (lists),
    ``title``, ``place`` (list), ``country``, ``publisher``, ``date``,
    ``classification``, ``author`` (list) and ``description``; keys with
    empty values are removed. If the record cannot be fetched or parsed,
    only ``{'lccn': [id]}`` is returned; if the record has no ``titleInfo``
    element, an empty dict is returned. Elements without text content and
    identifiers/classifications lacking the expected attribute are skipped
    instead of raising.
    """
    logger.debug('lookup %s', id)
    ns = '{http://www.loc.gov/mods/v3}'
    url = 'http://lccn.loc.gov/%s/mods' % id
    info = {
        'lccn': [id]
    }
    try:
        mods = ET.fromstring(read_url(url).decode('utf-8'))
    except Exception:
        # Second attempt with timeout=0 (presumably bypasses the ox cache -
        # TODO confirm) before giving up.
        try:
            mods = ET.fromstring(read_url(url, timeout=0).decode('utf-8'))
        except Exception:
            logger.debug('lookup for %s url: %s failed', id, url, exc_info=True)
            return info

    title = mods.findall(ns + 'titleInfo')
    if not title:
        return {}
    # Join the parts of the first titleInfo; a subTitle is introduced by ': '.
    parts = []
    for e in title[0]:
        text = (e.text or '').strip()
        if not text:
            continue
        parts.append((': ' if e.tag == ns + 'subTitle' else ' ') + text)
    info['title'] = ''.join(parts).strip()

    origin = mods.findall(ns + 'originInfo')
    if origin:
        info['place'] = []
        for place in origin[0].findall(ns + 'place'):
            terms = place.findall(ns + 'placeTerm')
            if not terms:
                continue
            e = terms[0]
            term_type = e.attrib.get('type')
            if term_type == 'text':
                info['place'].append(e.text)
            elif term_type == 'code':
                # map the country code to its name, falling back to the code
                info['country'] = COUNTRIES.get(e.text, e.text)
        publisher = [e.text for e in origin[0].findall(ns + 'publisher')]
        if publisher:
            info['publisher'] = publisher[0]
        info['date'] = ''.join([
            e.text or ''
            for e in origin[0].findall(ns + 'dateIssued')
            if e.attrib.get('encoding') == 'marc'
        ])

    for i in mods.findall(ns + 'identifier'):
        key = i.attrib.get('type')
        value = i.text
        if key not in ('oclc', 'lccn', 'isbn') or not value:
            continue
        if key == 'oclc':
            value = value.replace('ocn', '').replace('ocm', '')
        if key == 'isbn':
            value = normalize_isbn(i.text)
        if key not in info:
            info[key] = []
        if value not in info[key]:
            info[key].append(value)

    for i in mods.findall(ns + 'classification'):
        if i.attrib.get('authority') == 'ddc' and i.text:
            # only the part before the first '/' is passed on
            info['classification'] = get_classification(i.text.split('/')[0])

    info['author'] = []
    for a in mods.findall(ns + 'name'):
        if a.attrib.get('usage') == 'primary':
            # date name parts are left out of the author string
            info['author'].append(' '.join([
                e.text for e in a.findall(ns + 'namePart')
                if e.attrib.get('type') not in ('date', ) and e.text
            ]))
    info['author'] = [ox.normalize_name(a) for a in info['author']]

    toc = mods.findall(ns + 'tableOfContents')
    if toc and toc[0].text:
        info['description'] = toc[0].text.strip()

    # drop keys whose value ended up empty
    for key in list(info.keys()):
        if not info[key]:
            del info[key]
    return info


info = lookup
|
|
|
@ -1,97 +0,0 @@
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
# vi:si:et:sw=4:sts=4:ts=4
|
|
||||||
|
|
||||||
|
|
||||||
import re
|
|
||||||
|
|
||||||
from ox.cache import read_url
|
|
||||||
from ox import find_re, strip_tags, decode_html
|
|
||||||
import stdnum.isbn
|
|
||||||
|
|
||||||
from .utils import find_isbns
|
|
||||||
|
|
||||||
import logging
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
|
|
||||||
base = 'http://www.lookupbyisbn.com'
|
|
||||||
|
|
||||||
def get_ids(key, value):
    """Return identifiers related to an ISBN or ASIN as ``(key, value)`` tuples.

    For ``isbn`` and ``asin`` keys the lookupbyisbn.com search page is
    fetched and the ASIN taken from the first book link may be added.
    An ISBN additionally yields its ISBN-10/ISBN-13 counterpart. An ASIN
    that is itself a valid ISBN is treated as one; otherwise the ISBNs
    returned by ``amazon_lookup`` are used. Other keys yield an empty list.
    """
    found = []

    def _append_counterpart(isbn):
        # ISBN-10 -> ISBN-13, and 978-prefixed ISBN-13 -> ISBN-10
        if len(isbn) == 10:
            found.append(('isbn', stdnum.isbn.to_isbn13(isbn)))
        if len(isbn) == 13 and isbn.startswith('978'):
            found.append(('isbn', stdnum.isbn.to_isbn10(isbn)))

    if key in ('isbn', 'asin'):
        search_url = '%s/Search/Book/%s/1' % (base, value)
        page = read_url(search_url).decode('utf-8')
        links = re.findall('href="(/Lookup/Book/[^"]+?)"', page)
        if links:
            # third path segment from the end of the first book link
            asin = links[0].split('/')[-3]
            if stdnum.isbn.to_isbn10(asin) or not stdnum.isbn.is_valid(asin):
                found.append(('asin', asin))

    if key == 'isbn':
        _append_counterpart(value)
    elif key == 'asin':
        if stdnum.isbn.is_valid(value):
            candidates = [value]
        else:
            candidates = [
                isbn for isbn in amazon_lookup(value)
                if stdnum.isbn.is_valid(isbn)
            ]
        for isbn in candidates:
            found.append(('isbn', isbn))
            _append_counterpart(isbn)

    if found:
        logger.debug('get_ids %s, %s => %s', key, value, found)
    return found
|
|
||||||
|
|
||||||
def lookup(id):
    """Scrape the lookupbyisbn.com detail page for ``id`` into a metadata dict.

    Returns a dict with ``asin`` (list), ``title``, ``description`` and
    ``cover``, plus whichever of ``author`` (list), ``publisher``, ``date``,
    ``edition``, ``binding``, ``volume`` and ``pages`` (int) the page
    provides. Returns an empty dict when the page title is ``Error!``.
    A page count that is not a plain integer is dropped instead of raising.
    """
    logger.debug('lookup %s', id)
    r = {
        'asin': [id]
    }
    url = '%s/Lookup/Book/%s/%s/1' % (base, id, id)
    logger.debug('%s', url)
    data = read_url(url).decode('utf-8')
    r["title"] = find_re(data, "<h2>(.*?)</h2>")
    if r["title"] == 'Error!':
        return {}
    # result key -> label used on the page
    keys = {
        'author': 'Author(s)',
        'publisher': 'Publisher',
        'date': 'Publication date',
        'edition': 'Edition',
        'binding': 'Binding',
        'volume': 'Volume(s)',
        'pages': 'Pages',
    }
    for key, label in keys.items():
        r[key] = find_re(data, '<span class="title">%s:</span>(.*?)</li>' % re.escape(label))
        # '--' is treated like a missing value
        if r[key] == '--' or not r[key]:
            del r[key]
        if key == 'pages' and key in r:
            try:
                r[key] = int(r[key])
            except ValueError:
                del r[key]
    desc = find_re(data, r'<h2>Description:</h2>(.*?)<div ')
    desc = desc.replace('<br /><br />', ' ').replace('<br /> ', ' ').replace('<br />', ' ')
    r['description'] = decode_html(strip_tags(desc))
    r['cover'] = find_re(data, '<img src="(.*?)" alt="Book cover').replace('._SL160_', '')
    # clean markup and entities out of every string value
    for key in r:
        if isinstance(r[key], str):
            r[key] = decode_html(strip_tags(r[key])).strip()
    if 'author' in r and isinstance(r['author'], str) and r['author']:
        r['author'] = [r['author']]
    else:
        r['author'] = []
    # an all-uppercase author string is discarded along with empty ones
    if not r['author'] or r['author'][0].isupper():
        del r['author']
    if r['description'].lower() == 'Description of this item is not available at this time.'.lower():
        r['description'] = ''
    return r
|
|
||||||
|
|
||||||
def amazon_lookup(asin):
    """Return the distinct values ``find_isbns`` extracts from the Amazon page of ``asin``.

    Only the page section matched by ``Formats</h3>.*?</table`` is searched.
    """
    page = read_url('http://www.amazon.com/dp/%s' % asin, timeout=-1)
    formats = find_re(page.decode('utf-8', 'ignore'), 'Formats</h3>.*?</table')
    unique = set(find_isbns(formats))
    return list(unique)
|
|
|
@ -1,409 +0,0 @@
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
# vi:si:et:sw=4:sts=4:ts=4
|
|
||||||
|
|
||||||
COUNTRIES = {
|
|
||||||
"gw": "Germany",
|
|
||||||
"gv": "Guinea",
|
|
||||||
"gu": "Guam",
|
|
||||||
"gt": "Guatemala",
|
|
||||||
"gs": "Georgia (Republic)",
|
|
||||||
"gr": "Greece",
|
|
||||||
"-ge": "Germany (East)",
|
|
||||||
"gp": "Guadeloupe",
|
|
||||||
"mnu": "Minnesota",
|
|
||||||
"gy": "Guyana",
|
|
||||||
"gd": "Grenada",
|
|
||||||
"gb": "Kiribati",
|
|
||||||
"go": "Gabon",
|
|
||||||
"gm": "Gambia",
|
|
||||||
"alu": "Alabama",
|
|
||||||
"gi": "Gibraltar",
|
|
||||||
"gh": "Ghana",
|
|
||||||
"tz": "Tanzania",
|
|
||||||
"tv": "Tuvalu",
|
|
||||||
"tu": "Turkey",
|
|
||||||
"tr": "Trinidad and Tobago",
|
|
||||||
"ts": "United Arab Emirates",
|
|
||||||
"to": "Tonga",
|
|
||||||
"tl": "Tokelau",
|
|
||||||
"tk": "Turkmenistan",
|
|
||||||
"th": "Thailand",
|
|
||||||
"ti": "Tunisia",
|
|
||||||
"tg": "Togo",
|
|
||||||
"tc": "Turks and Caicos Islands",
|
|
||||||
"ta": "Tajikistan",
|
|
||||||
"-gn": "Gilbert and Ellice Islands",
|
|
||||||
"-us": "United States",
|
|
||||||
"-ajr": "Azerbaijan S.S.R.",
|
|
||||||
"-iu": "Israel-Syria Demilitarized Zones",
|
|
||||||
"-iw": "Israel-Jordan Demilitarized Zones",
|
|
||||||
"za": "Zambia",
|
|
||||||
"nbu": "Nebraska",
|
|
||||||
"scu": "South Carolina",
|
|
||||||
"bg": "Bangladesh",
|
|
||||||
"cau": "California",
|
|
||||||
"abc": "Alberta",
|
|
||||||
"xoa": "Northern Territory",
|
|
||||||
"meu": "Maine",
|
|
||||||
"ctu": "Connecticut",
|
|
||||||
"my": "Malaysia",
|
|
||||||
"aku": "Alaska",
|
|
||||||
"gl": "Greenland",
|
|
||||||
"-cn": "Canada",
|
|
||||||
"wiu": "Wisconsin",
|
|
||||||
"-cz": "Canal Zone",
|
|
||||||
"txu": "Texas",
|
|
||||||
"-cs": "Czechoslovakia",
|
|
||||||
"-cp": "Canton and Enderbury Islands",
|
|
||||||
"msu": "Mississippi",
|
|
||||||
"-ln": "Central and Southern Line Islands",
|
|
||||||
"nkc": "New Brunswick",
|
|
||||||
"it": "Italy",
|
|
||||||
"tnu": "Tennessee",
|
|
||||||
"vp": "Various places",
|
|
||||||
"mg": "Madagascar",
|
|
||||||
"mf": "Mauritius",
|
|
||||||
"mc": "Monaco",
|
|
||||||
"-ur": "Soviet Union",
|
|
||||||
"mm": "Malta",
|
|
||||||
"ml": "Mali",
|
|
||||||
"mo": "Montenegro",
|
|
||||||
"flu": "Florida",
|
|
||||||
"deu": "Delaware",
|
|
||||||
"mk": "Oman",
|
|
||||||
"mj": "Montserrat",
|
|
||||||
"mu": "Mauritania",
|
|
||||||
"mw": "Malawi",
|
|
||||||
"mv": "Moldova",
|
|
||||||
"mq": "Martinique",
|
|
||||||
"mp": "Mongolia",
|
|
||||||
"mr": "Morocco",
|
|
||||||
"-ui": "United Kingdom Misc. Islands",
|
|
||||||
"mx": "Mexico",
|
|
||||||
"-uk": "United Kingdom",
|
|
||||||
"mz": "Mozambique",
|
|
||||||
"kyu": "Kentucky",
|
|
||||||
"hiu": "Hawaii",
|
|
||||||
"enk": "England",
|
|
||||||
"nyu": "New York (State)",
|
|
||||||
"fp": "French Polynesia",
|
|
||||||
"fr": "France",
|
|
||||||
"fs": "Terres australes et antarctiques françaises",
|
|
||||||
"mau": "Massachusetts",
|
|
||||||
"snc": "Saskatchewan",
|
|
||||||
"fa": "Faroe Islands",
|
|
||||||
"fg": "French Guiana",
|
|
||||||
"lau": "Louisiana",
|
|
||||||
"fj": "Fiji",
|
|
||||||
"fk": "Falkland Islands",
|
|
||||||
"fm": "Micronesia (Federated States)",
|
|
||||||
"sz": "Switzerland",
|
|
||||||
"sy": "Syria",
|
|
||||||
"sx": "Namibia",
|
|
||||||
"ss": "Western Sahara",
|
|
||||||
"sr": "Surinam",
|
|
||||||
"sq": "Swaziland",
|
|
||||||
"sp": "Spain",
|
|
||||||
"sw": "Sweden",
|
|
||||||
"su": "Saudi Arabia",
|
|
||||||
"st": "Saint-Martin",
|
|
||||||
"sj": "Sudan",
|
|
||||||
"si": "Singapore",
|
|
||||||
"sh": "Spanish North Africa",
|
|
||||||
"so": "Somalia",
|
|
||||||
"sn": "Sint Maarten",
|
|
||||||
"sm": "San Marino",
|
|
||||||
"sl": "Sierra Leone",
|
|
||||||
"sc": "Saint-Barthélemy",
|
|
||||||
"sa": "South Africa",
|
|
||||||
"sg": "Senegal",
|
|
||||||
"sf": "Sao Tome and Principe",
|
|
||||||
"se": "Seychelles",
|
|
||||||
"sd": "South Sudan",
|
|
||||||
"-unr": "Ukraine",
|
|
||||||
"-kgr": "Kirghiz S.S.R.",
|
|
||||||
"le": "Lebanon",
|
|
||||||
"lb": "Liberia",
|
|
||||||
"-hk": "Hong Kong",
|
|
||||||
"lo": "Lesotho",
|
|
||||||
"lh": "Liechtenstein",
|
|
||||||
"li": "Lithuania",
|
|
||||||
"lv": "Latvia",
|
|
||||||
"lu": "Luxembourg",
|
|
||||||
"vtu": "Vermont",
|
|
||||||
"ls": "Laos",
|
|
||||||
"xc": "Maldives",
|
|
||||||
"ly": "Libya",
|
|
||||||
"oku": "Oklahoma",
|
|
||||||
"ye": "Yemen",
|
|
||||||
"-tkr": "Turkmen S.S.R.",
|
|
||||||
"nfc": "Newfoundland and Labrador",
|
|
||||||
"ft": "Djibouti",
|
|
||||||
"em": "Timor-Leste",
|
|
||||||
"eg": "Equatorial Guinea",
|
|
||||||
"ea": "Eritrea",
|
|
||||||
"ec": "Ecuador",
|
|
||||||
"-gsr": "Georgian S.S.R.",
|
|
||||||
"et": "Ethiopia",
|
|
||||||
"es": "El Salvador",
|
|
||||||
"er": "Estonia",
|
|
||||||
"ru": "Russia (Federation)",
|
|
||||||
"rw": "Rwanda",
|
|
||||||
"re": "Réunion",
|
|
||||||
"rb": "Serbia",
|
|
||||||
"rm": "Romania",
|
|
||||||
"rh": "Zimbabwe",
|
|
||||||
"-err": "Estonia",
|
|
||||||
"oru": "Oregon",
|
|
||||||
"quc": "Québec (Province)",
|
|
||||||
"ntc": "Northwest Territories",
|
|
||||||
"wlk": "Wales",
|
|
||||||
"xj": "Saint Helena",
|
|
||||||
"xk": "Saint Lucia",
|
|
||||||
"xh": "Niue",
|
|
||||||
"xn": "Macedonia",
|
|
||||||
"xo": "Slovakia",
|
|
||||||
"xl": "Saint Pierre and Miquelon",
|
|
||||||
"xm": "Saint Vincent and the Grenadines",
|
|
||||||
"xb": "Cocos (Keeling) Islands",
|
|
||||||
"onc": "Ontario",
|
|
||||||
"xa": "Christmas Island (Indian Ocean)",
|
|
||||||
"xf": "Midway Islands",
|
|
||||||
"xd": "Saint Kitts-Nevis",
|
|
||||||
"xe": "Marshall Islands",
|
|
||||||
"nhu": "New Hampshire",
|
|
||||||
"xx": "No place, unknown, or undetermined",
|
|
||||||
"fi": "Finland",
|
|
||||||
"xr": "Czech Republic",
|
|
||||||
"xs": "South Georgia and the South Sandwich Islands",
|
|
||||||
"xp": "Spratly Island",
|
|
||||||
"xv": "Slovenia",
|
|
||||||
"-tt": "Trust Territory of the Pacific Islands",
|
|
||||||
"iau": "Iowa",
|
|
||||||
"ncu": "North Carolina",
|
|
||||||
"stk": "Scotland",
|
|
||||||
"xra": "South Australia",
|
|
||||||
"miu": "Michigan",
|
|
||||||
"kg": "Kyrgyzstan",
|
|
||||||
"ke": "Kenya",
|
|
||||||
"ko": "Korea (South)",
|
|
||||||
"kn": "Korea (North)",
|
|
||||||
"kv": "Kosovo",
|
|
||||||
"ku": "Kuwait",
|
|
||||||
"kz": "Kazakhstan",
|
|
||||||
"-pt": "Portuguese Timor",
|
|
||||||
"ksu": "Kansas",
|
|
||||||
"dm": "Benin",
|
|
||||||
"dk": "Denmark",
|
|
||||||
"-ys": "Yemen (People's Democratic Republic)",
|
|
||||||
"-yu": "Serbia and Montenegro",
|
|
||||||
"-bwr": "Byelorussian S.S.R.",
|
|
||||||
"dr": "Dominican Republic",
|
|
||||||
"dq": "Dominica",
|
|
||||||
"qa": "Qatar",
|
|
||||||
"aru": "Arkansas",
|
|
||||||
"nuc": "Nunavut",
|
|
||||||
"wf": "Wallis and Futuna",
|
|
||||||
"wk": "Wake Island",
|
|
||||||
"wj": "West Bank of the Jordan River",
|
|
||||||
"jm": "Jamaica",
|
|
||||||
"vra": "Victoria",
|
|
||||||
"jo": "Jordan",
|
|
||||||
"ws": "Samoa",
|
|
||||||
"ji": "Johnston Atoll",
|
|
||||||
"-na": "Netherlands Antilles",
|
|
||||||
"ja": "Japan",
|
|
||||||
"cou": "Colorado",
|
|
||||||
"-wb": "West Berlin",
|
|
||||||
"ilu": "Illinois",
|
|
||||||
"-nm": "Northern Mariana Islands",
|
|
||||||
"ck": "Colombia",
|
|
||||||
"cj": "Cayman Islands",
|
|
||||||
"ci": "Croatia",
|
|
||||||
"ch": "China (Republic : 1949- )",
|
|
||||||
"co": "Curaçao",
|
|
||||||
"cm": "Cameroon",
|
|
||||||
"cl": "Chile",
|
|
||||||
"-rur": "Russian S.F.S.R.",
|
|
||||||
"cb": "Cambodia",
|
|
||||||
"ca": "Caribbean Netherlands",
|
|
||||||
"cg": "Congo (Democratic Republic)",
|
|
||||||
"cf": "Congo (Brazzaville)",
|
|
||||||
"-lir": "Lithuania",
|
|
||||||
"cd": "Chad",
|
|
||||||
"cy": "Cyprus",
|
|
||||||
"cx": "Central African Republic",
|
|
||||||
"cr": "Costa Rica",
|
|
||||||
"cq": "Comoros",
|
|
||||||
"cw": "Cook Islands",
|
|
||||||
"cv": "Cape Verde",
|
|
||||||
"cu": "Cuba",
|
|
||||||
"pr": "Puerto Rico",
|
|
||||||
"pp": "Papua New Guinea",
|
|
||||||
"pw": "Palau",
|
|
||||||
"py": "Paraguay",
|
|
||||||
"pc": "Pitcairn Island",
|
|
||||||
"pf": "Paracel Islands",
|
|
||||||
"pg": "Guinea-Bissau",
|
|
||||||
"pe": "Peru",
|
|
||||||
"pk": "Pakistan",
|
|
||||||
"ph": "Philippines",
|
|
||||||
"pn": "Panama",
|
|
||||||
"po": "Portugal",
|
|
||||||
"pl": "Poland",
|
|
||||||
"pic": "Prince Edward Island",
|
|
||||||
"xxu": "United States",
|
|
||||||
"gau": "Georgia",
|
|
||||||
"xxc": "Canada",
|
|
||||||
"xxk": "United Kingdom",
|
|
||||||
"iy": "Iraq-Saudi Arabia Neutral Zone",
|
|
||||||
"vb": "British Virgin Islands",
|
|
||||||
"vc": "Vatican City",
|
|
||||||
"ve": "Venezuela",
|
|
||||||
"iq": "Iraq",
|
|
||||||
"vi": "Virgin Islands of the United States",
|
|
||||||
"is": "Israel",
|
|
||||||
"ir": "Iran",
|
|
||||||
"vm": "Vietnam",
|
|
||||||
"iv": "Côte d'Ivoire",
|
|
||||||
"ii": "India",
|
|
||||||
"-ac": "Ashmore and Cartier Islands",
|
|
||||||
"io": "Indonesia",
|
|
||||||
"-ai": "Anguilla",
|
|
||||||
"ic": "Iceland",
|
|
||||||
"ie": "Ireland",
|
|
||||||
"pau": "Pennsylvania",
|
|
||||||
"-jn": "Jan Mayen",
|
|
||||||
"nik": "Northern Ireland",
|
|
||||||
"wyu": "Wyoming",
|
|
||||||
"-air": "Armenian S.S.R.",
|
|
||||||
"-sv": "Swan Islands",
|
|
||||||
"-mvr": "Moldavian S.S.R.",
|
|
||||||
"-sk": "Sikkim",
|
|
||||||
"riu": "Rhode Island",
|
|
||||||
"-sb": "Svalbard",
|
|
||||||
"-xi": "Saint Kitts-Nevis-Anguilla",
|
|
||||||
"wea": "Western Australia",
|
|
||||||
"cc": "China",
|
|
||||||
"nvu": "Nevada",
|
|
||||||
"mou": "Missouri",
|
|
||||||
"ce": "Sri Lanka",
|
|
||||||
"qea": "Queensland",
|
|
||||||
"-mh": "Macao",
|
|
||||||
"nju": "New Jersey",
|
|
||||||
"ykc": "Yukon Territory",
|
|
||||||
"-vs": "Vietnam, South",
|
|
||||||
"tma": "Tasmania",
|
|
||||||
"-vn": "Vietnam, North",
|
|
||||||
"bd": "Burundi",
|
|
||||||
"be": "Belgium",
|
|
||||||
"bf": "Bahamas",
|
|
||||||
"nmu": "New Mexico",
|
|
||||||
"ba": "Bahrain",
|
|
||||||
"bb": "Barbados",
|
|
||||||
"bl": "Brazil",
|
|
||||||
"bm": "Bermuda Islands",
|
|
||||||
"bn": "Bosnia and Hercegovina",
|
|
||||||
"bo": "Bolivia",
|
|
||||||
"bh": "Belize",
|
|
||||||
"bi": "British Indian Ocean Territory",
|
|
||||||
"bt": "Bhutan",
|
|
||||||
"bu": "Bulgaria",
|
|
||||||
"bv": "Bouvet Island",
|
|
||||||
"bw": "Belarus",
|
|
||||||
"bp": "Solomon Islands",
|
|
||||||
"br": "Burma",
|
|
||||||
"bs": "Botswana",
|
|
||||||
"dcu": "District of Columbia",
|
|
||||||
"bx": "Brunei",
|
|
||||||
"aca": "Australian Capital Territory",
|
|
||||||
"idu": "Idaho",
|
|
||||||
"xna": "New South Wales",
|
|
||||||
"ot": "Mayotte",
|
|
||||||
"ndu": "North Dakota",
|
|
||||||
"nsc": "Nova Scotia",
|
|
||||||
"-kzr": "Kazakh S.S.R.",
|
|
||||||
"mbc": "Manitoba",
|
|
||||||
"-lvr": "Latvia",
|
|
||||||
"-uzr": "Uzbek S.S.R.",
|
|
||||||
"wau": "Washington (State)",
|
|
||||||
"vau": "Virginia",
|
|
||||||
"sdu": "South Dakota",
|
|
||||||
"gz": "Gaza Strip",
|
|
||||||
"ht": "Haiti",
|
|
||||||
"hu": "Hungary",
|
|
||||||
"ho": "Honduras",
|
|
||||||
"hm": "Heard and McDonald Islands",
|
|
||||||
"xga": "Coral Sea Islands Territory",
|
|
||||||
"uy": "Uruguay",
|
|
||||||
"uz": "Uzbekistan",
|
|
||||||
"uv": "Burkina Faso",
|
|
||||||
"up": "United States Misc. Pacific Islands",
|
|
||||||
"mtu": "Montana",
|
|
||||||
"un": "Ukraine",
|
|
||||||
"utu": "Utah",
|
|
||||||
"ug": "Uganda",
|
|
||||||
"ua": "Egypt",
|
|
||||||
"azu": "Arizona",
|
|
||||||
"uc": "United States Misc. Caribbean Islands",
|
|
||||||
"aa": "Albania",
|
|
||||||
"ae": "Algeria",
|
|
||||||
"ag": "Argentina",
|
|
||||||
"af": "Afghanistan",
|
|
||||||
"ai": "Armenia (Republic)",
|
|
||||||
"inu": "Indiana",
|
|
||||||
"uik": "United Kingdom Misc. Islands",
|
|
||||||
"aj": "Azerbaijan",
|
|
||||||
"am": "Anguilla",
|
|
||||||
"ao": "Angola",
|
|
||||||
"an": "Andorra",
|
|
||||||
"aq": "Antigua and Barbuda",
|
|
||||||
"as": "American Samoa",
|
|
||||||
"au": "Austria",
|
|
||||||
"at": "Australia",
|
|
||||||
"aw": "Aruba",
|
|
||||||
"ay": "Antarctica",
|
|
||||||
"ohu": "Ohio",
|
|
||||||
"nl": "New Caledonia",
|
|
||||||
"-ry": "Ryukyu Islands, Southern",
|
|
||||||
"nn": "Vanuatu",
|
|
||||||
"no": "Norway",
|
|
||||||
"ne": "Netherlands",
|
|
||||||
"ng": "Niger",
|
|
||||||
"nx": "Norfolk Island",
|
|
||||||
"nz": "New Zealand",
|
|
||||||
"np": "Nepal",
|
|
||||||
"nq": "Nicaragua",
|
|
||||||
"nr": "Nigeria",
|
|
||||||
"mdu": "Maryland",
|
|
||||||
"nu": "Nauru",
|
|
||||||
"nw": "Northern Mariana Islands",
|
|
||||||
"wvu": "West Virginia",
|
|
||||||
"-xxr": "Soviet Union",
|
|
||||||
"-tar": "Tajik S.S.R.",
|
|
||||||
"bcc": "British Columbia"
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
    # Maintenance script: re-download the MARC country code list from the
    # Library of Congress and rewrite the COUNTRIES literal in this very file.
    import json
    import re
    import ox
    from ox.cache import read_url

    url = "http://www.loc.gov/marc/countries/countries_code.html"
    data = read_url(url).decode('utf-8')
    # Each table row yields a (code, name) pair; markup inside cells is stripped.
    countries = dict([
        [ox.strip_tags(c) for c in r]
        for r in re.compile('<tr>.*?class="code">(.*?)</td>.*?<td>(.*?)</td>', re.DOTALL).findall(data)
    ])
    if not countries:
        # Refuse to replace the table with an empty dict when scraping fails
        # (e.g. the page layout changed).
        raise SystemExit('no country codes found at %s' % url)

    # Keep the JSON as text: encoding it to bytes here would make the "%s"
    # below write a b'...' repr into the source file on Python 3.
    data = json.dumps(countries, indent=4, ensure_ascii=False)
    with open(__file__, encoding='utf-8') as f:
        pydata = f.read()
    # A callable replacement is inserted verbatim, so backslashes in the JSON
    # are not interpreted as regex escapes / group references.
    pydata = re.sub(
        re.compile('\nCOUNTRIES = {.*?}\n\n', re.DOTALL),
        lambda m: '\nCOUNTRIES = %s\n\n' % data, pydata)

    with open(__file__, 'w', encoding='utf-8') as f:
        f.write(pydata)
|
|
|
@ -1,210 +0,0 @@
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
# vi:si:et:sw=4:sts=4:ts=4
|
|
||||||
|
|
||||||
|
|
||||||
from datetime import datetime
|
|
||||||
from urllib.parse import urlencode
|
|
||||||
import json
|
|
||||||
|
|
||||||
from ox.cache import read_url
|
|
||||||
|
|
||||||
from .dewey import get_classification
|
|
||||||
from .marc_countries import COUNTRIES
|
|
||||||
from .utils import normalize_isbn
|
|
||||||
|
|
||||||
import logging
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
|
|
||||||
# Maps Open Library edition fields to the metadata keys used by this package.
# Insertion order matters: format() walks this dict in order, so 'isbn_10'
# values end up before 'isbn_13' values in the merged 'isbn' list.
KEYS = {
    "authors": "author",
    "covers": "cover",
    "dewey_decimal_class": "classification",
    "isbn_10": "isbn",
    "isbn_13": "isbn",
    "lccn": "lccn",
    "number_of_pages": "pages",
    "languages": "language",
    "oclc_numbers": "oclc",
    "publish_country": "country",
    "publish_date": "date",
    "publishers": "publisher",
    "publish_places": "place",
    "series": "series",
    "title": "title",
}
|
|
||||||
|
|
||||||
def find(query):
    """Search Open Library and return formatted metadata for matching books.

    The query is stripped and passed to ``api.search``; only result keys
    starting with ``/books`` are fetched via ``api.get_many``. Each returned
    dict is the output of ``format`` with ``olid`` and ``primaryid`` added.
    Returns an empty list when nothing matches.
    """
    query = query.strip()
    logger.debug('find %s', query)
    r = api.search(query)
    ids = [b for b in r.get('result', []) if b.startswith('/books')]
    if not ids:
        # Nothing to fetch; avoids a pointless get_many request.
        return []
    # Default to a dict: the loop below needs .items(), which the previous
    # list default did not provide when 'result' was missing.
    books = api.get_many(ids).get('result', {})
    results = []
    for olid, value in books.items():
        olid = olid.split('/')[-1]
        book = format(value)
        book['olid'] = [olid]
        book['primaryid'] = ['olid', olid]
        results.append(book)
    return results
|
|
||||||
|
|
||||||
|
|
||||||
def get_ids(key, value):
    """Return related ``(key, value)`` identifier pairs known to Open Library.

    For ``key == 'olid'`` the edition is looked up and its isbn/lccn/oclc
    values are returned. For ``'isbn'``, ``'oclc'`` or ``'lccn'`` matching
    editions are queried via ``api.things`` and, for each ``/books`` hit, the
    olid plus the identifiers of that edition are returned. Any other key
    yields an empty list. Duplicates are dropped, first occurrence wins.
    """
    ids = []
    if key == 'olid':
        data = lookup(value)
        for name in ('isbn', 'lccn', 'oclc'):
            for v in data.get(name, []):
                if (name, v) not in ids:
                    ids.append((name, v))
    elif key in ('isbn', 'oclc', 'lccn'):
        logger.debug('get_ids %s %s', key, value)
        # ISBNs are queried as isbn_10 / isbn_13 depending on their length.
        # A separate name keeps `key` intact for the log line at the end.
        query_key = 'isbn_%s' % len(value) if key == 'isbn' else key
        r = api.things({'type': '/type/edition', query_key: value})
        for b in r.get('result', []):
            if b.startswith('/books'):
                olid = b.split('/')[-1]
                for kv in [('olid', olid)] + get_ids('olid', olid):
                    if kv not in ids:
                        ids.append(kv)
    if ids:
        logger.debug('get_ids %s %s => %s', key, value, ids)
    return ids
|
|
||||||
|
|
||||||
def lookup(id, return_all=False):
    """Fetch the edition ``/books/<id>`` and return its formatted metadata.

    The API result is passed through ``format`` (together with
    ``return_all``); the returned dict always carries an ``olid`` list that
    contains ``id``.
    """
    logger.debug('lookup %s', id)
    response = api.get('/books/' + id)
    data = format(response.get('result', {}), return_all)
    if id not in data.setdefault('olid', []):
        data['olid'] = [id]
    logger.debug('lookup %s => %s', id, list(data.keys()))
    return data
|
|
||||||
|
|
||||||
def get_type(obj):
    """Return the type of ``obj``.

    ``obj['type']`` may be a plain value or a dict; for a dict its ``'key'``
    entry is returned. Returns ``None`` when ``obj`` has no ``'type'``.
    """
    kind = obj.get('type')
    return kind['key'] if isinstance(kind, dict) else kind
|
|
||||||
|
|
||||||
def parse_date(s):
    """Normalise an English long-form date string.

    ``"January 1, 1998"`` becomes ``"1998-01-01"`` and ``"January 1998"``
    becomes ``"1998-01"``. Anything that matches neither pattern (including
    non-string input) is returned unchanged.
    """
    for pattern, fmt in (('%B %d, %Y', '%Y-%m-%d'), ('%B %Y', '%Y-%m')):
        try:
            return datetime.strptime(s, pattern).strftime(fmt)
        except (ValueError, TypeError):
            # ValueError: s does not match this pattern.
            # TypeError: s is not a string. Either way, try the next pattern
            # instead of hiding unrelated errors behind a bare except.
            continue
    return s
|
|
||||||
|
|
||||||
def format(info, return_all=False):
    """Convert an Open Library edition record into this package's metadata.

    Only the fields listed in ``KEYS`` are copied, renamed to their mapped
    key. Authors and languages are resolved to names, the country code is
    mapped through ``COUNTRIES``, the first cover id becomes a cover URL,
    ISBNs are normalised and merged into one list, and the publish date is
    passed through ``parse_date``. Other list values are reduced to their
    first element, except publish places, lccn and oclc numbers, which stay
    lists. ``return_all`` is accepted for interface compatibility and is not
    used here.
    """
    data = {}
    work = None
    if info.get('works'):
        # An empty 'works' list or a failed fetch simply leaves work unset.
        work = api.get(info['works'][0]['key']).get('result')
    for key, target in KEYS.items():
        if key not in info:
            continue
        value = info[key]
        if key == 'authors':
            if work:
                # Prefer the work's author roles over the edition's own list.
                value = resolve_names([
                    r['author'] for r in work.get('authors', [])
                    if get_type(r) == '/type/author_role'
                ])
            else:
                value = resolve_names(value)
        elif key == 'publish_country':
            value = value.strip()
            value = COUNTRIES.get(value, value)
        elif key == 'covers':
            if not value:
                # No cover ids: skip instead of failing on value[0].
                continue
            value = 'https://covers.openlibrary.org/b/id/%s.jpg' % value[0]
        elif key == 'languages':
            value = resolve_names(value)
        elif key in ('isbn_10', 'isbn_13'):
            if not isinstance(value, list):
                value = [value]
            value = list(map(normalize_isbn, value))
            if target in data:
                # isbn_10 and isbn_13 share one target key; append to it.
                value = data[target] + value
        elif isinstance(value, list) and key not in ('publish_places', 'lccn', 'oclc_numbers'):
            if not value:
                # Empty list: nothing to take the first element of.
                continue
            value = value[0]
        if key == 'publish_date':
            value = parse_date(value)
        if key == 'publish_places':
            # Drop the square brackets around places such as "[London]".
            for i, v in enumerate(value):
                if v.startswith('['):
                    v = v[1:]
                if v.endswith(']'):
                    v = v[:-1]
                value[i] = v
        data[target] = value
    if 'subtitle' in info:
        if 'title' in data:
            data['title'] += ' ' + info['subtitle']
        else:
            # A record with a subtitle but no title used to raise KeyError.
            data['title'] = info['subtitle']
    if 'classification' in data:
        value = data['classification']
        if isinstance(value, list):
            value = value[0]
        data['classification'] = get_classification(value.split('/')[0])
    return data
|
|
||||||
|
|
||||||
def resolve_names(objects, key='name'):
    """Fetch the referenced objects and return their ``key`` values.

    ``objects`` is a list of dicts with a ``'key'`` entry. All of them are
    fetched with one ``api.get_many`` call; entries of type
    ``/type/redirect`` are followed once via their ``location``. Results
    follow the order of the API response; objects without ``key`` are
    skipped.
    """
    r = []
    data = api.get_many([k['key'] for k in objects]).get('result', {})
    for k, value in data.items():
        # get_type copes with 'type' being either a dict or a plain string.
        if 'location' in value and get_type(value) == '/type/redirect':
            value = api.get(value['location']).get('result', {})
        if key in value:
            r.append(value[key])
        else:
            # e.g. a redirect that could not be resolved
            logger.debug('resolve_names: %s has no %s', k, key)
    return r
|
|
||||||
|
|
||||||
class API(object):
    """Small client for the Open Library JSON API endpoints under ``base``."""

    base = 'https://openlibrary.org/api'

    def _request(self, action, data, timeout=None):
        """Call ``<base>/<action>`` with ``data`` as query string; return JSON.

        Non-string values in ``data`` are JSON-encoded. ``timeout`` is
        forwarded to ``read_url`` when given. Failures reported by the API
        are logged and the decoded response is returned regardless.
        """
        # Build a new dict so the caller's mapping is not modified.
        params = {
            key: value if isinstance(value, str) else json.dumps(value)
            for key, value in data.items()
        }
        url = self.base + '/' + action + '?' + urlencode(params)
        if timeout is None:
            r = read_url(url).decode('utf-8')
        else:
            # Must be passed by keyword: the second positional parameter of
            # read_url is the POST data, not the timeout.
            r = read_url(url, timeout=timeout).decode('utf-8')
        if '504 Gateway Time-out' in r:
            # Gateway timed out; retry once with timeout=-1
            # (NOTE(review): see ox.cache.read_url for what -1 selects).
            r = read_url(url, timeout=-1).decode('utf-8')
        result = json.loads(r)
        if ('status' in result and result['status'] == 'error') or 'error' in result:
            logger.info('FAILED %s %s', action, params)
            logger.info('URL %s', url)
        return result

    def get(self, key):
        """Return the API response for the single object ``key``."""
        return self._request('get', {'key': key})

    def get_many(self, keys):
        """Return the API response for a list of object keys."""
        return self._request('get_many', {'keys': keys})

    def search(self, query):
        """Run a search; a plain string is wrapped as ``{'query': query}``."""
        if isinstance(query, str):
            query = {
                'query': query
            }
        data = self._request('search', {'q': query})
        if 'status' in data and data['status'] == 'error':
            logger.info('FAILED %s', query)
        return data

    def things(self, query):
        """Return the API response for a ``things`` query dict."""
        return self._request('things', {'query': query})


# Shared module-level client used by the functions in this module.
api = API()
|
|
|
@ -1,116 +0,0 @@
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
# vi:si:et:sw=4:sts=4:ts=4
|
|
||||||
|
|
||||||
|
|
||||||
import re
|
|
||||||
import hashlib
|
|
||||||
|
|
||||||
from ox.cache import read_url
|
|
||||||
import lxml.html
|
|
||||||
import stdnum.isbn
|
|
||||||
|
|
||||||
from .utils import normalize_isbn
|
|
||||||
|
|
||||||
import logging
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
|
|
||||||
base_url = 'http://www.worldcat.org'
|
|
||||||
|
|
||||||
def get_ids(key, value):
    """Return related ``(key, value)`` identifier pairs found on WorldCat.

    For ``key == 'isbn'`` the site search is scraped for the first OCLC
    number; that number and the record's other ISBNs are returned. For
    ``key == 'oclc'`` the ISBNs of that record are returned. Any other key
    yields an empty list.
    """
    ids = []
    if key == 'isbn':
        url = '%s/search?qt=worldcat_org_bks&q=%s' % (base_url, value)
        html = read_url(url).decode('utf-8')
        # Raw string: '\d' in a normal literal is an invalid escape sequence.
        matches = re.compile(r'/title.*?oclc/(\d+).*?"').findall(html)
        if matches:
            record = lookup(matches[0])
            ids.append(('oclc', matches[0]))
            for v in record.get('isbn', []):
                if v != value:
                    ids.append(('isbn', v))
    elif key == 'oclc':
        record = lookup(value)
        # Use a separate loop name so `value` still holds the requested id
        # when it is logged below.
        ids.extend(('isbn', isbn) for isbn in record.get('isbn', []))
    if ids:
        logger.debug('get_ids %s %s => %s', key, value, ids)
    return ids
|
|
||||||
|
|
||||||
def lookup(id):
    """Scrape the WorldCat page ``/oclc/<id>`` and return its metadata.

    Values come from elements whose id contains ``bibtip`` and from the
    ``util-em-note`` textarea (``Name: value`` lines in its second
    blank-line-separated section). The result always contains
    ``'oclc': [id]``. ``isxn`` is converted into a list of valid, normalised
    ISBNs; ``author`` is split on ``'; '``; ``publisher`` is split into
    place, publisher and date where it matches; known placeholder cover
    images are dropped.
    """
    data = {
        'oclc': [id]
    }
    url = '%s/oclc/%s' % (base_url, id)
    html = read_url(url).decode('utf-8')
    doc = lxml.html.document_fromstring(html)
    for e in doc.xpath("//*[contains(@id, 'bibtip')]"):
        key = e.attrib['id'].replace('bibtip_', '')
        value = e.text_content().strip()
        if value:
            data[key] = value

    note = doc.xpath('//textarea[@id="util-em-note"]')
    if note:
        # Tolerate an empty textarea, a missing second section and lines
        # without a colon instead of raising while parsing.
        sections = (note[0].text or '').split('\n\n')
        if len(sections) > 1:
            fields = dict(
                line.split(':', 1)
                for line in sections[1].split('\n') if ':' in line
            )
            for name, text in fields.items():
                text = text.strip()
                if text:
                    data[name.lower()] = text

    for key in ('id', 'instance', 'mediatype', 'reclist', 'shorttitle'):
        if key in data:
            del data[key]

    if 'isxn' in data:
        for isbn in data.pop('isxn').split(' '):
            isbn = normalize_isbn(isbn)
            if stdnum.isbn.is_valid(isbn):
                isbns = data.setdefault('isbn', [])
                if isbn not in isbns:
                    isbns.append(isbn)

    cover = doc.xpath('//img[@class="cover"]')
    if cover:
        data['cover'] = cover[0].attrib['src']
        if data['cover'].startswith('//'):
            data['cover'] = 'http:' + data['cover']
        cdata = read_url(data['cover'])
        # Covers with one of these SHA-1 digests are discarded
        # (presumably generic placeholder images - TODO confirm).
        if hashlib.sha1(cdata).hexdigest() in (
            'd2e9ab0c87193d69a7d3a3c21ae4aa550f7dcf00',
            '70f16d3e077cdd47ef6b331001dbb1963677fa04'
        ):
            del data['cover']

    if 'author' in data:
        data['author'] = data['author'].split('; ')
    if 'title' in data:
        data['title'] = data['title'].replace(' : ', ': ')
    if 'publisher' in data:
        # Expected shape: "<place> : <publisher>, <year>"
        m = re.compile(r'(.+) : (.+), (\d{4})').findall(data['publisher'])
        if m:
            place, publisher, date = m[0]
            data['publisher'] = publisher
            data['date'] = date
            data['place'] = [place]
        elif ':' in data['publisher']:
            # Looser fallback: split on the first colon, then look for a year.
            place, publisher = data['publisher'].split(':', 1)
            data['place'] = [place.strip()]
            data['publisher'] = publisher.split(',')[0].strip()
            m = re.compile(r'\d{4}').findall(publisher)
            if m:
                data['date'] = m[0]

    if 'place' in data:
        # Drop the square brackets around places such as "[London]".
        if data['place'][0].startswith('['):
            data['place'] = [data['place'][0][1:]]
        if data['place'][0].endswith(']'):
            data['place'] = [data['place'][0][:-1]]
    logger.debug('lookup %s => %s', id, list(data.keys()))
    return data


# Alias kept for callers that use info() instead of lookup().
info = lookup
|
|
||||||
|
|
||||||
def find(title, author, year):
    """Search by title, author and year; this backend always returns ``[]``."""
    no_results = []
    return no_results
|
|
||||||
|
|
Loading…
Reference in a new issue