remove unused scrapers
This commit is contained in:
parent
47647a7b86
commit
a172e7b4b7
8 changed files with 0 additions and 1951 deletions
|
@ -5,11 +5,6 @@
|
|||
import stdnum.isbn
|
||||
import ox
|
||||
|
||||
from . import abebooks
|
||||
from . import loc
|
||||
from . import lookupbyisbn
|
||||
from . import openlibrary
|
||||
from . import worldcat
|
||||
from . import google
|
||||
from . import duckduckgo
|
||||
|
||||
|
|
|
@ -1,50 +0,0 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
# vi:si:et:sw=4:sts=4:ts=4
|
||||
|
||||
|
||||
import re
|
||||
|
||||
from ox.cache import read_url
|
||||
import lxml.html
|
||||
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Root URL of the AbeBooks site; the search and book-detail URLs used by
# get_ids() and lookup() are built by prefixing paths with this value.
base = 'http://www.abebooks.com'
|
||||
|
||||
def get_ids(key, value):
    """Return the identifiers AbeBooks can confirm for ``key``/``value``.

    Only ``key == 'isbn'`` is handled: the AbeBooks ISBN search page for
    ``value`` is read with ``read_url`` and, if it contains at least one
    link to a book-details page, the pair ``(key, value)`` is reported.

    Returns a list that is either empty or ``[(key, value)]``.
    """
    ids = []
    if key != 'isbn':
        # No lookup exists for other identifier types.
        return ids
    url = '%s/servlet/SearchResults?isbn=%s&sts=t' % (base, value)
    data = read_url(url, unicode=True)
    # Only the existence of a details link matters here, so stop at the
    # first match instead of collecting every link on the page.
    if re.search('href="(/servlet/BookDetailsPL[^"]+)"', data):
        ids.append((key, value))
        logger.debug('get_ids %s %s => %s', key, value, ids)
    return ids
|
||||
|
||||
def lookup(id):
    """Collect bibliographic fields for an ISBN from AbeBooks.

    The ISBN search page for ``id`` is read first; if it links to a
    book-details page, the first such page is read and every element whose
    ``id`` attribute contains ``'biblio'`` contributes one entry to the
    result. The entry's key is the element id with ``'biblio-'`` removed
    (``'pubdate'`` is renamed to ``'date'``) and its value is the stripped
    text content of the element.

    Returns a dict of the collected fields; it is empty when the search
    page contains no details link.
    """
    logger.debug('lookup %s', id)
    data = {}
    url = '%s/servlet/SearchResults?isbn=%s&sts=t' % (base, id)
    html = read_url(url, unicode=True)
    # Only the first details link is followed, so a single search is
    # enough; there is no need to gather every link on the page.
    match = re.search('href="(/servlet/BookDetailsPL[^"]+)"', html)
    if match is None:
        return data

    details = '%s%s' % (base, match.group(1))
    html = read_url(details, unicode=True)
    doc = lxml.html.document_fromstring(html)

    # Site field names that are stored under a different key.
    keys = {
        'pubdate': 'date'
    }
    # Fields that are never copied into the result.
    ignored = ('bookcondition', 'binding', 'edition-amz')
    # (key, text) pairs where the element holds only its own label and
    # therefore carries no actual value.
    label_only = (
        ('date', 'Publication Date:'),
        ('publisher', 'Publisher:'),
    )
    for e in doc.xpath("//*[contains(@id, 'biblio')]"):
        key = e.attrib['id'].replace('biblio-', '')
        value = e.text_content().strip()
        k = keys.get(key, key)
        if (k, value) in label_only:
            continue
        if value and key not in ignored:
            data[k] = value
    return data
|
|
@ -1,962 +0,0 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
# vi:si:et:sw=4:sts=4:ts=4
|
||||
|
||||
def get_classification(id):
    """Return ``id`` followed by its Dewey class name when one is known.

    The lookup key is made of the digits found in the part of ``id`` that
    precedes the first ``/`` and the first ``.``, with leading zeros
    removed. If that key is present in ``DEWEY`` the result is
    ``'<id> <class name>'``; otherwise it is just ``id`` formatted as a
    string.
    """
    name = '%s' % id
    integer_part = id.split('/')[0].split('.')[0]
    digits = ''.join(c for c in integer_part if c.isdigit())
    if digits:
        # An all-zero number ('0', '00', '000') must still resolve to the
        # '0' entry; stripping every zero would leave an empty key that can
        # never match. Input without any digits stays empty on purpose.
        # NOTE(review): stripping zeros also maps e.g. '004' to the key '4';
        # confirm that DEWEY's keys are meant to be looked up that way.
        digits = digits.lstrip('0') or '0'
    if digits in DEWEY:
        name = '%s %s' % (name, DEWEY[digits])
    return name
|
||||
|
||||
DEWEY = {
|
||||
"0": "Computer science, information & general works",
|
||||
"1": "Philosophy & psychology",
|
||||
"10": "Philosophy",
|
||||
"100": "Philosophy, parapsychology and occultism, psychology",
|
||||
"101": "Theory of philosophy",
|
||||
"102": "Miscellany of philosophy",
|
||||
"103": "Dictionaries, encyclopedias, concordances of philosophy",
|
||||
"105": "Serial publications",
|
||||
"106": "Organizations and management of philosophy",
|
||||
"107": "Education, research, related topics of philosophy",
|
||||
"108": "Groups of people",
|
||||
"109": "Historical and collected persons treatment of philosophy",
|
||||
"11": "Metaphysics",
|
||||
"110": "Metaphysics",
|
||||
"111": "Ontology",
|
||||
"113": "Cosmology (Philosophy of nature)",
|
||||
"114": "Space",
|
||||
"115": "Time",
|
||||
"116": "Change",
|
||||
"117": "Structure",
|
||||
"118": "Force and energy",
|
||||
"119": "Number and quantity",
|
||||
"12": "Epistemology",
|
||||
"120": "Epistemology, causation & humankind",
|
||||
"121": "Epistemology (Theory of knowledge)",
|
||||
"122": "Causation",
|
||||
"123": "Determinism and indeterminism",
|
||||
"124": "Teleology",
|
||||
"126": "The self",
|
||||
"127": "The unconscious and the subconscious",
|
||||
"128": "Humankind",
|
||||
"129": "Origin and destiny of individual souls",
|
||||
"13": "Parapsychology & occultism",
|
||||
"130": "Parapsychology and occultism",
|
||||
"131": "Parapsychological and occult techniques for achieving well-being, happiness, success",
|
||||
"133": "Specific topics in parapsychology & occultism",
|
||||
"135": "Dreams and mysteries",
|
||||
"137": "Divinatory graphology",
|
||||
"138": "Physiognomy",
|
||||
"139": "Phrenology",
|
||||
"14": "Philosophical schools of thought",
|
||||
"140": "Specific philosophical schools",
|
||||
"141": "Idealism & related systems",
|
||||
"142": "Critical philosophy",
|
||||
"143": "Bergsonism and intuitionism",
|
||||
"144": "Humanism and related systems and doctrines",
|
||||
"145": "Sensationalism",
|
||||
"146": "Naturalism and related systems and doctrines",
|
||||
"147": "Pantheism and related systems and doctrines",
|
||||
"148": "Dogmatism, eclecticism, liberalism, syncretism, traditionalism",
|
||||
"149": "Other philosophical systems",
|
||||
"15": "Psychology",
|
||||
"150": "Psychology",
|
||||
"152": "Sensory perception, movement, emotions, physiological drives",
|
||||
"153": "Conscious mental processes and intelligence",
|
||||
"154": "Subconscious and altered states and processes",
|
||||
"155": "Differential and developmental psychology",
|
||||
"156": "Comparative psychology",
|
||||
"158": "Applied psychology",
|
||||
"16": "Philosophical logic",
|
||||
"160": "Logic",
|
||||
"161": "Induction",
|
||||
"162": "Deduction",
|
||||
"165": "Fallacies and sources of error",
|
||||
"166": "Syllogisms",
|
||||
"167": "Hypotheses",
|
||||
"168": "Argument and persuasion",
|
||||
"169": "Analogy",
|
||||
"17": "Ethics",
|
||||
"170": "Ethics",
|
||||
"171": "Ethical systems",
|
||||
"172": "Political ethics",
|
||||
"173": "Ethics of family relationships",
|
||||
"174": "Occupational ethics",
|
||||
"175": "Ethics of recreation, leisure, public performances, communication",
|
||||
"176": "Ethics of sex and reproduction",
|
||||
"177": "Ethics of social relations",
|
||||
"178": "Ethics of consumption",
|
||||
"179": "Other ethical norms",
|
||||
"18": "Ancient, medieval & eastern philosophy",
|
||||
"180": "Ancient, medieval, eastern philosophy",
|
||||
"181": "Eastern philosophy",
|
||||
"182": "Pre-Socratic Greek philosophies",
|
||||
"183": "Sophistic, Socratic, related Greek philosophies",
|
||||
"184": "Platonic philosophy",
|
||||
"185": "Aristotelian philosophy",
|
||||
"186": "Skeptic and Neoplatonic philosophies",
|
||||
"187": "Epicurean philosophy",
|
||||
"188": "Stoic philosophy",
|
||||
"189": "Medieval western philosophy",
|
||||
"19": "Modern western philosophy",
|
||||
"190": "Modern western and other noneastern philosophy",
|
||||
"191": "United States and Canada",
|
||||
"192": "Philosophy of British Isles",
|
||||
"193": "Philosophy of Germany and Austria",
|
||||
"194": "Philosophy of France",
|
||||
"195": "Philosophy of Italy",
|
||||
"196": "Philosophy of Spain and Portugal",
|
||||
"197": "Philosophy of Russia",
|
||||
"198": "Philosophy of Scandinavia and Finland",
|
||||
"199": "Philosophy in other geographic areas",
|
||||
"2": "Religion",
|
||||
"20": "Religion",
|
||||
"200": "Religion",
|
||||
"201": "Religious mythology, general classes of religion, interreligious relations and attitudes, social theology",
|
||||
"202": "Doctrines",
|
||||
"203": "Public worship and other practices",
|
||||
"204": "Religious experience, life, practice",
|
||||
"205": "Religious ethics",
|
||||
"206": "Leaders & organization",
|
||||
"207": "Missions & religious education",
|
||||
"208": "Sources",
|
||||
"209": "Sects and reform movements",
|
||||
"21": "Philosophy & theory of religion",
|
||||
"210": "Philosophy & theory of religion",
|
||||
"211": "Concepts of God",
|
||||
"212": "Existence of God, ways of knowing God, attributes of God",
|
||||
"213": "Creation",
|
||||
"214": "Theodicy",
|
||||
"215": "Science and religion",
|
||||
"218": "Humankind",
|
||||
"22": "The Bible",
|
||||
"220": "Bible",
|
||||
"221": "Old Testament (Tanakh)",
|
||||
"222": "Historical books of Old Testament",
|
||||
"223": "Poetic books of Old Testament",
|
||||
"224": "Prophetic books of Old Testament",
|
||||
"225": "New Testament",
|
||||
"226": "Gospels and Acts",
|
||||
"227": "Epistles",
|
||||
"228": "Revelation (Apocalypse)",
|
||||
"229": "Apocrypha & pseudepigrapha",
|
||||
"23": "Christianity",
|
||||
"230": "Christianity Christian theology",
|
||||
"231": "God",
|
||||
"232": "Jesus Christ and his family",
|
||||
"233": "Humankind",
|
||||
"234": "Salvation and grace",
|
||||
"235": "Spiritual beings",
|
||||
"236": "Eschatology",
|
||||
"238": "Creeds, confessions of faith, covenants, catechisms",
|
||||
"239": "Apologetics and polemics",
|
||||
"24": "Christian practice & observance",
|
||||
"240": "Christian moral & devotional theology",
|
||||
"241": "Christian ethics",
|
||||
"242": "Devotional literature",
|
||||
"243": "Evangelistic writings for individuals and families",
|
||||
"246": "Use of art in Christianity",
|
||||
"247": "Church furnishings and related articles",
|
||||
"248": "Christian experience, practice, life",
|
||||
"249": "Christian observances in family life",
|
||||
"25": "Christian pastoral practice & religious orders",
|
||||
"250": "Local Christian church and Christian religious orders",
|
||||
"251": "Preaching (Homiletics)",
|
||||
"252": "Texts of sermons",
|
||||
"253": "Pastoral office and work (Pastoral theology)",
|
||||
"254": "Parish administration",
|
||||
"255": "Religious congregations & orders",
|
||||
"259": "Pastoral care of specific kinds of persons",
|
||||
"26": "Christian organization, social work & worship",
|
||||
"260": "Christian social and ecclesiastical theology",
|
||||
"261": "Social theology and interreligious relations and attitudes",
|
||||
"262": "Ecclesiology",
|
||||
"263": "Days, times & places of observance",
|
||||
"264": "Public worship",
|
||||
"265": "Sacraments, other rites and acts",
|
||||
"266": "Missions",
|
||||
"267": "Associations for religious work",
|
||||
"268": "Religious education",
|
||||
"269": "Spiritual renewal",
|
||||
"27": "History of Christianity",
|
||||
"270": "History of Christianity & Christian church",
|
||||
"271": "Religious congregations and orders in church history",
|
||||
"272": "Persecutions in general church history",
|
||||
"273": "Doctrinal controversies and heresies in general church history",
|
||||
"274": "Christianity in Europe",
|
||||
"275": "History of Christianity in Asia",
|
||||
"276": "Christianity in Africa",
|
||||
"277": "Christianity in North America",
|
||||
"278": "Christianity in South America",
|
||||
"279": "Christianity in Australasia, Pacific Ocean islands, Atlantic Ocean islands, Arctic islands, Antarctica",
|
||||
"28": "Christian denominations",
|
||||
"280": "Denominations and sects of Christian church",
|
||||
"281": "Early church and Eastern churches",
|
||||
"282": "Roman Catholic Church",
|
||||
"283": "Anglican churches",
|
||||
"284": "Protestant denominations of Continental origin and related bodies",
|
||||
"285": "Presbyterian churches, Reformed churches centered in America, Congregational churches, Puritanism",
|
||||
"286": "Baptist, Restoration movement, Adventist churches",
|
||||
"287": "Methodist churches; churches related to Methodism",
|
||||
"289": "Other denominations & sects",
|
||||
"29": "Other religions",
|
||||
"290": "Other religions",
|
||||
"292": "Classical religion (Greek and Roman religion)",
|
||||
"293": "Germanic religion",
|
||||
"294": "Religions of Indic origin",
|
||||
"295": "Zoroastrianism (Mazdaism, Parseeism)",
|
||||
"296": "Judaism",
|
||||
"297": "Islam, Babism, Bahai Faith",
|
||||
"299": "Religions not provided for elsewhere",
|
||||
"3": "Social sciences",
|
||||
"30": "Social sciences, sociology & anthropology",
|
||||
"300": "Social sciences",
|
||||
"301": "Sociology and anthropology",
|
||||
"302": "Social interaction",
|
||||
"303": "Social processes",
|
||||
"304": "Factors affecting social behavior",
|
||||
"305": "Groups of people",
|
||||
"306": "Culture and institutions",
|
||||
"307": "Communities",
|
||||
"31": "Statistics",
|
||||
"310": "Collections of general statistics",
|
||||
"314": "General statistics of Europe",
|
||||
"315": "General statistics of Asia",
|
||||
"316": "General statistics of Africa",
|
||||
"317": "General statistics of North America",
|
||||
"318": "General statistics of South America",
|
||||
"319": "General statistics of other parts of the world Of Pacific Ocean islands",
|
||||
"32": "Political science",
|
||||
"320": "Political science (Politics and government)",
|
||||
"321": "Systems of governments and states",
|
||||
"322": "Relation of state to organized groups",
|
||||
"323": "Civil and political rights",
|
||||
"324": "The political process",
|
||||
"325": "International migration and colonization",
|
||||
"326": "Slavery and emancipation",
|
||||
"327": "International relations",
|
||||
"328": "The legislative process",
|
||||
"33": "Economics",
|
||||
"330": "Economics",
|
||||
"331": "Labor economics",
|
||||
"332": "Financial economics",
|
||||
"333": "Economics of land and energy",
|
||||
"334": "Cooperatives",
|
||||
"335": "Socialism and related systems",
|
||||
"336": "Public finance",
|
||||
"337": "International economics",
|
||||
"338": "Production",
|
||||
"339": "Macroeconomics and related topics",
|
||||
"34": "Law",
|
||||
"340": "Law",
|
||||
"341": "Law of nations",
|
||||
"342": "Constitutional and administrative law",
|
||||
"343": "Military, defense, public property, public finance, tax, commerce (trade), industrial law",
|
||||
"344": "Labor, social, education & cultural law",
|
||||
"345": "Criminal law",
|
||||
"346": "Private law",
|
||||
"347": "Procedure and courts",
|
||||
"348": "Laws, regulations, cases",
|
||||
"349": "Law of specific jurisdictions, areas, socioeconomic regions, regional intergovernmental organizations",
|
||||
"35": "Public administration & military science",
|
||||
"350": "Public administration and military science",
|
||||
"351": "Public administration",
|
||||
"352": "General considerations of public administration",
|
||||
"353": "Specific fields of public administration",
|
||||
"354": "Public administration of economy and environment",
|
||||
"355": "Military science",
|
||||
"356": "Foot forces and warfare",
|
||||
"357": "Mounted forces & warfare",
|
||||
"358": "Air and other specialized forces and warfare; engineering and related services",
|
||||
"359": "Sea forces and warfare",
|
||||
"36": "Social problems & social services",
|
||||
"360": "Social problems & social services",
|
||||
"361": "Social problems & social welfare in general",
|
||||
"362": "Social welfare problems and services",
|
||||
"363": "Other social problems and services",
|
||||
"364": "Criminology",
|
||||
"365": "Penal and related institutions",
|
||||
"366": "Secret associations and societies",
|
||||
"367": "General clubs",
|
||||
"368": "Insurance",
|
||||
"369": "Miscellaneous kinds of associations",
|
||||
"37": "Education",
|
||||
"370": "Education",
|
||||
"371": "Schools and their activities; special education",
|
||||
"372": "Primary education (Elementary education)",
|
||||
"373": "Secondary education",
|
||||
"374": "Adult education",
|
||||
"375": "Curricula",
|
||||
"378": "Higher education (Tertiary education)",
|
||||
"379": "Public policy issues in education",
|
||||
"38": "Commerce, communications & transportation",
|
||||
"380": "Commerce, communications, transportation",
|
||||
"381": "Commerce (Trade)",
|
||||
"382": "International commerce (Foreign trade)",
|
||||
"383": "Postal communication",
|
||||
"384": "Communications",
|
||||
"385": "Railroad transportation",
|
||||
"386": "Inland waterway & ferry transportation",
|
||||
"387": "Water, air & space transportation",
|
||||
"388": "Transportation",
|
||||
"389": "Metrology and standardization",
|
||||
"39": "Customs, etiquette & folklore",
|
||||
"390": "Customs, etiquette, folklore",
|
||||
"391": "Costume and personal appearance",
|
||||
"392": "Customs of life cycle and domestic life",
|
||||
"393": "Death customs",
|
||||
"394": "General customs",
|
||||
"395": "Etiquette (Manners)",
|
||||
"398": "Folklore",
|
||||
"399": "Customs of war and diplomacy",
|
||||
"4": "Language",
|
||||
"40": "Language",
|
||||
"400": "Language",
|
||||
"401": "Philosophy and theory; international languages",
|
||||
"402": "Miscellany",
|
||||
"403": "Dictionaries, encyclopedias, concordances",
|
||||
"404": "Special topics of language",
|
||||
"405": "Serial publications",
|
||||
"406": "Organizations and management",
|
||||
"407": "Education, research & related topics",
|
||||
"408": "Groups of people",
|
||||
"409": "Geographic treatment and biography",
|
||||
"41": "Linguistics",
|
||||
"410": "Linguistics",
|
||||
"411": "Writing systems",
|
||||
"412": "Etymology of standard forms of languages",
|
||||
"413": "Dictionaries of standard forms of languages",
|
||||
"414": "Phonology & phonetics",
|
||||
"415": "Grammar of standard forms of languages",
|
||||
"417": "Dialectology and historical linguistics",
|
||||
"418": "Standard usage (Prescriptive linguistics)",
|
||||
"419": "Sign languages",
|
||||
"42": "English & Old English languages",
|
||||
"420": "English & Old English languages",
|
||||
"421": "Writing system, phonology, phonetics of standard English",
|
||||
"422": "Etymology of standard English",
|
||||
"423": "Dictionaries of standard English",
|
||||
"425": "Grammar of standard English",
|
||||
"427": "Historical and geographic variations, modern nongeographic variations of English",
|
||||
"428": "Standard English usage (Prescriptive linguistics)",
|
||||
"429": "Old English (Anglo-Saxon)",
|
||||
"43": "German & related languages",
|
||||
"430": "German & related languages",
|
||||
"431": "German writing systems & phonology",
|
||||
"432": "Etymology of standard German",
|
||||
"433": "Dictionaries of standard German",
|
||||
"435": "Grammar of standard German",
|
||||
"437": "Historical and geographic variations, modern nongeographic variations of German",
|
||||
"438": "Standard German usage",
|
||||
"439": "Other Germanic languages",
|
||||
"44": "French & related languages",
|
||||
"440": "Romance languages French",
|
||||
"441": "Writing systems, phonology, phonetics of standard French",
|
||||
"442": "Etymology of standard French",
|
||||
"443": "Dictionaries of standard French",
|
||||
"445": "Grammar of standard French",
|
||||
"447": "Historical and geographic variations, modern nongeographic variations of French",
|
||||
"448": "Standard French usage (Prescriptive linguistics)",
|
||||
"449": "Occitan, Catalan, Franco-Provençal",
|
||||
"45": "Italian, Romanian & related languages",
|
||||
"450": "Italian, Dalmatian, Romanian, Rhaetian, Sardinian, Corsican",
|
||||
"451": "Writing systems, phonology, phonetics of standard Italian",
|
||||
"452": "Etymology of standard Italian",
|
||||
"453": "Dictionaries of standard Italian",
|
||||
"455": "Grammar of standard Italian",
|
||||
"457": "Historical and geographic variations, modern nongeographic variations of Italian",
|
||||
"458": "Standard Italian usage",
|
||||
"459": "Sardinian",
|
||||
"46": "Spanish, Portuguese, Galician",
|
||||
"460": "Spanish, Portuguese, Galician",
|
||||
"461": "Writing systems, phonology, phonetics of standard Spanish",
|
||||
"462": "Etymology of standard Spanish",
|
||||
"463": "Dictionaries of standard Spanish",
|
||||
"465": "Grammar of standard Spanish",
|
||||
"467": "Historical and geographic variations, modern nongeographic variations of Spanish",
|
||||
"468": "Standard Spanish usage",
|
||||
"469": "Portuguese",
|
||||
"47": "Latin & Italic languages",
|
||||
"470": "Italic languages Latin",
|
||||
"471": "Writing systems, phonology, phonetics of classical Latin",
|
||||
"472": "Classical Latin etymology",
|
||||
"473": "Dictionaries of classical Latin",
|
||||
"475": "Grammar of classical Latin",
|
||||
"477": "Old, postclassical & Vulgar Latin",
|
||||
"478": "Classical Latin usage (Prescriptive linguistics)",
|
||||
"479": "Other Italic languages",
|
||||
"48": "Classical & modern Greek languages",
|
||||
"480": "Classical Greek and related Hellenic languages",
|
||||
"481": "Writing systems, phonology, phonetics of classical Greek",
|
||||
"482": "Etymology of classical Greek",
|
||||
"483": "Dictionaries of classical Greek",
|
||||
"485": "Grammar of classical Greek",
|
||||
"487": "Preclassical and postclassical Greek",
|
||||
"488": "Classical Greek usage (Prescriptive linguistics)",
|
||||
"489": "Other Hellenic languages",
|
||||
"49": "Other languages",
|
||||
"490": "Other languages",
|
||||
"491": "East Indo-European and Celtic languages",
|
||||
"492": "Afro-Asiatic languages",
|
||||
"493": "Non-Semitic Afro-Asiatic languages",
|
||||
"494": "Altaic, Uralic, Hyperborean, Dravidian languages, miscellaneous languages of south Asia",
|
||||
"495": "Languages of east and southeast Asia",
|
||||
"496": "African languages",
|
||||
"497": "North American native languages",
|
||||
"498": "South American native languages",
|
||||
"499": "Austronesian & other languages",
|
||||
"5": "Science",
|
||||
"50": "Science",
|
||||
"500": "Science",
|
||||
"501": "Philosophy & theory",
|
||||
"502": "Miscellany",
|
||||
"503": "Dictionaries, encyclopedias, concordances",
|
||||
"505": "Serial publications",
|
||||
"506": "Organizations and management",
|
||||
"507": "Education, research, related topics",
|
||||
"508": "Natural history",
|
||||
"509": "Historical, geographic & persons treatment",
|
||||
"51": "Mathematics",
|
||||
"510": "Mathematics",
|
||||
"511": "General principles of mathematics",
|
||||
"512": "Algebra",
|
||||
"513": "Arithmetic",
|
||||
"514": "Topology",
|
||||
"515": "Analysis",
|
||||
"516": "Geometry",
|
||||
"518": "Numerical analysis",
|
||||
"519": "Probabilities and applied mathematics",
|
||||
"52": "Astronomy",
|
||||
"520": "Astronomy and allied sciences",
|
||||
"521": "Celestial mechanics",
|
||||
"522": "Techniques, procedures, apparatus, equipment, materials",
|
||||
"523": "Specific celestial bodies and phenomena",
|
||||
"525": "Earth (Astronomical geography)",
|
||||
"526": "Mathematical geography",
|
||||
"527": "Celestial navigation",
|
||||
"528": "Ephemerides",
|
||||
"529": "Chronology",
|
||||
"53": "Physics",
|
||||
"530": "Physics",
|
||||
"531": "Classical mechanics",
|
||||
"532": "Fluid mechanics; liquid mechanics",
|
||||
"533": "Pneumatics (Gas mechanics)",
|
||||
"534": "Sound and related vibrations",
|
||||
"535": "Light and infrared and ultraviolet phenomena",
|
||||
"536": "Heat",
|
||||
"537": "Electricity & electronics",
|
||||
"538": "Magnetism",
|
||||
"539": "Modern physics",
|
||||
"54": "Chemistry",
|
||||
"540": "Chemistry and allied sciences",
|
||||
"541": "Physical chemistry",
|
||||
"542": "Techniques, equipment & materials",
|
||||
"543": "Analytical chemistry",
|
||||
"546": "Inorganic chemistry",
|
||||
"547": "Organic chemistry",
|
||||
"548": "Crystallography",
|
||||
"549": "Mineralogy",
|
||||
"55": "Earth sciences & geology",
|
||||
"550": "Earth sciences",
|
||||
"551": "Geology, hydrology, meteorology",
|
||||
"552": "Petrology",
|
||||
"553": "Economic geology",
|
||||
"554": "Earth sciences of Europe",
|
||||
"555": "Earth sciences of Asia",
|
||||
"556": "Earth sciences of Africa",
|
||||
"557": "Earth sciences of North America",
|
||||
"558": "Earth sciences of South America",
|
||||
"559": "Earth sciences of Australasia, Pacific Ocean islands, Atlantic Ocean islands, Arctic islands, Antarctica, extraterrestrial worlds",
|
||||
"56": "Fossils & prehistoric life",
|
||||
"560": "Paleontology",
|
||||
"561": "Paleobotany; fossil microorganisms",
|
||||
"562": "Fossil invertebrates",
|
||||
"563": "Miscellaneous fossil marine and seashore invertebrates",
|
||||
"564": "Fossil Mollusca and Molluscoidea",
|
||||
"565": "Fossil Arthropoda",
|
||||
"566": "Fossil Chordata",
|
||||
"567": "Fossil cold-blooded vertebrates",
|
||||
"568": "Fossil birds",
|
||||
"569": "Fossil mammals",
|
||||
"57": "Biology",
|
||||
"570": "Life sciences Biology",
|
||||
"571": "Physiology and related subjects",
|
||||
"572": "Biochemistry",
|
||||
"573": "Specific physiological systems in animals, regional histology and physiology in animals",
|
||||
"575": "Specific parts of and physiological systems in plants",
|
||||
"576": "Genetics and evolution",
|
||||
"577": "Ecology",
|
||||
"578": "Natural history of organisms and related subjects",
|
||||
"579": "Microorganisms, fungi, algae",
|
||||
"58": "Plants (Botany)",
|
||||
"580": "Plants",
|
||||
"581": "Specific topics in natural history of plants",
|
||||
"582": "Plants noted for specific vegetative characteristics and flowers",
|
||||
"583": "Dicotyledons",
|
||||
"584": "Monocotyledons",
|
||||
"585": "Pinophyta (Gymnosperms)",
|
||||
"586": "Seedless plants",
|
||||
"587": "Vascular seedless plants",
|
||||
"588": "Bryophyta",
|
||||
"59": "Animals (Zoology)",
|
||||
"590": "Animals",
|
||||
"591": "Specific topics in natural history",
|
||||
"592": "Invertebrates",
|
||||
"593": "Miscellaneous marine and seashore invertebrates",
|
||||
"594": "Mollusks & molluscoids",
|
||||
"595": "Arthropoda",
|
||||
"596": "Chordata",
|
||||
"597": "Cold-blooded vertebrates",
|
||||
"598": "Aves (Birds)",
|
||||
"599": "Mammalia (Mammals)",
|
||||
"6": "Technology",
|
||||
"60": "Technology",
|
||||
"600": "Technology",
|
||||
"601": "Philosophy and theory",
|
||||
"602": "Miscellany",
|
||||
"603": "Dictionaries & encyclopedias",
|
||||
"604": "Technical drawing, hazardous materials technology; groups of people",
|
||||
"605": "Serial publications",
|
||||
"606": "Organizations",
|
||||
"607": "Education, research, related topics",
|
||||
"608": "Patents",
|
||||
"609": "Historical, geographic, persons treatment",
|
||||
"61": "Medicine & health",
|
||||
"610": "Medicine and health",
|
||||
"611": "Human anatomy, cytology, histology",
|
||||
"612": "Human physiology",
|
||||
"613": "Personal health and safety",
|
||||
"614": "Forensic medicine; incidence of injuries, wounds, disease; public preventive medicine",
|
||||
"615": "Pharmacology and therapeutics",
|
||||
"616": "Diseases",
|
||||
"617": "Surgery, regional medicine, dentistry, ophthalmology, otology, audiology",
|
||||
"618": "Other branches of medicine Gynecology and obstetrics",
|
||||
"62": "Engineering",
|
||||
"620": "Engineering and allied operations",
|
||||
"621": "Applied physics",
|
||||
"622": "Mining and related operations",
|
||||
"623": "Military and nautical engineering",
|
||||
"624": "Civil engineering",
|
||||
"625": "Engineering of railroads & roads",
|
||||
"627": "Hydraulic engineering",
|
||||
"628": "Sanitary engineering",
|
||||
"629": "Other branches of engineering",
|
||||
"63": "Agriculture",
|
||||
"630": "Agriculture and related technologies",
|
||||
"631": "Specific techniques; apparatus, equipment, materials",
|
||||
"632": "Plant injuries, diseases, pests",
|
||||
"633": "Field and plantation crops",
|
||||
"634": "Orchards, fruits, forestry",
|
||||
"635": "Garden crops (Horticulture)",
|
||||
"636": "Animal husbandry",
|
||||
"637": "Processing dairy & related products",
|
||||
"638": "Insect culture",
|
||||
"639": "Hunting, fishing, conservation, related technologies",
|
||||
"64": "Home & family management",
|
||||
"640": "Home and family management",
|
||||
"641": "Food & drink",
|
||||
"642": "Meals and table service",
|
||||
"643": "Housing and household equipment",
|
||||
"644": "Household utilities",
|
||||
"645": "Household furnishings",
|
||||
"646": "Sewing, clothing, management of personal and family life",
|
||||
"647": "Management of public households (Institutional housekeeping)",
|
||||
"648": "Housekeeping",
|
||||
"649": "Child rearing; home care of people with disabilities and illnesses",
|
||||
"65": "Management & public relations",
|
||||
"650": "Management and auxiliary services",
|
||||
"651": "Office services",
|
||||
"652": "Processes of written communication",
|
||||
"653": "Shorthand",
|
||||
"657": "Accounting",
|
||||
"658": "General management",
|
||||
"659": "Advertising and public relations",
|
||||
"66": "Chemical engineering",
|
||||
"660": "Chemical engineering and related technologies",
|
||||
"661": "Technology of industrial chemicals",
|
||||
"662": "Technology of explosives, fuels, related products",
|
||||
"663": "Beverage technology",
|
||||
"664": "Food technology",
|
||||
"665": "Technology of industrial oils, fats, waxes, gases",
|
||||
"666": "Ceramic and allied technologies",
|
||||
"667": "Cleaning, color, coating, related technologies",
|
||||
"668": "Technology of other organic products",
|
||||
"669": "Metallurgy",
|
||||
"67": "Manufacturing",
|
||||
"670": "Manufacturing",
|
||||
"671": "Metalworking processes and primary metal products",
|
||||
"672": "Iron, steel, other iron alloys",
|
||||
"673": "Nonferrous metals",
|
||||
"674": "Lumber processing, wood products, cork",
|
||||
"675": "Leather and fur processing",
|
||||
"676": "Pulp and paper technology",
|
||||
"677": "Textiles",
|
||||
"678": "Elastomers and elastomer products",
|
||||
"679": "Other products of specific materials",
|
||||
"68": "Manufacture for specific uses",
|
||||
"680": "Manufacture of products for specific uses",
|
||||
"681": "Precision instruments and other devices",
|
||||
"682": "Small forge work (Blacksmithing)",
|
||||
"683": "Hardware and household appliances",
|
||||
"684": "Furnishings and home workshops",
|
||||
"685": "Leather and fur goods, and related products",
|
||||
"686": "Printing and related activities",
|
||||
"687": "Clothing and accessories",
|
||||
"688": "Other final products & packaging",
|
||||
"69": "Construction of buildings",
|
||||
"690": "Buildings",
|
||||
"691": "Building materials",
|
||||
"692": "Auxiliary construction practices",
|
||||
"693": "Construction in specific types of materials and for specific purposes",
|
||||
"694": "Wood construction",
|
||||
"695": "Roof covering",
|
||||
"696": "Utilities",
|
||||
"697": "Heating, ventilating & air-conditioning",
|
||||
"698": "Detail finishing",
|
||||
"7": "Arts & recreation",
|
||||
"70": "Arts",
|
||||
"700": "Arts",
|
||||
"701": "Philosophy and theory of fine and decorative arts",
|
||||
"702": "Miscellany of fine and decorative arts",
|
||||
"703": "Dictionaries, encyclopedias, concordances of fine and decorative arts",
|
||||
"704": "Special topics in fine and decorative arts",
|
||||
"705": "Serial publications of fine and decorative arts",
|
||||
"706": "Organizations and management of fine and decorative arts",
|
||||
"707": "Education, research, related topics of fine and decorative arts",
|
||||
"708": "Galleries, museums, private collections of fine and decorative arts",
|
||||
"709": "Historical, geographic & persons treatment",
|
||||
"71": "Area planning & landscape architecture",
|
||||
"710": "Area planning and landscape architecture",
|
||||
"711": "Area planning (Civic art)",
|
||||
"712": "Landscape architecture (Landscape design)",
|
||||
"713": "Landscape architecture of trafficways",
|
||||
"714": "Water features in landscape architecture",
|
||||
"715": "Woody plants in landscape architecture",
|
||||
"716": "Herbaceous plants in landscape architecture",
|
||||
"717": "Structures in landscape architecture",
|
||||
"718": "Landscape design of cemeteries",
|
||||
"719": "Natural landscapes",
|
||||
"72": "Architecture",
|
||||
"720": "Architecture",
|
||||
"721": "Architectural materials and structural elements",
|
||||
"722": "Architecture from earliest times to ca. 300",
|
||||
"723": "Architecture from ca. 300 to 1399",
|
||||
"724": "Architecture from 1400",
|
||||
"725": "Public structures",
|
||||
"726": "Buildings for religious purposes",
|
||||
"727": "Buildings for educational and research purposes",
|
||||
"728": "Residential and related buildings",
|
||||
"729": "Design and decoration of structures and accessories",
|
||||
"73": "Sculpture, ceramics & metalwork",
|
||||
"730": "Plastic arts Sculpture",
|
||||
"731": "Processes, forms & subjects of sculpture",
|
||||
"732": "Sculpture from earliest times to ca. 500, sculpture of nonliterate peoples",
|
||||
"733": "Greek, Etruscan, Roman sculpture",
|
||||
"734": "Sculpture from ca. 500 to 1399",
|
||||
"735": "Sculpture from 1400",
|
||||
"736": "Carving and carvings",
|
||||
"737": "Numismatics and sigillography",
|
||||
"738": "Ceramic arts",
|
||||
"739": "Art metalwork",
|
||||
"74": "Graphic arts & decorative arts",
|
||||
"740": "Graphic arts",
|
||||
"741": "Drawing and drawings",
|
||||
"742": "Perspective in drawing",
|
||||
"743": "Drawing and drawings by subject",
|
||||
"745": "Decorative arts",
|
||||
"746": "Textile arts",
|
||||
"747": "Interior decoration",
|
||||
"748": "Glass",
|
||||
"749": "Furniture and accessories",
|
||||
"75": "Painting",
|
||||
"750": "Painting and paintings",
|
||||
"751": "Techniques, procedures, apparatus, equipment, materials, forms",
|
||||
"752": "Color",
|
||||
"753": "Symbolism, allegory, mythology, legend",
|
||||
"754": "Genre paintings",
|
||||
"755": "Religion",
|
||||
"757": "Human figures",
|
||||
"758": "Nature, architectural subjects and cityscapes, other specific subjects",
|
||||
"759": "History, geographic treatment, biography",
|
||||
"76": "Printmaking & prints",
|
||||
"760": "Printmaking and prints",
|
||||
"761": "Relief processes (Block printing)",
|
||||
"763": "Lithographic processes (Planographic processes)",
|
||||
"764": "Chromolithography and serigraphy",
|
||||
"765": "Metal engraving",
|
||||
"766": "Mezzotinting, aquatinting, related processes",
|
||||
"767": "Etching and drypoint",
|
||||
"769": "Prints",
|
||||
"77": "Photography, computer art, film, video",
|
||||
"770": "Photography, computer art, cinematography, videography",
|
||||
"771": "Techniques, procedures, apparatus, equipment, materials",
|
||||
"772": "Metallic salt processes",
|
||||
"773": "Pigment processes of printing",
|
||||
"774": "Holography",
|
||||
"775": "Digital photography",
|
||||
"776": "Computer art (Digital art)",
|
||||
"777": "Cinematography and videography",
|
||||
"778": "Specific fields and special kinds of photography",
|
||||
"779": "Photographs",
|
||||
"78": "Music",
|
||||
"780": "Music",
|
||||
"781": "General principles & musical forms",
|
||||
"782": "Vocal music",
|
||||
"783": "Music for single voices",
|
||||
"784": "Instruments & instrumental ensembles",
|
||||
"785": "Ensembles with only one instrument per part",
|
||||
"786": "Keyboard, mechanical, electrophonic, percussion instruments",
|
||||
"787": "Stringed instruments (Chordophones)",
|
||||
"788": "Wind instruments (Aerophones)",
|
||||
"79": "Sports, games & entertainment",
|
||||
"790": "Recreational and performing arts",
|
||||
"791": "Public performances",
|
||||
"792": "Stage presentations",
|
||||
"793": "Indoor games and amusements",
|
||||
"794": "Indoor games of skill",
|
||||
"795": "Games of chance",
|
||||
"796": "Athletic and outdoor sports and games",
|
||||
"797": "Aquatic & air sports",
|
||||
"798": "Equestrian sports and animal racing",
|
||||
"799": "Fishing, hunting, shooting",
|
||||
"8": "Literature",
|
||||
"80": "Literature, rhetoric & criticism",
|
||||
"800": "Literature (Belles-lettres) and rhetoric",
|
||||
"801": "Philosophy and theory",
|
||||
"802": "Miscellany",
|
||||
"803": "Dictionaries, encyclopedias, concordances",
|
||||
"805": "Serial publications",
|
||||
"806": "Organizations and management",
|
||||
"807": "Education, research, related topics",
|
||||
"808": "Rhetoric and collections of literary texts from more than two literatures",
|
||||
"809": "History, description, critical appraisal of more than two literatures",
|
||||
"81": "American literature in English",
|
||||
"810": "American literature in English",
|
||||
"811": "American poetry in English",
|
||||
"812": "American drama in English",
|
||||
"813": "American fiction in English",
|
||||
"814": "American essays in English",
|
||||
"815": "American speeches in English",
|
||||
"816": "American letters in English",
|
||||
"817": "American humor and satire in English",
|
||||
"818": "American miscellaneous writings",
|
||||
"82": "English & Old English literatures",
|
||||
"820": "English and Old English (Anglo-Saxon) literatures",
|
||||
"821": "English poetry",
|
||||
"822": "English drama",
|
||||
"823": "English fiction",
|
||||
"824": "English essays",
|
||||
"825": "English speeches",
|
||||
"826": "English letters",
|
||||
"827": "English humor and satire",
|
||||
"828": "English miscellaneous writings",
|
||||
"829": "Old English (Anglo-Saxon) literature",
|
||||
"83": "German & related literatures",
|
||||
"830": "Literatures of Germanic languages German literature",
|
||||
"831": "German poetry",
|
||||
"832": "German drama",
|
||||
"833": "German fiction",
|
||||
"834": "German essays",
|
||||
"835": "German speeches",
|
||||
"836": "German letters",
|
||||
"837": "German humor & satire",
|
||||
"838": "German miscellaneous writings",
|
||||
"839": "Other Germanic literatures",
|
||||
"84": "French & related literatures",
|
||||
"840": "French literature and literatures of related Romance languages",
|
||||
"841": "French poetry",
|
||||
"842": "French drama",
|
||||
"843": "French fiction",
|
||||
"844": "French essays",
|
||||
"845": "French speeches",
|
||||
"846": "French letters",
|
||||
"847": "French humor & satire",
|
||||
"848": "French miscellaneous writings",
|
||||
"849": "Occitan, Catalan, Franco-Provençal literatures",
|
||||
"85": "Italian, Romanian & related literatures",
|
||||
"850": "Literatures of Italian, Dalmatian, Romanian, Rhaetian, Sardinian, Corsican languages",
|
||||
"851": "Italian poetry",
|
||||
"852": "Italian drama",
|
||||
"853": "Italian fiction",
|
||||
"854": "Italian essays",
|
||||
"855": "Italian speeches",
|
||||
"856": "Italian letters",
|
||||
"857": "Italian humor and satire",
|
||||
"858": "Italian miscellaneous writings",
|
||||
"859": "Literatures of Romanian, Rhaetian, Sardinian, Corsican languages",
|
||||
"86": "Spanish, Portuguese, Galician literatures",
|
||||
"860": "Spanish & Portuguese literatures",
|
||||
"861": "Spanish poetry",
|
||||
"862": "Spanish drama",
|
||||
"863": "Spanish fiction",
|
||||
"864": "Spanish essays",
|
||||
"865": "Spanish speeches",
|
||||
"866": "Spanish letters",
|
||||
"867": "Spanish humor and satire",
|
||||
"868": "Spanish miscellaneous writings",
|
||||
"869": "Literatures of Portuguese and Galician languages",
|
||||
"87": "Latin & Italic literatures",
|
||||
"870": "Latin & Italic literatures",
|
||||
"871": "Latin poetry",
|
||||
"872": "Latin dramatic poetry and drama",
|
||||
"873": "Latin epic poetry and fiction",
|
||||
"874": "Latin lyric poetry",
|
||||
"875": "Latin speeches",
|
||||
"876": "Latin letters",
|
||||
"877": "Latin humor and satire",
|
||||
"878": "Latin miscellaneous writings",
|
||||
"879": "Literatures of other Italic languages",
|
||||
"88": "Classical & modern Greek literatures",
|
||||
"880": "Literatures of Hellenic languages Classical Greek literature",
|
||||
"881": "Classical Greek poetry",
|
||||
"882": "Classical Greek dramatic poetry and drama",
|
||||
"883": "Classical Greek epic poetry and fiction",
|
||||
"884": "Classical Greek lyric poetry",
|
||||
"885": "Classical Greek speeches",
|
||||
"886": "Classical Greek letters",
|
||||
"887": "Classical Greek humor and satire",
|
||||
"888": "Classical Greek miscellaneous writings",
|
||||
"889": "Modern Greek literature",
|
||||
"89": "Other literatures",
|
||||
"890": "Literatures of other specific languages and language families",
|
||||
"891": "East Indo-European and Celtic literatures",
|
||||
"892": "Afro-Asiatic literatures",
|
||||
"893": "Non-Semitic Afro-Asiatic literatures",
|
||||
"894": "Literatures of Altaic, Uralic, Hyperborean, Dravidian languages; literatures of miscellaneous languages of south Asia",
|
||||
"895": "Literatures of East and Southeast Asia",
|
||||
"896": "African literatures",
|
||||
"897": "North American native literatures",
|
||||
"898": "Literatures of South American native languages",
|
||||
"899": "Literatures of non-Austronesian languages of Oceania, of Austronesian languages, of miscellaneous languages",
|
||||
"9": "History & geography",
|
||||
"90": "History",
|
||||
"900": "History, geography, and auxiliary disciplines",
|
||||
"901": "Philosophy and theory of history",
|
||||
"902": "Miscellany",
|
||||
"903": "Dictionaries, encyclopedias, concordances of history",
|
||||
"904": "Collected accounts of events",
|
||||
"905": "Serial publications of history",
|
||||
"906": "Organizations and management of history",
|
||||
"907": "Education, research & related topics",
|
||||
"908": "History with respect to groups of people",
|
||||
"909": "World history",
|
||||
"91": "Geography & travel",
|
||||
"910": "Geography and travel",
|
||||
"911": "Historical geography",
|
||||
"912": "Graphic representations of surface of earth and of extraterrestrial worlds",
|
||||
"913": "Geography of and travel in ancient world",
|
||||
"914": "Geography of and travel in Europe",
|
||||
"915": "Geography of and travel in Asia",
|
||||
"916": "Geography of and travel in Africa",
|
||||
"917": "Geography of and travel in North America",
|
||||
"918": "Geography of & travel in South America",
|
||||
"919": "Geography of and travel in Australasia, Pacific Ocean islands, Atlantic Ocean islands, Arctic islands, Antarctica and on extraterrestrial worlds",
|
||||
"92": "Biography & genealogy",
|
||||
"920": "Biography, genealogy, insignia",
|
||||
"929": "Genealogy, names, insignia",
|
||||
"93": "History of ancient world (to ca. 499)",
|
||||
"930": "History of ancient world to ca. 499",
|
||||
"931": "China to 420",
|
||||
"932": "Egypt to 640",
|
||||
"933": "Palestine to 70",
|
||||
"934": "South Asia to 647",
|
||||
"935": "Mesopotamia to 637 and Iranian Plateau to 637",
|
||||
"936": "Europe north and west of Italian Peninsula to ca. 499",
|
||||
"937": "Italian Peninsula to 476 and adjacent territories to 476",
|
||||
"938": "Greece to 323",
|
||||
"939": "Other parts of ancient world to ca. 640",
|
||||
"94": "History of Europe",
|
||||
"940": "History of Europe",
|
||||
"941": "British Isles",
|
||||
"942": "England and Wales",
|
||||
"943": "Germany and neighboring central European countries",
|
||||
"944": "France and Monaco",
|
||||
"945": "Italy, San Marino, Vatican City, Malta",
|
||||
"946": "Spain, Andorra, Gibraltar, Portugal",
|
||||
"947": "Russia and neighboring east European countries",
|
||||
"948": "Scandinavia",
|
||||
"949": "Other parts of Europe",
|
||||
"95": "History of Asia",
|
||||
"950": "History of Asia",
|
||||
"951": "China and adjacent areas",
|
||||
"952": "Japan",
|
||||
"953": "Arabian Peninsula and adjacent areas",
|
||||
"954": "India and neighboring south Asian countries",
|
||||
"955": "Iran",
|
||||
"956": "Middle East (Near East)",
|
||||
"957": "Siberia (Asiatic Russia)",
|
||||
"958": "Central Asia",
|
||||
"959": "Southeast Asia",
|
||||
"96": "History of Africa",
|
||||
"960": "History of Africa",
|
||||
"961": "Tunisia & Libya",
|
||||
"962": "Egypt, Sudan, South Sudan",
|
||||
"963": "Ethiopia and Eritrea",
|
||||
"964": "Northwest African coast & offshore islands",
|
||||
"965": "Algeria",
|
||||
"966": "West Africa and offshore islands",
|
||||
"967": "Central Africa and offshore islands",
|
||||
"968": "Republic of South Africa and neighboring southern African countries",
|
||||
"969": "South Indian Ocean islands",
|
||||
"97": "History of North America",
|
||||
"970": "History of North America",
|
||||
"971": "Canada",
|
||||
"972": "Middle America; Mexico",
|
||||
"973": "United States",
|
||||
"974": "Northeastern United States (New England and Middle Atlantic states)",
|
||||
"975": "Southeastern United States (South Atlantic states)",
|
||||
"976": "South central United States Gulf Coast states",
|
||||
"977": "North central United States",
|
||||
"978": "Western United States",
|
||||
"979": "Great Basin and Pacific Slope region of United States",
|
||||
"98": "History of South America",
|
||||
"980": "History of South America",
|
||||
"981": "Brazil",
|
||||
"982": "Argentina",
|
||||
"983": "Chile",
|
||||
"984": "Bolivia",
|
||||
"985": "Peru",
|
||||
"986": "Colombia and Ecuador",
|
||||
"987": "Venezuela",
|
||||
"988": "Guiana",
|
||||
"989": "Paraguay and Uruguay",
|
||||
"99": "History of other areas",
|
||||
"990": "History of Australasia, Pacific Ocean islands, Atlantic Ocean islands, Arctic islands, Antarctica, extraterrestrial worlds",
|
||||
"993": "New Zealand",
|
||||
"994": "Australia",
|
||||
"995": "New Guinea and neighboring countries of Melanesia",
|
||||
"996": "Other parts of Pacific Polynesia",
|
||||
"997": "Atlantic Ocean islands",
|
||||
"998": "Arctic islands and Antarctica",
|
||||
"999": "Extraterrestrial worlds"
|
||||
}
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Maintenance script: re-download the class labels from dewey.info and
    # rewrite the DEWEY literal in this very file.
    import json
    import re
    from ox.cache import read_url

    pref_label = 'http://www.w3.org/2004/02/skos/core#prefLabel'

    dewey = {}
    for i in range(0, 1000):
        url = 'http://dewey.info/class/%s/about.en.json' % i
        print(url)
        data = json.loads(read_url(url).decode('utf-8'))
        # the first entry carrying a preferred label names the class
        for d in data.values():
            if pref_label in d:
                dewey[str(i)] = d[pref_label][0]['value']
                break

    # Keep the JSON as text: formatting a bytes object with %s would embed
    # its b'...' repr into the generated source on Python 3.
    data = json.dumps(dewey, indent=4, ensure_ascii=False, sort_keys=True)
    with open(__file__, encoding='utf-8') as f:
        pydata = f.read()
    # A callable replacement is inserted verbatim; a plain string would have
    # its backslash escapes (e.g. in JSON-escaped labels) reinterpreted.
    pydata = re.sub(
        re.compile('\nDEWEY = {.*?}\n\n', re.DOTALL),
        lambda _match: '\nDEWEY = %s\n\n' % data, pydata)

    with open(__file__, 'w', encoding='utf-8') as f:
        f.write(pydata)
|
102
oml/meta/loc.py
102
oml/meta/loc.py
|
@ -1,102 +0,0 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
# vi:si:et:sw=4:sts=4:ts=4
|
||||
|
||||
|
||||
from ox.cache import read_url
|
||||
import ox
|
||||
import re
|
||||
import xml.etree.ElementTree as ET
|
||||
|
||||
from .dewey import get_classification
|
||||
from .marc_countries import COUNTRIES
|
||||
from .utils import normalize_isbn
|
||||
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def get_ids(key, value):
    """Return related identifiers found via the Library of Congress.

    For ``key == 'isbn'`` the loc.gov search page is scanned for a link to
    an LCCN permalink; for ``key == 'lccn'`` the record is fetched with
    ``lookup`` and its OCLC numbers and ISBNs are returned. Any other key
    yields an empty list.

    Returns a list of ``(key, value)`` tuples.
    """
    ids = []
    if key == 'isbn':
        url = 'http://www.loc.gov/search/?q=%s&all=true' % value
        html = ox.cache.read_url(url).decode('utf-8', 'ignore')
        # raw string: '\d' in a normal literal is an invalid escape sequence
        match = re.search(r'"http://lccn\.loc\.gov/(\d+)"', html)
        if match:
            ids.append(('lccn', match.group(1)))
    elif key == 'lccn':
        info = lookup(value)
        # separate loop names so the log line below still shows the request
        for other_key in ('oclc', 'isbn'):
            for other_value in info.get(other_key, []):
                ids.append((other_key, other_value))
    if ids:
        logger.debug('get_ids %s %s => %s', key, value, ids)
    return ids
|
||||
|
||||
def lookup(id):
    """Fetch the MODS record for an LCCN and convert it to a metadata dict.

    The record is read from ``http://lccn.loc.gov/<id>/mods``. If it cannot
    be fetched or parsed (after one retry) only ``{'lccn': [id]}`` is
    returned; if the record has no ``titleInfo`` element an empty dict is
    returned. Otherwise the result may contain ``title``, ``place``,
    ``country``, ``publisher``, ``date``, ``oclc``, ``isbn``, ``lccn``,
    ``classification``, ``author`` and ``description``; keys with empty
    values are removed.
    """
    logger.debug('lookup %s', id)
    ns = '{http://www.loc.gov/mods/v3}'
    url = 'http://lccn.loc.gov/%s/mods' % id
    info = {
        'lccn': [id]
    }

    def text_of(element):
        # Element.text is None for empty elements
        return (element.text or '').strip()

    try:
        data = read_url(url).decode('utf-8')
        mods = ET.fromstring(data)
    except Exception:
        try:
            # NOTE(review): timeout=0 presumably bypasses the ox cache so a
            # broken cached response is refetched -- confirm against ox.cache
            data = read_url(url, timeout=0).decode('utf-8')
            mods = ET.fromstring(data)
        except Exception:
            logger.debug('lookup for %s url: %s failed', id, url, exc_info=True)
            return info

    title = mods.findall(ns + 'titleInfo')
    if not title:
        return {}
    # sub titles are joined with ': ', all other title parts with a space
    info['title'] = ''.join(
        (': ' if e.tag == ns + 'subTitle' else ' ') + text_of(e)
        for e in title[0]
    ).strip()

    origin = mods.findall(ns + 'originInfo')
    if origin:
        info['place'] = []
        for place in origin[0].findall(ns + 'place'):
            terms = place.findall(ns + 'placeTerm')
            if not terms:
                continue
            term = terms[0]
            term_type = term.attrib.get('type')
            if term_type == 'text':
                info['place'].append(term.text)
            elif term_type == 'code':
                info['country'] = COUNTRIES.get(term.text, term.text)
        publisher = [e.text for e in origin[0].findall(ns + 'publisher')]
        if publisher:
            info['publisher'] = publisher[0]
        info['date'] = ''.join(
            e.text or ''
            for e in origin[0].findall(ns + 'dateIssued')
            if e.attrib.get('encoding') == 'marc'
        )

    for i in mods.findall(ns + 'identifier'):
        key = i.attrib.get('type')
        value = i.text
        if key not in ('oclc', 'lccn', 'isbn') or value is None:
            continue
        if key == 'oclc':
            value = value.replace('ocn', '').replace('ocm', '')
        if key == 'isbn':
            value = normalize_isbn(value)
        if key not in info:
            info[key] = []
        if value not in info[key]:
            info[key].append(value)

    for i in mods.findall(ns + 'classification'):
        if i.attrib.get('authority') == 'ddc' and i.text:
            # only the part before the first '/' is passed on
            info['classification'] = get_classification(i.text.split('/')[0])

    info['author'] = []
    for a in mods.findall(ns + 'name'):
        if a.attrib.get('usage') == 'primary':
            info['author'].append(' '.join(
                e.text for e in a.findall(ns + 'namePart')
                if e.text and e.attrib.get('type') not in ('date', )
            ))
    info['author'] = [ox.normalize_name(a) for a in info['author']]

    toc = mods.findall(ns + 'tableOfContents')
    if toc and toc[0].text:
        info['description'] = toc[0].text.strip()

    # drop keys whose value ended up empty
    for key in list(info):
        if not info[key]:
            del info[key]
    return info


info = lookup
|
|
@ -1,97 +0,0 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
# vi:si:et:sw=4:sts=4:ts=4
|
||||
|
||||
|
||||
import re
|
||||
|
||||
from ox.cache import read_url
|
||||
from ox import find_re, strip_tags, decode_html
|
||||
import stdnum.isbn
|
||||
|
||||
from .utils import find_isbns
|
||||
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
base = 'http://www.lookupbyisbn.com'
|
||||
|
||||
def get_ids(key, value):
    """Collect ASIN/ISBN identifiers related to an ISBN or ASIN.

    The lookupbyisbn search page is queried for ``isbn`` and ``asin`` keys;
    ISBN-10/ISBN-13 counterparts are added, and for an ASIN that is not a
    valid ISBN the ISBNs reported by ``amazon_lookup`` are used instead.

    Returns a list of ``(key, value)`` tuples.
    """
    found = []

    def add_other_isbn(v):
        # add the ISBN-13 form of an ISBN-10 and vice versa (978 prefix only)
        size = len(v)
        if size == 10:
            found.append(('isbn', stdnum.isbn.to_isbn13(v)))
        if size == 13 and v.startswith('978'):
            found.append(('isbn', stdnum.isbn.to_isbn10(v)))

    def add_isbn(v):
        found.append(('isbn', v))
        add_other_isbn(v)

    if key in ('isbn', 'asin'):
        page = read_url('%s/Search/Book/%s/1' % (base, value)).decode('utf-8')
        links = re.findall('href="(/Lookup/Book/[^"]+?)"', page)
        if links:
            # third path component from the end of the first result link
            asin = links[0].split('/')[-3]
            if stdnum.isbn.to_isbn10(asin) or not stdnum.isbn.is_valid(asin):
                found.append(('asin', asin))

    if key == 'isbn':
        add_other_isbn(value)
    elif key == 'asin':
        if stdnum.isbn.is_valid(value):
            add_isbn(value)
        else:
            for candidate in amazon_lookup(value):
                if not stdnum.isbn.is_valid(candidate):
                    continue
                add_isbn(candidate)

    if found:
        logger.debug('get_ids %s, %s => %s', key, value, found)
    return found
|
||||
|
||||
def lookup(id):
    """Scrape book metadata for an ASIN/ISBN from lookupbyisbn.com.

    Returns an empty dict when the page title is ``'Error!'``. Otherwise
    returns a dict with ``asin``, ``title``, ``description``, ``cover`` and
    whichever of ``author``, ``publisher``, ``date``, ``edition``,
    ``binding``, ``volume`` and ``pages`` were present on the page.
    ``pages`` is an int; ``author`` is a one-element list.
    """
    logger.debug('lookup %s', id)
    r = {
        'asin': [id]
    }
    url = '%s/Lookup/Book/%s/%s/1' % (base, id, id)
    logger.debug('%s', url)
    data = read_url(url).decode('utf-8')
    r["title"] = find_re(data, "<h2>(.*?)</h2>")
    if r["title"] == 'Error!':
        return {}
    # result key -> label used on the page
    keys = {
        'author': 'Author(s)',
        'publisher': 'Publisher',
        'date': 'Publication date',
        'edition': 'Edition',
        'binding': 'Binding',
        'volume': 'Volume(s)',
        'pages': 'Pages',
    }
    for key, label in keys.items():
        r[key] = find_re(data, '<span class="title">%s:</span>(.*?)</li>' % re.escape(label))
        # '--' is treated as "no value", same as an empty match
        if r[key] == '--' or not r[key]:
            del r[key]
        if key == 'pages' and key in r:
            try:
                r[key] = int(r[key])
            except ValueError:
                # a non-numeric page count should not abort the whole lookup
                logger.debug('ignoring invalid page count %r', r[key])
                del r[key]
    # plain '/' instead of '\/': the latter is an invalid escape sequence in
    # a non-raw string literal and matches the same text
    desc = find_re(data, '<h2>Description:</h2>(.*?)<div ')
    desc = desc.replace('<br /><br />', ' ').replace('<br /> ', ' ').replace('<br />', ' ')
    r['description'] = decode_html(strip_tags(desc))
    r['cover'] = find_re(data, '<img src="(.*?)" alt="Book cover').replace('._SL160_', '')
    for key in r:
        if isinstance(r[key], str):
            r[key] = decode_html(strip_tags(r[key])).strip()
    if 'author' in r and isinstance(r['author'], str) and r['author']:
        r['author'] = [r['author']]
    else:
        r['author'] = []
    # an all-uppercase author string is discarded along with an empty one
    if not r['author'] or r['author'][0].isupper():
        del r['author']
    if r['description'].lower() == 'Description of this item is not available at this time.'.lower():
        r['description'] = ''
    return r
|
||||
|
||||
def amazon_lookup(asin):
    """Return the distinct ISBNs found in the formats table of an Amazon product page."""
    page = read_url('http://www.amazon.com/dp/%s' % asin, timeout=-1)
    html = page.decode('utf-8', 'ignore')
    # only the section from 'Formats</h3>' up to the closing table tag is scanned
    formats = find_re(html, 'Formats</h3>.*?</table')
    unique = set(find_isbns(formats))
    return list(unique)
|
|
@ -1,409 +0,0 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
# vi:si:et:sw=4:sts=4:ts=4
|
||||
|
||||
COUNTRIES = {
|
||||
"gw": "Germany",
|
||||
"gv": "Guinea",
|
||||
"gu": "Guam",
|
||||
"gt": "Guatemala",
|
||||
"gs": "Georgia (Republic)",
|
||||
"gr": "Greece",
|
||||
"-ge": "Germany (East)",
|
||||
"gp": "Guadeloupe",
|
||||
"mnu": "Minnesota",
|
||||
"gy": "Guyana",
|
||||
"gd": "Grenada",
|
||||
"gb": "Kiribati",
|
||||
"go": "Gabon",
|
||||
"gm": "Gambia",
|
||||
"alu": "Alabama",
|
||||
"gi": "Gibraltar",
|
||||
"gh": "Ghana",
|
||||
"tz": "Tanzania",
|
||||
"tv": "Tuvalu",
|
||||
"tu": "Turkey",
|
||||
"tr": "Trinidad and Tobago",
|
||||
"ts": "United Arab Emirates",
|
||||
"to": "Tonga",
|
||||
"tl": "Tokelau",
|
||||
"tk": "Turkmenistan",
|
||||
"th": "Thailand",
|
||||
"ti": "Tunisia",
|
||||
"tg": "Togo",
|
||||
"tc": "Turks and Caicos Islands",
|
||||
"ta": "Tajikistan",
|
||||
"-gn": "Gilbert and Ellice Islands",
|
||||
"-us": "United States",
|
||||
"-ajr": "Azerbaijan S.S.R.",
|
||||
"-iu": "Israel-Syria Demilitarized Zones",
|
||||
"-iw": "Israel-Jordan Demilitarized Zones",
|
||||
"za": "Zambia",
|
||||
"nbu": "Nebraska",
|
||||
"scu": "South Carolina",
|
||||
"bg": "Bangladesh",
|
||||
"cau": "California",
|
||||
"abc": "Alberta",
|
||||
"xoa": "Northern Territory",
|
||||
"meu": "Maine",
|
||||
"ctu": "Connecticut",
|
||||
"my": "Malaysia",
|
||||
"aku": "Alaska",
|
||||
"gl": "Greenland",
|
||||
"-cn": "Canada",
|
||||
"wiu": "Wisconsin",
|
||||
"-cz": "Canal Zone",
|
||||
"txu": "Texas",
|
||||
"-cs": "Czechoslovakia",
|
||||
"-cp": "Canton and Enderbury Islands",
|
||||
"msu": "Mississippi",
|
||||
"-ln": "Central and Southern Line Islands",
|
||||
"nkc": "New Brunswick",
|
||||
"it": "Italy",
|
||||
"tnu": "Tennessee",
|
||||
"vp": "Various places",
|
||||
"mg": "Madagascar",
|
||||
"mf": "Mauritius",
|
||||
"mc": "Monaco",
|
||||
"-ur": "Soviet Union",
|
||||
"mm": "Malta",
|
||||
"ml": "Mali",
|
||||
"mo": "Montenegro",
|
||||
"flu": "Florida",
|
||||
"deu": "Delaware",
|
||||
"mk": "Oman",
|
||||
"mj": "Montserrat",
|
||||
"mu": "Mauritania",
|
||||
"mw": "Malawi",
|
||||
"mv": "Moldova",
|
||||
"mq": "Martinique",
|
||||
"mp": "Mongolia",
|
||||
"mr": "Morocco",
|
||||
"-ui": "United Kingdom Misc. Islands",
|
||||
"mx": "Mexico",
|
||||
"-uk": "United Kingdom",
|
||||
"mz": "Mozambique",
|
||||
"kyu": "Kentucky",
|
||||
"hiu": "Hawaii",
|
||||
"enk": "England",
|
||||
"nyu": "New York (State)",
|
||||
"fp": "French Polynesia",
|
||||
"fr": "France",
|
||||
"fs": "Terres australes et antarctiques françaises",
|
||||
"mau": "Massachusetts",
|
||||
"snc": "Saskatchewan",
|
||||
"fa": "Faroe Islands",
|
||||
"fg": "French Guiana",
|
||||
"lau": "Louisiana",
|
||||
"fj": "Fiji",
|
||||
"fk": "Falkland Islands",
|
||||
"fm": "Micronesia (Federated States)",
|
||||
"sz": "Switzerland",
|
||||
"sy": "Syria",
|
||||
"sx": "Namibia",
|
||||
"ss": "Western Sahara",
|
||||
"sr": "Surinam",
|
||||
"sq": "Swaziland",
|
||||
"sp": "Spain",
|
||||
"sw": "Sweden",
|
||||
"su": "Saudi Arabia",
|
||||
"st": "Saint-Martin",
|
||||
"sj": "Sudan",
|
||||
"si": "Singapore",
|
||||
"sh": "Spanish North Africa",
|
||||
"so": "Somalia",
|
||||
"sn": "Sint Maarten",
|
||||
"sm": "San Marino",
|
||||
"sl": "Sierra Leone",
|
||||
"sc": "Saint-Barthélemy",
|
||||
"sa": "South Africa",
|
||||
"sg": "Senegal",
|
||||
"sf": "Sao Tome and Principe",
|
||||
"se": "Seychelles",
|
||||
"sd": "South Sudan",
|
||||
"-unr": "Ukraine",
|
||||
"-kgr": "Kirghiz S.S.R.",
|
||||
"le": "Lebanon",
|
||||
"lb": "Liberia",
|
||||
"-hk": "Hong Kong",
|
||||
"lo": "Lesotho",
|
||||
"lh": "Liechtenstein",
|
||||
"li": "Lithuania",
|
||||
"lv": "Latvia",
|
||||
"lu": "Luxembourg",
|
||||
"vtu": "Vermont",
|
||||
"ls": "Laos",
|
||||
"xc": "Maldives",
|
||||
"ly": "Libya",
|
||||
"oku": "Oklahoma",
|
||||
"ye": "Yemen",
|
||||
"-tkr": "Turkmen S.S.R.",
|
||||
"nfc": "Newfoundland and Labrador",
|
||||
"ft": "Djibouti",
|
||||
"em": "Timor-Leste",
|
||||
"eg": "Equatorial Guinea",
|
||||
"ea": "Eritrea",
|
||||
"ec": "Ecuador",
|
||||
"-gsr": "Georgian S.S.R.",
|
||||
"et": "Ethiopia",
|
||||
"es": "El Salvador",
|
||||
"er": "Estonia",
|
||||
"ru": "Russia (Federation)",
|
||||
"rw": "Rwanda",
|
||||
"re": "Réunion",
|
||||
"rb": "Serbia",
|
||||
"rm": "Romania",
|
||||
"rh": "Zimbabwe",
|
||||
"-err": "Estonia",
|
||||
"oru": "Oregon",
|
||||
"quc": "Québec (Province)",
|
||||
"ntc": "Northwest Territories",
|
||||
"wlk": "Wales",
|
||||
"xj": "Saint Helena",
|
||||
"xk": "Saint Lucia",
|
||||
"xh": "Niue",
|
||||
"xn": "Macedonia",
|
||||
"xo": "Slovakia",
|
||||
"xl": "Saint Pierre and Miquelon",
|
||||
"xm": "Saint Vincent and the Grenadines",
|
||||
"xb": "Cocos (Keeling) Islands",
|
||||
"onc": "Ontario",
|
||||
"xa": "Christmas Island (Indian Ocean)",
|
||||
"xf": "Midway Islands",
|
||||
"xd": "Saint Kitts-Nevis",
|
||||
"xe": "Marshall Islands",
|
||||
"nhu": "New Hampshire",
|
||||
"xx": "No place, unknown, or undetermined",
|
||||
"fi": "Finland",
|
||||
"xr": "Czech Republic",
|
||||
"xs": "South Georgia and the South Sandwich Islands",
|
||||
"xp": "Spratly Island",
|
||||
"xv": "Slovenia",
|
||||
"-tt": "Trust Territory of the Pacific Islands",
|
||||
"iau": "Iowa",
|
||||
"ncu": "North Carolina",
|
||||
"stk": "Scotland",
|
||||
"xra": "South Australia",
|
||||
"miu": "Michigan",
|
||||
"kg": "Kyrgyzstan",
|
||||
"ke": "Kenya",
|
||||
"ko": "Korea (South)",
|
||||
"kn": "Korea (North)",
|
||||
"kv": "Kosovo",
|
||||
"ku": "Kuwait",
|
||||
"kz": "Kazakhstan",
|
||||
"-pt": "Portuguese Timor",
|
||||
"ksu": "Kansas",
|
||||
"dm": "Benin",
|
||||
"dk": "Denmark",
|
||||
"-ys": "Yemen (People's Democratic Republic)",
|
||||
"-yu": "Serbia and Montenegro",
|
||||
"-bwr": "Byelorussian S.S.R.",
|
||||
"dr": "Dominican Republic",
|
||||
"dq": "Dominica",
|
||||
"qa": "Qatar",
|
||||
"aru": "Arkansas",
|
||||
"nuc": "Nunavut",
|
||||
"wf": "Wallis and Futuna",
|
||||
"wk": "Wake Island",
|
||||
"wj": "West Bank of the Jordan River",
|
||||
"jm": "Jamaica",
|
||||
"vra": "Victoria",
|
||||
"jo": "Jordan",
|
||||
"ws": "Samoa",
|
||||
"ji": "Johnston Atoll",
|
||||
"-na": "Netherlands Antilles",
|
||||
"ja": "Japan",
|
||||
"cou": "Colorado",
|
||||
"-wb": "West Berlin",
|
||||
"ilu": "Illinois",
|
||||
"-nm": "Northern Mariana Islands",
|
||||
"ck": "Colombia",
|
||||
"cj": "Cayman Islands",
|
||||
"ci": "Croatia",
|
||||
"ch": "China (Republic : 1949- )",
|
||||
"co": "Curaçao",
|
||||
"cm": "Cameroon",
|
||||
"cl": "Chile",
|
||||
"-rur": "Russian S.F.S.R.",
|
||||
"cb": "Cambodia",
|
||||
"ca": "Caribbean Netherlands",
|
||||
"cg": "Congo (Democratic Republic)",
|
||||
"cf": "Congo (Brazzaville)",
|
||||
"-lir": "Lithuania",
|
||||
"cd": "Chad",
|
||||
"cy": "Cyprus",
|
||||
"cx": "Central African Republic",
|
||||
"cr": "Costa Rica",
|
||||
"cq": "Comoros",
|
||||
"cw": "Cook Islands",
|
||||
"cv": "Cape Verde",
|
||||
"cu": "Cuba",
|
||||
"pr": "Puerto Rico",
|
||||
"pp": "Papua New Guinea",
|
||||
"pw": "Palau",
|
||||
"py": "Paraguay",
|
||||
"pc": "Pitcairn Island",
|
||||
"pf": "Paracel Islands",
|
||||
"pg": "Guinea-Bissau",
|
||||
"pe": "Peru",
|
||||
"pk": "Pakistan",
|
||||
"ph": "Philippines",
|
||||
"pn": "Panama",
|
||||
"po": "Portugal",
|
||||
"pl": "Poland",
|
||||
"pic": "Prince Edward Island",
|
||||
"xxu": "United States",
|
||||
"gau": "Georgia",
|
||||
"xxc": "Canada",
|
||||
"xxk": "United Kingdom",
|
||||
"iy": "Iraq-Saudi Arabia Neutral Zone",
|
||||
"vb": "British Virgin Islands",
|
||||
"vc": "Vatican City",
|
||||
"ve": "Venezuela",
|
||||
"iq": "Iraq",
|
||||
"vi": "Virgin Islands of the United States",
|
||||
"is": "Israel",
|
||||
"ir": "Iran",
|
||||
"vm": "Vietnam",
|
||||
"iv": "Côte d'Ivoire",
|
||||
"ii": "India",
|
||||
"-ac": "Ashmore and Cartier Islands",
|
||||
"io": "Indonesia",
|
||||
"-ai": "Anguilla",
|
||||
"ic": "Iceland",
|
||||
"ie": "Ireland",
|
||||
"pau": "Pennsylvania",
|
||||
"-jn": "Jan Mayen",
|
||||
"nik": "Northern Ireland",
|
||||
"wyu": "Wyoming",
|
||||
"-air": "Armenian S.S.R.",
|
||||
"-sv": "Swan Islands",
|
||||
"-mvr": "Moldavian S.S.R.",
|
||||
"-sk": "Sikkim",
|
||||
"riu": "Rhode Island",
|
||||
"-sb": "Svalbard",
|
||||
"-xi": "Saint Kitts-Nevis-Anguilla",
|
||||
"wea": "Western Australia",
|
||||
"cc": "China",
|
||||
"nvu": "Nevada",
|
||||
"mou": "Missouri",
|
||||
"ce": "Sri Lanka",
|
||||
"qea": "Queensland",
|
||||
"-mh": "Macao",
|
||||
"nju": "New Jersey",
|
||||
"ykc": "Yukon Territory",
|
||||
"-vs": "Vietnam, South",
|
||||
"tma": "Tasmania",
|
||||
"-vn": "Vietnam, North",
|
||||
"bd": "Burundi",
|
||||
"be": "Belgium",
|
||||
"bf": "Bahamas",
|
||||
"nmu": "New Mexico",
|
||||
"ba": "Bahrain",
|
||||
"bb": "Barbados",
|
||||
"bl": "Brazil",
|
||||
"bm": "Bermuda Islands",
|
||||
"bn": "Bosnia and Hercegovina",
|
||||
"bo": "Bolivia",
|
||||
"bh": "Belize",
|
||||
"bi": "British Indian Ocean Territory",
|
||||
"bt": "Bhutan",
|
||||
"bu": "Bulgaria",
|
||||
"bv": "Bouvet Island",
|
||||
"bw": "Belarus",
|
||||
"bp": "Solomon Islands",
|
||||
"br": "Burma",
|
||||
"bs": "Botswana",
|
||||
"dcu": "District of Columbia",
|
||||
"bx": "Brunei",
|
||||
"aca": "Australian Capital Territory",
|
||||
"idu": "Idaho",
|
||||
"xna": "New South Wales",
|
||||
"ot": "Mayotte",
|
||||
"ndu": "North Dakota",
|
||||
"nsc": "Nova Scotia",
|
||||
"-kzr": "Kazakh S.S.R.",
|
||||
"mbc": "Manitoba",
|
||||
"-lvr": "Latvia",
|
||||
"-uzr": "Uzbek S.S.R.",
|
||||
"wau": "Washington (State)",
|
||||
"vau": "Virginia",
|
||||
"sdu": "South Dakota",
|
||||
"gz": "Gaza Strip",
|
||||
"ht": "Haiti",
|
||||
"hu": "Hungary",
|
||||
"ho": "Honduras",
|
||||
"hm": "Heard and McDonald Islands",
|
||||
"xga": "Coral Sea Islands Territory",
|
||||
"uy": "Uruguay",
|
||||
"uz": "Uzbekistan",
|
||||
"uv": "Burkina Faso",
|
||||
"up": "United States Misc. Pacific Islands",
|
||||
"mtu": "Montana",
|
||||
"un": "Ukraine",
|
||||
"utu": "Utah",
|
||||
"ug": "Uganda",
|
||||
"ua": "Egypt",
|
||||
"azu": "Arizona",
|
||||
"uc": "United States Misc. Caribbean Islands",
|
||||
"aa": "Albania",
|
||||
"ae": "Algeria",
|
||||
"ag": "Argentina",
|
||||
"af": "Afghanistan",
|
||||
"ai": "Armenia (Republic)",
|
||||
"inu": "Indiana",
|
||||
"uik": "United Kingdom Misc. Islands",
|
||||
"aj": "Azerbaijan",
|
||||
"am": "Anguilla",
|
||||
"ao": "Angola",
|
||||
"an": "Andorra",
|
||||
"aq": "Antigua and Barbuda",
|
||||
"as": "American Samoa",
|
||||
"au": "Austria",
|
||||
"at": "Australia",
|
||||
"aw": "Aruba",
|
||||
"ay": "Antarctica",
|
||||
"ohu": "Ohio",
|
||||
"nl": "New Caledonia",
|
||||
"-ry": "Ryukyu Islands, Southern",
|
||||
"nn": "Vanuatu",
|
||||
"no": "Norway",
|
||||
"ne": "Netherlands",
|
||||
"ng": "Niger",
|
||||
"nx": "Norfolk Island",
|
||||
"nz": "New Zealand",
|
||||
"np": "Nepal",
|
||||
"nq": "Nicaragua",
|
||||
"nr": "Nigeria",
|
||||
"mdu": "Maryland",
|
||||
"nu": "Nauru",
|
||||
"nw": "Northern Mariana Islands",
|
||||
"wvu": "West Virginia",
|
||||
"-xxr": "Soviet Union",
|
||||
"-tar": "Tajik S.S.R.",
|
||||
"bcc": "British Columbia"
|
||||
}
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Maintenance script: re-download the MARC country code list from loc.gov
    # and rewrite the COUNTRIES literal in this very file.
    import json
    import re
    import ox
    from ox.cache import read_url

    url = "http://www.loc.gov/marc/countries/countries_code.html"
    data = read_url(url).decode('utf-8')
    # each table row yields a (code, name) pair once the markup is stripped
    row_re = re.compile('<tr>.*?class="code">(.*?)</td>.*?<td>(.*?)</td>', re.DOTALL)
    countries = {
        ox.strip_tags(code): ox.strip_tags(name)
        for code, name in row_re.findall(data)
    }

    # Keep the JSON as text: formatting a bytes object with %s would embed
    # its b'...' repr into the generated source on Python 3.
    data = json.dumps(countries, indent=4, ensure_ascii=False)
    with open(__file__, encoding='utf-8') as f:
        pydata = f.read()
    # A callable replacement is inserted verbatim; a plain string would have
    # its backslash escapes reinterpreted by re.sub.
    pydata = re.sub(
        re.compile('\nCOUNTRIES = {.*?}\n\n', re.DOTALL),
        lambda _match: '\nCOUNTRIES = %s\n\n' % data, pydata)

    with open(__file__, 'w', encoding='utf-8') as f:
        f.write(pydata)
|
|
@ -1,210 +0,0 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
# vi:si:et:sw=4:sts=4:ts=4
|
||||
|
||||
|
||||
from datetime import datetime
|
||||
from urllib.parse import urlencode
|
||||
import json
|
||||
|
||||
from ox.cache import read_url
|
||||
|
||||
from .dewey import get_classification
|
||||
from .marc_countries import COUNTRIES
|
||||
from .utils import normalize_isbn
|
||||
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# Maps field names of a raw Open Library record to the key under which
# format() stores the value. Both 'isbn_10' and 'isbn_13' map to 'isbn';
# format() concatenates their values, so the order of entries here decides
# the order of the merged list.
KEYS = {
    'authors': 'author',
    'covers': 'cover',
    'dewey_decimal_class': 'classification',
    'isbn_10': 'isbn',
    'isbn_13': 'isbn',
    'lccn': 'lccn',
    'number_of_pages': 'pages',
    'languages': 'language',
    'oclc_numbers': 'oclc',
    'publish_country': 'country',
    'publish_date': 'date',
    'publishers': 'publisher',
    'publish_places': 'place',
    'series': 'series',
    'title': 'title',
}
|
||||
|
||||
def find(query):
    """Search Open Library and return formatted metadata for matching books.

    The query is stripped and sent to the search API; only result keys that
    start with '/books' are fetched. Each fetched record is passed through
    format() and gets 'olid' (a one-element list) and 'primaryid'
    (['olid', <olid>]) added.

    Returns a list of metadata dicts, empty when nothing matched.
    """
    query = query.strip()
    logger.debug('find %s', query)
    response = api.search(query)
    ids = [b for b in response.get('result', []) if b.startswith('/books')]
    if not ids:
        # Nothing to fetch; skip the pointless get_many request.
        return []
    # Default to a dict: the result is iterated with .items() below, and a
    # list default would raise AttributeError when 'result' is missing.
    books = api.get_many(ids).get('result', {})
    results = []
    for olid, value in books.items():
        # Keys look like '/books/<olid>'; keep only the last path segment.
        olid = olid.split('/')[-1]
        book = format(value)
        book['olid'] = [olid]
        book['primaryid'] = ['olid', olid]
        results.append(book)
    return results
|
||||
|
||||
|
||||
def get_ids(key, value):
    """Collect identifiers that Open Library associates with (key, value).

    For key 'olid' the edition is looked up and its 'isbn', 'lccn' and
    'oclc' values are returned. For keys 'isbn', 'oclc' and 'lccn' matching
    editions are queried, and for every '/books' hit the olid plus the
    identifiers of that edition are returned. Any other key yields [].

    Returns a list of unique (key, value) tuples in discovery order.
    """
    ids = []

    def remember(pair):
        # Append while keeping the list free of duplicates.
        if pair not in ids:
            ids.append(pair)

    if key == 'olid':
        data = lookup(value)
        for kind in ('isbn', 'lccn', 'oclc'):
            for v in data.get(kind, ()):
                remember((kind, v))
    elif key in ('isbn', 'oclc', 'lccn'):
        logger.debug('get_ids %s %s', key, value)
        if key == 'isbn':
            # The query field depends on the ISBN length (isbn_10 / isbn_13).
            key = 'isbn_%s' % len(value)
        r = api.things({'type': '/type/edition', key: value})
        for b in r.get('result', []):
            if not b.startswith('/books'):
                continue
            olid = b.split('/')[-1]
            related = get_ids('olid', olid)
            remember(('olid', olid))
            for kv in related:
                remember(kv)
    if ids:
        logger.debug('get_ids %s %s => %s', key, value, ids)
    return ids
|
||||
|
||||
def lookup(id, return_all=False):
    """Fetch the edition '/books/<id>' and return its formatted metadata.

    The record is passed through format(); the returned dict always ends up
    with an 'olid' list containing ``id``.
    """
    logger.debug('lookup %s', id)
    response = api.get('/books/' + id)
    data = format(response.get('result', {}), return_all)
    # Make sure the requested id is recorded, replacing any list without it.
    if id not in data.get('olid', []):
        data['olid'] = [id]
    logger.debug('lookup %s => %s', id, list(data.keys()))
    return data
|
||||
|
||||
def get_type(obj):
    """Return the 'type' of ``obj`` as a plain value.

    When the 'type' entry is a dict, its 'key' member is returned; otherwise
    the entry itself is returned (None when absent).
    """
    kind = obj.get('type')
    return kind['key'] if isinstance(kind, dict) else kind
|
||||
|
||||
def parse_date(s):
    """Convert an English long-form date into ISO-style notation.

    'January 1, 1998' becomes '1998-01-01' and 'January 1998' becomes
    '1998-01'. Any value that matches neither pattern (including non-string
    values) is returned unchanged.
    """
    for pattern, fmt in (('%B %d, %Y', '%Y-%m-%d'), ('%B %Y', '%Y-%m')):
        try:
            return datetime.strptime(s, pattern).strftime(fmt)
        except (ValueError, TypeError):
            # ValueError: s does not match this pattern.
            # TypeError: s is not a string at all.
            continue
    return s
|
||||
|
||||
def format(info, return_all=False):
    """Convert a raw Open Library record into a metadata dict.

    Only the fields listed in KEYS are copied, each stored under its mapped
    name. ``return_all`` is accepted but not used by this function.

    Returns the new dict; ``info`` may have its 'publish_places' list
    modified in place.
    """
    data = {}
    # When the record links to works, fetch the first one; its author roles
    # are preferred over the record's own author list below.
    if 'works' in info:
        work = api.get(info['works'][0]['key'])['result']
    else:
        work = None
    for key in KEYS:
        if key in info:
            value = info[key]
            if key == 'authors':
                if work:
                    value = resolve_names([r['author']
                        for r in work.get('authors', []) if get_type(r) == '/type/author_role'])
                else:
                    value = resolve_names(value)
            elif key == 'publish_country':
                # Translate the country code; unknown codes pass through.
                value = value.strip()
                value = COUNTRIES.get(value, value)
            elif key == 'covers':
                # Only the first cover id is turned into a URL.
                value = 'https://covers.openlibrary.org/b/id/%s.jpg' % value[0]
            elif key == 'languages':
                value = resolve_names(value)
            elif key in ('isbn_10', 'isbn_13'):
                if not isinstance(value, list):
                    value = [value]
                value = list(map(normalize_isbn, value))
                # Both ISBN fields share one target key; append to the values
                # already collected from the other field.
                if KEYS[key] in data:
                    value = data[KEYS[key]] + value
            elif isinstance(value, list) and key not in ('publish_places', 'lccn', 'oclc_numbers'):
                # Remaining list-valued fields are reduced to their first item.
                value = value[0]
            if key == 'publish_date':
                value = parse_date(value)
            if key == 'publish_places':
                # Strip one leading '[' and one trailing ']' from each place.
                for i, v in enumerate(value):
                    if v.startswith('['):
                        v = v[1:]
                    if v.endswith(']'):
                        v = v[:-1]
                    value[i] = v
            data[KEYS[key]] = value
    # NOTE(review): raises KeyError when a record has a subtitle but no title.
    if 'subtitle' in info:
        data['title'] += ' ' + info['subtitle']
    if 'classification' in data:
        value = data['classification']
        if isinstance(value, list):
            value = value[0]
        # Only the part before the first '/' is passed to the lookup.
        data['classification'] = get_classification(value.split('/')[0])
    return data
|
||||
|
||||
def resolve_names(objects, key='name'):
    """Fetch the referenced objects and return one field from each.

    ``objects`` is an iterable of dicts carrying a 'key'; all keys are
    fetched in one request. A fetched record of type '/type/redirect' with a
    'location' is replaced by the record at that location. The ``key`` field
    of every record is collected, in the order the API returned them.
    """
    wanted = [obj['key'] for obj in objects]
    records = api.get_many(wanted).get('result', {})
    names = []
    for record in records.values():
        is_redirect = (
            'location' in record
            and record.get('type', {}).get('key') == '/type/redirect'
        )
        if is_redirect:
            record = api.get(record['location']).get('result', {})
        names.append(record[key])
    return names
|
||||
|
||||
class API(object):
    """Minimal client for the JSON endpoints below ``base``."""

    # Root of all endpoints; the action name is appended as a path segment.
    base = 'https://openlibrary.org/api'

    def _request(self, action, data, timeout=None):
        """Call ``<base>/<action>`` with ``data`` as query string.

        Non-string parameter values are JSON-encoded. ``timeout`` is passed
        on to read_url when given. If the response body contains
        '504 Gateway Time-out', the request is repeated once with
        ``timeout=-1``. Error responses are logged, not raised.

        Returns the decoded JSON response.
        """
        # Encode into a fresh dict so the caller's dict is left untouched.
        params = {
            key: value if isinstance(value, str) else json.dumps(value)
            for key, value in data.items()
        }
        url = self.base + '/' + action + '?' + urlencode(params)
        # Pass timeout by keyword: read_url's second positional parameter is
        # the request body, not the timeout. Omit it entirely when unset so
        # read_url applies its own default.
        kwargs = {} if timeout is None else {'timeout': timeout}
        r = read_url(url, **kwargs).decode('utf-8')
        if '504 Gateway Time-out' in r:
            r = read_url(url, timeout=-1).decode('utf-8')
        result = json.loads(r)
        if ('status' in result and result['status'] == 'error') or 'error' in result:
            logger.info('FAILED %s %s', action, params)
            logger.info('URL %s', url)
        return result

    def get(self, key):
        """Return the response of the 'get' action for a single ``key``."""
        return self._request('get', {'key': key})

    def get_many(self, keys):
        """Return the response of the 'get_many' action for a list of keys."""
        return self._request('get_many', {'keys': keys})

    def search(self, query):
        """Run the 'search' action.

        A plain string is wrapped as ``{'query': <string>}``; a dict is sent
        as given. A response with status 'error' is logged and still returned.
        """
        if isinstance(query, str):
            query = {'query': query}
        data = self._request('search', {'q': query})
        if 'status' in data and data['status'] == 'error':
            logger.info('FAILED %s', query)
        return data

    def things(self, query):
        """Return the response of the 'things' action for ``query``."""
        return self._request('things', {'query': query})
|
||||
|
||||
# Module-level client instance used by the functions in this module.
api = API()
|
|
@ -1,116 +0,0 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
# vi:si:et:sw=4:sts=4:ts=4
|
||||
|
||||
|
||||
import re
|
||||
import hashlib
|
||||
|
||||
from ox.cache import read_url
|
||||
import lxml.html
|
||||
import stdnum.isbn
|
||||
|
||||
from .utils import normalize_isbn
|
||||
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# Root URL that the search and record URLs in this module are built from.
base_url = 'http://www.worldcat.org'
|
||||
|
||||
def get_ids(key, value):
    """Collect identifiers that WorldCat associates with (key, value).

    For key 'isbn' the search page is scraped for the first OCLC number;
    that number plus every other ISBN of the record is returned. For key
    'oclc' all ISBNs of the record are returned. Any other key yields [].

    Returns a list of (key, value) tuples.
    """
    ids = []
    if key == 'isbn':
        url = '%s/search?qt=worldcat_org_bks&q=%s' % (base_url, value)
        html = read_url(url).decode('utf-8')
        # Raw string so that \d is a regex escape, not a string escape.
        matches = re.compile(r'/title.*?oclc/(\d+).*?"').findall(html)
        if matches:
            oclc = matches[0]
            info = lookup(oclc)
            ids.append(('oclc', oclc))
            # The ISBN that was searched for is already known to the caller.
            ids.extend(('isbn', v) for v in info.get('isbn', []) if v != value)
    elif key == 'oclc':
        info = lookup(value)
        # A separate loop name keeps ``value`` intact, so the debug message
        # below reports the queried OCLC number rather than the last ISBN.
        ids.extend(('isbn', isbn) for isbn in info.get('isbn', []))
    if ids:
        logger.debug('get_ids %s %s => %s', key, value, ids)
    return ids
|
||||
|
||||
def lookup(id):
    """Scrape the WorldCat page '<base_url>/oclc/<id>' into a metadata dict.

    The result always contains 'oclc' ([id]). Other entries depend on what
    the page provides; 'isbn', 'author' and 'place' are lists, 'cover' is a
    URL, and 'publisher' / 'date' are strings.
    """
    data = {
        'oclc': [id]
    }
    url = '%s/oclc/%s' % (base_url, id)
    html = read_url(url).decode('utf-8')
    doc = lxml.html.document_fromstring(html)
    # Every element whose id contains 'bibtip' becomes an entry; the
    # 'bibtip_' prefix is dropped from the key and empty texts are skipped.
    for e in doc.xpath("//*[contains(@id, 'bibtip')]"):
        key = e.attrib['id'].replace('bibtip_', '')
        value = e.text_content().strip()
        if value:
            data[key] = value
    info = doc.xpath('//textarea[@id="util-em-note"]')
    if info:
        info = info[0].text
        # The second blank-line-separated paragraph of the textarea is parsed
        # as 'Key: value' lines; keys are lower-cased before being stored.
        info = dict([i.split(':', 1) for i in info.split('\n\n')[1].split('\n')])
        for key in info:
            k = key.lower()
            value = info[key].strip()
            if value:
                data[k] = value
    # Remove entries that are not part of the returned metadata.
    for key in ('id', 'instance', 'mediatype', 'reclist', 'shorttitle'):
        if key in data:
            del data[key]
    # 'isxn' is a space-separated list; keep only the valid ISBNs, without
    # duplicates, under 'isbn'.
    if 'isxn' in data:
        for isbn in data.pop('isxn').split(' '):
            isbn = normalize_isbn(isbn)
            if stdnum.isbn.is_valid(isbn):
                if not 'isbn' in data:
                    data['isbn'] = []
                if isbn not in data['isbn']:
                    data['isbn'].append(isbn)
    cover = doc.xpath('//img[@class="cover"]')
    if cover:
        data['cover'] = cover[0].attrib['src']
        # Protocol-relative URLs are completed with 'http:'.
        if data['cover'].startswith('//'):
            data['cover'] = 'http:' + data['cover']
        cdata = read_url(data['cover'])
        # Drop the cover when the image's SHA-1 matches one of two known
        # digests (presumably placeholder images -- TODO confirm).
        if hashlib.sha1(cdata).hexdigest() in (
            'd2e9ab0c87193d69a7d3a3c21ae4aa550f7dcf00',
            '70f16d3e077cdd47ef6b331001dbb1963677fa04'
        ):
            del data['cover']

    if 'author' in data:
        data['author'] = data['author'].split('; ')
    if 'title' in data:
        data['title'] = data['title'].replace(' : ', ': ')
    if 'publisher' in data:
        # Preferred form: 'Place : Publisher, YYYY'.
        m = re.compile('(.+) : (.+), (\d{4})').findall(data['publisher'])
        if m:
            place, publisher, date = m[0]
            data['publisher'] = publisher
            data['date'] = date
            data['place'] = [place]
        elif ':' in data['publisher']:
            # Fallback: split at the first ':' and take the first four-digit
            # number after it, if any, as the date.
            place, publisher = data['publisher'].split(':', 1)
            data['place'] = [place.strip()]
            data['publisher'] = publisher.split(',')[0].strip()
            m = re.compile('\d{4}').findall(publisher)
            if m:
                data['date'] = m[0]

    if 'place' in data:
        # Strip one leading '[' and one trailing ']' from the place name.
        if data['place'][0].startswith('['):
            data['place'] = [data['place'][0][1:]]
        if data['place'][0].endswith(']'):
            data['place'] = [data['place'][0][:-1]]
    logger.debug('lookup %s => %s', id, list(data.keys()))
    return data
|
||||
|
||||
# Alias: info() is the same function object as lookup().
info = lookup
|
||||
|
||||
def find(title, author, year):
    """Return search results for the given title, author and year.

    All three arguments are ignored; an empty list is always returned.
    """
    no_results = []
    return no_results
|
||||
|
Loading…
Reference in a new issue