From a172e7b4b79d582d6ecbe23d1b20132861885a0c Mon Sep 17 00:00:00 2001 From: j Date: Sat, 2 Apr 2016 16:00:36 +0200 Subject: [PATCH] remove unused scrapers --- oml/meta/__init__.py | 5 - oml/meta/abebooks.py | 50 -- oml/meta/dewey.py | 962 ------------------------------------- oml/meta/loc.py | 102 ---- oml/meta/lookupbyisbn.py | 97 ---- oml/meta/marc_countries.py | 409 ---------------- oml/meta/openlibrary.py | 210 -------- oml/meta/worldcat.py | 116 ----- 8 files changed, 1951 deletions(-) delete mode 100644 oml/meta/abebooks.py delete mode 100644 oml/meta/dewey.py delete mode 100644 oml/meta/loc.py delete mode 100644 oml/meta/lookupbyisbn.py delete mode 100644 oml/meta/marc_countries.py delete mode 100644 oml/meta/openlibrary.py delete mode 100644 oml/meta/worldcat.py diff --git a/oml/meta/__init__.py b/oml/meta/__init__.py index b606a76..329805e 100644 --- a/oml/meta/__init__.py +++ b/oml/meta/__init__.py @@ -5,11 +5,6 @@ import stdnum.isbn import ox -from . import abebooks -from . import loc -from . import lookupbyisbn -from . import openlibrary -from . import worldcat from . import google from . import duckduckgo diff --git a/oml/meta/abebooks.py b/oml/meta/abebooks.py deleted file mode 100644 index 9cdc17e..0000000 --- a/oml/meta/abebooks.py +++ /dev/null @@ -1,50 +0,0 @@ -# -*- coding: utf-8 -*- -# vi:si:et:sw=4:sts=4:ts=4 - - -import re - -from ox.cache import read_url -import lxml.html - -import logging -logger = logging.getLogger(__name__) - -base = 'http://www.abebooks.com' - -def get_ids(key, value): - ids = [] - if key == 'isbn': - url = '%s/servlet/SearchResults?isbn=%s&sts=t' % (base, value) - data = read_url(url, unicode=True) - urls = re.compile('href="(/servlet/BookDetailsPL[^"]+)"').findall(data) - if urls: - ids.append((key, value)) - if ids: - logger.debug('get_ids %s %s => %s', key, value, ids) - return ids - -def lookup(id): - logger.debug('lookup %s', id) - data = {} - url = '%s/servlet/SearchResults?isbn=%s&sts=t' % (base, id) - html = read_url(url, unicode=True) - urls = re.compile('href="(/servlet/BookDetailsPL[^"]+)"').findall(html) - keys = { - 'pubdate': 'date' - } - if urls: - details = '%s%s' % (base, urls[0]) - html = read_url(details, unicode=True) - doc = lxml.html.document_fromstring(html) - for e in doc.xpath("//*[contains(@id, 'biblio')]"): - key = e.attrib['id'].replace('biblio-', '') - value = e.text_content().strip() - k = keys.get(key, key) - if k == 'date' and value == 'Publication Date:': - value = '' - elif k == 'publisher' and value == 'Publisher:': - value = '' - if value and key not in ('bookcondition', 'binding', 'edition-amz'): - data[k] = value - return data diff --git a/oml/meta/dewey.py b/oml/meta/dewey.py deleted file mode 100644 index 6323efd..0000000 --- a/oml/meta/dewey.py +++ /dev/null @@ -1,962 +0,0 @@ -# -*- coding: utf-8 -*- -# vi:si:et:sw=4:sts=4:ts=4 - -def get_classification(id): - name = '%s' % id - base = ''.join([s for s in id.split('/')[0].split('.')[0] if s.isdigit()]) - if base != '0': - base = base.lstrip('0') - if base in DEWEY: - name = '%s %s' % (name, DEWEY[base]) - return name - -DEWEY = { - "0": "Computer science, information & general works", - "1": "Philosophy & psychology", - "10": "Philosophy", - "100": "Philosophy, parapsychology and occultism, psychology", - "101": "Theory of philosophy", - "102": "Miscellany of philosophy", - "103": "Dictionaries, encyclopedias, concordances of philosophy", - "105": "Serial publications", - "106": "Organizations and management of philosophy", - "107": "Education, research, related topics of philosophy", - "108": "Groups of people", - "109": "Historical and collected persons treatment of philosophy", - "11": "Metaphysics", - "110": "Metaphysics", - "111": "Ontology", - "113": "Cosmology (Philosophy of nature)", - "114": "Space", - "115": "Time", - "116": "Change", - "117": "Structure", - "118": "Force and energy", - "119": "Number and quantity", - "12": "Epistemology", - "120": "Epistemology, causation & humankind", - "121": "Epistemology (Theory of knowledge)", - "122": "Causation", - "123": "Determinism and indeterminism", - "124": "Teleology", - "126": "The self", - "127": "The unconscious and the subconscious", - "128": "Humankind", - "129": "Origin and destiny of individual souls", - "13": "Parapsychology & occultism", - "130": "Parapsychology and occultism", - "131": "Parapsychological and occult techniques for achieving well-being, happiness, success", - "133": "Specific topics in parapsychology & occultism", - "135": "Dreams and mysteries", - "137": "Divinatory graphology", - "138": "Physiognomy", - "139": "Phrenology", - "14": "Philosophical schools of thought", - "140": "Specific philosophical schools", - "141": "Idealism & related systems", - "142": "Critical philosophy", - "143": "Bergsonism and intuitionism", - "144": "Humanism and related systems and doctrines", - "145": "Sensationalism", - "146": "Naturalism and related systems and doctrines", - "147": "Pantheism and related systems and doctrines", - "148": "Dogmatism, eclecticism, liberalism, syncretism, traditionalism", - "149": "Other philosophical systems", - "15": "Psychology", - "150": "Psychology", - "152": "Sensory perception, movement, emotions, physiological drives", - "153": "Conscious mental processes and intelligence", - "154": "Subconscious and altered states and processes", - "155": "Differential and developmental psychology", - "156": "Comparative psychology", - "158": "Applied psychology", - "16": "Philosophical logic", - "160": "Logic", - "161": "Induction", - "162": "Deduction", - "165": "Fallacies and sources of error", - "166": "Syllogisms", - "167": "Hypotheses", - "168": "Argument and persuasion", - "169": "Analogy", - "17": "Ethics", - "170": "Ethics", - "171": "Ethical systems", - "172": "Political ethics", - "173": "Ethics of family relationships", - "174": "Occupational ethics", - "175": "Ethics of recreation, leisure, public performances, communication", - "176": "Ethics of sex and reproduction", - "177": "Ethics of social relations", - "178": "Ethics of consumption", - "179": "Other ethical norms", - "18": "Ancient, medieval & eastern philosophy", - "180": "Ancient, medieval, eastern philosophy", - "181": "Eastern philosophy", - "182": "Pre-Socratic Greek philosophies", - "183": "Sophistic, Socratic, related Greek philosophies", - "184": "Platonic philosophy", - "185": "Aristotelian philosophy", - "186": "Skeptic and Neoplatonic philosophies", - "187": "Epicurean philosophy", - "188": "Stoic philosophy", - "189": "Medieval western philosophy", - "19": "Modern western philosophy", - "190": "Modern western and other noneastern philosophy", - "191": "United States and Canada", - "192": "Philosophy of British Isles", - "193": "Philosophy of Germany and Austria", - "194": "Philosophy of France", - "195": "Philosophy of Italy", - "196": "Philosophy of Spain and Portugal", - "197": "Philosophy of Russia", - "198": "Philosophy of Scandinavia and Finland", - "199": "Philosophy in other geographic areas", - "2": "Religion", - "20": "Religion", - "200": "Religion", - "201": "Religious mythology, general classes of religion, interreligious relations and attitudes, social theology", - "202": "Doctrines", - "203": "Public worship and other practices", - "204": "Religious experience, life, practice", - "205": "Religious ethics", - "206": "Leaders & organization", - "207": "Missions & religious education", - "208": "Sources", - "209": "Sects and reform movements", - "21": "Philosophy & theory of religion", - "210": "Philosophy & theory of religion", - "211": "Concepts of God", - "212": "Existence of God, ways of knowing God, attributes of God", - "213": "Creation", - "214": "Theodicy", - "215": "Science and religion", - "218": "Humankind", - "22": "The Bible", - "220": "Bible", - "221": "Old Testament (Tanakh)", - "222": "Historical books of Old Testament", - "223": "Poetic books of Old Testament", - "224": "Prophetic books of Old Testament", - "225": "New Testament", - "226": "Gospels and Acts", - "227": "Epistles", - "228": "Revelation (Apocalypse)", - "229": "Apocrypha & pseudepigrapha", - "23": "Christianity", - "230": "Christianity    Christian theology", - "231": "God", - "232": "Jesus Christ and his family", - "233": "Humankind", - "234": "Salvation and grace", - "235": "Spiritual beings", - "236": "Eschatology", - "238": "Creeds, confessions of faith, covenants, catechisms", - "239": "Apologetics and polemics", - "24": "Christian practice & observance", - "240": "Christian moral & devotional theology", - "241": "Christian ethics", - "242": "Devotional literature", - "243": "Evangelistic writings for individuals and families", - "246": "Use of art in Christianity", - "247": "Church furnishings and related articles", - "248": "Christian experience, practice, life", - "249": "Christian observances in family life", - "25": "Christian pastoral practice & religious orders", - "250": "Local Christian church and Christian religious orders", - "251": "Preaching (Homiletics)", - "252": "Texts of sermons", - "253": "Pastoral office and work (Pastoral theology)", - "254": "Parish administration", - "255": "Religious congregations & orders", - "259": "Pastoral care of specific kinds of persons", - "26": "Christian organization, social work & worship", - "260": "Christian social and ecclesiastical theology", - "261": "Social theology and interreligious relations and attitudes", - "262": "Ecclesiology", - "263": "Days, times & places of observance", - "264": "Public worship", - "265": "Sacraments, other rites and acts", - "266": "Missions", - "267": "Associations for religious work", - "268": "Religious education", - "269": "Spiritual renewal", - "27": "History of Christianity", - "270": "History of Christianity & Christian church", - "271": "Religious congregations and orders in church history", - "272": "Persecutions in general church history", - "273": "Doctrinal controversies and heresies in general church history", - "274": "Christianity in Europe", - "275": "History of Christianity in Asia", - "276": "Christianity in Africa", - "277": "Christianity in North America", - "278": "Christianity in South America", - "279": "Christianity in Australasia, Pacific Ocean islands, Atlantic Ocean islands, Arctic islands, Antarctica", - "28": "Christian denominations", - "280": "Denominations and sects of Christian church", - "281": "Early church and Eastern churches", - "282": "Roman Catholic Church", - "283": "Anglican churches", - "284": "Protestant denominations of Continental origin and related bodies", - "285": "Presbyterian churches, Reformed churches centered in America, Congregational churches, Puritanism", - "286": "Baptist, Restoration movement, Adventist churches", - "287": "Methodist churches; churches related to Methodism", - "289": "Other denominations & sects", - "29": "Other religions", - "290": "Other religions", - "292": "Classical religion (Greek and Roman religion)", - "293": "Germanic religion", - "294": "Religions of Indic origin", - "295": "Zoroastrianism (Mazdaism, Parseeism)", - "296": "Judaism", - "297": "Islam, Babism, Bahai Faith", - "299": "Religions not provided for elsewhere", - "3": "Social sciences", - "30": "Social sciences, sociology & anthropology", - "300": "Social sciences", - "301": "Sociology and anthropology", - "302": "Social interaction", - "303": "Social processes", - "304": "Factors affecting social behavior", - "305": "Groups of people", - "306": "Culture and institutions", - "307": "Communities", - "31": "Statistics", - "310": "Collections of general statistics", - "314": "General statistics of Europe", - "315": "General statistics of Asia", - "316": "General statistics of Africa", - "317": "General statistics of North America", - "318": "General statistics of South America", - "319": "General statistics of other parts of the world    Of Pacific Ocean islands", - "32": "Political science", - "320": "Political science (Politics and government)", - "321": "Systems of governments and states", - "322": "Relation of state to organized groups", - "323": "Civil and political rights", - "324": "The political process", - "325": "International migration and colonization", - "326": "Slavery and emancipation", - "327": "International relations", - "328": "The legislative process", - "33": "Economics", - "330": "Economics", - "331": "Labor economics", - "332": "Financial economics", - "333": "Economics of land and energy", - "334": "Cooperatives", - "335": "Socialism and related systems", - "336": "Public finance", - "337": "International economics", - "338": "Production", - "339": "Macroeconomics and related topics", - "34": "Law", - "340": "Law", - "341": "Law of nations", - "342": "Constitutional and administrative law", - "343": "Military, defense, public property, public finance, tax, commerce (trade), industrial law", - "344": "Labor, social, education & cultural law", - "345": "Criminal law", - "346": "Private law", - "347": "Procedure and courts", - "348": "Laws, regulations, cases", - "349": "Law of specific jurisdictions, areas, socioeconomic regions, regional intergovernmental organizations", - "35": "Public administration & military science", - "350": "Public administration and military science", - "351": "Public administration", - "352": "General considerations of public administration", - "353": "Specific fields of public administration", - "354": "Public administration of economy and environment", - "355": "Military science", - "356": "Foot forces and warfare", - "357": "Mounted forces & warfare", - "358": "Air and other specialized forces and warfare; engineering and related services", - "359": "Sea forces and warfare", - "36": "Social problems & social services", - "360": "Social problems & social services", - "361": "Social problems & social welfare in general", - "362": "Social welfare problems and services", - "363": "Other social problems and services", - "364": "Criminology", - "365": "Penal and related institutions", - "366": "Secret associations and societies", - "367": "General clubs", - "368": "Insurance", - "369": "Miscellaneous kinds of associations", - "37": "Education", - "370": "Education", - "371": "Schools and their activities; special education", - "372": "Primary education (Elementary education)", - "373": "Secondary education", - "374": "Adult education", - "375": "Curricula", - "378": "Higher education (Tertiary education)", - "379": "Public policy issues in education", - "38": "Commerce, communications & transportation", - "380": "Commerce, communications, transportation", - "381": "Commerce (Trade)", - "382": "International commerce (Foreign trade)", - "383": "Postal communication", - "384": "Communications", - "385": "Railroad transportation", - "386": "Inland waterway & ferry transportation", - "387": "Water, air & space transportation", - "388": "Transportation", - "389": "Metrology and standardization", - "39": "Customs, etiquette & folklore", - "390": "Customs, etiquette, folklore", - "391": "Costume and personal appearance", - "392": "Customs of life cycle and domestic life", - "393": "Death customs", - "394": "General customs", - "395": "Etiquette (Manners)", - "398": "Folklore", - "399": "Customs of war and diplomacy", - "4": "Language", - "40": "Language", - "400": "Language", - "401": "Philosophy and theory; international languages", - "402": "Miscellany", - "403": "Dictionaries, encyclopedias, concordances", - "404": "Special topics of language", - "405": "Serial publications", - "406": "Organizations and management", - "407": "Education, research & related topics", - "408": "Groups of people", - "409": "Geographic treatment and biography", - "41": "Linguistics", - "410": "Linguistics", - "411": "Writing systems", - "412": "Etymology of standard forms of languages", - "413": "Dictionaries of standard forms of languages", - "414": "Phonology & phonetics", - "415": "Grammar of standard forms of languages", - "417": "Dialectology and historical linguistics", - "418": "Standard usage (Prescriptive linguistics)", - "419": "Sign languages", - "42": "English & Old English languages", - "420": "English & Old English languages", - "421": "Writing system, phonology, phonetics of standard English", - "422": "Etymology of standard English", - "423": "Dictionaries of standard English", - "425": "Grammar of standard English", - "427": "Historical and geographic variations, modern nongeographic variations of English", - "428": "Standard English usage (Prescriptive linguistics)", - "429": "Old English (Anglo-Saxon)", - "43": "German & related languages", - "430": "German & related languages", - "431": "German writing systems & phonology", - "432": "Etymology of standard German", - "433": "Dictionaries of standard German", - "435": "Grammar of standard German", - "437": "Historical and geographic variations, modern nongeographic variations of German", - "438": "Standard German usage", - "439": "Other Germanic languages", - "44": "French & related languages", - "440": "Romance languages    French", - "441": "Writing systems, phonology, phonetics of standard French", - "442": "Etymology of standard French", - "443": "Dictionaries of standard French", - "445": "Grammar of standard French", - "447": "Historical and geographic variations, modern nongeographic variations of French", - "448": "Standard French usage (Prescriptive linguistics)", - "449": "Occitan, Catalan, Franco-Provençal", - "45": "Italian, Romanian & related languages", - "450": "Italian, Dalmatian, Romanian, Rhaetian, Sardinian, Corsican", - "451": "Writing systems, phonology, phonetics of standard Italian", - "452": "Etymology of standard Italian", - "453": "Dictionaries of standard Italian", - "455": "Grammar of standard Italian", - "457": "Historical and geographic variations, modern nongeographic variations of Italian", - "458": "Standard Italian usage", - "459": "Sardinian", - "46": "Spanish, Portuguese, Galician", - "460": "Spanish, Portuguese, Galician", - "461": "Writing systems, phonology, phonetics of standard Spanish", - "462": "Etymology of standard Spanish", - "463": "Dictionaries of standard Spanish", - "465": "Grammar of standard Spanish", - "467": "Historical and geographic variations, modern nongeographic variations of Spanish", - "468": "Standard Spanish usage", - "469": "Portuguese", - "47": "Latin & Italic languages", - "470": "Italic languages    Latin", - "471": "Writing systems, phonology, phonetics of classical Latin", - "472": "Classical Latin etymology", - "473": "Dictionaries of classical Latin", - "475": "Grammar of classical Latin", - "477": "Old, postclassical & Vulgar Latin", - "478": "Classical Latin usage (Prescriptive linguistics)", - "479": "Other Italic languages", - "48": "Classical & modern Greek languages", - "480": "Classical Greek and related Hellenic languages", - "481": "Writing systems, phonology, phonetics of classical Greek", - "482": "Etymology of classical Greek", - "483": "Dictionaries of classical Greek", - "485": "Grammar of classical Greek", - "487": "Preclassical and postclassical Greek", - "488": "Classical Greek usage (Prescriptive linguistics)", - "489": "Other Hellenic languages", - "49": "Other languages", - "490": "Other languages", - "491": "East Indo-European and Celtic languages", - "492": "Afro-Asiatic languages", - "493": "Non-Semitic Afro-Asiatic languages", - "494": "Altaic, Uralic, Hyperborean, Dravidian languages, miscellaneous languages of south Asia", - "495": "Languages of east and southeast Asia", - "496": "African languages", - "497": "North American native languages", - "498": "South American native languages", - "499": "Austronesian & other languages", - "5": "Science", - "50": "Science", - "500": "Science", - "501": "Philosophy & theory", - "502": "Miscellany", - "503": "Dictionaries, encyclopedias, concordances", - "505": "Serial publications", - "506": "Organizations and management", - "507": "Education, research, related topics", - "508": "Natural history", - "509": "Historical, geographic & persons treatment", - "51": "Mathematics", - "510": "Mathematics", - "511": "General principles of mathematics", - "512": "Algebra", - "513": "Arithmetic", - "514": "Topology", - "515": "Analysis", - "516": "Geometry", - "518": "Numerical analysis", - "519": "Probabilities and applied mathematics", - "52": "Astronomy", - "520": "Astronomy and allied sciences", - "521": "Celestial mechanics", - "522": "Techniques, procedures, apparatus, equipment, materials", - "523": "Specific celestial bodies and phenomena", - "525": "Earth (Astronomical geography)", - "526": "Mathematical geography", - "527": "Celestial navigation", - "528": "Ephemerides", - "529": "Chronology", - "53": "Physics", - "530": "Physics", - "531": "Classical mechanics", - "532": "Fluid mechanics; liquid mechanics", - "533": "Pneumatics (Gas mechanics)", - "534": "Sound and related vibrations", - "535": "Light and infrared and ultraviolet phenomena", - "536": "Heat", - "537": "Electricity & electronics", - "538": "Magnetism", - "539": "Modern physics", - "54": "Chemistry", - "540": "Chemistry and allied sciences", - "541": "Physical chemistry", - "542": "Techniques, equipment & materials", - "543": "Analytical chemistry", - "546": "Inorganic chemistry", - "547": "Organic chemistry", - "548": "Crystallography", - "549": "Mineralogy", - "55": "Earth sciences & geology", - "550": "Earth sciences", - "551": "Geology, hydrology, meteorology", - "552": "Petrology", - "553": "Economic geology", - "554": "Earth sciences of Europe", - "555": "Earth sciences of Asia", - "556": "Earth sciences of Africa", - "557": "Earth sciences of North America", - "558": "Earth sciences of South America", - "559": "Earth sciences of Australasia, Pacific Ocean islands, Atlantic Ocean islands, Arctic islands, Antarctica, extraterrestrial worlds", - "56": "Fossils & prehistoric life", - "560": "Paleontology", - "561": "Paleobotany; fossil microorganisms", - "562": "Fossil invertebrates", - "563": "Miscellaneous fossil marine and seashore invertebrates", - "564": "Fossil Mollusca and Molluscoidea", - "565": "Fossil Arthropoda", - "566": "Fossil Chordata", - "567": "Fossil cold-blooded vertebrates", - "568": "Fossil birds", - "569": "Fossil mammals", - "57": "Biology", - "570": "Life sciences    Biology", - "571": "Physiology and related subjects", - "572": "Biochemistry", - "573": "Specific physiological systems in animals, regional histology and physiology in animals", - "575": "Specific parts of and physiological systems in plants", - "576": "Genetics and evolution", - "577": "Ecology", - "578": "Natural history of organisms and related subjects", - "579": "Microorganisms, fungi, algae", - "58": "Plants (Botany)", - "580": "Plants", - "581": "Specific topics in natural history of plants", - "582": "Plants noted for specific vegetative characteristics and flowers", - "583": "Dicotyledons", - "584": "Monocotyledons", - "585": "Pinophyta (Gymnosperms)", - "586": "Seedless plants", - "587": "Vascular seedless plants", - "588": "Bryophyta", - "59": "Animals (Zoology)", - "590": "Animals", - "591": "Specific topics in natural history", - "592": "Invertebrates", - "593": "Miscellaneous marine and seashore invertebrates", - "594": "Mollusks & molluscoids", - "595": "Arthropoda", - "596": "Chordata", - "597": "Cold-blooded vertebrates", - "598": "Aves (Birds)", - "599": "Mammalia (Mammals)", - "6": "Technology", - "60": "Technology", - "600": "Technology", - "601": "Philosophy and theory", - "602": "Miscellany", - "603": "Dictionaries & encyclopedias", - "604": "Technical drawing, hazardous materials technology; groups of people", - "605": "Serial publications", - "606": "Organizations", - "607": "Education, research, related topics", - "608": "Patents", - "609": "Historical, geographic, persons treatment", - "61": "Medicine & health", - "610": "Medicine and health", - "611": "Human anatomy, cytology, histology", - "612": "Human physiology", - "613": "Personal health and safety", - "614": "Forensic medicine; incidence of injuries, wounds, disease; public preventive medicine", - "615": "Pharmacology and therapeutics", - "616": "Diseases", - "617": "Surgery, regional medicine, dentistry, ophthalmology, otology, audiology", - "618": "Other branches of medicine    Gynecology and obstetrics", - "62": "Engineering", - "620": "Engineering and allied operations", - "621": "Applied physics", - "622": "Mining and related operations", - "623": "Military and nautical engineering", - "624": "Civil engineering", - "625": "Engineering of railroads & roads", - "627": "Hydraulic engineering", - "628": "Sanitary engineering", - "629": "Other branches of engineering", - "63": "Agriculture", - "630": "Agriculture and related technologies", - "631": "Specific techniques; apparatus, equipment, materials", - "632": "Plant injuries, diseases, pests", - "633": "Field and plantation crops", - "634": "Orchards, fruits, forestry", - "635": "Garden crops (Horticulture)", - "636": "Animal husbandry", - "637": "Processing dairy & related products", - "638": "Insect culture", - "639": "Hunting, fishing, conservation, related technologies", - "64": "Home & family management", - "640": "Home and family management", - "641": "Food & drink", - "642": "Meals and table service", - "643": "Housing and household equipment", - "644": "Household utilities", - "645": "Household furnishings", - "646": "Sewing, clothing, management of personal and family life", - "647": "Management of public households (Institutional housekeeping)", - "648": "Housekeeping", - "649": "Child rearing; home care of people with disabilities and illnesses", - "65": "Management & public relations", - "650": "Management and auxiliary services", - "651": "Office services", - "652": "Processes of written communication", - "653": "Shorthand", - "657": "Accounting", - "658": "General management", - "659": "Advertising and public relations", - "66": "Chemical engineering", - "660": "Chemical engineering and related technologies", - "661": "Technology of industrial chemicals", - "662": "Technology of explosives, fuels, related products", - "663": "Beverage technology", - "664": "Food technology", - "665": "Technology of industrial oils, fats, waxes, gases", - "666": "Ceramic and allied technologies", - "667": "Cleaning, color, coating, related technologies", - "668": "Technology of other organic products", - "669": "Metallurgy", - "67": "Manufacturing", - "670": "Manufacturing", - "671": "Metalworking processes and primary metal products", - "672": "Iron, steel, other iron alloys", - "673": "Nonferrous metals", - "674": "Lumber processing, wood products, cork", - "675": "Leather and fur processing", - "676": "Pulp and paper technology", - "677": "Textiles", - "678": "Elastomers and elastomer products", - "679": "Other products of specific materials", - "68": "Manufacture for specific uses", - "680": "Manufacture of products for specific uses", - "681": "Precision instruments and other devices", - "682": "Small forge work (Blacksmithing)", - "683": "Hardware and household appliances", - "684": "Furnishings and home workshops", - "685": "Leather and fur goods, and related products", - "686": "Printing and related activities", - "687": "Clothing and accessories", - "688": "Other final products & packaging", - "69": "Construction of buildings", - "690": "Buildings", - "691": "Building materials", - "692": "Auxiliary construction practices", - "693": "Construction in specific types of materials and for specific purposes", - "694": "Wood construction", - "695": "Roof covering", - "696": "Utilities", - "697": "Heating, ventilating & air-conditioning", - "698": "Detail finishing", - "7": "Arts & recreation", - "70": "Arts", - "700": "Arts", - "701": "Philosophy and theory of fine and decorative arts", - "702": "Miscellany of fine and decorative arts", - "703": "Dictionaries, encyclopedias, concordances of fine and decorative arts", - "704": "Special topics in fine and decorative arts", - "705": "Serial publications of fine and decorative arts", - "706": "Organizations and management of fine and decorative arts", - "707": "Education, research, related topics of fine and decorative arts", - "708": "Galleries, museums, private collections of fine and decorative arts", - "709": "Historical, geographic & persons treatment", - "71": "Area planning & landscape architecture", - "710": "Area planning and landscape architecture", - "711": "Area planning (Civic art)", - "712": "Landscape architecture (Landscape design)", - "713": "Landscape architecture of trafficways", - "714": "Water features in landscape architecture", - "715": "Woody plants in landscape architecture", - "716": "Herbaceous plants in landscape architecture", - "717": "Structures in landscape architecture", - "718": "Landscape design of cemeteries", - "719": "Natural landscapes", - "72": "Architecture", - "720": "Architecture", - "721": "Architectural materials and structural elements", - "722": "Architecture from earliest times to ca. 300", - "723": "Architecture from ca. 300 to 1399", - "724": "Architecture from 1400", - "725": "Public structures", - "726": "Buildings for religious purposes", - "727": "Buildings for educational and research purposes", - "728": "Residential and related buildings", - "729": "Design and decoration of structures and accessories", - "73": "Sculpture, ceramics & metalwork", - "730": "Plastic arts    Sculpture", - "731": "Processes, forms & subjects of sculpture", - "732": "Sculpture from earliest times to ca. 500, sculpture of nonliterate peoples", - "733": "Greek, Etruscan, Roman sculpture", - "734": "Sculpture from ca. 500 to 1399", - "735": "Sculpture from 1400", - "736": "Carving and carvings", - "737": "Numismatics and sigillography", - "738": "Ceramic arts", - "739": "Art metalwork", - "74": "Graphic arts & decorative arts", - "740": "Graphic arts", - "741": "Drawing and drawings", - "742": "Perspective in drawing", - "743": "Drawing and drawings by subject", - "745": "Decorative arts", - "746": "Textile arts", - "747": "Interior decoration", - "748": "Glass", - "749": "Furniture and accessories", - "75": "Painting", - "750": "Painting and paintings", - "751": "Techniques, procedures, apparatus, equipment, materials, forms", - "752": "Color", - "753": "Symbolism, allegory, mythology, legend", - "754": "Genre paintings", - "755": "Religion", - "757": "Human figures", - "758": "Nature, architectural subjects and cityscapes, other specific subjects", - "759": "History, geographic treatment, biography", - "76": "Printmaking & prints", - "760": "Printmaking and prints", - "761": "Relief processes (Block printing)", - "763": "Lithographic processes (Planographic processes)", - "764": "Chromolithography and serigraphy", - "765": "Metal engraving", - "766": "Mezzotinting, aquatinting, related processes", - "767": "Etching and drypoint", - "769": "Prints", - "77": "Photography, computer art, film, video", - "770": "Photography, computer art, cinematography, videography", - "771": "Techniques, procedures, apparatus, equipment, materials", - "772": "Metallic salt processes", - "773": "Pigment processes of printing", - "774": "Holography", - "775": "Digital photography", - "776": "Computer art (Digital art)", - "777": "Cinematography and videography", - "778": "Specific fields and special kinds of photography", - "779": "Photographs", - "78": "Music", - "780": "Music", - "781": "General principles & musical forms", - "782": "Vocal music", - "783": "Music for single voices", - "784": "Instruments & instrumental ensembles", - "785": "Ensembles with only one instrument per part", - "786": "Keyboard, mechanical, electrophonic, percussion instruments", - "787": "Stringed instruments (Chordophones)", - "788": "Wind instruments (Aerophones)", - "79": "Sports, games & entertainment", - "790": "Recreational and performing arts", - "791": "Public performances", - "792": "Stage presentations", - "793": "Indoor games and amusements", - "794": "Indoor games of skill", - "795": "Games of chance", - "796": "Athletic and outdoor sports and games", - "797": "Aquatic & air sports", - "798": "Equestrian sports and animal racing", - "799": "Fishing, hunting, shooting", - "8": "Literature", - "80": "Literature, rhetoric & criticism", - "800": "Literature (Belles-lettres) and rhetoric", - "801": "Philosophy and theory", - "802": "Miscellany", - "803": "Dictionaries, encyclopedias, concordances", - "805": "Serial publications", - "806": "Organizations and management", - "807": "Education, research, related topics", - "808": "Rhetoric and collections of literary texts from more than two literatures", - "809": "History, description, critical appraisal of more than two literatures", - "81": "American literature in English", - "810": "American literature in English", - "811": "American poetry in English", - "812": "American drama in English", - "813": "American fiction in English", - "814": "American essays in English", - "815": "American speeches in English", - "816": "American letters in English", - "817": "American humor and satire in English", - "818": "American miscellaneous writings", - "82": "English & Old English literatures", - "820": "English and Old English (Anglo-Saxon) literatures", - "821": "English poetry", - "822": "English drama", - "823": "English fiction", - "824": "English essays", - "825": "English speeches", - "826": "English letters", - "827": "English humor and satire", - "828": "English miscellaneous writings", - "829": "Old English (Anglo-Saxon) literature", - "83": "German & related literatures", - "830": "Literatures of Germanic languages    German literature", - "831": "German poetry", - "832": "German drama", - "833": "German fiction", - "834": "German essays", - "835": "German speeches", - "836": "German letters", - "837": "German humor & satire", - "838": "German miscellaneous writings", - "839": "Other Germanic literatures", - "84": "French & related literatures", - "840": "French literature and literatures of related Romance languages", - "841": "French poetry", - "842": "French drama", - "843": "French fiction", - "844": "French essays", - "845": "French speeches", - "846": "French letters", - "847": "French humor & satire", - "848": "French miscellaneous writings", - "849": "Occitan, Catalan, Franco-Provençal literatures", - "85": "Italian, Romanian & related literatures", - "850": "Literatures of Italian, Dalmatian, Romanian, Rhaetian, Sardinian, Corsican languages", - "851": "Italian poetry", - "852": "Italian drama", - "853": "Italian fiction", - "854": "Italian essays", - "855": "Italian speeches", - "856": "Italian letters", - "857": "Italian humor and satire", - "858": "Italian miscellaneous writings", - "859": "Literatures of Romanian, Rhaetian, Sardinian, Corsican languages", - "86": "Spanish, Portuguese, Galician literatures", - "860": "Spanish & Portuguese literatures", - "861": "Spanish poetry", - "862": "Spanish drama", - "863": "Spanish fiction", - "864": "Spanish essays", - "865": "Spanish speeches", - "866": "Spanish letters", - "867": "Spanish humor and satire", - "868": "Spanish miscellaneous writings", - "869": "Literatures of Portuguese and Galician languages", - "87": "Latin & Italic literatures", - "870": "Latin & Italic literatures", - "871": "Latin poetry", - "872": "Latin dramatic poetry and drama", - "873": "Latin epic poetry and fiction", - "874": "Latin lyric poetry", - "875": "Latin speeches", - "876": "Latin letters", - "877": "Latin humor and satire", - "878": "Latin miscellaneous writings", - "879": "Literatures of other Italic languages", - "88": "Classical & modern Greek literatures", - "880": "Literatures of Hellenic languages    Classical Greek literature", - "881": "Classical Greek poetry", - "882": "Classical Greek dramatic poetry and drama", - "883": "Classical Greek epic poetry and fiction", - "884": "Classical Greek lyric poetry", - "885": "Classical Greek speeches", - "886": "Classical Greek letters", - "887": "Classical Greek humor and satire", - "888": "Classical Greek miscellaneous writings", - "889": "Modern Greek literature", - "89": "Other literatures", - "890": "Literatures of other specific languages and language families", - "891": "East Indo-European and Celtic literatures", - "892": "Afro-Asiatic literatures", - "893": "Non-Semitic Afro-Asiatic literatures", - "894": "Literatures of Altaic, Uralic, Hyperborean, Dravidian languages; literatures of miscellaneous languages of south Asia", - "895": "Literatures of East and Southeast Asia", - "896": "African literatures", - "897": "North American native literatures", - "898": "Literatures of South American native languages", - "899": "Literatures of non-Austronesian languages of Oceania, of Austronesian languages, of miscellaneous languages", - "9": "History & geography", - "90": "History", - "900": "History, geography, and auxiliary disciplines", - "901": "Philosophy and theory of history", - "902": "Miscellany", - "903": "Dictionaries, encyclopedias, concordances of history", - "904": "Collected accounts of events", - "905": "Serial publications of history", - "906": "Organizations and management of history", - "907": "Education, research & related topics", - "908": "History with respect to groups of people", - "909": "World history", - "91": "Geography & travel", - "910": "Geography and travel", - "911": "Historical geography", - "912": "Graphic representations of surface of earth and of extraterrestrial worlds", - "913": "Geography of and travel in ancient world", - "914": "Geography of and travel in Europe", - "915": "Geography of and travel in Asia", - "916": "Geography of and travel in Africa", - "917": "Geography of and travel in North America", - "918": "Geography of & travel in South America", - "919": "Geography of and travel in Australasia, Pacific Ocean islands, Atlantic Ocean islands, Arctic islands, Antarctica and on extraterrestrial worlds", - "92": "Biography & genealogy", - "920": "Biography, genealogy, insignia", - "929": "Genealogy, names, insignia", - "93": "History of ancient world (to ca. 499)", - "930": "History of ancient world to ca. 499", - "931": "China to 420", - "932": "Egypt to 640", - "933": "Palestine to 70", - "934": "South Asia to 647", - "935": "Mesopotamia to 637 and Iranian Plateau to 637", - "936": "Europe north and west of Italian Peninsula to ca. 499", - "937": "Italian Peninsula to 476 and adjacent territories to 476", - "938": "Greece to 323", - "939": "Other parts of ancient world to ca. 640", - "94": "History of Europe", - "940": "History of Europe", - "941": "British Isles", - "942": "England and Wales", - "943": "Germany and neighboring central European countries", - "944": "France and Monaco", - "945": "Italy, San Marino, Vatican City, Malta", - "946": "Spain, Andorra, Gibraltar, Portugal", - "947": "Russia and neighboring east European countries", - "948": "Scandinavia", - "949": "Other parts of Europe", - "95": "History of Asia", - "950": "History of Asia", - "951": "China and adjacent areas", - "952": "Japan", - "953": "Arabian Peninsula and adjacent areas", - "954": "India and neighboring south Asian countries", - "955": "Iran", - "956": "Middle East (Near East)", - "957": "Siberia (Asiatic Russia)", - "958": "Central Asia", - "959": "Southeast Asia", - "96": "History of Africa", - "960": "History of Africa", - "961": "Tunisia & Libya", - "962": "Egypt, Sudan, South Sudan", - "963": "Ethiopia and Eritrea", - "964": "Northwest African coast & offshore islands", - "965": "Algeria", - "966": "West Africa and offshore islands", - "967": "Central Africa and offshore islands", - "968": "Republic of South Africa and neighboring southern African countries", - "969": "South Indian Ocean islands", - "97": "History of North America", - "970": "History of North America", - "971": "Canada", - "972": "Middle America; Mexico", - "973": "United States", - "974": "Northeastern United States (New England and Middle Atlantic states)", - "975": "Southeastern United States (South Atlantic states)", - "976": "South central United States    Gulf Coast states", - "977": "North central United States", - "978": "Western United States", - "979": "Great Basin and Pacific Slope region of United States", - "98": "History of South America", - "980": "History of South America", - "981": "Brazil", - "982": "Argentina", - "983": "Chile", - "984": "Bolivia", - "985": "Peru", - "986": "Colombia and Ecuador", - "987": "Venezuela", - "988": "Guiana", - "989": "Paraguay and Uruguay", - "99": "History of other areas", - "990": "History of Australasia, Pacific Ocean islands, Atlantic Ocean islands, Arctic islands, Antarctica, extraterrestrial worlds", - "993": "New Zealand", - "994": "Australia", - "995": "New Guinea and neighboring countries of Melanesia", - "996": "Other parts of Pacific    Polynesia", - "997": "Atlantic Ocean islands", - "998": "Arctic islands and Antarctica", - "999": "Extraterrestrial worlds" -} - - -if __name__ == '__main__': - import json - import re - from ox.cache import read_url - - dewey = {} - for i in range(0, 1000): - url = 'http://dewey.info/class/%s/about.en.json' % i - print(url) - data = json.loads(read_url(url).decode('utf-8')) - for d in list(data.values()): - if 'http://www.w3.org/2004/02/skos/core#prefLabel' in d: - value = d['http://www.w3.org/2004/02/skos/core#prefLabel'][0]['value'] - dewey[str(i)] = value - break - - data = json.dumps(dewey, indent=4, ensure_ascii=False, sort_keys=True).encode('utf-8') - with open(__file__) as f: - pydata = f.read() - pydata = re.sub( - re.compile('\nDEWEY = {.*?}\n\n', re.DOTALL), - '\nDEWEY = %s\n\n' % data, pydata) - - with open(__file__, 'w') as f: - f.write(pydata) diff --git a/oml/meta/loc.py b/oml/meta/loc.py deleted file mode 100644 index f74822b..0000000 --- a/oml/meta/loc.py +++ /dev/null @@ -1,102 +0,0 @@ -# -*- coding: utf-8 -*- -# vi:si:et:sw=4:sts=4:ts=4 - - -from ox.cache import read_url -import ox -import re -import xml.etree.ElementTree as ET - -from .dewey import get_classification -from .marc_countries import COUNTRIES -from .utils import normalize_isbn - -import logging -logger = logging.getLogger(__name__) - - -def get_ids(key, value): - ids = [] - if key == 'isbn': - url = 'http://www.loc.gov/search/?q=%s&all=true' % value - html = ox.cache.read_url(url).decode('utf-8', 'ignore') - match = re.search('"http://lccn.loc.gov/(\d+)"', html) - if match: - ids.append(('lccn', match.group(1))) - elif key == 'lccn': - info = lookup(value) - for key in ('oclc', 'isbn'): - if key in info: - for value in info[key]: - ids.append((key, value)) - if ids: - logger.debug('get_ids %s %s => %s', key, value, ids) - return ids - -def lookup(id): - logger.debug('lookup %s', id) - ns = '{http://www.loc.gov/mods/v3}' - url = 'http://lccn.loc.gov/%s/mods' % id - info = { - 'lccn': [id] - } - try: - data = read_url(url).decode('utf-8') - mods = ET.fromstring(data) - except: - try: - data = read_url(url, timeout=0).decode('utf-8') - mods = ET.fromstring(data) - except: - logger.debug('lookup for %s url: %s failed', id, url, exc_info=True) - return info - - title = mods.findall(ns + 'titleInfo') - if not title: - return {} - info['title'] = ''.join([': ' + e.text.strip() if e.tag == ns + 'subTitle' else ' ' + e.text.strip() for e in title[0]]).strip() - origin = mods.findall(ns + 'originInfo') - if origin: - info['place'] = [] - for place in origin[0].findall(ns + 'place'): - terms = place.findall(ns + 'placeTerm') - if terms and terms[0].attrib['type'] == 'text': - e = terms[0] - info['place'].append(e.text) - elif terms and terms[0].attrib['type'] == 'code': - e = terms[0] - info['country'] = COUNTRIES.get(e.text, e.text) - publisher = [e.text for e in origin[0].findall(ns + 'publisher')] - if publisher: - info['publisher'] = publisher[0] - info['date'] = ''.join([e.text - for e in origin[0].findall(ns + 'dateIssued') if e.attrib.get('encoding') == 'marc']) - for i in mods.findall(ns + 'identifier'): - key = i.attrib['type'] - value = i.text - if key in ('oclc', 'lccn', 'isbn'): - if i.attrib['type'] == 'oclc': - value = value.replace('ocn', '').replace('ocm', '') - if i.attrib['type'] == 'isbn': - value = normalize_isbn(i.text) - if not key in info: - info[key] = [] - if value not in info[key]: - info[key].append(value) - for i in mods.findall(ns + 'classification'): - if i.attrib['authority'] == 'ddc': - info['classification'] = get_classification(i.text.split('/')[0]) - info['author'] = [] - for a in mods.findall(ns + 'name'): - if a.attrib.get('usage') == 'primary': - info['author'].append(' '.join([e.text for e in a.findall(ns + 'namePart') if not e.attrib.get('type') in ('date', )])) - info['author'] = [ox.normalize_name(a) for a in info['author']] - toc = mods.findall(ns + 'tableOfContents') - if toc: - info['description'] = toc[0].text.strip() - for key in list(info.keys()): - if not info[key]: - del info[key] - return info - -info = lookup diff --git a/oml/meta/lookupbyisbn.py b/oml/meta/lookupbyisbn.py deleted file mode 100644 index 92d3a0f..0000000 --- a/oml/meta/lookupbyisbn.py +++ /dev/null @@ -1,97 +0,0 @@ -# -*- coding: utf-8 -*- -# vi:si:et:sw=4:sts=4:ts=4 - - -import re - -from ox.cache import read_url -from ox import find_re, strip_tags, decode_html -import stdnum.isbn - -from .utils import find_isbns - -import logging -logger = logging.getLogger(__name__) - - -base = 'http://www.lookupbyisbn.com' - -def get_ids(key, value): - ids = [] - - def add_other_isbn(v): - if len(v) == 10: - ids.append(('isbn', stdnum.isbn.to_isbn13(v))) - if len(v) == 13 and v.startswith('978'): - ids.append(('isbn', stdnum.isbn.to_isbn10(v))) - - if key in ('isbn', 'asin'): - url = '%s/Search/Book/%s/1' % (base, value) - data = read_url(url).decode('utf-8') - m = re.compile('href="(/Lookup/Book/[^"]+?)"').findall(data) - if m: - asin = m[0].split('/')[-3] - if stdnum.isbn.to_isbn10(asin) or not stdnum.isbn.is_valid(asin): - ids.append(('asin', asin)) - if key == 'isbn': - add_other_isbn(value) - if key == 'asin': - if stdnum.isbn.is_valid(value): - ids.append(('isbn', value)) - add_other_isbn(value) - else: - for isbn in amazon_lookup(value): - if stdnum.isbn.is_valid(isbn): - ids.append(('isbn', isbn)) - add_other_isbn(isbn) - if ids: - logger.debug('get_ids %s, %s => %s', key, value, ids) - return ids - -def lookup(id): - logger.debug('lookup %s', id) - r = { - 'asin': [id] - } - url = '%s/Lookup/Book/%s/%s/1' % (base, id, id) - logger.debug('%s', url) - data = read_url(url).decode('utf-8') - r["title"] = find_re(data, "

(.*?)

") - if r["title"] == 'Error!': - return {} - keys = { - 'author': 'Author(s)', - 'publisher': 'Publisher', - 'date': 'Publication date', - 'edition': 'Edition', - 'binding': 'Binding', - 'volume': 'Volume(s)', - 'pages': 'Pages', - } - for key in keys: - r[key] = find_re(data, '%s:(.*?)'% re.escape(keys[key])) - if r[key] == '--' or not r[key]: - del r[key] - if key == 'pages' and key in r: - r[key] = int(r[key]) - desc = find_re(data, '

Description:<\/h2>(.*?)

', ' ').replace('
', ' ').replace('
', ' ') - r['description'] = decode_html(strip_tags(desc)) - r['cover'] = find_re(data, 'Book cover').replace('._SL160_', '')
-    for key in r:
-        if isinstance(r[key], str):
-            r[key] = decode_html(strip_tags(r[key])).strip()
-    if 'author' in r and isinstance(r['author'], str) and r['author']:
-        r['author'] = [r['author']]
-    else:
-        r['author'] = []
-    if not r['author'] or r['author'][0].isupper():
-        del r['author']
-    if r['description'].lower() == 'Description of this item is not available at this time.'.lower():
-        r['description'] = ''
-    return r
-
-def amazon_lookup(asin):
-    url = 'http://www.amazon.com/dp/%s' % asin
-    html = read_url(url, timeout=-1).decode('utf-8', 'ignore')
-    return list(set(find_isbns(find_re(html, 'Formats</h3>.*?</table'))))
diff --git a/oml/meta/marc_countries.py b/oml/meta/marc_countries.py
deleted file mode 100644
index 14d4016..0000000
--- a/oml/meta/marc_countries.py
+++ /dev/null
@@ -1,409 +0,0 @@
-# -*- coding: utf-8 -*-
-# vi:si:et:sw=4:sts=4:ts=4
-
-COUNTRIES = {
-    .*?class="code">(.*?).*?(.*?)', re.DOTALL).findall(data) - ]) - - data = json.dumps(countries, indent=4, ensure_ascii=False).encode('utf-8') - with open(__file__) as f: - pydata = f.read() - pydata = re.sub( - re.compile('\nCOUNTRIES = {.*?}\n\n', re.DOTALL), - '\nCOUNTRIES = %s\n\n' % data, pydata) - - with open(__file__, 'w') as f: - f.write(pydata) diff --git a/oml/meta/openlibrary.py b/oml/meta/openlibrary.py deleted file mode 100644 index 7cf527c..0000000 --- a/oml/meta/openlibrary.py +++ /dev/null @@ -1,210 +0,0 @@ -# -*- coding: utf-8 -*- -# vi:si:et:sw=4:sts=4:ts=4 - - -from datetime import datetime -from urllib.parse import urlencode -import json - -from ox.cache import read_url - -from .dewey import get_classification -from .marc_countries import COUNTRIES -from .utils import normalize_isbn - -import logging -logger = logging.getLogger(__name__) - - -KEYS = { - 'authors': 'author', - 'covers': 'cover', - 'dewey_decimal_class': 'classification', - 'isbn_10': 'isbn', - 'isbn_13': 'isbn', - 'lccn': 'lccn', - 'number_of_pages': 'pages', - 'languages': 'language', - 'oclc_numbers': 'oclc', - 'publish_country': 'country', - 'publish_date': 'date', - 'publishers': 'publisher', - 'publish_places': 'place', - 'series': 'series', - 'title': 'title', -} - -def find(query): - query = query.strip() - logger.debug('find %s', query) - r = api.search(query) - results = [] - ids = [b for b in r.get('result', []) if b.startswith('/books')] - books = api.get_many(ids).get('result', []) - for olid, value in books.items(): - olid = olid.split('/')[-1] - book = format(value) - book['olid'] = [olid] - book['primaryid'] = ['olid', olid] - results.append(book) - return results - - -def get_ids(key, value): - ids = [] - if key == 'olid': - data = lookup(value) - for id in ('isbn', 'lccn', 'oclc'): - if id in data: - for v in data[id]: - if (id, v) not in ids: - ids.append((id, v)) - elif key in ('isbn', 'oclc', 'lccn'): - logger.debug('get_ids %s %s', key, value) - if key == 'isbn': - key = 'isbn_%s'%len(value) - r = api.things({'type': '/type/edition', key: value}) - for b in r.get('result', []): - if b.startswith('/books'): - olid = b.split('/')[-1] - for kv in [('olid', olid)] + get_ids('olid', olid): - if kv not in ids: - ids.append(kv) - if ids: - logger.debug('get_ids %s %s => %s', key, value, ids) - return ids - -def lookup(id, return_all=False): - logger.debug('lookup %s', id) - info = api.get('/books/' + id).get('result', {}) - #url = 'https://openlibrary.org/books/%s.json' % id - #info = json.loads(read_url(url).decode('utf-8')) - data = format(info, return_all) - if 'olid' not in data: - data['olid'] = [] - if id not in data['olid']: - data['olid'] = [id] - logger.debug('lookup %s => %s', id, list(data.keys())) - return data - -def get_type(obj): - type_ = obj.get('type') - if isinstance(type_, dict): - type_ = type_['key'] - return type_ - -def parse_date(s): - #"January 1, 1998" - for pattern, fmt in (('%B %d, %Y', '%Y-%m-%d'), ('%B %Y', '%Y-%m')): - try: - d = datetime.strptime(s, pattern) - s = d.strftime(fmt) - return s - except: - pass - return s - -def format(info, return_all=False): - data = {} - if 'works' in info: - work = api.get(info['works'][0]['key'])['result'] - else: - work = None - for key in KEYS: - if key in info: - value = info[key] - if key == 'authors': - if work: - value = resolve_names([r['author'] - for r in work.get('authors', []) if get_type(r) == '/type/author_role']) - else: - value = resolve_names(value) - elif key == 'publish_country': - value = value.strip() - value = COUNTRIES.get(value, value) - elif key == 'covers': - value = 'https://covers.openlibrary.org/b/id/%s.jpg' % value[0] - elif key == 'languages': - value = resolve_names(value) - elif key in ('isbn_10', 'isbn_13'): - if not isinstance(value, list): - value = [value] - value = list(map(normalize_isbn, value)) - if KEYS[key] in data: - value = data[KEYS[key]] + value - elif isinstance(value, list) and key not in ('publish_places', 'lccn', 'oclc_numbers'): - value = value[0] - if key == 'publish_date': - value = parse_date(value) - if key == 'publish_places': - for i, v in enumerate(value): - if v.startswith('['): - v = v[1:] - if v.endswith(']'): - v = v[:-1] - value[i] = v - data[KEYS[key]] = value - if 'subtitle' in info: - data['title'] += ' ' + info['subtitle'] - if 'classification' in data: - value = data['classification'] - if isinstance(value, list): - value = value[0] - data['classification'] = get_classification(value.split('/')[0]) - return data - -def resolve_names(objects, key='name'): - r = [] - data = api.get_many([k['key'] for k in objects]).get('result', {}) - for k, value in data.items(): - if 'location' in value and value.get('type', {}).get('key') == '/type/redirect': - value = api.get(value['location']).get('result', {}) - r.append(value[key]) - return r - -class API(object): - base = 'https://openlibrary.org/api' - - def _request(self, action, data, timeout=None): - for key in data: - if not isinstance(data[key], str): - data[key] = json.dumps(data[key]) - url = self.base + '/' + action + '?' + urlencode(data) - if timeout is None: - r = read_url(url).decode('utf-8') - if '504 Gateway Time-out' in r: - r = read_url(url, timeout=-1).decode('utf-8') - result = json.loads(r) - else: - r = read_url(url, timeout).decode('utf-8') - if '504 Gateway Time-out' in r: - r = read_url(url, timeout=-1).decode('utf-8') - result = json.loads(r) - if 'status' in result and result['status'] == 'error' or 'error' in result: - logger.info('FAILED %s %s', action, data) - logger.info('URL %s', url) - return result - - def get(self, key): - data = self._request('get', {'key': key}) - return data - - def get_many(self, keys): - data = self._request('get_many', {'keys': keys}) - return data - - def search(self, query): - if isinstance(query, str): - query = { - 'query': query - } - data = self._request('search', {'q': query}) - if 'status' in data and data['status'] == 'error': - logger.info('FAILED %s', query) - return data - - def things(self, query): - data = self._request('things', {'query': query}) - return data - -api = API() diff --git a/oml/meta/worldcat.py b/oml/meta/worldcat.py deleted file mode 100644 index 619143a..0000000 --- a/oml/meta/worldcat.py +++ /dev/null @@ -1,116 +0,0 @@ -# -*- coding: utf-8 -*- -# vi:si:et:sw=4:sts=4:ts=4 - - -import re -import hashlib - -from ox.cache import read_url -import lxml.html -import stdnum.isbn - -from .utils import normalize_isbn - -import logging -logger = logging.getLogger(__name__) - - -base_url = 'http://www.worldcat.org' - -def get_ids(key, value): - ids = [] - if key == 'isbn': - url = '%s/search?qt=worldcat_org_bks&q=%s' % (base_url, value) - html = read_url(url).decode('utf-8') - matches = re.compile('/title.*?oclc/(\d+).*?"').findall(html) - if matches: - info = lookup(matches[0]) - ids.append(('oclc', matches[0])) - for v in info.get('isbn', []): - if v != value: - ids.append(('isbn', v)) - elif key == 'oclc': - info = lookup(value) - if 'isbn' in info: - for value in info['isbn']: - ids.append(('isbn', value)) - if ids: - logger.debug('get_ids %s %s => %s', key, value, ids) - return ids - -def lookup(id): - data = { - 'oclc': [id] - } - url = '%s/oclc/%s' % (base_url, id) - html = read_url(url).decode('utf-8') - doc = lxml.html.document_fromstring(html) - for e in doc.xpath("//*[contains(@id, 'bibtip')]"): - key = e.attrib['id'].replace('bibtip_', '') - value = e.text_content().strip() - if value: - data[key] = value - info = doc.xpath('//textarea[@id="util-em-note"]') - if info: - info = info[0].text - info = dict([i.split(':', 1) for i in info.split('\n\n')[1].split('\n')]) - for key in info: - k = key.lower() - value = info[key].strip() - if value: - data[k] = value - for key in ('id', 'instance', 'mediatype', 'reclist', 'shorttitle'): - if key in data: - del data[key] - if 'isxn' in data: - for isbn in data.pop('isxn').split(' '): - isbn = normalize_isbn(isbn) - if stdnum.isbn.is_valid(isbn): - if not 'isbn' in data: - data['isbn'] = [] - if isbn not in data['isbn']: - data['isbn'].append(isbn) - cover = doc.xpath('//img[@class="cover"]') - if cover: - data['cover'] = cover[0].attrib['src'] - if data['cover'].startswith('//'): - data['cover'] = 'http:' + data['cover'] - cdata = read_url(data['cover']) - if hashlib.sha1(cdata).hexdigest() in ( - 'd2e9ab0c87193d69a7d3a3c21ae4aa550f7dcf00', - '70f16d3e077cdd47ef6b331001dbb1963677fa04' - ): - del data['cover'] - - if 'author' in data: - data['author'] = data['author'].split('; ') - if 'title' in data: - data['title'] = data['title'].replace(' : ', ': ') - if 'publisher' in data: - m = re.compile('(.+) : (.+), (\d{4})').findall(data['publisher']) - if m: - place, publisher, date = m[0] - data['publisher'] = publisher - data['date'] = date - data['place'] = [place] - elif ':' in data['publisher']: - place, publisher = data['publisher'].split(':', 1) - data['place'] = [place.strip()] - data['publisher'] = publisher.split(',')[0].strip() - m = re.compile('\d{4}').findall(publisher) - if m: - data['date'] = m[0] - - if 'place' in data: - if data['place'][0].startswith('['): - data['place'] = [data['place'][0][1:]] - if data['place'][0].endswith(']'): - data['place'] = [data['place'][0][:-1]] - logger.debug('lookup %s => %s', id, list(data.keys())) - return data - -info = lookup - -def find(title, author, year): - return [] -