remove unused scrapers

This commit is contained in:
j 2016-04-02 16:00:36 +02:00
parent 47647a7b86
commit a172e7b4b7
8 changed files with 0 additions and 1951 deletions

View file

@ -5,11 +5,6 @@
import stdnum.isbn import stdnum.isbn
import ox import ox
from . import abebooks
from . import loc
from . import lookupbyisbn
from . import openlibrary
from . import worldcat
from . import google from . import google
from . import duckduckgo from . import duckduckgo

View file

@ -1,50 +0,0 @@
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
import re
from ox.cache import read_url
import lxml.html
import logging
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
# Scheme and host of the AbeBooks site; request paths in this module are
# appended to it when building URLs.
base = 'http://www.abebooks.com'
def get_ids(key, value):
    """Return ``[(key, value)]`` when the ISBN search page lists a book.

    Only ``key == 'isbn'`` is handled; any other key yields an empty list
    without fetching anything. For an ISBN, the search-results page under
    ``base`` is fetched and scanned for links to a book details page. If at
    least one such link is present the pair is returned (and logged at
    debug level); otherwise the result is an empty list.
    """
    if key != 'isbn':
        return []
    search_url = '%s/servlet/SearchResults?isbn=%s&sts=t' % (base, value)
    page = read_url(search_url, unicode=True)
    # Any link to a details page counts as a hit; the link itself is not used.
    if not re.findall('href="(/servlet/BookDetailsPL[^"]+)"', page):
        return []
    found = [(key, value)]
    logger.debug('get_ids %s %s => %s', key, value, found)
    return found
def lookup(id):
    """Collect bibliographic fields for *id* from the first search hit.

    The search-results page for *id* (passed as the ``isbn`` query value)
    is fetched and the first details-page link found in it is followed.
    Every element of the details page whose ``id`` attribute contains
    ``biblio`` contributes one entry: the attribute value with ``biblio-``
    removed is the field name (``pubdate`` is stored as ``date``) and the
    stripped text content is the value.

    Returns a dict of field name to text. It is empty when the search page
    has no details link.
    """
    logger.debug('lookup %s', id)
    search_url = '%s/servlet/SearchResults?isbn=%s&sts=t' % (base, id)
    search_html = read_url(search_url, unicode=True)
    links = re.findall('href="(/servlet/BookDetailsPL[^"]+)"', search_html)
    if not links:
        return {}

    renamed = {'pubdate': 'date'}
    # A field whose text is only its own caption carries no value.
    placeholders = {'date': 'Publication Date:', 'publisher': 'Publisher:'}
    skipped = ('bookcondition', 'binding', 'edition-amz')

    details_html = read_url('%s%s' % (base, links[0]), unicode=True)
    tree = lxml.html.document_fromstring(details_html)
    result = {}
    for node in tree.xpath("//*[contains(@id, 'biblio')]"):
        raw_key = node.attrib['id'].replace('biblio-', '')
        text = node.text_content().strip()
        field = renamed.get(raw_key, raw_key)
        if placeholders.get(field) == text:
            continue
        if not text or raw_key in skipped:
            continue
        result[field] = text
    return result

View file

@ -1,962 +0,0 @@
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
def get_classification(id):
    """Return *id* as text, followed by its DEWEY caption when one matches.

    The lookup key is built from the digits found in the part of *id*
    that precedes the first ``/`` and the first ``.``, with leading zeros
    removed. A code made up only of zeros (``'0'``, ``'00'``, ``'000'``)
    is looked up as ``'0'``. When the key is not in ``DEWEY`` the result
    is just ``'%s' % id``.

    *id* must be a string, since ``str.split`` is called on it.
    """
    label = '%s' % id
    integer_part = id.split('/')[0].split('.')[0]
    digits = ''.join(ch for ch in integer_part if ch.isdigit())
    key = digits.lstrip('0')
    if digits and not key:
        # Stripping an all-zero code leaves nothing; fall back to the '0'
        # entry. The previous guard only covered the exact string '0', so
        # '000' and '00' never matched.
        key = '0'
    # NOTE(review): codes with leading zeros such as '004' are still looked
    # up as '4' -- confirm whether 0xx codes should resolve to '0' instead.
    if key in DEWEY:
        label = '%s %s' % (label, DEWEY[key])
    return label
DEWEY = {
"0": "Computer science, information & general works",
"1": "Philosophy & psychology",
"10": "Philosophy",
"100": "Philosophy, parapsychology and occultism, psychology",
"101": "Theory of philosophy",
"102": "Miscellany of philosophy",
"103": "Dictionaries, encyclopedias, concordances of philosophy",
"105": "Serial publications",
"106": "Organizations and management of philosophy",
"107": "Education, research, related topics of philosophy",
"108": "Groups of people",
"109": "Historical and collected persons treatment of philosophy",
"11": "Metaphysics",
"110": "Metaphysics",
"111": "Ontology",
"113": "Cosmology (Philosophy of nature)",
"114": "Space",
"115": "Time",
"116": "Change",
"117": "Structure",
"118": "Force and energy",
"119": "Number and quantity",
"12": "Epistemology",
"120": "Epistemology, causation & humankind",
"121": "Epistemology (Theory of knowledge)",
"122": "Causation",
"123": "Determinism and indeterminism",
"124": "Teleology",
"126": "The self",
"127": "The unconscious and the subconscious",
"128": "Humankind",
"129": "Origin and destiny of individual souls",
"13": "Parapsychology & occultism",
"130": "Parapsychology and occultism",
"131": "Parapsychological and occult techniques for achieving well-being, happiness, success",
"133": "Specific topics in parapsychology & occultism",
"135": "Dreams and mysteries",
"137": "Divinatory graphology",
"138": "Physiognomy",
"139": "Phrenology",
"14": "Philosophical schools of thought",
"140": "Specific philosophical schools",
"141": "Idealism & related systems",
"142": "Critical philosophy",
"143": "Bergsonism and intuitionism",
"144": "Humanism and related systems and doctrines",
"145": "Sensationalism",
"146": "Naturalism and related systems and doctrines",
"147": "Pantheism and related systems and doctrines",
"148": "Dogmatism, eclecticism, liberalism, syncretism, traditionalism",
"149": "Other philosophical systems",
"15": "Psychology",
"150": "Psychology",
"152": "Sensory perception, movement, emotions, physiological drives",
"153": "Conscious mental processes and intelligence",
"154": "Subconscious and altered states and processes",
"155": "Differential and developmental psychology",
"156": "Comparative psychology",
"158": "Applied psychology",
"16": "Philosophical logic",
"160": "Logic",
"161": "Induction",
"162": "Deduction",
"165": "Fallacies and sources of error",
"166": "Syllogisms",
"167": "Hypotheses",
"168": "Argument and persuasion",
"169": "Analogy",
"17": "Ethics",
"170": "Ethics",
"171": "Ethical systems",
"172": "Political ethics",
"173": "Ethics of family relationships",
"174": "Occupational ethics",
"175": "Ethics of recreation, leisure, public performances, communication",
"176": "Ethics of sex and reproduction",
"177": "Ethics of social relations",
"178": "Ethics of consumption",
"179": "Other ethical norms",
"18": "Ancient, medieval & eastern philosophy",
"180": "Ancient, medieval, eastern philosophy",
"181": "Eastern philosophy",
"182": "Pre-Socratic Greek philosophies",
"183": "Sophistic, Socratic, related Greek philosophies",
"184": "Platonic philosophy",
"185": "Aristotelian philosophy",
"186": "Skeptic and Neoplatonic philosophies",
"187": "Epicurean philosophy",
"188": "Stoic philosophy",
"189": "Medieval western philosophy",
"19": "Modern western philosophy",
"190": "Modern western and other noneastern philosophy",
"191": "United States and Canada",
"192": "Philosophy of British Isles",
"193": "Philosophy of Germany and Austria",
"194": "Philosophy of France",
"195": "Philosophy of Italy",
"196": "Philosophy of Spain and Portugal",
"197": "Philosophy of Russia",
"198": "Philosophy of Scandinavia and Finland",
"199": "Philosophy in other geographic areas",
"2": "Religion",
"20": "Religion",
"200": "Religion",
"201": "Religious mythology, general classes of religion, interreligious relations and attitudes, social theology",
"202": "Doctrines",
"203": "Public worship and other practices",
"204": "Religious experience, life, practice",
"205": "Religious ethics",
"206": "Leaders & organization",
"207": "Missions & religious education",
"208": "Sources",
"209": "Sects and reform movements",
"21": "Philosophy & theory of religion",
"210": "Philosophy & theory of religion",
"211": "Concepts of God",
"212": "Existence of God, ways of knowing God, attributes of God",
"213": "Creation",
"214": "Theodicy",
"215": "Science and religion",
"218": "Humankind",
"22": "The Bible",
"220": "Bible",
"221": "Old Testament (Tanakh)",
"222": "Historical books of Old Testament",
"223": "Poetic books of Old Testament",
"224": "Prophetic books of Old Testament",
"225": "New Testament",
"226": "Gospels and Acts",
"227": "Epistles",
"228": "Revelation (Apocalypse)",
"229": "Apocrypha & pseudepigrapha",
"23": "Christianity",
"230": "Christianity    Christian theology",
"231": "God",
"232": "Jesus Christ and his family",
"233": "Humankind",
"234": "Salvation and grace",
"235": "Spiritual beings",
"236": "Eschatology",
"238": "Creeds, confessions of faith, covenants, catechisms",
"239": "Apologetics and polemics",
"24": "Christian practice & observance",
"240": "Christian moral & devotional theology",
"241": "Christian ethics",
"242": "Devotional literature",
"243": "Evangelistic writings for individuals and families",
"246": "Use of art in Christianity",
"247": "Church furnishings and related articles",
"248": "Christian experience, practice, life",
"249": "Christian observances in family life",
"25": "Christian pastoral practice & religious orders",
"250": "Local Christian church and Christian religious orders",
"251": "Preaching (Homiletics)",
"252": "Texts of sermons",
"253": "Pastoral office and work (Pastoral theology)",
"254": "Parish administration",
"255": "Religious congregations & orders",
"259": "Pastoral care of specific kinds of persons",
"26": "Christian organization, social work & worship",
"260": "Christian social and ecclesiastical theology",
"261": "Social theology and interreligious relations and attitudes",
"262": "Ecclesiology",
"263": "Days, times & places of observance",
"264": "Public worship",
"265": "Sacraments, other rites and acts",
"266": "Missions",
"267": "Associations for religious work",
"268": "Religious education",
"269": "Spiritual renewal",
"27": "History of Christianity",
"270": "History of Christianity & Christian church",
"271": "Religious congregations and orders in church history",
"272": "Persecutions in general church history",
"273": "Doctrinal controversies and heresies in general church history",
"274": "Christianity in Europe",
"275": "History of Christianity in Asia",
"276": "Christianity in Africa",
"277": "Christianity in North America",
"278": "Christianity in South America",
"279": "Christianity in Australasia, Pacific Ocean islands, Atlantic Ocean islands, Arctic islands, Antarctica",
"28": "Christian denominations",
"280": "Denominations and sects of Christian church",
"281": "Early church and Eastern churches",
"282": "Roman Catholic Church",
"283": "Anglican churches",
"284": "Protestant denominations of Continental origin and related bodies",
"285": "Presbyterian churches, Reformed churches centered in America, Congregational churches, Puritanism",
"286": "Baptist, Restoration movement, Adventist churches",
"287": "Methodist churches; churches related to Methodism",
"289": "Other denominations & sects",
"29": "Other religions",
"290": "Other religions",
"292": "Classical religion (Greek and Roman religion)",
"293": "Germanic religion",
"294": "Religions of Indic origin",
"295": "Zoroastrianism (Mazdaism, Parseeism)",
"296": "Judaism",
"297": "Islam, Babism, Bahai Faith",
"299": "Religions not provided for elsewhere",
"3": "Social sciences",
"30": "Social sciences, sociology & anthropology",
"300": "Social sciences",
"301": "Sociology and anthropology",
"302": "Social interaction",
"303": "Social processes",
"304": "Factors affecting social behavior",
"305": "Groups of people",
"306": "Culture and institutions",
"307": "Communities",
"31": "Statistics",
"310": "Collections of general statistics",
"314": "General statistics of Europe",
"315": "General statistics of Asia",
"316": "General statistics of Africa",
"317": "General statistics of North America",
"318": "General statistics of South America",
"319": "General statistics of other parts of the world    Of Pacific Ocean islands",
"32": "Political science",
"320": "Political science (Politics and government)",
"321": "Systems of governments and states",
"322": "Relation of state to organized groups",
"323": "Civil and political rights",
"324": "The political process",
"325": "International migration and colonization",
"326": "Slavery and emancipation",
"327": "International relations",
"328": "The legislative process",
"33": "Economics",
"330": "Economics",
"331": "Labor economics",
"332": "Financial economics",
"333": "Economics of land and energy",
"334": "Cooperatives",
"335": "Socialism and related systems",
"336": "Public finance",
"337": "International economics",
"338": "Production",
"339": "Macroeconomics and related topics",
"34": "Law",
"340": "Law",
"341": "Law of nations",
"342": "Constitutional and administrative law",
"343": "Military, defense, public property, public finance, tax, commerce (trade), industrial law",
"344": "Labor, social, education & cultural law",
"345": "Criminal law",
"346": "Private law",
"347": "Procedure and courts",
"348": "Laws, regulations, cases",
"349": "Law of specific jurisdictions, areas, socioeconomic regions, regional intergovernmental organizations",
"35": "Public administration & military science",
"350": "Public administration and military science",
"351": "Public administration",
"352": "General considerations of public administration",
"353": "Specific fields of public administration",
"354": "Public administration of economy and environment",
"355": "Military science",
"356": "Foot forces and warfare",
"357": "Mounted forces & warfare",
"358": "Air and other specialized forces and warfare; engineering and related services",
"359": "Sea forces and warfare",
"36": "Social problems & social services",
"360": "Social problems & social services",
"361": "Social problems & social welfare in general",
"362": "Social welfare problems and services",
"363": "Other social problems and services",
"364": "Criminology",
"365": "Penal and related institutions",
"366": "Secret associations and societies",
"367": "General clubs",
"368": "Insurance",
"369": "Miscellaneous kinds of associations",
"37": "Education",
"370": "Education",
"371": "Schools and their activities; special education",
"372": "Primary education (Elementary education)",
"373": "Secondary education",
"374": "Adult education",
"375": "Curricula",
"378": "Higher education (Tertiary education)",
"379": "Public policy issues in education",
"38": "Commerce, communications & transportation",
"380": "Commerce, communications, transportation",
"381": "Commerce (Trade)",
"382": "International commerce (Foreign trade)",
"383": "Postal communication",
"384": "Communications",
"385": "Railroad transportation",
"386": "Inland waterway & ferry transportation",
"387": "Water, air & space transportation",
"388": "Transportation",
"389": "Metrology and standardization",
"39": "Customs, etiquette & folklore",
"390": "Customs, etiquette, folklore",
"391": "Costume and personal appearance",
"392": "Customs of life cycle and domestic life",
"393": "Death customs",
"394": "General customs",
"395": "Etiquette (Manners)",
"398": "Folklore",
"399": "Customs of war and diplomacy",
"4": "Language",
"40": "Language",
"400": "Language",
"401": "Philosophy and theory; international languages",
"402": "Miscellany",
"403": "Dictionaries, encyclopedias, concordances",
"404": "Special topics of language",
"405": "Serial publications",
"406": "Organizations and management",
"407": "Education, research & related topics",
"408": "Groups of people",
"409": "Geographic treatment and biography",
"41": "Linguistics",
"410": "Linguistics",
"411": "Writing systems",
"412": "Etymology of standard forms of languages",
"413": "Dictionaries of standard forms of languages",
"414": "Phonology & phonetics",
"415": "Grammar of standard forms of languages",
"417": "Dialectology and historical linguistics",
"418": "Standard usage (Prescriptive linguistics)",
"419": "Sign languages",
"42": "English & Old English languages",
"420": "English & Old English languages",
"421": "Writing system, phonology, phonetics of standard English",
"422": "Etymology of standard English",
"423": "Dictionaries of standard English",
"425": "Grammar of standard English",
"427": "Historical and geographic variations, modern nongeographic variations of English",
"428": "Standard English usage (Prescriptive linguistics)",
"429": "Old English (Anglo-Saxon)",
"43": "German & related languages",
"430": "German & related languages",
"431": "German writing systems & phonology",
"432": "Etymology of standard German",
"433": "Dictionaries of standard German",
"435": "Grammar of standard German",
"437": "Historical and geographic variations, modern nongeographic variations of German",
"438": "Standard German usage",
"439": "Other Germanic languages",
"44": "French & related languages",
"440": "Romance languages    French",
"441": "Writing systems, phonology, phonetics of standard French",
"442": "Etymology of standard French",
"443": "Dictionaries of standard French",
"445": "Grammar of standard French",
"447": "Historical and geographic variations, modern nongeographic variations of French",
"448": "Standard French usage (Prescriptive linguistics)",
"449": "Occitan, Catalan, Franco-Provençal",
"45": "Italian, Romanian & related languages",
"450": "Italian, Dalmatian, Romanian, Rhaetian, Sardinian, Corsican",
"451": "Writing systems, phonology, phonetics of standard Italian",
"452": "Etymology of standard Italian",
"453": "Dictionaries of standard Italian",
"455": "Grammar of standard Italian",
"457": "Historical and geographic variations, modern nongeographic variations of Italian",
"458": "Standard Italian usage",
"459": "Sardinian",
"46": "Spanish, Portuguese, Galician",
"460": "Spanish, Portuguese, Galician",
"461": "Writing systems, phonology, phonetics of standard Spanish",
"462": "Etymology of standard Spanish",
"463": "Dictionaries of standard Spanish",
"465": "Grammar of standard Spanish",
"467": "Historical and geographic variations, modern nongeographic variations of Spanish",
"468": "Standard Spanish usage",
"469": "Portuguese",
"47": "Latin & Italic languages",
"470": "Italic languages    Latin",
"471": "Writing systems, phonology, phonetics of classical Latin",
"472": "Classical Latin etymology",
"473": "Dictionaries of classical Latin",
"475": "Grammar of classical Latin",
"477": "Old, postclassical & Vulgar Latin",
"478": "Classical Latin usage (Prescriptive linguistics)",
"479": "Other Italic languages",
"48": "Classical & modern Greek languages",
"480": "Classical Greek and related Hellenic languages",
"481": "Writing systems, phonology, phonetics of classical Greek",
"482": "Etymology of classical Greek",
"483": "Dictionaries of classical Greek",
"485": "Grammar of classical Greek",
"487": "Preclassical and postclassical Greek",
"488": "Classical Greek usage (Prescriptive linguistics)",
"489": "Other Hellenic languages",
"49": "Other languages",
"490": "Other languages",
"491": "East Indo-European and Celtic languages",
"492": "Afro-Asiatic languages",
"493": "Non-Semitic Afro-Asiatic languages",
"494": "Altaic, Uralic, Hyperborean, Dravidian languages, miscellaneous languages of south Asia",
"495": "Languages of east and southeast Asia",
"496": "African languages",
"497": "North American native languages",
"498": "South American native languages",
"499": "Austronesian & other languages",
"5": "Science",
"50": "Science",
"500": "Science",
"501": "Philosophy & theory",
"502": "Miscellany",
"503": "Dictionaries, encyclopedias, concordances",
"505": "Serial publications",
"506": "Organizations and management",
"507": "Education, research, related topics",
"508": "Natural history",
"509": "Historical, geographic & persons treatment",
"51": "Mathematics",
"510": "Mathematics",
"511": "General principles of mathematics",
"512": "Algebra",
"513": "Arithmetic",
"514": "Topology",
"515": "Analysis",
"516": "Geometry",
"518": "Numerical analysis",
"519": "Probabilities and applied mathematics",
"52": "Astronomy",
"520": "Astronomy and allied sciences",
"521": "Celestial mechanics",
"522": "Techniques, procedures, apparatus, equipment, materials",
"523": "Specific celestial bodies and phenomena",
"525": "Earth (Astronomical geography)",
"526": "Mathematical geography",
"527": "Celestial navigation",
"528": "Ephemerides",
"529": "Chronology",
"53": "Physics",
"530": "Physics",
"531": "Classical mechanics",
"532": "Fluid mechanics; liquid mechanics",
"533": "Pneumatics (Gas mechanics)",
"534": "Sound and related vibrations",
"535": "Light and infrared and ultraviolet phenomena",
"536": "Heat",
"537": "Electricity & electronics",
"538": "Magnetism",
"539": "Modern physics",
"54": "Chemistry",
"540": "Chemistry and allied sciences",
"541": "Physical chemistry",
"542": "Techniques, equipment & materials",
"543": "Analytical chemistry",
"546": "Inorganic chemistry",
"547": "Organic chemistry",
"548": "Crystallography",
"549": "Mineralogy",
"55": "Earth sciences & geology",
"550": "Earth sciences",
"551": "Geology, hydrology, meteorology",
"552": "Petrology",
"553": "Economic geology",
"554": "Earth sciences of Europe",
"555": "Earth sciences of Asia",
"556": "Earth sciences of Africa",
"557": "Earth sciences of North America",
"558": "Earth sciences of South America",
"559": "Earth sciences of Australasia, Pacific Ocean islands, Atlantic Ocean islands, Arctic islands, Antarctica, extraterrestrial worlds",
"56": "Fossils & prehistoric life",
"560": "Paleontology",
"561": "Paleobotany; fossil microorganisms",
"562": "Fossil invertebrates",
"563": "Miscellaneous fossil marine and seashore invertebrates",
"564": "Fossil Mollusca and Molluscoidea",
"565": "Fossil Arthropoda",
"566": "Fossil Chordata",
"567": "Fossil cold-blooded vertebrates",
"568": "Fossil birds",
"569": "Fossil mammals",
"57": "Biology",
"570": "Life sciences    Biology",
"571": "Physiology and related subjects",
"572": "Biochemistry",
"573": "Specific physiological systems in animals, regional histology and physiology in animals",
"575": "Specific parts of and physiological systems in plants",
"576": "Genetics and evolution",
"577": "Ecology",
"578": "Natural history of organisms and related subjects",
"579": "Microorganisms, fungi, algae",
"58": "Plants (Botany)",
"580": "Plants",
"581": "Specific topics in natural history of plants",
"582": "Plants noted for specific vegetative characteristics and flowers",
"583": "Dicotyledons",
"584": "Monocotyledons",
"585": "Pinophyta (Gymnosperms)",
"586": "Seedless plants",
"587": "Vascular seedless plants",
"588": "Bryophyta",
"59": "Animals (Zoology)",
"590": "Animals",
"591": "Specific topics in natural history",
"592": "Invertebrates",
"593": "Miscellaneous marine and seashore invertebrates",
"594": "Mollusks & molluscoids",
"595": "Arthropoda",
"596": "Chordata",
"597": "Cold-blooded vertebrates",
"598": "Aves (Birds)",
"599": "Mammalia (Mammals)",
"6": "Technology",
"60": "Technology",
"600": "Technology",
"601": "Philosophy and theory",
"602": "Miscellany",
"603": "Dictionaries & encyclopedias",
"604": "Technical drawing, hazardous materials technology; groups of people",
"605": "Serial publications",
"606": "Organizations",
"607": "Education, research, related topics",
"608": "Patents",
"609": "Historical, geographic, persons treatment",
"61": "Medicine & health",
"610": "Medicine and health",
"611": "Human anatomy, cytology, histology",
"612": "Human physiology",
"613": "Personal health and safety",
"614": "Forensic medicine; incidence of injuries, wounds, disease; public preventive medicine",
"615": "Pharmacology and therapeutics",
"616": "Diseases",
"617": "Surgery, regional medicine, dentistry, ophthalmology, otology, audiology",
"618": "Other branches of medicine    Gynecology and obstetrics",
"62": "Engineering",
"620": "Engineering and allied operations",
"621": "Applied physics",
"622": "Mining and related operations",
"623": "Military and nautical engineering",
"624": "Civil engineering",
"625": "Engineering of railroads & roads",
"627": "Hydraulic engineering",
"628": "Sanitary engineering",
"629": "Other branches of engineering",
"63": "Agriculture",
"630": "Agriculture and related technologies",
"631": "Specific techniques; apparatus, equipment, materials",
"632": "Plant injuries, diseases, pests",
"633": "Field and plantation crops",
"634": "Orchards, fruits, forestry",
"635": "Garden crops (Horticulture)",
"636": "Animal husbandry",
"637": "Processing dairy & related products",
"638": "Insect culture",
"639": "Hunting, fishing, conservation, related technologies",
"64": "Home & family management",
"640": "Home and family management",
"641": "Food & drink",
"642": "Meals and table service",
"643": "Housing and household equipment",
"644": "Household utilities",
"645": "Household furnishings",
"646": "Sewing, clothing, management of personal and family life",
"647": "Management of public households (Institutional housekeeping)",
"648": "Housekeeping",
"649": "Child rearing; home care of people with disabilities and illnesses",
"65": "Management & public relations",
"650": "Management and auxiliary services",
"651": "Office services",
"652": "Processes of written communication",
"653": "Shorthand",
"657": "Accounting",
"658": "General management",
"659": "Advertising and public relations",
"66": "Chemical engineering",
"660": "Chemical engineering and related technologies",
"661": "Technology of industrial chemicals",
"662": "Technology of explosives, fuels, related products",
"663": "Beverage technology",
"664": "Food technology",
"665": "Technology of industrial oils, fats, waxes, gases",
"666": "Ceramic and allied technologies",
"667": "Cleaning, color, coating, related technologies",
"668": "Technology of other organic products",
"669": "Metallurgy",
"67": "Manufacturing",
"670": "Manufacturing",
"671": "Metalworking processes and primary metal products",
"672": "Iron, steel, other iron alloys",
"673": "Nonferrous metals",
"674": "Lumber processing, wood products, cork",
"675": "Leather and fur processing",
"676": "Pulp and paper technology",
"677": "Textiles",
"678": "Elastomers and elastomer products",
"679": "Other products of specific materials",
"68": "Manufacture for specific uses",
"680": "Manufacture of products for specific uses",
"681": "Precision instruments and other devices",
"682": "Small forge work (Blacksmithing)",
"683": "Hardware and household appliances",
"684": "Furnishings and home workshops",
"685": "Leather and fur goods, and related products",
"686": "Printing and related activities",
"687": "Clothing and accessories",
"688": "Other final products & packaging",
"69": "Construction of buildings",
"690": "Buildings",
"691": "Building materials",
"692": "Auxiliary construction practices",
"693": "Construction in specific types of materials and for specific purposes",
"694": "Wood construction",
"695": "Roof covering",
"696": "Utilities",
"697": "Heating, ventilating & air-conditioning",
"698": "Detail finishing",
"7": "Arts & recreation",
"70": "Arts",
"700": "Arts",
"701": "Philosophy and theory of fine and decorative arts",
"702": "Miscellany of fine and decorative arts",
"703": "Dictionaries, encyclopedias, concordances of fine and decorative arts",
"704": "Special topics in fine and decorative arts",
"705": "Serial publications of fine and decorative arts",
"706": "Organizations and management of fine and decorative arts",
"707": "Education, research, related topics of fine and decorative arts",
"708": "Galleries, museums, private collections of fine and decorative arts",
"709": "Historical, geographic & persons treatment",
"71": "Area planning & landscape architecture",
"710": "Area planning and landscape architecture",
"711": "Area planning (Civic art)",
"712": "Landscape architecture (Landscape design)",
"713": "Landscape architecture of trafficways",
"714": "Water features in landscape architecture",
"715": "Woody plants in landscape architecture",
"716": "Herbaceous plants in landscape architecture",
"717": "Structures in landscape architecture",
"718": "Landscape design of cemeteries",
"719": "Natural landscapes",
"72": "Architecture",
"720": "Architecture",
"721": "Architectural materials and structural elements",
"722": "Architecture from earliest times to ca. 300",
"723": "Architecture from ca. 300 to 1399",
"724": "Architecture from 1400",
"725": "Public structures",
"726": "Buildings for religious purposes",
"727": "Buildings for educational and research purposes",
"728": "Residential and related buildings",
"729": "Design and decoration of structures and accessories",
"73": "Sculpture, ceramics & metalwork",
"730": "Plastic arts    Sculpture",
"731": "Processes, forms & subjects of sculpture",
"732": "Sculpture from earliest times to ca. 500, sculpture of nonliterate peoples",
"733": "Greek, Etruscan, Roman sculpture",
"734": "Sculpture from ca. 500 to 1399",
"735": "Sculpture from 1400",
"736": "Carving and carvings",
"737": "Numismatics and sigillography",
"738": "Ceramic arts",
"739": "Art metalwork",
"74": "Graphic arts & decorative arts",
"740": "Graphic arts",
"741": "Drawing and drawings",
"742": "Perspective in drawing",
"743": "Drawing and drawings by subject",
"745": "Decorative arts",
"746": "Textile arts",
"747": "Interior decoration",
"748": "Glass",
"749": "Furniture and accessories",
"75": "Painting",
"750": "Painting and paintings",
"751": "Techniques, procedures, apparatus, equipment, materials, forms",
"752": "Color",
"753": "Symbolism, allegory, mythology, legend",
"754": "Genre paintings",
"755": "Religion",
"757": "Human figures",
"758": "Nature, architectural subjects and cityscapes, other specific subjects",
"759": "History, geographic treatment, biography",
"76": "Printmaking & prints",
"760": "Printmaking and prints",
"761": "Relief processes (Block printing)",
"763": "Lithographic processes (Planographic processes)",
"764": "Chromolithography and serigraphy",
"765": "Metal engraving",
"766": "Mezzotinting, aquatinting, related processes",
"767": "Etching and drypoint",
"769": "Prints",
"77": "Photography, computer art, film, video",
"770": "Photography, computer art, cinematography, videography",
"771": "Techniques, procedures, apparatus, equipment, materials",
"772": "Metallic salt processes",
"773": "Pigment processes of printing",
"774": "Holography",
"775": "Digital photography",
"776": "Computer art (Digital art)",
"777": "Cinematography and videography",
"778": "Specific fields and special kinds of photography",
"779": "Photographs",
"78": "Music",
"780": "Music",
"781": "General principles & musical forms",
"782": "Vocal music",
"783": "Music for single voices",
"784": "Instruments & instrumental ensembles",
"785": "Ensembles with only one instrument per part",
"786": "Keyboard, mechanical, electrophonic, percussion instruments",
"787": "Stringed instruments (Chordophones)",
"788": "Wind instruments (Aerophones)",
"79": "Sports, games & entertainment",
"790": "Recreational and performing arts",
"791": "Public performances",
"792": "Stage presentations",
"793": "Indoor games and amusements",
"794": "Indoor games of skill",
"795": "Games of chance",
"796": "Athletic and outdoor sports and games",
"797": "Aquatic & air sports",
"798": "Equestrian sports and animal racing",
"799": "Fishing, hunting, shooting",
"8": "Literature",
"80": "Literature, rhetoric & criticism",
"800": "Literature (Belles-lettres) and rhetoric",
"801": "Philosophy and theory",
"802": "Miscellany",
"803": "Dictionaries, encyclopedias, concordances",
"805": "Serial publications",
"806": "Organizations and management",
"807": "Education, research, related topics",
"808": "Rhetoric and collections of literary texts from more than two literatures",
"809": "History, description, critical appraisal of more than two literatures",
"81": "American literature in English",
"810": "American literature in English",
"811": "American poetry in English",
"812": "American drama in English",
"813": "American fiction in English",
"814": "American essays in English",
"815": "American speeches in English",
"816": "American letters in English",
"817": "American humor and satire in English",
"818": "American miscellaneous writings",
"82": "English & Old English literatures",
"820": "English and Old English (Anglo-Saxon) literatures",
"821": "English poetry",
"822": "English drama",
"823": "English fiction",
"824": "English essays",
"825": "English speeches",
"826": "English letters",
"827": "English humor and satire",
"828": "English miscellaneous writings",
"829": "Old English (Anglo-Saxon) literature",
"83": "German & related literatures",
"830": "Literatures of Germanic languages    German literature",
"831": "German poetry",
"832": "German drama",
"833": "German fiction",
"834": "German essays",
"835": "German speeches",
"836": "German letters",
"837": "German humor & satire",
"838": "German miscellaneous writings",
"839": "Other Germanic literatures",
"84": "French & related literatures",
"840": "French literature and literatures of related Romance languages",
"841": "French poetry",
"842": "French drama",
"843": "French fiction",
"844": "French essays",
"845": "French speeches",
"846": "French letters",
"847": "French humor & satire",
"848": "French miscellaneous writings",
"849": "Occitan, Catalan, Franco-Provençal literatures",
"85": "Italian, Romanian & related literatures",
"850": "Literatures of Italian, Dalmatian, Romanian, Rhaetian, Sardinian, Corsican languages",
"851": "Italian poetry",
"852": "Italian drama",
"853": "Italian fiction",
"854": "Italian essays",
"855": "Italian speeches",
"856": "Italian letters",
"857": "Italian humor and satire",
"858": "Italian miscellaneous writings",
"859": "Literatures of Romanian, Rhaetian, Sardinian, Corsican languages",
"86": "Spanish, Portuguese, Galician literatures",
"860": "Spanish & Portuguese literatures",
"861": "Spanish poetry",
"862": "Spanish drama",
"863": "Spanish fiction",
"864": "Spanish essays",
"865": "Spanish speeches",
"866": "Spanish letters",
"867": "Spanish humor and satire",
"868": "Spanish miscellaneous writings",
"869": "Literatures of Portuguese and Galician languages",
"87": "Latin & Italic literatures",
"870": "Latin & Italic literatures",
"871": "Latin poetry",
"872": "Latin dramatic poetry and drama",
"873": "Latin epic poetry and fiction",
"874": "Latin lyric poetry",
"875": "Latin speeches",
"876": "Latin letters",
"877": "Latin humor and satire",
"878": "Latin miscellaneous writings",
"879": "Literatures of other Italic languages",
"88": "Classical & modern Greek literatures",
"880": "Literatures of Hellenic languages    Classical Greek literature",
"881": "Classical Greek poetry",
"882": "Classical Greek dramatic poetry and drama",
"883": "Classical Greek epic poetry and fiction",
"884": "Classical Greek lyric poetry",
"885": "Classical Greek speeches",
"886": "Classical Greek letters",
"887": "Classical Greek humor and satire",
"888": "Classical Greek miscellaneous writings",
"889": "Modern Greek literature",
"89": "Other literatures",
"890": "Literatures of other specific languages and language families",
"891": "East Indo-European and Celtic literatures",
"892": "Afro-Asiatic literatures",
"893": "Non-Semitic Afro-Asiatic literatures",
"894": "Literatures of Altaic, Uralic, Hyperborean, Dravidian languages; literatures of miscellaneous languages of south Asia",
"895": "Literatures of East and Southeast Asia",
"896": "African literatures",
"897": "North American native literatures",
"898": "Literatures of South American native languages",
"899": "Literatures of non-Austronesian languages of Oceania, of Austronesian languages, of miscellaneous languages",
"9": "History & geography",
"90": "History",
"900": "History, geography, and auxiliary disciplines",
"901": "Philosophy and theory of history",
"902": "Miscellany",
"903": "Dictionaries, encyclopedias, concordances of history",
"904": "Collected accounts of events",
"905": "Serial publications of history",
"906": "Organizations and management of history",
"907": "Education, research & related topics",
"908": "History with respect to groups of people",
"909": "World history",
"91": "Geography & travel",
"910": "Geography and travel",
"911": "Historical geography",
"912": "Graphic representations of surface of earth and of extraterrestrial worlds",
"913": "Geography of and travel in ancient world",
"914": "Geography of and travel in Europe",
"915": "Geography of and travel in Asia",
"916": "Geography of and travel in Africa",
"917": "Geography of and travel in North America",
"918": "Geography of & travel in South America",
"919": "Geography of and travel in Australasia, Pacific Ocean islands, Atlantic Ocean islands, Arctic islands, Antarctica and on extraterrestrial worlds",
"92": "Biography & genealogy",
"920": "Biography, genealogy, insignia",
"929": "Genealogy, names, insignia",
"93": "History of ancient world (to ca. 499)",
"930": "History of ancient world to ca. 499",
"931": "China to 420",
"932": "Egypt to 640",
"933": "Palestine to 70",
"934": "South Asia to 647",
"935": "Mesopotamia to 637 and Iranian Plateau to 637",
"936": "Europe north and west of Italian Peninsula to ca. 499",
"937": "Italian Peninsula to 476 and adjacent territories to 476",
"938": "Greece to 323",
"939": "Other parts of ancient world to ca. 640",
"94": "History of Europe",
"940": "History of Europe",
"941": "British Isles",
"942": "England and Wales",
"943": "Germany and neighboring central European countries",
"944": "France and Monaco",
"945": "Italy, San Marino, Vatican City, Malta",
"946": "Spain, Andorra, Gibraltar, Portugal",
"947": "Russia and neighboring east European countries",
"948": "Scandinavia",
"949": "Other parts of Europe",
"95": "History of Asia",
"950": "History of Asia",
"951": "China and adjacent areas",
"952": "Japan",
"953": "Arabian Peninsula and adjacent areas",
"954": "India and neighboring south Asian countries",
"955": "Iran",
"956": "Middle East (Near East)",
"957": "Siberia (Asiatic Russia)",
"958": "Central Asia",
"959": "Southeast Asia",
"96": "History of Africa",
"960": "History of Africa",
"961": "Tunisia & Libya",
"962": "Egypt, Sudan, South Sudan",
"963": "Ethiopia and Eritrea",
"964": "Northwest African coast & offshore islands",
"965": "Algeria",
"966": "West Africa and offshore islands",
"967": "Central Africa and offshore islands",
"968": "Republic of South Africa and neighboring southern African countries",
"969": "South Indian Ocean islands",
"97": "History of North America",
"970": "History of North America",
"971": "Canada",
"972": "Middle America; Mexico",
"973": "United States",
"974": "Northeastern United States (New England and Middle Atlantic states)",
"975": "Southeastern United States (South Atlantic states)",
"976": "South central United States    Gulf Coast states",
"977": "North central United States",
"978": "Western United States",
"979": "Great Basin and Pacific Slope region of United States",
"98": "History of South America",
"980": "History of South America",
"981": "Brazil",
"982": "Argentina",
"983": "Chile",
"984": "Bolivia",
"985": "Peru",
"986": "Colombia and Ecuador",
"987": "Venezuela",
"988": "Guiana",
"989": "Paraguay and Uruguay",
"99": "History of other areas",
"990": "History of Australasia, Pacific Ocean islands, Atlantic Ocean islands, Arctic islands, Antarctica, extraterrestrial worlds",
"993": "New Zealand",
"994": "Australia",
"995": "New Guinea and neighboring countries of Melanesia",
"996": "Other parts of Pacific    Polynesia",
"997": "Atlantic Ocean islands",
"998": "Arctic islands and Antarctica",
"999": "Extraterrestrial worlds"
}
if __name__ == '__main__':
    # Maintenance script: fetch the preferred label of every class 0..999
    # from dewey.info and rewrite the DEWEY literal of this file in place.
    import json
    import re
    from ox.cache import read_url

    label_key = 'http://www.w3.org/2004/02/skos/core#prefLabel'
    dewey = {}
    for i in range(0, 1000):
        url = 'http://dewey.info/class/%s/about.en.json' % i
        print(url)
        data = json.loads(read_url(url).decode('utf-8'))
        # Use the first entry of the response that carries a preferred label.
        for d in data.values():
            if label_key in d:
                dewey[str(i)] = d[label_key][0]['value']
                break
    # Keep the JSON as text: formatting bytes with %s on Python 3 would
    # write a b'...' repr into the source file.
    data = json.dumps(dewey, indent=4, ensure_ascii=False, sort_keys=True)
    with open(__file__, encoding='utf-8') as f:
        pydata = f.read()
    # A callable replacement is inserted verbatim; a string template would
    # have its backslashes (e.g. JSON \" escapes) interpreted by re.sub.
    pydata = re.sub(
        re.compile('\nDEWEY = {.*?}\n\n', re.DOTALL),
        lambda match: '\nDEWEY = %s\n\n' % data, pydata)
    with open(__file__, 'w', encoding='utf-8') as f:
        f.write(pydata)

View file

@ -1,102 +0,0 @@
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
from ox.cache import read_url
import ox
import re
import xml.etree.ElementTree as ET
from .dewey import get_classification
from .marc_countries import COUNTRIES
from .utils import normalize_isbn
import logging
logger = logging.getLogger(__name__)
def get_ids(key, value):
    """Return identifiers related to the given one as (key, value) tuples.

    key 'isbn': the loc.gov search page for the value is fetched and the
    first LCCN link found in it is returned as ('lccn', number).
    key 'lccn': the record is fetched with lookup() and every 'oclc' and
    'isbn' value of that record is returned.
    Any other key yields an empty list.
    """
    ids = []
    if key == 'isbn':
        url = 'http://www.loc.gov/search/?q=%s&all=true' % value
        html = ox.cache.read_url(url).decode('utf-8', 'ignore')
        # Raw string: '\d' in a plain literal is an invalid escape sequence.
        match = re.search(r'"http://lccn.loc.gov/(\d+)"', html)
        if match:
            ids.append(('lccn', match.group(1)))
    elif key == 'lccn':
        info = lookup(value)
        # Separate loop names keep the key/value parameters intact, so the
        # debug line below reports the identifier that was actually asked for.
        for other_key in ('oclc', 'isbn'):
            for other_value in info.get(other_key, []):
                ids.append((other_key, other_value))
    if ids:
        logger.debug('get_ids %s %s => %s', key, value, ids)
    return ids
def lookup(id):
    """Fetch the MODS record for an LCCN and return it as a metadata dict.

    The record is read from http://lccn.loc.gov/<id>/mods. Possible keys of
    the result: 'lccn', 'oclc', 'isbn' (lists), 'title', 'place' (list),
    'country', 'publisher', 'date', 'classification', 'author' (list) and
    'description'. Keys with empty values are dropped.

    Returns {'lccn': [id]} when the record cannot be fetched or parsed, and
    {} when the record has no titleInfo element.
    """
    logger.debug('lookup %s', id)
    ns = '{http://www.loc.gov/mods/v3}'
    url = 'http://lccn.loc.gov/%s/mods' % id
    info = {
        'lccn': [id]
    }
    try:
        data = read_url(url).decode('utf-8')
        mods = ET.fromstring(data)
    except Exception:
        # Retry once with timeout=0 (presumably bypasses the ox cache --
        # confirm against ox.cache.read_url) before giving up.
        try:
            data = read_url(url, timeout=0).decode('utf-8')
            mods = ET.fromstring(data)
        except Exception:
            logger.debug('lookup for %s url: %s failed', id, url, exc_info=True)
            return info

    title = mods.findall(ns + 'titleInfo')
    if not title:
        return {}
    # Join the parts of the first titleInfo; a subTitle is attached with ': '.
    # Elements without text contribute an empty string instead of crashing.
    info['title'] = ''.join([
        (': ' if e.tag == ns + 'subTitle' else ' ') + (e.text or '').strip()
        for e in title[0]
    ]).strip()

    origin = mods.findall(ns + 'originInfo')
    if origin:
        info['place'] = []
        for place in origin[0].findall(ns + 'place'):
            terms = place.findall(ns + 'placeTerm')
            if not terms:
                continue
            term = terms[0]
            term_type = term.attrib.get('type')
            if term_type == 'text':
                info['place'].append(term.text)
            elif term_type == 'code':
                # Translate the country code, falling back to the raw code.
                info['country'] = COUNTRIES.get(term.text, term.text)
        publisher = [e.text for e in origin[0].findall(ns + 'publisher')]
        if publisher:
            info['publisher'] = publisher[0]
        info['date'] = ''.join([
            e.text or ''
            for e in origin[0].findall(ns + 'dateIssued')
            if e.attrib.get('encoding') == 'marc'
        ])

    for i in mods.findall(ns + 'identifier'):
        key = i.attrib['type']
        value = i.text
        if key in ('oclc', 'lccn', 'isbn'):
            if key == 'oclc':
                value = value.replace('ocn', '').replace('ocm', '')
            if key == 'isbn':
                value = normalize_isbn(i.text)
            if key not in info:
                info[key] = []
            if value not in info[key]:
                info[key].append(value)

    for i in mods.findall(ns + 'classification'):
        # Only Dewey ('ddc') classifications are used; skip empty elements.
        if i.attrib.get('authority') == 'ddc' and i.text:
            info['classification'] = get_classification(i.text.split('/')[0])

    info['author'] = []
    for a in mods.findall(ns + 'name'):
        if a.attrib.get('usage') == 'primary':
            # Name parts of type 'date' are left out of the author name.
            info['author'].append(' '.join([
                e.text for e in a.findall(ns + 'namePart')
                if e.attrib.get('type') not in ('date', ) and e.text
            ]))
    info['author'] = [ox.normalize_name(a) for a in info['author']]

    toc = mods.findall(ns + 'tableOfContents')
    if toc and toc[0].text:
        info['description'] = toc[0].text.strip()

    # Drop every key whose value ended up empty.
    for key in list(info.keys()):
        if not info[key]:
            del info[key]
    return info
# Alias: lookup() is also reachable under the name `info`.
info = lookup

View file

@ -1,97 +0,0 @@
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
import re
from ox.cache import read_url
from ox import find_re, strip_tags, decode_html
import stdnum.isbn
from .utils import find_isbns
import logging
logger = logging.getLogger(__name__)
# Site root that the search and lookup URLs in this module are built from.
base = 'http://www.lookupbyisbn.com'
def get_ids(key, value):
    """Collect identifiers related to an ISBN or ASIN as (key, value) tuples.

    For 'isbn' and 'asin' keys the site search page is fetched and the ASIN
    taken from the first book link is added when it passes the stdnum checks.
    An ISBN additionally yields its 10/13 digit counterpart. An ASIN that is
    itself a valid ISBN is returned as 'isbn' plus counterpart; otherwise the
    ISBNs reported by amazon_lookup() are used.
    """
    ids = []

    def _append_counterpart(code):
        # ISBN-10 -> ISBN-13, and 978-prefixed ISBN-13 -> ISBN-10.
        if len(code) == 10:
            ids.append(('isbn', stdnum.isbn.to_isbn13(code)))
        if len(code) == 13 and code.startswith('978'):
            ids.append(('isbn', stdnum.isbn.to_isbn10(code)))

    if key in ('isbn', 'asin'):
        search_url = '%s/Search/Book/%s/1' % (base, value)
        page = read_url(search_url).decode('utf-8')
        links = re.findall('href="(/Lookup/Book/[^"]+?)"', page)
        if links:
            # The ASIN is the third path segment from the end of the link.
            asin = links[0].split('/')[-3]
            if stdnum.isbn.to_isbn10(asin) or not stdnum.isbn.is_valid(asin):
                ids.append(('asin', asin))

    if key == 'isbn':
        _append_counterpart(value)
    elif key == 'asin':
        if stdnum.isbn.is_valid(value):
            ids.append(('isbn', value))
            _append_counterpart(value)
        else:
            for candidate in amazon_lookup(value):
                if not stdnum.isbn.is_valid(candidate):
                    continue
                ids.append(('isbn', candidate))
                _append_counterpart(candidate)

    if ids:
        logger.debug('get_ids %s, %s => %s', key, value, ids)
    return ids
def lookup(id):
    """Scrape the lookupbyisbn.com book page for an ASIN into a metadata dict.

    Returns {} when the page title is 'Error!'. Otherwise the result holds
    'asin' (list), 'title', 'description', 'cover' and, when present on the
    page, 'author' (list), 'publisher', 'date', 'edition', 'binding',
    'volume' and 'pages' (int).
    """
    logger.debug('lookup %s', id)
    r = {
        'asin': [id]
    }
    url = '%s/Lookup/Book/%s/%s/1' % (base, id, id)
    logger.debug('%s', url)
    data = read_url(url).decode('utf-8')
    r["title"] = find_re(data, "<h2>(.*?)</h2>")
    if r["title"] == 'Error!':
        return {}

    # Result key -> label shown on the page.
    keys = {
        'author': 'Author(s)',
        'publisher': 'Publisher',
        'date': 'Publication date',
        'edition': 'Edition',
        'binding': 'Binding',
        'volume': 'Volume(s)',
        'pages': 'Pages',
    }
    for key in keys:
        r[key] = find_re(data, '<span class="title">%s:</span>(.*?)</li>'% re.escape(keys[key]))
        # '--' is the page's marker for a missing value.
        if r[key] == '--' or not r[key]:
            del r[key]
        if key == 'pages' and key in r:
            try:
                r[key] = int(r[key])
            except ValueError:
                # A non-numeric page count is dropped instead of aborting
                # the whole lookup.
                del r[key]

    # Raw string: '\/' in a plain literal is an invalid escape sequence.
    desc = find_re(data, r'<h2>Description:<\/h2>(.*?)<div ')
    desc = desc.replace('<br /><br />', ' ').replace('<br /> ', ' ').replace('<br />', ' ')
    r['description'] = decode_html(strip_tags(desc))
    r['cover'] = find_re(data, '<img src="(.*?)" alt="Book cover').replace('._SL160_', '')

    # Clean markup and entities out of every string value.
    for key in r:
        if isinstance(r[key], str):
            r[key] = decode_html(strip_tags(r[key])).strip()

    if 'author' in r and isinstance(r['author'], str) and r['author']:
        r['author'] = [r['author']]
    else:
        r['author'] = []
    # An empty or all-uppercase author string is discarded.
    if not r['author'] or r['author'][0].isupper():
        del r['author']
    if r['description'].lower() == 'Description of this item is not available at this time.'.lower():
        r['description'] = ''
    return r
def amazon_lookup(asin):
    """Return the distinct ISBNs found in the 'Formats' table of an Amazon page.

    The page http://www.amazon.com/dp/<asin> is read with timeout=-1 and
    decoded leniently; order of the returned list is not defined.
    """
    page = read_url('http://www.amazon.com/dp/%s' % asin, timeout=-1)
    text = page.decode('utf-8', 'ignore')
    formats_section = find_re(text, 'Formats</h3>.*?</table')
    unique_isbns = set(find_isbns(formats_section))
    return list(unique_isbns)

View file

@ -1,409 +0,0 @@
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
# MARC country code -> place name, as scraped by the script at the bottom of
# this file. Codes with a leading '-' are kept exactly as scraped.
# HTML entities left over from scraping (&eacute;, &ccedil;, &ocirc;) have
# been replaced by the characters they stand for.
COUNTRIES = {
    "gw": "Germany",
    "gv": "Guinea",
    "gu": "Guam",
    "gt": "Guatemala",
    "gs": "Georgia (Republic)",
    "gr": "Greece",
    "-ge": "Germany (East)",
    "gp": "Guadeloupe",
    "mnu": "Minnesota",
    "gy": "Guyana",
    "gd": "Grenada",
    "gb": "Kiribati",
    "go": "Gabon",
    "gm": "Gambia",
    "alu": "Alabama",
    "gi": "Gibraltar",
    "gh": "Ghana",
    "tz": "Tanzania",
    "tv": "Tuvalu",
    "tu": "Turkey",
    "tr": "Trinidad and Tobago",
    "ts": "United Arab Emirates",
    "to": "Tonga",
    "tl": "Tokelau",
    "tk": "Turkmenistan",
    "th": "Thailand",
    "ti": "Tunisia",
    "tg": "Togo",
    "tc": "Turks and Caicos Islands",
    "ta": "Tajikistan",
    "-gn": "Gilbert and Ellice Islands",
    "-us": "United States",
    "-ajr": "Azerbaijan S.S.R.",
    "-iu": "Israel-Syria Demilitarized Zones",
    "-iw": "Israel-Jordan Demilitarized Zones",
    "za": "Zambia",
    "nbu": "Nebraska",
    "scu": "South Carolina",
    "bg": "Bangladesh",
    "cau": "California",
    "abc": "Alberta",
    "xoa": "Northern Territory",
    "meu": "Maine",
    "ctu": "Connecticut",
    "my": "Malaysia",
    "aku": "Alaska",
    "gl": "Greenland",
    "-cn": "Canada",
    "wiu": "Wisconsin",
    "-cz": "Canal Zone",
    "txu": "Texas",
    "-cs": "Czechoslovakia",
    "-cp": "Canton and Enderbury Islands",
    "msu": "Mississippi",
    "-ln": "Central and Southern Line Islands",
    "nkc": "New Brunswick",
    "it": "Italy",
    "tnu": "Tennessee",
    "vp": "Various places",
    "mg": "Madagascar",
    "mf": "Mauritius",
    "mc": "Monaco",
    "-ur": "Soviet Union",
    "mm": "Malta",
    "ml": "Mali",
    "mo": "Montenegro",
    "flu": "Florida",
    "deu": "Delaware",
    "mk": "Oman",
    "mj": "Montserrat",
    "mu": "Mauritania",
    "mw": "Malawi",
    "mv": "Moldova",
    "mq": "Martinique",
    "mp": "Mongolia",
    "mr": "Morocco",
    "-ui": "United Kingdom Misc. Islands",
    "mx": "Mexico",
    "-uk": "United Kingdom",
    "mz": "Mozambique",
    "kyu": "Kentucky",
    "hiu": "Hawaii",
    "enk": "England",
    "nyu": "New York (State)",
    "fp": "French Polynesia",
    "fr": "France",
    "fs": "Terres australes et antarctiques françaises",
    "mau": "Massachusetts",
    "snc": "Saskatchewan",
    "fa": "Faroe Islands",
    "fg": "French Guiana",
    "lau": "Louisiana",
    "fj": "Fiji",
    "fk": "Falkland Islands",
    "fm": "Micronesia (Federated States)",
    "sz": "Switzerland",
    "sy": "Syria",
    "sx": "Namibia",
    "ss": "Western Sahara",
    "sr": "Surinam",
    "sq": "Swaziland",
    "sp": "Spain",
    "sw": "Sweden",
    "su": "Saudi Arabia",
    "st": "Saint-Martin",
    "sj": "Sudan",
    "si": "Singapore",
    "sh": "Spanish North Africa",
    "so": "Somalia",
    "sn": "Sint Maarten",
    "sm": "San Marino",
    "sl": "Sierra Leone",
    "sc": "Saint-Barthélemy",
    "sa": "South Africa",
    "sg": "Senegal",
    "sf": "Sao Tome and Principe",
    "se": "Seychelles",
    "sd": "South Sudan",
    "-unr": "Ukraine",
    "-kgr": "Kirghiz S.S.R.",
    "le": "Lebanon",
    "lb": "Liberia",
    "-hk": "Hong Kong",
    "lo": "Lesotho",
    "lh": "Liechtenstein",
    "li": "Lithuania",
    "lv": "Latvia",
    "lu": "Luxembourg",
    "vtu": "Vermont",
    "ls": "Laos",
    "xc": "Maldives",
    "ly": "Libya",
    "oku": "Oklahoma",
    "ye": "Yemen",
    "-tkr": "Turkmen S.S.R.",
    "nfc": "Newfoundland and Labrador",
    "ft": "Djibouti",
    "em": "Timor-Leste",
    "eg": "Equatorial Guinea",
    "ea": "Eritrea",
    "ec": "Ecuador",
    "-gsr": "Georgian S.S.R.",
    "et": "Ethiopia",
    "es": "El Salvador",
    "er": "Estonia",
    "ru": "Russia (Federation)",
    "rw": "Rwanda",
    "re": "Réunion",
    "rb": "Serbia",
    "rm": "Romania",
    "rh": "Zimbabwe",
    "-err": "Estonia",
    "oru": "Oregon",
    "quc": "Québec (Province)",
    "ntc": "Northwest Territories",
    "wlk": "Wales",
    "xj": "Saint Helena",
    "xk": "Saint Lucia",
    "xh": "Niue",
    "xn": "Macedonia",
    "xo": "Slovakia",
    "xl": "Saint Pierre and Miquelon",
    "xm": "Saint Vincent and the Grenadines",
    "xb": "Cocos (Keeling) Islands",
    "onc": "Ontario",
    "xa": "Christmas Island (Indian Ocean)",
    "xf": "Midway Islands",
    "xd": "Saint Kitts-Nevis",
    "xe": "Marshall Islands",
    "nhu": "New Hampshire",
    "xx": "No place, unknown, or undetermined",
    "fi": "Finland",
    "xr": "Czech Republic",
    "xs": "South Georgia and the South Sandwich Islands",
    "xp": "Spratly Island",
    "xv": "Slovenia",
    "-tt": "Trust Territory of the Pacific Islands",
    "iau": "Iowa",
    "ncu": "North Carolina",
    "stk": "Scotland",
    "xra": "South Australia",
    "miu": "Michigan",
    "kg": "Kyrgyzstan",
    "ke": "Kenya",
    "ko": "Korea (South)",
    "kn": "Korea (North)",
    "kv": "Kosovo",
    "ku": "Kuwait",
    "kz": "Kazakhstan",
    "-pt": "Portuguese Timor",
    "ksu": "Kansas",
    "dm": "Benin",
    "dk": "Denmark",
    "-ys": "Yemen (People's Democratic Republic)",
    "-yu": "Serbia and Montenegro",
    "-bwr": "Byelorussian S.S.R.",
    "dr": "Dominican Republic",
    "dq": "Dominica",
    "qa": "Qatar",
    "aru": "Arkansas",
    "nuc": "Nunavut",
    "wf": "Wallis and Futuna",
    "wk": "Wake Island",
    "wj": "West Bank of the Jordan River",
    "jm": "Jamaica",
    "vra": "Victoria",
    "jo": "Jordan",
    "ws": "Samoa",
    "ji": "Johnston Atoll",
    "-na": "Netherlands Antilles",
    "ja": "Japan",
    "cou": "Colorado",
    "-wb": "West Berlin",
    "ilu": "Illinois",
    "-nm": "Northern Mariana Islands",
    "ck": "Colombia",
    "cj": "Cayman Islands",
    "ci": "Croatia",
    "ch": "China (Republic : 1949- )",
    "co": "Curaçao",
    "cm": "Cameroon",
    "cl": "Chile",
    "-rur": "Russian S.F.S.R.",
    "cb": "Cambodia",
    "ca": "Caribbean Netherlands",
    "cg": "Congo (Democratic Republic)",
    "cf": "Congo (Brazzaville)",
    "-lir": "Lithuania",
    "cd": "Chad",
    "cy": "Cyprus",
    "cx": "Central African Republic",
    "cr": "Costa Rica",
    "cq": "Comoros",
    "cw": "Cook Islands",
    "cv": "Cape Verde",
    "cu": "Cuba",
    "pr": "Puerto Rico",
    "pp": "Papua New Guinea",
    "pw": "Palau",
    "py": "Paraguay",
    "pc": "Pitcairn Island",
    "pf": "Paracel Islands",
    "pg": "Guinea-Bissau",
    "pe": "Peru",
    "pk": "Pakistan",
    "ph": "Philippines",
    "pn": "Panama",
    "po": "Portugal",
    "pl": "Poland",
    "pic": "Prince Edward Island",
    "xxu": "United States",
    "gau": "Georgia",
    "xxc": "Canada",
    "xxk": "United Kingdom",
    "iy": "Iraq-Saudi Arabia Neutral Zone",
    "vb": "British Virgin Islands",
    "vc": "Vatican City",
    "ve": "Venezuela",
    "iq": "Iraq",
    "vi": "Virgin Islands of the United States",
    "is": "Israel",
    "ir": "Iran",
    "vm": "Vietnam",
    "iv": "Côte d'Ivoire",
    "ii": "India",
    "-ac": "Ashmore and Cartier Islands",
    "io": "Indonesia",
    "-ai": "Anguilla",
    "ic": "Iceland",
    "ie": "Ireland",
    "pau": "Pennsylvania",
    "-jn": "Jan Mayen",
    "nik": "Northern Ireland",
    "wyu": "Wyoming",
    "-air": "Armenian S.S.R.",
    "-sv": "Swan Islands",
    "-mvr": "Moldavian S.S.R.",
    "-sk": "Sikkim",
    "riu": "Rhode Island",
    "-sb": "Svalbard",
    "-xi": "Saint Kitts-Nevis-Anguilla",
    "wea": "Western Australia",
    "cc": "China",
    "nvu": "Nevada",
    "mou": "Missouri",
    "ce": "Sri Lanka",
    "qea": "Queensland",
    "-mh": "Macao",
    "nju": "New Jersey",
    "ykc": "Yukon Territory",
    "-vs": "Vietnam, South",
    "tma": "Tasmania",
    "-vn": "Vietnam, North",
    "bd": "Burundi",
    "be": "Belgium",
    "bf": "Bahamas",
    "nmu": "New Mexico",
    "ba": "Bahrain",
    "bb": "Barbados",
    "bl": "Brazil",
    "bm": "Bermuda Islands",
    "bn": "Bosnia and Hercegovina",
    "bo": "Bolivia",
    "bh": "Belize",
    "bi": "British Indian Ocean Territory",
    "bt": "Bhutan",
    "bu": "Bulgaria",
    "bv": "Bouvet Island",
    "bw": "Belarus",
    "bp": "Solomon Islands",
    "br": "Burma",
    "bs": "Botswana",
    "dcu": "District of Columbia",
    "bx": "Brunei",
    "aca": "Australian Capital Territory",
    "idu": "Idaho",
    "xna": "New South Wales",
    "ot": "Mayotte",
    "ndu": "North Dakota",
    "nsc": "Nova Scotia",
    "-kzr": "Kazakh S.S.R.",
    "mbc": "Manitoba",
    "-lvr": "Latvia",
    "-uzr": "Uzbek S.S.R.",
    "wau": "Washington (State)",
    "vau": "Virginia",
    "sdu": "South Dakota",
    "gz": "Gaza Strip",
    "ht": "Haiti",
    "hu": "Hungary",
    "ho": "Honduras",
    "hm": "Heard and McDonald Islands",
    "xga": "Coral Sea Islands Territory",
    "uy": "Uruguay",
    "uz": "Uzbekistan",
    "uv": "Burkina Faso",
    "up": "United States Misc. Pacific Islands",
    "mtu": "Montana",
    "un": "Ukraine",
    "utu": "Utah",
    "ug": "Uganda",
    "ua": "Egypt",
    "azu": "Arizona",
    "uc": "United States Misc. Caribbean Islands",
    "aa": "Albania",
    "ae": "Algeria",
    "ag": "Argentina",
    "af": "Afghanistan",
    "ai": "Armenia (Republic)",
    "inu": "Indiana",
    "uik": "United Kingdom Misc. Islands",
    "aj": "Azerbaijan",
    "am": "Anguilla",
    "ao": "Angola",
    "an": "Andorra",
    "aq": "Antigua and Barbuda",
    "as": "American Samoa",
    "au": "Austria",
    "at": "Australia",
    "aw": "Aruba",
    "ay": "Antarctica",
    "ohu": "Ohio",
    "nl": "New Caledonia",
    "-ry": "Ryukyu Islands, Southern",
    "nn": "Vanuatu",
    "no": "Norway",
    "ne": "Netherlands",
    "ng": "Niger",
    "nx": "Norfolk Island",
    "nz": "New Zealand",
    "np": "Nepal",
    "nq": "Nicaragua",
    "nr": "Nigeria",
    "mdu": "Maryland",
    "nu": "Nauru",
    "nw": "Northern Mariana Islands",
    "wvu": "West Virginia",
    "-xxr": "Soviet Union",
    "-tar": "Tajik S.S.R.",
    "bcc": "British Columbia"
}
if __name__ == '__main__':
    # Maintenance script: scrape the MARC country code table from loc.gov
    # and rewrite the COUNTRIES literal of this file in place.
    import html
    import json
    import re
    import ox
    from ox.cache import read_url

    url = "http://www.loc.gov/marc/countries/countries_code.html"
    data = read_url(url).decode('utf-8')
    # Each table row yields a (code, name) pair. Entities are decoded after
    # tag stripping so names are stored as plain text, not as '&eacute;' etc.
    countries = dict([
        [html.unescape(ox.strip_tags(c)) for c in r]
        for r in re.compile('<tr>.*?class="code">(.*?)</td>.*?<td>(.*?)</td>', re.DOTALL).findall(data)
    ])
    # Keep the JSON as text: formatting bytes with %s on Python 3 would
    # write a b'...' repr into the source file.
    data = json.dumps(countries, indent=4, ensure_ascii=False)
    with open(__file__, encoding='utf-8') as f:
        pydata = f.read()
    # A callable replacement is inserted verbatim; a string template would
    # have its backslashes interpreted by re.sub.
    pydata = re.sub(
        re.compile('\nCOUNTRIES = {.*?}\n\n', re.DOTALL),
        lambda match: '\nCOUNTRIES = %s\n\n' % data, pydata)
    with open(__file__, 'w', encoding='utf-8') as f:
        f.write(pydata)

View file

@ -1,210 +0,0 @@
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
from datetime import datetime
from urllib.parse import urlencode
import json
from ox.cache import read_url
from .dewey import get_classification
from .marc_countries import COUNTRIES
from .utils import normalize_isbn
import logging
logger = logging.getLogger(__name__)
# Maps field names of an Open Library record to the key names used in the
# dicts built by format(). Both isbn_10 and isbn_13 map to 'isbn'; format()
# merges their values into one list.
KEYS = {
    'authors': 'author',
    'covers': 'cover',
    'dewey_decimal_class': 'classification',
    'isbn_10': 'isbn',
    'isbn_13': 'isbn',
    'lccn': 'lccn',
    'number_of_pages': 'pages',
    'languages': 'language',
    'oclc_numbers': 'oclc',
    'publish_country': 'country',
    'publish_date': 'date',
    'publishers': 'publisher',
    'publish_places': 'place',
    'series': 'series',
    'title': 'title',
}
def find(query):
    """Search Open Library and return the matching books as formatted dicts.

    Only search results whose key starts with '/books' are fetched. Each
    returned dict is the output of format() with 'olid' set to [olid] and
    'primaryid' set to ['olid', olid].
    """
    query = query.strip()
    logger.debug('find %s', query)
    r = api.search(query)
    results = []
    ids = [b for b in r.get('result', []) if b.startswith('/books')]
    # The fallback must be a mapping because .items() is called on it; a
    # list default raised AttributeError whenever 'result' was missing.
    books = api.get_many(ids).get('result', {})
    for olid, value in books.items():
        # Keys look like '/books/<olid>'; keep only the last path segment.
        olid = olid.split('/')[-1]
        book = format(value)
        book['olid'] = [olid]
        book['primaryid'] = ['olid', olid]
        results.append(book)
    return results
def get_ids(key, value):
    """Return identifiers related to the given one as unique (key, value) tuples.

    key 'olid': the edition is fetched with lookup() and its 'isbn', 'lccn'
    and 'oclc' values are returned.
    key 'isbn', 'oclc' or 'lccn': matching editions are queried through
    api.things(); for each '/books' hit the OLID itself and everything
    get_ids('olid', ...) reports for it are returned.
    """
    found = []

    def _remember(pair):
        # Keep first-seen order and skip duplicates.
        if pair not in found:
            found.append(pair)

    if key == 'olid':
        record = lookup(value)
        for name in ('isbn', 'lccn', 'oclc'):
            for item in record.get(name, ()):
                _remember((name, item))
    elif key in ('isbn', 'oclc', 'lccn'):
        logger.debug('get_ids %s %s', key, value)
        if key == 'isbn':
            # Query field depends on the ISBN length: isbn_10 or isbn_13.
            key = 'isbn_%s'%len(value)
        response = api.things({'type': '/type/edition', key: value})
        for path in response.get('result', []):
            if not path.startswith('/books'):
                continue
            olid = path.split('/')[-1]
            related = get_ids('olid', olid)
            _remember(('olid', olid))
            for pair in related:
                _remember(pair)

    if found:
        logger.debug('get_ids %s %s => %s', key, value, found)
    return found
def lookup(id, return_all=False):
    """Fetch the edition '/books/<id>' and return it as a formatted dict.

    The API result is passed to format() together with return_all. The
    returned dict always carries an 'olid' entry; it is set to [id] unless
    id is already contained in an existing 'olid' value.
    """
    logger.debug('lookup %s', id)
    response = api.get('/books/' + id)
    record = format(response.get('result', {}), return_all)
    if id not in record.get('olid', []):
        record['olid'] = [id]
    logger.debug('lookup %s => %s', id, list(record.keys()))
    return record
def get_type(obj):
    """Return the 'type' of obj, unwrapping a {'key': ...} dict to its key.

    Returns None when obj has no 'type' entry.
    """
    kind = obj.get('type')
    return kind['key'] if isinstance(kind, dict) else kind
def parse_date(s):
    """Convert an English long-form date to ISO-style text where possible.

    'January 1, 1998' becomes '1998-01-01' and 'January 1998' becomes
    '1998-01'. Anything that matches neither pattern (including non-string
    input) is returned unchanged.
    """
    for pattern, fmt in (('%B %d, %Y', '%Y-%m-%d'), ('%B %Y', '%Y-%m')):
        try:
            return datetime.strptime(s, pattern).strftime(fmt)
        except (ValueError, TypeError):
            # ValueError: text does not match this pattern.
            # TypeError: s is not a string. Try the next pattern either way.
            continue
    return s
def format(info, return_all=False):
    """Convert a raw Open Library edition record into this package's key scheme.

    Only fields listed in KEYS are copied, under their mapped names. Author
    and language references are resolved to names, country codes are looked
    up in COUNTRIES, the first cover id becomes a cover URL, ISBN-10 and
    ISBN-13 values are normalized and merged into one 'isbn' list, and most
    other list values are reduced to their first element. A subtitle is
    appended to the title and the classification is passed through
    get_classification().

    return_all is accepted but not used by this function.
    """
    data = {}
    if 'works' in info:
        work = api.get(info['works'][0]['key'])['result']
    else:
        work = None
    for key in KEYS:
        if key not in info:
            continue
        value = info[key]
        if key == 'authors':
            if work:
                # Prefer the authors of the work over those of the edition.
                value = resolve_names([
                    r['author'] for r in work.get('authors', [])
                    if get_type(r) == '/type/author_role'
                ])
            else:
                value = resolve_names(value)
        elif key == 'publish_country':
            value = value.strip()
            value = COUNTRIES.get(value, value)
        elif key == 'covers':
            value = 'https://covers.openlibrary.org/b/id/%s.jpg' % value[0]
        elif key == 'languages':
            value = resolve_names(value)
        elif key in ('isbn_10', 'isbn_13'):
            if not isinstance(value, list):
                value = [value]
            value = list(map(normalize_isbn, value))
            # Both ISBN fields map to 'isbn'; extend what is already there.
            if KEYS[key] in data:
                value = data[KEYS[key]] + value
        elif isinstance(value, list) and key not in ('publish_places', 'lccn', 'oclc_numbers'):
            value = value[0]
        if key == 'publish_date':
            value = parse_date(value)
        if key == 'publish_places':
            # Strip one enclosing '[' / ']' from each place name.
            for i, v in enumerate(value):
                if v.startswith('['):
                    v = v[1:]
                if v.endswith(']'):
                    v = v[:-1]
                value[i] = v
        data[KEYS[key]] = value
    if 'subtitle' in info:
        if 'title' in data:
            data['title'] += ' ' + info['subtitle']
        else:
            # A record with a subtitle but no title used to raise KeyError;
            # fall back to the subtitle alone.
            data['title'] = info['subtitle']
    if 'classification' in data:
        value = data['classification']
        if isinstance(value, list):
            value = value[0]
        data['classification'] = get_classification(value.split('/')[0])
    return data
def resolve_names(objects, key='name'):
    """Fetch the records referenced by objects and return one field of each.

    objects is a sequence of dicts with a 'key' entry. All of them are
    fetched in one get_many call; a record of type '/type/redirect' with a
    'location' is replaced by the record it points to. The value stored
    under key (default 'name') is collected from every record.
    """
    refs = [obj['key'] for obj in objects]
    records = api.get_many(refs).get('result', {})
    names = []
    for record in records.values():
        is_redirect = (
            'location' in record
            and record.get('type', {}).get('key') == '/type/redirect'
        )
        if is_redirect:
            record = api.get(record['location']).get('result', {})
        names.append(record[key])
    return names
class API(object):
    """Thin client for the JSON endpoints below https://openlibrary.org/api."""

    base = 'https://openlibrary.org/api'

    def _request(self, action, data, timeout=None):
        """Call <base>/<action> with data as query parameters; return parsed JSON.

        Non-string parameter values are JSON-encoded first. When timeout is
        None, read_url's own default applies. A response containing
        '504 Gateway Time-out' is requested once more with timeout=-1.
        Error responses are logged and still returned to the caller.
        """
        # Encode into a fresh dict so the caller's dict is left untouched.
        params = {
            key: value if isinstance(value, str) else json.dumps(value)
            for key, value in data.items()
        }
        url = self.base + '/' + action + '?' + urlencode(params)
        if timeout is None:
            r = read_url(url).decode('utf-8')
        else:
            # Keyword argument: passed positionally the value would land in
            # read_url's second parameter, which is not the timeout.
            r = read_url(url, timeout=timeout).decode('utf-8')
        if '504 Gateway Time-out' in r:
            # NOTE(review): confirm that timeout=-1 really forces a fresh
            # request in ox.cache rather than serving the cached 504 again.
            r = read_url(url, timeout=-1).decode('utf-8')
        result = json.loads(r)
        if 'status' in result and result['status'] == 'error' or 'error' in result:
            logger.info('FAILED %s %s', action, params)
            logger.info('URL %s', url)
        return result

    def get(self, key):
        """Return the API response for a single object key."""
        data = self._request('get', {'key': key})
        return data

    def get_many(self, keys):
        """Return the API response for a list of object keys."""
        data = self._request('get_many', {'keys': keys})
        return data

    def search(self, query):
        """Run a search; a plain string is wrapped as {'query': <string>}."""
        if isinstance(query, str):
            query = {
                'query': query
            }
        data = self._request('search', {'q': query})
        if 'status' in data and data['status'] == 'error':
            logger.info('FAILED %s', query)
        return data

    def things(self, query):
        """Return the API response of a 'things' query for the given query dict."""
        data = self._request('things', {'query': query})
        return data
# Shared client instance used by the functions of this module.
api = API()

View file

@ -1,116 +0,0 @@
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
import re
import hashlib
from ox.cache import read_url
import lxml.html
import stdnum.isbn
from .utils import normalize_isbn
import logging
logger = logging.getLogger(__name__)
# Site root that the search and record URLs in this module are built from.
base_url = 'http://www.worldcat.org'
def get_ids(key, value):
    """Return identifiers related to the given one as (key, value) tuples.

    key 'isbn': the WorldCat book search is fetched; the first OCLC number
    found is returned together with every other ISBN of that record.
    key 'oclc': every ISBN of the record is returned.
    Any other key yields an empty list.
    """
    ids = []
    if key == 'isbn':
        url = '%s/search?qt=worldcat_org_bks&q=%s' % (base_url, value)
        html = read_url(url).decode('utf-8')
        # Raw string: '\d' in a plain literal is an invalid escape sequence.
        matches = re.compile(r'/title.*?oclc/(\d+).*?"').findall(html)
        if matches:
            info = lookup(matches[0])
            ids.append(('oclc', matches[0]))
            for v in info.get('isbn', []):
                if v != value:
                    ids.append(('isbn', v))
    elif key == 'oclc':
        info = lookup(value)
        # A separate loop name keeps `value` intact, so the debug line below
        # reports the OCLC number that was asked for, not the last ISBN.
        for isbn in info.get('isbn', []):
            ids.append(('isbn', isbn))
    if ids:
        logger.debug('get_ids %s %s => %s', key, value, ids)
    return ids
def lookup(id):
    """Scrape the WorldCat record page for an OCLC number into a metadata dict.

    Values come from elements whose id contains 'bibtip' and from the
    'util-em-note' textarea. The result always has 'oclc': [id]; depending
    on the page it may also have 'isbn' (list), 'cover', 'author' (list),
    'title', 'publisher', 'date' and 'place' (list), plus any other scraped
    field not in the removal list below.
    """
    data = {
        'oclc': [id]
    }
    url = '%s/oclc/%s' % (base_url, id)
    html = read_url(url).decode('utf-8')
    doc = lxml.html.document_fromstring(html)
    for e in doc.xpath("//*[contains(@id, 'bibtip')]"):
        key = e.attrib['id'].replace('bibtip_', '')
        value = e.text_content().strip()
        if value:
            data[key] = value

    note = doc.xpath('//textarea[@id="util-em-note"]')
    if note:
        # The second blank-line separated paragraph holds 'Name: value'
        # lines. A missing paragraph or a line without ':' is skipped
        # instead of raising.
        sections = (note[0].text or '').split('\n\n')
        if len(sections) > 1:
            fields = dict(
                line.split(':', 1)
                for line in sections[1].split('\n') if ':' in line
            )
            for name in fields:
                value = fields[name].strip()
                if value:
                    data[name.lower()] = value

    # Scraped fields that are not part of the returned metadata.
    for key in ('id', 'instance', 'mediatype', 'reclist', 'shorttitle'):
        if key in data:
            del data[key]

    if 'isxn' in data:
        # 'isxn' is a space separated list; keep the valid ISBNs, deduplicated.
        for isbn in data.pop('isxn').split(' '):
            isbn = normalize_isbn(isbn)
            if stdnum.isbn.is_valid(isbn):
                if 'isbn' not in data:
                    data['isbn'] = []
                if isbn not in data['isbn']:
                    data['isbn'].append(isbn)

    cover = doc.xpath('//img[@class="cover"]')
    if cover:
        data['cover'] = cover[0].attrib['src']
        if data['cover'].startswith('//'):
            data['cover'] = 'http:' + data['cover']
        cdata = read_url(data['cover'])
        # Covers with these SHA-1 digests are dropped (presumably
        # placeholder images -- confirm).
        if hashlib.sha1(cdata).hexdigest() in (
            'd2e9ab0c87193d69a7d3a3c21ae4aa550f7dcf00',
            '70f16d3e077cdd47ef6b331001dbb1963677fa04'
        ):
            del data['cover']

    if 'author' in data:
        data['author'] = data['author'].split('; ')
    if 'title' in data:
        data['title'] = data['title'].replace(' : ', ': ')
    if 'publisher' in data:
        # Preferred shape: '<place> : <publisher>, <year>'.
        m = re.compile(r'(.+) : (.+), (\d{4})').findall(data['publisher'])
        if m:
            place, publisher, date = m[0]
            data['publisher'] = publisher
            data['date'] = date
            data['place'] = [place]
        elif ':' in data['publisher']:
            place, publisher = data['publisher'].split(':', 1)
            data['place'] = [place.strip()]
            data['publisher'] = publisher.split(',')[0].strip()
            m = re.compile(r'\d{4}').findall(publisher)
            if m:
                data['date'] = m[0]
    if 'place' in data:
        # Strip one enclosing '[' / ']' from the place name.
        if data['place'][0].startswith('['):
            data['place'] = [data['place'][0][1:]]
        if data['place'][0].endswith(']'):
            data['place'] = [data['place'][0][:-1]]
    logger.debug('lookup %s => %s', id, list(data.keys()))
    return data
# Alias: lookup() is also reachable under the name `info`.
info = lookup
def find(title, author, year):
    """Return search results for a title/author/year query.

    This implementation performs no search: the arguments are ignored and
    an empty list is always returned.
    """
    no_results = []
    return no_results