# openmedialibrary/oml/meta/marc_countries.py (410 lines, 11 KiB, Python)
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
# Lookup table from MARC country code to place name.
#
# The table is machine-generated: running this module as a script downloads
# the Library of Congress MARC country code list and rewrites this literal in
# place, so manual edits (and comments placed inside the braces) are lost on
# the next regeneration.
# NOTE(review): keys with a leading "-" are presumably the codes the LOC list
# marks as discontinued -- confirm against the upstream page.
COUNTRIES = {
    "gw": "Germany",
    "gv": "Guinea",
    "gu": "Guam",
    "gt": "Guatemala",
    "gs": "Georgia (Republic)",
    "gr": "Greece",
    "-ge": "Germany (East)",
    "gp": "Guadeloupe",
    "mnu": "Minnesota",
    "gy": "Guyana",
    "gd": "Grenada",
    "gb": "Kiribati",
    "go": "Gabon",
    "gm": "Gambia",
    "alu": "Alabama",
    "gi": "Gibraltar",
    "gh": "Ghana",
    "tz": "Tanzania",
    "tv": "Tuvalu",
    "tu": "Turkey",
    "tr": "Trinidad and Tobago",
    "ts": "United Arab Emirates",
    "to": "Tonga",
    "tl": "Tokelau",
    "tk": "Turkmenistan",
    "th": "Thailand",
    "ti": "Tunisia",
    "tg": "Togo",
    "tc": "Turks and Caicos Islands",
    "ta": "Tajikistan",
    "-gn": "Gilbert and Ellice Islands",
    "-us": "United States",
    "-ajr": "Azerbaijan S.S.R.",
    "-iu": "Israel-Syria Demilitarized Zones",
    "-iw": "Israel-Jordan Demilitarized Zones",
    "za": "Zambia",
    "nbu": "Nebraska",
    "scu": "South Carolina",
    "bg": "Bangladesh",
    "cau": "California",
    "abc": "Alberta",
    "xoa": "Northern Territory",
    "meu": "Maine",
    "ctu": "Connecticut",
    "my": "Malaysia",
    "aku": "Alaska",
    "gl": "Greenland",
    "-cn": "Canada",
    "wiu": "Wisconsin",
    "-cz": "Canal Zone",
    "txu": "Texas",
    "-cs": "Czechoslovakia",
    "-cp": "Canton and Enderbury Islands",
    "msu": "Mississippi",
    "-ln": "Central and Southern Line Islands",
    "nkc": "New Brunswick",
    "it": "Italy",
    "tnu": "Tennessee",
    "vp": "Various places",
    "mg": "Madagascar",
    "mf": "Mauritius",
    "mc": "Monaco",
    "-ur": "Soviet Union",
    "mm": "Malta",
    "ml": "Mali",
    "mo": "Montenegro",
    "flu": "Florida",
    "deu": "Delaware",
    "mk": "Oman",
    "mj": "Montserrat",
    "mu": "Mauritania",
    "mw": "Malawi",
    "mv": "Moldova",
    "mq": "Martinique",
    "mp": "Mongolia",
    "mr": "Morocco",
    "-ui": "United Kingdom Misc. Islands",
    "mx": "Mexico",
    "-uk": "United Kingdom",
    "mz": "Mozambique",
    "kyu": "Kentucky",
    "hiu": "Hawaii",
    "enk": "England",
    "nyu": "New York (State)",
    "fp": "French Polynesia",
    "fr": "France",
    "fs": "Terres australes et antarctiques françaises",
    "mau": "Massachusetts",
    "snc": "Saskatchewan",
    "fa": "Faroe Islands",
    "fg": "French Guiana",
    "lau": "Louisiana",
    "fj": "Fiji",
    "fk": "Falkland Islands",
    "fm": "Micronesia (Federated States)",
    "sz": "Switzerland",
    "sy": "Syria",
    "sx": "Namibia",
    "ss": "Western Sahara",
    "sr": "Surinam",
    "sq": "Swaziland",
    "sp": "Spain",
    "sw": "Sweden",
    "su": "Saudi Arabia",
    "st": "Saint-Martin",
    "sj": "Sudan",
    "si": "Singapore",
    "sh": "Spanish North Africa",
    "so": "Somalia",
    "sn": "Sint Maarten",
    "sm": "San Marino",
    "sl": "Sierra Leone",
    "sc": "Saint-Barthélemy",
    "sa": "South Africa",
    "sg": "Senegal",
    "sf": "Sao Tome and Principe",
    "se": "Seychelles",
    "sd": "South Sudan",
    "-unr": "Ukraine",
    "-kgr": "Kirghiz S.S.R.",
    "le": "Lebanon",
    "lb": "Liberia",
    "-hk": "Hong Kong",
    "lo": "Lesotho",
    "lh": "Liechtenstein",
    "li": "Lithuania",
    "lv": "Latvia",
    "lu": "Luxembourg",
    "vtu": "Vermont",
    "ls": "Laos",
    "xc": "Maldives",
    "ly": "Libya",
    "oku": "Oklahoma",
    "ye": "Yemen",
    "-tkr": "Turkmen S.S.R.",
    "nfc": "Newfoundland and Labrador",
    "ft": "Djibouti",
    "em": "Timor-Leste",
    "eg": "Equatorial Guinea",
    "ea": "Eritrea",
    "ec": "Ecuador",
    "-gsr": "Georgian S.S.R.",
    "et": "Ethiopia",
    "es": "El Salvador",
    "er": "Estonia",
    "ru": "Russia (Federation)",
    "rw": "Rwanda",
    "re": "Réunion",
    "rb": "Serbia",
    "rm": "Romania",
    "rh": "Zimbabwe",
    "-err": "Estonia",
    "oru": "Oregon",
    "quc": "Québec (Province)",
    "ntc": "Northwest Territories",
    "wlk": "Wales",
    "xj": "Saint Helena",
    "xk": "Saint Lucia",
    "xh": "Niue",
    "xn": "Macedonia",
    "xo": "Slovakia",
    "xl": "Saint Pierre and Miquelon",
    "xm": "Saint Vincent and the Grenadines",
    "xb": "Cocos (Keeling) Islands",
    "onc": "Ontario",
    "xa": "Christmas Island (Indian Ocean)",
    "xf": "Midway Islands",
    "xd": "Saint Kitts-Nevis",
    "xe": "Marshall Islands",
    "nhu": "New Hampshire",
    "xx": "No place, unknown, or undetermined",
    "fi": "Finland",
    "xr": "Czech Republic",
    "xs": "South Georgia and the South Sandwich Islands",
    "xp": "Spratly Island",
    "xv": "Slovenia",
    "-tt": "Trust Territory of the Pacific Islands",
    "iau": "Iowa",
    "ncu": "North Carolina",
    "stk": "Scotland",
    "xra": "South Australia",
    "miu": "Michigan",
    "kg": "Kyrgyzstan",
    "ke": "Kenya",
    "ko": "Korea (South)",
    "kn": "Korea (North)",
    "kv": "Kosovo",
    "ku": "Kuwait",
    "kz": "Kazakhstan",
    "-pt": "Portuguese Timor",
    "ksu": "Kansas",
    "dm": "Benin",
    "dk": "Denmark",
    "-ys": "Yemen (People's Democratic Republic)",
    "-yu": "Serbia and Montenegro",
    "-bwr": "Byelorussian S.S.R.",
    "dr": "Dominican Republic",
    "dq": "Dominica",
    "qa": "Qatar",
    "aru": "Arkansas",
    "nuc": "Nunavut",
    "wf": "Wallis and Futuna",
    "wk": "Wake Island",
    "wj": "West Bank of the Jordan River",
    "jm": "Jamaica",
    "vra": "Victoria",
    "jo": "Jordan",
    "ws": "Samoa",
    "ji": "Johnston Atoll",
    "-na": "Netherlands Antilles",
    "ja": "Japan",
    "cou": "Colorado",
    "-wb": "West Berlin",
    "ilu": "Illinois",
    "-nm": "Northern Mariana Islands",
    "ck": "Colombia",
    "cj": "Cayman Islands",
    "ci": "Croatia",
    "ch": "China (Republic : 1949- )",
    "co": "Curaçao",
    "cm": "Cameroon",
    "cl": "Chile",
    "-rur": "Russian S.F.S.R.",
    "cb": "Cambodia",
    "ca": "Caribbean Netherlands",
    "cg": "Congo (Democratic Republic)",
    "cf": "Congo (Brazzaville)",
    "-lir": "Lithuania",
    "cd": "Chad",
    "cy": "Cyprus",
    "cx": "Central African Republic",
    "cr": "Costa Rica",
    "cq": "Comoros",
    "cw": "Cook Islands",
    "cv": "Cape Verde",
    "cu": "Cuba",
    "pr": "Puerto Rico",
    "pp": "Papua New Guinea",
    "pw": "Palau",
    "py": "Paraguay",
    "pc": "Pitcairn Island",
    "pf": "Paracel Islands",
    "pg": "Guinea-Bissau",
    "pe": "Peru",
    "pk": "Pakistan",
    "ph": "Philippines",
    "pn": "Panama",
    "po": "Portugal",
    "pl": "Poland",
    "pic": "Prince Edward Island",
    "xxu": "United States",
    "gau": "Georgia",
    "xxc": "Canada",
    "xxk": "United Kingdom",
    "iy": "Iraq-Saudi Arabia Neutral Zone",
    "vb": "British Virgin Islands",
    "vc": "Vatican City",
    "ve": "Venezuela",
    "iq": "Iraq",
    "vi": "Virgin Islands of the United States",
    "is": "Israel",
    "ir": "Iran",
    "vm": "Vietnam",
    "iv": "Côte d'Ivoire",
    "ii": "India",
    "-ac": "Ashmore and Cartier Islands",
    "io": "Indonesia",
    "-ai": "Anguilla",
    "ic": "Iceland",
    "ie": "Ireland",
    "pau": "Pennsylvania",
    "-jn": "Jan Mayen",
    "nik": "Northern Ireland",
    "wyu": "Wyoming",
    "-air": "Armenian S.S.R.",
    "-sv": "Swan Islands",
    "-mvr": "Moldavian S.S.R.",
    "-sk": "Sikkim",
    "riu": "Rhode Island",
    "-sb": "Svalbard",
    "-xi": "Saint Kitts-Nevis-Anguilla",
    "wea": "Western Australia",
    "cc": "China",
    "nvu": "Nevada",
    "mou": "Missouri",
    "ce": "Sri Lanka",
    "qea": "Queensland",
    "-mh": "Macao",
    "nju": "New Jersey",
    "ykc": "Yukon Territory",
    "-vs": "Vietnam, South",
    "tma": "Tasmania",
    "-vn": "Vietnam, North",
    "bd": "Burundi",
    "be": "Belgium",
    "bf": "Bahamas",
    "nmu": "New Mexico",
    "ba": "Bahrain",
    "bb": "Barbados",
    "bl": "Brazil",
    "bm": "Bermuda Islands",
    "bn": "Bosnia and Hercegovina",
    "bo": "Bolivia",
    "bh": "Belize",
    "bi": "British Indian Ocean Territory",
    "bt": "Bhutan",
    "bu": "Bulgaria",
    "bv": "Bouvet Island",
    "bw": "Belarus",
    "bp": "Solomon Islands",
    "br": "Burma",
    "bs": "Botswana",
    "dcu": "District of Columbia",
    "bx": "Brunei",
    "aca": "Australian Capital Territory",
    "idu": "Idaho",
    "xna": "New South Wales",
    "ot": "Mayotte",
    "ndu": "North Dakota",
    "nsc": "Nova Scotia",
    "-kzr": "Kazakh S.S.R.",
    "mbc": "Manitoba",
    "-lvr": "Latvia",
    "-uzr": "Uzbek S.S.R.",
    "wau": "Washington (State)",
    "vau": "Virginia",
    "sdu": "South Dakota",
    "gz": "Gaza Strip",
    "ht": "Haiti",
    "hu": "Hungary",
    "ho": "Honduras",
    "hm": "Heard and McDonald Islands",
    "xga": "Coral Sea Islands Territory",
    "uy": "Uruguay",
    "uz": "Uzbekistan",
    "uv": "Burkina Faso",
    "up": "United States Misc. Pacific Islands",
    "mtu": "Montana",
    "un": "Ukraine",
    "utu": "Utah",
    "ug": "Uganda",
    "ua": "Egypt",
    "azu": "Arizona",
    "uc": "United States Misc. Caribbean Islands",
    "aa": "Albania",
    "ae": "Algeria",
    "ag": "Argentina",
    "af": "Afghanistan",
    "ai": "Armenia (Republic)",
    "inu": "Indiana",
    "uik": "United Kingdom Misc. Islands",
    "aj": "Azerbaijan",
    "am": "Anguilla",
    "ao": "Angola",
    "an": "Andorra",
    "aq": "Antigua and Barbuda",
    "as": "American Samoa",
    "au": "Austria",
    "at": "Australia",
    "aw": "Aruba",
    "ay": "Antarctica",
    "ohu": "Ohio",
    "nl": "New Caledonia",
    "-ry": "Ryukyu Islands, Southern",
    "nn": "Vanuatu",
    "no": "Norway",
    "ne": "Netherlands",
    "ng": "Niger",
    "nx": "Norfolk Island",
    "nz": "New Zealand",
    "np": "Nepal",
    "nq": "Nicaragua",
    "nr": "Nigeria",
    "mdu": "Maryland",
    "nu": "Nauru",
    "nw": "Northern Mariana Islands",
    "wvu": "West Virginia",
    "-xxr": "Soviet Union",
    "-tar": "Tajik S.S.R.",
    "bcc": "British Columbia",
}
if __name__ == '__main__':
    # Maintenance entry point: running this module directly downloads the
    # Library of Congress MARC country code page, extracts the code -> name
    # pairs and rewrites the COUNTRIES literal in this very file.
    # The imports stay inside the guard so that importing the module for the
    # table alone needs neither `ox` nor network access.
    import json
    import re
    import sys

    import ox
    from ox.cache import read_url

    url = "http://www.loc.gov/marc/countries/countries_code.html"
    html = read_url(url)
    # The regex below is a text pattern, so make sure we are matching text.
    # NOTE(review): assumes the page is UTF-8 encoded when bytes come back --
    # confirm against the response headers.
    if isinstance(html, bytes):
        html = html.decode('utf-8')

    # One match per table row: the cell carrying class="code", then the next
    # plain <td> cell; markup inside either cell is removed by strip_tags.
    row_re = re.compile(
        r'<tr>.*?class="code">(.*?)</td>.*?<td>(.*?)</td>', re.DOTALL)
    countries = {
        ox.strip_tags(code): ox.strip_tags(name)
        for code, name in row_re.findall(html)
    }
    if not countries:
        # A changed page layout must not wipe the existing table.
        sys.exit('no country codes found at %s; file left unchanged' % url)

    # Keep the dump as text: interpolating encoded bytes with %s would write
    # a b'...' repr into the source file on Python 3.
    literal = json.dumps(countries, indent=4, ensure_ascii=False)

    with open(__file__, encoding='utf-8') as f:
        pydata = f.read()

    # Match from the assignment to the first closing brace that sits alone at
    # the start of a line. The replacement is a callable so that re does not
    # interpret backslashes in the JSON text as template escapes.
    table_re = re.compile(r'\nCOUNTRIES = \{.*?\n\}\n', re.DOTALL)
    pydata, replaced = table_re.subn(
        lambda match: '\nCOUNTRIES = %s\n' % literal, pydata, count=1)
    if replaced != 1:
        sys.exit('COUNTRIES literal not found in %s; file left unchanged'
                 % __file__)

    with open(__file__, 'w', encoding='utf-8') as f:
        f.write(pydata)