1601 lines
No EOL
59 KiB
Python
1601 lines
No EOL
59 KiB
Python
# -*- coding: utf-8 -*-
|
|
|
|
from __future__ import division
|
|
import Image
|
|
import json
|
|
import math
|
|
import os
|
|
import ox
|
|
import re
|
|
|
|
"""
|
|
run this, then html/geo.html, then py/fix_json.py
|
|
|
|
311 countries
|
|
-- sovereign, dependent and disputed countries according to
|
|
http://en.wikipedia.org/wiki/List_of_sovereign_states
|
|
-- dissolved countries, according to
|
|
http://en.wikipedia.org/wiki/ISO_3166-3 and http://www.imdb.com/country/
|
|
-- entities with country codes, according to
|
|
http://en.wikipedia.org/wiki/ISO_3166-1_alpha-2
|
|
(like Ascension, Canary Islands, Ceuta and Melilla, Diego Garcia,
|
|
European Union, Metropolitan France, Tristan da Cunha, UK...)
|
|
|
|
writes json/geo.json and json/countries.json,
|
|
both will be used by html/geo.html
|
|
|
|
"""
|
|
|
|
# Southern latitude limit, in degrees: -degrees(atan(sinh(pi))) ~= -85.0511.
# This is the latitude at which the Mercator y coordinate reaches -pi, i.e. the
# bottom edge of a square Mercator map. It is used below as the 'south' bound
# of every Antarctic bounding box.
MIN_LAT = -math.degrees(math.atan(math.sinh(math.pi)))

# Hand-maintained overrides and supplements for the scraped country data.
# get_countries() reads 'dependencies', 'disputes', 'dissolved', 'google_name',
# 'other', 'imdb_code' and 'imdb_name'; the consumers of the remaining tables
# are not part of this section of the file.
geo = {
    # country name -> country code, for entities whose code is not provided
    # by the scraped lists (grouped by reason, see the notes below)
    'code': {
        # missing on wikipedia
        'Neutral Zone': 'NTHH',
        'Réunion': 'RE',
        # not yet assigned, see http://en.wikipedia.org/wiki/List_of_sovereign_states
        'South Sudan': 'SS',
        # disputed, see http://en.wikipedia.org/wiki/List_of_sovereign_states
        'Abkhazia': 'GE-AB',
        'Kosovo': 'RS-KO',
        'Nagorno-Karabakh': 'AZ-NK',
        'Northern Cyprus': 'CY-NC',
        'South Ossetia': 'GE-SO',
        'Somaliland': 'SO-SO',
        'Transnistria': 'MD-TR',
        # dependent, see http://en.wikipedia.org/wiki/List_of_sovereign_states
        'Akrotiri and Dhekelia': 'GB-AD',
        'Ashmore and Cartier Islands': 'AU-AC',
        'Coral Sea Islands': 'AU-CS',
        'Peter I Island': 'NO-PI',
        # dependent, see http://en.wikipedia.org/wiki/ISO_3166-2:GB
        'England': 'GB-ENG',
        'Northern Ireland': 'GB-NIR',
        'Scotland': 'GB-SCT',
        'Wales': 'GB-WLS',
        # antarctic, see http://en.wikipedia.org/wiki/Territorial_claims_in_Antarctica
        'Adélie Land': 'FR-AQ',
        'Antártica': 'CL-AQ',
        'Argentine Antarctica': 'AR-AQ',
        'Australian Antarctic Territory': 'AU-AQ',
        'Ross Dependency': 'NZ-AQ',
        # exceptional, see http://en.wikipedia.org/wiki/ISO_3166-1_alpha-2
        'Canary Islands': 'IC',
        'Ceuta and Melilla': 'EA',
        'UK': 'UK',
        # dissolved, see http://www.imdb.com/country/
        'Korea': 'KOHH',
        'Netherlands Antilles': 'ANHH',
        'Siam': 'SITH',
        'West Germany': 'DEDE'
    },
    # country name -> bounding box in degrees ('south'/'north' are latitudes,
    # 'west'/'east' are longitudes); note that the Ross Dependency box has
    # west > east, i.e. it spans the 180th meridian
    'coordinates': {
        # see http://en.wikipedia.org/wiki/Territorial_claims_in_Antarctica
        'Adélie Land': {
            'south': MIN_LAT,
            'west': 136.03333333,
            'north': -60,
            'east': 142.18333333
        },
        'Antarctica': {
            'south': MIN_LAT,
            'west': -179.99999999,
            'north': -60,
            'east': 179.99999999
        },
        'Antártica': {
            'south': MIN_LAT,
            'west': -90,
            'north': -60,
            'east': -53
        },
        'Argentine Antarctica': {
            'south': MIN_LAT,
            'west': -74,
            'north': -60,
            'east': -25
        },
        'Australian Antarctic Territory': {
            'south': MIN_LAT,
            'west': 44.63333333,
            'north': -60,
            'east': 160
        },
        'British Antarctic Territory': {
            'south': MIN_LAT,
            'west': -80,
            'north': -60,
            'east': -20
        },
        'Queen Maud Land': {
            'south': MIN_LAT,
            'west': 20,
            'north': -60,
            'east': 44.63333333
        },
        'Ross Dependency': {
            'south': MIN_LAT,
            'west': 160,
            'north': -60,
            'east': -150
        }
    },
    # sovereign country name(s) -> list of dependent entities; a key naming
    # several countries separates them with ', ' (get_countries() splits on it)
    'dependencies': {
        'Argentina': ['Argentine Antarctica'],
        'Argentina, Australia, Chile, France, New Zealand, Norway, United Kingdom': ['Antarctica'],
        'Australia': [
            'Ashmore and Cartier Islands', 'Australian Antarctic Territory', 'Christmas Island', 'Cocos Islands', 'Coral Sea Islands',
            'Heard Island and McDonald Islands', 'Norfolk Island'
        ],
        'Chile': ['Antártica'],
        'China': ['Hong Kong', 'Macau'],
        'Denmark': ['Faroe Islands', 'Greenland'],
        'Finland': ['Åland'],
        'France': [
            'Adélie Land', 'Clipperton Island', 'French Afar and Issas', 'French Guiana', 'French Polynesia',
            'French Southern and Antarctic Territories', 'French Southern Territories', 'Guadeloupe', 'Martinique', 'Mayotte',
            'New Caledonia', 'Réunion', 'Saint Barthélemy', 'Saint Martin', 'Saint Pierre and Miquelon',
            'Wallis and Futuna'
        ],
        'France, United Kingdom': ['New Hebrides'],
        'Netherlands': [
            'Aruba', 'Bonaire, Saint Eustatius and Saba', 'Curaçao', 'Netherlands Antilles', 'Sint Maarten'
        ],
        'New Zealand': ['Cook Islands', 'Niue', 'Ross Dependency', 'Tokelau'],
        'Norway': ['Bouvet Island', 'Peter I Island', 'Queen Maud Land', 'Svalbard and Jan Mayen'],
        'Spain': ['Canary Islands', 'Ceuta and Melilla'],
        'Soviet Union': ['Byelorussian Soviet Socialist Republic'], # fixme: what about ukraine?
        'United Kingdom': [
            'Akrotiri and Dhekelia', 'Anguilla', 'Ascension Island', 'Bermuda', 'British Antarctic Territory',
            'British Indian Ocean Territory', 'British Virgin Islands', 'Cayman Islands', 'Diego Garcia', 'England',
            'Falkland Islands', 'Gibraltar', 'Gilbert and Ellice Islands', 'Guernsey', 'Isle of Man',
            'Jersey', 'Montserrat', 'Northern Ireland', 'Pitcairn Islands', 'Saint Helena',
            'Scotland', 'South Georgia and the South Sandwich Islands', 'Tristan da Cunha', 'Turks and Caicos Islands', 'Wales'
        ],
        'United Kingdom, United States': ['Canton and Enderbury Islands'],
        'United States': [
            'American Samoa', 'Guam', 'Northern Mariana Islands', 'Johnston Island', 'Midway Islands',
            'Pacific Islands', 'Panama Canal Zone', 'Puerto Rico', 'United States Minor Outlying Islands', 'United States Miscellaneous Pacific Islands',
            'United States Virgin Islands', 'Wake Island'
        ]
    },
    # country name(s) -> list of disputed entities; same ', '-separated key
    # convention as 'dependencies'
    'disputes': {
        'Azerbaijan': ['Nagorno-Karabakh'],
        'Cyprus': ['Northern Cyprus'],
        'Georgia': ['Abkhazia', 'South Ossetia'],
        'Iraq, Saudi Arabia': ['Neutral Zone'],
        'Moldova': ['Transnistria'],
        'Serbia': ['Kosovo'],
        'Somalia': ['Somaliland']
    },
    # dissolved country name -> list of the countries it became
    'dissolved': {
        # merged
        'Canton and Enderbury Islands': ['Kiribati'],
        'East Germany': ['Germany'],
        'Johnston Island': ['United States Minor Outlying Islands'],
        'Midway Islands': ['United States Minor Outlying Islands'],
        'North Vietnam': ['Vietnam'],
        'Panama Canal Zone': ['Panama'],
        'Sikkim': ['India'],
        'South Yemen': ['Yemen'],
        'United States Miscellaneous Pacific Islands': ['United States Minor Outlying Islands'],
        'Wake Island': ['United States Minor Outlying Islands'],
        'West Germany': ['Germany'],
        # renamed
        'Burma': ['Myanmar'],
        'Byelorussian Soviet Socialist Republic': ['Belarus'],
        'Dahomey': ['Benin'],
        'East Timor': ['Timor-Leste'],
        'French Afar and Issas': ['Djibouti'],
        'New Hebrides': ['Vanuatu'],
        'Rhodesia': ['Zimbabwe'],
        'Siam': ['Thailand'],
        'Upper Volta': ['Burkina Faso'],
        'Yugoslavia': ['Serbia and Montenegro'], # fixme: shape and flag suggest 'split'
        'Zaire': ['Democratic Republic of the Congo'],
        # split
        'Czechoslovakia': ['Czech Republic', 'Slovakia'],
        'French Southern and Antarctic Territories': ['Adélie Land', 'French Southern Territories'],
        'Gilbert and Ellice Islands': ['Kiribati', 'Tuvalu'],
        'Korea': ['North Korea', 'South Korea'],
        'Netherlands Antilles': ['Bonaire, Saint Eustatius and Saba', 'Curaçao', 'Sint Maarten'],
        'Neutral Zone': ['Iraq', 'Saudi Arabia'],
        'Pacific Islands': ['Marshall Islands', 'Micronesia', 'Northern Mariana Islands', 'Palau'],
        'Serbia and Montenegro': ['Serbia', 'Montenegro'],
        'Soviet Union': [
            'Armenia', 'Azerbaijan', 'Belarus', 'Estonia', 'Georgia',
            'Kazakhstan', 'Kyrgyzstan', 'Latvia', 'Lithuania', 'Moldova',
            'Russia', 'Tajikistan', 'Turkmenistan', 'Ukraine', 'Uzbekistan'
        ]
    },
    # country name -> flag image file name
    # NOTE(review): presumably overrides for the scraped flag URL -- confirm
    # against the code that reads geo['flag']
    'flag': {
        'Australian Antarctic Territory': 'Flag_of_Australia.svg',
        'Antarctica': 'Flag_of_the_Antarctic_Treaty.svg',
        'Antártica': 'Flag_of_Magallanes,_Chile.svg',
        'Ashmore and Cartier Islands': 'Flag_of_Australia.svg',
        'Bonaire, Saint Eustatius and Saba': 'Flag_of_Bonaire.svg',
        'Burma': 'Flag_of_Myanmar_(1974-2010).svg',
        'Byelorussian Soviet Socialist Republic': 'Flag_of_Byelorussian_SSR.svg',
        'Canton and Enderbury Islands': 'Flag_of_Gilbert_and_Ellice_Islands.svg',
        'Ceuta and Melilla': 'Flag_Ceuta.svg',
        'Coral Sea Islands': 'Flag_of_Australia.svg',
        'Diego Garcia': 'Flag_of_the_British_Indian_Ocean_Territory.svg',
        'French Guiana': 'Flag_of_French_Guiana.svg',
        'Korea': 'Flag_of_Korea_1882.svg',
        # 'Libya': 'Flag_of_the_Libyan_Jamahiriya_1977.svg',
        'Metropolitan France': 'Flag_of_France.svg',
        'Neutral Zone': 'Flag_of_the_United_Nations.svg',
        'New Hebrides': 'Flag_of_Anglo-French_Joint_Naval_Commission.svg',
        'Northern Ireland': 'Ulster_banner.svg',
        'Panama Canal Zone': 'Panama_Canal_Zone_Flag.png',
        'Peter I Island': 'Flag_of_Norway.svg',
        'Réunion': 'Drapeau_Reunion_APDR.png',
        'Saint Martin': 'Flag_of_Saint-Martin_(local).svg',
        'Siam': 'State_Flag_of_Thailand_(1916).svg',
        'Svalbard and Jan Mayen': 'Flag_of_Norway.svg',
        'United States Miscellaneous Pacific Islands': 'Flag_of_the_United_States.svg',
        'Wallis and Futuna': 'Flag_of_Wallis_and_Futuna.svg',
        'Yugoslavia': 'Flag_of_SFR_Yugoslavia.svg'
    },
    # country name -> value stored as 'googleName' by get_countries();
    # countries not listed here keep their own name
    'google_name': {
        'Bahamas': 'The Bahamas',
        'Brunei': 'Brunei Darussalam',
        'Cocos Islands': 'Cocos (Keeling) Islands',
        'Gambia': 'The Gambia',
        'Kosovo': 'Kosova (Kosovo)',
        'Macedonia': 'Former Yugoslav Republic of Macedonia',
        'Myanmar': 'Burma',
        'Palestine': 'Palestinian Territories',
        'Republic of the Congo': 'Congo',
        'Sahrawi': 'Western Sahara',
        'United Kingdom': 'UK',
        'United States': 'USA'
    },
    # country name -> list of query strings
    # NOTE(review): presumably geocoder queries whose results are combined
    # for the country -- the consumer is not in this section, confirm there
    'google_query': {
        'Akrotiri and Dhekelia': ['Akrotiri, Cyprus', 'Dhekelia, Cyprus'],
        'Bonaire, Saint Eustatius and Saba': ['Bonaire', 'Saba, Netherlands Antilles', 'Saint Eustatius'],
        'Byelorussian Soviet Socialist Republic': ['Belarus'],
        'Canton and Enderbury Islands': ['Canton Island', 'Enderbury Island'],
        'Ceuta and Melilla': ['Ceuta', 'Melilla'],
        'Clipperton Island': ['Île de la Passion'],
        'Curaçao': ['Banda Abou, Curaçao'],
        'Czechoslovakia': ['Czech Republic', 'Slovakia'],
        'East Germany': [
            'Mecklenburg Vorpommern', 'Saxony', 'Thuringia'
        ],
        'Dahomey': ['Benin'],
        'European Union': ['Europe'],
        'French Afar and Issas': ['Djibouti'],
        # see http://en.wikipedia.org/wiki/French_Southern_and_Antarctic_Lands
        # and http://en.wikipedia.org/wiki/Scattered_Islands_in_the_Indian_Ocean
        'French Southern and Antarctic Territories': ['Adélie Land', 'Bassas da India', 'Glorioso Islands', 'Île Kerguelen'],
        'French Southern Territories': ['Bassas da India', 'Glorioso Islands', 'Île Kerguelen'],
        'Georgia': ['Georgia, Asia'],
        # see http://en.wikipedia.org/wiki/Gilbert_Islands
        'Gilbert and Ellice Islands': ['Arorae, Kiribati', 'Butaritari, Kiribati', 'Makin, Kiribati', 'Tuvalu'],
        'Jamaica': ['Clarendon Parish, Jamaica', 'St. James Parish, Jamaica', 'St. Thomas Parish, Jamaica', 'Westmoreland Parish, Jamaica'], # in case results are us-biased
        'Johnston Island': ['Johnston Atoll'],
        'Korea': ['North Korea', 'South Korea'],
        'Lebanon': ['Lebanon, Asia'], # in case results are us-biased
        'Metropolitan France': ['France'],
        'Midway Islands': ['Midway Atoll'],
        'Neutral Zone': ['Neutral Zone, Saudi Arabia'],
        'New Hebrides': ['Vanuatu'],
        'North Vietnam': ['Ha Giang, Vietnam', 'Lai Chau, Vietnam', 'Thua Thien-Hue, Vietnam'],
        'Northern Cyprus': ['Karpass, Cyprus', 'Kokkina, Cyprus', 'Lympia, Cyprus'],
        'Pacific Islands': ['Marshall Islands', 'Micronesia', 'Northern Mariana Islands', 'Palau'],
        # 'Palestine': ['71, Israel', 'El-arish Rafah, Egypt'],
        'Panama Canal Zone': ['Amador, Panama', 'Fort Sherman, Panama'],
        'Sahrawi': ['Western Sahara'],
        'Saint Helena': ['Ascension Island', 'Longwood, Saint Helena', 'Tristan da Cunha'], # in case results are us-biased
        'San Marino': ['San Marino, Europe'], # in case results are us-biased
        'Serbia and Montenegro': ['Montenegro', 'Serbia'],
        'Siam': ['Thailand'],
        'Sint Maarten': ['Sint Maarten, Netherlands Antilles'],
        'South Ossetia': ['Shida Kartli'],
        # see http://en.wikipedia.org/wiki/Sudan#States_and_regions
        'South Sudan': [
            'Central Equatoria, Sudan', 'Eastern Equatoria, Sudan', 'Jonglei, Sudan', 'Lakes, Sudan', 'Northern Bahr el Ghazal, Sudan',
            'Upper Nile, Sudan', 'Unity, Sudan', 'Warrap, Sudan', 'Western Bahr el Ghazal, Sudan', 'Western Equatoria, Sudan'
        ],
        'South Yemen': ['Al-Mahrah, Yemen', 'Lahij, Yemen'],
        'Soviet Union': [
            'Armenia', 'Azerbaijan', 'Belarus', 'Estonia', 'Georgia, Europe',
            'Latvia', 'Lithuania', 'Kazakhstan', 'Kyrgyzstan', 'Moldova',
            'Russia', 'Tajikistan', 'Turkmenistan', 'Ukraine', 'Uzbekistan'
        ],
        'United Kingdom': ['England', 'Northern Ireland', 'Scotland', 'Wales, United Kingdom'],
        'United States Minor Outlying Islands': ['Midway Islands'],
        # Baker Island, Howland Island, Jarvis Island, Kingman Reef, Palmyra Atoll --
        # but the first three each return the full United States Minor Outlying Islands
        'United States Miscellaneous Pacific Islands': ['Kingman Reef', 'Palmyra Atoll'],
        'UK': ['England', 'Northern Ireland', 'Scotland', 'Wales, United Kingdom'],
        'Upper Volta': ['Burkina Faso'],
        'Wake Island': ['Wake Atoll'],
        'Wales': ['Wales, United Kingdom'],
        'West Germany': ['Schleswig Holstein', 'Northrhine Westphalia', 'Bavaria, Germany'],
        'Yugoslavia': [
            'Bosnia and Herzegovina', 'Croatia', 'Serbia', 'Slovenia', 'Macedonia',
            'Montenegro'
        ]
    },
    # country code -> icon recipe: one or more 'operation(arguments)' steps
    # separated by '; '
    # NOTE(review): the interpreter of these recipes is not in this section;
    # keys starting with '_' look like parked alternatives -- confirm
    'icon': {
        'AC': 'cross(340, resize, 684)',
        'AD': 'extend()',
        'AE': 'crop(683)',
        'AF': 'extend()',
        'AG': 'extend(left, left)',
        '_AG': 'resize(); copy(580, 90, 1468, 534, 592, 296, 387, 238); copy(580, 0, 1468, 90, 592, 238, 387, 0) # *2/3',
        'AI': 'extend(right, right)',
        'AIDJ': 'extend()',
        'AL': 'crop()',
        'AM': 'crop()',
        'ANHH': 'crop()',
        'AO': 'crop()',
        'AQ': 'crop()',
        'AR': 'crop()',
        'AR-AQ': 'extend(right, left)',
        'AS': 'extend(left, left)',
        'AT': 'crop()',
        'AU': 'extend(right, right)',
        'AU-AC': 'extend(right, right)',
        'AU-AQ': 'extend(right, right)',
        'AU-CS': 'extend(right, right)',
        'AW': 'crop(left)',
        'AX': 'crop(827)',
        'AZ': 'crop()',
        'AZ-NK': 'crop(right)',
        'BA': 'crop(1054)',
        'BB': 'extend()',
        'BD': 'crop(922)',
        'BE': 'extend()',
        'BF': 'crop()',
        'BG': 'crop()',
        'BH': 'crop(666)',
        '_BI': 'crop()',
        'BI': 'resize(); copy(677, 267, 1371, 961, 696, 696, 267, 267, circle)',
        'BJ': 'crop(819)',
        'BL': 'extend()',
        'BM': 'extend(right, right)',
        'BN': 'crop()',
        'BO': 'crop()',
        'BQ': 'crop(left)',
        'BQAQ': 'extend(right, right)',
        'BR': 'extend()',
        'BS': 'cut(0, 0, 342, 0); resize()',
        'BT': 'extend(left, right)',
        'BUMM': 'slice(444)',
        'BV': 'crop(left)',
        'BW': 'crop()',
        'BY': 'crop(left)',
        'BYAA': 'crop(left)',
        'BZ': 'crop()',
        'CA': 'extend()',
        'CC': 'extend()',
        'CD': 'crop(left)',
        'CF': 'crop(); copy(85, 0, 597, 341, 512, 341, 0, 0)',
        'CG': 'resize()',
        'CH': 'pass()',
        'CI': 'extend()',
        'CK': 'extend(right, right)',
        'CL': 'crop(left)',
        'CL-AQ': 'extend()',
        'CM': 'extend()',
        'CN': 'cut(1024, 0, 0, 341); resize(); copy(145, 73, 885, 679, 740, 606, 142, 209)',
        'CO': 'crop()',
        'CP': 'extend()',
        'CR': 'crop(left)',
        'CSHH': 'resize()',
        'CSXX': 'crop()',
        'CTKI': 'extend(right, right)',
        'CU': 'crop(left)',
        'CV': 'crop(768)',
        'CW': 'crop(left)',
        'CX': 'cut(2, 1, 2, 1); extend(right, left)',
        'CY': 'crop()',
        'CY-NC': 'crop(811)',
        'CZ': 'resize()',
        'DDDE': 'crop()',
        'DE': 'crop()',
        'DEDE': 'crop()',
        'DG': 'extend(right, right)',
        'DJ': 'crop(left)',
        'DK': 'crop(left)',
        'DM': 'crop()',
        'DO': 'crop()',
        'DYBJ': 'crop(819)',
        'DZ': 'crop()',
        'EA': 'crop()',
        'EC': 'crop()',
        'EE': 'crop()',
        'EG': 'crop()',
        'EH': 'cut(0, 0, 683, 0); resize(); copy(768, 0, 1280, 1024, 512, 1024, 512, 0)',
        'ER': 'extend(right, right)',
        'ES': 'crop(left)',
        'ET': 'crop()',
        'EU': 'crop()',
        'FI': 'crop(740)',
        'FJ': 'extend(right, right)',
        'FK': 'extend(right, right)',
        'FM': 'crop()',
        'FO': 'crop(left)',
        'FQHH': 'extend(right, right); copy(0, 539, 826, 552, 826, 13, 0, 328)',
        'FR': 'extend()',
        'FR-AQ': 'extend(right, right); copy(0, 539, 826, 552, 826, 13, 0, 328)',
        'FXFR': 'extend()',
        'GA': 'crop()',
        'GB': 'cross(340, resize, 684)',
        'GB-AD': 'cross(340, resize, 684)',
        'GB-ENG': 'crop()',
        'GB-NIR': 'crop()',
        'GB-SCT': 'resize()',
        'GB-WLS': 'extend()',
        'GD': 'crop()',
        '_GD': 'border(172); copy(172, 172, 1867, 1057, 885, 885, 172, 172)',
        'GE': 'cross(272, crop)',
        'GE-AB': 'crop(left)',
        'GE-SO': 'crop()',
        'GEHH': 'extend(right, right)',
        'GF': 'crop()',
        'GG': 'crop()',
        'GH': 'crop()',
        'GI': 'crop()',
        'GL': 'crop(797)',
        'GM': 'crop()',
        'GN': 'extend()',
        'GP': 'extend()',
        'GQ': 'cut(0, 0, 1024, 0); resize(); copy(683, 0, 1365, 1365, 682, 1365, 683, 0)',
        'GR': 'crop(left)',
        'GS': 'extend(right, right)',
        'GT': 'extend()',
        'GU': 'border(50)',
        'GW': 'slice(341)',
        'GY': 'resize()',
        'HK': 'crop()',
        'HM': 'extend(right, right)',
        'HN': 'crop()',
        'HR': 'crop()',
        'HT': 'crop()',
        'HU': 'crop()',
        'HVBF': 'crop()',
        'IC': 'extend()',
        'ID': 'crop()',
        'IE': 'extend()',
        'IL': 'crop()',
        'IM': 'crop()',
        'IN': 'crop()',
        'IO': 'extend(right, right)',
        'IQ': 'crop()',
        'IR': 'crop()',
        'IS': 'crop(left)',
        'IT': 'extend()',
        'JE': 'crop()',
        'JM': 'resize()',
        'JO': 'crop(left)',
        'JP': 'crop()',
        'JTUM': 'extend()',
        'KE': 'crop()',
        'KG': 'crop()',
        'KH': 'crop()',
        'KI': 'crop()',
        'KM': 'crop(left)',
        'KN': 'crop()',
        'KOHH': 'extend()',
        'KP': 'crop(683)',
        'KR': 'extend()',
        'KW': 'cut(0, 0, 512, 0); resize()',
        'KY': 'extend(right, right)',
        'KZ': 'crop(left); copy(696, 0, 1526, 1024, 840, 1024, 184, 0)',
        'LA': 'crop()',
        'LB': 'crop()',
        'LC': 'crop()',
        'LI': 'slice(455)',
        'LK': 'cut(649, 0, 0, 0); border(85)',
        'LR': 'crop(left)',
        'LS': 'crop()',
        'LT': 'crop()',
        'LU': 'crop()',
        'LV': 'crop()',
        'LY': 'crop()',
        'MA': 'crop()',
        'MC': 'crop()',
        'MD': 'extend()',
        'MD-TR': 'crop(left)',
        'ME': 'border(51)',
        'MF': 'crop()',
        'MG': 'crop(left)',
        'MH': 'extend()',
        'MIUM': 'extend()',
        'MK': 'cut(308, 0, 308, 0); resize(); copy(842, 330, 1206, 694, 364, 364, 330, 330, circle)',
        'ML': 'extend()',
        'MM': 'crop()',
        'MN': 'extend()',
        'MO': 'crop()',
        'MP': 'crop()',
        'MQ': 'cross(198, crop)',
        'MR': 'crop()',
        'MS': 'extend(right, right)',
        'MT': 'border(480)',
        'MU': 'crop()',
        'MV': 'border(341)',
        'MW': 'crop()',
        'MX': 'extend()',
        '_MY': 'crop(left)',
        'MY': 'crop(right); copy(0, 0, 1, 1, 585, 585, 0, 0); copy(0, 0, 1024, 585, 585, 334, 0, 126)',
        'MZ': 'crop(left)',
        'NA': 'crop(left)',
        'NC': 'crop(681)',
        'NE': 'crop()',
        'NF': 'extend()',
        'NG': 'extend()',
        'NHVU': 'crop()',
        'NI': 'crop()',
        'NL': 'crop()',
        'NO': 'crop(left)',
        'NO-PI': 'crop(left)',
        'NP': 'construct()',
        'NQAQ': 'crop(left)',
        'NR': 'crop(left)',
        'NTHH': 'crop()',
        'NU': 'extend(right, right)',
        'NZ': 'extend(right, right)',
        'NZ-AQ': 'extend(right, right)',
        'OM': 'crop(left)',
        'PA': 'slice()',
        'PCHH': 'crop()',
        'PE': 'extend()',
        'PF': 'crop()',
        'PG': 'extend(right, left)',
        'PH': 'crop(left)',
        'PK': 'extend()',
        'PL': 'crop()',
        'PM': 'cut(558, 0, 0, 0); extend()',
        'PN': 'extend(right, right)',
        'PR': 'crop(left)',
        'PS': 'cut(0, 0, 683, 0); resize()',
        'PT': 'crop(819)',
        'PUUM': 'crop(778)',
        'PW': 'crop(896)',
        'PY': 'crop()',
        'PZPA': 'crop()',
        'QA': 'crop(659)',
        'RE': 'cut(464, 0, 0, 0); resize()',
        'RHZW': 'cut(0, 3, 0, 2); extend()',
        'RO': 'extend()',
        'RS': 'crop(732)',
        'RS-KO': 'crop()',
        'RU': 'crop()',
        'RW': 'crop(right)',
        'SA': 'crop()',
        'SB': 'extend(left, right); copy(103, 972, 206, 1024, 103, 52, 0, 1536); copy(1842, 0, 1945, 52, 103, 52, 1945, 460)',
        'SC': 'resize()',
        'SD': 'cut(0, 0, 683, 0); resize()',
        'SE': 'crop(768)',
        'SG': 'crop(787)',
        'SH': 'extend(right, right)',
        'SI': 'crop(left)',
        'SITH': 'crop()',
        'SJ': 'crop(left)',
        'SK': 'crop(679)',
        'SKIN': 'border(165)',
        'SL': 'crop()',
        'SM': 'crop()',
        'SN': 'extend()',
        'SO': 'crop()',
        'SO-SO': 'crop()',
        'SR': 'crop()',
        'SS': 'crop(left)',
        'ST': 'resize(); copy(832, 320, 1216, 704, 384, 384, 272, 320); copy(1344, 320, 1728, 704, 384, 384, 624, 320)',
        'SUHH': 'crop(right); copy(85, 0, 597, 512, 1024, 1024, 0, 0)',
        'SV': 'crop()',
        'SX': 'crop(left)',
        'SY': 'slice(614)',
        'SZ': 'extend()',
        'TA': 'extend(right, right)',
        'TC': 'extend(right, right)',
        'TD': 'extend()',
        'TF': 'extend(right, right); copy(0, 539, 826, 552, 826, 13, 0, 328)',
        'TG': 'crop(left)',
        'TH': 'crop()',
        'TJ': 'crop()',
        'TK': 'extend()',
        'TL': 'crop(left)',
        'TM': 'crop(left)',
        'TN': 'crop()',
        'TO': 'slice(427)',
        'TPTL': 'crop(left)',
        'TR': 'crop(844)',
        'TT': 'resize()',
        'TV': 'extend(right, right)',
        'TW': 'slice()',
        'TZ': 'resize()',
        'UA': 'crop()',
        'UG': 'crop()',
        'UK': 'cross(340, resize, 684)',
        'UM': 'crop(778)',
        'US': 'crop(778)',
        'UY': 'crop(left)',
        'UZ': 'crop(left)',
        'VA': 'pass()',
        'VC': 'extend()',
        'VDVN': 'crop()',
        'VE': 'crop(); copy(0, 0, 455, 455, 455, 455, 0, 0)',
        'VG': 'extend(right, right)',
        'VI': 'extend()',
        'VN': 'crop()',
        'VU': 'crop(left)',
        'WF': 'extend(right, right); copy(0, 539, 826, 552, 826, 13, 0, 328)',
        'WKUM': 'crop(left)',
        'WS': 'slice()',
        'YDYE': 'crop(left)',
        'YE': 'crop()',
        'YT': 'extend()',
        'YUCS': 'crop()',
        'ZA': 'resize()',
        'ZM': 'cut(598, 0, 0, 0); extend()',
        'ZRCD': 'crop()',
        'ZW': 'crop(left)'
    },
    # country name -> imdb country code; get_countries() indexes this table
    # for every name found in 'imdb_name', so both tables must share their keys
    'imdb_code': {
        'Côte d\'Ivoire': 'ci',
        'Democratic Republic of the Congo': 'cd',
        'Palestine': 'ps',
        'Serbia and Montenegro': 'xfy'
    },
    # country name -> imdb country name (keys must match 'imdb_code')
    'imdb_name': {
        'Côte d\'Ivoire': 'Ivory Coast', # bug in some imdb entries
        'Democratic Republic of the Congo': 'Democratic Republic of Congo', # bug in some imdb entries
        'Palestine': 'Occupied Palestinian Territory', # bug in some imdb entries
        'Serbia and Montenegro': 'Federal Republic of Yugoslavia'
    },

    # import json
    # import re
    # from ox.net import readUrl
    # html = readUrl('http://www.imdb.com/language/')
    # results = re.compile(re.compile('<a href="/language/.*?">(.*?)</a>').findall(html))
    # languages = {}
    # for result in results:
    #     languages[result] = ''
    # print json.dumps(languages, sort_keys=True)

    # http://www.imdb.com/language/
    # language name -> country name, '' where no country has been assigned
    'languages': {
        'Abkhazian': 'Abkhazia',
        'Aboriginal': 'Australia',
        'Aché': '',
        'Acholi': '',
        'Afrikaans': 'South Africa',
        'Aidoukrou': '',
        'Akan': '',
        'Albanian': 'Albania',
        'Algonquin': '',
        'American': 'United States',
        'Amharic': 'Ethiopia',
        'Apache': '',
        'Arabic': 'Saudi Arabia',
        'Aragonese': '',
        'Aramaic': 'Syria',
        'Arapaho': '',
        'Armenian': 'Armenia',
        'Assamese': 'India',
        'Assyrian Neo-Aramaic': '',
        'Athapascan': '',
        'Australian': 'Australia',
        'Awadhi': '',
        'Aymara': 'Bolivia',
        'Azerbaijani': 'Azerbaijan',
        'Bable': '',
        'Baka': '',
        'Balinese': 'Indonesia',
        'Bambara': 'Mali',
        'Basque': 'Spain',
        'Bassari': '',
        'Belarusian': 'Belarus',
        'Bemba': '',
        'Bengali': 'Bangladesh',
        'Berber': 'Morocco', # ?, one of multiple
        'Bhojpuri': '',
        'Bicolano': 'Philippines',
        'Bodo': '',
        'Bosnian': 'Bosnia and Herzegovina',
        'Brazilian': 'Brazil',
        'Breton': 'France',
        'British': 'United Kingdom',
        'Bulgarian': 'Bulgaria',
        'Burmese': 'Burma',
        'Cantonese': 'China',
        'Catalan': 'Spain',
        'Central Khmer': 'Cambodia',
        'Chaozhou': 'China',
        'Chechen': 'Russia',
        'Cherokee': 'United States',
        'Cheyenne': 'United States',
        'Chhattisgarhi': 'India',
        'Chinese': 'China',
        'Cornish': 'United Kingdom',
        'Corsican': 'France',
        'Cree': 'Canada',
        'Creek': 'United States',
        'Creole': '',
        'Creoles and pidgins': '',
        'Croatian': 'Croatia',
        'Crow': '',
        'Czech': 'Czech Republic',
        'Danish': 'Denmark',
        'Dari': 'Afghanistan',
        'Desiya': '',
        'Dinka': '',
        'Djerma': 'Niger',
        'Dogri': '',
        'Dutch': 'Netherlands',
        'Dyula': '',
        'Dzongkha': '',
        'East-Greenlandic': 'Greenland',
        'Eastern Frisian': 'Germany',
        'Egyptian (Ancient)': 'Egypt',
        'English': 'United Kingdom',
        'Esperanto': '',
        'Estonian': 'Estonia',
        'Ewe': '',
        'Faliasch': '',
        'Faroese': 'Faroe Islands',
        'Filipino': 'Philippines',
        'Finnish': 'Finland',
        'Flemish': 'Belgium',
        'Fon': '',
        'French': 'France',
        'Fulah': '',
        'Fur': '',
        'Gaelic': 'Ireland',
        'Galician': 'Spain',
        'Georgian': 'Georgia',
        'German': 'Germany',
        'Grebo': '',
        'Greek': 'Greece',
        'Greek, Ancient (to 1453)': 'Greece',
        'Greenlandic': 'Greenland',
        'Guarani': 'Paraguay',
        'Gujarati': 'India',
        'Gumatj': '',
        'Gunwinggu': 'Australia',
        'Haitian': 'Haiti',
        'Hakka': 'China',
        'Haryanvi': '',
        'Hassanya': 'Mauritania',
        'Hausa': 'Nigeria',
        'Hawaiian': 'United States',
        'Hebrew': 'Israel',
        'Hindi': 'India',
        'Hmong': '',
        'Hokkien': 'China',
        'Hopi': 'United States',
        'Hungarian': 'Hungary',
        'Iban': '',
        'Ibo': 'Nigeria',
        'Icelandic': 'Iceland',
        'Indian': 'India',
        'Indonesian': 'Indonesia',
        'Inuktitut': '',
        'Inupiaq': '',
        'Irish Gaelic': 'Ireland',
        'Italian': 'Italy',
        'Japanese': 'Japan',
        'Jola-Fonyi': '',
        'Ju\'hoan': '',
        'Kaado': '',
        'Kabuverdianu': 'Cape Verde',
        'Kabyle': '',
        'Kalmyk-Oirat': '',
        'Kannada': 'India',
        'Karajá': '',
        'Karbi': '',
        'Karen': '',
        'Kazakh': 'Kazakhstan',
        'Khanty': 'Russia',
        'Khasi': '',
        'Kikuyu': '',
        'Kinyarwanda': '',
        'Kirundi': '',
        'Klingon': '',
        'Kodava': '',
        'Konkani': 'India',
        'Korean': 'South Korea',
        'Korowai': 'Papua New Guinea',
        'Kriolu': 'Cape Verde',
        'Kru': '',
        'Kudmali': '',
        'Kuna': '',
        'Kurdish': 'Turkey', # ?, one of multiple
        'Kwakiutl': '',
        'Kyrgyz': 'Kyrgyzstan',
        'Ladakhi': 'India',
        'Ladino': '',
        'Lao': 'Laos',
        'Latin': 'Italy',
        'Latvian': 'Latvia',
        'Limbu': '',
        'Lingala': 'Democratic Republic of the Congo',
        'Lithuanian': 'Lithuania',
        'Low German': 'Germany',
        # was misspelled 'Luxemburg', which is not a country name used by the
        # other tables in this file
        'Luxembourgish': 'Luxembourg',
        'Macedonian': 'Macedonia',
        'Macro-Jê': '',
        'Magahi': '',
        'Maithili': '',
        'Malagasy': 'Madagascar',
        'Malay': 'Malaysia',
        'Malayalam': 'India',
        'Malecite-Passamaquoddy': '',
        'Malinka': 'Guinea',
        'Maltese': 'Malta',
        'Manchu': '',
        'Mandarin': 'China',
        'Mandingo': '',
        'Manipuri': '',
        'Maori': 'New Zealand',
        'Mapudungun': 'Chile',
        'Marathi': 'India',
        'Marshallese': 'Marshall Islands',
        'Masai': '',
        'Masalit': '',
        'Maya': 'Mexico',
        'Mende': 'Sierra Leone',
        'Micmac': '',
        'Middle English': 'England',
        'Min Nan': '',
        'Minangkabau': '',
        'Mirandese': '',
        'Mizo': '',
        'Mohawk': '',
        'Mongolian': 'Mongolia',
        'Montagnais': '',
        'More': 'Burkina Faso',
        'Morisyen': '',
        'Nagpuri': 'India',
        'Nahuatl': '',
        'Nama': '',
        'Navajo': 'United States',
        'Naxi': 'China',
        'Ndebele': '',
        'Neapolitan': 'Italy',
        'Nenets': '',
        'Nepali': 'Nepal',
        'Nisga\'a': 'Canada',
        'None': '',
        'Norse, Old': '',
        'North American Indian': '',
        'Norwegian': 'Norway',
        'Nushi': '',
        'Nyaneka': '',
        'Nyanja': 'Malawi',
        'Occitan': '',
        'Ojibwa': '',
        'Ojihimba': '',
        'Old English': 'England',
        'Oriya': '',
        'Papiamento': '',
        'Parsee': 'Iran',
        'Pashtu': 'Afghanistan',
        'Pawnee': '',
        'Persian': 'Iran',
        'Peul': '',
        'Polish': 'Poland',
        'Polynesian': '',
        'Portuguese': 'Portugal',
        'Pular': '',
        'Punjabi': 'India',
        'Purepecha': '',
        'Quechua': 'Peru',
        'Quenya': '',
        'Rajasthani': 'India',
        'Rawan': '',
        'Romanian': 'Romania',
        'Romansh': 'Switzerland',
        'Romany': 'Romania',
        'Rotuman': '',
        'Russian': 'Russia',
        'Ryukyuan': 'Japan',
        'Saami': 'Finland',
        'Samoan': 'Samoa',
        'Sanskrit': '',
        'Sardinian': 'Italy',
        'Scanian': '',
        'Serbian': 'Serbia',
        'Serbo-Croatian': 'Yugoslavia',
        'Serer': '',
        'Shanghainese': 'China',
        'Shanxi': 'China',
        'Shona': 'Zimbabwe',
        'Shoshoni': 'United States',
        'Sicilian': 'Italy',
        'Sindarin': '',
        'Sindhi': 'Pakistan',
        'Sinhala': 'Sri Lanka',
        'Sioux': 'United States',
        'Slovak': 'Slovakia',
        'Slovenian': 'Slovenia',
        'Somali': 'Somalia',
        'Songhay': '',
        'Soninke': '',
        'Sorbian': 'Germany',
        'Sotho': 'Lesotho',
        'Sousson': '',
        'Spanish': 'Spain',
        'Sranan': '',
        'Swahili': 'Kenya', # ?, one of multiple
        'Swedish': 'Sweden',
        'Swiss German': 'Switzerland',
        'Sylheti': '',
        'Tagalog': 'Philippines',
        'Tajik': 'Tajikistan',
        'Tamashek': 'Algeria', # ?, one of multiple
        'Tamil': 'Sri Lanka',
        'Tarahumara': '',
        'Tatar': 'Russia',
        'Telugu': 'India',
        'Teochew': '',
        'Thai': 'Thailand',
        'Tibetan': 'China',
        'Tigrigna': '',
        'Tlingit': '',
        'Tok Pisin': '',
        'Tonga (Tonga Islands)': 'Tonga',
        'Tsonga': '',
        'Tswa': '',
        'Tswana': '',
        'Tulu': '',
        'Tupi': '',
        'Turkish': 'Turkey',
        'Turkmen': 'Turkmenistan',
        'Tuvinian': '',
        'Tzotzil': '',
        'Ukrainian': 'Ukraine',
        'Ungwatsi': '',
        'Urdu': 'Pakistan',
        'Uzbek': 'Uzbekistan',
        'Vietnamese': 'Vietnam',
        'Visayan': '',
        'Washoe': '',
        'Welsh': 'Wales',
        'Wolof': 'Senegal', # ?, one of multiple
        'Xhosa': 'South Africa',
        'Yakut': '',
        'Yapese': '',
        'Yiddish': 'Israel',
        'Yoruba': 'Nigeria',
        'Zulu': 'South Africa'
    },
    # names for which get_countries() sets the 'other' flag to True
    'other': ['European Union', 'Metropolitan France', 'UK'],
    # wikipedia article title -> country name used in this file
    'wikipedia_name': {
        # ambiguous on wikipedia
        'Cocos (Keeling) Islands': 'Cocos Islands',
        'Collectivity of Saint Martin': 'Saint Martin',
        'Federated States of Micronesia': 'Micronesia',
        'French Territory of the Afars and the Issas': 'French Afar and Issas',
        'Georgia (country)': 'Georgia',
        'Nagorno-Karabakh Republic': 'Nagorno-Karabakh',
        'People\'s Republic of China': 'China',
        'Republic of China': 'Taiwan',
        'Republic of Dahomey': 'Dahomey',
        'Republic of Ireland': 'Ireland',
        'Republic of Kosovo': 'Kosovo',
        'Republic of Macedonia': 'Macedonia',
        'Republic of Upper Volta': 'Upper Volta',
        'Sahrawi Arab Democratic Republic': 'Sahrawi',
        'Saudi-Iraqi neutral zone': 'Neutral Zone',
        'State of Palestine': 'Palestine',
        'Trust Territory of the Pacific Islands': 'Pacific Islands'
    },
    # wikipedia URL segment as found in the scraped lists -> replacement
    # NOTE(review): '' presumably means "drop this entry" -- confirm against
    # the code that reads geo['wikipedia_url']
    'wikipedia_url': {
        # dependencies of guernsey
        'Alderney': '',
        'Herm': '',
        'Sark': '',
        # territory of pakistan
        'Azad_Kashmir': '',
        'Gilgit-Baltistan': '',
        # wrong in http://en.wikipedia.org/wiki/List_of_sovereign_states
        'Coral_Sea_Islands_Territory': 'Coral_Sea_Islands',
        'Kingdom_of_the_Netherlands': 'Netherlands',
        'Saint-Barth%C3%A9lemy': 'Saint_Barth%C3%A9lemy',
        'Saint_Martin': 'Collectivity_of_Saint_Martin',
        # wrong in http://en.wikipedia.org/wiki/ISO_3166-1_alpha-2
        'Caribbean_Netherlands': 'Bonaire,_Saint_Eustatius_and_Saba',
        'Ceuta': 'Ceuta_and_Melilla',
        'Palestinian_territories': 'State_of_Palestine',
        'Saudi%E2%80%93Iraqi_neutral_zone': 'Saudi-Iraqi_neutral_zone',
        'Western_Sahara': 'Sahrawi_Arab_Democratic_Republic',
        # wrong in http://en.wikipedia.org/wiki/ISO_3166-3
        'Johnston_Atoll': 'Johnston_Island',
        'Midway_Atoll': 'Midway_Islands',
        # wrong in all
        '%C3%85land_Islands': 'Åland',
        'East Timor': 'Timor-Leste',
        'Cocos_(Keeling)_Islands': 'Cocos_Islands',
        'French_Southern_and_Antarctic_Lands': 'French_Southern_and_Antarctic_Territories',
        'Saint_Helena,_Ascension_and_Tristan_da_Cunha': 'Saint_Helena',
        'The_Bahamas': 'Bahamas',
        'The_Gambia': 'Gambia'
    },
    # additional wikipedia URL segments that appear in none of the scraped lists
    'wikipedia_urls': [
        # not in any list
        # antarctic
        'Adélie_Land', 'Antártica', 'Argentine_Antarctica',
        # dependent
        'England', 'Northern_Ireland', 'Scotland', 'Wales',
        # former
        'Korea', 'Siam', 'West_Germany',
        # other
        'East Timor', 'French_Southern_Territories', 'Peter_I_Island', 'South_Sudan', 'UK'
    ]
}
|
|
|
|
def get_cities():
    """Return whatever ``wikipedia.get_cities()`` produces, unchanged."""
    return wikipedia.get_cities()
|
|
|
|
def _relate_country(country, table, group_key, member_key):
    """Set ``country[group_key]`` and ``country[member_key]`` from ``table``.

    Each key of ``table`` is a ', '-joined string of country names and each
    value is the collection of names related to them.  Both entries start
    out as empty lists; the first table row that mentions the country wins:

    - the country is one of the names in the key: ``group_key`` receives
      the row's value (the same object, not a copy);
    - the country is in the value: ``member_key`` receives the list of
      names obtained by splitting the key.
    """
    name = country['name']
    country[group_key] = []
    country[member_key] = []
    for joined_names, related in table.items():
        names = joined_names.split(', ')
        if name in names:
            country[group_key] = related
            break
        if name in related:
            country[member_key] = names
            break


def get_countries():
    """Collect, enrich and write the country list.

    Fetches the country lists of geonames, imdb and wikipedia, writes each
    of them to its own file below ``../json/``, then extends every wikipedia
    country in place with data from the ``geo`` tables (dependencies,
    disputes, dissolution, google name, "other" flag, imdb code/name,
    languages) and generates its flag and icon images.

    Returns the enriched wikipedia list, which is also written to
    ``../json/countries.json``.
    """
    geonames_countries = geonames.get_countries()
    write_json('../json/geonames.org/countries.json', geonames_countries, True)
    imdb_countries = imdb.get_countries()
    write_json('../json/imdb.com/countries.json', imdb_countries, True)
    countries = wikipedia.get_countries()
    write_json('../json/wikipedia.org/countries.json', countries, True)
    for country in countries:
        name = country['name']
        # dependencies
        _relate_country(country, geo['dependencies'], 'dependencies', 'dependency')
        # disputes
        _relate_country(country, geo['disputes'], 'disputes', 'disputed')
        # dissolved
        country['dissolved'] = geo['dissolved'].get(name, [])
        # google name (falls back to the plain name)
        country['googleName'] = geo['google_name'].get(name, name)
        # other
        country['other'] = name in geo['other']
        # imdb: an explicit override wins, otherwise the first imdb country
        # with the same code or name is used; without any match the two
        # keys are simply not set
        if name in geo['imdb_name']:
            country['imdbCode'] = geo['imdb_code'][name]
            country['imdbName'] = geo['imdb_name'][name]
        else:
            for imdb_country in imdb_countries:
                if (
                    imdb_country['code'].upper() == country['code']
                    or imdb_country['name'] == name
                ):
                    country['imdbCode'] = imdb_country['code']
                    country['imdbName'] = imdb_country['name']
                    break
        get_country_flag(country['code'], country['flagURL'])
        get_country_icon(country['code'])
        # languages
        country['languages'] = [
            language
            for language, language_country in geo['languages'].items()
            if language_country == name
        ]
    write_json('../json/countries.json', countries)
    return countries
|
|
|
|
def get_country_flag(code, url):
    """Download the flag at ``url`` and write its 2048/256/32 px PNGs.

    For an ``.svg`` url the svg itself is stored below ``../svg/flags/`` and
    the 2048px PNG is fetched from the matching ``/commons/thumb/`` url.
    Any other url is taken to be the image itself; it is stored as the
    2048px file and then rescaled to a width of 2048.  The 256px and 32px
    versions are derived from the 2048px image.  Files are rewritten when
    the downloaded data differs from what is stored, or when they are
    missing.
    """
    def scaled(image, target_width):
        # keep the aspect ratio (relies on true division)
        target_height = int(round(target_width / image.size[0] * image.size[1]))
        return image.resize((target_width, target_height), Image.ANTIALIAS)

    # max width on wikipedia
    full_width = 2048
    downloaded = read_url(url)
    png_path = '../png/flags/' + str(full_width) + '/' + code + '.png'
    if url.endswith('.svg'):
        svg_path = '../svg/flags/' + code + '.svg'
        update_image = not (
            os.path.exists(svg_path) and read_file(svg_path) == downloaded
        )
        if update_image:
            write_file(svg_path, downloaded)
        if update_image or not os.path.exists(png_path):
            thumb_url = (
                url.replace('/commons/', '/commons/thumb/')
                + '/' + str(full_width) + 'px-.png'
            )
            write_file(png_path, read_url(thumb_url))
        png = Image.open(png_path)
    else:
        update_image = not (
            os.path.exists(png_path) and read_file(png_path) == downloaded
        )
        if update_image:
            write_file(png_path, downloaded)
            png = scaled(Image.open(png_path), full_width)
            write_image(png_path, png)
        else:
            png = Image.open(png_path)
    for small_width in (256, 32):
        small_path = '../png/flags/' + str(small_width) + '/' + code + '.png'
        if update_image or not os.path.exists(small_path):
            write_image(small_path, scaled(png, small_width))
|
|
|
|
def get_country_icon(code, force=True):
    """Build the square icons and round markers for one country flag.

    The recipe is read from ``geo['icon'][code]``: everything before the
    first ' # ' is split on '; ' into steps of the form ``name(arg, arg)``.
    Arguments that start with a digit or '-' are converted to int, all
    others stay strings.  The steps are applied in order, each one working
    on the result of the previous one, starting from
    ``../png/flags/2048/<code>.png``.  A step whose name is not known leaves
    the image unchanged.

    Output: ``../png/icons/{1024,256,16}/<code>.png`` and, masked with
    ``../png/circle.png``, ``../png/markers/{256,32}/<code>.png``.

    force -- when true (the default, which is what the former hard-wired
             ``or True`` amounted to) every file is regenerated; when false,
             existing files are kept and an existing 1024px icon is reused.

    An empty recipe leaves no icon to resize, so the function fails with an
    AttributeError on ``None``.
    """

    def _circle():
        # the circle image as a single-band ('L') image usable as paste mask
        source = Image.open('../png/circle.png')
        mask = Image.new('L', (source.size[0], source.size[1]))
        mask.paste(source, (0, 0))
        return mask

    def _border(flag, args):
        # keeps a strip of args[0] px from both outer edges and fills the
        # space in between from the horizontal center of the flag
        width, height = flag.size
        border = args[0]
        icon = Image.new('RGBA', (height, height))
        left = int((width - height) / 2 + border)
        right = left + height - border * 2
        icon.paste(flag.crop((0, 0, border, height)), (0, 0))
        icon.paste(flag.crop((left, 0, right, height)), (border, 0))
        icon.paste(flag.crop((width - border, 0, width, height)), (height - border, 0))
        return icon

    def _construct(flag):
        # nepal (all coordinates are specific to that one flag image)
        height = flag.size[1]
        icon = Image.new('RGBA', (height, height))
        # blue
        crop = flag.crop((0, height - 85, 85, height))
        icon.paste(crop.resize((height, height), Image.ANTIALIAS), (0, 0))
        # red
        crop = flag.crop((86, height - 171, 171, height - 86))
        icon.paste(crop.resize((height - 170, height - 170), Image.ANTIALIAS), (85, 85))
        # moon
        crop = flag.crop((147, 753, 147 + 732, 1151))
        icon.paste(crop, (int((height - 732) / 2), 753))
        # sun
        crop = flag.crop((147, 1449, 147 + 732, 1449 + 732))
        mask = _circle().resize((732, 732), Image.ANTIALIAS)
        icon.paste(crop, (int((height - 732) / 2), 1449), mask)
        return icon

    def _copy(flag, args):
        # copies a region of the untouched 2048px flag onto the current
        # image (which is modified in place):
        # args = left, top, right, bottom, width, height, x, y[, shape]
        icon = flag
        original = Image.open('../png/flags/2048/' + code + '.png')
        # shape defaults to 'rect'; the caller's list is no longer extended
        shape = 'rect' if len(args) == 8 else args[8]
        crop = original.crop((args[0], args[1], args[2], args[3]))
        crop = crop.resize((args[4], args[5]), Image.ANTIALIAS)
        if shape == 'rect':
            icon.paste(crop, (args[6], args[7]))
        elif shape == 'circle':
            mask = _circle().resize((args[4], args[5]), Image.ANTIALIAS)
            icon.paste(crop, (args[6], args[7]), mask)
        return icon

    def _crop(flag, args):
        # cuts a height x height square: centered by default, flush
        # 'left' / 'right', or centered on the x coordinate args[0]
        width, height = flag.size
        if len(args) == 0:
            left = int((width - height) / 2)
        elif args[0] == 'left':
            left = 0
        elif args[0] == 'right':
            left = width - height
        else:
            left = int(args[0]) - int(height / 2)
        icon = Image.new('RGBA', (height, height))
        icon.paste(flag.crop((left, 0, left + height, height)), (0, 0))
        return icon

    def _cross(flag, args):
        # starts from the centered square and replaces the areas beside the
        # middle band of args[0] px (presumably the bar of a cross), either
        # by cropping ('crop') or by squeezing the corners ('resize')
        width, height = flag.size
        bar = args[0]
        icon = _crop(flag, [])
        if args[1] == 'crop':
            left = int((width - bar) / 4 - (height - bar) / 4)
            right = left + int((height - bar) / 2)
            icon.paste(flag.crop((left, 0, right, height)), (0, 0))
            icon.paste(
                flag.crop((width - right, 0, width - left, height)),
                (int((height + bar) / 2), 0)
            )
        elif args[1] == 'resize':
            corner_width = args[2]
            corner = int((height - bar) / 2)
            offset = int((height + bar) / 2)
            # (source box in the flag, target position in the icon)
            corners = (
                ((0, 0, corner_width, corner), (0, 0)),
                ((width - corner_width, 0, width, corner), (offset, 0)),
                ((0, offset, corner_width, height), (0, offset)),
                ((width - corner_width, offset, width, height), (offset, offset)),
            )
            for box, position in corners:
                crop = flag.crop(box).resize((corner, corner), Image.ANTIALIAS)
                icon.paste(crop, position)
        return icon

    def _cut(flag, args):
        # trims args = left, top, right, bottom pixels off the edges
        return flag.crop((args[0], args[1], flag.size[0] - args[2], flag.size[1] - args[3]))

    def _extend(flag, args):
        # pads the flag to width x width by repeating a one pixel high row
        # above and below it; without args the rows are taken from the flag
        # itself, otherwise each row is a single pixel from the 'left' (or
        # else the right) side, args[0] for the top, args[1] for the bottom
        width, height = flag.size

        def row_of(pixel):
            row = Image.new('RGBA', (width, 1))
            for x in range(width):
                row.paste(pixel, (x, 0))
            return row

        icon = Image.new('RGBA', (width, width))
        top = int((width - height) / 2)
        bottom = int((width - height) / 2 + 0.5)
        icon.paste(flag, (0, top))
        if len(args) == 0:
            crop_top = flag.crop((0, 1, width, 2))
            crop_bottom = flag.crop((0, height - 2, width, height - 1))
        else:
            if args[0] == 'left':
                pixel = flag.crop((2, 1, 3, 2))
            else:
                pixel = flag.crop((width - 3, 1, width - 2, 2))
            crop_top = row_of(pixel)
            if args[1] == 'left':
                pixel = flag.crop((1, height - 3, 2, height - 2))
            else:
                pixel = flag.crop((width - 3, height - 2, width - 2, height - 1))
            crop_bottom = row_of(pixel)
        for y in range(top):
            icon.paste(crop_top, (0, y))
        for y in range(bottom):
            icon.paste(crop_bottom, (0, top + height + y))
        return icon

    def _pass(flag):
        return flag

    def _resize(flag, args):
        # squeezes the whole flag into a height x height square
        height = flag.size[1]
        icon = Image.new('RGBA', (height, height))
        icon.paste(flag.resize((height, height), Image.ANTIALIAS))
        return icon

    def _slice(flag, args):
        # joins two vertical slices, each half the flag height wide: one
        # centered on x = args[0] (default: a quarter of the flag width)
        # and its mirror position counted from the right edge
        width, height = flag.size
        center = int(width / 4) if len(args) == 0 else args[0]
        half = int(height / 2)
        left = int(center - half / 2)
        right = int(center + half / 2)
        icon = Image.new('RGBA', (height, height))
        icon.paste(flag.crop((left, 0, right, height)), (0, 0))
        icon.paste(flag.crop((width - right, 0, width - left, height)), (half, 0))
        return icon

    operations = {
        'border': _border,
        'construct': lambda flag, args: _construct(flag),
        'copy': _copy,
        'crop': _crop,
        'cross': _cross,
        'cut': _cut,
        'extend': _extend,
        'pass': lambda flag, args: _pass(flag),
        'resize': _resize,
        'slice': _slice,
    }

    icon_path = '../png/icons/1024/' + code + '.png'
    if force or not os.path.exists(icon_path):
        flag = Image.open('../png/flags/2048/' + code + '.png')
        icon = None
        steps = geo['icon'][code].split(' # ')[0].split('; ')
        if steps[0]: # remove later
            for step in steps:
                pos = step.find('(')
                raw_args = step[pos + 1:-1].split(', ')
                if raw_args[0]:
                    # a real list: _copy needs len() on it
                    args = [
                        int(arg) if arg[0] in '-0123456789' else arg
                        for arg in raw_args
                    ]
                else:
                    args = []
                operation = operations.get(step[:pos])
                if operation is not None:
                    icon = operation(flag, args)
                # the next step continues with the result of this one
                flag = icon
            write_image(icon_path, icon.resize((1024, 1024), Image.ANTIALIAS))
    else:
        icon = Image.open(icon_path)
    for width in (256, 16):
        path = '../png/icons/' + str(width) + '/' + code + '.png'
        if force or not os.path.exists(path):
            write_image(path, icon.resize((width, width), Image.ANTIALIAS))
    circle = _circle()
    for size in (256, 32):
        marker = Image.new('RGBA', (size, size))
        marker.paste(
            icon.resize((size, size), Image.ANTIALIAS),
            (0, 0),
            circle.resize((size, size), Image.ANTIALIAS)
        )
        path = '../png/markers/' + str(size) + '/' + code + '.png'
        if force or not os.path.exists(path):
            write_image(path, marker)
|
|
|
|
def get_oxjs_languages():
    """Pair geonames languages with imdb languages and write the result.

    For every geonames language the first imdb language whose 'code' equals
    one of its ISO_639-1/-2/-3 values is attached; geonames languages
    without such a match are left out.  The pairs are written to
    ``../json/oxjs.org/languages.json``.
    """
    def iso_codes(entry):
        return [entry['ISO_639-1'], entry['ISO_639-2'], entry['ISO_639-3']]

    # geonames_countries = get_geonames_countries()
    geonames_languages = get_geonames_languages()
    imdb_languages = get_imdb_languages()
    languages = []
    for geonames_language in geonames_languages:
        for imdb_language in imdb_languages:
            if imdb_language['code'] in iso_codes(geonames_language):
                languages.append({
                    'geonames.org': geonames_language,
                    'imdb.com': imdb_language
                })
                break
    write_json('../json/oxjs.org/languages.json', languages, True)
|
|
|
|
def make_tree(places):
    """Arrange ``places`` into a tree following ``geo['levels']``.

    The levels are processed in order.  A place belongs to a level when its
    'feature_code' equals the level's 'featureCode'.  Its node id is built
    from the place's values for all level keys, skipping '' and '00', joined
    with '-'; the parent is the node whose id is the same id without its
    last part.  Places of the first level become roots.

    Sibling lists are kept sorted by node id.  (The former
    ``parent = sorted(parent, ...)`` only rebound a local name and never
    sorted anything.)

    The tree is written to ``../json/tree.json`` and returned.  A place
    whose parent node does not exist fails with a TypeError, because the
    lookup then returns None.
    """
    tree = []

    def get_node(find, node=None):
        # depth-first search, starting at the roots when no node is given
        if node and find(node):
            return node
        children = node['nodes'] if node else tree
        for child in children:
            found = get_node(find, child)
            if found:
                return found
        return None

    def get_node_id(place):
        ids = [place[level['key']] for level in geo['levels']]
        return '-'.join([part for part in ids if part != '' and part != '00'])

    def get_parent_id(node_id):
        return '-'.join(node_id.split('-')[:-1])

    for l, level in enumerate(geo['levels']):
        for place in places:
            if place['feature_code'] != level['featureCode']:
                continue
            node = {
                'geonameid': place['geonameid'],
                'name': place['name'],
                'node_id': get_node_id(place),
                'nodes': [],
            }
            print(node['name'])
            print(node['node_id'])
            if l == 0:
                parent = tree
            else:
                parent_id = get_parent_id(node['node_id'])
                parent = get_node(lambda x: x['node_id'] == parent_id)['nodes']
            parent.append(node)
            # in place, so that the list inside the tree is the one sorted
            parent.sort(key=lambda x: x['node_id'])
    write_json('../json/tree.json', tree)
    print('')
    return tree
|
|
|
|
def read_file(file):
    """Print a 'reading' notice, then return the whole content of ``file``.

    The file is opened with the default mode of ``open`` and is closed even
    when reading fails.
    """
    print('reading %s' % (file,))
    with open(file) as f:
        return f.read()
|
|
|
|
def read_json(file):
    """Parse JSON from ``file``: a url if it starts with 'http://', else a path."""
    reader = read_url if file.startswith('http://') else read_file
    return json.loads(reader(file))
|
|
|
|
def read_table(file, keys, drop=(), filter=lambda x: True, sort=lambda x: x):
    """Read a tab separated table into a sorted list of dicts.

    file   -- path of the table, or an 'http://' url; a url is downloaded
              uncached into '_tmp.data' first
    keys   -- one dict per column, in column order, with a 'name' and a
              'type'.  The type is an example value that doubles as the
              default for an empty cell: a float or an int makes the cell
              a number, a list makes the cell a ','-separated list whose
              elements are parsed like the list's first element, anything
              else leaves the cell a string
    drop   -- names of columns to leave out
    filter -- only rows for which ``filter(item)`` is true are kept
    sort   -- key function for the final sort

    Rows starting with '#' are skipped.  A row with more columns than
    ``keys`` raises IndexError.  The collected rows are also printed before
    they are returned.
    """
    def parse_value(text, default):
        # exact type tests on purpose: a bool default must not count as int
        if type(default) is float:
            return float(text) if text else default
        if type(default) is int:
            return int(text) if text else default
        return text

    if file.startswith('http://'):
        tmp_file = '_tmp.data'
        write_file(tmp_file, read_url(file, cache=False))
        path = tmp_file
    else:
        print('reading %s' % (file,))
        path = file
    data = []
    # the handle used to be left open
    with open(path) as f:
        for row in f:
            if not row or row[0] == '#':
                continue
            item = {}
            # strip only the line break: a last row without one used to
            # lose its final character to row[:-1]
            for c, col in enumerate(row.rstrip('\n').split('\t')):
                key = keys[c]
                if key['name'] in drop:
                    continue
                if isinstance(key['type'], list):
                    if col:
                        value = [
                            parse_value(part, key['type'][0])
                            for part in col.split(',')
                        ]
                    else:
                        value = []
                else:
                    value = parse_value(col, key['type'])
                item[key['name']] = value
            if filter(item):
                data.append(item)
    print(data)
    return sorted(data, key=sort)
|
|
|
|
def read_url(url, cache=True):
    """Print a 'reading' notice and return the data behind ``url``.

    Uses ``ox.cache.readUrl`` when ``cache`` is true, ``ox.net.readUrl``
    otherwise.
    """
    print('reading %s' % (url,))
    reader = ox.cache.readUrl if cache else ox.net.readUrl
    return reader(url)
|
|
|
|
def write_file(file, data):
    """Write ``data`` to ``file`` and return ``len(data)``.

    Prints a 'writing' notice and creates missing parent directories first.
    The file is closed even when writing fails.
    """
    print('writing %s' % (file,))
    write_path(file)
    with open(file, 'w') as f:
        f.write(data)
    return len(data)
|
|
|
|
def write_image(file, image):
    """Save ``image`` as ``file`` via ``image.save``.

    Prints a 'writing' notice and creates missing parent directories first.
    """
    print('writing %s' % (file,))
    write_path(file)
    image.save(file)
|
|
|
|
def write_json(file, data, reformat=False):
    """Serialize ``data`` as JSON with sorted keys and write it to ``file``.

    Without ``reformat`` the output is indented by 4.  With ``reformat`` it
    is dumped compactly and line breaks are then inserted by plain text
    replacement after '[' before '{', between '}, {' and before a closing
    ']' after '}'.
    """
    if reformat:
        text = json.dumps(data, sort_keys=True)
        replacements = (
            ('[{', '[\n {'),
            ('}, {', '},\n {'),
            ('}]', '}\n]'),
        )
        for old, new in replacements:
            text = text.replace(old, new)
    else:
        text = json.dumps(data, indent=4, sort_keys=True)
    write_file(file, text)
|
|
|
|
def write_log(file, line):
    """Append ``line`` to the log ``file``, or reset the log.

    ``line`` None empties the file.  Otherwise the line becomes the whole
    content of a new file, or is added after a line break to the existing
    content, which is read and written back in full.
    """
    if line is None:
        data = ''
    elif os.path.exists(file):
        data = read_file(file) + '\n' + line
    else:
        data = line
    write_file(file, data)
|
|
|
|
def write_path(file):
    """Create the directory part of ``file`` if there is one and it is missing."""
    directory = os.path.dirname(file)
    if not directory or os.path.exists(directory):
        return
    os.makedirs(directory)
|
|
|
|
import geonames
|
|
import imdb
|
|
import wikipedia
|
|
|
|
if __name__ == '__main__':

    # sys.exit() is used below, but `sys` is not among the imports visible
    # in this file; without this the script ended in a NameError instead of
    # exiting cleanly
    import sys

    # the stage that actually runs: write geo.json and the country data
    write_json('../json/geo.json', geo)
    countries = get_countries()
    sys.exit()

    # Everything from here on is never reached because of the exit above.
    # These are earlier stages of the script, each closed off by its own
    # sys.exit(); several call functions that are not defined in the visible
    # part of this file (get_geonames_countries, get_geonames_languages,
    # get_imdb_countries, get_imdb_languages).

    file = '../txt/geonames.org/cities1000.txt'
    geo['keys'] = read_json('../json/oxjs.org/geonames.keys.json')
    keys = geo['keys']['place']['geonames.org']
    drop = ['alternatenames']
    # filter = lambda x: x['feature_code'] != 'PPLX' and x['population'] >= 100000
    filter = lambda x: x['feature_code'] == 'PPLC' or x['population'] >= 10000
    sort = lambda x: -x['population']
    data = read_table(file, keys, drop=drop, filter=filter, sort=sort)
    write_json('../json/geonames.org/cities10000.json', data, False)
    print(len(data))
    sys.exit()

    cities = get_cities()
    print('%s %s' % (cities, len(cities)))
    sys.exit()

    countries = get_countries()
    print('current independent %s' % len([
        x for x in countries
        if not len(x['dependency']) and not x['former'] and not x['other']
    ]))
    print('former independent %s' % len([
        x for x in countries
        if not len(x['dependency']) and x['former'] and not x['other']
    ]))
    print('current dependency %s' % len([
        x for x in countries
        if len(x['dependency']) and not x['former'] and not x['other']
    ]))
    print('former dependency %s' % len([
        x for x in countries
        if len(x['dependency']) and x['former'] and not x['other']
    ]))
    print('other %s' % len([x for x in countries if x['other']]))
    sys.exit()

    file = '../txt/geonames.org/cities1000.txt'
    keys = geo['keys']['place']['geonames.org']
    drop = ['alternatenames']
    # filter = lambda x: x['feature_code'] != 'PPLX' and x['population'] >= 100000
    filter = lambda x: x['feature_code'] == 'PPLC' or x['population'] >= 100000
    sort = lambda x: -x['population']
    data = read_table(file, keys, drop=drop, filter=filter, sort=sort)
    write_json('../json/geonames.org/cities100000.json', data, False)
    print(len(data))

    get_oxjs_languages()

    get_geonames_countries()
    get_geonames_languages()
    get_imdb_countries()
    get_imdb_languages()
    sys.exit()

    #places = read_json('../json/geonames.org/DE.A.json')
    #tree = make_tree(places)
    #write_json('../json/geonames.org/tree.DE.json', tree)

    file = '../txt/geonames.org/countryInfo.txt'
    keys = geo['keys']['country']['geonames.org']
    sort = lambda x: -x['Population']
    data = read_table(file, keys, sort=sort)
    # file[7:-4] drops the leading '../txt/' and the trailing '.txt'
    write_json('../json/' + file[7:-4] + '.json', data)
    print(len(data))

    file = '../txt/geonames.org/iso-languagecodes.txt'
    keys = geo['keys']['language']['geonames.org']
    sort = lambda x: x['Language_Name']
    data = read_table(file, keys, sort=sort)
    write_json('../json/' + file[7:-4] + '.json', data)
    print(len(data))

    """
    languages = read_json('../json/geonames.org/iso-languagecodes.json')
    countries = read_json('../json/geonames.org/countryInfo.json')
    max_languages = max(map(lambda x: len(x['Languages']), countries))
    language_countries = {}
    for language in languages:
        language_name = language['Language_Name']
        language_iso = [language['ISO_639-3'], language['ISO_639-2'], language['ISO_639-1']]
        found = False
        for i in range(max_languages):
            for country in countries:
                if len(country['Languages']) > i:
                    country_language = country['Languages'][i].split('-')[0]
                    if country_language in language_iso:
                        if not language_name in language_countries:
                            language_countries[language_name] = []
                        language_countries[language_name].append({
                            'country': country['Country'],
                            'population': country['Population'],
                            'position': i + 1
                        })
    write_json('../json/geonames.org/languageCountriesAll.json', language_countries)
    for language, countries in language_countries.iteritems():
        language_countries[language] = countries[0]['country']
    language_countries['English'] = 'United Kingdom'
    language_countries['French'] = 'France'
    language_countries['Portuguese'] = 'Portugal'
    language_countries['Spanish'] = 'Spain'
    write_json('../json/geonames.org/languageCountries.json', language_countries)
    """

    file = '../txt/geonames.org/cities15000.txt'
    keys = geo['keys']['place']['geonames.org']
    filter = lambda x: x['population'] >= 100000
    sort = lambda x: -x['population']
    data = read_table(file, keys, filter=filter, sort=sort)
    write_json('../json/geonames.org/cities100000.json', data)
    print(len(data))

    file = '../txt/geonames.org/DE.txt'
    keys = geo['keys']['place']['geonames.org']
    # 'PPLA3' used to be a second 'PPLA2'; NOTE(review): assumed to be a
    # typo, since the list otherwise runs PPLA, PPLA2, ..., PPLA4 - confirm
    filter = lambda x: x['feature_class'] == 'A' or x['feature_code'] in [
        'PPLA', 'PPLA2', 'PPLA3', 'PPLA4', 'PPLC', 'PPLG'
    ]
    sort = lambda x: -x['population']
    data = read_table(file, keys, filter=filter, sort=sort)
    write_json('../json/geonames.org/DE.A,P.json', data)
    print(len(data))