# -*- coding: utf-8 -*-
# oxjs/tools/geo/py/geo.py

from __future__ import division
import Image
import json
import math
import os
import ox
import re

"""
run this, then html/geo.html, then py/fix_json.py

311 countries
--  sovereign, dependent and disputed countries according to
    http://en.wikipedia.org/wiki/List_of_sovereign_states
--  dissolved countries, according to
    http://en.wikipedia.org/wiki/ISO_3166-3 and http://www.imdb.com/country/
--  entities with country codes, according to
    http://en.wikipedia.org/wiki/ISO_3166-1_alpha-2
    (like Ascension, Canary Islands, Ceuta and Melilla, Diego Garcia,
    European Union, Metropolitan France, Tristan da Cunha, UK...)

writes json/geo.json and json/countries.json,
both will be used by html/geo.html

"""

# Southern bound shared by the Antarctic bounding boxes in geo['coordinates'].
# atan(sinh(pi)) is roughly 85.0511 degrees, so MIN_LAT is roughly -85.0511.
# NOTE(review): this matches the latitude at which a square Mercator map is
# cut off -- presumably chosen to fit the map used by html/geo.html; confirm.
_mercator_limit = math.atan(math.sinh(math.pi))  # positive, in radians
MIN_LAT = -math.degrees(_mercator_limit)

geo = {
    'code': {
        # missing on wikipedia
        'Neutral Zone': 'NTHH',
        'Réunion': 'RE',
        # not yet assigned, see http://en.wikipedia.org/wiki/List_of_sovereign_states
        'South Sudan': 'SS',
        # disputed, see http://en.wikipedia.org/wiki/List_of_sovereign_states
        'Abkhazia': 'GE-AB',
        'Kosovo': 'RS-KO',
        'Nagorno-Karabakh': 'AZ-NK',
        'Northern Cyprus': 'CY-NC',
        'South Ossetia': 'GE-SO',
        'Somaliland': 'SO-SO',
        'Transnistria': 'MD-TR',
        # dependent, see http://en.wikipedia.org/wiki/List_of_sovereign_states
        'Akrotiri and Dhekelia': 'GB-AD',
        'Ashmore and Cartier Islands': 'AU-AC',
        'Coral Sea Islands': 'AU-CS',
        'Peter I Island': 'NO-PI',
        # dependent, see http://en.wikipedia.org/wiki/ISO_3166-2:GB
        'England': 'GB-ENG',
        'Northern Ireland': 'GB-NIR',
        'Scotland': 'GB-SCT',
        'Wales': 'GB-WLS',
        # antarctic, see http://en.wikipedia.org/wiki/Territorial_claims_in_Antarctica
        'Adélie Land': 'FR-AQ',
        'Antártica': 'CL-AQ',
        'Argentine Antarctica': 'AR-AQ',
        'Australian Antarctic Territory': 'AU-AQ',
        'Ross Dependency': 'NZ-AQ',
        # exceptional, see http://en.wikipedia.org/wiki/ISO_3166-1_alpha-2
        'Canary Islands': 'IC',
        'Ceuta and Melilla': 'EA',
        'UK': 'UK',
        # dissolved, see http://www.imdb.com/country/
        'Korea': 'KOHH',
        'Netherlands Antilles': 'ANHH',
        'Siam': 'SITH',
        'West Germany': 'DEDE'
    },
    'coordinates': {
        # see http://en.wikipedia.org/wiki/Territorial_claims_in_Antarctica
        'Adélie Land': {
            'south': MIN_LAT,
            'west': 136.03333333,
            'north': -60,
            'east': 142.18333333
        },
        'Antarctica': {
            'south': MIN_LAT,
            'west': -179.99999999,
            'north': -60,
            'east': 179.99999999
        },
        'Antártica': {
            'south': MIN_LAT,
            'west': -90,
            'north': -60,
            'east': -53
        },
        'Argentine Antarctica': {
            'south': MIN_LAT,
            'west': -74,
            'north': -60,
            'east': -25
        },
        'Australian Antarctic Territory': {
            'south': MIN_LAT,
            'west': 44.63333333,
            'north': -60,
            'east': 160
        },
        'British Antarctic Territory': {
            'south': MIN_LAT,
            'west': -80,
            'north': -60,
            'east': -20
        },
        'Queen Maud Land': {
            'south': MIN_LAT,
            'west': 20,
            'north': -60,
            'east': 44.63333333
        },
        'Ross Dependency': {
            'south': MIN_LAT,
            'west': 160,
            'north': -60,
            'east': -150
        }
    },
    'dependencies': {
        'Argentina': ['Argentine Antarctica'],
        'Argentina, Australia, Chile, France, New Zealand, Norway, United Kingdom': ['Antarctica'],
        'Australia': [
            'Ashmore and Cartier Islands', 'Australian Antarctic Territory', 'Christmas Island', 'Cocos Islands', 'Coral Sea Islands',
            'Heard Island and McDonald Islands', 'Norfolk Island'
        ],
        'Chile': ['Antártica'],
        'China': ['Hong Kong', 'Macau'],
        'Denmark': ['Faroe Islands', 'Greenland'],
        'Finland': ['Åland'],
        'France': [
            'Adélie Land', 'Clipperton Island', 'French Afar and Issas', 'French Guiana', 'French Polynesia',
            'French Southern and Antarctic Territories', 'French Southern Territories', 'Guadeloupe', 'Martinique', 'Mayotte',
            'New Caledonia', 'Réunion', 'Saint Barthélemy', 'Saint Martin', 'Saint Pierre and Miquelon',
            'Wallis and Futuna'
        ],
        'France, United Kingdom': ['New Hebrides'],
        'Netherlands': [
            'Aruba', 'Bonaire, Saint Eustatius and Saba', 'Curaçao', 'Netherlands Antilles', 'Sint Maarten'
        ],
        'New Zealand': ['Cook Islands', 'Niue', 'Ross Dependency', 'Tokelau'],
        'Norway': ['Bouvet Island', 'Peter I Island', 'Queen Maud Land', 'Svalbard and Jan Mayen'],
        'Spain': ['Canary Islands', 'Ceuta and Melilla'],
        'Soviet Union': ['Byelorussian Soviet Socialist Republic'], # fixme: what about ukraine?
        'United Kingdom': [
            'Akrotiri and Dhekelia', 'Anguilla', 'Ascension Island', 'Bermuda', 'British Antarctic Territory',
            'British Indian Ocean Territory', 'British Virgin Islands', 'Cayman Islands', 'Diego Garcia', 'England',
            'Falkland Islands', 'Gibraltar', 'Gilbert and Ellice Islands', 'Guernsey', 'Isle of Man',
            'Jersey', 'Montserrat', 'Northern Ireland', 'Pitcairn Islands', 'Saint Helena',
            'Scotland', 'South Georgia and the South Sandwich Islands', 'Tristan da Cunha', 'Turks and Caicos Islands', 'Wales'
        ],
        'United Kingdom, United States': ['Canton and Enderbury Islands'],
        'United States': [
            'American Samoa', 'Guam', 'Northern Mariana Islands', 'Johnston Island', 'Midway Islands',
            'Pacific Islands', 'Panama Canal Zone', 'Puerto Rico', 'United States Minor Outlying Islands', 'United States Miscellaneous Pacific Islands',
            'United States Virgin Islands', 'Wake Island'
        ]
    },
    'disputes': {
        'Azerbaijan': ['Nagorno-Karabakh'],
        'Cyprus': ['Northern Cyprus'],
        'Georgia': ['Abkhazia', 'South Ossetia'],
        'Iraq, Saudi Arabia': ['Neutral Zone'],
        'Moldova': ['Transnistria'],
        'Serbia': ['Kosovo'],
        'Somalia': ['Somaliland']
    },
    'dissolved': {
        # merged
        'Canton and Enderbury Islands': ['Kiribati'],
        'East Germany': ['Germany'],
        'Johnston Island': ['United States Minor Outlying Islands'],
        'Midway Islands': ['United States Minor Outlying Islands'],
        'North Vietnam': ['Vietnam'],
        'Panama Canal Zone': ['Panama'],
        'Sikkim': ['India'],
        'South Yemen': ['Yemen'],
        'United States Miscellaneous Pacific Islands': ['United States Minor Outlying Islands'],
        'Wake Island': ['United States Minor Outlying Islands'],
        'West Germany': ['Germany'],
        # renamed
        'Burma': ['Myanmar'],
        'Byelorussian Soviet Socialist Republic': ['Belarus'],
        'Dahomey': ['Benin'],
        'East Timor': ['Timor-Leste'],
        'French Afar and Issas': ['Djibouti'],
        'New Hebrides': ['Vanuatu'],
        'Rhodesia': ['Zimbabwe'],
        'Siam': ['Thailand'],
        'Upper Volta': ['Burkina Faso'],
        'Yugoslavia': ['Serbia and Montenegro'], # fixme: shape and flag suggest 'split'
        'Zaire': ['Democratic Republic of the Congo'],
        # split
        'Czechoslovakia': ['Czech Republic', 'Slovakia'],
        'French Southern and Antarctic Territories': ['Adélie Land', 'French Southern Territories'],
        'Gilbert and Ellice Islands': ['Kiribati', 'Tuvalu'],
        'Korea': ['North Korea', 'South Korea'],
        'Netherlands Antilles': ['Bonaire, Saint Eustatius and Saba', 'Curaçao', 'Sint Maarten'],
        'Neutral Zone': ['Iraq', 'Saudi Arabia'],
        'Pacific Islands': ['Marshall Islands', 'Micronesia', 'Northern Mariana Islands', 'Palau'],
        'Serbia and Montenegro': ['Serbia', 'Montenegro'],
        'Soviet Union': [
            'Armenia', 'Azerbaijan', 'Belarus', 'Estonia', 'Georgia',
            'Kazakhstan', 'Kyrgyzstan', 'Latvia', 'Lithuania', 'Moldova',
            'Russia', 'Tajikistan', 'Turkmenistan', 'Ukraine', 'Uzbekistan'
        ]
    },
    'flag': {
        'Australian Antarctic Territory': 'Flag_of_Australia.svg',
        'Antarctica': 'Flag_of_the_Antarctic_Treaty.svg',
        'Antártica': 'Flag_of_Magallanes,_Chile.svg',
        'Ashmore and Cartier Islands': 'Flag_of_Australia.svg',
        'Bonaire, Saint Eustatius and Saba': 'Flag_of_Bonaire.svg',
        'Burma': 'Flag_of_Myanmar_(1974-2010).svg',
        'Byelorussian Soviet Socialist Republic': 'Flag_of_Byelorussian_SSR.svg',
        'Canton and Enderbury Islands': 'Flag_of_Gilbert_and_Ellice_Islands.svg',
        'Ceuta and Melilla': 'Flag_Ceuta.svg',
        'Coral Sea Islands': 'Flag_of_Australia.svg',
        'Diego Garcia': 'Flag_of_the_British_Indian_Ocean_Territory.svg',
        'French Guiana': 'Flag_of_French_Guiana.svg',
        'Korea': 'Flag_of_Korea_1882.svg',
        # 'Libya': 'Flag_of_the_Libyan_Jamahiriya_1977.svg',
        'Metropolitan France': 'Flag_of_France.svg',
        'Neutral Zone': 'Flag_of_the_United_Nations.svg',
        'New Hebrides': 'Flag_of_Anglo-French_Joint_Naval_Commission.svg',
        'Northern Ireland': 'Ulster_banner.svg',
        'Panama Canal Zone': 'Panama_Canal_Zone_Flag.png',
        'Peter I Island': 'Flag_of_Norway.svg',
        'Réunion': 'Drapeau_Reunion_APDR.png',
        'Saint Martin': 'Flag_of_Saint-Martin_(local).svg',
        'Siam': 'State_Flag_of_Thailand_(1916).svg',
        'Svalbard and Jan Mayen': 'Flag_of_Norway.svg',
        'United States Miscellaneous Pacific Islands': 'Flag_of_the_United_States.svg',
        'Wallis and Futuna': 'Flag_of_Wallis_and_Futuna.svg',
        'Yugoslavia': 'Flag_of_SFR_Yugoslavia.svg'
    },
    'google_name': {
        'Bahamas': 'The Bahamas',
        'Brunei': 'Brunei Darussalam',
        'Cocos Islands': 'Cocos (Keeling) Islands',
        'Gambia': 'The Gambia',
        'Kosovo': 'Kosova (Kosovo)',
        'Macedonia': 'Former Yugoslav Republic of Macedonia',
        'Myanmar': 'Burma',
        'Palestine': 'Palestinian Territories',
        'Republic of the Congo': 'Congo',
        'Sahrawi': 'Western Sahara',
        'United Kingdom': 'UK',
        'United States': 'USA'
    },
    'google_query': {
        'Akrotiri and Dhekelia': ['Akrotiri, Cyprus', 'Dhekelia, Cyprus'],
        'Bonaire, Saint Eustatius and Saba': ['Bonaire', 'Saba, Netherlands Antilles', 'Saint Eustatius'],
        'Byelorussian Soviet Socialist Republic': ['Belarus'],
        'Canton and Enderbury Islands': ['Canton Island', 'Enderbury Island'],
        'Ceuta and Melilla': ['Ceuta', 'Melilla'],
        'Clipperton Island': ['Île de la Passion'],
        'Curaçao': ['Banda Abou, Curaçao'],
        'Czechoslovakia': ['Czech Republic', 'Slovakia'],
        'East Germany': [
            'Mecklenburg Vorpommern', 'Saxony', 'Thuringia'
        ],
        'Dahomey': ['Benin'],
        'European Union': ['Europe'],
        'French Afar and Issas': ['Djibouti'],
        # see http://en.wikipedia.org/wiki/French_Southern_and_Antarctic_Lands
        # and http://en.wikipedia.org/wiki/Scattered_Islands_in_the_Indian_Ocean
        'French Southern and Antarctic Territories': ['Adélie Land', 'Bassas da India', 'Glorioso Islands', 'Île Kerguelen'],
        'French Southern Territories': ['Bassas da India', 'Glorioso Islands', 'Île Kerguelen'],
        'Georgia': ['Georgia, Asia'],
         # see http://en.wikipedia.org/wiki/Gilbert_Islands
        'Gilbert and Ellice Islands': ['Arorae, Kiribati', 'Butaritari, Kiribati', 'Makin, Kiribati', 'Tuvalu'],
        'Jamaica': ['Clarendon Parish, Jamaica', 'St. James Parish, Jamaica', 'St. Thomas Parish, Jamaica', 'Westmoreland Parish, Jamaica'], # in case results are us-biased
        'Johnston Island': ['Johnston Atoll'],
        'Korea': ['North Korea', 'South Korea'],
        'Lebanon': ['Lebanon, Asia'], # in case results are us-biased
        'Metropolitan France': ['France'],
        'Midway Islands': ['Midway Atoll'],
        'Neutral Zone': ['Neutral Zone, Saudi Arabia'],
        'New Hebrides': ['Vanuatu'],
        'North Vietnam': ['Ha Giang, Vietnam', 'Lai Chau, Vietnam', 'Thua Thien-Hue, Vietnam'],
        'Northern Cyprus': ['Karpass, Cyprus', 'Kokkina, Cyprus', 'Lympia, Cyprus'],
        'Pacific Islands': ['Marshall Islands', 'Micronesia', 'Northern Mariana Islands', 'Palau'],
        # 'Palestine': ['71, Israel', 'El-arish Rafah, Egypt'],
        'Panama Canal Zone': ['Amador, Panama', 'Fort Sherman, Panama'],
        'Sahrawi': ['Western Sahara'],
        'Saint Helena': ['Ascension Island', 'Longwood, Saint Helena', 'Tristan da Cunha'], # in case results are us-biased
        'San Marino': ['San Marino, Europe'], # in case results are us-biased
        'Serbia and Montenegro': ['Montenegro', 'Serbia'],
        'Siam': ['Thailand'],
        'Sint Maarten': ['Sint Maarten, Netherlands Antilles'],
        'South Ossetia': ['Shida Kartli'],
        # see http://en.wikipedia.org/wiki/Sudan#States_and_regions
        'South Sudan': [
            'Central Equatoria, Sudan', 'Eastern Equatoria, Sudan', 'Jonglei, Sudan', 'Lakes, Sudan', 'Northern Bahr el Ghazal, Sudan',
            'Upper Nile, Sudan', 'Unity, Sudan', 'Warrap, Sudan', 'Western Bahr el Ghazal, Sudan', 'Western Equatoria, Sudan'
        ],
        'South Yemen': ['Al-Mahrah, Yemen', 'Lahij, Yemen'],
        'Soviet Union': [
            'Armenia', 'Azerbaijan', 'Belarus', 'Estonia', 'Georgia, Europe',
            'Latvia', 'Lithuania', 'Kazakhstan', 'Kyrgyzstan', 'Moldova',
            'Russia', 'Tajikistan', 'Turkmenistan', 'Ukraine', 'Uzbekistan'
        ],
        'United Kingdom': ['England', 'Northern Ireland', 'Scotland', 'Wales, United Kingdom'],
        'United States Minor Outlying Islands': ['Midway Islands'],
        # Baker Island, Howland Island, Jarvis Island, Kingman Reef, Palmyra Atoll --
        # but the first three each return the full United States Minor Outlying Islands
        'United States Miscellaneous Pacific Islands': ['Kingman Reef', 'Palmyra Atoll'],
        'UK': ['England', 'Northern Ireland', 'Scotland', 'Wales, United Kingdom'],
        'Upper Volta': ['Burkina Faso'],
        'Wake Island': ['Wake Atoll'],
        'Wales': ['Wales, United Kingdom'],
        'West Germany': ['Schleswig Holstein', 'Northrhine Westphalia', 'Bavaria, Germany'],
        'Yugoslavia': [
            'Bosnia and Herzegovina', 'Croatia', 'Serbia', 'Slovenia', 'Macedonia',
            'Montenegro'
        ]
    },
    'icon': {
        'AC': 'cross(340, resize, 684)',
        'AD': 'extend()',
        'AE': 'crop(683)',
        'AF': 'extend()',
        'AG': 'extend(left, left)',
        '_AG': 'resize(); copy(580, 90, 1468, 534, 592, 296, 387, 238); copy(580, 0, 1468, 90, 592, 238, 387, 0) # *2/3',
        'AI': 'extend(right, right)',
        'AIDJ': 'extend()',
        'AL': 'crop()',
        'AM': 'crop()',
        'ANHH': 'crop()',
        'AO': 'crop()',
        'AQ': 'crop()',
        'AR': 'crop()',
        'AR-AQ': 'extend(right, left)',
        'AS': 'extend(left, left)',
        'AT': 'crop()',
        'AU': 'extend(right, right)',
        'AU-AC': 'extend(right, right)',
        'AU-AQ': 'extend(right, right)',
        'AU-CS': 'extend(right, right)',
        'AW': 'crop(left)',
        'AX': 'crop(827)',
        'AZ': 'crop()',
        'AZ-NK': 'crop(right)',
        'BA': 'crop(1054)',
        'BB': 'extend()',
        'BD': 'crop(922)',
        'BE': 'extend()',
        'BF': 'crop()',
        'BG': 'crop()',
        'BH': 'crop(666)',
        '_BI': 'crop()',
        'BI': 'resize(); copy(677, 267, 1371, 961, 696, 696, 267, 267, circle)',
        'BJ': 'crop(819)',
        'BL': 'extend()',
        'BM': 'extend(right, right)',
        'BN': 'crop()',
        'BO': 'crop()',
        'BQ': 'crop(left)',
        'BQAQ': 'extend(right, right)',
        'BR': 'extend()',
        'BS': 'cut(0, 0, 342, 0); resize()',
        'BT': 'extend(left, right)',
        'BUMM': 'slice(444)',
        'BV': 'crop(left)',
        'BW': 'crop()',
        'BY': 'crop(left)',
        'BYAA': 'crop(left)',
        'BZ': 'crop()',
        'CA': 'extend()',
        'CC': 'extend()',
        'CD': 'crop(left)',
        'CF': 'crop(); copy(85, 0, 597, 341, 512, 341, 0, 0)',
        'CG': 'resize()',
        'CH': 'pass()',
        'CI': 'extend()',
        'CK': 'extend(right, right)',
        'CL': 'crop(left)',
        'CL-AQ': 'extend()',
        'CM': 'extend()',
        'CN': 'cut(1024, 0, 0, 341); resize(); copy(145, 73, 885, 679, 740, 606, 142, 209)',
        'CO': 'crop()',
        'CP': 'extend()',
        'CR': 'crop(left)',
        'CSHH': 'resize()',
        'CSXX': 'crop()',
        'CTKI': 'extend(right, right)',
        'CU': 'crop(left)',
        'CV': 'crop(768)',
        'CW': 'crop(left)',
        'CX': 'cut(2, 1, 2, 1); extend(right, left)',
        'CY': 'crop()',
        'CY-NC': 'crop(811)',
        'CZ': 'resize()',
        'DDDE': 'crop()',
        'DE': 'crop()',
        'DEDE': 'crop()',
        'DG': 'extend(right, right)',
        'DJ': 'crop(left)',
        'DK': 'crop(left)',
        'DM': 'crop()',
        'DO': 'crop()',
        'DYBJ': 'crop(819)',
        'DZ': 'crop()',
        'EA': 'crop()',
        'EC': 'crop()',
        'EE': 'crop()',
        'EG': 'crop()',
        'EH': 'cut(0, 0, 683, 0); resize(); copy(768, 0, 1280, 1024, 512, 1024, 512, 0)',
        'ER': 'extend(right, right)',
        'ES': 'crop(left)',
        'ET': 'crop()',
        'EU': 'crop()',
        'FI': 'crop(740)',
        'FJ': 'extend(right, right)',
        'FK': 'extend(right, right)',
        'FM': 'crop()',
        'FO': 'crop(left)',
        'FQHH': 'extend(right, right); copy(0, 539, 826, 552, 826, 13, 0, 328)',
        'FR': 'extend()',
        'FR-AQ': 'extend(right, right); copy(0, 539, 826, 552, 826, 13, 0, 328)',
        'FXFR': 'extend()',
        'GA': 'crop()',
        'GB': 'cross(340, resize, 684)',
        'GB-AD': 'cross(340, resize, 684)',
        'GB-ENG': 'crop()',
        'GB-NIR': 'crop()',
        'GB-SCT': 'resize()',
        'GB-WLS': 'extend()',
        'GD': 'crop()',
        '_GD': 'border(172); copy(172, 172, 1867, 1057, 885, 885, 172, 172)',
        'GE': 'cross(272, crop)',
        'GE-AB': 'crop(left)',
        'GE-SO': 'crop()',
        'GEHH': 'extend(right, right)',
        'GF': 'crop()',
        'GG': 'crop()',
        'GH': 'crop()',
        'GI': 'crop()',
        'GL': 'crop(797)',
        'GM': 'crop()',
        'GN': 'extend()',
        'GP': 'extend()',
        'GQ': 'cut(0, 0, 1024, 0); resize(); copy(683, 0, 1365, 1365, 682, 1365, 683, 0)',
        'GR': 'crop(left)',
        'GS': 'extend(right, right)',
        'GT': 'extend()',
        'GU': 'border(50)',
        'GW': 'slice(341)',
        'GY': 'resize()',
        'HK': 'crop()',
        'HM': 'extend(right, right)',
        'HN': 'crop()',
        'HR': 'crop()',
        'HT': 'crop()',
        'HU': 'crop()',
        'HVBF': 'crop()',
        'IC': 'extend()',
        'ID': 'crop()',
        'IE': 'extend()',
        'IL': 'crop()',
        'IM': 'crop()',
        'IN': 'crop()',
        'IO': 'extend(right, right)',
        'IQ': 'crop()',
        'IR': 'crop()',
        'IS': 'crop(left)',
        'IT': 'extend()',
        'JE': 'crop()',
        'JM': 'resize()',
        'JO': 'crop(left)',
        'JP': 'crop()',
        'JTUM': 'extend()',
        'KE': 'crop()',
        'KG': 'crop()',
        'KH': 'crop()',
        'KI': 'crop()',
        'KM': 'crop(left)',
        'KN': 'crop()',
        'KOHH': 'extend()',
        'KP': 'crop(683)',
        'KR': 'extend()',
        'KW': 'cut(0, 0, 512, 0); resize()',
        'KY': 'extend(right, right)',
        'KZ': 'crop(left); copy(696, 0, 1526, 1024, 840, 1024, 184, 0)',
        'LA': 'crop()',
        'LB': 'crop()',
        'LC': 'crop()',
        'LI': 'slice(455)',
        'LK': 'cut(649, 0, 0, 0); border(85)',
        'LR': 'crop(left)',
        'LS': 'crop()',
        'LT': 'crop()',
        'LU': 'crop()',
        'LV': 'crop()',
        'LY': 'crop()',
        'MA': 'crop()',
        'MC': 'crop()',
        'MD': 'extend()',
        'MD-TR': 'crop(left)',
        'ME': 'border(51)',
        'MF': 'crop()',
        'MG': 'crop(left)',
        'MH': 'extend()',
        'MIUM': 'extend()',
        'MK': 'cut(308, 0, 308, 0); resize(); copy(842, 330, 1206, 694, 364, 364, 330, 330, circle)',
        'ML': 'extend()',
        'MM': 'crop()',
        'MN': 'extend()',
        'MO': 'crop()',
        'MP': 'crop()',
        'MQ': 'cross(198, crop)',
        'MR': 'crop()',
        'MS': 'extend(right, right)',
        'MT': 'border(480)',
        'MU': 'crop()',
        'MV': 'border(341)',
        'MW': 'crop()',
        'MX': 'extend()',
        '_MY': 'crop(left)',
        'MY': 'crop(right); copy(0, 0, 1, 1, 585, 585, 0, 0); copy(0, 0, 1024, 585, 585, 334, 0, 126)',
        'MZ': 'crop(left)',
        'NA': 'crop(left)',
        'NC': 'crop(681)',
        'NE': 'crop()',
        'NF': 'extend()',
        'NG': 'extend()',
        'NHVU': 'crop()',
        'NI': 'crop()',
        'NL': 'crop()',
        'NO': 'crop(left)',
        'NO-PI': 'crop(left)',
        'NP': 'construct()',
        'NQAQ': 'crop(left)',
        'NR': 'crop(left)',
        'NTHH': 'crop()',
        'NU': 'extend(right, right)',
        'NZ': 'extend(right, right)',
        'NZ-AQ': 'extend(right, right)',
        'OM': 'crop(left)',
        'PA': 'slice()',
        'PCHH': 'crop()',
        'PE': 'extend()',
        'PF': 'crop()',
        'PG': 'extend(right, left)',
        'PH': 'crop(left)',
        'PK': 'extend()',
        'PL': 'crop()',
        'PM': 'cut(558, 0, 0, 0); extend()',
        'PN': 'extend(right, right)',
        'PR': 'crop(left)',
        'PS': 'cut(0, 0, 683, 0); resize()',
        'PT': 'crop(819)',
        'PUUM': 'crop(778)',
        'PW': 'crop(896)',
        'PY': 'crop()',
        'PZPA': 'crop()',
        'QA': 'crop(659)',
        'RE': 'cut(464, 0, 0, 0); resize()',
        'RHZW': 'cut(0, 3, 0, 2); extend()',
        'RO': 'extend()',
        'RS': 'crop(732)',
        'RS-KO': 'crop()',
        'RU': 'crop()',
        'RW': 'crop(right)',
        'SA': 'crop()',
        'SB': 'extend(left, right); copy(103, 972, 206, 1024, 103, 52, 0, 1536); copy(1842, 0, 1945, 52, 103, 52, 1945, 460)',
        'SC': 'resize()',
        'SD': 'cut(0, 0, 683, 0); resize()',
        'SE': 'crop(768)',
        'SG': 'crop(787)',
        'SH': 'extend(right, right)',
        'SI': 'crop(left)',
        'SITH': 'crop()',
        'SJ': 'crop(left)',
        'SK': 'crop(679)',
        'SKIN': 'border(165)',
        'SL': 'crop()',
        'SM': 'crop()',
        'SN': 'extend()',
        'SO': 'crop()',
        'SO-SO': 'crop()',
        'SR': 'crop()',
        'SS': 'crop(left)',
        'ST': 'resize(); copy(832, 320, 1216, 704, 384, 384, 272, 320); copy(1344, 320, 1728, 704, 384, 384, 624, 320)',
        'SUHH': 'crop(right); copy(85, 0, 597, 512, 1024, 1024, 0, 0)',
        'SV': 'crop()',
        'SX': 'crop(left)',
        'SY': 'slice(614)',
        'SZ': 'extend()',
        'TA': 'extend(right, right)',
        'TC': 'extend(right, right)',
        'TD': 'extend()',
        'TF': 'extend(right, right); copy(0, 539, 826, 552, 826, 13, 0, 328)',
        'TG': 'crop(left)',
        'TH': 'crop()',
        'TJ': 'crop()',
        'TK': 'extend()',
        'TL': 'crop(left)',
        'TM': 'crop(left)',
        'TN': 'crop()',
        'TO': 'slice(427)',
        'TPTL': 'crop(left)',
        'TR': 'crop(844)',
        'TT': 'resize()',
        'TV': 'extend(right, right)',
        'TW': 'slice()',
        'TZ': 'resize()',
        'UA': 'crop()',
        'UG': 'crop()',
        'UK': 'cross(340, resize, 684)',
        'UM': 'crop(778)',
        'US': 'crop(778)',
        'UY': 'crop(left)',
        'UZ': 'crop(left)',
        'VA': 'pass()',
        'VC': 'extend()',
        'VDVN': 'crop()',
        'VE': 'crop(); copy(0, 0, 455, 455, 455, 455, 0, 0)',
        'VG': 'extend(right, right)',
        'VI': 'extend()',
        'VN': 'crop()',
        'VU': 'crop(left)',
        'WF': 'extend(right, right); copy(0, 539, 826, 552, 826, 13, 0, 328)',
        'WKUM': 'crop(left)',
        'WS': 'slice()',
        'YDYE': 'crop(left)',
        'YE': 'crop()',
        'YT': 'extend()',
        'YUCS': 'crop()',
        'ZA': 'resize()',
        'ZM': 'cut(598, 0, 0, 0); extend()',
        'ZRCD': 'crop()',
        'ZW': 'crop(left)'
    },
    'imdb_code': {
        'Côte d\'Ivoire': 'ci',
        'Democratic Republic of the Congo': 'cd',
        'Palestine': 'ps',
        'Serbia and Montenegro': 'xfy'
    },
    'imdb_name': {
        'Côte d\'Ivoire': 'Ivory Coast', # bug in some imdb entries
        'Democratic Republic of the Congo': 'Democratic Republic of Congo', # bug in some imdb entries
        'Palestine': 'Occupied Palestinian Territory', # bug in some imdb entries
        'Serbia and Montenegro': 'Federal Republic of Yugoslavia'
    },

    # import json
    # import re
    # from ox.net import readUrl
    # html = readUrl('http://www.imdb.com/language/')
    # results = re.compile('<a href="/language/.*?">(.*?)</a>').findall(html)
    # languages = {}
    # for result in results:
    #     languages[result] = ''
    # print json.dumps(languages, sort_keys=True)

    # http://www.imdb.com/language/
    'languages': {
        'Abkhazian': 'Abkhazia',
        'Aboriginal': 'Australia',
        'Aché': '',
        'Acholi': '',
        'Afrikaans': 'South Africa',
        'Aidoukrou': '',
        'Akan': '',
        'Albanian': 'Albania',
        'Algonquin': '',
        'American': 'United States',
        'Amharic': 'Ethiopia',
        'Apache': '',
        'Arabic': 'Saudi Arabia',
        'Aragonese': '',
        'Aramaic': 'Syria',
        'Arapaho': '',
        'Armenian': 'Armenia',
        'Assamese': 'India',
        'Assyrian Neo-Aramaic': '',
        'Athapascan': '',
        'Australian': 'Australia',
        'Awadhi': '',
        'Aymara': 'Bolivia',
        'Azerbaijani': 'Azerbaijan',
        'Bable': '',
        'Baka': '',
        'Balinese': 'Indonesia',
        'Bambara': 'Mali',
        'Basque': 'Spain',
        'Bassari': '',
        'Belarusian': 'Belarus',
        'Bemba': '',
        'Bengali': 'Bangladesh',
        'Berber': 'Morocco', # ?, one of multiple
        'Bhojpuri': '',
        'Bicolano': 'Philippines',
        'Bodo': '',
        'Bosnian': 'Bosnia and Herzegovina',
        'Brazilian': 'Brazil',
        'Breton': 'France',
        'British': 'United Kingdom',
        'Bulgarian': 'Bulgaria',
        'Burmese': 'Burma',
        'Cantonese': 'China',
        'Catalan': 'Spain',
        'Central Khmer': 'Cambodia',
        'Chaozhou': 'China',
        'Chechen': 'Russia',
        'Cherokee': 'United States',
        'Cheyenne': 'United States',
        'Chhattisgarhi': 'India',
        'Chinese': 'China',
        'Cornish': 'United Kingdom',
        'Corsican': 'France',
        'Cree': 'Canada',
        'Creek': 'United States',
        'Creole': '',
        'Creoles and pidgins': '',
        'Croatian': 'Croatia',
        'Crow': '',
        'Czech': 'Czech Republic',
        'Danish': 'Denmark',
        'Dari': 'Afghanistan',
        'Desiya': '',
        'Dinka': '',
        'Djerma': 'Niger',
        'Dogri': '',
        'Dutch': 'Netherlands',
        'Dyula': '',
        'Dzongkha': '',
        'East-Greenlandic': 'Greenland',
        'Eastern Frisian': 'Germany',
        'Egyptian (Ancient)': 'Egypt',
        'English': 'United Kingdom',
        'Esperanto': '',
        'Estonian': 'Estonia',
        'Ewe': '',
        'Faliasch': '',
        'Faroese': 'Faroe Islands',
        'Filipino': 'Philippines',
        'Finnish': 'Finland',
        'Flemish': 'Belgium',
        'Fon': '',
        'French': 'France',
        'Fulah': '',
        'Fur': '',
        'Gaelic': 'Ireland',
        'Galician': 'Spain',
        'Georgian': 'Georgia',
        'German': 'Germany',
        'Grebo': '',
        'Greek': 'Greece',
        'Greek, Ancient (to 1453)': 'Greece',
        'Greenlandic': 'Greenland',
        'Guarani': 'Paraguay',
        'Gujarati': 'India',
        'Gumatj': '',
        'Gunwinggu': 'Australia',
        'Haitian': 'Haiti',
        'Hakka': 'China',
        'Haryanvi': '',
        'Hassanya': 'Mauritania',
        'Hausa': 'Nigeria',
        'Hawaiian': 'United States',
        'Hebrew': 'Israel',
        'Hindi': 'India',
        'Hmong': '',
        'Hokkien': 'China',
        'Hopi': 'United States',
        'Hungarian': 'Hungary',
        'Iban': '',
        'Ibo': 'Nigeria',
        'Icelandic': 'Iceland',
        'Indian': 'India',
        'Indonesian': 'Indonesia',
        'Inuktitut': '',
        'Inupiaq': '',
        'Irish Gaelic': 'Ireland',
        'Italian': 'Italy',
        'Japanese': 'Japan',
        'Jola-Fonyi': '',
        'Ju\'hoan': '',
        'Kaado': '',
        'Kabuverdianu': 'Cape Verde',
        'Kabyle': '',
        'Kalmyk-Oirat': '',
        'Kannada': 'India',
        'Karajá': '',
        'Karbi': '',
        'Karen': '',
        'Kazakh': 'Kazakhstan',
        'Khanty': 'Russia',
        'Khasi': '',
        'Kikuyu': '',
        'Kinyarwanda': '',
        'Kirundi': '',
        'Klingon': '',
        'Kodava': '',
        'Konkani': 'India',
        'Korean': 'South Korea',
        'Korowai': 'Papua New Guinea',
        'Kriolu': 'Cape Verde',
        'Kru': '',
        'Kudmali': '',
        'Kuna': '',
        'Kurdish': 'Turkey', # ?, one of multiple
        'Kwakiutl': '',
        'Kyrgyz': 'Kyrgyzstan',
        'Ladakhi': 'India',
        'Ladino': '',
        'Lao': 'Laos',
        'Latin': 'Italy',
        'Latvian': 'Latvia',
        'Limbu': '',
        'Lingala': 'Democratic Republic of the Congo',
        'Lithuanian': 'Lithuania',
        'Low German': 'Germany',
        'Luxembourgish': 'Luxemburg',
        'Macedonian': 'Macedonia',
        'Macro-Jê': '',
        'Magahi': '',
        'Maithili': '',
        'Malagasy': 'Madagascar',
        'Malay': 'Malaysia',
        'Malayalam': 'India',
        'Malecite-Passamaquoddy': '',
        'Malinka': 'Guinea',
        'Maltese': 'Malta',
        'Manchu': '',
        'Mandarin': 'China',
        'Mandingo': '',
        'Manipuri': '',
        'Maori': 'New Zealand',
        'Mapudungun': 'Chile',
        'Marathi': 'India',
        'Marshallese': 'Marshall Islands',
        'Masai': '',
        'Masalit': '',
        'Maya': 'Mexico',
        'Mende': 'Sierra Leone',
        'Micmac': '',
        'Middle English': 'England',
        'Min Nan': '',
        'Minangkabau': '',
        'Mirandese': '',
        'Mizo': '',
        'Mohawk': '',
        'Mongolian': 'Mongolia',
        'Montagnais': '',
        'More': 'Burkina Faso',
        'Morisyen': '',
        'Nagpuri': 'India',
        'Nahuatl': '',
        'Nama': '',
        'Navajo': 'United States',
        'Naxi': 'China',
        'Ndebele': '',
        'Neapolitan': 'Italy',
        'Nenets': '',
        'Nepali': 'Nepal',
        'Nisga\'a': 'Canada',
        'None': '',
        'Norse, Old': '',
        'North American Indian': '',
        'Norwegian': 'Norway',
        'Nushi': '',
        'Nyaneka': '',
        'Nyanja': 'Malawi',
        'Occitan': '',
        'Ojibwa': '',
        'Ojihimba': '',
        'Old English': 'England',
        'Oriya': '',
        'Papiamento': '',
        'Parsee': 'Iran',
        'Pashtu': 'Afghanistan',
        'Pawnee': '',
        'Persian': 'Iran',
        'Peul': '',
        'Polish': 'Poland',
        'Polynesian': '',
        'Portuguese': 'Portugal',
        'Pular': '',
        'Punjabi': 'India',
        'Purepecha': '',
        'Quechua': 'Peru',
        'Quenya': '',
        'Rajasthani': 'India',
        'Rawan': '',
        'Romanian': 'Romania',
        'Romansh': 'Switzerland',
        'Romany': 'Romania',
        'Rotuman': '',
        'Russian': 'Russia',
        'Ryukyuan': 'Japan',
        'Saami': 'Finland',
        'Samoan': 'Samoa',
        'Sanskrit': '',
        'Sardinian': 'Italy',
        'Scanian': '',
        'Serbian': 'Serbia',
        'Serbo-Croatian': 'Yugoslavia',
        'Serer': '',
        'Shanghainese': 'China',
        'Shanxi': 'China',
        'Shona': 'Zimbabwe',
        'Shoshoni': 'United States',
        'Sicilian': 'Italy',
        'Sindarin': '',
        'Sindhi': 'Pakistan',
        'Sinhala': 'Sri Lanka',
        'Sioux': 'United States',
        'Slovak': 'Slovakia',
        'Slovenian': 'Slovenia',
        'Somali': 'Somalia',
        'Songhay': '',
        'Soninke': '',
        'Sorbian': 'Germany',
        'Sotho': 'Lesotho',
        'Sousson': '',
        'Spanish': 'Spain',
        'Sranan': '',
        'Swahili': 'Kenya', # ?, one of multiple
        'Swedish': 'Sweden',
        'Swiss German': 'Switzerland',
        'Sylheti': '',
        'Tagalog': 'Philippines',
        'Tajik': 'Tajikistan',
        'Tamashek': 'Algeria', # ?, one of multiple
        'Tamil': 'Sri Lanka',
        'Tarahumara': '',
        'Tatar': 'Russia',
        'Telugu': 'India',
        'Teochew': '',
        'Thai': 'Thailand',
        'Tibetan': 'China',
        'Tigrigna': '',
        'Tlingit': '',
        'Tok Pisin': '',
        'Tonga (Tonga Islands)': 'Tonga',
        'Tsonga': '',
        'Tswa': '',
        'Tswana': '',
        'Tulu': '',
        'Tupi': '',
        'Turkish': 'Turkey',
        'Turkmen': 'Turkmenistan',
        'Tuvinian': '',
        'Tzotzil': '',
        'Ukrainian': 'Ukraine',
        'Ungwatsi': '',
        'Urdu': 'Pakistan',
        'Uzbek': 'Uzbekistan',
        'Vietnamese': 'Vietnam',
        'Visayan': '',
        'Washoe': '',
        'Welsh': 'Wales',
        'Wolof': 'Senegal', # ?, one of multiple
        'Xhosa': 'South Africa',
        'Yakut': '',
        'Yapese': '',
        'Yiddish': 'Israel',
        'Yoruba': 'Nigeria',
        'Zulu': 'South Africa'
    },
    'other': ['European Union', 'Metropolitan France', 'UK'],
    'wikipedia_name': {
        # ambiguous on wikipedia
        'Cocos (Keeling) Islands': 'Cocos Islands',
        'Collectivity of Saint Martin': 'Saint Martin',
        'Federated States of Micronesia': 'Micronesia',
        'French Territory of the Afars and the Issas': 'French Afar and Issas',
        'Georgia (country)': 'Georgia',
        'Nagorno-Karabakh Republic': 'Nagorno-Karabakh',
        'People\'s Republic of China': 'China',
        'Republic of China': 'Taiwan',
        'Republic of Dahomey': 'Dahomey',
        'Republic of Ireland': 'Ireland',
        'Republic of Kosovo': 'Kosovo',
        'Republic of Macedonia': 'Macedonia',
        'Republic of Upper Volta': 'Upper Volta',
        'Sahrawi Arab Democratic Republic': 'Sahrawi',
        'Saudi-Iraqi neutral zone': 'Neutral Zone',
        'State of Palestine': 'Palestine',
        'Trust Territory of the Pacific Islands': 'Pacific Islands'
    },
    'wikipedia_url': {
        # dependencies of guernsey
        'Alderney': '',
        'Herm': '',
        'Sark': '',
        # territory of pakistan
        'Azad_Kashmir': '',
        'Gilgit-Baltistan': '',
        # wrong in http://en.wikipedia.org/wiki/List_of_sovereign_states
        'Coral_Sea_Islands_Territory': 'Coral_Sea_Islands',
        'Kingdom_of_the_Netherlands': 'Netherlands',
        'Saint-Barth%C3%A9lemy': 'Saint_Barth%C3%A9lemy',
        'Saint_Martin': 'Collectivity_of_Saint_Martin',
        # wrong in http://en.wikipedia.org/wiki/ISO_3166-1_alpha-2
        'Caribbean_Netherlands': 'Bonaire,_Saint_Eustatius_and_Saba',
        'Ceuta': 'Ceuta_and_Melilla',
        'Palestinian_territories': 'State_of_Palestine',
        'Saudi%E2%80%93Iraqi_neutral_zone': 'Saudi-Iraqi_neutral_zone',
        'Western_Sahara': 'Sahrawi_Arab_Democratic_Republic',
        # wrong in http://en.wikipedia.org/wiki/ISO_3166-3
        'Johnston_Atoll': 'Johnston_Island',
        'Midway_Atoll': 'Midway_Islands',
        # wrong in all
        '%C3%85land_Islands': 'Åland',
        'East Timor': 'Timor-Leste',
        'Cocos_(Keeling)_Islands': 'Cocos_Islands',
        'French_Southern_and_Antarctic_Lands': 'French_Southern_and_Antarctic_Territories',
        'Saint_Helena,_Ascension_and_Tristan_da_Cunha': 'Saint_Helena',
        'The_Bahamas': 'Bahamas',
        'The_Gambia': 'Gambia'
    },
    'wikipedia_urls': [
        # not in any list
        # antarctic
        'Adélie_Land', 'Antártica', 'Argentine_Antarctica',
        # dependent
        'England', 'Northern_Ireland', 'Scotland', 'Wales',
        # former
        'Korea', 'Siam', 'West_Germany',
        # other
        'East Timor', 'French_Southern_Territories', 'Peter_I_Island', 'South_Sudan', 'UK'
    ]
}

def get_cities():
    """Return the city list provided by wikipedia.get_cities()."""
    return wikipedia.get_cities()

def get_countries():
    """Collect, merge and write the country list.

    Fetches the country lists from geonames, imdb and wikipedia and writes
    each one to ../json/<source>/countries.json. The wikipedia list is then
    extended in place, per country, with data from the geo tables:
    dependencies/dependency, disputes/disputed, dissolved, googleName, other,
    imdbCode/imdbName (only set when a match is found) and languages. Flag
    and icon images are generated for every country along the way.

    The merged list is written to ../json/countries.json and returned.
    """
    def relations(name, table):
        """Look up name in a table shaped like {'A, B': ['X', 'Y']}.

        Returns (values, []) if name is one of the names in a key,
        ([], names) if it is one of the values, and ([], []) if no entry
        mentions it. Only the first entry that mentions name is used.
        """
        for names, values in table.items():
            names = names.split(', ')
            if name in names:
                return values, []
            if name in values:
                return [], names
        return [], []

    geonames_countries = geonames.get_countries()
    write_json('../json/geonames.org/countries.json', geonames_countries, True)
    imdb_countries = imdb.get_countries()
    write_json('../json/imdb.com/countries.json', imdb_countries, True)
    wikipedia_countries = wikipedia.get_countries()
    write_json('../json/wikipedia.org/countries.json', wikipedia_countries, True)
    countries = wikipedia_countries
    # invert language -> country once, instead of rescanning the whole
    # language table for every country
    languages_by_country = {}
    for language, language_country in geo['languages'].items():
        languages_by_country.setdefault(language_country, []).append(language)
    for country in countries:
        name = country['name']
        # dependencies
        country['dependencies'], country['dependency'] = relations(name, geo['dependencies'])
        # disputes
        country['disputes'], country['disputed'] = relations(name, geo['disputes'])
        # dissolved
        country['dissolved'] = geo['dissolved'].get(name, [])
        # google name
        country['googleName'] = geo['google_name'].get(name, name)
        # other
        country['other'] = name in geo['other']
        # imdb: explicit overrides first, else match by code or by name
        if name in geo['imdb_name']:
            country['imdbCode'] = geo['imdb_code'][name]
            country['imdbName'] = geo['imdb_name'][name]
        else:
            for imdb_country in imdb_countries:
                if imdb_country['code'].upper() == country['code'] or imdb_country['name'] == name:
                    country['imdbCode'] = imdb_country['code']
                    country['imdbName'] = imdb_country['name']
                    break
        get_country_flag(country['code'], country['flagURL'])
        get_country_icon(country['code'])
        # languages (copied, so countries never share one list object)
        country['languages'] = list(languages_by_country.get(name, []))
    write_json('../json/countries.json', countries)
    return countries

def get_country_flag(code, url):
    """Download the flag at url and (re)generate its PNG renditions.

    Writes ../png/flags/2048/<code>.png plus 256 and 32 pixel wide copies.
    For an SVG source the SVG itself is stored as ../svg/flags/<code>.svg
    and the 2048 pixel PNG is fetched from the matching '/commons/thumb/'
    URL; any other source is stored and then scaled to 2048 pixels wide.
    Existing files are only rewritten when the download differs from the
    stored copy.
    """
    def scaled(image, target_width):
        # keeps the aspect ratio; needs true division (the file imports
        # division from __future__)
        target_height = int(round(target_width / image.size[0] * image.size[1]))
        return image.resize((target_width, target_height), Image.ANTIALIAS)

    # max width on wikipedia
    max_width = 2048
    source = read_url(url)
    max_file = '../png/flags/' + str(max_width) + '/' + code + '.png'
    if url[-4:] != '.svg':
        # NOTE(review): the stored file is the resized image, not the raw
        # download, so this comparison presumably never matches and the
        # flag is regenerated on every run -- confirm
        update_image = not os.path.exists(max_file) or read_file(max_file) != source
        if update_image:
            write_file(max_file, source)
            png = scaled(Image.open(max_file), max_width)
            write_image(max_file, png)
        else:
            png = Image.open(max_file)
    else:
        svg_file = '../svg/flags/' + code + '.svg'
        update_image = not os.path.exists(svg_file) or read_file(svg_file) != source
        if update_image:
            write_file(svg_file, source)
        if update_image or not os.path.exists(max_file):
            thumb_url = url.replace('/commons/', '/commons/thumb/') + '/' + str(max_width) + 'px-.png'
            write_file(max_file, read_url(thumb_url))
        png = Image.open(max_file)
    for small_width in [256, 32]:
        small_file = '../png/flags/' + str(small_width) + '/' + code + '.png'
        if update_image or not os.path.exists(small_file):
            write_image(small_file, scaled(png, small_width))

def get_country_icon(code):
    """Derive the square icon and round marker images for one country flag.

    Opens ../png/flags/2048/<code>.png, applies the steps listed in
    geo['icon'][code] and writes the result to ../png/icons/1024/, /256/
    and /16/, and, masked with ../png/circle.png, to ../png/markers/256/
    and /32/.

    The recipe is a string of the form 'name(arg, ...); name(...) # note'.
    Everything after ' # ' is ignored, the steps run left to right, and
    each step receives the previous step's result as its flag.
    """
    def _border(flag, args):
        """Square icon that keeps both outer strips of width args[0].

        The space between the strips is filled with the horizontally
        centered part of the flag.
        """
        icon = Image.new('RGBA', (flag.size[1], flag.size[1]))
        left = int((flag.size[0] - flag.size[1]) / 2 + args[0])
        right = left + flag.size[1] - args[0] * 2
        crop_left = flag.crop((0, 0, args[0], flag.size[1]))
        crop_center = flag.crop((left, 0, right, flag.size[1]))
        crop_right = flag.crop((flag.size[0] - args[0], 0, flag.size[0], flag.size[1]))
        icon.paste(crop_left, (0, 0))
        icon.paste(crop_center, (args[0], 0))
        icon.paste(crop_right, (flag.size[1] - args[0], 0))
        return icon
    def _construct(flag):
        """Assemble a square icon from fixed regions of the flag.

        All coordinates are hard-coded pixel positions -- presumably for
        the 2048 pixel wide flag of Nepal, TODO confirm.
        """
        # nepal
        icon = Image.new('RGBA', (flag.size[1], flag.size[1]))
        # blue
        crop = flag.crop((0, flag.size[1] - 85, 85, flag.size[1]))
        crop = crop.resize((flag.size[1], flag.size[1]), Image.ANTIALIAS)
        icon.paste(crop, (0, 0))
        # red
        crop = flag.crop((86, flag.size[1] - 171, 171, flag.size[1] - 86))
        crop = crop.resize((flag.size[1] - 170, flag.size[1] - 170), Image.ANTIALIAS)
        icon.paste(crop, (85, 85))
        # moon
        crop = flag.crop((147, 753, 147 + 732, 1151))
        icon.paste(crop, (int((flag.size[1] - 732) / 2), 753))
        # sun
        crop = flag.crop((147, 1449, 147 + 732, 1449 + 732))
        # circle.png is converted to a single-channel ('L') image so it can
        # be used as the paste mask
        mask_ = Image.open('../png/circle.png')
        mask = Image.new('L', (mask_.size[0], mask_.size[1]))
        mask.paste(mask_, (0, 0))
        mask = mask.resize((732, 732), Image.ANTIALIAS)
        icon.paste(crop, (int((flag.size[1] - 732) / 2), 1449), mask)
        return icon
    def _copy(flag, args):
        """Paste a region of the original 2048px flag onto the current image.

        args: crop box (args[0]..args[3]) in the original flag, target size
        (args[4], args[5]), paste position (args[6], args[7]) and optional
        shape args[8], 'rect' (default) or 'circle'. The image passed in is
        modified in place and returned.
        """
        icon = flag
        # re-read the untouched flag from disk as the copy source
        flag = Image.open('../png/flags/2048/' + code + '.png')
        if len(args) == 8:
            args.append('rect')
        crop = flag.crop((args[0], args[1], args[2], args[3]))
        crop = crop.resize((args[4], args[5]), Image.ANTIALIAS)
        if args[8] == 'rect':
            icon.paste(crop, (args[6], args[7]))
        elif args[8] == 'circle':
            mask_ = Image.open('../png/circle.png')
            mask = Image.new('L', (mask_.size[0], mask_.size[1]))
            mask.paste(mask_, (0, 0))
            mask = mask.resize((args[4], args[5]), Image.ANTIALIAS)
            icon.paste(crop, (args[6], args[7]), mask)
        return icon
    def _crop(flag, args):
        """Cut a square of side flag-height out of the flag.

        Without args the square is horizontally centered; args[0] may be
        'left', 'right', or the x coordinate the square is centered on.
        """
        icon = Image.new('RGBA', (flag.size[1], flag.size[1]))
        if len(args) == 0:
            left = int((flag.size[0] - flag.size[1]) / 2)
        elif args[0] == 'left':
            left = 0
        elif args[0] == 'right':
            left = flag.size[0] - flag.size[1]
        else:
            left = int(args[0]) - int(flag.size[1] / 2)
        icon.paste(flag.crop((left, 0, left + flag.size[1], flag.size[1])), (0, 0))
        return icon
    def _cross(flag, args):
        """Centered square crop whose outer parts are replaced.

        args[0] is the width of the central band that is kept --
        presumably the thickness of the cross, TODO confirm. args[1]
        selects how the parts beside it are filled: 'crop' pastes two
        full-height strips taken from the left and right flag halves,
        'resize' scales the four flag corners (args[2] pixels wide) to
        squares and pastes them into the four icon corners.
        """
        icon = _crop(flag, [])
        if args[1] == 'crop':
            left = int((flag.size[0] - args[0]) / 4 - (flag.size[1] - args[0]) / 4)
            right = left + int((flag.size[1] - args[0]) / 2)
            crop = flag.crop((left, 0, right, flag.size[1]))
            icon.paste(crop, (0, 0))
            # mirrored strip from the right half of the flag
            crop = flag.crop((flag.size[0] - right, 0, flag.size[0] - left, flag.size[1]))
            icon.paste(crop, (int((flag.size[1] + args[0]) / 2), 0))
        elif args[1] == 'resize':
            width = args[2]
            height = int((flag.size[1] - args[0]) / 2)
            offset = int((flag.size[1] + args[0]) / 2)
            # top left, top right, bottom left, bottom right
            crop = flag.crop((0, 0, width, height)).resize((height, height), Image.ANTIALIAS)
            icon.paste(crop, (0, 0))
            crop = flag.crop((flag.size[0] - width, 0, flag.size[0], height)).resize((height, height), Image.ANTIALIAS)
            icon.paste(crop, (offset, 0))
            crop = flag.crop((0, offset, width, flag.size[1])).resize((height, height), Image.ANTIALIAS)
            icon.paste(crop, (0, offset))
            crop = flag.crop((flag.size[0] - width, offset, flag.size[0], flag.size[1])).resize((height, height), Image.ANTIALIAS)
            icon.paste(crop, (offset, offset))
        return icon
    def _cut(flag, args):
        """Trim args[0], args[1], args[2], args[3] pixels off the left, top, right and bottom."""
        return flag.crop((args[0], args[1], flag.size[0] - args[2], flag.size[1] - args[3]))
    def _extend(flag, args):
        """Pad the flag above and below to a square of side flag-width.

        Without args the padding repeats the pixel row one pixel inside the
        top and bottom edge. With args, each padding area is filled with a
        single pixel sampled near the corresponding edge: args[0] picks the
        side ('left', anything else means right) for the top fill, args[1]
        for the bottom fill.
        """
        icon = Image.new('RGBA', (flag.size[0], flag.size[0]))
        # top is rounded down and bottom rounded to nearest, so an odd
        # difference puts the extra row at the bottom
        top = int((flag.size[0] - flag.size[1]) / 2)
        bottom = int((flag.size[0] - flag.size[1]) / 2 + 0.5)
        icon.paste(flag, (0, top))
        if len(args) == 0:
            crop_top = flag.crop((0, 1, flag.size[0], 2))
            crop_bottom = flag.crop((0, flag.size[1] - 2, flag.size[0], flag.size[1] - 1))
        else:
            if args[0] == 'left':
                pixel = flag.crop((2, 1, 3, 2))
            else:
                pixel = flag.crop((flag.size[0] - 3, 1, flag.size[0] - 2, 2))
            # build a one pixel high row of that single color
            crop_top = Image.new('RGBA', (flag.size[0], 1))
            for x in range(flag.size[0]):
                crop_top.paste(pixel, (x, 0))
            if args[1] == 'left':
                pixel = flag.crop((1, flag.size[1] - 3, 2, flag.size[1] - 2))
            else:
                pixel = flag.crop((flag.size[0] - 3, flag.size[1] - 2, flag.size[0] - 2, flag.size[1] - 1))
            crop_bottom = Image.new('RGBA', (flag.size[0], 1))
            for x in range(flag.size[0]):
                crop_bottom.paste(pixel, (x, 0))
        # repeat the rows to fill the padding above and below the flag
        for y in range(top):
            icon.paste(crop_top, (0, y))
        for y in range(bottom):
            icon.paste(crop_bottom, (0, top + flag.size[1] + y))
        return icon
    def _pass(flag):
        """Return the flag unchanged."""
        return flag
    def _resize(flag, args):
        """Squash the whole flag into a square of side flag-height (args is unused)."""
        icon = Image.new('RGBA', (flag.size[1], flag.size[1]))
        icon.paste(flag.resize((flag.size[1], flag.size[1]), Image.ANTIALIAS))
        return icon
    def _slice(flag, args):
        """Square icon made of two vertical slices, each half the flag height wide.

        The left slice is centered on x = args[0] (default: a quarter of
        the flag width), the right slice is its mirror image position.
        """
        icon = Image.new('RGBA', (flag.size[1], flag.size[1]))
        if len(args) == 0:
            args = [int(flag.size[0] / 4)]
        width = int(flag.size[1] / 2)
        left = int(args[0] - width / 2)
        right = int(args[0] + width / 2)
        crop_left = flag.crop((left, 0, right, flag.size[1]))
        crop_right = flag.crop((flag.size[0] - right, 0, flag.size[0] - left, flag.size[1]))
        icon.paste(crop_left, (0, 0))
        icon.paste(crop_right, (width, 0))
        return icon
    file = '../png/icons/1024/' + code + '.png'
    # 'or True' forces regeneration on every run, so the else branch below
    # is currently never taken
    if not os.path.exists(file) or True:
        flag = Image.open('../png/flags/2048/' + code + '.png')
        icon = None
        # drop the trailing ' # note' and split the recipe into its steps
        functions = geo['icon'][code].split(' # ')[0].split('; ')
        # an empty recipe leaves icon as None, and the resize below then fails
        if functions[0]: # remove later
            for function in functions:
                pos = function.find('(')
                args = function[pos + 1:-1].split(', ')
                if args[0]:
                    # arguments starting with a digit or '-' become ints,
                    # all others stay strings
                    args = map(lambda x: int(x) if x[0] in '-0123456789' else x, args)
                else:
                    args = []
                function = function[:pos]
                if function == 'border':
                    icon = _border(flag, args)
                elif function == 'construct':
                    icon = _construct(flag)
                elif function == 'copy':
                    icon = _copy(flag, args)
                elif function == 'crop':
                    icon = _crop(flag, args)
                elif function == 'cross':
                    icon = _cross(flag, args)
                elif function == 'cut':
                    icon = _cut(flag, args)
                elif function == 'extend':
                    icon = _extend(flag, args)
                elif function == 'pass':
                    icon = _pass(flag)
                elif function == 'resize':
                    icon = _resize(flag, args)
                elif function == 'slice':
                    icon = _slice(flag, args)
                # the next step works on this step's result
                flag = icon
        write_image(file, icon.resize((1024, 1024), Image.ANTIALIAS))
    else:
        icon = Image.open(file)
    # smaller icons
    for width in [256, 16]:
        file = '../png/icons/' + str(width) + '/' + code + '.png'
        if not os.path.exists(file) or True:
            write_image(file, icon.resize((width, width), Image.ANTIALIAS))
    # this value is overwritten by the loop below before it is read
    size = 256
    # markers: the icon pasted through circle.png used as a mask
    mask_ = Image.open('../png/circle.png')
    mask = Image.new('L', (mask_.size[0], mask_.size[1]))
    mask.paste(mask_, (0, 0))
    for size in [256, 32]:
        marker = Image.new('RGBA', (size, size))
        icon_ = icon.resize((size, size), Image.ANTIALIAS)
        mask_ = mask.resize((size, size), Image.ANTIALIAS)
        marker.paste(icon_, (0, 0), mask_)
        file = '../png/markers/' + str(size) + '/' + code + '.png'
        if not os.path.exists(file) or True:
            write_image(file, marker)

def get_oxjs_languages():
    """Pair geonames languages with imdb languages and write them as JSON.

    A geonames language is kept only if some imdb language's 'code' equals
    one of its ISO 639-1/-2/-3 codes; the first such imdb language wins.
    The pairs are written to ../json/oxjs.org/languages.json.
    """
    iso_keys = ('ISO_639-1', 'ISO_639-2', 'ISO_639-3')
    geonames_languages = get_geonames_languages()
    imdb_languages = get_imdb_languages()
    languages = []
    for geonames_language in geonames_languages:
        for imdb_language in imdb_languages:
            iso_codes = [geonames_language[key] for key in iso_keys]
            if imdb_language['code'] not in iso_codes:
                continue
            languages.append({
                'geonames.org': geonames_language,
                'imdb.com': imdb_language
            })
            break
    write_json('../json/oxjs.org/languages.json', languages, True)

def make_tree(places):
    """Arrange places into a nested tree, one depth per geo['levels'] entry.

    Each node is {'geonameid', 'name', 'node_id', 'nodes'}. A node_id is the
    '-'-joined list of the place's values for every level key, skipping ''
    and '00'; a node's parent is the node whose id is the same minus the
    last component. Levels are processed in order, so parents exist before
    their children are attached. Sibling lists are kept sorted by node_id.

    The tree is written to ../json/tree.json after every inserted node and
    returned at the end. A TypeError is raised if a node's parent is not in
    the tree.
    """
    def get_node(find, node=None):
        """Depth-first search for the first node satisfying find; None if absent."""
        if node and find(node):
            return node
        nodes = tree if not node else node['nodes']
        for child in nodes:
            found = get_node(find, child)
            if found:
                return found
        return None
    def get_node_id(place):
        ids = [place[level['key']] for level in geo['levels']]
        return '-'.join([i for i in ids if i != '' and i != '00'])
    def get_parent_id(node_id):
        return '-'.join(node_id.split('-')[:-1])
    tree = []
    for depth, level in enumerate(geo['levels']):
        for place in places:
            if place['feature_code'] != level['featureCode']:
                continue
            node = {
                'geonameid': place['geonameid'],
                'name': place['name'],
                'node_id': get_node_id(place),
                'nodes': [],
            }
            print(node['name'])
            print(node['node_id'])
            if depth == 0:
                siblings = tree
            else:
                parent_id = get_parent_id(node['node_id'])
                siblings = get_node(lambda x: x['node_id'] == parent_id)['nodes']
            siblings.append(node)
            # sort in place: sorted() would only rebind the local name and
            # leave the list stored in the tree unsorted
            siblings.sort(key=lambda x: x['node_id'])
            write_json('../json/tree.json', tree)
            print('')
    return tree

def read_file(file):
    """Return the entire contents of the file at path file.

    Prints a 'reading <file>' progress line first.
    """
    print('reading %s' % (file,))
    # the with-block closes the handle even if read() raises
    with open(file) as f:
        return f.read()

def read_json(file):
    """Load and parse JSON from a local path or an http(s) URL.

    Anything starting with 'http://' or 'https://' is fetched with
    read_url(), everything else is read with read_file().
    """
    if file.startswith(('http://', 'https://')):
        data = read_url(file)
    else:
        data = read_file(file)
    return json.loads(data)

def read_table(file, keys, drop=(), filter=lambda x: True, sort=lambda x: x):
    """Parse a tab-separated table into a sorted list of dicts.

    file    local path, or http(s) URL (downloaded uncached to '_tmp.data')
    keys    one {'name': ..., 'type': ...} dict per column, in column order;
            'type' is a sample value: a float or an int makes the column
            numeric (the sample itself is used for empty cells), a list
            makes it a comma-separated list parsed like its first element,
            anything else leaves the cell as a string
    drop    names of columns to leave out of the result
    filter  predicate; rows for which it is falsy are skipped
    sort    key function for the final sort

    Blank lines and lines starting with '#' are ignored. A row with more
    columns than keys raises IndexError.
    """
    def parse_value(text, sample):
        if type(sample) == float:
            return float(text) if text else sample
        if type(sample) == int:
            return int(text) if text else sample
        return text

    if file.startswith(('http://', 'https://')):
        path = '_tmp.data'
        write_file(path, read_url(file, cache=False))
    else:
        print('reading %s' % (file,))
        path = file
    data = []
    with open(path) as f:
        for row in f:
            # strip only the line terminator: slicing off the last character
            # would eat data when the final line has no trailing newline
            line = row.rstrip('\r\n')
            if not line or line[0] == '#':
                continue
            item = {}
            for c, col in enumerate(line.split('\t')):
                key = keys[c]
                if key['name'] in drop:
                    continue
                if type(key['type']) == list:
                    if col:
                        value = [parse_value(x, key['type'][0]) for x in col.split(',')]
                    else:
                        value = []
                else:
                    value = parse_value(col, key['type'])
                item[key['name']] = value
            if filter(item):
                data.append(item)
    print(data)
    return sorted(data, key=sort)

def read_url(url, cache=True):
    """Fetch url and return the response data.

    Uses ox.cache.readUrl when cache is true, ox.net.readUrl otherwise.
    Prints a 'reading <url>' progress line first.
    """
    print('reading %s' % (url,))
    fetch = ox.cache.readUrl if cache else ox.net.readUrl
    return fetch(url)

def write_file(file, data):
    """Write data to file, creating missing parent directories first.

    Prints a 'writing <file>' progress line and returns len(data).
    """
    print('writing %s' % (file,))
    write_path(file)
    # the with-block closes the handle even if write() raises
    with open(file, 'w') as f:
        f.write(data)
    return len(data)

def write_image(file, image):
    """Save image to file via image.save(), creating parent directories first.

    Prints a 'writing <file>' progress line.
    """
    print('writing %s' % (file,))
    write_path(file)
    image.save(file)

def write_json(file, data, reformat=False):
    """Serialize data as JSON with sorted keys and write it to file.

    By default the output is indented by four spaces. With reformat the
    compact single-line dump is used instead, with a line break inserted
    after '[{' openers, between '}, {' neighbours and before '}]' closers.
    """
    if not reformat:
        text = json.dumps(data, indent=4, sort_keys=True)
    else:
        text = json.dumps(data, sort_keys=True)
        line_breaks = (
            ('[{', '[\n    {'),
            ('}, {', '},\n    {'),
            ('}]', '}\n]'),
        )
        for old, new in line_breaks:
            text = text.replace(old, new)
    write_file(file, text)

def write_log(file, line):
    """Append line to the log at file, or reset the log.

    line=None empties the file. Otherwise line becomes the whole content of
    a new file, or is added after a newline to an existing one. The file is
    read and rewritten completely on every call.
    """
    if line is None:
        data = ''
    elif os.path.exists(file):
        data = read_file(file) + '\n' + line
    else:
        data = line
    write_file(file, data)

def write_path(file):
    """Create the directory that contains file, unless it already exists.

    Does nothing when file has no directory part.
    """
    directory = os.path.dirname(file)
    if not directory or os.path.exists(directory):
        return
    os.makedirs(directory)

import geonames
import imdb
import wikipedia

if __name__ == '__main__':

    # sys is not imported at the top of the file; without this import every
    # sys.exit() below raises NameError instead of exiting cleanly
    import sys

    write_json('../json/geo.json', geo)
    countries = get_countries()
    sys.exit()

    # Everything below the first sys.exit() is unreachable. The sections are
    # kept as a collection of one-off recipes, each ending in its own
    # sys.exit(); move the wanted one above the exit to run it. Note that
    # several sections rebind the names file, filter and sort.

    file = '../txt/geonames.org/cities1000.txt'
    geo['keys'] = read_json('../json/oxjs.org/geonames.keys.json')
    keys = geo['keys']['place']['geonames.org']
    drop = ['alternatenames']
    # filter = lambda x: x['feature_code'] != 'PPLX' and x['population'] >= 100000
    filter = lambda x: x['feature_code'] == 'PPLC' or x['population'] >= 10000
    sort = lambda x: -x['population']
    data = read_table(file, keys, drop=drop, filter=filter, sort=sort)
    write_json('../json/geonames.org/cities10000.json', data, False)
    print(len(data))
    sys.exit()


    cities = get_cities()
    print('%s %s' % (cities, len(cities)))
    sys.exit()

    countries = get_countries()
    print('current independent %s' % len(filter(
        lambda x: not len(x['dependency']) and not x['former'] and not x['other'], countries
    )))
    print('former independent %s' % len(filter(
        lambda x: not len(x['dependency']) and x['former'] and not x['other'], countries
    )))
    print('current dependency %s' % len(filter(
        lambda x: len(x['dependency']) and not x['former'] and not x['other'], countries
    )))
    print('former dependency %s' % len(filter(
        lambda x: len(x['dependency']) and x['former'] and not x['other'], countries
    )))
    print('other %s' % len(filter(
        lambda x: x['other'], countries
    )))
    sys.exit()

    file = '../txt/geonames.org/cities1000.txt'
    keys = geo['keys']['place']['geonames.org']
    drop = ['alternatenames']
    # filter = lambda x: x['feature_code'] != 'PPLX' and x['population'] >= 100000
    filter = lambda x: x['feature_code'] == 'PPLC' or x['population'] >= 100000
    sort = lambda x: -x['population']
    data = read_table(file, keys, drop=drop, filter=filter, sort=sort)
    write_json('../json/geonames.org/cities100000.json', data, False)
    print(len(data))

    get_oxjs_languages()

    get_geonames_countries()
    get_geonames_languages()
    get_imdb_countries()
    get_imdb_languages()
    sys.exit()

    #places = read_json('../json/geonames.org/DE.A.json')
    #tree = make_tree(places)
    #write_json('../json/geonames.org/tree.DE.json', tree)

    file = '../txt/geonames.org/countryInfo.txt'
    keys = geo['keys']['country']['geonames.org']
    sort = lambda x: -x['Population']
    data = read_table(file, keys, sort=sort)
    write_json('../json/' + file[7:-4] + '.json', data)
    print(len(data))

    file = '../txt/geonames.org/iso-languagecodes.txt'
    keys = geo['keys']['language']['geonames.org']
    sort = lambda x: x['Language_Name']
    data = read_table(file, keys, sort=sort)
    write_json('../json/' + file[7:-4] + '.json', data)
    print(len(data))

    """
    languages = read_json('../json/geonames.org/iso-languagecodes.json')
    countries = read_json('../json/geonames.org/countryInfo.json')
    max_languages = max(map(lambda x: len(x['Languages']), countries))
    language_countries = {}
    for language in languages:
        language_name = language['Language_Name']
        language_iso = [language['ISO_639-3'], language['ISO_639-2'], language['ISO_639-1']]
        found = False
        for i in range(max_languages):
            for country in countries:
                if len(country['Languages']) > i:
                    country_language = country['Languages'][i].split('-')[0]
                    if country_language in language_iso:
                        if not language_name in language_countries:
                            language_countries[language_name] = []
                        language_countries[language_name].append({
                            'country': country['Country'],
                            'population': country['Population'],
                            'position': i + 1
                        })
    write_json('../json/geonames.org/languageCountriesAll.json', language_countries)
    for language, countries in language_countries.iteritems():
        language_countries[language] = countries[0]['country']
    language_countries['English'] = 'United Kingdom'
    language_countries['French'] = 'France'
    language_countries['Portuguese'] = 'Portugal'
    language_countries['Spanish'] = 'Spain'
    write_json('../json/geonames.org/languageCountries.json', language_countries)
    """

    file = '../txt/geonames.org/cities15000.txt'
    keys = geo['keys']['place']['geonames.org']
    filter = lambda x: x['population'] >= 100000
    sort = lambda x: -x['population']
    data = read_table(file, keys, filter=filter, sort=sort)
    write_json('../json/geonames.org/cities100000.json', data)
    print(len(data))

    file = '../txt/geonames.org/DE.txt'
    keys = geo['keys']['place']['geonames.org']
    # the original list named 'PPLA2' twice and left out 'PPLA3'
    filter = lambda x: x['feature_class'] == 'A' or x['feature_code'] in [
        'PPLA', 'PPLA2', 'PPLA3', 'PPLA4', 'PPLC', 'PPLG'
    ]
    sort = lambda x: -x['population']
    data = read_table(file, keys, filter=filter, sort=sort)
    write_json('../json/geonames.org/DE.A,P.json', data)
    print(len(data))