oxjs/tools/geo/py/geo.py

1604 lines
59 KiB
Python
Raw Normal View History

2011-05-23 19:38:52 +00:00
# -*- coding: utf-8 -*-
from __future__ import division
import Image
import json
import math
import os
import ox
import re
"""
311 countries
-- sovereign, dependent and disputed countries according to
http://en.wikipedia.org/wiki/List_of_sovereign_states
-- dissolved countries, according to
http://en.wikipedia.org/wiki/ISO_3166-3 and http://www.imdb.com/country/
-- entities with country codes, according to
http://en.wikipedia.org/wiki/ISO_3166-1_alpha-2
2011-05-24 06:15:44 +00:00
(like Ascension, Canary Islands, Ceuta and Melilla, Diego Garcia,
European Union, Metropolitan France, Tristan da Cunha, UK...)
2011-05-23 19:38:52 +00:00
writes json/geo.json and json/countries.json,
both will used by html/geo.html
"""
# Directory component of this script's path. When it is non-empty, make it
# the working directory; dirname() yields '' when the script path has no
# directory part, and then the working directory is left alone.
# NOTE(review): presumably this is so the relative '../json/...' paths used
# further down resolve regardless of where the script is launched -- confirm.
base_path = os.path.dirname(__file__)
if base_path != '':
    os.chdir(base_path)
2011-05-23 19:38:52 +00:00
# Southern latitude bound, in degrees: -atan(sinh(pi)), roughly -85.05.
# Used as the 'south' edge of the Antarctic bounding boxes in geo['coordinates'].
# NOTE(review): the formula matches the latitude limit of a square Mercator
# map -- confirm that this is the intent.
MIN_LAT = math.degrees(-math.atan(math.sinh(math.pi)))
geo = {
'code': {
# missing on wikipedia
'Neutral Zone': 'NTHH',
'Réunion': 'RE',
# not yet assigned, see http://en.wikipedia.org/wiki/List_of_sovereign_states
'South Sudan': 'SS',
# disputed, see http://en.wikipedia.org/wiki/List_of_sovereign_states
'Abkhazia': 'GE-AB',
'Kosovo': 'RS-KO',
'Nagorno-Karabakh': 'AZ-NK',
'Northern Cyprus': 'CY-NC',
'South Ossetia': 'GE-SO',
'Somaliland': 'SO-SO',
'Transnistria': 'MD-TR',
# dependent, see http://en.wikipedia.org/wiki/List_of_sovereign_states
'Akrotiri and Dhekelia': 'GB-AD',
'Ashmore and Cartier Islands': 'AU-AC',
'Coral Sea Islands': 'AU-CS',
'Peter I Island': 'NO-PI',
# dependent, see http://en.wikipedia.org/wiki/ISO_3166-2:GB
'England': 'GB-ENG',
'Northern Ireland': 'GB-NIR',
'Scotland': 'GB-SCT',
'Wales': 'GB-WLS',
# antarctic, see http://en.wikipedia.org/wiki/Territorial_claims_in_Antarctica
'Adélie Land': 'FR-AQ',
'Antártica': 'CL-AQ',
'Argentine Antarctica': 'AR-AQ',
'Australian Antarctic Territory': 'AU-AQ',
'Ross Dependency': 'NZ-AQ',
# exceptional, see http://en.wikipedia.org/wiki/ISO_3166-1_alpha-2
'Canary Islands': 'IC',
'Ceuta and Melilla': 'EA',
'UK': 'UK',
# dissolved, see http://www.imdb.com/country/
'Korea': 'KOHH',
'Netherlands Antilles': 'ANHH',
'Siam': 'SITH',
'West Germany': 'DEDE'
},
'coordinates': {
# see http://en.wikipedia.org/wiki/Territorial_claims_in_Antarctica
'Adélie Land': {
'south': MIN_LAT,
'west': 136.03333333,
'north': -60,
'east': 142.18333333
},
'Antarctica': {
'south': MIN_LAT,
'west': -179.99999999,
'north': -60,
'east': 179.99999999
},
'Antártica': {
'south': MIN_LAT,
'west': -90,
'north': -60,
'east': -53
},
'Argentine Antarctica': {
'south': MIN_LAT,
'west': -74,
'north': -60,
'east': -25
},
'Australian Antarctic Territory': {
'south': MIN_LAT,
'west': 44.63333333,
'north': -60,
'east': 160
},
'British Antarctic Territory': {
'south': MIN_LAT,
'west': -80,
'north': -60,
'east': -20
},
'Queen Maud Land': {
'south': MIN_LAT,
'west': 20,
'north': -60,
'east': 44.63333333
},
'Ross Dependency': {
'south': MIN_LAT,
'west': 160,
'north': -60,
'east': -150
}
},
'dependencies': {
'Argentina': ['Argentine Antarctica'],
'Argentina, Australia, Chile, France, New Zealand, Norway, United Kingdom': ['Antarctica'],
'Australia': [
'Ashmore and Cartier Islands', 'Australian Antarctic Territory', 'Christmas Island', 'Cocos Islands', 'Coral Sea Islands',
'Heard Island and McDonald Islands', 'Norfolk Island'
],
'Chile': ['Antártica'],
'China': ['Hong Kong', 'Macau'],
'Denmark': ['Faroe Islands', 'Greenland'],
'Finland': ['Åland'],
'France': [
'Adélie Land', 'Clipperton Island', 'French Afar and Issas', 'French Guiana', 'French Polynesia',
'French Southern and Antarctic Territories', 'French Southern Territories', 'Guadeloupe', 'Martinique', 'Mayotte',
'New Caledonia', 'Réunion', 'Saint Barthélemy', 'Saint Martin', 'Saint Pierre and Miquelon',
'Wallis and Futuna'
],
'France, United Kingdom': ['New Hebrides'],
'Netherlands': [
'Aruba', 'Bonaire, Saint Eustatius and Saba', 'Curaçao', 'Netherlands Antilles', 'Sint Maarten'
],
'New Zealand': ['Cook Islands', 'Niue', 'Ross Dependency', 'Tokelau'],
'Norway': ['Bouvet Island', 'Peter I Island', 'Queen Maud Land', 'Svalbard and Jan Mayen'],
'Spain': ['Canary Islands', 'Ceuta and Melilla'],
'Soviet Union': ['Byelorussian Soviet Socialist Republic'], # fixme: what about ukraine?
'United Kingdom': [
'Akrotiri and Dhekelia', 'Anguilla', 'Ascension Island', 'Bermuda', 'British Antarctic Territory',
'British Indian Ocean Territory', 'British Virgin Islands', 'Cayman Islands', 'Diego Garcia', 'England',
'Falkland Islands', 'Gibraltar', 'Gilbert and Ellice Islands', 'Guernsey', 'Isle of Man',
'Jersey', 'Montserrat', 'Northern Ireland', 'Pitcairn Islands', 'Saint Helena',
'Scotland', 'South Georgia and the South Sandwich Islands', 'Tristan da Cunha', 'Turks and Caicos Islands', 'Wales'
],
'United Kingdom, United States': ['Canton and Enderbury Islands'],
'United States': [
'American Samoa', 'Guam', 'Northern Mariana Islands', 'Johnston Island', 'Midway Islands',
'Pacific Islands', 'Panama Canal Zone', 'Puerto Rico', 'United States Minor Outlying Islands', 'United States Miscellaneous Pacific Islands',
'United States Virgin Islands', 'Wake Island'
]
},
'disputes': {
'Azerbaijan': ['Nagorno-Karabakh'],
'Cyprus': ['Northern Cyprus'],
'Georgia': ['Abkhazia', 'South Ossetia'],
'Iraq, Saudi Arabia': ['Neutral Zone'],
'Moldova': ['Transnistria'],
'Serbia': ['Kosovo'],
'Somalia': ['Somaliland']
},
'dissolved': {
# merged
'Canton and Enderbury Islands': ['Kiribati'],
'East Germany': ['Germany'],
'Johnston Island': ['United States Minor Outlying Islands'],
'Midway Islands': ['United States Minor Outlying Islands'],
'North Vietnam': ['Vietnam'],
'Panama Canal Zone': ['Panama'],
'Sikkim': ['India'],
'South Yemen': ['Yemen'],
'United States Miscellaneous Pacific Islands': ['United States Minor Outlying Islands'],
'Wake Island': ['United States Minor Outlying Islands'],
'West Germany': ['Germany'],
# renamed
'Burma': ['Myanmar'],
'Byelorussian Soviet Socialist Republic': ['Belarus'],
'Dahomey': ['Benin'],
'East Timor': ['Timor-Leste'],
'French Afar and Issas': ['Djibouti'],
'New Hebrides': ['Vanuatu'],
'Rhodesia': ['Zimbabwe'],
'Siam': ['Thailand'],
'Upper Volta': ['Burkina Faso'],
2011-05-24 06:15:44 +00:00
'Yugoslavia': ['Serbia and Montenegro'], # fixme: shape and flag suggest 'split'
2011-05-23 19:38:52 +00:00
'Zaire': ['Democratic Republic of the Congo'],
# split
'Czechoslovakia': ['Czech Republic', 'Slovakia'],
'French Southern and Antarctic Territories': ['Adélie Land', 'French Southern Territories'],
'Gilbert and Ellice Islands': ['Kiribati', 'Tuvalu'],
'Korea': ['North Korea', 'South Korea'],
'Netherlands Antilles': ['Bonaire, Saint Eustatius and Saba', 'Curaçao', 'Sint Maarten'],
'Neutral Zone': ['Iraq', 'Saudi Arabia'],
'Pacific Islands': ['Marshall Islands', 'Micronesia', 'Northern Mariana Islands', 'Palau'],
'Serbia and Montenegro': ['Serbia', 'Montenegro'],
'Soviet Union': [
'Armenia', 'Azerbaijan', 'Belarus', 'Estonia', 'Georgia',
'Kazakhstan', 'Kyrgyzstan', 'Latvia', 'Lithuania', 'Moldova',
'Russia', 'Tajikistan', 'Turkmenistan', 'Ukraine', 'Uzbekistan'
]
},
'flag': {
'Australian Antarctic Territory': 'Flag_of_Australia.svg',
'Antarctica': 'Flag_of_the_Antarctic_Treaty.svg',
'Antártica': 'Flag_of_Magallanes,_Chile.svg',
'Ashmore and Cartier Islands': 'Flag_of_Australia.svg',
'Bonaire, Saint Eustatius and Saba': 'Flag_of_Bonaire.svg',
'Burma': 'Flag_of_Myanmar_(1974-2010).svg',
'Byelorussian Soviet Socialist Republic': 'Flag_of_Byelorussian_SSR.svg',
'Canton and Enderbury Islands': 'Flag_of_Gilbert_and_Ellice_Islands.svg',
'Ceuta and Melilla': 'Flag_Ceuta.svg',
'Coral Sea Islands': 'Flag_of_Australia.svg',
'Diego Garcia': 'Flag_of_the_British_Indian_Ocean_Territory.svg',
'French Guiana': 'Flag_of_French_Guiana.svg',
'Korea': 'Flag_of_Korea_1882.svg',
2011-09-09 16:40:04 +00:00
# 'Libya': 'Flag_of_the_Libyan_Jamahiriya_1977.svg',
2011-05-23 19:38:52 +00:00
'Metropolitan France': 'Flag_of_France.svg',
'Neutral Zone': 'Flag_of_the_United_Nations.svg',
2011-10-11 23:21:05 +00:00
'New Caledonia': 'Flag_of_New_Caledonia.svg',
2011-05-23 19:38:52 +00:00
'New Hebrides': 'Flag_of_Anglo-French_Joint_Naval_Commission.svg',
'Northern Ireland': 'Ulster_banner.svg',
'Panama Canal Zone': 'Panama_Canal_Zone_Flag.png',
'Peter I Island': 'Flag_of_Norway.svg',
'Réunion': 'Drapeau_Reunion_APDR.png',
'Saint Martin': 'Flag_of_Saint-Martin_(local).svg',
'Siam': 'State_Flag_of_Thailand_(1916).svg',
'Svalbard and Jan Mayen': 'Flag_of_Norway.svg',
'United States Miscellaneous Pacific Islands': 'Flag_of_the_United_States.svg',
'Wallis and Futuna': 'Flag_of_Wallis_and_Futuna.svg',
'Yugoslavia': 'Flag_of_SFR_Yugoslavia.svg'
},
'google_name': {
'Bahamas': 'The Bahamas',
'Brunei': 'Brunei Darussalam',
'Cocos Islands': 'Cocos (Keeling) Islands',
'Gambia': 'The Gambia',
'Kosovo': 'Kosova (Kosovo)',
'Macedonia': 'Former Yugoslav Republic of Macedonia',
'Myanmar': 'Burma',
'Palestine': 'Palestinian Territories',
2011-05-23 19:38:52 +00:00
'Republic of the Congo': 'Congo',
'Sahrawi': 'Western Sahara',
'United Kingdom': 'UK',
'United States': 'USA'
},
'google_query': {
'Akrotiri and Dhekelia': ['Akrotiri, Cyprus', 'Dhekelia, Cyprus'],
'Bonaire, Saint Eustatius and Saba': ['Bonaire', 'Saba, Netherlands Antilles', 'Saint Eustatius'],
'Byelorussian Soviet Socialist Republic': ['Belarus'],
'Canton and Enderbury Islands': ['Canton Island', 'Enderbury Island'],
'Ceuta and Melilla': ['Ceuta', 'Melilla'],
'Clipperton Island': ['Île de la Passion'],
'Curaçao': ['Banda Abou, Curaçao'],
'Czechoslovakia': ['Czech Republic', 'Slovakia'],
'East Germany': [
2011-09-09 16:40:04 +00:00
'Mecklenburg Vorpommern', 'Saxony', 'Thuringia'
2011-05-23 19:38:52 +00:00
],
'Dahomey': ['Benin'],
'European Union': ['Europe'],
'French Afar and Issas': ['Djibouti'],
# see http://en.wikipedia.org/wiki/French_Southern_and_Antarctic_Lands
# and http://en.wikipedia.org/wiki/Scattered_Islands_in_the_Indian_Ocean
'French Southern and Antarctic Territories': ['Adélie Land', 'Bassas da India', 'Glorioso Islands', 'Île Kerguelen'],
'French Southern Territories': ['Bassas da India', 'Glorioso Islands', 'Île Kerguelen'],
'Georgia': ['Georgia, Asia'],
# see http://en.wikipedia.org/wiki/Gilbert_Islands
'Gilbert and Ellice Islands': ['Arorae, Kiribati', 'Butaritari, Kiribati', 'Makin, Kiribati', 'Tuvalu'],
'Jamaica': ['Clarendon Parish, Jamaica', 'St. James Parish, Jamaica', 'St. Thomas Parish, Jamaica', 'Westmoreland Parish, Jamaica'], # in case results are us-biased
'Johnston Island': ['Johnston Atoll'],
'Korea': ['North Korea', 'South Korea'],
'Lebanon': ['Lebanon, Asia'], # in case results are us-biased
'Metropolitan France': ['France'],
'Midway Islands': ['Midway Atoll'],
'Neutral Zone': ['Neutral Zone, Saudi Arabia'],
'New Hebrides': ['Vanuatu'],
'North Vietnam': ['Ha Giang, Vietnam', 'Lai Chau, Vietnam', 'Thua Thien-Hue, Vietnam'],
'Northern Cyprus': ['Karpass, Cyprus', 'Kokkina, Cyprus', 'Lympia, Cyprus'],
'Pacific Islands': ['Marshall Islands', 'Micronesia', 'Northern Mariana Islands', 'Palau'],
# 'Palestine': ['71, Israel', 'El-arish Rafah, Egypt'],
2011-05-23 19:38:52 +00:00
'Panama Canal Zone': ['Amador, Panama', 'Fort Sherman, Panama'],
'Sahrawi': ['Western Sahara'],
'Saint Helena': ['Ascension Island', 'Longwood, Saint Helena', 'Tristan da Cunha'], # in case results are us-biased
'San Marino': ['San Marino, Europe'], # in case results are us-biased
'Serbia and Montenegro': ['Montenegro', 'Serbia'],
'Siam': ['Thailand'],
'Sint Maarten': ['Sint Maarten, Netherlands Antilles'],
'South Ossetia': ['Shida Kartli'],
# see http://en.wikipedia.org/wiki/Sudan#States_and_regions
2011-10-12 00:14:05 +00:00
# 'South Sudan': [
# 'Central Equatoria, Sudan', 'Eastern Equatoria, Sudan', 'Jonglei, Sudan', 'Lakes, Sudan', 'Northern Bahr el Ghazal, Sudan',
# 'Upper Nile, Sudan', 'Unity, Sudan', 'Warrap, Sudan', 'Western Bahr el Ghazal, Sudan', 'Western Equatoria, Sudan'
# ],
2011-05-23 19:38:52 +00:00
'South Yemen': ['Al-Mahrah, Yemen', 'Lahij, Yemen'],
'Soviet Union': [
'Armenia', 'Azerbaijan', 'Belarus', 'Estonia', 'Georgia, Europe',
'Latvia', 'Lithuania', 'Kazakhstan', 'Kyrgyzstan', 'Moldova',
'Russia', 'Tajikistan', 'Turkmenistan', 'Ukraine', 'Uzbekistan'
],
2011-09-09 16:40:04 +00:00
'United Kingdom': ['England', 'Northern Ireland', 'Scotland', 'Wales, United Kingdom'],
2011-05-23 19:38:52 +00:00
'United States Minor Outlying Islands': ['Midway Islands'],
# Baker Island, Howland Island, Jarvis Island, Kingman Reef, Palmyra Atoll --
# but the first three each return the full United States Minor Outlying Islands
'United States Miscellaneous Pacific Islands': ['Kingman Reef', 'Palmyra Atoll'],
2011-09-09 16:40:04 +00:00
'UK': ['England', 'Northern Ireland', 'Scotland', 'Wales, United Kingdom'],
2011-05-23 19:38:52 +00:00
'Upper Volta': ['Burkina Faso'],
'Wake Island': ['Wake Atoll'],
2011-09-09 16:40:04 +00:00
'Wales': ['Wales, United Kingdom'],
2011-05-23 19:38:52 +00:00
'West Germany': ['Schleswig Holstein', 'Northrhine Westphalia', 'Bavaria, Germany'],
'Yugoslavia': [
'Bosnia and Herzegovina', 'Croatia', 'Serbia', 'Slovenia', 'Macedonia',
'Montenegro'
]
},
'icon': {
'AC': 'cross(340, resize, 684)',
'AD': 'extend()',
'AE': 'crop(683)',
'AF': 'extend()',
'AG': 'extend(left, left)',
'_AG': 'resize(); copy(580, 90, 1468, 534, 592, 296, 387, 238); copy(580, 0, 1468, 90, 592, 238, 387, 0) # *2/3',
'AI': 'extend(right, right)',
'AIDJ': 'extend()',
'AL': 'crop()',
'AM': 'crop()',
'ANHH': 'crop()',
'AO': 'crop()',
'AQ': 'crop()',
'AR': 'crop()',
'AR-AQ': 'extend(right, left)',
'AS': 'extend(left, left)',
'AT': 'crop()',
'AU': 'extend(right, right)',
'AU-AC': 'extend(right, right)',
'AU-AQ': 'extend(right, right)',
'AU-CS': 'extend(right, right)',
'AW': 'crop(left)',
'AX': 'crop(827)',
'AZ': 'crop()',
'AZ-NK': 'crop(right)',
'BA': 'crop(1054)',
'BB': 'extend()',
'BD': 'crop(922)',
'BE': 'extend()',
'BF': 'crop()',
'BG': 'crop()',
'BH': 'crop(666)',
'_BI': 'crop()',
'BI': 'resize(); copy(677, 267, 1371, 961, 696, 696, 267, 267, circle)',
'BJ': 'crop(819)',
'BL': 'extend()',
'BM': 'extend(right, right)',
'BN': 'crop()',
'BO': 'crop()',
'BQ': 'crop(left)',
'BQAQ': 'extend(right, right)',
'BR': 'extend()',
'BS': 'cut(0, 0, 342, 0); resize()',
'BT': 'extend(left, right)',
'BUMM': 'slice(444)',
'BV': 'crop(left)',
'BW': 'crop()',
'BY': 'crop(left)',
'BYAA': 'crop(left)',
'BZ': 'crop()',
'CA': 'extend()',
'CC': 'extend()',
'CD': 'crop(left)',
'CF': 'crop(); copy(85, 0, 597, 341, 512, 341, 0, 0)',
'CG': 'resize()',
'CH': 'pass()',
'CI': 'extend()',
'CK': 'extend(right, right)',
'CL': 'crop(left)',
'CL-AQ': 'extend()',
'CM': 'extend()',
2011-09-09 21:16:32 +00:00
'CN': 'cut(1024, 0, 0, 341); resize(); copy(145, 73, 885, 679, 740, 606, 142, 209)',
2011-05-23 19:38:52 +00:00
'CO': 'crop()',
'CP': 'extend()',
'CR': 'crop(left)',
'CSHH': 'resize()',
'CSXX': 'crop()',
'CTKI': 'extend(right, right)',
'CU': 'crop(left)',
'CV': 'crop(768)',
'CW': 'crop(left)',
'CX': 'cut(2, 1, 2, 1); extend(right, left)',
'CY': 'crop()',
'CY-NC': 'crop(811)',
'CZ': 'resize()',
'DDDE': 'crop()',
'DE': 'crop()',
'DEDE': 'crop()',
'DG': 'extend(right, right)',
'DJ': 'crop(left)',
'DK': 'crop(left)',
'DM': 'crop()',
'DO': 'crop()',
'DYBJ': 'crop(819)',
'DZ': 'crop()',
'EA': 'crop()',
'EC': 'crop()',
'EE': 'crop()',
'EG': 'crop()',
'EH': 'cut(0, 0, 683, 0); resize(); copy(768, 0, 1280, 1024, 512, 1024, 512, 0)',
'ER': 'extend(right, right)',
'ES': 'crop(left)',
'ET': 'crop()',
'EU': 'crop()',
'FI': 'crop(740)',
'FJ': 'extend(right, right)',
'FK': 'extend(right, right)',
'FM': 'crop()',
'FO': 'crop(left)',
'FQHH': 'extend(right, right); copy(0, 539, 826, 552, 826, 13, 0, 328)',
'FR': 'extend()',
'FR-AQ': 'extend(right, right); copy(0, 539, 826, 552, 826, 13, 0, 328)',
'FXFR': 'extend()',
'GA': 'crop()',
'GB': 'cross(340, resize, 684)',
'GB-AD': 'cross(340, resize, 684)',
'GB-ENG': 'crop()',
'GB-NIR': 'crop()',
'GB-SCT': 'resize()',
'GB-WLS': 'extend()',
'GD': 'crop()',
'_GD': 'border(172); copy(172, 172, 1867, 1057, 885, 885, 172, 172)',
'GE': 'cross(272, crop)',
'GE-AB': 'crop(left)',
'GE-SO': 'crop()',
'GEHH': 'extend(right, right)',
'GF': 'crop()',
'GG': 'crop()',
'GH': 'crop()',
'GI': 'crop()',
'GL': 'crop(797)',
'GM': 'crop()',
'GN': 'extend()',
'GP': 'extend()',
'GQ': 'cut(0, 0, 1024, 0); resize(); copy(683, 0, 1365, 1365, 682, 1365, 683, 0)',
'GR': 'crop(left)',
'GS': 'extend(right, right)',
'GT': 'extend()',
'GU': 'border(50)',
'GW': 'slice(341)',
'GY': 'resize()',
'HK': 'crop()',
'HM': 'extend(right, right)',
'HN': 'crop()',
'HR': 'crop()',
'HT': 'crop()',
'HU': 'crop()',
'HVBF': 'crop()',
'IC': 'extend()',
'ID': 'crop()',
'IE': 'extend()',
'IL': 'crop()',
'IM': 'crop()',
'IN': 'crop()',
'IO': 'extend(right, right)',
'IQ': 'crop()',
'IR': 'crop()',
'IS': 'crop(left)',
'IT': 'extend()',
'JE': 'crop()',
'JM': 'resize()',
'JO': 'crop(left)',
'JP': 'crop()',
'JTUM': 'extend()',
'KE': 'crop()',
'KG': 'crop()',
'KH': 'crop()',
'KI': 'crop()',
'KM': 'crop(left)',
'KN': 'crop()',
'KOHH': 'extend()',
'KP': 'crop(683)',
'KR': 'extend()',
'KW': 'cut(0, 0, 512, 0); resize()',
'KY': 'extend(right, right)',
'KZ': 'crop(left); copy(696, 0, 1526, 1024, 840, 1024, 184, 0)',
'LA': 'crop()',
'LB': 'crop()',
'LC': 'crop()',
'LI': 'slice(455)',
'LK': 'cut(649, 0, 0, 0); border(85)',
'LR': 'crop(left)',
'LS': 'crop()',
'LT': 'crop()',
'LU': 'crop()',
'LV': 'crop()',
'LY': 'crop()',
'MA': 'crop()',
'MC': 'crop()',
'MD': 'extend()',
'MD-TR': 'crop(left)',
'ME': 'border(51)',
'MF': 'crop()',
'MG': 'crop(left)',
'MH': 'extend()',
'MIUM': 'extend()',
'MK': 'cut(308, 0, 308, 0); resize(); copy(842, 330, 1206, 694, 364, 364, 330, 330, circle)',
'ML': 'extend()',
'MM': 'crop()',
'MN': 'extend()',
'MO': 'crop()',
'MP': 'crop()',
'MQ': 'cross(198, crop)',
'MR': 'crop()',
'MS': 'extend(right, right)',
'MT': 'border(480)',
'MU': 'crop()',
'MV': 'border(341)',
'MW': 'crop()',
'MX': 'extend()',
'_MY': 'crop(left)',
'MY': 'crop(right); copy(0, 0, 1, 1, 585, 585, 0, 0); copy(0, 0, 1024, 585, 585, 334, 0, 126)',
'MZ': 'crop(left)',
'NA': 'crop(left)',
'NC': 'crop(681)',
'NE': 'crop()',
'NF': 'extend()',
'NG': 'extend()',
'NHVU': 'crop()',
'NI': 'crop()',
'NL': 'crop()',
'NO': 'crop(left)',
'NO-PI': 'crop(left)',
'NP': 'construct()',
'NQAQ': 'crop(left)',
'NR': 'crop(left)',
'NTHH': 'crop()',
'NU': 'extend(right, right)',
'NZ': 'extend(right, right)',
'NZ-AQ': 'extend(right, right)',
'OM': 'crop(left)',
'PA': 'slice()',
'PCHH': 'crop()',
'PE': 'extend()',
'PF': 'crop()',
'PG': 'extend(right, left)',
'PH': 'crop(left)',
'PK': 'extend()',
'PL': 'crop()',
'PM': 'cut(558, 0, 0, 0); extend()',
'PN': 'extend(right, right)',
'PR': 'crop(left)',
'PS': 'cut(0, 0, 683, 0); resize()',
'PT': 'crop(819)',
'PUUM': 'crop(778)',
'PW': 'crop(896)',
'PY': 'crop()',
'PZPA': 'crop()',
'QA': 'crop(659)',
'RE': 'cut(464, 0, 0, 0); resize()',
'RHZW': 'cut(0, 3, 0, 2); extend()',
'RO': 'extend()',
'RS': 'crop(732)',
'RS-KO': 'crop()',
'RU': 'crop()',
'RW': 'crop(right)',
'SA': 'crop()',
2011-09-09 21:16:32 +00:00
'SB': 'extend(left, right); copy(103, 972, 206, 1024, 103, 52, 0, 1536); copy(1842, 0, 1945, 52, 103, 52, 1945, 460)',
2011-05-23 19:38:52 +00:00
'SC': 'resize()',
'SD': 'cut(0, 0, 683, 0); resize()',
'SE': 'crop(768)',
'SG': 'crop(787)',
'SH': 'extend(right, right)',
'SI': 'crop(left)',
'SITH': 'crop()',
'SJ': 'crop(left)',
'SK': 'crop(679)',
'SKIN': 'border(165)',
'SL': 'crop()',
'SM': 'crop()',
'SN': 'extend()',
'SO': 'crop()',
'SO-SO': 'crop()',
'SR': 'crop()',
'SS': 'crop(left)',
'ST': 'resize(); copy(832, 320, 1216, 704, 384, 384, 272, 320); copy(1344, 320, 1728, 704, 384, 384, 624, 320)',
2011-09-09 21:16:32 +00:00
'SUHH': 'crop(right); copy(85, 0, 597, 512, 1024, 1024, 0, 0)',
2011-05-23 19:38:52 +00:00
'SV': 'crop()',
'SX': 'crop(left)',
'SY': 'slice(614)',
'SZ': 'extend()',
'TA': 'extend(right, right)',
'TC': 'extend(right, right)',
'TD': 'extend()',
'TF': 'extend(right, right); copy(0, 539, 826, 552, 826, 13, 0, 328)',
'TG': 'crop(left)',
'TH': 'crop()',
'TJ': 'crop()',
'TK': 'extend()',
'TL': 'crop(left)',
'TM': 'crop(left)',
'TN': 'crop()',
'TO': 'slice(427)',
'TPTL': 'crop(left)',
'TR': 'crop(844)',
'TT': 'resize()',
'TV': 'extend(right, right)',
'TW': 'slice()',
'TZ': 'resize()',
'UA': 'crop()',
'UG': 'crop()',
'UK': 'cross(340, resize, 684)',
'UM': 'crop(778)',
'US': 'crop(778)',
'UY': 'crop(left)',
'UZ': 'crop(left)',
'VA': 'pass()',
'VC': 'extend()',
'VDVN': 'crop()',
'VE': 'crop(); copy(0, 0, 455, 455, 455, 455, 0, 0)',
'VG': 'extend(right, right)',
'VI': 'extend()',
'VN': 'crop()',
'VU': 'crop(left)',
'WF': 'extend(right, right); copy(0, 539, 826, 552, 826, 13, 0, 328)',
'WKUM': 'crop(left)',
'WS': 'slice()',
'YDYE': 'crop(left)',
'YE': 'crop()',
'YT': 'extend()',
'YUCS': 'crop()',
'ZA': 'resize()',
'ZM': 'cut(598, 0, 0, 0); extend()',
'ZRCD': 'crop()',
'ZW': 'crop(left)'
},
'imdb_code': {
'Côte d\'Ivoire': 'ci',
'Democratic Republic of the Congo': 'cd',
'Palestine': 'ps',
'Serbia and Montenegro': 'xfy'
},
'imdb_name': {
'Côte d\'Ivoire': 'Ivory Coast', # bug in some imdb entries
'Democratic Republic of the Congo': 'Democratic Republic of Congo', # bug in some imdb entries
'Palestine': 'Occupied Palestinian Territory', # bug in some imdb entries
'Serbia and Montenegro': 'Federal Republic of Yugoslavia'
},
2011-09-09 16:40:04 +00:00
# Snippet that prints an empty skeleton for the 'languages' table below
# (every language name listed on http://www.imdb.com/language/, mapped to ''):
#     import json
#     import re
#     from ox.net import readUrl
#     html = readUrl('http://www.imdb.com/language/')
#     results = re.compile('<a href="/language/.*?">(.*?)</a>').findall(html)
#     languages = {}
#     for result in results:
#         languages[result] = ''
#     print json.dumps(languages, sort_keys=True)
'languages': {
'Abkhazian': 'Abkhazia',
'Aboriginal': 'Australia',
'Aché': '',
'Acholi': '',
'Afrikaans': 'South Africa',
'Aidoukrou': '',
'Akan': '',
'Albanian': 'Albania',
'Algonquin': '',
'American': 'United States',
'Amharic': 'Ethiopia',
2011-09-09 16:40:04 +00:00
'Apache': '',
'Arabic': 'Saudi Arabia',
'Aragonese': '',
'Aramaic': 'Syria',
'Arapaho': '',
'Armenian': 'Armenia',
'Assamese': 'India',
'Assyrian Neo-Aramaic': '',
'Athapascan': '',
'Australian': 'Australia',
'Awadhi': '',
'Aymara': 'Bolivia',
'Azerbaijani': 'Azerbaijan',
'Bable': '',
'Baka': '',
'Balinese': 'Indonesia',
'Bambara': 'Mali',
'Basque': 'Spain',
2011-09-09 16:40:04 +00:00
'Bassari': '',
'Belarusian': 'Belarus',
'Bemba': '',
'Bengali': 'Bangladesh',
2011-09-09 21:16:32 +00:00
'Berber': 'Morocco', # ?, one of multiple
2011-09-09 16:40:04 +00:00
'Bhojpuri': '',
'Bicolano': 'Philippines',
'Bodo': '',
'Bosnian': 'Bosnia and Herzegovina',
'Brazilian': 'Brazil',
'Breton': 'France',
'British': 'United Kingdom',
'Bulgarian': 'Bulgaria',
'Burmese': 'Burma',
'Cantonese': 'China',
'Catalan': 'Spain',
'Central Khmer': 'Cambodia',
2011-09-09 21:16:32 +00:00
'Chaozhou': 'China',
2011-09-09 16:40:04 +00:00
'Chechen': 'Russia',
2011-09-09 21:16:32 +00:00
'Cherokee': 'United States',
'Cheyenne': 'United States',
'Chhattisgarhi': 'India',
2011-09-09 16:40:04 +00:00
'Chinese': 'China',
2011-09-09 21:16:32 +00:00
'Cornish': 'United Kingdom',
2011-09-09 16:40:04 +00:00
'Corsican': 'France',
'Cree': 'Canada',
2011-09-09 21:16:32 +00:00
'Creek': 'United States',
2011-09-09 16:40:04 +00:00
'Creole': '',
'Creoles and pidgins': '',
'Croatian': 'Croatia',
'Crow': '',
'Czech': 'Czech Republic',
'Danish': 'Denmark',
'Dari': 'Afghanistan',
'Desiya': '',
'Dinka': '',
'Djerma': 'Niger',
2011-09-09 16:40:04 +00:00
'Dogri': '',
'Dutch': 'Netherlands',
'Dyula': '',
'Dzongkha': '',
'East-Greenlandic': 'Greenland',
'Eastern Frisian': 'Germany',
'Egyptian (Ancient)': 'Egypt',
'English': 'United Kingdom',
'Esperanto': '',
'Estonian': 'Estonia',
'Ewe': '',
'Faliasch': '',
'Faroese': 'Faroe Islands',
'Filipino': 'Philippines',
'Finnish': 'Finland',
'Flemish': 'Belgium',
'Fon': '',
'French': 'France',
'Fulah': '',
'Fur': '',
'Gaelic': 'Ireland',
'Galician': 'Spain',
2011-09-09 16:40:04 +00:00
'Georgian': 'Georgia',
'German': 'Germany',
'Grebo': '',
'Greek': 'Greece',
'Greek, Ancient (to 1453)': 'Greece',
'Greenlandic': 'Greenland',
'Guarani': 'Paraguay',
'Gujarati': 'India',
'Gumatj': '',
2011-09-09 21:16:32 +00:00
'Gunwinggu': 'Australia',
2011-09-09 16:40:04 +00:00
'Haitian': 'Haiti',
'Hakka': 'China',
'Haryanvi': '',
2011-09-09 21:16:32 +00:00
'Hassanya': 'Mauritania',
'Hausa': 'Nigeria',
2011-09-09 16:40:04 +00:00
'Hawaiian': 'United States',
'Hebrew': 'Israel',
'Hindi': 'India',
'Hmong': '',
2011-09-09 21:16:32 +00:00
'Hokkien': 'China',
2011-09-09 16:40:04 +00:00
'Hopi': 'United States',
'Hungarian': 'Hungary',
'Iban': '',
'Ibo': 'Nigeria',
'Icelandic': 'Iceland',
'Indian': 'India',
'Indonesian': 'Indonesia',
'Inuktitut': '',
'Inupiaq': '',
'Irish Gaelic': 'Ireland',
'Italian': 'Italy',
'Japanese': 'Japan',
'Jola-Fonyi': '',
'Ju\'hoan': '',
'Kaado': '',
'Kabuverdianu': 'Cape Verde',
'Kabyle': '',
'Kalmyk-Oirat': '',
'Kannada': 'India',
'Karajá': '',
'Karbi': '',
'Karen': '',
'Kazakh': 'Kazakhstan',
'Khanty': 'Russia',
'Khasi': '',
'Kikuyu': '',
'Kinyarwanda': '',
'Kirundi': '',
'Klingon': '',
'Kodava': '',
'Konkani': 'India',
'Korean': 'South Korea',
'Korowai': 'Papua New Guinea',
2011-09-09 21:16:32 +00:00
'Kriolu': 'Cape Verde',
2011-09-09 16:40:04 +00:00
'Kru': '',
'Kudmali': '',
'Kuna': '',
2011-09-09 21:16:32 +00:00
'Kurdish': 'Turkey', # ?, one of multiple
2011-09-09 16:40:04 +00:00
'Kwakiutl': '',
'Kyrgyz': 'Kyrgyzstan',
2011-09-09 21:16:32 +00:00
'Ladakhi': 'India',
2011-09-09 16:40:04 +00:00
'Ladino': '',
'Lao': 'Laos',
'Latin': 'Italy',
'Latvian': 'Latvia',
'Limbu': '',
2011-09-09 21:16:32 +00:00
'Lingala': 'Democratic Republic of the Congo',
2011-09-09 16:40:04 +00:00
'Lithuanian': 'Lithuania',
'Low German': 'Germany',
'Luxembourgish': 'Luxemburg',
'Macedonian': 'Macedonia',
'Macro-Jê': '',
'Magahi': '',
'Maithili': '',
'Malagasy': 'Madagascar',
'Malay': 'Malaysia',
'Malayalam': 'India',
2011-09-09 16:40:04 +00:00
'Malecite-Passamaquoddy': '',
2011-09-09 21:16:32 +00:00
'Malinka': 'Guinea',
2011-09-09 16:40:04 +00:00
'Maltese': 'Malta',
'Manchu': '',
'Mandarin': 'China',
'Mandingo': '',
'Manipuri': '',
'Maori': 'New Zealand',
'Mapudungun': 'Chile',
'Marathi': 'India',
'Marshallese': 'Marshall Islands',
'Masai': '',
'Masalit': '',
'Maya': 'Mexico',
'Mende': 'Sierra Leone',
'Micmac': '',
'Middle English': 'England',
'Min Nan': '',
'Minangkabau': '',
'Mirandese': '',
'Mizo': '',
'Mohawk': '',
'Mongolian': 'Mongolia',
'Montagnais': '',
'More': 'Burkina Faso',
'Morisyen': '',
2011-09-09 21:16:32 +00:00
'Nagpuri': 'India',
2011-09-09 16:40:04 +00:00
'Nahuatl': '',
'Nama': '',
2011-09-09 21:16:32 +00:00
'Navajo': 'United States',
2011-09-09 16:40:04 +00:00
'Naxi': 'China',
'Ndebele': '',
'Neapolitan': 'Italy',
'Nenets': '',
'Nepali': 'Nepal',
2011-09-09 21:16:32 +00:00
'Nisga\'a': 'Canada',
2011-09-09 16:40:04 +00:00
'None': '',
'Norse, Old': '',
'North American Indian': '',
'Norwegian': 'Norway',
'Nushi': '',
'Nyaneka': '',
'Nyanja': 'Malawi',
'Occitan': '',
'Ojibwa': '',
'Ojihimba': '',
'Old English': 'England',
'Oriya': '',
'Papiamento': '',
'Parsee': 'Iran',
'Pashtu': 'Afghanistan',
'Pawnee': '',
'Persian': 'Iran',
'Peul': '',
'Polish': 'Poland',
'Polynesian': '',
'Portuguese': 'Portugal',
'Pular': '',
'Punjabi': 'India',
'Purepecha': '',
'Quechua': 'Peru',
'Quenya': '',
'Rajasthani': 'India',
'Rawan': '',
'Romanian': 'Romania',
'Romansh': 'Switzerland',
'Romany': 'Romania',
'Rotuman': '',
'Russian': 'Russia',
'Ryukyuan': 'Japan',
'Saami': 'Finland',
'Samoan': 'Samoa',
'Sanskrit': '',
'Sardinian': 'Italy',
'Scanian': '',
'Serbian': 'Serbia',
'Serbo-Croatian': 'Yugoslavia',
'Serer': '',
'Shanghainese': 'China',
2011-09-09 21:16:32 +00:00
'Shanxi': 'China',
2011-09-09 16:40:04 +00:00
'Shona': 'Zimbabwe',
2011-09-09 21:16:32 +00:00
'Shoshoni': 'United States',
2011-09-09 16:40:04 +00:00
'Sicilian': 'Italy',
'Sindarin': '',
2011-09-09 21:16:32 +00:00
'Sindhi': 'Pakistan',
2011-09-09 16:40:04 +00:00
'Sinhala': 'Sri Lanka',
2011-09-09 21:16:32 +00:00
'Sioux': 'United States',
2011-09-09 16:40:04 +00:00
'Slovak': 'Slovakia',
'Slovenian': 'Slovenia',
'Somali': 'Somalia',
'Songhay': '',
'Soninke': '',
'Sorbian': 'Germany',
2011-09-09 21:16:32 +00:00
'Sotho': 'Lesotho',
2011-09-09 16:40:04 +00:00
'Sousson': '',
'Spanish': 'Spain',
'Sranan': '',
2011-09-09 21:16:32 +00:00
'Swahili': 'Kenya', # ?, one of multiple
2011-09-09 16:40:04 +00:00
'Swedish': 'Sweden',
'Swiss German': 'Switzerland',
'Sylheti': '',
'Tagalog': 'Philippines',
'Tajik': 'Tajikistan',
2011-09-09 21:16:32 +00:00
'Tamashek': 'Algeria', # ?, one of multiple
2011-09-09 16:40:04 +00:00
'Tamil': 'Sri Lanka',
'Tarahumara': '',
'Tatar': 'Russia',
'Telugu': 'India',
'Teochew': '',
'Thai': 'Thailand',
'Tibetan': 'China',
'Tigrigna': '',
'Tlingit': '',
'Tok Pisin': '',
'Tonga (Tonga Islands)': 'Tonga',
'Tsonga': '',
'Tswa': '',
'Tswana': '',
'Tulu': '',
'Tupi': '',
'Turkish': 'Turkey',
'Turkmen': 'Turkmenistan',
'Tuvinian': '',
'Tzotzil': '',
'Ukrainian': 'Ukraine',
'Ungwatsi': '',
'Urdu': 'Pakistan',
'Uzbek': 'Uzbekistan',
'Vietnamese': 'Vietnam',
'Visayan': '',
'Washoe': '',
'Welsh': 'Wales',
2011-09-09 21:16:32 +00:00
'Wolof': 'Senegal', # ?, one of multiple
2011-09-09 16:40:04 +00:00
'Xhosa': 'South Africa',
'Yakut': '',
'Yapese': '',
'Yiddish': 'Israel',
2011-09-09 21:16:32 +00:00
'Yoruba': 'Nigeria',
2011-09-09 16:40:04 +00:00
'Zulu': 'South Africa'
},
2011-05-23 19:38:52 +00:00
'other': ['European Union', 'Metropolitan France', 'UK'],
'wikipedia_name': {
# ambiguous on wikipedia
'Cocos (Keeling) Islands': 'Cocos Islands',
'Collectivity of Saint Martin': 'Saint Martin',
'Federated States of Micronesia': 'Micronesia',
'French Territory of the Afars and the Issas': 'French Afar and Issas',
'Georgia (country)': 'Georgia',
'Nagorno-Karabakh Republic': 'Nagorno-Karabakh',
'People\'s Republic of China': 'China',
'Republic of China': 'Taiwan',
'Republic of Dahomey': 'Dahomey',
'Republic of Ireland': 'Ireland',
'Republic of Kosovo': 'Kosovo',
'Republic of Macedonia': 'Macedonia',
'Republic of Upper Volta': 'Upper Volta',
'Sahrawi Arab Democratic Republic': 'Sahrawi',
'Saudi-Iraqi neutral zone': 'Neutral Zone',
'State of Palestine': 'Palestine',
'Trust Territory of the Pacific Islands': 'Pacific Islands'
},
'wikipedia_url': {
# dependencies of guernsey
'Alderney': '',
'Herm': '',
'Sark': '',
# territory of pakistan
'Azad_Kashmir': '',
'Gilgit-Baltistan': '',
# wrong in http://en.wikipedia.org/wiki/List_of_sovereign_states
'Coral_Sea_Islands_Territory': 'Coral_Sea_Islands',
'Kingdom_of_the_Netherlands': 'Netherlands',
'Saint-Barth%C3%A9lemy': 'Saint_Barth%C3%A9lemy',
'Saint_Martin': 'Collectivity_of_Saint_Martin',
# wrong in http://en.wikipedia.org/wiki/ISO_3166-1_alpha-2
'Caribbean_Netherlands': 'Bonaire,_Saint_Eustatius_and_Saba',
'Ceuta': 'Ceuta_and_Melilla',
'Palestinian_territories': 'State_of_Palestine',
'Saudi%E2%80%93Iraqi_neutral_zone': 'Saudi-Iraqi_neutral_zone',
'Western_Sahara': 'Sahrawi_Arab_Democratic_Republic',
# wrong in http://en.wikipedia.org/wiki/ISO_3166-3
'Johnston_Atoll': 'Johnston_Island',
'Midway_Atoll': 'Midway_Islands',
# wrong in all
'%C3%85land_Islands': 'Åland',
'East Timor': 'Timor-Leste',
'Cocos_(Keeling)_Islands': 'Cocos_Islands',
'French_Southern_and_Antarctic_Lands': 'French_Southern_and_Antarctic_Territories',
'Saint_Helena,_Ascension_and_Tristan_da_Cunha': 'Saint_Helena',
'The_Bahamas': 'Bahamas',
'The_Gambia': 'Gambia'
},
'wikipedia_urls': [
# not in any list
# antarctic
'Adélie_Land', 'Antártica', 'Argentine_Antarctica',
# dependent
'England', 'Northern_Ireland', 'Scotland', 'Wales',
# former
'Korea', 'Siam', 'West_Germany',
# other
'East Timor', 'French_Southern_Territories', 'Peter_I_Island', 'South_Sudan', 'UK'
]
}
def get_cities():
    """Return the result of ``wikipedia.get_cities()`` unchanged."""
    return wikipedia.get_cities()
def get_countries():
    """Collect, annotate and save the list of countries.

    The raw lists returned by the ``geonames``, ``imdb`` and ``wikipedia``
    modules are each written to ``../json/<source>/countries.json``.  The
    wikipedia list is then annotated in place, using the tables in ``geo``,
    with the keys ``dependencies``, ``dependency``, ``disputes``,
    ``disputed``, ``dissolved``, ``googleName``, ``other``, ``imdbCode``,
    ``imdbName`` and ``languages``.  For every country the flag and icon
    images are (re)built as a side effect.

    Returns the annotated list, which is also written to
    ``../json/countries.json``.
    """
    def _relations(name, table):
        # ``table`` maps a ', '-joined list of names to a list of names.
        # Returns (values, []) if ``name`` is one of the key names,
        # ([], key names) if it is one of the values, and ([], []) if it
        # occurs in neither.  Only the first matching entry is used.
        for joined, members in table.items():
            owners = joined.split(', ')
            if name in owners:
                return members, []
            if name in members:
                return [], owners
        return [], []

    geonames_countries = geonames.get_countries()
    write_json('../json/geonames.org/countries.json', geonames_countries, True)
    imdb_countries = imdb.get_countries()
    write_json('../json/imdb.com/countries.json', imdb_countries, True)
    wikipedia_countries = wikipedia.get_countries()
    write_json('../json/wikipedia.org/countries.json', wikipedia_countries, True)
    countries = wikipedia_countries
    for country in countries:
        name = country['name']
        # dependencies
        country['dependencies'], country['dependency'] = _relations(
            name, geo['dependencies']
        )
        # disputes
        country['disputes'], country['disputed'] = _relations(
            name, geo['disputes']
        )
        # dissolved
        country['dissolved'] = geo['dissolved'].get(name, [])
        # google name
        country['googleName'] = geo['google_name'].get(name, name)
        # other
        country['other'] = name in geo['other']
        # imdb: an explicit override wins, otherwise match by code or name
        if name in geo['imdb_name']:
            country['imdbCode'] = geo['imdb_code'][name]
            country['imdbName'] = geo['imdb_name'][name]
        else:
            for imdb_country in imdb_countries:
                same_code = imdb_country['code'].upper() == country['code']
                if same_code or imdb_country['name'] == name:
                    country['imdbCode'] = imdb_country['code']
                    country['imdbName'] = imdb_country['name']
                    break
        get_country_flag(country['code'], country['flagURL'])
        get_country_icon(country['code'])
        # languages
        country['languages'] = [
            language
            for language, language_country in geo['languages'].items()
            if language_country == name
        ]
    write_json('../json/countries.json', countries)
    return countries
def get_country_flag(code, url):
    """Download the flag at ``url`` and store it as PNGs of several widths.

    Files are written to ``../png/flags/<width>/<code>.png`` for the widths
    2048, 256 and 32.  For ``.svg`` URLs the SVG itself is kept in
    ``../svg/flags/<code>.svg`` and the 2048px PNG is fetched from the
    corresponding ``/commons/thumb/`` URL.  Any other URL is treated as a
    bitmap that is saved and then scaled to 2048px.  Existing files are
    only rewritten when the downloaded data differs from what is on disk.
    """
    def _png_path(size):
        return '../png/flags/' + str(size) + '/' + code + '.png'

    def _scale_to_width(image, target):
        # keep the aspect ratio; relies on true division
        height = int(round(target / image.size[0] * image.size[1]))
        return image.resize((target, height), Image.ANTIALIAS)

    # max width on wikipedia
    width = 2048
    img = read_url(url, cache=False)
    png_file = _png_path(width)
    if url[-4:] == '.svg':
        svg_file = '../svg/flags/' + code + '.svg'
        update_image = not os.path.exists(svg_file) or read_file(svg_file) != img
        if update_image:
            write_file(svg_file, img)
        if not os.path.exists(png_file) or update_image:
            thumb_url = url.replace('/commons/', '/commons/thumb/') + '/' + str(width) + 'px-.png'
            write_file(png_file, read_url(thumb_url))
        png = Image.open(png_file)
    else:
        update_image = not os.path.exists(png_file) or read_file(png_file) != img
        if update_image:
            write_file(png_file, img)
            png = _scale_to_width(Image.open(png_file), width)
            write_image(png_file, png)
        else:
            png = Image.open(png_file)
    # smaller versions are derived from the 2048px image
    for small_width in [256, 32]:
        small_file = _png_path(small_width)
        if not os.path.exists(small_file) or update_image:
            write_image(small_file, _scale_to_width(png, small_width))
def get_country_icon(code):
    """Render the square icons and round markers for one country.

    The 2048px flag ``../png/flags/2048/<code>.png`` is transformed by the
    recipe in ``geo['icon'][code]``: a '; '-separated list of steps of the
    form ``name(arg, arg)``, optionally followed by a ' # ' comment.  Each
    step receives the image produced by the previous step.  The result is
    written to ``../png/icons/<size>/<code>.png`` (1024, 256, 16) and,
    masked with ``../png/circle.png``, to
    ``../png/markers/<size>/<code>.png`` (256, 32).
    """
    def _square(side):
        return Image.new('RGBA', (side, side))

    def _circle_mask():
        # circle.png copied onto an 'L' mode image of the same size;
        # callers resize it and pass it to paste() as the mask
        circle = Image.open('../png/circle.png')
        mask = Image.new('L', (circle.size[0], circle.size[1]))
        mask.paste(circle, (0, 0))
        return mask

    def _border(flag, args):
        # keep args[0] pixels from both edges and fill with the center
        w, h = flag.size[0], flag.size[1]
        icon = _square(h)
        left = int((w - h) / 2 + args[0])
        right = left + h - args[0] * 2
        crop_left = flag.crop((0, 0, args[0], h))
        crop_center = flag.crop((left, 0, right, h))
        crop_right = flag.crop((w - args[0], 0, w, h))
        icon.paste(crop_left, (0, 0))
        icon.paste(crop_center, (args[0], 0))
        icon.paste(crop_right, (h - args[0], 0))
        return icon

    def _construct(flag):
        # nepal
        side = flag.size[1]
        icon = _square(side)
        # blue
        crop = flag.crop((0, side - 85, 85, side))
        crop = crop.resize((side, side), Image.ANTIALIAS)
        icon.paste(crop, (0, 0))
        # red
        crop = flag.crop((86, side - 171, 171, side - 86))
        crop = crop.resize((side - 170, side - 170), Image.ANTIALIAS)
        icon.paste(crop, (85, 85))
        # moon
        crop = flag.crop((147, 753, 147 + 732, 1151))
        icon.paste(crop, (int((side - 732) / 2), 753))
        # sun
        crop = flag.crop((147, 1449, 147 + 732, 1449 + 732))
        mask = _circle_mask().resize((732, 732), Image.ANTIALIAS)
        icon.paste(crop, (int((side - 732) / 2), 1449), mask)
        return icon

    def _copy(flag, args):
        # paste a region of the original 2048px flag onto the current image;
        # args: crop box (0-3), resized size (4-5), position (6-7),
        # optional shape (8), 'rect' by default
        icon = flag
        source = Image.open('../png/flags/2048/' + code + '.png')
        if len(args) == 8:
            args.append('rect')
        crop = source.crop((args[0], args[1], args[2], args[3]))
        crop = crop.resize((args[4], args[5]), Image.ANTIALIAS)
        if args[8] == 'rect':
            icon.paste(crop, (args[6], args[7]))
        elif args[8] == 'circle':
            mask = _circle_mask().resize((args[4], args[5]), Image.ANTIALIAS)
            icon.paste(crop, (args[6], args[7]), mask)
        return icon

    def _crop(flag, args):
        # cut a square out of the flag: centered, at an edge, or centered
        # on the x position given in args[0]
        w, h = flag.size[0], flag.size[1]
        icon = _square(h)
        if len(args) == 0:
            left = int((w - h) / 2)
        elif args[0] == 'left':
            left = 0
        elif args[0] == 'right':
            left = w - h
        else:
            left = int(args[0]) - int(h / 2)
        icon.paste(flag.crop((left, 0, left + h, h)), (0, 0))
        return icon

    def _cross(flag, args):
        icon = _crop(flag, [])
        w, h = flag.size[0], flag.size[1]
        if args[1] == 'crop':
            left = int((w - args[0]) / 4 - (h - args[0]) / 4)
            right = left + int((h - args[0]) / 2)
            icon.paste(flag.crop((left, 0, right, h)), (0, 0))
            icon.paste(
                flag.crop((w - right, 0, w - left, h)),
                (int((h + args[0]) / 2), 0)
            )
        elif args[1] == 'resize':
            width = args[2]
            height = int((h - args[0]) / 2)
            offset = int((h + args[0]) / 2)
            # the four corners, each squeezed into a height x height square
            corners = [
                ((0, 0, width, height), (0, 0)),
                ((w - width, 0, w, height), (offset, 0)),
                ((0, offset, width, h), (0, offset)),
                ((w - width, offset, w, h), (offset, offset)),
            ]
            for box, position in corners:
                crop = flag.crop(box).resize((height, height), Image.ANTIALIAS)
                icon.paste(crop, position)
        return icon

    def _cut(flag, args):
        # args: pixels to remove from left, top, right, bottom
        return flag.crop((
            args[0], args[1], flag.size[0] - args[2], flag.size[1] - args[3]
        ))

    def _extend(flag, args):
        # pad the flag to a square by repeating a row above and below it
        def _row(pixel, length):
            row = Image.new('RGBA', (length, 1))
            for x in range(length):
                row.paste(pixel, (x, 0))
            return row

        w, h = flag.size[0], flag.size[1]
        icon = _square(w)
        top = int((w - h) / 2)
        bottom = int((w - h) / 2 + 0.5)
        icon.paste(flag, (0, top))
        if len(args) == 0:
            crop_top = flag.crop((0, 1, w, 2))
            crop_bottom = flag.crop((0, h - 2, w, h - 1))
        else:
            # fill with the color of a single pixel near the named corner
            if args[0] == 'left':
                pixel = flag.crop((2, 1, 3, 2))
            else:
                pixel = flag.crop((w - 3, 1, w - 2, 2))
            crop_top = _row(pixel, w)
            if args[1] == 'left':
                pixel = flag.crop((1, h - 3, 2, h - 2))
            else:
                pixel = flag.crop((w - 3, h - 2, w - 2, h - 1))
            crop_bottom = _row(pixel, w)
        for y in range(top):
            icon.paste(crop_top, (0, y))
        for y in range(bottom):
            icon.paste(crop_bottom, (0, top + h + y))
        return icon

    def _pass(flag):
        return flag

    def _resize(flag, args):
        side = flag.size[1]
        icon = _square(side)
        icon.paste(flag.resize((side, side), Image.ANTIALIAS))
        return icon

    def _slice(flag, args):
        # join two half-width slices, mirrored around the flag's center
        w, h = flag.size[0], flag.size[1]
        icon = _square(h)
        if len(args) == 0:
            args = [int(w / 4)]
        width = int(h / 2)
        left = int(args[0] - width / 2)
        right = int(args[0] + width / 2)
        crop_left = flag.crop((left, 0, right, h))
        crop_right = flag.crop((w - right, 0, w - left, h))
        icon.paste(crop_left, (0, 0))
        icon.paste(crop_right, (width, 0))
        return icon

    # recipe step name -> callable(flag, args)
    transforms = {
        'border': _border,
        'construct': lambda flag, args: _construct(flag),
        'copy': _copy,
        'crop': _crop,
        'cross': _cross,
        'cut': _cut,
        'extend': _extend,
        'pass': lambda flag, args: _pass(flag),
        'resize': _resize,
        'slice': _slice,
    }
    # existing files are always overwritten
    regenerate = True

    file = '../png/icons/1024/' + code + '.png'
    if regenerate or not os.path.exists(file):
        flag = Image.open('../png/flags/2048/' + code + '.png')
        icon = None
        functions = geo['icon'][code].split(' # ')[0].split('; ')
        # NOTE(review): with an empty recipe icon stays None and the
        # resize calls further down fail
        if functions[0]: # remove later
            for function in functions:
                pos = function.find('(')
                raw_args = function[pos + 1:-1].split(', ')
                if raw_args[0]:
                    # numeric arguments become ints, keywords stay strings
                    args = [
                        int(x) if x[0] in '-0123456789' else x
                        for x in raw_args
                    ]
                else:
                    args = []
                transform = transforms.get(function[:pos])
                if transform is not None:
                    icon = transform(flag, args)
                # the next step works on the result of this one
                flag = icon
            write_image(file, icon.resize((1024, 1024), Image.ANTIALIAS))
    else:
        icon = Image.open(file)
    for width in [256, 16]:
        file = '../png/icons/' + str(width) + '/' + code + '.png'
        if regenerate or not os.path.exists(file):
            write_image(file, icon.resize((width, width), Image.ANTIALIAS))
    mask = _circle_mask()
    for size in [256, 32]:
        marker = _square(size)
        marker.paste(
            icon.resize((size, size), Image.ANTIALIAS),
            (0, 0),
            mask.resize((size, size), Image.ANTIALIAS)
        )
        file = '../png/markers/' + str(size) + '/' + code + '.png'
        if regenerate or not os.path.exists(file):
            write_image(file, marker)
def get_oxjs_languages():
    """Pair geonames languages with imdb languages and save the result.

    A geonames language is kept only if some imdb language has a ``code``
    equal to one of its ISO 639-1/-2/-3 codes; the first such imdb language
    is used.  The pairs are written to ``../json/oxjs.org/languages.json``
    as dicts with the keys ``geonames.org`` and ``imdb.com``.
    """
    # geonames_countries = get_geonames_countries()
    geonames_languages = get_geonames_languages()
    imdb_languages = get_imdb_languages()
    languages = []
    for geonames_language in geonames_languages:
        match = next((
            imdb_language for imdb_language in imdb_languages
            if imdb_language['code'] in [
                geonames_language['ISO_639-1'],
                geonames_language['ISO_639-2'],
                geonames_language['ISO_639-3']
            ]
        ), None)
        if match is not None:
            languages.append({
                'geonames.org': geonames_language,
                'imdb.com': match
            })
    write_json('../json/oxjs.org/languages.json', languages, True)
def make_tree(places):
    """Arrange ``places`` into a tree following ``geo['levels']``.

    ``places`` is a list of dicts with the keys ``feature_code``,
    ``geonameid``, ``name`` and every key named by ``level['key']`` in
    ``geo['levels']``.  A place becomes a node on the level whose
    ``featureCode`` equals its ``feature_code``.  The node id is the
    '-'-joined list of the place's level values (skipping '' and '00'),
    and its parent is the node whose id is the same with the last part
    removed.  Siblings are kept sorted by node id.

    The tree (a list of nodes, each with a ``nodes`` list of children) is
    written to ``../json/tree.json`` and returned.  A place whose parent
    node does not exist raises TypeError.
    """
    tree = []

    def get_node(find, node=None):
        # depth-first search; with no node given, search the whole tree
        if node and find(node):
            return node
        for child in (node['nodes'] if node else tree):
            found = get_node(find, child)
            if found:
                return found
        return None

    def get_node_id(place):
        ids = [place[level['key']] for level in geo['levels']]
        return '-'.join([x for x in ids if x != '' and x != '00'])

    def get_parent_id(node_id):
        return '-'.join(node_id.split('-')[:-1])

    for l, level in enumerate(geo['levels']):
        for place in places:
            if place['feature_code'] == level['featureCode']:
                node = {
                    'geonameid': place['geonameid'],
                    'name': place['name'],
                    'node_id': get_node_id(place),
                    'nodes': [],
                }
                print(node['name'])
                print(node['node_id'])
                if l == 0:
                    parent = tree
                else:
                    parent_id = get_parent_id(node['node_id'])
                    parent = get_node(lambda x: x['node_id'] == parent_id)['nodes']
                parent.append(node)
                # sort in place: sorted() would only rebind the local name
                # and leave the list inside the tree untouched
                parent.sort(key=lambda x: x['node_id'])
    write_json('../json/tree.json', tree)
    print('')
    return tree
def read_file(file):
    """Return the complete contents of the file at path ``file``.

    The path is announced on stdout first.  The file is closed even if
    reading fails.
    """
    print('reading %s' % file)
    with open(file) as f:
        return f.read()
def read_json(file):
    """Parse JSON from ``file``.

    ``file`` is fetched with read_url() if it starts with 'http://' and
    read from disk with read_file() otherwise.
    """
    if file.startswith('http://'):
        data = read_url(file)
    else:
        data = read_file(file)
    return json.loads(data)
def read_table(file, keys, drop=[], filter=lambda x: True, sort=lambda x: x):
    """Read a tab-separated table into a sorted list of dicts.

    file   -- local path, or an 'http://' URL (downloaded uncached to
              '_tmp.data' first)
    keys   -- one dict per column, in column order, with 'name' (the key
              used in the result) and 'type' (a sample value: a float or
              int makes the column numeric, with the sample as the value
              for empty cells; a list makes the cell a ','-separated list
              whose items are parsed like type[0]; anything else leaves
              the text unchanged)
    drop   -- column names to leave out (never modified here)
    filter -- predicate, only rows for which it is true are kept
    sort   -- key function used to order the result

    Rows starting with '#' are skipped.  Raises IndexError if a row has
    more columns than ``keys``.
    """
    def parse_value(text, sample):
        # exact type checks: a bool sample must not be treated as int
        if type(sample) is float:
            return float(text) if text else sample
        if type(sample) is int:
            return int(text) if text else sample
        return text

    if file.startswith('http://'):
        path = '_tmp.data'
        write_file(path, read_url(file, cache=False))
    else:
        print('reading %s' % file)
        path = file
    data = []
    with open(path) as f:
        for row in f:
            if row and row[0] != '#':
                item = {}
                # strip only the line break, so that a last line without
                # one does not lose its final character
                cols = row.rstrip('\n').split('\t')
                for c, col in enumerate(cols):
                    key = keys[c]
                    if key['name'] in drop:
                        continue
                    if type(key['type']) is list:
                        if col:
                            value = [
                                parse_value(x, key['type'][0])
                                for x in col.split(',')
                            ]
                        else:
                            value = []
                    else:
                        value = parse_value(col, key['type'])
                    item[key['name']] = value
                if filter(item):
                    data.append(item)
    print(data)
    return sorted(data, key=sort)
def read_url(url, cache=True):
    """Return the data at ``url``.

    Uses ``ox.cache.readUrl`` when ``cache`` is true and ``ox.net.readUrl``
    otherwise.  The URL is announced on stdout first.
    """
    print('reading %s' % url)
    if cache:
        return ox.cache.readUrl(url)
    return ox.net.readUrl(url)
def write_file(file, data):
    """Write ``data`` to ``file``, replacing any existing content.

    Missing parent directories are created via write_path().  The path is
    announced on stdout first, and the file is closed even if writing
    fails.  Returns ``len(data)``.
    """
    print('writing %s' % file)
    write_path(file)
    with open(file, 'w') as f:
        f.write(data)
    return len(data)
def write_image(file, image):
    """Save ``image`` to ``file`` via its ``save`` method.

    Missing parent directories are created via write_path(); the path is
    announced on stdout first.
    """
    print('writing %s' % file)
    write_path(file)
    image.save(file)
def write_json(file, data, reformat=False):
    """Serialize ``data`` as JSON with sorted keys and write it to ``file``.

    By default the output is indented by 4 spaces.  With ``reformat`` the
    compact single-line form is used instead, with a line break inserted
    after '[{' openings, between '}, {' neighbours and before '}]' ends.
    """
    if reformat:
        text = json.dumps(data, sort_keys=True)
        replacements = (
            ('[{', '[\n {'),
            ('}, {', '},\n {'),
            ('}]', '}\n]'),
        )
        for old, new in replacements:
            text = text.replace(old, new)
    else:
        text = json.dumps(data, indent=4, sort_keys=True)
    write_file(file, text)
def write_log(file, line):
    """Append ``line`` to the log at ``file``, or reset the log.

    If ``line`` is None the file is rewritten as empty.  If the file does
    not exist yet it is created containing just ``line``.  Otherwise the
    existing content is rewritten with a newline and ``line`` appended.
    """
    if line is None:
        data = ''
    elif not os.path.exists(file):
        data = line
    else:
        data = read_file(file) + '\n' + line
    write_file(file, data)
def write_path(file):
    """Create the directory that contains ``file`` if it is missing.

    Nothing happens when ``file`` has no directory part or the directory
    already exists.
    """
    directory = os.path.dirname(file)
    if not directory or os.path.exists(directory):
        return
    os.makedirs(directory)
import geonames
import imdb
import wikipedia
if __name__ == '__main__':
    # sys is not among the imports visible at the top of this file; without
    # it, sys.exit() raises NameError instead of ending the run cleanly.
    import sys

    # the current task: dump the geo tables and rebuild the country data
    write_json('../json/geo.json', geo)
    countries = get_countries()
    sys.exit()

    # NOTE(review): nothing below the sys.exit() above is ever executed.
    # It looks like a scratchpad of earlier one-off tasks, each ended by
    # its own sys.exit(); some of it calls functions that are not defined
    # in the visible part of this file.

    file = '../txt/geonames.org/cities1000.txt'
    geo['keys'] = read_json('../json/oxjs.org/geonames.keys.json')
    keys = geo['keys']['place']['geonames.org']
    drop = ['alternatenames']
    # filter = lambda x: x['feature_code'] != 'PPLX' and x['population'] >= 100000
    filter = lambda x: x['feature_code'] == 'PPLC' or x['population'] >= 10000
    sort = lambda x: -x['population']
    data = read_table(file, keys, drop=drop, filter=filter, sort=sort)
    write_json('../json/geonames.org/cities10000.json', data, False)
    print(len(data))
    sys.exit()

    cities = get_cities()
    print('%s %s' % (cities, len(cities)))
    sys.exit()

    countries = get_countries()
    print('current independent %s' % len(filter(
        lambda x: not len(x['dependency']) and not x['former'] and not x['other'], countries)
    ))
    print('former independent %s' % len(filter(
        lambda x: not len(x['dependency']) and x['former'] and not x['other'], countries)
    ))
    print('current dependency %s' % len(filter(
        lambda x: len(x['dependency']) and not x['former'] and not x['other'], countries)
    ))
    print('former dependency %s' % len(filter(
        lambda x: len(x['dependency']) and x['former'] and not x['other'], countries)
    ))
    print('other %s' % len(filter(
        lambda x: x['other'], countries)
    ))
    sys.exit()

    file = '../txt/geonames.org/cities1000.txt'
    keys = geo['keys']['place']['geonames.org']
    drop = ['alternatenames']
    # filter = lambda x: x['feature_code'] != 'PPLX' and x['population'] >= 100000
    filter = lambda x: x['feature_code'] == 'PPLC' or x['population'] >= 100000
    sort = lambda x: -x['population']
    data = read_table(file, keys, drop=drop, filter=filter, sort=sort)
    write_json('../json/geonames.org/cities100000.json', data, False)
    print(len(data))
    get_oxjs_languages()
    get_geonames_countries()
    get_geonames_languages()
    get_imdb_countries()
    get_imdb_languages()
    sys.exit()

    #places = read_json('../json/geonames.org/DE.A.json')
    #tree = make_tree(places)
    #write_json('../json/geonames.org/tree.DE.json', tree)
    file = '../txt/geonames.org/countryInfo.txt'
    keys = geo['keys']['country']['geonames.org']
    sort = lambda x: -x['Population']
    data = read_table(file, keys, sort=sort)
    write_json('../json/' + file[7:-4] + '.json', data)
    print(len(data))
    file = '../txt/geonames.org/iso-languagecodes.txt'
    keys = geo['keys']['language']['geonames.org']
    sort = lambda x: x['Language_Name']
    data = read_table(file, keys, sort=sort)
    write_json('../json/' + file[7:-4] + '.json', data)
    print(len(data))
    """
    languages = read_json('../json/geonames.org/iso-languagecodes.json')
    countries = read_json('../json/geonames.org/countryInfo.json')
    max_languages = max(map(lambda x: len(x['Languages']), countries))
    language_countries = {}
    for language in languages:
        language_name = language['Language_Name']
        language_iso = [language['ISO_639-3'], language['ISO_639-2'], language['ISO_639-1']]
        found = False
        for i in range(max_languages):
            for country in countries:
                if len(country['Languages']) > i:
                    country_language = country['Languages'][i].split('-')[0]
                    if country_language in language_iso:
                        if not language_name in language_countries:
                            language_countries[language_name] = []
                        language_countries[language_name].append({
                            'country': country['Country'],
                            'population': country['Population'],
                            'position': i + 1
                        })
    write_json('../json/geonames.org/languageCountriesAll.json', language_countries)
    for language, countries in language_countries.iteritems():
        language_countries[language] = countries[0]['country']
    language_countries['English'] = 'United Kingdom'
    language_countries['French'] = 'France'
    language_countries['Portuguese'] = 'Portugal'
    language_countries['Spanish'] = 'Spain'
    write_json('../json/geonames.org/languageCountries.json', language_countries)
    """
    file = '../txt/geonames.org/cities15000.txt'
    keys = geo['keys']['place']['geonames.org']
    filter = lambda x: x['population'] >= 100000
    sort = lambda x: -x['population']
    data = read_table(file, keys, filter=filter, sort=sort)
    write_json('../json/geonames.org/cities100000.json', data)
    print(len(data))
    file = '../txt/geonames.org/DE.txt'
    keys = geo['keys']['place']['geonames.org']
    # 'PPLA3' was listed as a second 'PPLA2', presumably a typo
    filter = lambda x: x['feature_class'] == 'A' or x['feature_code'] in [
        'PPLA', 'PPLA2', 'PPLA3', 'PPLA4', 'PPLC', 'PPLG'
    ]
    sort = lambda x: -x['population']
    data = read_table(file, keys, filter=filter, sort=sort)
    write_json('../json/geonames.org/DE.A,P.json', data)
    print(len(data))