2011-05-23 19:38:52 +00:00
# -*- coding: utf-8 -*-
from __future__ import division
import Image
import json
import math
import os
import ox
import re
"""
run this , then html / geo . html , then fix JSON
311 countries
- - sovereign , dependent and disputed countries according to
http : / / en . wikipedia . org / wiki / List_of_sovereign_states
- - dissolved countries , according to
http : / / en . wikipedia . org / wiki / ISO_3166 - 3 and http : / / www . imdb . com / country /
- - entities with country codes , according to
http : / / en . wikipedia . org / wiki / ISO_3166 - 1 _alpha - 2
2011-05-24 06:15:44 +00:00
( like Ascension , Canary Islands , Ceuta and Melilla , Diego Garcia ,
European Union , Metropolitan France , Tristan da Cunha , UK . . . )
2011-05-23 19:38:52 +00:00
writes json / geo . json and json / countries . json ,
both will be used by html / geo . html
"""
# Southern latitude bound used as the 'south' edge of the Antarctic
# bounding boxes in geo['coordinates']: -degrees(atan(sinh(pi))),
# roughly -85.0511 degrees.
# NOTE(review): presumably the latitude limit of the Mercator map
# projection -- confirm against html/geo.html.
MIN_LAT = math.degrees(-math.atan(math.sinh(math.pi)))
geo = {
' code ' : {
# missing on wikipedia
' Neutral Zone ' : ' NTHH ' ,
' Réunion ' : ' RE ' ,
# not yet assigned, see http://en.wikipedia.org/wiki/List_of_sovereign_states
' South Sudan ' : ' SS ' ,
# disputed, see http://en.wikipedia.org/wiki/List_of_sovereign_states
' Abkhazia ' : ' GE-AB ' ,
' Kosovo ' : ' RS-KO ' ,
' Nagorno-Karabakh ' : ' AZ-NK ' ,
' Northern Cyprus ' : ' CY-NC ' ,
' South Ossetia ' : ' GE-SO ' ,
' Somaliland ' : ' SO-SO ' ,
' Transnistria ' : ' MD-TR ' ,
# dependent, see http://en.wikipedia.org/wiki/List_of_sovereign_states
' Akrotiri and Dhekelia ' : ' GB-AD ' ,
' Ashmore and Cartier Islands ' : ' AU-AC ' ,
' Coral Sea Islands ' : ' AU-CS ' ,
' Peter I Island ' : ' NO-PI ' ,
# dependent, see http://en.wikipedia.org/wiki/ISO_3166-2:GB
' England ' : ' GB-ENG ' ,
' Northern Ireland ' : ' GB-NIR ' ,
' Scotland ' : ' GB-SCT ' ,
' Wales ' : ' GB-WLS ' ,
# antarctic, see http://en.wikipedia.org/wiki/Territorial_claims_in_Antarctica
' Adélie Land ' : ' FR-AQ ' ,
' Antártica ' : ' CL-AQ ' ,
' Argentine Antarctica ' : ' AR-AQ ' ,
' Australian Antarctic Territory ' : ' AU-AQ ' ,
' Ross Dependency ' : ' NZ-AQ ' ,
# exceptional, see http://en.wikipedia.org/wiki/ISO_3166-1_alpha-2
' Canary Islands ' : ' IC ' ,
' Ceuta and Melilla ' : ' EA ' ,
' UK ' : ' UK ' ,
# dissolved, see http://www.imdb.com/country/
' Korea ' : ' KOHH ' ,
' Netherlands Antilles ' : ' ANHH ' ,
' Siam ' : ' SITH ' ,
' West Germany ' : ' DEDE '
} ,
' coordinates ' : {
# see http://en.wikipedia.org/wiki/Territorial_claims_in_Antarctica
' Adélie Land ' : {
' south ' : MIN_LAT ,
' west ' : 136.03333333 ,
' north ' : - 60 ,
' east ' : 142.18333333
} ,
' Antarctica ' : {
' south ' : MIN_LAT ,
' west ' : - 179.99999999 ,
' north ' : - 60 ,
' east ' : 179.99999999
} ,
' Antártica ' : {
' south ' : MIN_LAT ,
' west ' : - 90 ,
' north ' : - 60 ,
' east ' : - 53
} ,
' Argentine Antarctica ' : {
' south ' : MIN_LAT ,
' west ' : - 74 ,
' north ' : - 60 ,
' east ' : - 25
} ,
' Australian Antarctic Territory ' : {
' south ' : MIN_LAT ,
' west ' : 44.63333333 ,
' north ' : - 60 ,
' east ' : 160
} ,
' British Antarctic Territory ' : {
' south ' : MIN_LAT ,
' west ' : - 80 ,
' north ' : - 60 ,
' east ' : - 20
} ,
' Queen Maud Land ' : {
' south ' : MIN_LAT ,
' west ' : 20 ,
' north ' : - 60 ,
' east ' : 44.63333333
} ,
' Ross Dependency ' : {
' south ' : MIN_LAT ,
' west ' : 160 ,
' north ' : - 60 ,
' east ' : - 150
}
} ,
' dependencies ' : {
' Argentina ' : [ ' Argentine Antarctica ' ] ,
' Argentina, Australia, Chile, France, New Zealand, Norway, United Kingdom ' : [ ' Antarctica ' ] ,
' Australia ' : [
' Ashmore and Cartier Islands ' , ' Australian Antarctic Territory ' , ' Christmas Island ' , ' Cocos Islands ' , ' Coral Sea Islands ' ,
' Heard Island and McDonald Islands ' , ' Norfolk Island '
] ,
' Chile ' : [ ' Antártica ' ] ,
' China ' : [ ' Hong Kong ' , ' Macau ' ] ,
' Denmark ' : [ ' Faroe Islands ' , ' Greenland ' ] ,
' Finland ' : [ ' Åland ' ] ,
' France ' : [
' Adélie Land ' , ' Clipperton Island ' , ' French Afar and Issas ' , ' French Guiana ' , ' French Polynesia ' ,
' French Southern and Antarctic Territories ' , ' French Southern Territories ' , ' Guadeloupe ' , ' Martinique ' , ' Mayotte ' ,
' New Caledonia ' , ' Réunion ' , ' Saint Barthélemy ' , ' Saint Martin ' , ' Saint Pierre and Miquelon ' ,
' Wallis and Futuna '
] ,
' France, United Kingdom ' : [ ' New Hebrides ' ] ,
' Netherlands ' : [
' Aruba ' , ' Bonaire, Saint Eustatius and Saba ' , ' Curaçao ' , ' Netherlands Antilles ' , ' Sint Maarten '
] ,
' New Zealand ' : [ ' Cook Islands ' , ' Niue ' , ' Ross Dependency ' , ' Tokelau ' ] ,
' Norway ' : [ ' Bouvet Island ' , ' Peter I Island ' , ' Queen Maud Land ' , ' Svalbard and Jan Mayen ' ] ,
' Spain ' : [ ' Canary Islands ' , ' Ceuta and Melilla ' ] ,
' Soviet Union ' : [ ' Byelorussian Soviet Socialist Republic ' ] , # fixme: what about ukraine?
' United Kingdom ' : [
' Akrotiri and Dhekelia ' , ' Anguilla ' , ' Ascension Island ' , ' Bermuda ' , ' British Antarctic Territory ' ,
' British Indian Ocean Territory ' , ' British Virgin Islands ' , ' Cayman Islands ' , ' Diego Garcia ' , ' England ' ,
' Falkland Islands ' , ' Gibraltar ' , ' Gilbert and Ellice Islands ' , ' Guernsey ' , ' Isle of Man ' ,
' Jersey ' , ' Montserrat ' , ' Northern Ireland ' , ' Pitcairn Islands ' , ' Saint Helena ' ,
' Scotland ' , ' South Georgia and the South Sandwich Islands ' , ' Tristan da Cunha ' , ' Turks and Caicos Islands ' , ' Wales '
] ,
' United Kingdom, United States ' : [ ' Canton and Enderbury Islands ' ] ,
' United States ' : [
' American Samoa ' , ' Guam ' , ' Northern Mariana Islands ' , ' Johnston Island ' , ' Midway Islands ' ,
' Pacific Islands ' , ' Panama Canal Zone ' , ' Puerto Rico ' , ' United States Minor Outlying Islands ' , ' United States Miscellaneous Pacific Islands ' ,
' United States Virgin Islands ' , ' Wake Island '
]
} ,
' disputes ' : {
' Azerbaijan ' : [ ' Nagorno-Karabakh ' ] ,
' Cyprus ' : [ ' Northern Cyprus ' ] ,
' Georgia ' : [ ' Abkhazia ' , ' South Ossetia ' ] ,
' Iraq, Saudi Arabia ' : [ ' Neutral Zone ' ] ,
' Moldova ' : [ ' Transnistria ' ] ,
' Serbia ' : [ ' Kosovo ' ] ,
' Somalia ' : [ ' Somaliland ' ]
} ,
' dissolved ' : {
# merged
' Canton and Enderbury Islands ' : [ ' Kiribati ' ] ,
' East Germany ' : [ ' Germany ' ] ,
' Johnston Island ' : [ ' United States Minor Outlying Islands ' ] ,
' Midway Islands ' : [ ' United States Minor Outlying Islands ' ] ,
' North Vietnam ' : [ ' Vietnam ' ] ,
' Panama Canal Zone ' : [ ' Panama ' ] ,
' Sikkim ' : [ ' India ' ] ,
' South Yemen ' : [ ' Yemen ' ] ,
' United States Miscellaneous Pacific Islands ' : [ ' United States Minor Outlying Islands ' ] ,
' Wake Island ' : [ ' United States Minor Outlying Islands ' ] ,
' West Germany ' : [ ' Germany ' ] ,
# renamed
' Burma ' : [ ' Myanmar ' ] ,
' Byelorussian Soviet Socialist Republic ' : [ ' Belarus ' ] ,
' Dahomey ' : [ ' Benin ' ] ,
' East Timor ' : [ ' Timor-Leste ' ] ,
' French Afar and Issas ' : [ ' Djibouti ' ] ,
' New Hebrides ' : [ ' Vanuatu ' ] ,
' Rhodesia ' : [ ' Zimbabwe ' ] ,
' Siam ' : [ ' Thailand ' ] ,
' Upper Volta ' : [ ' Burkina Faso ' ] ,
2011-05-24 06:15:44 +00:00
' Yugoslavia ' : [ ' Serbia and Montenegro ' ] , # fixme: shape and flag suggest 'split'
2011-05-23 19:38:52 +00:00
' Zaire ' : [ ' Democratic Republic of the Congo ' ] ,
# split
' Czechoslovakia ' : [ ' Czech Republic ' , ' Slovakia ' ] ,
' French Southern and Antarctic Territories ' : [ ' Adélie Land ' , ' French Southern Territories ' ] ,
' Gilbert and Ellice Islands ' : [ ' Kiribati ' , ' Tuvalu ' ] ,
' Korea ' : [ ' North Korea ' , ' South Korea ' ] ,
' Netherlands Antilles ' : [ ' Bonaire, Saint Eustatius and Saba ' , ' Curaçao ' , ' Sint Maarten ' ] ,
' Neutral Zone ' : [ ' Iraq ' , ' Saudi Arabia ' ] ,
' Pacific Islands ' : [ ' Marshall Islands ' , ' Micronesia ' , ' Northern Mariana Islands ' , ' Palau ' ] ,
' Serbia and Montenegro ' : [ ' Serbia ' , ' Montenegro ' ] ,
' Soviet Union ' : [
' Armenia ' , ' Azerbaijan ' , ' Belarus ' , ' Estonia ' , ' Georgia ' ,
' Kazakhstan ' , ' Kyrgyzstan ' , ' Latvia ' , ' Lithuania ' , ' Moldova ' ,
' Russia ' , ' Tajikistan ' , ' Turkmenistan ' , ' Ukraine ' , ' Uzbekistan '
]
} ,
' flag ' : {
' Australian Antarctic Territory ' : ' Flag_of_Australia.svg ' ,
' Antarctica ' : ' Flag_of_the_Antarctic_Treaty.svg ' ,
' Antártica ' : ' Flag_of_Magallanes,_Chile.svg ' ,
' Ashmore and Cartier Islands ' : ' Flag_of_Australia.svg ' ,
' Bonaire, Saint Eustatius and Saba ' : ' Flag_of_Bonaire.svg ' ,
' Burma ' : ' Flag_of_Myanmar_(1974-2010).svg ' ,
' Byelorussian Soviet Socialist Republic ' : ' Flag_of_Byelorussian_SSR.svg ' ,
' Canton and Enderbury Islands ' : ' Flag_of_Gilbert_and_Ellice_Islands.svg ' ,
' Ceuta and Melilla ' : ' Flag_Ceuta.svg ' ,
' Coral Sea Islands ' : ' Flag_of_Australia.svg ' ,
' Diego Garcia ' : ' Flag_of_the_British_Indian_Ocean_Territory.svg ' ,
' French Guiana ' : ' Flag_of_French_Guiana.svg ' ,
' Korea ' : ' Flag_of_Korea_1882.svg ' ,
2011-09-09 16:40:04 +00:00
# 'Libya': 'Flag_of_the_Libyan_Jamahiriya_1977.svg',
2011-05-23 19:38:52 +00:00
' Metropolitan France ' : ' Flag_of_France.svg ' ,
' Neutral Zone ' : ' Flag_of_the_United_Nations.svg ' ,
' New Hebrides ' : ' Flag_of_Anglo-French_Joint_Naval_Commission.svg ' ,
' Northern Ireland ' : ' Ulster_banner.svg ' ,
' Panama Canal Zone ' : ' Panama_Canal_Zone_Flag.png ' ,
' Peter I Island ' : ' Flag_of_Norway.svg ' ,
' Réunion ' : ' Drapeau_Reunion_APDR.png ' ,
' Saint Martin ' : ' Flag_of_Saint-Martin_(local).svg ' ,
' Siam ' : ' State_Flag_of_Thailand_(1916).svg ' ,
' Svalbard and Jan Mayen ' : ' Flag_of_Norway.svg ' ,
' United States Miscellaneous Pacific Islands ' : ' Flag_of_the_United_States.svg ' ,
' Wallis and Futuna ' : ' Flag_of_Wallis_and_Futuna.svg ' ,
' Yugoslavia ' : ' Flag_of_SFR_Yugoslavia.svg '
} ,
' google_name ' : {
' Bahamas ' : ' The Bahamas ' ,
' Brunei ' : ' Brunei Darussalam ' ,
' Cocos Islands ' : ' Cocos (Keeling) Islands ' ,
' Gambia ' : ' The Gambia ' ,
' Kosovo ' : ' Kosova (Kosovo) ' ,
' Macedonia ' : ' Former Yugoslav Republic of Macedonia ' ,
' Myanmar ' : ' Burma ' ,
' Republic of the Congo ' : ' Congo ' ,
' Sahrawi ' : ' Western Sahara ' ,
' United Kingdom ' : ' UK ' ,
' United States ' : ' USA '
} ,
' google_query ' : {
' Akrotiri and Dhekelia ' : [ ' Akrotiri, Cyprus ' , ' Dhekelia, Cyprus ' ] ,
' Bonaire, Saint Eustatius and Saba ' : [ ' Bonaire ' , ' Saba, Netherlands Antilles ' , ' Saint Eustatius ' ] ,
' Byelorussian Soviet Socialist Republic ' : [ ' Belarus ' ] ,
' Canton and Enderbury Islands ' : [ ' Canton Island ' , ' Enderbury Island ' ] ,
' Ceuta and Melilla ' : [ ' Ceuta ' , ' Melilla ' ] ,
' Clipperton Island ' : [ ' Île de la Passion ' ] ,
' Curaçao ' : [ ' Banda Abou, Curaçao ' ] ,
' Czechoslovakia ' : [ ' Czech Republic ' , ' Slovakia ' ] ,
' East Germany ' : [
2011-09-09 16:40:04 +00:00
' Mecklenburg Vorpommern ' , ' Saxony ' , ' Thuringia '
2011-05-23 19:38:52 +00:00
] ,
' Dahomey ' : [ ' Benin ' ] ,
' European Union ' : [ ' Europe ' ] ,
' French Afar and Issas ' : [ ' Djibouti ' ] ,
# see http://en.wikipedia.org/wiki/French_Southern_and_Antarctic_Lands
# and http://en.wikipedia.org/wiki/Scattered_Islands_in_the_Indian_Ocean
' French Southern and Antarctic Territories ' : [ ' Adélie Land ' , ' Bassas da India ' , ' Glorioso Islands ' , ' Île Kerguelen ' ] ,
' French Southern Territories ' : [ ' Bassas da India ' , ' Glorioso Islands ' , ' Île Kerguelen ' ] ,
' Georgia ' : [ ' Georgia, Asia ' ] ,
# see http://en.wikipedia.org/wiki/Gilbert_Islands
' Gilbert and Ellice Islands ' : [ ' Arorae, Kiribati ' , ' Butaritari, Kiribati ' , ' Makin, Kiribati ' , ' Tuvalu ' ] ,
' Jamaica ' : [ ' Clarendon Parish, Jamaica ' , ' St. James Parish, Jamaica ' , ' St. Thomas Parish, Jamaica ' , ' Westmoreland Parish, Jamaica ' ] , # in case results are us-biased
' Johnston Island ' : [ ' Johnston Atoll ' ] ,
' Korea ' : [ ' North Korea ' , ' South Korea ' ] ,
' Lebanon ' : [ ' Lebanon, Asia ' ] , # in case results are us-biased
' Metropolitan France ' : [ ' France ' ] ,
' Midway Islands ' : [ ' Midway Atoll ' ] ,
' Neutral Zone ' : [ ' Neutral Zone, Saudi Arabia ' ] ,
' New Hebrides ' : [ ' Vanuatu ' ] ,
' North Vietnam ' : [ ' Ha Giang, Vietnam ' , ' Lai Chau, Vietnam ' , ' Thua Thien-Hue, Vietnam ' ] ,
' Northern Cyprus ' : [ ' Karpass, Cyprus ' , ' Kokkina, Cyprus ' , ' Lympia, Cyprus ' ] ,
' Pacific Islands ' : [ ' Marshall Islands ' , ' Micronesia ' , ' Northern Mariana Islands ' , ' Palau ' ] ,
' Palestine ' : [ ' 71, Israel ' , ' El-arish Rafah, Egypt ' ] ,
' Panama Canal Zone ' : [ ' Amador, Panama ' , ' Fort Sherman, Panama ' ] ,
' Sahrawi ' : [ ' Western Sahara ' ] ,
' Saint Helena ' : [ ' Ascension Island ' , ' Longwood, Saint Helena ' , ' Tristan da Cunha ' ] , # in case results are us-biased
' San Marino ' : [ ' San Marino, Europe ' ] , # in case results are us-biased
' Serbia and Montenegro ' : [ ' Montenegro ' , ' Serbia ' ] ,
' Siam ' : [ ' Thailand ' ] ,
' Sint Maarten ' : [ ' Sint Maarten, Netherlands Antilles ' ] ,
' South Ossetia ' : [ ' Shida Kartli ' ] ,
# see http://en.wikipedia.org/wiki/Sudan#States_and_regions
' South Sudan ' : [
' Central Equatoria, Sudan ' , ' Eastern Equatoria, Sudan ' , ' Jonglei, Sudan ' , ' Lakes, Sudan ' , ' Northern Bahr el Ghazal, Sudan ' ,
' Upper Nile, Sudan ' , ' Unity, Sudan ' , ' Warrap, Sudan ' , ' Western Bahr el Ghazal, Sudan ' , ' Western Equatoria, Sudan '
] ,
' South Yemen ' : [ ' Al-Mahrah, Yemen ' , ' Lahij, Yemen ' ] ,
' Soviet Union ' : [
' Armenia ' , ' Azerbaijan ' , ' Belarus ' , ' Estonia ' , ' Georgia, Europe ' ,
' Latvia ' , ' Lithuania ' , ' Kazakhstan ' , ' Kyrgyzstan ' , ' Moldova ' ,
' Russia ' , ' Tajikistan ' , ' Turkmenistan ' , ' Ukraine ' , ' Uzbekistan '
] ,
2011-09-09 16:40:04 +00:00
' United Kingdom ' : [ ' England ' , ' Northern Ireland ' , ' Scotland ' , ' Wales, United Kingdom ' ] ,
2011-05-23 19:38:52 +00:00
' United States Minor Outlying Islands ' : [ ' Midway Islands ' ] ,
# Baker Island, Howland Island, Jarvis Island, Kingman Reef, Palmyra Atoll --
# but the first three each return the full United States Minor Outlying Islands
' United States Miscellaneous Pacific Islands ' : [ ' Kingman Reef ' , ' Palmyra Atoll ' ] ,
2011-09-09 16:40:04 +00:00
' UK ' : [ ' England ' , ' Northern Ireland ' , ' Scotland ' , ' Wales, United Kingdom ' ] ,
2011-05-23 19:38:52 +00:00
' Upper Volta ' : [ ' Burkina Faso ' ] ,
' Wake Island ' : [ ' Wake Atoll ' ] ,
2011-09-09 16:40:04 +00:00
' Wales ' : [ ' Wales, United Kingdom ' ] ,
2011-05-23 19:38:52 +00:00
' West Germany ' : [ ' Schleswig Holstein ' , ' Northrhine Westphalia ' , ' Bavaria, Germany ' ] ,
' Yugoslavia ' : [
' Bosnia and Herzegovina ' , ' Croatia ' , ' Serbia ' , ' Slovenia ' , ' Macedonia ' ,
' Montenegro '
]
} ,
' icon ' : {
' AC ' : ' cross(340, resize, 684) ' ,
' AD ' : ' extend() ' ,
' AE ' : ' crop(683) ' ,
' AF ' : ' extend() ' ,
' AG ' : ' extend(left, left) ' ,
' _AG ' : ' resize(); copy(580, 90, 1468, 534, 592, 296, 387, 238); copy(580, 0, 1468, 90, 592, 238, 387, 0) # *2/3 ' ,
' AI ' : ' extend(right, right) ' ,
' AIDJ ' : ' extend() ' ,
' AL ' : ' crop() ' ,
' AM ' : ' crop() ' ,
' ANHH ' : ' crop() ' ,
' AO ' : ' crop() ' ,
' AQ ' : ' crop() ' ,
' AR ' : ' crop() ' ,
' AR-AQ ' : ' extend(right, left) ' ,
' AS ' : ' extend(left, left) ' ,
' AT ' : ' crop() ' ,
' AU ' : ' extend(right, right) ' ,
' AU-AC ' : ' extend(right, right) ' ,
' AU-AQ ' : ' extend(right, right) ' ,
' AU-CS ' : ' extend(right, right) ' ,
' AW ' : ' crop(left) ' ,
' AX ' : ' crop(827) ' ,
' AZ ' : ' crop() ' ,
' AZ-NK ' : ' crop(right) ' ,
' BA ' : ' crop(1054) ' ,
' BB ' : ' extend() ' ,
' BD ' : ' crop(922) ' ,
' BE ' : ' extend() ' ,
' BF ' : ' crop() ' ,
' BG ' : ' crop() ' ,
' BH ' : ' crop(666) ' ,
' _BI ' : ' crop() ' ,
' BI ' : ' resize(); copy(677, 267, 1371, 961, 696, 696, 267, 267, circle) ' ,
' BJ ' : ' crop(819) ' ,
' BL ' : ' extend() ' ,
' BM ' : ' extend(right, right) ' ,
' BN ' : ' crop() ' ,
' BO ' : ' crop() ' ,
' BQ ' : ' crop(left) ' ,
' BQAQ ' : ' extend(right, right) ' ,
' BR ' : ' extend() ' ,
' BS ' : ' cut(0, 0, 342, 0); resize() ' ,
' BT ' : ' extend(left, right) ' ,
' BUMM ' : ' slice(444) ' ,
' BV ' : ' crop(left) ' ,
' BW ' : ' crop() ' ,
' BY ' : ' crop(left) ' ,
' BYAA ' : ' crop(left) ' ,
' BZ ' : ' crop() ' ,
' CA ' : ' extend() ' ,
' CC ' : ' extend() ' ,
' CD ' : ' crop(left) ' ,
' CF ' : ' crop(); copy(85, 0, 597, 341, 512, 341, 0, 0) ' ,
' CG ' : ' resize() ' ,
' CH ' : ' pass() ' ,
' CI ' : ' extend() ' ,
' CK ' : ' extend(right, right) ' ,
' CL ' : ' crop(left) ' ,
' CL-AQ ' : ' extend() ' ,
' CM ' : ' extend() ' ,
2011-09-09 21:16:32 +00:00
' CN ' : ' cut(1024, 0, 0, 341); resize(); copy(145, 73, 885, 679, 740, 606, 142, 209) ' ,
2011-05-23 19:38:52 +00:00
' CO ' : ' crop() ' ,
' CP ' : ' extend() ' ,
' CR ' : ' crop(left) ' ,
' CSHH ' : ' resize() ' ,
' CSXX ' : ' crop() ' ,
' CTKI ' : ' extend(right, right) ' ,
' CU ' : ' crop(left) ' ,
' CV ' : ' crop(768) ' ,
' CW ' : ' crop(left) ' ,
' CX ' : ' cut(2, 1, 2, 1); extend(right, left) ' ,
' CY ' : ' crop() ' ,
' CY-NC ' : ' crop(811) ' ,
' CZ ' : ' resize() ' ,
' DDDE ' : ' crop() ' ,
' DE ' : ' crop() ' ,
' DEDE ' : ' crop() ' ,
' DG ' : ' extend(right, right) ' ,
' DJ ' : ' crop(left) ' ,
' DK ' : ' crop(left) ' ,
' DM ' : ' crop() ' ,
' DO ' : ' crop() ' ,
' DYBJ ' : ' crop(819) ' ,
' DZ ' : ' crop() ' ,
' EA ' : ' crop() ' ,
' EC ' : ' crop() ' ,
' EE ' : ' crop() ' ,
' EG ' : ' crop() ' ,
' EH ' : ' cut(0, 0, 683, 0); resize(); copy(768, 0, 1280, 1024, 512, 1024, 512, 0) ' ,
' ER ' : ' extend(right, right) ' ,
' ES ' : ' crop(left) ' ,
' ET ' : ' crop() ' ,
' EU ' : ' crop() ' ,
' FI ' : ' crop(740) ' ,
' FJ ' : ' extend(right, right) ' ,
' FK ' : ' extend(right, right) ' ,
' FM ' : ' crop() ' ,
' FO ' : ' crop(left) ' ,
' FQHH ' : ' extend(right, right); copy(0, 539, 826, 552, 826, 13, 0, 328) ' ,
' FR ' : ' extend() ' ,
' FR-AQ ' : ' extend(right, right); copy(0, 539, 826, 552, 826, 13, 0, 328) ' ,
' FXFR ' : ' extend() ' ,
' GA ' : ' crop() ' ,
' GB ' : ' cross(340, resize, 684) ' ,
' GB-AD ' : ' cross(340, resize, 684) ' ,
' GB-ENG ' : ' crop() ' ,
' GB-NIR ' : ' crop() ' ,
' GB-SCT ' : ' resize() ' ,
' GB-WLS ' : ' extend() ' ,
' GD ' : ' crop() ' ,
' _GD ' : ' border(172); copy(172, 172, 1867, 1057, 885, 885, 172, 172) ' ,
' GE ' : ' cross(272, crop) ' ,
' GE-AB ' : ' crop(left) ' ,
' GE-SO ' : ' crop() ' ,
' GEHH ' : ' extend(right, right) ' ,
' GF ' : ' crop() ' ,
' GG ' : ' crop() ' ,
' GH ' : ' crop() ' ,
' GI ' : ' crop() ' ,
' GL ' : ' crop(797) ' ,
' GM ' : ' crop() ' ,
' GN ' : ' extend() ' ,
' GP ' : ' extend() ' ,
' GQ ' : ' cut(0, 0, 1024, 0); resize(); copy(683, 0, 1365, 1365, 682, 1365, 683, 0) ' ,
' GR ' : ' crop(left) ' ,
' GS ' : ' extend(right, right) ' ,
' GT ' : ' extend() ' ,
' GU ' : ' border(50) ' ,
' GW ' : ' slice(341) ' ,
' GY ' : ' resize() ' ,
' HK ' : ' crop() ' ,
' HM ' : ' extend(right, right) ' ,
' HN ' : ' crop() ' ,
' HR ' : ' crop() ' ,
' HT ' : ' crop() ' ,
' HU ' : ' crop() ' ,
' HVBF ' : ' crop() ' ,
' IC ' : ' extend() ' ,
' ID ' : ' crop() ' ,
' IE ' : ' extend() ' ,
' IL ' : ' crop() ' ,
' IM ' : ' crop() ' ,
' IN ' : ' crop() ' ,
' IO ' : ' extend(right, right) ' ,
' IQ ' : ' crop() ' ,
' IR ' : ' crop() ' ,
' IS ' : ' crop(left) ' ,
' IT ' : ' extend() ' ,
' JE ' : ' crop() ' ,
' JM ' : ' resize() ' ,
' JO ' : ' crop(left) ' ,
' JP ' : ' crop() ' ,
' JTUM ' : ' extend() ' ,
' KE ' : ' crop() ' ,
' KG ' : ' crop() ' ,
' KH ' : ' crop() ' ,
' KI ' : ' crop() ' ,
' KM ' : ' crop(left) ' ,
' KN ' : ' crop() ' ,
' KOHH ' : ' extend() ' ,
' KP ' : ' crop(683) ' ,
' KR ' : ' extend() ' ,
' KW ' : ' cut(0, 0, 512, 0); resize() ' ,
' KY ' : ' extend(right, right) ' ,
' KZ ' : ' crop(left); copy(696, 0, 1526, 1024, 840, 1024, 184, 0) ' ,
' LA ' : ' crop() ' ,
' LB ' : ' crop() ' ,
' LC ' : ' crop() ' ,
' LI ' : ' slice(455) ' ,
' LK ' : ' cut(649, 0, 0, 0); border(85) ' ,
' LR ' : ' crop(left) ' ,
' LS ' : ' crop() ' ,
' LT ' : ' crop() ' ,
' LU ' : ' crop() ' ,
' LV ' : ' crop() ' ,
' LY ' : ' crop() ' ,
' MA ' : ' crop() ' ,
' MC ' : ' crop() ' ,
' MD ' : ' extend() ' ,
' MD-TR ' : ' crop(left) ' ,
' ME ' : ' border(51) ' ,
' MF ' : ' crop() ' ,
' MG ' : ' crop(left) ' ,
' MH ' : ' extend() ' ,
' MIUM ' : ' extend() ' ,
' MK ' : ' cut(308, 0, 308, 0); resize(); copy(842, 330, 1206, 694, 364, 364, 330, 330, circle) ' ,
' ML ' : ' extend() ' ,
' MM ' : ' crop() ' ,
' MN ' : ' extend() ' ,
' MO ' : ' crop() ' ,
' MP ' : ' crop() ' ,
' MQ ' : ' cross(198, crop) ' ,
' MR ' : ' crop() ' ,
' MS ' : ' extend(right, right) ' ,
' MT ' : ' border(480) ' ,
' MU ' : ' crop() ' ,
' MV ' : ' border(341) ' ,
' MW ' : ' crop() ' ,
' MX ' : ' extend() ' ,
' _MY ' : ' crop(left) ' ,
' MY ' : ' crop(right); copy(0, 0, 1, 1, 585, 585, 0, 0); copy(0, 0, 1024, 585, 585, 334, 0, 126) ' ,
' MZ ' : ' crop(left) ' ,
' NA ' : ' crop(left) ' ,
' NC ' : ' crop(681) ' ,
' NE ' : ' crop() ' ,
' NF ' : ' extend() ' ,
' NG ' : ' extend() ' ,
' NHVU ' : ' crop() ' ,
' NI ' : ' crop() ' ,
' NL ' : ' crop() ' ,
' NO ' : ' crop(left) ' ,
' NO-PI ' : ' crop(left) ' ,
' NP ' : ' construct() ' ,
' NQAQ ' : ' crop(left) ' ,
' NR ' : ' crop(left) ' ,
' NTHH ' : ' crop() ' ,
' NU ' : ' extend(right, right) ' ,
' NZ ' : ' extend(right, right) ' ,
' NZ-AQ ' : ' extend(right, right) ' ,
' OM ' : ' crop(left) ' ,
' PA ' : ' slice() ' ,
' PCHH ' : ' crop() ' ,
' PE ' : ' extend() ' ,
' PF ' : ' crop() ' ,
' PG ' : ' extend(right, left) ' ,
' PH ' : ' crop(left) ' ,
' PK ' : ' extend() ' ,
' PL ' : ' crop() ' ,
' PM ' : ' cut(558, 0, 0, 0); extend() ' ,
' PN ' : ' extend(right, right) ' ,
' PR ' : ' crop(left) ' ,
' PS ' : ' cut(0, 0, 683, 0); resize() ' ,
' PT ' : ' crop(819) ' ,
' PUUM ' : ' crop(778) ' ,
' PW ' : ' crop(896) ' ,
' PY ' : ' crop() ' ,
' PZPA ' : ' crop() ' ,
' QA ' : ' crop(659) ' ,
' RE ' : ' cut(464, 0, 0, 0); resize() ' ,
' RHZW ' : ' cut(0, 3, 0, 2); extend() ' ,
' RO ' : ' extend() ' ,
' RS ' : ' crop(732) ' ,
' RS-KO ' : ' crop() ' ,
' RU ' : ' crop() ' ,
' RW ' : ' crop(right) ' ,
' SA ' : ' crop() ' ,
2011-09-09 21:16:32 +00:00
' SB ' : ' extend(left, right); copy(103, 972, 206, 1024, 103, 52, 0, 1536); copy(1842, 0, 1945, 52, 103, 52, 1945, 460) ' ,
2011-05-23 19:38:52 +00:00
' SC ' : ' resize() ' ,
' SD ' : ' cut(0, 0, 683, 0); resize() ' ,
' SE ' : ' crop(768) ' ,
' SG ' : ' crop(787) ' ,
' SH ' : ' extend(right, right) ' ,
' SI ' : ' crop(left) ' ,
' SITH ' : ' crop() ' ,
' SJ ' : ' crop(left) ' ,
' SK ' : ' crop(679) ' ,
' SKIN ' : ' border(165) ' ,
' SL ' : ' crop() ' ,
' SM ' : ' crop() ' ,
' SN ' : ' extend() ' ,
' SO ' : ' crop() ' ,
' SO-SO ' : ' crop() ' ,
' SR ' : ' crop() ' ,
' SS ' : ' crop(left) ' ,
' ST ' : ' resize(); copy(832, 320, 1216, 704, 384, 384, 272, 320); copy(1344, 320, 1728, 704, 384, 384, 624, 320) ' ,
2011-09-09 21:16:32 +00:00
' SUHH ' : ' crop(right); copy(85, 0, 597, 512, 1024, 1024, 0, 0) ' ,
2011-05-23 19:38:52 +00:00
' SV ' : ' crop() ' ,
' SX ' : ' crop(left) ' ,
' SY ' : ' slice(614) ' ,
' SZ ' : ' extend() ' ,
' TA ' : ' extend(right, right) ' ,
' TC ' : ' extend(right, right) ' ,
' TD ' : ' extend() ' ,
' TF ' : ' extend(right, right); copy(0, 539, 826, 552, 826, 13, 0, 328) ' ,
' TG ' : ' crop(left) ' ,
' TH ' : ' crop() ' ,
' TJ ' : ' crop() ' ,
' TK ' : ' extend() ' ,
' TL ' : ' crop(left) ' ,
' TM ' : ' crop(left) ' ,
' TN ' : ' crop() ' ,
' TO ' : ' slice(427) ' ,
' TPTL ' : ' crop(left) ' ,
' TR ' : ' crop(844) ' ,
' TT ' : ' resize() ' ,
' TV ' : ' extend(right, right) ' ,
' TW ' : ' slice() ' ,
' TZ ' : ' resize() ' ,
' UA ' : ' crop() ' ,
' UG ' : ' crop() ' ,
' UK ' : ' cross(340, resize, 684) ' ,
' UM ' : ' crop(778) ' ,
' US ' : ' crop(778) ' ,
' UY ' : ' crop(left) ' ,
' UZ ' : ' crop(left) ' ,
' VA ' : ' pass() ' ,
' VC ' : ' extend() ' ,
' VDVN ' : ' crop() ' ,
' VE ' : ' crop(); copy(0, 0, 455, 455, 455, 455, 0, 0) ' ,
' VG ' : ' extend(right, right) ' ,
' VI ' : ' extend() ' ,
' VN ' : ' crop() ' ,
' VU ' : ' crop(left) ' ,
' WF ' : ' extend(right, right); copy(0, 539, 826, 552, 826, 13, 0, 328) ' ,
' WKUM ' : ' crop(left) ' ,
' WS ' : ' slice() ' ,
' YDYE ' : ' crop(left) ' ,
' YE ' : ' crop() ' ,
' YT ' : ' extend() ' ,
' YUCS ' : ' crop() ' ,
' ZA ' : ' resize() ' ,
' ZM ' : ' cut(598, 0, 0, 0); extend() ' ,
' ZRCD ' : ' crop() ' ,
' ZW ' : ' crop(left) '
} ,
2011-09-09 23:05:04 +00:00
' imdb_code ' : {
' Côte d \' Ivoire ' : ' ci ' ,
' Democratic Republic of the Congo ' : ' cd ' ,
' Palestine ' : ' ps ' ,
' Serbia and Montenegro ' : ' xfy '
} ,
' imdb_name ' : {
' Côte d \' Ivoire ' : ' Ivory Coast ' , # bug in some imdb entries
' Democratic Republic of the Congo ' : ' Democratic Republic of Congo ' , # bug in some imdb entries
' Palestine ' : ' Occupied Palestinian Territory ' , # bug in some imdb entries
' Serbia and Montenegro ' : ' Federal Republic of Yugoslavia '
} ,
2011-09-09 16:40:04 +00:00
# import json
# import re
# from ox.net import readUrl
# html = readUrl('http://www.imdb.com/language/')
# results = re.compile('<a href="/language/.*?">(.*?)</a>').findall(html)
# languages = {}
# for result in results:
# languages[result] = ''
# print json.dumps(languages, sort_keys=True)
# http://www.imdb.com/language/
' languages ' : {
' Abkhazian ' : ' Abkhazia ' ,
' Aboriginal ' : ' Australia ' ,
' Aché ' : ' ' ,
' Acholi ' : ' ' ,
' Afrikaans ' : ' South Africa ' ,
' Aidoukrou ' : ' ' ,
' Akan ' : ' ' ,
' Albanian ' : ' Albania ' ,
' Algonquin ' : ' ' ,
' American ' : ' United States ' ,
2011-09-09 23:05:04 +00:00
' Amharic ' : ' Ethiopia ' ,
2011-09-09 16:40:04 +00:00
' Apache ' : ' ' ,
' Arabic ' : ' Saudi Arabia ' ,
' Aragonese ' : ' ' ,
' Aramaic ' : ' Syria ' ,
' Arapaho ' : ' ' ,
' Armenian ' : ' Armenia ' ,
' Assamese ' : ' India ' ,
' Assyrian Neo-Aramaic ' : ' ' ,
' Athapascan ' : ' ' ,
' Australian ' : ' Australia ' ,
' Awadhi ' : ' ' ,
' Aymara ' : ' Bolivia ' ,
' Azerbaijani ' : ' Azerbaijan ' ,
' Bable ' : ' ' ,
' Baka ' : ' ' ,
' Balinese ' : ' Indonesia ' ,
' Bambara ' : ' Mali ' ,
2011-09-09 23:05:04 +00:00
' Basque ' : ' Spain ' ,
2011-09-09 16:40:04 +00:00
' Bassari ' : ' ' ,
' Belarusian ' : ' Belarus ' ,
' Bemba ' : ' ' ,
' Bengali ' : ' Bangladesh ' ,
2011-09-09 21:16:32 +00:00
' Berber ' : ' Morocco ' , # ?, one of multiple
2011-09-09 16:40:04 +00:00
' Bhojpuri ' : ' ' ,
' Bicolano ' : ' Philippines ' ,
' Bodo ' : ' ' ,
' Bosnian ' : ' Bosnia and Herzegovina ' ,
' Brazilian ' : ' Brazil ' ,
' Breton ' : ' France ' ,
' British ' : ' United Kingdom ' ,
' Bulgarian ' : ' Bulgaria ' ,
' Burmese ' : ' Burma ' ,
' Cantonese ' : ' China ' ,
' Catalan ' : ' Spain ' ,
' Central Khmer ' : ' Cambodia ' ,
2011-09-09 21:16:32 +00:00
' Chaozhou ' : ' China ' ,
2011-09-09 16:40:04 +00:00
' Chechen ' : ' Russia ' ,
2011-09-09 21:16:32 +00:00
' Cherokee ' : ' United States ' ,
' Cheyenne ' : ' United States ' ,
' Chhattisgarhi ' : ' India ' ,
2011-09-09 16:40:04 +00:00
' Chinese ' : ' China ' ,
2011-09-09 21:16:32 +00:00
' Cornish ' : ' United Kingdom ' ,
2011-09-09 16:40:04 +00:00
' Corsican ' : ' France ' ,
' Cree ' : ' Canada ' ,
2011-09-09 21:16:32 +00:00
' Creek ' : ' United States ' ,
2011-09-09 16:40:04 +00:00
' Creole ' : ' ' ,
' Creoles and pidgins ' : ' ' ,
' Croatian ' : ' Croatia ' ,
' Crow ' : ' ' ,
' Czech ' : ' Czech Republic ' ,
' Danish ' : ' Denmark ' ,
' Dari ' : ' Afghanistan ' ,
' Desiya ' : ' ' ,
' Dinka ' : ' ' ,
2011-09-09 23:05:04 +00:00
' Djerma ' : ' Niger ' ,
2011-09-09 16:40:04 +00:00
' Dogri ' : ' ' ,
' Dutch ' : ' Netherlands ' ,
' Dyula ' : ' ' ,
' Dzongkha ' : ' ' ,
' East-Greenlandic ' : ' Greenland ' ,
' Eastern Frisian ' : ' Germany ' ,
' Egyptian (Ancient) ' : ' Egypt ' ,
' English ' : ' United Kingdom ' ,
' Esperanto ' : ' ' ,
' Estonian ' : ' Estonia ' ,
' Ewe ' : ' ' ,
' Faliasch ' : ' ' ,
' Faroese ' : ' Faroe Islands ' ,
' Filipino ' : ' Philippines ' ,
' Finnish ' : ' Finland ' ,
' Flemish ' : ' Belgium ' ,
' Fon ' : ' ' ,
' French ' : ' France ' ,
' Fulah ' : ' ' ,
' Fur ' : ' ' ,
' Gaelic ' : ' Ireland ' ,
2011-09-09 23:05:04 +00:00
' Galician ' : ' Spain ' ,
2011-09-09 16:40:04 +00:00
' Georgian ' : ' Georgia ' ,
' German ' : ' Germany ' ,
' Grebo ' : ' ' ,
' Greek ' : ' Greece ' ,
' Greek, Ancient (to 1453) ' : ' Greece ' ,
' Greenlandic ' : ' Greenland ' ,
' Guarani ' : ' Paraguay ' ,
' Gujarati ' : ' India ' ,
' Gumatj ' : ' ' ,
2011-09-09 21:16:32 +00:00
' Gunwinggu ' : ' Australia ' ,
2011-09-09 16:40:04 +00:00
' Haitian ' : ' Haiti ' ,
' Hakka ' : ' China ' ,
' Haryanvi ' : ' ' ,
2011-09-09 21:16:32 +00:00
' Hassanya ' : ' Mauritania ' ,
' Hausa ' : ' Nigeria ' ,
2011-09-09 16:40:04 +00:00
' Hawaiian ' : ' United States ' ,
' Hebrew ' : ' Israel ' ,
' Hindi ' : ' India ' ,
' Hmong ' : ' ' ,
2011-09-09 21:16:32 +00:00
' Hokkien ' : ' China ' ,
2011-09-09 16:40:04 +00:00
' Hopi ' : ' United States ' ,
' Hungarian ' : ' Hungary ' ,
' Iban ' : ' ' ,
' Ibo ' : ' Nigeria ' ,
' Icelandic ' : ' Iceland ' ,
' Indian ' : ' India ' ,
' Indonesian ' : ' Indonesia ' ,
' Inuktitut ' : ' ' ,
' Inupiaq ' : ' ' ,
' Irish Gaelic ' : ' Ireland ' ,
' Italian ' : ' Italy ' ,
' Japanese ' : ' Japan ' ,
' Jola-Fonyi ' : ' ' ,
' Ju \' hoan ' : ' ' ,
' Kaado ' : ' ' ,
' Kabuverdianu ' : ' Cape Verde ' ,
' Kabyle ' : ' ' ,
' Kalmyk-Oirat ' : ' ' ,
' Kannada ' : ' India ' ,
' Karajá ' : ' ' ,
' Karbi ' : ' ' ,
' Karen ' : ' ' ,
' Kazakh ' : ' Kazakhstan ' ,
' Khanty ' : ' Russia ' ,
' Khasi ' : ' ' ,
' Kikuyu ' : ' ' ,
' Kinyarwanda ' : ' ' ,
' Kirundi ' : ' ' ,
' Klingon ' : ' ' ,
' Kodava ' : ' ' ,
' Konkani ' : ' India ' ,
' Korean ' : ' South Korea ' ,
' Korowai ' : ' Papua New Guinea ' ,
2011-09-09 21:16:32 +00:00
' Kriolu ' : ' Cape Verde ' ,
2011-09-09 16:40:04 +00:00
' Kru ' : ' ' ,
' Kudmali ' : ' ' ,
' Kuna ' : ' ' ,
2011-09-09 21:16:32 +00:00
' Kurdish ' : ' Turkey ' , # ?, one of multiple
2011-09-09 16:40:04 +00:00
' Kwakiutl ' : ' ' ,
' Kyrgyz ' : ' Kyrgyzstan ' ,
2011-09-09 21:16:32 +00:00
' Ladakhi ' : ' India ' ,
2011-09-09 16:40:04 +00:00
' Ladino ' : ' ' ,
' Lao ' : ' Laos ' ,
' Latin ' : ' Italy ' ,
' Latvian ' : ' Latvia ' ,
' Limbu ' : ' ' ,
2011-09-09 21:16:32 +00:00
' Lingala ' : ' Democratic Republic of the Congo ' ,
2011-09-09 16:40:04 +00:00
' Lithuanian ' : ' Lithuania ' ,
' Low German ' : ' Germany ' ,
' Luxembourgish ' : ' Luxemburg ' ,
' Macedonian ' : ' Macedonia ' ,
' Macro-Jê ' : ' ' ,
' Magahi ' : ' ' ,
' Maithili ' : ' ' ,
' Malagasy ' : ' Madagascar ' ,
' Malay ' : ' Malaysia ' ,
2011-09-09 23:05:04 +00:00
' Malayalam ' : ' India ' ,
2011-09-09 16:40:04 +00:00
' Malecite-Passamaquoddy ' : ' ' ,
2011-09-09 21:16:32 +00:00
' Malinka ' : ' Guinea ' ,
2011-09-09 16:40:04 +00:00
' Maltese ' : ' Malta ' ,
' Manchu ' : ' ' ,
' Mandarin ' : ' China ' ,
' Mandingo ' : ' ' ,
' Manipuri ' : ' ' ,
' Maori ' : ' New Zealand ' ,
' Mapudungun ' : ' Chile ' ,
' Marathi ' : ' India ' ,
' Marshallese ' : ' Marshall Islands ' ,
' Masai ' : ' ' ,
' Masalit ' : ' ' ,
' Maya ' : ' Mexico ' ,
' Mende ' : ' Sierra Leone ' ,
' Micmac ' : ' ' ,
' Middle English ' : ' England ' ,
' Min Nan ' : ' ' ,
' Minangkabau ' : ' ' ,
' Mirandese ' : ' ' ,
' Mizo ' : ' ' ,
' Mohawk ' : ' ' ,
' Mongolian ' : ' Mongolia ' ,
' Montagnais ' : ' ' ,
' More ' : ' Burkina Faso ' ,
' Morisyen ' : ' ' ,
2011-09-09 21:16:32 +00:00
' Nagpuri ' : ' India ' ,
2011-09-09 16:40:04 +00:00
' Nahuatl ' : ' ' ,
' Nama ' : ' ' ,
2011-09-09 21:16:32 +00:00
' Navajo ' : ' United States ' ,
2011-09-09 16:40:04 +00:00
' Naxi ' : ' China ' ,
' Ndebele ' : ' ' ,
' Neapolitan ' : ' Italy ' ,
' Nenets ' : ' ' ,
' Nepali ' : ' Nepal ' ,
2011-09-09 21:16:32 +00:00
' Nisga \' a ' : ' Canada ' ,
2011-09-09 16:40:04 +00:00
' None ' : ' ' ,
' Norse, Old ' : ' ' ,
' North American Indian ' : ' ' ,
' Norwegian ' : ' Norway ' ,
' Nushi ' : ' ' ,
' Nyaneka ' : ' ' ,
' Nyanja ' : ' Malawi ' ,
' Occitan ' : ' ' ,
' Ojibwa ' : ' ' ,
' Ojihimba ' : ' ' ,
' Old English ' : ' England ' ,
' Oriya ' : ' ' ,
' Papiamento ' : ' ' ,
' Parsee ' : ' Iran ' ,
' Pashtu ' : ' Afghanistan ' ,
' Pawnee ' : ' ' ,
' Persian ' : ' Iran ' ,
' Peul ' : ' ' ,
' Polish ' : ' Poland ' ,
' Polynesian ' : ' ' ,
' Portuguese ' : ' Portugal ' ,
' Pular ' : ' ' ,
' Punjabi ' : ' India ' ,
' Purepecha ' : ' ' ,
' Quechua ' : ' Peru ' ,
' Quenya ' : ' ' ,
' Rajasthani ' : ' India ' ,
' Rawan ' : ' ' ,
' Romanian ' : ' Romania ' ,
' Romansh ' : ' Switzerland ' ,
' Romany ' : ' Romania ' ,
' Rotuman ' : ' ' ,
' Russian ' : ' Russia ' ,
' Ryukyuan ' : ' Japan ' ,
' Saami ' : ' Finland ' ,
' Samoan ' : ' Samoa ' ,
' Sanskrit ' : ' ' ,
' Sardinian ' : ' Italy ' ,
' Scanian ' : ' ' ,
' Serbian ' : ' Serbia ' ,
' Serbo-Croatian ' : ' Yugoslavia ' ,
' Serer ' : ' ' ,
' Shanghainese ' : ' China ' ,
2011-09-09 21:16:32 +00:00
' Shanxi ' : ' China ' ,
2011-09-09 16:40:04 +00:00
' Shona ' : ' Zimbabwe ' ,
2011-09-09 21:16:32 +00:00
' Shoshoni ' : ' United States ' ,
2011-09-09 16:40:04 +00:00
' Sicilian ' : ' Italy ' ,
' Sindarin ' : ' ' ,
2011-09-09 21:16:32 +00:00
' Sindhi ' : ' Pakistan ' ,
2011-09-09 16:40:04 +00:00
' Sinhala ' : ' Sri Lanka ' ,
2011-09-09 21:16:32 +00:00
' Sioux ' : ' United States ' ,
2011-09-09 16:40:04 +00:00
' Slovak ' : ' Slovakia ' ,
' Slovenian ' : ' Slovenia ' ,
' Somali ' : ' Somalia ' ,
' Songhay ' : ' ' ,
' Soninke ' : ' ' ,
' Sorbian ' : ' Germany ' ,
2011-09-09 21:16:32 +00:00
' Sotho ' : ' Lesotho ' ,
2011-09-09 16:40:04 +00:00
' Sousson ' : ' ' ,
' Spanish ' : ' Spain ' ,
' Sranan ' : ' ' ,
2011-09-09 21:16:32 +00:00
' Swahili ' : ' Kenya ' , # ?, one of multiple
2011-09-09 16:40:04 +00:00
' Swedish ' : ' Sweden ' ,
' Swiss German ' : ' Switzerland ' ,
' Sylheti ' : ' ' ,
' Tagalog ' : ' Philippines ' ,
' Tajik ' : ' Tajikistan ' ,
2011-09-09 21:16:32 +00:00
' Tamashek ' : ' Algeria ' , # ?, one of multiple
2011-09-09 16:40:04 +00:00
' Tamil ' : ' Sri Lanka ' ,
' Tarahumara ' : ' ' ,
' Tatar ' : ' Russia ' ,
' Telugu ' : ' India ' ,
' Teochew ' : ' ' ,
' Thai ' : ' Thailand ' ,
' Tibetan ' : ' China ' ,
' Tigrigna ' : ' ' ,
' Tlingit ' : ' ' ,
' Tok Pisin ' : ' ' ,
' Tonga (Tonga Islands) ' : ' Tonga ' ,
' Tsonga ' : ' ' ,
' Tswa ' : ' ' ,
' Tswana ' : ' ' ,
' Tulu ' : ' ' ,
' Tupi ' : ' ' ,
' Turkish ' : ' Turkey ' ,
' Turkmen ' : ' Turkmenistan ' ,
' Tuvinian ' : ' ' ,
' Tzotzil ' : ' ' ,
' Ukrainian ' : ' Ukraine ' ,
' Ungwatsi ' : ' ' ,
' Urdu ' : ' Pakistan ' ,
' Uzbek ' : ' Uzbekistan ' ,
' Vietnamese ' : ' Vietnam ' ,
' Visayan ' : ' ' ,
' Washoe ' : ' ' ,
' Welsh ' : ' Wales ' ,
2011-09-09 21:16:32 +00:00
' Wolof ' : ' Senegal ' , # ?, one of multiple
2011-09-09 16:40:04 +00:00
' Xhosa ' : ' South Africa ' ,
' Yakut ' : ' ' ,
' Yapese ' : ' ' ,
' Yiddish ' : ' Israel ' ,
2011-09-09 21:16:32 +00:00
' Yoruba ' : ' Nigeria ' ,
2011-09-09 16:40:04 +00:00
' Zulu ' : ' South Africa '
} ,
2011-05-23 19:38:52 +00:00
' other ' : [ ' European Union ' , ' Metropolitan France ' , ' UK ' ] ,
' wikipedia_name ' : {
# ambiguous on wikipedia
' Cocos (Keeling) Islands ' : ' Cocos Islands ' ,
' Collectivity of Saint Martin ' : ' Saint Martin ' ,
' Federated States of Micronesia ' : ' Micronesia ' ,
' French Territory of the Afars and the Issas ' : ' French Afar and Issas ' ,
' Georgia (country) ' : ' Georgia ' ,
' Nagorno-Karabakh Republic ' : ' Nagorno-Karabakh ' ,
' People \' s Republic of China ' : ' China ' ,
' Republic of China ' : ' Taiwan ' ,
' Republic of Dahomey ' : ' Dahomey ' ,
' Republic of Ireland ' : ' Ireland ' ,
' Republic of Kosovo ' : ' Kosovo ' ,
' Republic of Macedonia ' : ' Macedonia ' ,
' Republic of Upper Volta ' : ' Upper Volta ' ,
' Sahrawi Arab Democratic Republic ' : ' Sahrawi ' ,
' Saudi-Iraqi neutral zone ' : ' Neutral Zone ' ,
' State of Palestine ' : ' Palestine ' ,
' Trust Territory of the Pacific Islands ' : ' Pacific Islands '
} ,
' wikipedia_url ' : {
# dependencies of guernsey
' Alderney ' : ' ' ,
' Herm ' : ' ' ,
' Sark ' : ' ' ,
# territory of pakistan
' Azad_Kashmir ' : ' ' ,
' Gilgit-Baltistan ' : ' ' ,
# wrong in http://en.wikipedia.org/wiki/List_of_sovereign_states
' Coral_Sea_Islands_Territory ' : ' Coral_Sea_Islands ' ,
' Kingdom_of_the_Netherlands ' : ' Netherlands ' ,
' Saint-Barth % C3 % A9lemy ' : ' Saint_Barth % C3 % A9lemy ' ,
' Saint_Martin ' : ' Collectivity_of_Saint_Martin ' ,
# wrong in http://en.wikipedia.org/wiki/ISO_3166-1_alpha-2
' Caribbean_Netherlands ' : ' Bonaire,_Saint_Eustatius_and_Saba ' ,
' Ceuta ' : ' Ceuta_and_Melilla ' ,
' Palestinian_territories ' : ' State_of_Palestine ' ,
' Saudi %E 2 %80% 93Iraqi_neutral_zone ' : ' Saudi-Iraqi_neutral_zone ' ,
' Western_Sahara ' : ' Sahrawi_Arab_Democratic_Republic ' ,
# wrong in http://en.wikipedia.org/wiki/ISO_3166-3
' Johnston_Atoll ' : ' Johnston_Island ' ,
' Midway_Atoll ' : ' Midway_Islands ' ,
# wrong in all
' % C3 %85la nd_Islands ' : ' Åland ' ,
' East Timor ' : ' Timor-Leste ' ,
' Cocos_(Keeling)_Islands ' : ' Cocos_Islands ' ,
' French_Southern_and_Antarctic_Lands ' : ' French_Southern_and_Antarctic_Territories ' ,
' Saint_Helena,_Ascension_and_Tristan_da_Cunha ' : ' Saint_Helena ' ,
' The_Bahamas ' : ' Bahamas ' ,
' The_Gambia ' : ' Gambia '
} ,
' wikipedia_urls ' : [
# not in any list
# antarctic
' Adélie_Land ' , ' Antártica ' , ' Argentine_Antarctica ' ,
# dependent
' England ' , ' Northern_Ireland ' , ' Scotland ' , ' Wales ' ,
# former
' Korea ' , ' Siam ' , ' West_Germany ' ,
# other
' East Timor ' , ' French_Southern_Territories ' , ' Peter_I_Island ' , ' South_Sudan ' , ' UK '
]
}
def get_cities():
    """Return the city list produced by wikipedia.get_cities()."""
    return wikipedia.get_cities()
# Build the merged country list and write it to countries.json.
#
# Fetches the country lists from geonames, imdb and wikipedia and dumps each
# raw list to its own JSON file.  The wikipedia list is then used as the base:
# every entry is annotated with data from the module-level `geo` tables
# (dependencies, disputes, dissolution, google/imdb names, languages), and
# its flag and icon images are generated as a side effect.
# Returns the annotated list of country dicts.
def get_countries ( ) :
# The geonames list is only dumped to disk here; it is not used further below.
# The third argument (True) selects write_json's "reformat" output mode.
geonames_countries = geonames . get_countries ( )
write_json ( ' ../json/geonames.org/countries.json ' , geonames_countries , True )
imdb_countries = imdb . get_countries ( )
write_json ( ' ../json/imdb.com/countries.json ' , imdb_countries , True )
wikipedia_countries = wikipedia . get_countries ( )
write_json ( ' ../json/wikipedia.org/countries.json ' , wikipedia_countries , True )
# Alias, not a copy: the loop below mutates the wikipedia entries in place
# (the raw wikipedia dump above has already been written at this point).
countries = wikipedia_countries
# NOTE(review): `logs` is never used again in this function.
logs = [ ]
for country in countries :
# dependencies
# Each key of geo['dependencies'] is split into a list of names (c); its value
# (d) is presumably a list of dependent territories - verify against `geo`.
# A country named in c gets d as 'dependencies'; a country found in d gets
# the list c as 'dependency'.  Only the first matching entry is used.
country [ ' dependencies ' ] = [ ]
country [ ' dependency ' ] = [ ]
for c , d in geo [ ' dependencies ' ] . iteritems ( ) :
c = c . split ( ' , ' )
if country [ ' name ' ] in c :
country [ ' dependencies ' ] = d
break
elif country [ ' name ' ] in d :
country [ ' dependency ' ] = c
break
# disputes
# Same lookup scheme as for dependencies, using geo['disputes'].
country [ ' disputes ' ] = [ ]
country [ ' disputed ' ] = [ ]
for c , d in geo [ ' disputes ' ] . iteritems ( ) :
c = c . split ( ' , ' )
if country [ ' name ' ] in c :
country [ ' disputes ' ] = d
break
elif country [ ' name ' ] in d :
country [ ' disputed ' ] = c
break
# dissolved
# Falls back to an empty list for countries without a geo['dissolved'] entry.
country [ ' dissolved ' ] = geo [ ' dissolved ' ] [ country [ ' name ' ] ] if country [ ' name ' ] in geo [ ' dissolved ' ] else [ ]
# google name
# 'googleName' is only set for countries listed in geo['google_name'].
if country [ ' name ' ] in geo [ ' google_name ' ] :
country [ ' googleName ' ] = geo [ ' google_name ' ] [ country [ ' name ' ] ]
# other
# Boolean flag: True when the name is listed in geo['other'].
country [ ' other ' ] = country [ ' name ' ] in geo [ ' other ' ]
# imdb
# Manual overrides from `geo` take precedence.
# NOTE(review): membership is tested against geo['imdb_name'] but
# geo['imdb_code'] is indexed with the same key; this raises KeyError unless
# both tables share their keys - confirm.
2011-09-09 23:05:04 +00:00
if country [ ' name ' ] in geo [ ' imdb_name ' ] :
country [ ' imdbCode ' ] = geo [ ' imdb_code ' ] [ country [ ' name ' ] ]
country [ ' imdbName ' ] = geo [ ' imdb_name ' ] [ country [ ' name ' ] ]
else :
# Otherwise use the first imdb entry whose upper-cased code equals the
# country code or whose name equals the country name.  No default is
# assigned here, so unmatched countries end up without these two keys.
for imdb_country in imdb_countries :
if imdb_country [ ' code ' ] . upper ( ) == country [ ' code ' ] or imdb_country [ ' name ' ] == country [ ' name ' ] :
country [ ' imdbCode ' ] = imdb_country [ ' code ' ]
country [ ' imdbName ' ] = imdb_country [ ' name ' ]
break
2011-05-23 19:38:52 +00:00
# Side effects: fetch the flag image and derive the icon images from it
# (the icon step reads the flag file written by the flag step).
get_country_flag ( country [ ' code ' ] , country [ ' flagURL ' ] )
get_country_icon ( country [ ' code ' ] )
2011-09-09 16:40:04 +00:00
# languages
# Collect every language whose geo['languages'] value equals this country's name.
country [ ' languages ' ] = [ ]
for language , language_country in geo [ ' languages ' ] . iteritems ( ) :
if language_country == country [ ' name ' ] :
country [ ' languages ' ] . append ( language )
2011-05-23 19:38:52 +00:00
# Final merged result (written with write_json's default, indented mode).
write_json ( ' ../json/countries.json ' , countries )
return countries
# Download the flag at `url` and store it as PNG files named after `code`.
#
# code -- country code, used as the file name
# url  -- URL of the flag image
#
# Writes a PNG into the flags directory for width 2048 and scaled-down copies
# for widths 256 and 32.  For '.svg' URLs the SVG itself is stored as well.
# Returns nothing.
def get_country_flag ( code , url ) :
# max width on wikipedia
width = 2048
# read_url() is called with its default cache=True here.
img = read_url ( url )
file = ' ../png/flags/ ' + str ( width ) + ' / ' + code + ' .png '
if url [ - 4 : ] == ' .svg ' :
# Keep the vector original, then obtain a rendered PNG by rewriting the URL
# into the '/commons/thumb/' form with the requested pixel width appended.
write_file ( ' ../svg/flags/ ' + code + ' .svg ' , img )
# fixme: remove conditional later
# (wikipedia tended to time out)
if not os . path . exists ( file ) :
png = read_url ( url . replace ( ' /commons/ ' , ' /commons/thumb/ ' ) + ' / ' + str ( width ) + ' px-.png ' )
write_file ( file , png )
png = Image . open ( file )
else :
# Non-SVG source: store the download under the PNG path and reopen it as an image.
write_file ( file , img )
png = Image . open ( file )
# Scale to `width`, keeping the aspect ratio; `/` is true division because of
# `from __future__ import division` at the top of the file.
# NOTE(review): confirm from the original indentation whether this resize
# applies to the non-SVG branch only.
png = png . resize ( ( width , int ( round ( width / png . size [ 0 ] * png . size [ 1 ] ) ) ) , Image . ANTIALIAS )
write_image ( file , png )
# Derive the smaller versions from the full-size image.
for width in [ 256 , 32 ] :
file = ' ../png/flags/ ' + str ( width ) + ' / ' + code + ' .png '
2011-09-09 16:40:04 +00:00
# `or True` makes this condition always true, so existing files are overwritten.
if not os . path . exists ( file ) or True :
2011-05-23 19:38:52 +00:00
png_ = png . resize ( ( width , int ( round ( width / png . size [ 0 ] * png . size [ 1 ] ) ) ) , Image . ANTIALIAS )
write_image ( file , png_ )
# Build the square icon and the round marker images for one country.
#
# Opens the 2048 px flag PNG for `code`, applies the recipe stored in
# geo['icon'][code] (a sequence of calls to the _xxx helpers defined below)
# and writes the result as 1024, 256 and 16 px icons plus 256 and 32 px
# markers that are pasted through the circle.png mask.
#
# PIL's `size` is (width, height): flag.size[0] is the flag width and
# flag.size[1] the flag height.  `/` is true division here because of
# `from __future__ import division`; int() truncates to pixel coordinates.
# Returns nothing.
def get_country_icon ( code ) :
# border(n): square icon (side = flag height) that keeps an n px wide strip
# (n = args[0]) from the flag's left and right edges and fills the space
# between them with the horizontally centered part of the flag.
def _border ( flag , args ) :
icon = Image . new ( ' RGBA ' , ( flag . size [ 1 ] , flag . size [ 1 ] ) )
left = int ( ( flag . size [ 0 ] - flag . size [ 1 ] ) / 2 + args [ 0 ] )
right = left + flag . size [ 1 ] - args [ 0 ] * 2
crop_left = flag . crop ( ( 0 , 0 , args [ 0 ] , flag . size [ 1 ] ) )
crop_center = flag . crop ( ( left , 0 , right , flag . size [ 1 ] ) )
crop_right = flag . crop ( ( flag . size [ 0 ] - args [ 0 ] , 0 , flag . size [ 0 ] , flag . size [ 1 ] ) )
icon . paste ( crop_left , ( 0 , 0 ) )
icon . paste ( crop_center , ( args [ 0 ] , 0 ) )
icon . paste ( crop_right , ( flag . size [ 1 ] - args [ 0 ] , 0 ) )
return icon
# construct(): one-off composition from hard-coded pixel regions of the flag.
# NOTE(review): the literal coordinates presumably match one specific
# rendering of the flag named in the comment below - confirm before reuse.
def _construct ( flag ) :
# nepal
icon = Image . new ( ' RGBA ' , ( flag . size [ 1 ] , flag . size [ 1 ] ) )
# blue
crop = flag . crop ( ( 0 , flag . size [ 1 ] - 85 , 85 , flag . size [ 1 ] ) )
crop = crop . resize ( ( flag . size [ 1 ] , flag . size [ 1 ] ) , Image . ANTIALIAS )
icon . paste ( crop , ( 0 , 0 ) )
# red
crop = flag . crop ( ( 86 , flag . size [ 1 ] - 171 , 171 , flag . size [ 1 ] - 86 ) )
crop = crop . resize ( ( flag . size [ 1 ] - 170 , flag . size [ 1 ] - 170 ) , Image . ANTIALIAS )
icon . paste ( crop , ( 85 , 85 ) )
# moon
crop = flag . crop ( ( 147 , 753 , 147 + 732 , 1151 ) )
icon . paste ( crop , ( int ( ( flag . size [ 1 ] - 732 ) / 2 ) , 753 ) )
# sun
crop = flag . crop ( ( 147 , 1449 , 147 + 732 , 1449 + 732 ) )
# circle.png is converted to a single-channel ('L') image so that it can be
# used as the paste mask.
mask_ = Image . open ( ' ../png/circle.png ' )
mask = Image . new ( ' L ' , ( mask_ . size [ 0 ] , mask_ . size [ 1 ] ) )
mask . paste ( mask_ , ( 0 , 0 ) )
mask = mask . resize ( ( 732 , 732 ) , Image . ANTIALIAS )
icon . paste ( crop , ( int ( ( flag . size [ 1 ] - 732 ) / 2 ) , 1449 ) , mask )
return icon
# copy(x0, y0, x1, y1, w, h, x, y[, shape]): crops the box (x0, y0, x1, y1)
# from the 2048 px flag file (re-read from disk), scales it to w x h and
# pastes it at (x, y) onto the image passed in, which is modified in place.
# shape is 'rect' (appended to the caller's args list when only 8 values
# are given) or 'circle' (paste through the circle.png mask).
def _copy ( flag , args ) :
icon = flag
flag = Image . open ( ' ../png/flags/2048/ ' + code + ' .png ' )
if len ( args ) == 8 :
args . append ( ' rect ' )
crop = flag . crop ( ( args [ 0 ] , args [ 1 ] , args [ 2 ] , args [ 3 ] ) )
crop = crop . resize ( ( args [ 4 ] , args [ 5 ] ) , Image . ANTIALIAS )
if args [ 8 ] == ' rect ' :
icon . paste ( crop , ( args [ 6 ] , args [ 7 ] ) )
elif args [ 8 ] == ' circle ' :
mask_ = Image . open ( ' ../png/circle.png ' )
mask = Image . new ( ' L ' , ( mask_ . size [ 0 ] , mask_ . size [ 1 ] ) )
mask . paste ( mask_ , ( 0 , 0 ) )
mask = mask . resize ( ( args [ 4 ] , args [ 5 ] ) , Image . ANTIALIAS )
icon . paste ( crop , ( args [ 6 ] , args [ 7 ] ) , mask )
return icon
# crop([pos]): square cut-out of full flag height.  Without arguments it is
# horizontally centered; 'left' / 'right' align it with that edge; any other
# value is taken as the x coordinate of the cut-out's center.
def _crop ( flag , args ) :
icon = Image . new ( ' RGBA ' , ( flag . size [ 1 ] , flag . size [ 1 ] ) )
if len ( args ) == 0 :
left = int ( ( flag . size [ 0 ] - flag . size [ 1 ] ) / 2 )
elif args [ 0 ] == ' left ' :
left = 0
elif args [ 0 ] == ' right ' :
left = flag . size [ 0 ] - flag . size [ 1 ]
else :
left = int ( args [ 0 ] ) - int ( flag . size [ 1 ] / 2 )
icon . paste ( flag . crop ( ( left , 0 , left + flag . size [ 1 ] , flag . size [ 1 ] ) ) , ( 0 , 0 ) )
return icon
# cross(n, mode[, width]): starts from the centered square crop and then
# overwrites the areas beside the middle band of n px (n = args[0];
# presumably the thickness of the cross - TODO confirm).
#   mode 'crop':   pastes a vertical strip of the flag, and its mirror-position
#                  counterpart, to the left and right of the band.
#   mode 'resize': takes the four corner fields of the flag (args[2] px wide),
#                  squeezes each into a square and pastes them into the four
#                  corners of the icon.
def _cross ( flag , args ) :
icon = _crop ( flag , [ ] )
if args [ 1 ] == ' crop ' :
left = int ( ( flag . size [ 0 ] - args [ 0 ] ) / 4 - ( flag . size [ 1 ] - args [ 0 ] ) / 4 )
right = left + int ( ( flag . size [ 1 ] - args [ 0 ] ) / 2 )
crop = flag . crop ( ( left , 0 , right , flag . size [ 1 ] ) )
icon . paste ( crop , ( 0 , 0 ) )
crop = flag . crop ( ( flag . size [ 0 ] - right , 0 , flag . size [ 0 ] - left , flag . size [ 1 ] ) )
icon . paste ( crop , ( int ( ( flag . size [ 1 ] + args [ 0 ] ) / 2 ) , 0 ) )
elif args [ 1 ] == ' resize ' :
width = args [ 2 ]
# height: side of one corner square; offset: where the right/bottom squares start.
height = int ( ( flag . size [ 1 ] - args [ 0 ] ) / 2 )
offset = int ( ( flag . size [ 1 ] + args [ 0 ] ) / 2 )
crop = flag . crop ( ( 0 , 0 , width , height ) ) . resize ( ( height , height ) , Image . ANTIALIAS )
icon . paste ( crop , ( 0 , 0 ) )
crop = flag . crop ( ( flag . size [ 0 ] - width , 0 , flag . size [ 0 ] , height ) ) . resize ( ( height , height ) , Image . ANTIALIAS )
icon . paste ( crop , ( offset , 0 ) )
crop = flag . crop ( ( 0 , offset , width , flag . size [ 1 ] ) ) . resize ( ( height , height ) , Image . ANTIALIAS )
icon . paste ( crop , ( 0 , offset ) )
crop = flag . crop ( ( flag . size [ 0 ] - width , offset , flag . size [ 0 ] , flag . size [ 1 ] ) ) . resize ( ( height , height ) , Image . ANTIALIAS )
icon . paste ( crop , ( offset , offset ) )
return icon
# cut(l, t, r, b): trims l / t / r / b pixels off the left / top / right /
# bottom edge of the flag.
def _cut ( flag , args ) :
return flag . crop ( ( args [ 0 ] , args [ 1 ] , flag . size [ 0 ] - args [ 2 ] , flag . size [ 1 ] - args [ 3 ] ) )
# extend([top_side, bottom_side]): square icon whose side is the flag WIDTH.
# The flag is pasted vertically centered and the rows above and below it are
# filled with a 1 px high strip.  Without arguments the strips are cropped
# from near the flag's top and bottom edge; with arguments each strip is a
# single pixel taken near the 'left' or (otherwise) right side, repeated
# across the full width.
def _extend ( flag , args ) :
icon = Image . new ( ' RGBA ' , ( flag . size [ 0 ] , flag . size [ 0 ] ) )
# `bottom` adds 0.5 before truncating so that an odd number of padding rows
# puts the extra row below the flag.
top = int ( ( flag . size [ 0 ] - flag . size [ 1 ] ) / 2 )
bottom = int ( ( flag . size [ 0 ] - flag . size [ 1 ] ) / 2 + 0.5 )
icon . paste ( flag , ( 0 , top ) )
if len ( args ) == 0 :
crop_top = flag . crop ( ( 0 , 1 , flag . size [ 0 ] , 2 ) )
crop_bottom = flag . crop ( ( 0 , flag . size [ 1 ] - 2 , flag . size [ 0 ] , flag . size [ 1 ] - 1 ) )
else :
if args [ 0 ] == ' left ' :
pixel = flag . crop ( ( 2 , 1 , 3 , 2 ) )
else :
pixel = flag . crop ( ( flag . size [ 0 ] - 3 , 1 , flag . size [ 0 ] - 2 , 2 ) )
crop_top = Image . new ( ' RGBA ' , ( flag . size [ 0 ] , 1 ) )
for x in range ( flag . size [ 0 ] ) :
crop_top . paste ( pixel , ( x , 0 ) )
if args [ 1 ] == ' left ' :
pixel = flag . crop ( ( 1 , flag . size [ 1 ] - 3 , 2 , flag . size [ 1 ] - 2 ) )
else :
pixel = flag . crop ( ( flag . size [ 0 ] - 3 , flag . size [ 1 ] - 2 , flag . size [ 0 ] - 2 , flag . size [ 1 ] - 1 ) )
crop_bottom = Image . new ( ' RGBA ' , ( flag . size [ 0 ] , 1 ) )
for x in range ( flag . size [ 0 ] ) :
crop_bottom . paste ( pixel , ( x , 0 ) )
for y in range ( top ) :
icon . paste ( crop_top , ( 0 , y ) )
for y in range ( bottom ) :
icon . paste ( crop_bottom , ( 0 , top + flag . size [ 1 ] + y ) )
return icon
# pass(): returns the image unchanged.
def _pass ( flag ) :
return flag
# resize(): squeezes the whole flag into a square of side = flag height.
# `args` is accepted but not used.
def _resize ( flag , args ) :
icon = Image . new ( ' RGBA ' , ( flag . size [ 1 ] , flag . size [ 1 ] ) )
icon . paste ( flag . resize ( ( flag . size [ 1 ] , flag . size [ 1 ] ) , Image . ANTIALIAS ) )
return icon
# slice([x]): joins two vertical slices, each half the flag height wide: one
# centered on x (default: a quarter of the flag width) and one at the
# mirrored position on the right, pasted side by side.
def _slice ( flag , args ) :
icon = Image . new ( ' RGBA ' , ( flag . size [ 1 ] , flag . size [ 1 ] ) )
if len ( args ) == 0 :
args = [ int ( flag . size [ 0 ] / 4 ) ]
width = int ( flag . size [ 1 ] / 2 )
left = int ( args [ 0 ] - width / 2 )
right = int ( args [ 0 ] + width / 2 )
crop_left = flag . crop ( ( left , 0 , right , flag . size [ 1 ] ) )
crop_right = flag . crop ( ( flag . size [ 0 ] - right , 0 , flag . size [ 0 ] - left , flag . size [ 1 ] ) )
icon . paste ( crop_left , ( 0 , 0 ) )
icon . paste ( crop_right , ( width , 0 ) )
return icon
# Main part: build the 1024 px icon from the recipe, then derive the smaller
# icons and the markers from it.
file = ' ../png/icons/1024/ ' + code + ' .png '
2011-09-09 16:40:04 +00:00
# `or True` makes the condition always true, so the icon is always rebuilt
# and the `else` branch further down (reuse the existing file) is never taken.
if not os . path . exists ( file ) or True :
2011-05-23 19:38:52 +00:00
flag = Image . open ( ' ../png/flags/2048/ ' + code + ' .png ' )
icon = None
# Recipe syntax: the part after the comment marker is dropped, the rest is
# split into individual calls of the form name(arg, arg, ...).
functions = geo [ ' icon ' ] [ code ] . split ( ' # ' ) [ 0 ] . split ( ' ; ' )
if functions [ 0 ] : # remove later
for function in functions :
pos = function . find ( ' ( ' )
args = function [ pos + 1 : - 1 ] . split ( ' , ' )
# Arguments that start with a digit or minus sign become ints, all others
# stay strings.  Under Python 2, map() returns a list, which _copy relies on
# (it calls len() and append() on args).
if args [ 0 ] :
args = map ( lambda x : int ( x ) if x [ 0 ] in ' -0123456789 ' else x , args )
else :
args = [ ]
function = function [ : pos ]
# Dispatch on the function name; an unknown name leaves `icon` unchanged.
if function == ' border ' :
icon = _border ( flag , args )
elif function == ' construct ' :
icon = _construct ( flag )
elif function == ' copy ' :
icon = _copy ( flag , args )
elif function == ' crop ' :
icon = _crop ( flag , args )
elif function == ' cross ' :
icon = _cross ( flag , args )
elif function == ' cut ' :
icon = _cut ( flag , args )
elif function == ' extend ' :
icon = _extend ( flag , args )
elif function == ' pass ' :
icon = _pass ( flag )
elif function == ' resize ' :
icon = _resize ( flag , args )
elif function == ' slice ' :
icon = _slice ( flag , args )
# The result becomes the input of the next call, so recipe steps chain.
flag = icon
write_image ( file , icon . resize ( ( 1024 , 1024 ) , Image . ANTIALIAS ) )
else :
icon = Image . open ( file )
# Smaller square icons.
for width in [ 256 , 16 ] :
file = ' ../png/icons/ ' + str ( width ) + ' / ' + code + ' .png '
2011-09-09 16:40:04 +00:00
if not os . path . exists ( file ) or True :
2011-05-23 19:38:52 +00:00
write_image ( file , icon . resize ( ( width , width ) , Image . ANTIALIAS ) )
# Markers: the icon pasted through the circle mask onto an empty RGBA image.
# NOTE(review): this assignment is overwritten by the loop variable below.
size = 256
mask_ = Image . open ( ' ../png/circle.png ' )
mask = Image . new ( ' L ' , ( mask_ . size [ 0 ] , mask_ . size [ 1 ] ) )
mask . paste ( mask_ , ( 0 , 0 ) )
for size in [ 256 , 32 ] :
marker = Image . new ( ' RGBA ' , ( size , size ) )
icon_ = icon . resize ( ( size , size ) , Image . ANTIALIAS )
mask_ = mask . resize ( ( size , size ) , Image . ANTIALIAS )
marker . paste ( icon_ , ( 0 , 0 ) , mask_ )
2011-09-09 16:40:04 +00:00
file = ' ../png/markers/ ' + str ( size ) + ' / ' + code + ' .png '
if not os . path . exists ( file ) or True :
write_image ( file , marker )
2011-05-23 19:38:52 +00:00
def get_oxjs_languages():
    """Join the geonames and imdb language records and write them to disk.

    Every geonames language is paired with the first imdb language whose
    code equals one of its ISO 639-1/-2/-3 codes.  Languages without an
    imdb counterpart are left out.  The pairs are written to
    ../json/oxjs.org/languages.json; nothing is returned.
    """
    geonames_languages = get_geonames_languages()
    imdb_languages = get_imdb_languages()

    def find_imdb_language(geonames_language):
        # First imdb record whose code matches any of the three ISO codes.
        for candidate in imdb_languages:
            iso_codes = [
                geonames_language['ISO_639-1'],
                geonames_language['ISO_639-2'],
                geonames_language['ISO_639-3']
            ]
            if candidate['code'] in iso_codes:
                return candidate
        return None

    languages = []
    for geonames_language in geonames_languages:
        imdb_language = find_imdb_language(geonames_language)
        if imdb_language is not None:
            languages.append({
                'geonames.org': geonames_language,
                'imdb.com': imdb_language
            })
    write_json('../json/oxjs.org/languages.json', languages, True)
def make_tree(places):
    """Arrange flat place records into a nested tree and write it to disk.

    places -- list of place dicts with at least 'feature_code', 'geonameid',
              'name' and one field per entry of geo['levels'] (looked up
              through the level's 'key')

    The levels in geo['levels'] are processed in order, so parents exist
    before their children are attached.  A node's id is built from its
    non-empty level values joined with '-'; the parent id is the node id
    without its last component.  Children are kept sorted by node id.

    Writes the tree to ../json/tree.json and returns it (a list of nodes,
    each with 'geonameid', 'name', 'node_id' and 'nodes').
    """
    tree = []

    def get_node(find, node=None):
        # Depth-first search for the first node accepted by `find`, starting
        # below `node` or, without one, at the top of the tree.
        if node and find(node):
            return node
        children = node['nodes'] if node else tree
        for child in children:
            found = get_node(find, child)
            if found:
                return found
        return None

    def get_node_id(place):
        # Level values that are empty or '00' do not contribute to the id.
        ids = [place[level['key']] for level in geo['levels']]
        return '-'.join([value for value in ids if value != '' and value != '00'])

    def get_parent_id(node_id):
        return '-'.join(node_id.split('-')[:-1])

    for depth, level in enumerate(geo['levels']):
        for place in places:
            if place['feature_code'] != level['featureCode']:
                continue
            node = {
                'geonameid': place['geonameid'],
                'name': place['name'],
                'node_id': get_node_id(place),
                'nodes': [],
            }
            print(node['name'])
            print(node['node_id'])
            if depth == 0:
                siblings = tree
            else:
                parent_id = get_parent_id(node['node_id'])
                siblings = get_node(lambda x: x['node_id'] == parent_id)['nodes']
            siblings.append(node)
            # Sort in place: the previous `parent = sorted(parent, ...)` only
            # rebound a local name and left the list in the tree unsorted.
            siblings.sort(key=lambda x: x['node_id'])
    write_json('../json/tree.json', tree)
    print('')
    return tree
def read_file(file):
    """Return the entire contents of the file at path `file`.

    Prints a 'reading <file>' progress line first.  The file is opened in
    the default (text) mode and is closed even if reading fails.
    """
    print('reading %s' % file)
    with open(file) as f:
        return f.read()
def read_json(file):
    """Load and return the JSON document at `file`.

    `file` is fetched with read_url() when it starts with 'http://' and
    read from the local file system with read_file() otherwise.
    """
    if file.startswith('http://'):
        raw = read_url(file)
    else:
        raw = read_file(file)
    return json.loads(raw)
def read_table(file, keys, drop=(), filter=lambda x: True, sort=lambda x: x):
    """Read a tab-separated table into a sorted list of dicts.

    file   -- local path, or an 'http://' URL (downloaded without cache
              to '_tmp.data' first)
    keys   -- one dict per column, in column order; key['name'] is the
              field name and key['type'] a sample value whose type picks
              the conversion: a float or int (the sample doubles as the
              value for empty cells), a list (the cell is split on ','
              and each part converted like the list's first element; an
              empty cell gives []), or anything else to keep the string
    drop   -- names of fields to leave out of the result
    filter -- predicate; only rows for which it returns true are kept
    sort   -- key function used to sort the result

    Lines starting with '#' are skipped.  The collected rows are printed
    and returned as a sorted list.
    """
    def parse_value(text, default):
        # Exact type checks on purpose: a bool sample must not be treated
        # as an int.
        if type(default) == float:
            return float(text) if text else default
        if type(default) == int:
            return int(text) if text else default
        return text

    if file.startswith('http://'):
        path = '_tmp.data'
        write_file(path, read_url(file, cache=False))
    else:
        print('reading %s' % file)
        path = file

    data = []
    # The context manager closes the file, which the old code never did.
    with open(path) as f:
        for row in f:
            if not row or row[0] == '#':
                continue
            item = {}
            # Strip only the newline; slicing off the last character
            # truncated a final line that had no trailing newline.
            cols = row.rstrip('\n').split('\t')
            for c, col in enumerate(cols):
                key = keys[c]
                if key['name'] in drop:
                    continue
                if isinstance(key['type'], list):
                    if col:
                        value = [
                            parse_value(part, key['type'][0])
                            for part in col.split(',')
                        ]
                    else:
                        value = []
                else:
                    value = parse_value(col, key['type'])
                item[key['name']] = value
            if filter(item):
                data.append(item)
    print(data)
    return sorted(data, key=sort)
def read_url(url, cache=True):
    """Fetch `url` and return the response data.

    Uses ox.cache.readUrl when `cache` is true and ox.net.readUrl
    otherwise.  Prints a 'reading <url>' progress line first.
    """
    print('reading %s' % url)
    reader = ox.cache.readUrl if cache else ox.net.readUrl
    return reader(url)
def write_file(file, data):
    """Write `data` to `file`, creating missing parent directories.

    Prints a 'writing <file>' progress line first.  The file is opened in
    'w' mode and is closed even if the write fails.  Returns len(data).
    """
    print('writing %s' % file)
    write_path(file)
    with open(file, 'w') as f:
        f.write(data)
    return len(data)
def write_image(file, image):
    """Save `image` to `file`, creating missing parent directories.

    `image` only needs a save(path) method; the callers in this file pass
    PIL images.  Prints a 'writing <file>' progress line first.
    """
    print('writing %s' % file)
    write_path(file)
    image.save(file)
# Serialize `data` as JSON (keys sorted) and write it to `file`.
#
# file     -- target path, passed on to write_file()
# data     -- JSON-serializable object
# reformat -- False (default): regular dump with indent=4.
#             True: dump without indentation, then insert line breaks via
#             the three replace() calls below so that the objects of a list
#             of objects are separated by newlines.
# Returns nothing.
def write_json ( file , data , reformat = False ) :
if reformat :
data = json . dumps ( data , sort_keys = True )
else :
data = json . dumps ( data , indent = 4 , sort_keys = True )
# Second check of the same flag: post-process the unindented dump.
# NOTE(review): plain text replacement, so matching character sequences
# inside string values would be rewritten as well.
if reformat :
data = data . replace ( ' [ { ' , ' [ \n { ' )
data = data . replace ( ' }, { ' , ' }, \n { ' )
data = data . replace ( ' }] ' , ' } \n ] ' )
# Earlier attempts at compacting the indented output, kept for reference:
#data = re.sub('{\n\s+', '{', data)
#data = re.sub(', \n\s+"', ', "', data)
#data = re.sub('\n\s+}', '}', data)
#data = data.replace('{\n ', '{')
#data = data.replace(', \n "', ', "')
#data = data.replace('\n }', '}')
write_file ( file , data )
def write_log(file, line):
    """Append `line` to the log at `file`, or reset the log.

    file -- path of the log file
    line -- text to append; None empties the log instead

    When the log does not exist yet it is created with `line` as its only
    content; otherwise `line` is added after a newline.  The whole file is
    re-read and rewritten on every call.
    """
    if line is None:
        data = ''
    elif os.path.exists(file):
        data = read_file(file) + '\n' + line
    else:
        data = line
    write_file(file, data)
def write_path(file):
    """Create the directory that will hold `file`, including parents.

    Does nothing when `file` has no directory component or the directory
    already exists.
    """
    directory = os.path.dirname(file)
    if not directory or os.path.exists(directory):
        return
    os.makedirs(directory)
import geonames
import imdb
import wikipedia
if __name__ == '__main__':
    # sys.exit() is used below, but `sys` does not appear in the import
    # blocks of this file, so every call raised a NameError.  Import it here.
    import sys

    # Active job: dump the static lookup tables, then rebuild the country
    # data (JSON plus flag, icon and marker images).
    write_json('../json/geo.json', geo)
    countries = get_countries()
    sys.exit()

    # ------------------------------------------------------------------
    # Everything below is unreachable because of the sys.exit() above.
    # It records earlier one-off jobs; move a job above the exit to run it
    # again.  Some jobs call functions that are not defined in the visible
    # part of this file (get_geonames_countries() etc.).
    # ------------------------------------------------------------------

    # job: places with population >= 10000, or feature code PPLC
    path = '../txt/geonames.org/cities1000.txt'
    geo['keys'] = read_json('../json/oxjs.org/geonames.keys.json')
    keys = geo['keys']['place']['geonames.org']
    drop = ['alternatenames']
    # keep = lambda x: x['feature_code'] != 'PPLX' and x['population'] >= 100000
    keep = lambda x: x['feature_code'] == 'PPLC' or x['population'] >= 10000
    order = lambda x: -x['population']
    data = read_table(path, keys, drop=drop, filter=keep, sort=order)
    write_json('../json/geonames.org/cities10000.json', data, False)
    print(len(data))
    sys.exit()

    # job: cities from wikipedia
    cities = get_cities()
    print('%s %s' % (cities, len(cities)))
    sys.exit()

    # job: country statistics
    countries = get_countries()

    def count(test):
        # Number of countries accepted by `test`.
        return len([country for country in countries if test(country)])

    print('current independent %d' % count(
        lambda x: not len(x['dependency']) and not x['former'] and not x['other']
    ))
    print('former independent %d' % count(
        lambda x: not len(x['dependency']) and x['former'] and not x['other']
    ))
    print('current dependency %d' % count(
        lambda x: len(x['dependency']) and not x['former'] and not x['other']
    ))
    print('former dependency %d' % count(
        lambda x: len(x['dependency']) and x['former'] and not x['other']
    ))
    print('other %d' % count(
        lambda x: x['other']
    ))
    sys.exit()

    # job: places with population >= 100000, or feature code PPLC
    path = '../txt/geonames.org/cities1000.txt'
    keys = geo['keys']['place']['geonames.org']
    drop = ['alternatenames']
    # keep = lambda x: x['feature_code'] != 'PPLX' and x['population'] >= 100000
    keep = lambda x: x['feature_code'] == 'PPLC' or x['population'] >= 100000
    order = lambda x: -x['population']
    data = read_table(path, keys, drop=drop, filter=keep, sort=order)
    write_json('../json/geonames.org/cities100000.json', data, False)
    print(len(data))

    get_oxjs_languages()
    get_geonames_countries()
    get_geonames_languages()
    get_imdb_countries()
    get_imdb_languages()
    sys.exit()

    #places = read_json('../json/geonames.org/DE.A.json')
    #tree = make_tree(places)
    #write_json('../json/geonames.org/tree.DE.json', tree)

    # job: convert the geonames country table to JSON
    # (path[7:-4] strips the leading '../txt/' and the trailing '.txt')
    path = '../txt/geonames.org/countryInfo.txt'
    keys = geo['keys']['country']['geonames.org']
    order = lambda x: -x['Population']
    data = read_table(path, keys, sort=order)
    write_json('../json/' + path[7:-4] + '.json', data)
    print(len(data))

    # job: convert the geonames language table to JSON
    path = '../txt/geonames.org/iso-languagecodes.txt'
    keys = geo['keys']['language']['geonames.org']
    order = lambda x: x['Language_Name']
    data = read_table(path, keys, sort=order)
    write_json('../json/' + path[7:-4] + '.json', data)
    print(len(data))

    """
    languages = read_json('../json/geonames.org/iso-languagecodes.json')
    countries = read_json('../json/geonames.org/countryInfo.json')
    max_languages = max(map(lambda x: len(x['Languages']), countries))
    language_countries = {}
    for language in languages:
        language_name = language['Language_Name']
        language_iso = [language['ISO_639-3'], language['ISO_639-2'], language['ISO_639-1']]
        found = False
        for i in range(max_languages):
            for country in countries:
                if len(country['Languages']) > i:
                    country_language = country['Languages'][i].split('-')[0]
                    if country_language in language_iso:
                        if not language_name in language_countries:
                            language_countries[language_name] = []
                        language_countries[language_name].append({
                            'country': country['Country'],
                            'population': country['Population'],
                            'position': i + 1
                        })
    write_json('../json/geonames.org/languageCountriesAll.json', language_countries)
    for language, countries in language_countries.iteritems():
        language_countries[language] = countries[0]['country']
    language_countries['English'] = 'United Kingdom'
    language_countries['French'] = 'France'
    language_countries['Portuguese'] = 'Portugal'
    language_countries['Spanish'] = 'Spain'
    write_json('../json/geonames.org/languageCountries.json', language_countries)
    """

    # job: places with population >= 100000 from the cities15000 dump
    path = '../txt/geonames.org/cities15000.txt'
    keys = geo['keys']['place']['geonames.org']
    keep = lambda x: x['population'] >= 100000
    order = lambda x: -x['population']
    data = read_table(path, keys, filter=keep, sort=order)
    write_json('../json/geonames.org/cities100000.json', data)
    print(len(data))

    # job: feature class A plus selected feature codes for DE
    # NOTE(review): 'PPLA2' is listed twice; 'PPLA3' was presumably intended.
    path = '../txt/geonames.org/DE.txt'
    keys = geo['keys']['place']['geonames.org']
    keep = lambda x: x['feature_class'] == 'A' or x['feature_code'] in [
        'PPLA', 'PPLA2', 'PPLA2', 'PPLA4', 'PPLC', 'PPLG'
    ]
    order = lambda x: -x['population']
    data = read_table(path, keys, filter=keep, sort=order)
    write_json('../json/geonames.org/DE.A,P.json', data)
    print(len(data))