update Geo module

rolux 2012-04-02 23:13:05 +02:00
parent 5a8d836233
commit a7c50d840b


@@ -8,11 +8,20 @@ import sys
 import urllib
 CACHE = sys.argv[-1] == '-cache'
 try:
     DATA = ox.jsonc.loads(ox.file.read_file('../jsonc/countries.jsonc'))
 except:
+    print 'parse error, see jsonc/debug.json'
     ox.file.write_file('../jsonc/debug.json', ox.js.minify(ox.file.read_file('../jsonc/countries.jsonc')))
     sys.exit()
+GEO = {}
+for country in json.loads(ox.file.read_file('../../../source/Ox.Geo/json/Ox.Geo.json')):
+    GEO[country['code']] = {}
+    for key in ['area', 'lat', 'lng', 'south', 'west', 'north', 'east']:
+        GEO[country['code']][key] = country[key]
 LOGS = {}
 def decode_wikipedia_id(id):
@@ -77,12 +86,20 @@ def get_countries():
     # Country data
     countries = sorted(countries, key=sort)
     countries = map(lambda x: get_country_data(x), countries)
+    # Independence
+    for i, country in enumerate(countries):
+        if 'created' in country and not 'dependency' in country:
+            name = country['created']['country'][0]
+            data = filter(lambda x: x['name'] == name, countries)[0]
+            if 'dependency' in data:
+                countries[i]['independence'] = {
+                    'country': data['dependency'],
+                    'date': country['created']['date']
+                }
+    sys.exit()
     # Flags
     countries = sorted(countries, key=sort)
     flags = get_flags(countries)
-    for country in countries:
-        if country['code'] in flags:
-            country['flag'] = flags[country['code']]
     return countries
 def get_country_data(country):
@@ -97,9 +114,6 @@ def get_country_data(country):
     match = re.search('"/wiki/\.(\w{2})"', html)
     if match:
         country['code'] = match.group(1).upper()
-    # alias
-    if country['code'] in DATA['alias']:
-        country['alias'] = True
     # continents and regions
     for continent, regions in DATA['continents'].iteritems():
         for region, countries in regions.iteritems():
@@ -107,19 +121,9 @@ def get_country_data(country):
                 country['continent'] = continent
                 country['region'] = region
                 break
-    # created and dissolved
+    # created
     if name in DATA['created']:
         country['created'] = DATA['created'][name]
-    if name in DATA['dissolved']:
-        country['dissolved'] = DATA['dissolved'][name]
-    for c, d in DATA['dissolved'].iteritems():
-        if d['dissolved'] in ['merged', 'split']:
-            cs = d['country'] if type(d['country']) == list else [d['country']]
-            if name in cs:
-                country['created'] = {
-                    'country': c,
-                    'date': d['date']
-                }
     # dependencies
     for c, d in DATA['dependencies'].iteritems():
         c = c.split(', ')
@@ -134,6 +138,12 @@ def get_country_data(country):
             country['disputes'] = d if not 'disputes' in country else country['disputes'] + d
         elif name in d:
             country['disputed'] = c if not 'disputed' in country else country['disputed'] + c
+    # dissolved
+    if name in DATA['dissolved']:
+        country['dissolved'] = DATA['dissolved'][name]
+    # exception
+    if country['code'] in DATA['exception']:
+        country['exception'] = True
     # flag
     if name in DATA['flag']:
         file = DATA['flag'][name] if DATA['flag'][name][-4:] == '.png' else DATA['flag'][name] + '.svg'
@@ -150,6 +160,9 @@ def get_country_data(country):
     # imdb
     if name in DATA['imdb']:
         country['imdbName'] = DATA['imdb'][name]
+    # independence
+    if name in DATA['independence']:
+        country['independence'] = DATA['independence'][name]
     # languages
     for language, c in DATA['languages'].iteritems():
         if c == name:
@@ -157,6 +170,10 @@ def get_country_data(country):
                 country['languages'] = [language]
             else:
                 country['languages'].append(language)
+    # area, lat, lng, south, west, north, east
+    if country['code'] in GEO:
+        for key in GEO[country['code']]:
+            country[key] = GEO[country['code']][key]
     return country
 def get_flag(id):
@@ -238,6 +255,77 @@ def get_imdb_languages():
             LOGS['new languages'].append(language)
     ox.file.write_json('../json/imdbLanguages.json', imdb_languages, indent=4, sort_keys=True)
+def parse_txt():
+    data = {
+        'created': {},
+        'dissolved': {},
+        'independence': {}
+    }
+    f = open('../txt/countries.txt')
+    lines = map(lambda x: x.strip(), f.readlines())
+    f.close()
+    for line in filter(lambda x: x[0] != '#', lines):
+        date, country_a, operator, country_b = re.compile(
+            '([\d\-]+) +(.+) ([\*=\+\-><]) (.+)'
+        ).match(line).groups()
+        countries_a = country_a.split(' / ')
+        countries_b = country_b.split(' / ')
+        if operator == '*':
+            data['independence'][country_b] = {
+                'country': countries_a,
+                'date': date
+            }
+        elif operator == '=':
+            data['dissolved'][country_a] = {
+                'country': countries_b,
+                'date': date,
+                'dissolved': 'renamed'
+            }
+            data['created'][country_b] = {
+                'country': countries_a,
+                'date': date,
+                'created': 'renamed'
+            }
+        elif operator == '+':
+            for country in countries_a:
+                data['dissolved'][country] = {
+                    'country': countries_b,
+                    'date': date,
+                    'dissolved': 'joined'
+                }
+        elif operator == '-':
+            for country in countries_b:
+                data['created'][country] = {
+                    'country': countries_a,
+                    'date': date,
+                    'created': 'split'
+                }
+        elif operator == '>':
+            for country in countries_a:
+                data['dissolved'][country] = {
+                    'country': countries_b,
+                    'date': date,
+                    'dissolved': 'merged'
+                }
+            data['created'][country_b] = {
+                'country': countries_a,
+                'date': date,
+                'created': 'merged'
+            }
+        elif operator == '<':
+            data['dissolved'][country_a] = {
+                'country': countries_b,
+                'date': date,
+                'dissolved': 'split'
+            }
+            for country in countries_b:
+                data['created'][country] = {
+                    'country': countries_a,
+                    'date': date,
+                    'created': 'merged'
+                }
+    return data
 def read_url(url):
     print 'reading', url
     return ox.cache.readUrl(url) if CACHE else ox.net.readUrl(url)
@@ -252,6 +340,10 @@ def read_wikipedia_url(id):
     return html
 if __name__ == '__main__':
+    data = parse_txt()
+    DATA['created'] = data['created']
+    DATA['dissolved'] = data['dissolved']
+    DATA['independence'] = data['independence']
     countries = get_countries()
     ox.file.write_json('../json/countries.json', countries, indent=4, sort_keys=True)
     LOGS['total'] = len(countries)
@@ -260,14 +352,19 @@ if __name__ == '__main__':
     LOGS['current independent'] = 0
     LOGS['current dependent'] = 0
     LOGS['current disputed'] = 0
+    LOGS['current exception'] = 0
     LOGS['dissolved independent'] = 0
     LOGS['dissolved dependent'] = 0
     LOGS['dissolved disputed'] = 0
-    LOGS['alias'] = 0
+    LOGS['dissolved exception'] = 0
     for country in countries:
-        key = 'alias' if 'alias' in country else ' '.join([
+        key = ' '.join([
             'dissolved' if 'dissolved' in country else 'current',
-            'disputed' if 'disputed' in country else ('dependent' if 'dependency' in country else 'independent')
+            'exception' if 'exception' in country else (
+                'disputed' if 'disputed' in country else (
+                    'dependent' if 'dependency' in country else 'independent'
+                )
+            )
         ])
         LOGS[key] += 1
     get_imdb_countries(countries)
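
A minimal sketch of the line format the new parse_txt() reads from ../txt/countries.txt (the txt file itself is not part of this diff, so the sample line below is only illustrative): each non-comment line is "date left-side operator right-side", with multiple countries separated by " / ", where "*" records independence, "=" a renaming, "+" countries joining another, "-" countries splitting off, ">" countries merging into one, and "<" one country splitting into several.

import re

# Illustrative input line; the real ../txt/countries.txt is not shown in this commit.
line = '1993-01-01 Czechoslovakia < Czech Republic / Slovakia'

# The same pattern parse_txt() compiles: date, left-hand side, operator, right-hand side.
date, country_a, operator, country_b = re.compile(
    r'([\d\-]+) +(.+) ([\*=\+\-><]) (.+)'
).match(line).groups()
countries_a = country_a.split(' / ')
countries_b = country_b.split(' / ')

# '<' marks a split: the left country is dissolved, the right-hand countries are created
# (parse_txt() labels the created side 'merged', exactly as in the diff above).
assert operator == '<'
print({
    'dissolved': {country_a: {'country': countries_b, 'date': date, 'dissolved': 'split'}},
    'created': dict((c, {'country': countries_a, 'date': date, 'created': 'merged'})
                    for c in countries_b)
})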