update Geo module
parent 5a8d836233
commit a7c50d840b

1 changed file with 117 additions and 20 deletions
@@ -8,11 +8,20 @@ import sys
 import urllib
+
 CACHE = sys.argv[-1] == '-cache'
+
 try:
     DATA = ox.jsonc.loads(ox.file.read_file('../jsonc/countries.jsonc'))
 except:
+    print 'parse error, see jsonc/debug.json'
     ox.file.write_file('../jsonc/debug.json', ox.js.minify(ox.file.read_file('../jsonc/countries.jsonc')))
     sys.exit()
 
+GEO = {}
+for country in json.loads(ox.file.read_file('../../../source/Ox.Geo/json/Ox.Geo.json')):
+    GEO[country['code']] = {}
+    for key in ['area', 'lat', 'lng', 'south', 'west', 'north', 'east']:
+        GEO[country['code']][key] = country[key]
+
 LOGS = {}
 
 def decode_wikipedia_id(id):
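Note (not part of the diff): the new module-level block builds a GEO lookup keyed by country code from Ox.Geo.json, keeping only the seven geographic fields that get copied onto each country later in this diff. A minimal sketch of that reshaping, using a made-up inline record in place of the real file (the "type" field and all values are invented, only to show that non-geographic keys are dropped):

import json

# Stand-in for ../../../source/Ox.Geo/json/Ox.Geo.json; values are invented.
records = json.loads('''[
    {"code": "FR", "type": "country", "area": 547557.0, "lat": 46.2, "lng": 2.2,
     "south": 41.3, "west": -5.1, "north": 51.1, "east": 9.6}
]''')

GEO = {}
for country in records:
    GEO[country['code']] = {}
    # keep only the geographic keys, dropping anything else in the record
    for key in ['area', 'lat', 'lng', 'south', 'west', 'north', 'east']:
        GEO[country['code']][key] = country[key]

print(GEO['FR']['north'])  # 51.1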
@@ -77,12 +86,20 @@ def get_countries():
     # Country data
     countries = sorted(countries, key=sort)
     countries = map(lambda x: get_country_data(x), countries)
+    # Independence
+    for i, country in enumerate(countries):
+        if 'created' in country and not 'dependency' in country:
+            name = country['created']['country'][0]
+            data = filter(lambda x: x['name'] == name, countries)[0]
+            if 'dependency' in data:
+                countries[i]['independence'] = {
+                    'country': data['dependency'],
+                    'date': country['created']['date']
+                }
+    sys.exit()
     # Flags
     countries = sorted(countries, key=sort)
     flags = get_flags(countries)
-    for country in countries:
-        if country['code'] in flags:
-            country['flag'] = flags[country['code']]
     return countries
 
 def get_country_data(country):
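Note (not part of the diff): the added # Independence pass looks up the country named in a record's 'created' entry and, if that country is itself a dependency, records an 'independence' entry pointing at the governing country; the sys.exit() committed right after it looks like a leftover debugging exit and, as committed, stops the script before the flag lookup runs. A rough, self-contained sketch of the pass on invented records (a list comprehension stands in for Python 2's filter()):

# Invented records, shaped like the dicts get_countries() builds.
countries = [
    {'name': 'Territory A', 'dependency': ['Country X']},
    {'name': 'Country B', 'created': {'country': ['Territory A'], 'date': '2000-01-01'}}
]

for i, country in enumerate(countries):
    if 'created' in country and 'dependency' not in country:
        name = country['created']['country'][0]
        # first record whose name matches the creating country
        data = [x for x in countries if x['name'] == name][0]
        if 'dependency' in data:
            countries[i]['independence'] = {
                'country': data['dependency'],
                'date': country['created']['date']
            }

print(countries[1]['independence'])
# {'country': ['Country X'], 'date': '2000-01-01'}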
@@ -97,9 +114,6 @@ def get_country_data(country):
     match = re.search('"/wiki/\.(\w{2})"', html)
     if match:
         country['code'] = match.group(1).upper()
-    # alias
-    if country['code'] in DATA['alias']:
-        country['alias'] = True
     # continents and regions
     for continent, regions in DATA['continents'].iteritems():
         for region, countries in regions.iteritems():
@@ -107,19 +121,9 @@ def get_country_data(country):
                 country['continent'] = continent
                 country['region'] = region
                 break
-    # created and dissolved
+    # created
     if name in DATA['created']:
         country['created'] = DATA['created'][name]
-    if name in DATA['dissolved']:
-        country['dissolved'] = DATA['dissolved'][name]
-    for c, d in DATA['dissolved'].iteritems():
-        if d['dissolved'] in ['merged', 'split']:
-            cs = d['country'] if type(d['country']) == list else [d['country']]
-            if name in cs:
-                country['created'] = {
-                    'country': c,
-                    'date': d['date']
-                }
     # dependencies
     for c, d in DATA['dependencies'].iteritems():
         c = c.split(', ')
@@ -134,6 +138,12 @@ def get_country_data(country):
             country['disputes'] = d if not 'disputes' in country else country['disputes'] + d
         elif name in d:
             country['disputed'] = c if not 'disputed' in country else country['disputed'] + c
+    # dissolved
+    if name in DATA['dissolved']:
+        country['dissolved'] = DATA['dissolved'][name]
+    # exception
+    if country['code'] in DATA['exception']:
+        country['exception'] = True
     # flag
     if name in DATA['flag']:
         file = DATA['flag'][name] if DATA['flag'][name][-4:] == '.png' else DATA['flag'][name] + '.svg'
@@ -150,6 +160,9 @@ def get_country_data(country):
     # imdb
     if name in DATA['imdb']:
         country['imdbName'] = DATA['imdb'][name]
+    # independence
+    if name in DATA['independence']:
+        country['independence'] = DATA['independence'][name]
     # languages
     for language, c in DATA['languages'].iteritems():
         if c == name:
@@ -157,6 +170,10 @@ def get_country_data(country):
                 country['languages'] = [language]
             else:
                 country['languages'].append(language)
+    # area, lat, lng, south, west, north, east
+    if country['code'] in GEO:
+        for key in GEO[country['code']]:
+            country[key] = GEO[country['code']][key]
     return country
 
 def get_flag(id):
@@ -238,6 +255,77 @@ def get_imdb_languages():
             LOGS['new languages'].append(language)
     ox.file.write_json('../json/imdbLanguages.json', imdb_languages, indent=4, sort_keys=True)
 
+def parse_txt():
+    data = {
+        'created': {},
+        'dissolved': {},
+        'independence': {}
+    }
+    f = open('../txt/countries.txt')
+    lines = map(lambda x: x.strip(), f.readlines())
+    f.close()
+    for line in filter(lambda x: x[0] != '#', lines):
+        date, country_a, operator, country_b = re.compile(
+            '([\d\-]+) +(.+) ([\*=\+\-><]) (.+)'
+        ).match(line).groups()
+        countries_a = country_a.split(' / ')
+        countries_b = country_b.split(' / ')
+        if operator == '*':
+            data['independence'][country_b] = {
+                'country': countries_a,
+                'date': date
+            }
+        elif operator == '=':
+            data['dissolved'][country_a] = {
+                'country': countries_b,
+                'date': date,
+                'dissolved': 'renamed'
+            }
+            data['created'][country_b] = {
+                'country': countries_a,
+                'date': date,
+                'created': 'renamed'
+            }
+        elif operator == '+':
+            for country in countries_a:
+                data['dissolved'][country] = {
+                    'country': countries_b,
+                    'date': date,
+                    'dissolved': 'joined'
+                }
+        elif operator == '-':
+            for country in countries_b:
+                data['created'][country] = {
+                    'country': countries_a,
+                    'date': date,
+                    'created': 'split'
+                }
+        elif operator == '>':
+            for country in countries_a:
+                data['dissolved'][country] = {
+                    'country': countries_b,
+                    'date': date,
+                    'dissolved': 'merged'
+                }
+            data['created'][country_b] = {
+                'country': countries_a,
+                'date': date,
+                'created': 'merged'
+            }
+        elif operator == '<':
+            data['dissolved'][country_a] = {
+                'country': countries_b,
+                'date': date,
+                'dissolved': 'split'
+            }
+            for country in countries_b:
+                data['created'][country] = {
+                    'country': countries_a,
+                    'date': date,
+                    'created': 'merged'
+                }
+    return data
+
 def read_url(url):
     print 'reading', url
     return ox.cache.readUrl(url) if CACHE else ox.net.readUrl(url)
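Note (not part of the diff): the new parse_txt() builds the 'created', 'dissolved' and 'independence' tables from ../txt/countries.txt and (in the __main__ hunk below) overrides the entries loaded from countries.jsonc. Each non-comment line is a date, a left-hand country list, a single-character operator and a right-hand country list, with ' / ' separating multiple countries; per the code above, the operators mean * independence, = renamed, + joined, - split off, > merged into, < split into. The txt file itself is not part of this commit, so the lines below are invented purely to show how the regex splits a line:

import re

# Hypothetical lines in the format the regex implies; the real data lives
# in ../txt/countries.txt, which is not shown in this commit.
lines = [
    '1991-08-24  Soviet Union * Ukraine',
    '1990-10-03  East Germany / West Germany > Germany'
]

pattern = re.compile('([\d\-]+) +(.+) ([\*=\+\-><]) (.+)')
for line in lines:
    date, country_a, operator, country_b = pattern.match(line).groups()
    print((date, country_a.split(' / '), operator, country_b.split(' / ')))
# ('1991-08-24', ['Soviet Union'], '*', ['Ukraine'])
# ('1990-10-03', ['East Germany', 'West Germany'], '>', ['Germany'])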
@@ -252,6 +340,10 @@ def read_wikipedia_url(id):
     return html
 
 if __name__ == '__main__':
+    data = parse_txt()
+    DATA['created'] = data['created']
+    DATA['dissolved'] = data['dissolved']
+    DATA['independence'] = data['independence']
     countries = get_countries()
     ox.file.write_json('../json/countries.json', countries, indent=4, sort_keys=True)
     LOGS['total'] = len(countries)
@@ -260,14 +352,19 @@ if __name__ == '__main__':
     LOGS['current independent'] = 0
     LOGS['current dependent'] = 0
     LOGS['current disputed'] = 0
+    LOGS['current exception'] = 0
     LOGS['dissolved independent'] = 0
     LOGS['dissolved dependent'] = 0
     LOGS['dissolved disputed'] = 0
-    LOGS['alias'] = 0
+    LOGS['dissolved exception'] = 0
     for country in countries:
-        key = 'alias' if 'alias' in country else ' '.join([
+        key = ' '.join([
             'dissolved' if 'dissolved' in country else 'current',
-            'disputed' if 'disputed' in country else ('dependent' if 'dependency' in country else 'independent')
+            'exception' if 'exception' in country else (
+                'disputed' if 'disputed' in country else (
+                    'dependent' if 'dependency' in country else 'independent'
+                )
+            )
         ])
         LOGS[key] += 1
     get_imdb_countries(countries)
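Note (not part of the diff): with the exception flag added earlier in the diff, the summary tally now sorts every country into one of eight buckets named '<current|dissolved> <exception|disputed|dependent|independent>', and the old standalone 'alias' counter is gone. A small sketch of the key construction on invented records, with the same precedence as the committed expression:

def log_key(country):
    # exception wins over disputed, disputed over dependent,
    # and anything else counts as independent
    return ' '.join([
        'dissolved' if 'dissolved' in country else 'current',
        'exception' if 'exception' in country else (
            'disputed' if 'disputed' in country else (
                'dependent' if 'dependency' in country else 'independent'
            )
        )
    ])

print(log_key({'name': 'A'}))                                      # current independent
print(log_key({'name': 'B', 'dependency': ['A']}))                 # current dependent
print(log_key({'name': 'C', 'dissolved': {}, 'exception': True}))  # dissolved exception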