update Geo module
parent 5a8d836233
commit a7c50d840b
1 changed file with 117 additions and 20 deletions
@@ -8,11 +8,20 @@ import sys
import urllib

CACHE = sys.argv[-1] == '-cache'

try:
    DATA = ox.jsonc.loads(ox.file.read_file('../jsonc/countries.jsonc'))
except:
    print 'parse error, see jsonc/debug.json'
    ox.file.write_file('../jsonc/debug.json', ox.js.minify(ox.file.read_file('../jsonc/countries.jsonc')))
    sys.exit()

GEO = {}
for country in json.loads(ox.file.read_file('../../../source/Ox.Geo/json/Ox.Geo.json')):
    GEO[country['code']] = {}
    for key in ['area', 'lat', 'lng', 'south', 'west', 'north', 'east']:
        GEO[country['code']][key] = country[key]

LOGS = {}

def decode_wikipedia_id(id):
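For context, the GEO lookup built above only needs a country code plus seven numeric fields per record. A minimal sketch of the kind of entry the loop expects from Ox.Geo.json (the values below are invented for illustration):

sample = {
    'code': 'IS', 'area': 103000,
    'lat': 65.0, 'lng': -18.0,
    'south': 63.3, 'west': -24.5, 'north': 66.6, 'east': -13.5
}
GEO[sample['code']] = dict(
    (key, sample[key])
    for key in ['area', 'lat', 'lng', 'south', 'west', 'north', 'east']
)
# GEO['IS'] now holds the area, center point and bounding box that
# get_country_data() copies onto the country dict further down.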
@@ -77,12 +86,20 @@ def get_countries():
    # Country data
    countries = sorted(countries, key=sort)
    countries = map(lambda x: get_country_data(x), countries)
    # Independence
    for i, country in enumerate(countries):
        if 'created' in country and not 'dependency' in country:
            name = country['created']['country'][0]
            data = filter(lambda x: x['name'] == name, countries)[0]
            if 'dependency' in data:
                countries[i]['independence'] = {
                    'country': data['dependency'],
                    'date': country['created']['date']
                }
    sys.exit()
    # Flags
    countries = sorted(countries, key=sort)
    flags = get_flags(countries)
    for country in countries:
        if country['code'] in flags:
            country['flag'] = flags[country['code']]
    return countries

def get_country_data(country):
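The independence pass above works purely on the dicts returned by get_country_data(): if a country records that it was created from a parent, and that parent is itself a dependency, the parent's dependency becomes the country's 'independence' source. A minimal sketch with two hypothetical records (names and date invented):

countries = [
    {'name': 'Parent Territory', 'dependency': ['Colonial Power']},
    {'name': 'New Country',
     'created': {'country': ['Parent Territory'], 'date': '1960-01-01'}}
]
# After the loop above runs over this list, the second record gains:
#     'independence': {'country': ['Colonial Power'], 'date': '1960-01-01'}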
@@ -97,9 +114,6 @@ def get_country_data(country):
    match = re.search('"/wiki/\.(\w{2})"', html)
    if match:
        country['code'] = match.group(1).upper()
    # alias
    if country['code'] in DATA['alias']:
        country['alias'] = True
    # continents and regions
    for continent, regions in DATA['continents'].iteritems():
        for region, countries in regions.iteritems():
@@ -107,19 +121,9 @@ def get_country_data(country):
                country['continent'] = continent
                country['region'] = region
                break
    # created and dissolved
    # created
    if name in DATA['created']:
        country['created'] = DATA['created'][name]
    if name in DATA['dissolved']:
        country['dissolved'] = DATA['dissolved'][name]
    for c, d in DATA['dissolved'].iteritems():
        if d['dissolved'] in ['merged', 'split']:
            cs = d['country'] if type(d['country']) == list else [d['country']]
            if name in cs:
                country['created'] = {
                    'country': c,
                    'date': d['date']
                }
    # dependencies
    for c, d in DATA['dependencies'].iteritems():
        c = c.split(', ')
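The reverse lookup over DATA['dissolved'] above means a country need not carry its own 'created' entry if a predecessor's record already names it as the result of a merge or split. A hypothetical entry and the value it would produce (names and date invented):

entry = {
    'country': ['North State', 'South State'],
    'date': '1991-01-01',
    'dissolved': 'split'
}
# If DATA['dissolved']['Old Federation'] == entry, then for a country
# whose name is 'North State' the loop above sets:
#     country['created'] == {'country': 'Old Federation', 'date': '1991-01-01'}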
@@ -134,6 +138,12 @@ def get_country_data(country):
            country['disputes'] = d if not 'disputes' in country else country['disputes'] + d
        elif name in d:
            country['disputed'] = c if not 'disputed' in country else country['disputed'] + c
    # dissolved
    if name in DATA['dissolved']:
        country['dissolved'] = DATA['dissolved'][name]
    # exception
    if country['code'] in DATA['exception']:
        country['exception'] = True
    # flag
    if name in DATA['flag']:
        file = DATA['flag'][name] if DATA['flag'][name][-4:] == '.png' else DATA['flag'][name] + '.svg'
@@ -150,6 +160,9 @@ def get_country_data(country):
    # imdb
    if name in DATA['imdb']:
        country['imdbName'] = DATA['imdb'][name]
    # independence
    if name in DATA['independence']:
        country['independence'] = DATA['independence'][name]
    # languages
    for language, c in DATA['languages'].iteritems():
        if c == name:
@@ -157,6 +170,10 @@ def get_country_data(country):
                country['languages'] = [language]
            else:
                country['languages'].append(language)
    # area, lat, lng, south, west, north, east
    if country['code'] in GEO:
        for key in GEO[country['code']]:
            country[key] = GEO[country['code']][key]
    return country

def get_flag(id):
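The last two passes of get_country_data() above are plain merges: the languages loop inverts DATA['languages'] (a language-to-country mapping) into a per-country list, and the GEO block copies the numeric fields collected at module load. A small sketch of the inversion, with a made-up mapping:

languages = {'Icelandic': 'Iceland', 'Danish': 'Denmark'}
country = {'name': 'Iceland', 'code': 'IS'}
for language, c in languages.iteritems():
    if c == country['name']:
        if not 'languages' in country:
            country['languages'] = [language]
        else:
            country['languages'].append(language)
# country['languages'] == ['Icelandic']; the GEO fields are copied
# in the same way from GEO[country['code']] when the code is known.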
@@ -238,6 +255,77 @@ def get_imdb_languages():
            LOGS['new languages'].append(language)
    ox.file.write_json('../json/imdbLanguages.json', imdb_languages, indent=4, sort_keys=True)

def parse_txt():
    data = {
        'created': {},
        'dissolved': {},
        'independence': {}
    }
    f = open('../txt/countries.txt')
    lines = map(lambda x: x.strip(), f.readlines())
    f.close()
    for line in filter(lambda x: x[0] != '#', lines):
        date, country_a, operator, country_b = re.compile(
            '([\d\-]+) +(.+) ([\*=\+\-><]) (.+)'
        ).match(line).groups()
        countries_a = country_a.split(' / ')
        countries_b = country_b.split(' / ')
        if operator == '*':
            data['independence'][country_b] = {
                'country': countries_a,
                'date': date
            }
        elif operator == '=':
            data['dissolved'][country_a] = {
                'country': countries_b,
                'date': date,
                'dissolved': 'renamed'
            }
            data['created'][country_b] = {
                'country': countries_a,
                'date': date,
                'created': 'renamed'
            }
        elif operator == '+':
            for country in countries_a:
                data['dissolved'][country] = {
                    'country': countries_b,
                    'date': date,
                    'dissolved': 'joined'
                }
        elif operator == '-':
            for country in countries_b:
                data['created'][country] = {
                    'country': countries_a,
                    'date': date,
                    'created': 'split'
                }
        elif operator == '>':
            for country in countries_a:
                data['dissolved'][country] = {
                    'country': countries_b,
                    'date': date,
                    'dissolved': 'merged'
                }
            data['created'][country_b] = {
                'country': countries_a,
                'date': date,
                'created': 'merged'
            }
        elif operator == '<':
            data['dissolved'][country_a] = {
                'country': countries_b,
                'date': date,
                'dissolved': 'split'
            }
            for country in countries_b:
                data['created'][country] = {
                    'country': countries_a,
                    'date': date,
                    'created': 'merged'
                }
    return data

def read_url(url):
    print 'reading', url
    return ox.cache.readUrl(url) if CACHE else ox.net.readUrl(url)
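parse_txt() above expects each non-comment line of countries.txt to read 'date country(s) operator country(s)', with ' / ' separating multiple countries and the operator encoding the event: * independence, = renamed, + joined, - split off, > merged into, < split into. A sketch of how one hypothetical line is parsed (names and date invented; the real input is ../txt/countries.txt):

import re

line = '1991-12-26 Old Union < North State / South State'
match = re.compile('([\d\-]+) +(.+) ([\*=\+\-><]) (.+)').match(line)
# match.groups() == ('1991-12-26', 'Old Union', '<', 'North State / South State')
# so parse_txt() would record:
#     data['dissolved']['Old Union'] == {'country': ['North State', 'South State'],
#                                        'date': '1991-12-26', 'dissolved': 'split'}
#     data['created']['North State'] == {'country': ['Old Union'],
#                                        'date': '1991-12-26', 'created': 'merged'}
#     (and the same 'created' entry for 'South State')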
@@ -252,6 +340,10 @@ def read_wikipedia_url(id):
    return html

if __name__ == '__main__':
    data = parse_txt()
    DATA['created'] = data['created']
    DATA['dissolved'] = data['dissolved']
    DATA['independence'] = data['independence']
    countries = get_countries()
    ox.file.write_json('../json/countries.json', countries, indent=4, sort_keys=True)
    LOGS['total'] = len(countries)
@@ -260,14 +352,19 @@ if __name__ == '__main__':
    LOGS['current independent'] = 0
    LOGS['current dependent'] = 0
    LOGS['current disputed'] = 0
    LOGS['current exception'] = 0
    LOGS['dissolved independent'] = 0
    LOGS['dissolved dependent'] = 0
    LOGS['dissolved disputed'] = 0
    LOGS['alias'] = 0
    LOGS['dissolved exception'] = 0
    for country in countries:
        key = 'alias' if 'alias' in country else ' '.join([
        key = ' '.join([
            'dissolved' if 'dissolved' in country else 'current',
            'disputed' if 'disputed' in country else ('dependent' if 'dependency' in country else 'independent')
            'exception' if 'exception' in country else (
                'disputed' if 'disputed' in country else (
                    'dependent' if 'dependency' in country else 'independent'
                )
            )
        ])
        LOGS[key] += 1
    get_imdb_countries(countries)
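The log key built above is either 'alias' or a two-word label combining lifetime ('current' or 'dissolved') with status ('exception', 'disputed', 'dependent' or 'independent'). A small sketch using the newer of the two key expressions shown in this hunk, with hypothetical country dicts:

for country in [
    {'name': 'A'},
    {'name': 'B', 'dissolved': {}, 'dependency': ['C']},
    {'name': 'D', 'alias': True}
]:
    key = 'alias' if 'alias' in country else ' '.join([
        'dissolved' if 'dissolved' in country else 'current',
        'exception' if 'exception' in country else (
            'disputed' if 'disputed' in country else (
                'dependent' if 'dependency' in country else 'independent'
            )
        )
    ])
    print key
# prints 'current independent', 'dissolved dependent', 'alias'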