74 lines
2.5 KiB
Python
74 lines
2.5 KiB
Python
|
import json
|
||
|
import math
|
||
|
import re
|
||
|
from ox.geo import get_country
|
||
|
|
||
|
def read_table(source, target, keys, filter_=lambda x: True, map_=lambda x: x, sort_=lambda x: x):
    """Convert a tab-separated text table into a sorted JSON array.

    Each non-blank line of *source* that does not start with ``#`` is split
    on tabs and turned into a dict using *keys*; rows accepted by *filter_*
    are transformed by *map_*, the results are sorted by *sort_* and written
    to *target* as indented JSON. The number of written rows is printed.

    Args:
        source: Path of the tab-separated input file (read as UTF-8).
        target: Path of the JSON file to write (overwritten if present).
        keys: One ``{'name': ..., 'type': ...}`` dict per column, in column
            order. ``type`` is an example value: a float or int selects
            numeric parsing and doubles as the fallback for an empty cell;
            a one-element list marks a comma-separated cell whose parts are
            parsed like the list's element; anything else keeps the text.
        filter_: Predicate called with the parsed row dict; falsy rows are
            dropped.
        map_: Transformation applied to every kept row dict.
        sort_: Key function used to order the mapped rows. Note that the
            default identity key only works when the mapped rows are
            orderable, which plain dicts are not on Python 3.

    Raises:
        IndexError: If a row has more columns than *keys* has entries.
        ValueError: If a numeric cell cannot be parsed.
    """
    # Function-scope import so the block does not depend on a new file-level
    # import; io.open accepts ``encoding`` on both Python 2 and Python 3.
    import io

    def parse_value(text, default):
        # The type of ``default`` selects the parser; an empty cell yields
        # ``default`` itself.
        if isinstance(default, float):
            return float(text) if text else default
        if isinstance(default, int):
            return int(text) if text else default
        return text

    data = []
    with io.open(source, encoding='utf-8') as f:
        for row in f:
            # Strip only the line terminator: trailing tabs are significant
            # (they delimit empty columns), and the last line of the file
            # may legitimately lack a newline.
            row = row.rstrip('\n')
            if not row or row[0] == '#':
                continue
            item = {}
            for c, col in enumerate(row.split('\t')):
                key = keys[c]
                if isinstance(key['type'], list):
                    # A real list (not a lazy map object) so that the value
                    # stays JSON-serialisable on Python 3.
                    if col:
                        value = [parse_value(part, key['type'][0]) for part in col.split(',')]
                    else:
                        value = []
                else:
                    value = parse_value(col, key['type'])
                item[key['name']] = value
            if filter_(item):
                data.append(map_(item))
    data.sort(key=sort_)
    with open(target, 'w') as f:
        f.write(json.dumps(data, indent=4, sort_keys=True))
    # Single parenthesised argument: valid as a Python 2 print statement and
    # as a Python 3 print() call, with the same output.
    print('%d cities' % len(data))
|
||
|
|
||
|
# GeoNames data downloads: http://download.geonames.org/export/dump/
# GeoNames feature codes:  http://www.geonames.org/export/codes.html
|
||
|
# Input table and output JSON paths (relative to the working directory).
source = '../txt/cities1000.txt'
target = '../json/cities.json'

# One entry per tab-separated column, in column order. The 'type' value is an
# example of the parsed type: 0 -> int, 0.0 -> float, '' -> text kept as is,
# [''] -> comma-separated list of text. For numeric columns it is also the
# value used when the cell is empty.
keys = [
    dict(name='geonameid', type=0),
    dict(name='name', type=''),
    dict(name='asciiname', type=''),
    dict(name='alternatenames', type=['']),
    dict(name='latitude', type=0.0),
    dict(name='longitude', type=0.0),
    dict(name='feature_class', type=''),
    dict(name='feature_code', type=''),
    dict(name='country_code', type=''),
    dict(name='cc2', type=['']),
    dict(name='admin1_code', type=''),
    dict(name='admin2_code', type=''),
    dict(name='admin3_code', type=''),
    dict(name='admin4_code', type=''),
    dict(name='population', type=0),
    dict(name='elevation', type=0),
    dict(name='gtopo30', type=0),
    dict(name='timezone', type=''),
    dict(name='modification_date', type=''),
]
|
||
|
def filter_(x):
    """Return a truthy value for rows that belong in the output.

    A row is kept when its ``feature_code`` is exactly ``PPLC`` or ``PPLA``
    (see the GeoNames feature-code list for their meaning), or when its
    ``population`` is at least 49589.
    """
    return re.search('^PPL(C|A)$', x['feature_code']) or x['population'] >= 49589
|
||
|
def map_(x):
    """Return a new dict holding only the fields of *x* kept in the output.

    Args:
        x: Parsed row dict; must contain every field listed below.

    Returns:
        A dict with the keys ``country_code``, ``elevation``,
        ``feature_code``, ``latitude``, ``longitude``, ``name`` and
        ``population`` copied from *x*.

    Raises:
        KeyError: If *x* lacks one of those fields.
    """
    # Every name needs its own comma: adjacent string literals are silently
    # concatenated, which previously produced the bogus key
    # 'feature_codelatitude'. The trailing comma guards future additions.
    fields = (
        'country_code', 'elevation', 'feature_code',
        'latitude', 'longitude', 'name', 'population',
    )
    return {key: x[key] for key in fields}
|
||
|
def sort_(x):
    """Sort key: negated ``population``, so larger populations sort first."""
    return -x['population']
|
||
|
|
||
|
# Run the conversion with the module-level configuration defined above.
read_table(source, target, keys, filter_=filter_, map_=map_, sort_=sort_)
|