import json
import math
import re
from ox.geo import get_country

def read_table(source, target, keys, filter_=lambda x: True, map_=lambda x: x, sort_=lambda x: x):
    """Convert a tab-separated text table into a sorted JSON array of objects.

    Arguments:
    source  -- path of the tab-separated input file; lines whose first
               character is '#' are skipped
    target  -- path of the JSON file to write (overwritten if present)
    keys    -- one {'name': ..., 'type': ...} dict per column, in column
               order. 'type' is a sample value: an int or float sample
               selects the conversion and doubles as the default for an
               empty cell, any other sample keeps the raw string, and a
               one-element list marks a comma-separated list column whose
               items are converted according to the list's element
    filter_ -- predicate called with each parsed row; falsy rows are dropped
    map_    -- transformation applied to every row that passes filter_
    sort_   -- sort key applied to the transformed rows

    Raises IndexError if a row has more columns than keys has entries.
    Prints the number of rows written, followed by ' cities'.
    """
    def parse_value(text, sample):
        # An empty cell falls back to the sample value itself (0, 0.0, ...).
        kind = type(sample)
        if kind in (int, float):
            return kind(text) if text else sample
        return text

    data = []
    with open(source) as f:
        for row in f:
            if not row or row[0] == '#':
                continue
            # Strip only the line terminator, so the last field survives
            # intact even when the final line has no trailing newline.
            cols = row.rstrip('\r\n').split('\t')
            item = {}
            for c, col in enumerate(cols):
                key = keys[c]
                if isinstance(key['type'], list):
                    # Build a real list (not a lazy map object) so that
                    # the value stays JSON-serializable on Python 3 too.
                    if col:
                        value = [parse_value(part, key['type'][0]) for part in col.split(',')]
                    else:
                        value = []
                else:
                    value = parse_value(col, key['type'])
                item[key['name']] = value
            if filter_(item):
                data.append(map_(item))
    data.sort(key=sort_)
    with open(target, 'w') as f:
        f.write(json.dumps(data, indent=4, sort_keys=True))
    # Single-argument form prints the same text on Python 2 and Python 3.
    print('%d cities' % len(data))

# Input data:       http://download.geonames.org/export/dump/
# Feature codes:    http://www.geonames.org/export/codes.html
source = '../txt/cities1000.txt'
target = '../json/cities.json'

# One entry per input column, in column order. 'type' is a sample value:
# read_table converts each cell to the sample's Python type and uses the
# sample as the default for empty cells; a one-element list marks a
# comma-separated list column.
# NOTE(review): the column order presumably mirrors the geonames dump
# layout documented at the first URL above -- confirm against its readme.
keys = [
    dict(name='geonameid', type=0),
    dict(name='name', type=''),
    dict(name='asciiname', type=''),
    dict(name='alternatenames', type=['']),
    dict(name='latitude', type=0.0),
    dict(name='longitude', type=0.0),
    dict(name='feature_class', type=''),
    dict(name='feature_code', type=''),
    dict(name='country_code', type=''),
    dict(name='cc2', type=['']),
    dict(name='admin1_code', type=''),
    dict(name='admin2_code', type=''),
    dict(name='admin3_code', type=''),
    dict(name='admin4_code', type=''),
    dict(name='population', type=0),
    dict(name='elevation', type=0),
    dict(name='gtopo30', type=0),
    dict(name='timezone', type=''),
    dict(name='modification_date', type=''),
]
def filter_(x):
    """Keep a row if its feature code is PPLC or PPLA, or if its
    population is at least 49589.

    Returns the regex match object when the feature code matches,
    otherwise the boolean result of the population comparison.
    """
    code_match = re.search('^PPL(C|A)$', x['feature_code'])
    # Short-circuit: the population is only consulted when the code misses.
    return code_match or x['population'] >= 49589
def map_(x):
    """Reduce a parsed row to the fields kept in the JSON output.

    x -- dict for one input row; it must contain every field listed below.

    Returns a new dict holding only those fields. Raises KeyError if one
    of them is missing from x.
    """
    # Each name needs its own comma: adjacent string literals are silently
    # concatenated, which previously merged 'feature_code' and 'latitude'
    # into one bogus key and made every call fail with KeyError.
    fields = (
        'country_code', 'elevation', 'feature_code',
        'latitude', 'longitude', 'name', 'population',
    )
    data = {}
    for key in fields:
        data[key] = x[key]
    return data
def sort_(x):
    """Sort key: the negated population, so larger populations sort first."""
    population = x['population']
    return -population

# Runs at module level: read the table at `source`, keep the rows accepted
# by filter_, reduce each one with map_, order them with sort_, and write
# the result to `target` as JSON.
read_table(source, target, keys, filter_, map_, sort_)