update Geo module

rolux 2012-04-02 23:13:05 +02:00
parent 5a8d836233
commit a7c50d840b


@@ -8,11 +8,20 @@ import sys
 import urllib
 CACHE = sys.argv[-1] == '-cache'
 try:
     DATA = ox.jsonc.loads(ox.file.read_file('../jsonc/countries.jsonc'))
 except:
+    print 'parse error, see jsonc/debug.json'
     ox.file.write_file('../jsonc/debug.json', ox.js.minify(ox.file.read_file('../jsonc/countries.jsonc')))
     sys.exit()
+GEO = {}
+for country in json.loads(ox.file.read_file('../../../source/Ox.Geo/json/Ox.Geo.json')):
+    GEO[country['code']] = {}
+    for key in ['area', 'lat', 'lng', 'south', 'west', 'north', 'east']:
+        GEO[country['code']][key] = country[key]
 LOGS = {}
 def decode_wikipedia_id(id):
@@ -77,12 +86,20 @@ def get_countries():
     # Country data
     countries = sorted(countries, key=sort)
     countries = map(lambda x: get_country_data(x), countries)
+    # Independence
+    for i, country in enumerate(countries):
+        if 'created' in country and not 'dependency' in country:
+            name = country['created']['country'][0]
+            data = filter(lambda x: x['name'] == name, countries)[0]
+            if 'dependency' in data:
+                countries[i]['independence'] = {
+                    'country': data['dependency'],
+                    'date': country['created']['date']
+                }
+    sys.exit()
     # Flags
     countries = sorted(countries, key=sort)
     flags = get_flags(countries)
-    for country in countries:
-        if country['code'] in flags:
-            country['flag'] = flags[country['code']]
     return countries
 def get_country_data(country):
@@ -97,9 +114,6 @@ def get_country_data(country):
     match = re.search('"/wiki/\.(\w{2})"', html)
     if match:
         country['code'] = match.group(1).upper()
-    # alias
-    if country['code'] in DATA['alias']:
-        country['alias'] = True
     # continents and regions
     for continent, regions in DATA['continents'].iteritems():
         for region, countries in regions.iteritems():
@@ -107,19 +121,9 @@ def get_country_data(country):
                 country['continent'] = continent
                 country['region'] = region
                 break
-    # created and dissolved
+    # created
     if name in DATA['created']:
         country['created'] = DATA['created'][name]
-    if name in DATA['dissolved']:
-        country['dissolved'] = DATA['dissolved'][name]
-    for c, d in DATA['dissolved'].iteritems():
-        if d['dissolved'] in ['merged', 'split']:
-            cs = d['country'] if type(d['country']) == list else [d['country']]
-            if name in cs:
-                country['created'] = {
-                    'country': c,
-                    'date': d['date']
-                }
     # dependencies
     for c, d in DATA['dependencies'].iteritems():
         c = c.split(', ')
@@ -134,6 +138,12 @@ def get_country_data(country):
             country['disputes'] = d if not 'disputes' in country else country['disputes'] + d
         elif name in d:
             country['disputed'] = c if not 'disputed' in country else country['disputed'] + c
+    # dissolved
+    if name in DATA['dissolved']:
+        country['dissolved'] = DATA['dissolved'][name]
+    # exception
+    if country['code'] in DATA['exception']:
+        country['exception'] = True
     # flag
     if name in DATA['flag']:
         file = DATA['flag'][name] if DATA['flag'][name][-4:] == '.png' else DATA['flag'][name] + '.svg'
@@ -150,6 +160,9 @@ def get_country_data(country):
     # imdb
     if name in DATA['imdb']:
         country['imdbName'] = DATA['imdb'][name]
+    # independence
+    if name in DATA['independence']:
+        country['independence'] = DATA['independence'][name]
     # languages
     for language, c in DATA['languages'].iteritems():
         if c == name:
@@ -157,6 +170,10 @@ def get_country_data(country):
                 country['languages'] = [language]
             else:
                 country['languages'].append(language)
+    # area, lat, lng, south, west, north, east
+    if country['code'] in GEO:
+        for key in GEO[country['code']]:
+            country[key] = GEO[country['code']][key]
     return country
 def get_flag(id):
@@ -238,6 +255,77 @@ def get_imdb_languages():
             LOGS['new languages'].append(language)
     ox.file.write_json('../json/imdbLanguages.json', imdb_languages, indent=4, sort_keys=True)
+def parse_txt():
+    data = {
+        'created': {},
+        'dissolved': {},
+        'independence': {}
+    }
+    f = open('../txt/countries.txt')
+    lines = map(lambda x: x.strip(), f.readlines())
+    f.close()
+    for line in filter(lambda x: x[0] != '#', lines):
+        date, country_a, operator, country_b = re.compile(
+            '([\d\-]+) +(.+) ([\*=\+\-><]) (.+)'
+        ).match(line).groups()
+        countries_a = country_a.split(' / ')
+        countries_b = country_b.split(' / ')
+        if operator == '*':
+            data['independence'][country_b] = {
+                'country': countries_a,
+                'date': date
+            }
+        elif operator == '=':
+            data['dissolved'][country_a] = {
+                'country': countries_b,
+                'date': date,
+                'dissolved': 'renamed'
+            }
+            data['created'][country_b] = {
+                'country': countries_a,
+                'date': date,
+                'created': 'renamed'
+            }
+        elif operator == '+':
+            for country in countries_a:
+                data['dissolved'][country] = {
+                    'country': countries_b,
+                    'date': date,
+                    'dissolved': 'joined'
+                }
+        elif operator == '-':
+            for country in countries_b:
+                data['created'][country] = {
+                    'country': countries_a,
+                    'date': date,
+                    'created': 'split'
+                }
+        elif operator == '>':
+            for country in countries_a:
+                data['dissolved'][country] = {
+                    'country': countries_b,
+                    'date': date,
+                    'dissolved': 'merged'
+                }
+            data['created'][country_b] = {
+                'country': countries_a,
+                'date': date,
+                'created': 'merged'
+            }
+        elif operator == '<':
+            data['dissolved'][country_a] = {
+                'country': countries_b,
+                'date': date,
+                'dissolved': 'split'
+            }
+            for country in countries_b:
+                data['created'][country] = {
+                    'country': countries_a,
+                    'date': date,
+                    'created': 'merged'
+                }
+    return data
 def read_url(url):
     print 'reading', url
     return ox.cache.readUrl(url) if CACHE else ox.net.readUrl(url)
@@ -252,6 +340,10 @@ def read_wikipedia_url(id):
     return html
 if __name__ == '__main__':
+    data = parse_txt()
+    DATA['created'] = data['created']
+    DATA['dissolved'] = data['dissolved']
+    DATA['independence'] = data['independence']
     countries = get_countries()
     ox.file.write_json('../json/countries.json', countries, indent=4, sort_keys=True)
     LOGS['total'] = len(countries)
@@ -260,14 +352,19 @@ if __name__ == '__main__':
     LOGS['current independent'] = 0
     LOGS['current dependent'] = 0
     LOGS['current disputed'] = 0
+    LOGS['current exception'] = 0
     LOGS['dissolved independent'] = 0
     LOGS['dissolved dependent'] = 0
     LOGS['dissolved disputed'] = 0
-    LOGS['alias'] = 0
+    LOGS['dissolved exception'] = 0
     for country in countries:
-        key = 'alias' if 'alias' in country else ' '.join([
+        key = ' '.join([
             'dissolved' if 'dissolved' in country else 'current',
-            'disputed' if 'disputed' in country else ('dependent' if 'dependency' in country else 'independent')
+            'exception' if 'exception' in country else (
+                'disputed' if 'disputed' in country else (
+                    'dependent' if 'dependency' in country else 'independent'
+                )
+            )
         ])
         LOGS[key] += 1
     get_imdb_countries(countries)
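
A minimal sketch of the line format the new parse_txt() reads from ../txt/countries.txt (the txt file itself is not part of this diff, so the sample line below is only illustrative): each non-comment line is "date left-side operator right-side", with multiple countries separated by " / ", where "*" records independence, "=" a renaming, "+" countries joining another, "-" countries splitting off, ">" countries merging into one, and "<" one country splitting into several.

import re

# Illustrative input line; the real ../txt/countries.txt is not shown in this commit.
line = '1993-01-01 Czechoslovakia < Czech Republic / Slovakia'

# The same pattern parse_txt() compiles: date, left-hand side, operator, right-hand side.
date, country_a, operator, country_b = re.compile(
    r'([\d\-]+) +(.+) ([\*=\+\-><]) (.+)'
).match(line).groups()
countries_a = country_a.split(' / ')
countries_b = country_b.split(' / ')

# '<' marks a split: the left country is dissolved, the right-hand countries are created
# (parse_txt() labels the created side 'merged', exactly as in the diff above).
assert operator == '<'
print({
    'dissolved': {country_a: {'country': countries_b, 'date': date, 'dissolved': 'split'}},
    'created': dict((c, {'country': countries_a, 'date': date, 'created': 'merged'})
                    for c in countries_b)
})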