update Geo module

2012-04-02 23:13:05 +02:00 · 2012-04-02 23:13:05 +02:00 · a7c50d840b
commit a7c50d840b
parent 5a8d836233
1 changed files with 117 additions and 20 deletions
--- a/tools/geo/py/geo.py
+++ b/tools/geo/py/geo.py
@ -8,11 +8,20 @@ import sys
 import urllib

 CACHE = sys.argv[-1] == '-cache'
+
+# Load the country data (DATA) from countries.jsonc. If that raises, write the
+# ox.js.minify output of the same file to debug.json for inspection and exit.
+# NOTE(review): the bare except catches every error, not only parse errors.
 try:
    DATA = ox.jsonc.loads(ox.file.read_file('../jsonc/countries.jsonc'))
 except:
+    print 'parse error, see jsonc/debug.json'
    ox.file.write_file('../jsonc/debug.json', ox.js.minify(ox.file.read_file('../jsonc/countries.jsonc')))
    sys.exit()
+
+# GEO maps a country code to its geographic fields, copied from Ox.Geo.json.
+# get_country_data() copies these fields onto every country whose code is in GEO.
+GEO = {}
+for country in json.loads(ox.file.read_file('../../../source/Ox.Geo/json/Ox.Geo.json')):
+    GEO[country['code']] = {}
+    for key in ['area', 'lat', 'lng', 'south', 'west', 'north', 'east']:
+        GEO[country['code']][key] = country[key]
+
 LOGS = {}

 def decode_wikipedia_id(id):
@ -77,12 +86,20 @@ def get_countries():
    # Country data
    countries = sorted(countries, key=sort)
    countries = map(lambda x: get_country_data(x), countries)
+    # Independence
+    for i, country in enumerate(countries):
+        if 'created' in country and not 'dependency' in country:
+            name = country['created']['country'][0]
+            data = filter(lambda x: x['name'] == name, countries)[0]
+            if 'dependency' in data:
+                countries[i]['independence'] = {
+                    'country': data['dependency'],
+                    'date': country['created']['date']
+                }
+    sys.exit()
    # Flags
    countries = sorted(countries, key=sort)
    flags = get_flags(countries)
-    for country in countries:
-        if country['code'] in flags:
-            country['flag'] = flags[country['code']]
    return countries

 def get_country_data(country):
@ -97,9 +114,6 @@ def get_country_data(country):
            match = re.search('"/wiki/\.(\w{2})"', html)
        if match:
            country['code'] = match.group(1).upper()
-    # alias
-    if country['code'] in DATA['alias']:
-        country['alias'] = True
    # continents and regions
    for continent, regions in DATA['continents'].iteritems():
        for region, countries in regions.iteritems():
@ -107,19 +121,9 @@ def get_country_data(country):
                country['continent'] = continent
                country['region'] = region
                break
-    # created and dissolved
+    # created
    if name in DATA['created']:
        country['created'] = DATA['created'][name]
-    if name in DATA['dissolved']:
-        country['dissolved'] = DATA['dissolved'][name]
-    for c, d in DATA['dissolved'].iteritems():
-        if d['dissolved'] in ['merged', 'split']:
-            cs = d['country'] if type(d['country']) == list else [d['country']]
-            if name in cs:
-                country['created'] = {
-                    'country': c,
-                    'date': d['date']
-                }                
    # dependencies
    for c, d in DATA['dependencies'].iteritems():
        c = c.split(', ')
@ -134,6 +138,12 @@ def get_country_data(country):
            country['disputes'] = d if not 'disputes' in country else country['disputes'] + d
        elif name in d:
            country['disputed'] = c if not 'disputed' in country else country['disputed'] + c
+    # dissolved
+    if name in DATA['dissolved']:
+        country['dissolved'] = DATA['dissolved'][name]             
+    # exception
+    if country['code'] in DATA['exception']:
+        country['exception'] = True
    # flag
    if name in DATA['flag']:
        file = DATA['flag'][name] if DATA['flag'][name][-4:] == '.png' else DATA['flag'][name] + '.svg'
@ -150,6 +160,9 @@ def get_country_data(country):
    # imdb
    if name in DATA['imdb']:
        country['imdbName'] = DATA['imdb'][name]
+    # independence
+    if name in DATA['independence']:
+        country['independence'] = DATA['independence'][name]
    # languages
    for language, c in DATA['languages'].iteritems():
        if c == name:
@ -157,6 +170,10 @@ def get_country_data(country):
                country['languages'] = [language]
            else:
                country['languages'].append(language)
+    # area, lat, lng, south, west, north, east
+    if country['code'] in GEO:
+        for key in GEO[country['code']]:
+            country[key] = GEO[country['code']][key]
    return country

 def get_flag(id):
@ -238,6 +255,77 @@ def get_imdb_languages():
            LOGS['new languages'].append(language)
    ox.file.write_json('../json/imdbLanguages.json', imdb_languages, indent=4, sort_keys=True)

+def parse_txt():
+    """
+    Parse ../txt/countries.txt into 'created', 'dissolved' and 'independence' data.
+
+    Blank lines and lines starting with '#' are skipped. Every other line must
+    look like '<date> <country_a> <operator> <country_b>', where the date
+    consists of digits and hyphens and either side may list several countries
+    separated by ' / '. Operators:
+        A * B         independence entry for B, referring to A
+        A = B         A was renamed to B
+        A / A2 + B    each country on the left joined B
+        A - B / B2    each country on the right split from A
+        A / A2 > B    the countries on the left merged into B
+        A < B / B2    A split into the countries on the right
+
+    Returns a dict with the keys 'created', 'dissolved' and 'independence',
+    each mapping a country name to a dict with 'country' (list of the
+    countries on the other side of the operator), 'date' and, for 'created'
+    and 'dissolved', the kind of event stored under that same key.
+    Raises ValueError if a line does not match the expected format.
+    """
+    data = {
+        'created': {},
+        'dissolved': {},
+        'independence': {}
+    }
+    # compiled once, not once per line
+    pattern = re.compile(r'([\d\-]+) +(.+) ([\*=\+\-><]) (.+)')
+    with open('../txt/countries.txt') as f:
+        lines = [line.strip() for line in f]
+    for line in lines:
+        # skip blank lines (which used to raise IndexError) and comments
+        if not line or line[0] == '#':
+            continue
+        match = pattern.match(line)
+        if match is None:
+            raise ValueError('cannot parse line in countries.txt: %r' % line)
+        date, country_a, operator, country_b = match.groups()
+        countries_a = country_a.split(' / ')
+        countries_b = country_b.split(' / ')
+        if operator == '*':
+            data['independence'][country_b] = {
+                'country': countries_a,
+                'date': date
+            }
+        elif operator == '=':
+            data['dissolved'][country_a] = {
+                'country': countries_b,
+                'date': date,
+                'dissolved': 'renamed'
+            }
+            data['created'][country_b] = {
+                'country': countries_a,
+                'date': date,
+                'created': 'renamed'
+            }
+        elif operator == '+':
+            for country in countries_a:
+                data['dissolved'][country] = {
+                    'country': countries_b,
+                    'date': date,
+                    'dissolved': 'joined'
+                }
+        elif operator == '-':
+            for country in countries_b:
+                data['created'][country] = {
+                    'country': countries_a,
+                    'date': date,
+                    'created': 'split'
+                }
+        elif operator == '>':
+            for country in countries_a:
+                data['dissolved'][country] = {
+                    'country': countries_b,
+                    'date': date,
+                    'dissolved': 'merged'
+                }
+            data['created'][country_b] = {
+                'country': countries_a,
+                'date': date,
+                'created': 'merged'
+            }
+        elif operator == '<':
+            data['dissolved'][country_a] = {
+                'country': countries_b,
+                'date': date,
+                'dissolved': 'split'
+            }
+            for country in countries_b:
+                # successors of a split are created by 'split', mirroring
+                # the 'dissolved' entry above (was mislabeled 'merged')
+                data['created'][country] = {
+                    'country': countries_a,
+                    'date': date,
+                    'created': 'split'
+                }
+    return data
+
 def read_url(url):
    print 'reading', url
    return ox.cache.readUrl(url) if CACHE else ox.net.readUrl(url)
@ -252,6 +340,10 @@ def read_wikipedia_url(id):
    return html

 if __name__ == '__main__':
+    data = parse_txt()
+    DATA['created'] = data['created']
+    DATA['dissolved'] = data['dissolved']
+    DATA['independence'] = data['independence']
    countries = get_countries()
    ox.file.write_json('../json/countries.json', countries, indent=4, sort_keys=True)
    LOGS['total'] = len(countries)
@ -260,14 +352,19 @@ if __name__ == '__main__':
    LOGS['current independent'] = 0
    LOGS['current dependent'] = 0
    LOGS['current disputed'] = 0
+    LOGS['current exception'] = 0
    LOGS['dissolved independent'] = 0
    LOGS['dissolved dependent'] = 0
    LOGS['dissolved disputed'] = 0
-    LOGS['alias'] = 0
+    LOGS['dissolved exception'] = 0
    for country in countries:
-        key = 'alias' if 'alias' in country else ' '.join([
+        key = ' '.join([
            'dissolved' if 'dissolved' in country else 'current',
-            'disputed' if 'disputed' in country else ('dependent' if 'dependency' in country else 'independent')
+            'exception' if 'exception' in country else (
+                'disputed' if 'disputed' in country else (
+                    'dependent' if 'dependency' in country else 'independent'
+                )
+            )
        ])
        LOGS[key] += 1
    get_imdb_countries(countries)