From a7c50d840b5f0106f8642e7ff25003307b877193 Mon Sep 17 00:00:00 2001
From: rolux
Date: Mon, 2 Apr 2012 23:13:05 +0200
Subject: [PATCH] update Geo module

---
 tools/geo/py/geo.py | 137 +++++++++++++++++++++++++++++++++++++-------
 1 file changed, 117 insertions(+), 20 deletions(-)

diff --git a/tools/geo/py/geo.py b/tools/geo/py/geo.py
index 5ecfd7d1..9f6d6996 100644
--- a/tools/geo/py/geo.py
+++ b/tools/geo/py/geo.py
@@ -8,11 +8,20 @@ import sys
 import urllib
 
 CACHE = sys.argv[-1] == '-cache'
+
 try:
     DATA = ox.jsonc.loads(ox.file.read_file('../jsonc/countries.jsonc'))
 except:
+    print 'parse error, see jsonc/debug.json'
     ox.file.write_file('../jsonc/debug.json', ox.js.minify(ox.file.read_file('../jsonc/countries.jsonc')))
     sys.exit()
+
+GEO = {}
+for country in json.loads(ox.file.read_file('../../../source/Ox.Geo/json/Ox.Geo.json')):
+    GEO[country['code']] = {}
+    for key in ['area', 'lat', 'lng', 'south', 'west', 'north', 'east']:
+        GEO[country['code']][key] = country[key]
+
 LOGS = {}
 
 def decode_wikipedia_id(id):
@@ -77,12 +86,20 @@ def get_countries():
     # Country data
     countries = sorted(countries, key=sort)
     countries = map(lambda x: get_country_data(x), countries)
+    # Independence
+    for i, country in enumerate(countries):
+        if 'created' in country and not 'dependency' in country:
+            name = country['created']['country'][0]
+            data = filter(lambda x: x['name'] == name, countries)[0]
+            if 'dependency' in data:
+                countries[i]['independence'] = {
+                    'country': data['dependency'],
+                    'date': country['created']['date']
+                }
+    sys.exit()
     # Flags
     countries = sorted(countries, key=sort)
     flags = get_flags(countries)
-    for country in countries:
-        if country['code'] in flags:
-            country['flag'] = flags[country['code']]
     return countries
 
 def get_country_data(country):
@@ -97,9 +114,6 @@
     match = re.search('"/wiki/\.(\w{2})"', html)
     if match:
         country['code'] = match.group(1).upper()
-    # alias
-    if country['code'] in DATA['alias']:
-        country['alias'] = True
     # continents and regions
     for continent, regions in DATA['continents'].iteritems():
         for region, countries in regions.iteritems():
@@ -107,19 +121,9 @@ if name in countries:
                 country['continent'] = continent
                 country['region'] = region
                 break
-    # created and dissolved
+    # created
     if name in DATA['created']:
         country['created'] = DATA['created'][name]
-    if name in DATA['dissolved']:
-        country['dissolved'] = DATA['dissolved'][name]
-    for c, d in DATA['dissolved'].iteritems():
-        if d['dissolved'] in ['merged', 'split']:
-            cs = d['country'] if type(d['country']) == list else [d['country']]
-            if name in cs:
-                country['created'] = {
-                    'country': c,
-                    'date': d['date']
-                }
     # dependencies
     for c, d in DATA['dependencies'].iteritems():
         c = c.split(', ')
@@ -134,6 +138,12 @@
             country['disputes'] = d if not 'disputes' in country else country['disputes'] + d
         elif name in d:
             country['disputed'] = c if not 'disputed' in country else country['disputed'] + c
+    # dissolved
+    if name in DATA['dissolved']:
+        country['dissolved'] = DATA['dissolved'][name]
+    # exception
+    if country['code'] in DATA['exception']:
+        country['exception'] = True
     # flag
     if name in DATA['flag']:
         file = DATA['flag'][name] if DATA['flag'][name][-4:] == '.png' else DATA['flag'][name] + '.svg'
@@ -150,6 +160,9 @@
     # imdb
     if name in DATA['imdb']:
         country['imdbName'] = DATA['imdb'][name]
+    # independence
+    if name in DATA['independence']:
+        country['independence'] = DATA['independence'][name]
     # languages
     for language, c in DATA['languages'].iteritems():
         if c == name:
@@ -157,6 +170,10 @@
                 country['languages'] = [language]
             else:
                 country['languages'].append(language)
+    # area, lat, lng, south, west, north, east
+    if country['code'] in GEO:
+        for key in GEO[country['code']]:
+            country[key] = GEO[country['code']][key]
     return country
 
 def get_flag(id):
@@ -238,6 +255,77 @@ def get_imdb_languages():
                 LOGS['new languages'].append(language)
     ox.file.write_json('../json/imdbLanguages.json', imdb_languages, indent=4, sort_keys=True)
 
+def parse_txt():
+    data = {
+        'created': {},
+        'dissolved': {},
+        'independence': {}
+    }
+    f = open('../txt/countries.txt')
+    lines = map(lambda x: x.strip(), f.readlines())
+    f.close()
+    for line in filter(lambda x: x[0] != '#', lines):
+        date, country_a, operator, country_b = re.compile(
+            '([\d\-]+) +(.+) ([\*=\+\-><]) (.+)'
+        ).match(line).groups()
+        countries_a = country_a.split(' / ')
+        countries_b = country_b.split(' / ')
+        if operator == '*':
+            data['independence'][country_b] = {
+                'country': countries_a,
+                'date': date
+            }
+        elif operator == '=':
+            data['dissolved'][country_a] = {
+                'country': countries_b,
+                'date': date,
+                'dissolved': 'renamed'
+            }
+            data['created'][country_b] = {
+                'country': countries_a,
+                'date': date,
+                'created': 'renamed'
+            }
+        elif operator == '+':
+            for country in countries_a:
+                data['dissolved'][country] = {
+                    'country': countries_b,
+                    'date': date,
+                    'dissolved': 'joined'
+                }
+        elif operator == '-':
+            for country in countries_b:
+                data['created'][country] = {
+                    'country': countries_a,
+                    'date': date,
+                    'created': 'split'
+                }
+        elif operator == '>':
+            for country in countries_a:
+                data['dissolved'][country] = {
+                    'country': countries_b,
+                    'date': date,
+                    'dissolved': 'merged'
+                }
+            data['created'][country_b] = {
+                'country': countries_a,
+                'date': date,
+                'created': 'merged'
+            }
+        elif operator == '<':
+            data['dissolved'][country_a] = {
+                'country': countries_b,
+                'date': date,
+                'dissolved': 'split'
+            }
+            for country in countries_b:
+                data['created'][country] = {
+                    'country': countries_a,
+                    'date': date,
+                    'created': 'merged'
+                }
+    return data
+
 def read_url(url):
     print 'reading', url
     return ox.cache.readUrl(url) if CACHE else ox.net.readUrl(url)
@@ -252,6 +340,10 @@ def read_wikipedia_url(id):
     return html
 
 if __name__ == '__main__':
+    data = parse_txt()
+    DATA['created'] = data['created']
+    DATA['dissolved'] = data['dissolved']
+    DATA['independence'] = data['independence']
     countries = get_countries()
     ox.file.write_json('../json/countries.json', countries, indent=4, sort_keys=True)
     LOGS['total'] = len(countries)
@@ -260,14 +352,19 @@
     LOGS['current independent'] = 0
     LOGS['current dependent'] = 0
     LOGS['current disputed'] = 0
+    LOGS['current exception'] = 0
     LOGS['dissolved independent'] = 0
     LOGS['dissolved dependent'] = 0
     LOGS['dissolved disputed'] = 0
-    LOGS['alias'] = 0
+    LOGS['dissolved exception'] = 0
     for country in countries:
-        key = 'alias' if 'alias' in country else ' '.join([
+        key = ' '.join([
             'dissolved' if 'dissolved' in country else 'current',
-            'disputed' if 'disputed' in country else ('dependent' if 'dependency' in country else 'independent')
+            'exception' if 'exception' in country else (
+                'disputed' if 'disputed' in country else (
+                    'dependent' if 'dependency' in country else 'independent'
+                )
+            )
         ])
         LOGS[key] += 1
     get_imdb_countries(countries)