diff --git a/import_padma.py b/import_padma.py index b854b57..6b4fd04 100755 --- a/import_padma.py +++ b/import_padma.py @@ -136,11 +136,15 @@ def item_data(data): value = value.replace('\r\n', '\n').strip() d[{ u'id': u'oldId', - u'categories': u'category', + u'categories': u'topic', + u'source': u'project', + u'collection': u'source', u'languages': u'language', }.get(key, key)] = value if 'director' in d: - d['director'] = unicode(d['director']).strip().split(', ') + d['director'] = unicode(d['director']).replace(' and ', ', ').strip().split(', ') + d['director'] = filter(lambda x: x.strip().lower() not in ('none', 'n/a', '', 'various'), + d['director']) for key in ('layers', 'duration', 'size', 'public'): if key in d: del d[key]