diff --git a/README b/README index acdfb8c..19f317f 100644 --- a/README +++ b/README @@ -5,6 +5,7 @@ Depends: python-chardet (http://chardet.feedparser.org/) python-feedparser (http://www.feedparser.org/) python-beautifulsoup (http://www.crummy.com/software/BeautifulSoup/) + django (optional, otherwise dates < 1900 are not supported) Usage: import ox diff --git a/ox/utils.py b/ox/utils.py new file mode 100644 index 0000000..332d5e8 --- /dev/null +++ b/ox/utils.py @@ -0,0 +1,7 @@ +# -*- coding: utf-8 -*- +# vi:si:et:sw=4:sts=4:ts=4 +try: + from django.utils.datetime_safe import datetime +except: + from datetime import datetime + diff --git a/ox/web/imdb.py b/ox/web/imdb.py index ff02ef9..124ad4a 100644 --- a/ox/web/imdb.py +++ b/ox/web/imdb.py @@ -26,7 +26,7 @@ class Imdb(SiteParser): }, 'cast': { - 'page': 'fullcredits', + 'page': 'combined', 're': [ '.*?>(.*?).*?(.*?)', lambda ll: [stripTags(l) for l in ll] @@ -34,7 +34,7 @@ class Imdb(SiteParser): 'type': 'list' }, 'cinematographers': { - 'page': 'fullcredits', + 'page': 'combined', 're': [ lambda data: data.split('Series Crew')[0], 'Cinematography by(.*?)', @@ -53,7 +53,7 @@ class Imdb(SiteParser): 'type': 'list' }, 'directors': { - 'page': 'fullcredits', + 'page': 'combined', 're': [ lambda data: data.split('Series Crew')[0], 'Directed by(.*?)', @@ -62,7 +62,7 @@ class Imdb(SiteParser): 'type': 'list' }, 'editors': { - 'page': 'fullcredits', + 'page': 'combined', 're': [ lambda data: data.split('Series Crew')[0], 'Film Editing by(.*?)', @@ -152,7 +152,7 @@ class Imdb(SiteParser): 'type': 'string' }, 'writers': { - 'page': 'fullcredits', + 'page': 'combined', 're': [ lambda data: data.split('Series Crew')[0], 'Writing credits(.*?)', diff --git a/ox/web/siteparser.py b/ox/web/siteparser.py index 2fa4332..b2064e5 100644 --- a/ox/web/siteparser.py +++ b/ox/web/siteparser.py @@ -1,10 +1,10 @@ # -*- coding: utf-8 -*- # vi:si:et:sw=4:sts=4:ts=4 import re -from datetime import datetime -from ox.cache import readUrlUnicode -from ox import stripTags, decodeHtml +from ..cache import readUrlUnicode +from .. import stripTags, decodeHtml +from ..utils import datetime def cleanup(key, data, data_type):