dates, reduce number of imdb pages loaded

2010-07-10 13:54:33 +02:00 · 2010-07-10 13:54:33 +02:00 · f3147437b6
commit f3147437b6
parent 18ce4cd92d
4 changed files with 16 additions and 8 deletions
--- a/1
+++ b/1
@@ -5,6 +5,7 @@ Depends:
 python-chardet (http://chardet.feedparser.org/)
 python-feedparser (http://www.feedparser.org/)
 python-beautifulsoup (http://www.crummy.com/software/BeautifulSoup/)
+django (optional, otherwise dates < 1900 are not supported)
 Usage:
 import ox
--- a/ox/utils.py
+++ b/ox/utils.py
@@ -0,0 +1,7 @@
+# -*- coding: utf-8 -*-
+# vi:si:et:sw=4:sts=4:ts=4
+try:
+    from django.utils.datetime_safe import datetime
+except:
+    from datetime import datetime
--- a/ox/web/imdb.py
+++ b/ox/web/imdb.py
@@ -26,7 +26,7 @@ class Imdb(SiteParser):
        },
        'cast': {
-            'page': 'fullcredits',
+            'page': 'combined',
            're': [
                '<td class="nm">.*?>(.*?)</a>.*?<td class="char">(.*?)</td>',
                lambda ll: [stripTags(l) for l in ll]
@@ -34,7 +34,7 @@ class Imdb(SiteParser):
            'type': 'list'
        },
        'cinematographers': {
-            'page': 'fullcredits',
+            'page': 'combined',
            're': [
                lambda data: data.split('Series Crew')[0],
                'Cinematography by</a>(.*?)</table>',
@@ -53,7 +53,7 @@ class Imdb(SiteParser):
            'type': 'list'
        },
        'directors': {
-            'page': 'fullcredits',
+            'page': 'combined',
            're': [
                lambda data: data.split('Series Crew')[0],
                'Directed by</a>(.*?)</table>',
@@ -62,7 +62,7 @@ class Imdb(SiteParser):
            'type': 'list'
        },
        'editors': {
-            'page': 'fullcredits',
+            'page': 'combined',
            're': [
                lambda data: data.split('Series Crew')[0],
                'Film Editing by</a>(.*?)</table>',
@@ -152,7 +152,7 @@ class Imdb(SiteParser):
            'type': 'string'
        },
        'writers': {
-            'page': 'fullcredits',
+            'page': 'combined',
            're': [
                lambda data: data.split('Series Crew')[0],
                'Writing credits</a>(.*?)</table>',
--- a/ox/web/siteparser.py
+++ b/ox/web/siteparser.py
@@ -1,10 +1,10 @@
 # -*- coding: utf-8 -*-
 # vi:si:et:sw=4:sts=4:ts=4
 import re
-from datetime import datetime
-from ox.cache import readUrlUnicode
+from ..cache import readUrlUnicode
-from ox import stripTags, decodeHtml
+from .. import stripTags, decodeHtml
+from ..utils import datetime
 def cleanup(key, data, data_type):