dates, reduce number of imdb pages loaded

This commit is contained in:
j 2010-07-10 13:54:33 +02:00
parent 18ce4cd92d
commit f3147437b6
4 changed files with 16 additions and 8 deletions

1
README
View file

@ -5,6 +5,7 @@ Depends:
python-chardet (http://chardet.feedparser.org/)
python-feedparser (http://www.feedparser.org/)
python-beautifulsoup (http://www.crummy.com/software/BeautifulSoup/)
django (optional, otherwise dates < 1900 are not supported)
Usage:
import ox

7
ox/utils.py Normal file
View file

@ -0,0 +1,7 @@
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
# Export a single `datetime` name for the rest of the package.
# Django's datetime_safe variant is preferred when Django is installed
# (Django is an optional dependency; without it, dates before 1900 are
# not supported). Otherwise fall back to the standard library class.
try:
    from django.utils.datetime_safe import datetime
except ImportError:
    # Only a missing/unimportable Django should trigger the fallback;
    # a bare except would also hide KeyboardInterrupt and real bugs.
    from datetime import datetime

View file

@ -26,7 +26,7 @@ class Imdb(SiteParser):
},
'cast': {
'page': 'fullcredits',
'page': 'combined',
're': [
'<td class="nm">.*?>(.*?)</a>.*?<td class="char">(.*?)</td>',
lambda ll: [stripTags(l) for l in ll]
@ -34,7 +34,7 @@ class Imdb(SiteParser):
'type': 'list'
},
'cinematographers': {
'page': 'fullcredits',
'page': 'combined',
're': [
lambda data: data.split('Series Crew')[0],
'Cinematography by</a>(.*?)</table>',
@ -53,7 +53,7 @@ class Imdb(SiteParser):
'type': 'list'
},
'directors': {
'page': 'fullcredits',
'page': 'combined',
're': [
lambda data: data.split('Series Crew')[0],
'Directed by</a>(.*?)</table>',
@ -62,7 +62,7 @@ class Imdb(SiteParser):
'type': 'list'
},
'editors': {
'page': 'fullcredits',
'page': 'combined',
're': [
lambda data: data.split('Series Crew')[0],
'Film Editing by</a>(.*?)</table>',
@ -152,7 +152,7 @@ class Imdb(SiteParser):
'type': 'string'
},
'writers': {
'page': 'fullcredits',
'page': 'combined',
're': [
lambda data: data.split('Series Crew')[0],
'Writing credits</a>(.*?)</table>',

View file

@ -1,10 +1,10 @@
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
import re
from datetime import datetime
from ox.cache import readUrlUnicode
from ox import stripTags, decodeHtml
from ..cache import readUrlUnicode
from .. import stripTags, decodeHtml
from ..utils import datetime
def cleanup(key, data, data_type):