dates, reduce number of imdb pages loaded
This commit is contained in:
parent
18ce4cd92d
commit
f3147437b6
4 changed files with 16 additions and 8 deletions
1
README
1
README
|
@ -5,6 +5,7 @@ Depends:
|
|||
python-chardet (http://chardet.feedparser.org/)
|
||||
python-feedparser (http://www.feedparser.org/)
|
||||
python-beautifulsoup (http://www.crummy.com/software/BeautifulSoup/)
|
||||
django (optional, otherwise dates < 1900 are not supported)
|
||||
|
||||
Usage:
|
||||
import ox
|
||||
|
|
7
ox/utils.py
Normal file
7
ox/utils.py
Normal file
|
@ -0,0 +1,7 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
# vi:si:et:sw=4:sts=4:ts=4
|
||||
# Export a ``datetime`` class for the rest of the package.
# Prefer Django's ``datetime_safe`` variant when Django is installed (the
# README lists Django as optional and notes that dates < 1900 are not
# supported without it); otherwise fall back to the standard library class.
try:
    from django.utils.datetime_safe import datetime
except ImportError:
    # Only a missing/unimportable Django should trigger the fallback; a bare
    # ``except`` would also swallow KeyboardInterrupt/SystemExit.
    from datetime import datetime
|
||||
|
|
@ -26,7 +26,7 @@ class Imdb(SiteParser):
|
|||
|
||||
},
|
||||
'cast': {
|
||||
'page': 'fullcredits',
|
||||
'page': 'combined',
|
||||
're': [
|
||||
'<td class="nm">.*?>(.*?)</a>.*?<td class="char">(.*?)</td>',
|
||||
lambda ll: [stripTags(l) for l in ll]
|
||||
|
@ -34,7 +34,7 @@ class Imdb(SiteParser):
|
|||
'type': 'list'
|
||||
},
|
||||
'cinematographers': {
|
||||
'page': 'fullcredits',
|
||||
'page': 'combined',
|
||||
're': [
|
||||
lambda data: data.split('Series Crew')[0],
|
||||
'Cinematography by</a>(.*?)</table>',
|
||||
|
@ -53,7 +53,7 @@ class Imdb(SiteParser):
|
|||
'type': 'list'
|
||||
},
|
||||
'directors': {
|
||||
'page': 'fullcredits',
|
||||
'page': 'combined',
|
||||
're': [
|
||||
lambda data: data.split('Series Crew')[0],
|
||||
'Directed by</a>(.*?)</table>',
|
||||
|
@ -62,7 +62,7 @@ class Imdb(SiteParser):
|
|||
'type': 'list'
|
||||
},
|
||||
'editors': {
|
||||
'page': 'fullcredits',
|
||||
'page': 'combined',
|
||||
're': [
|
||||
lambda data: data.split('Series Crew')[0],
|
||||
'Film Editing by</a>(.*?)</table>',
|
||||
|
@ -152,7 +152,7 @@ class Imdb(SiteParser):
|
|||
'type': 'string'
|
||||
},
|
||||
'writers': {
|
||||
'page': 'fullcredits',
|
||||
'page': 'combined',
|
||||
're': [
|
||||
lambda data: data.split('Series Crew')[0],
|
||||
'Writing credits</a>(.*?)</table>',
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
# vi:si:et:sw=4:sts=4:ts=4
|
||||
import re
|
||||
from datetime import datetime
|
||||
|
||||
from ox.cache import readUrlUnicode
|
||||
from ox import stripTags, decodeHtml
|
||||
from ..cache import readUrlUnicode
|
||||
from .. import stripTags, decodeHtml
|
||||
from ..utils import datetime
|
||||
|
||||
|
||||
def cleanup(key, data, data_type):
|
||||
|
|
Loading…
Reference in a new issue