dates, reduce number of imdb pages loaded
This commit is contained in:
parent
18ce4cd92d
commit
f3147437b6
4 changed files with 16 additions and 8 deletions
1
README
1
README
|
@ -5,6 +5,7 @@ Depends:
|
||||||
python-chardet (http://chardet.feedparser.org/)
|
python-chardet (http://chardet.feedparser.org/)
|
||||||
python-feedparser (http://www.feedparser.org/)
|
python-feedparser (http://www.feedparser.org/)
|
||||||
python-beautifulsoup (http://www.crummy.com/software/BeautifulSoup/)
|
python-beautifulsoup (http://www.crummy.com/software/BeautifulSoup/)
|
||||||
|
django (optional; without it, dates before 1900 are not supported)
|
||||||
|
|
||||||
Usage:
|
Usage:
|
||||||
import ox
|
import ox
|
||||||
|
|
7
ox/utils.py
Normal file
7
ox/utils.py
Normal file
|
@ -0,0 +1,7 @@
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
# vi:si:et:sw=4:sts=4:ts=4
|
||||||
|
# Expose a single `datetime` name for the rest of the package.
# Django's datetime_safe variant is preferred when Django is installed;
# per the README, Django is optional and without it dates before 1900
# are not supported.
try:
    from django.utils.datetime_safe import datetime
except ImportError:
    # Django (or this module of it) is unavailable: fall back to the
    # standard library. Only ImportError is caught so that unrelated
    # failures (e.g. KeyboardInterrupt, SystemExit) are not swallowed.
    from datetime import datetime
|
||||||
|
|
|
@ -26,7 +26,7 @@ class Imdb(SiteParser):
|
||||||
|
|
||||||
},
|
},
|
||||||
'cast': {
|
'cast': {
|
||||||
'page': 'fullcredits',
|
'page': 'combined',
|
||||||
're': [
|
're': [
|
||||||
'<td class="nm">.*?>(.*?)</a>.*?<td class="char">(.*?)</td>',
|
'<td class="nm">.*?>(.*?)</a>.*?<td class="char">(.*?)</td>',
|
||||||
lambda ll: [stripTags(l) for l in ll]
|
lambda ll: [stripTags(l) for l in ll]
|
||||||
|
@ -34,7 +34,7 @@ class Imdb(SiteParser):
|
||||||
'type': 'list'
|
'type': 'list'
|
||||||
},
|
},
|
||||||
'cinematographers': {
|
'cinematographers': {
|
||||||
'page': 'fullcredits',
|
'page': 'combined',
|
||||||
're': [
|
're': [
|
||||||
lambda data: data.split('Series Crew')[0],
|
lambda data: data.split('Series Crew')[0],
|
||||||
'Cinematography by</a>(.*?)</table>',
|
'Cinematography by</a>(.*?)</table>',
|
||||||
|
@ -53,7 +53,7 @@ class Imdb(SiteParser):
|
||||||
'type': 'list'
|
'type': 'list'
|
||||||
},
|
},
|
||||||
'directors': {
|
'directors': {
|
||||||
'page': 'fullcredits',
|
'page': 'combined',
|
||||||
're': [
|
're': [
|
||||||
lambda data: data.split('Series Crew')[0],
|
lambda data: data.split('Series Crew')[0],
|
||||||
'Directed by</a>(.*?)</table>',
|
'Directed by</a>(.*?)</table>',
|
||||||
|
@ -62,7 +62,7 @@ class Imdb(SiteParser):
|
||||||
'type': 'list'
|
'type': 'list'
|
||||||
},
|
},
|
||||||
'editors': {
|
'editors': {
|
||||||
'page': 'fullcredits',
|
'page': 'combined',
|
||||||
're': [
|
're': [
|
||||||
lambda data: data.split('Series Crew')[0],
|
lambda data: data.split('Series Crew')[0],
|
||||||
'Film Editing by</a>(.*?)</table>',
|
'Film Editing by</a>(.*?)</table>',
|
||||||
|
@ -152,7 +152,7 @@ class Imdb(SiteParser):
|
||||||
'type': 'string'
|
'type': 'string'
|
||||||
},
|
},
|
||||||
'writers': {
|
'writers': {
|
||||||
'page': 'fullcredits',
|
'page': 'combined',
|
||||||
're': [
|
're': [
|
||||||
lambda data: data.split('Series Crew')[0],
|
lambda data: data.split('Series Crew')[0],
|
||||||
'Writing credits</a>(.*?)</table>',
|
'Writing credits</a>(.*?)</table>',
|
||||||
|
|
|
@ -1,10 +1,10 @@
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
# vi:si:et:sw=4:sts=4:ts=4
|
# vi:si:et:sw=4:sts=4:ts=4
|
||||||
import re
|
import re
|
||||||
from datetime import datetime
|
|
||||||
|
|
||||||
from ox.cache import readUrlUnicode
|
from ..cache import readUrlUnicode
|
||||||
from ox import stripTags, decodeHtml
|
from .. import stripTags, decodeHtml
|
||||||
|
from ..utils import datetime
|
||||||
|
|
||||||
|
|
||||||
def cleanup(key, data, data_type):
|
def cleanup(key, data, data_type):
|
||||||
|
|
Loading…
Reference in a new issue