From fc22335f8eae8e6d9d90a515511c46117ec6335a Mon Sep 17 00:00:00 2001 From: j <0x006A@0x2620.org> Date: Thu, 9 Dec 2010 04:37:28 +0100 Subject: [PATCH] use akas.imdb.com --- ox/web/imdb.py | 30 +++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/ox/web/imdb.py b/ox/web/imdb.py index 3aebdfd..f895ba4 100644 --- a/ox/web/imdb.py +++ b/ox/web/imdb.py @@ -17,13 +17,19 @@ import google def readUrl(url, data=None, headers=ox.cache.DEFAULT_HEADERS, timeout=ox.cache.cache_timeout, valid=None): headers = headers.copy() - headers["Cookie"] = 'session-id=061-6553581-0286357; uu=bl8Nra2zsmTjesDEOxamlwVkXrKj8h6ygOFd1LDhKNGwxHjk4LQopMCxSNGTU3nl88Ro5FCSHrKjUi2RoREt4SEhDZGA8Q4SILFsUfUFbhLFgr6EZTD4RYTFSEWWwr4UV+K+l/eivpfX51v2Y1JrhvCg/ZEg4QxRsLEcUYDivmGwwW3hINGNodNSvhGz0h6ypaRIUuPyHvWQ8paioNENkaDRndHw4r4RsKEt4SDRndHzwr4Rs9IesqPUWCLg4h6yoMGNISDRndHD4r4Rs9IesqPyHvLjom6Co=; cs=pReiGikHkbKk4Fhkk8Meyw5/E6t6mVT9+v+ACx7KZ/rpfwPtXklU/c7BdHWNegduvco3rq7p9+7eSVT9yK4Uvd5JVMtpSdz9/kliy+7BVP392hR17RoHzq1ad36dSlRdWF+Srs7fYurOSVS9XlkU3f5pVP3+SVS9vhkkzf; session-id-time=1286639981' return ox.cache.readUrl(url, data, headers, timeout) def readUrlUnicode(url, timeout=ox.cache.cache_timeout): return ox.cache.readUrlUnicode(url, _readUrl=readUrl, timeout=timeout) class Imdb(SiteParser): + ''' + >>> Imdb('0068646')['title'] + u'The Godfather' + + >>> Imdb('0133093')['title'] + u'The Matrix' + ''' regex = { 'alternative_titles': { 'page': 'releaseinfo', @@ -112,11 +118,6 @@ class Imdb(SiteParser): 're': '(.*?)', 'type': 'list' }, - 'original_title': { - 'page': 'combined', - 're': '(.*?) (original title)', - 'type': 'string' - }, 'plot': { 'page': 'plotsummary', 're': '.*?
(.*?)',
@@ -185,7 +186,7 @@ class Imdb(SiteParser):
're': 'TV Series:
.*?