From 1b9c4d288c8894f22958b5ade8cd9723cf0dab31 Mon Sep 17 00:00:00 2001
From: j <0x006A@0x2620.org>
Date: Sat, 15 Oct 2011 19:03:32 +0200
Subject: [PATCH 01/14] check before use
---
ox/web/imdb.py | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/ox/web/imdb.py b/ox/web/imdb.py
index a9d3d88..5771b5b 100644
--- a/ox/web/imdb.py
+++ b/ox/web/imdb.py
@@ -330,7 +330,8 @@ class Imdb(SiteParser):
self[key] = filter(lambda x: x.lower() != 'home', self[key])
if 'creator' in self:
- self['episodeDirector'] = self['director']
+ if 'director' in self:
+ self['episodeDirector'] = self['director']
self['director'] = self['creator']
if 'series' in self:
if 'episodeTitle' in self:
From 0b380abaebf8cca2911f379c3ba06e709b4d52f6 Mon Sep 17 00:00:00 2001
From: j <0x006A@0x2620.org>
Date: Sat, 15 Oct 2011 21:32:32 +0200
Subject: [PATCH 02/14] series creator(s) fixed #25
---
ox/web/imdb.py | 13 +++++++------
1 file changed, 7 insertions(+), 6 deletions(-)
diff --git a/ox/web/imdb.py b/ox/web/imdb.py
index 5771b5b..417d15b 100644
--- a/ox/web/imdb.py
+++ b/ox/web/imdb.py
@@ -88,7 +88,7 @@ class Imdb(SiteParser):
'creator': {
'page': 'combined',
're': [
- '
Creators:
.*?(.*?)
',
+ 'Creator.?:
.*?(.*?)
',
'
Date: Mon, 17 Oct 2011 13:56:49 +0200
Subject: [PATCH 04/14] .
---
ox/movie.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/ox/movie.py b/ox/movie.py
index 75c5dab..c13d108 100644
--- a/ox/movie.py
+++ b/ox/movie.py
@@ -61,7 +61,7 @@ def parse_movie_path(path):
director = []
#extension/language
- fileparts = parts[-1].split('.')
+ fileparts = [x.replace('||', '. ') for x in parts[-1].replace('. ', '||').split('.')]
extension = fileparts[-1]
if len(fileparts[-2]) == 2:
From 7a0fad1e03c16ad3022e9ed541ce6a8318714060 Mon Sep 17 00:00:00 2001
From: j <0x006A@0x2620.org>
Date: Mon, 17 Oct 2011 22:53:02 +0200
Subject: [PATCH 05/14] series without episodes
---
ox/movie.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/ox/movie.py b/ox/movie.py
index c13d108..6b6eae0 100644
--- a/ox/movie.py
+++ b/ox/movie.py
@@ -152,7 +152,7 @@ def get_oxid(title, director=[], year='',
return hashlib.sha1(string.encode('utf-8')).hexdigest().upper()
director = ', '.join(director)
episode_director = ', '.join(episode_director)
- if not episode:
+ if not episode and not episode_title:
oxid = get_hash(director)[:8] + get_hash('\n'.join([title, str(year)]))[:8]
else:
oxid = get_hash('\n'.join([director, title, str(year), str(season)]))[:8] + \
From fc95c4797b24a6ec6da0265bc60df12c13e26f77 Mon Sep 17 00:00:00 2001
From: j <0x006A@0x2620.org>
Date: Tue, 18 Oct 2011 10:19:02 +0200
Subject: [PATCH 06/14] script titles
---
ox/web/imdb.py | 1 +
1 file changed, 1 insertion(+)
diff --git a/ox/web/imdb.py b/ox/web/imdb.py
index c555ab3..ba2154e 100644
--- a/ox/web/imdb.py
+++ b/ox/web/imdb.py
@@ -266,6 +266,7 @@ class Imdb(SiteParser):
#only list one country per alternative title
def is_international_title(t):
+ if 'script title' in t[1].lower(): return False
if 'recut version' in t[1].lower(): return False
if 'working title' in t[1].lower(): return False
if 'complete title' in t[1].lower(): return False
From 2057e699bde0de7fcba10943728f778aec80d743 Mon Sep 17 00:00:00 2001
From: j <0x006A@0x2620.org>
Date: Tue, 18 Oct 2011 12:58:03 +0200
Subject: [PATCH 07/14] special case series without creator but a director
---
ox/web/imdb.py | 11 +++++++++++
1 file changed, 11 insertions(+)
diff --git a/ox/web/imdb.py b/ox/web/imdb.py
index ba2154e..3696b8a 100644
--- a/ox/web/imdb.py
+++ b/ox/web/imdb.py
@@ -102,6 +102,14 @@ class Imdb(SiteParser):
],
'type': 'list'
},
+ '_director': {
+ 'page': 'combined',
+ 're': [
+ 'Director:
.*?(.*?)
',
+ '>> decodeHtml('me & you and $&%')
u'me & you and $&%'
+ >>> decodeHtml('&#x80;')
+ u'€'
"""
if type(html) != unicode:
html = unicode(html)[:]
@@ -146,7 +148,9 @@ def decodeHtml(html):
uchr = lambda value: value > 255 and unichr(value) or chr(value)
def entitydecode(match, uchr=uchr):
entity = match.group(1)
- if entity.startswith('#x'):
+ if entity == '#x80':
+ return u'€'
+ elif entity.startswith('#x'):
return uchr(int(entity[2:], 16))
elif entity.startswith('#'):
return uchr(int(entity[1:]))
diff --git a/ox/web/imdb.py b/ox/web/imdb.py
index 3696b8a..ad97bf4 100644
--- a/ox/web/imdb.py
+++ b/ox/web/imdb.py
@@ -356,6 +356,10 @@ class Imdb(SiteParser):
for key in ['creator', 'year', 'country']:
if key in series:
self[key] = series[key]
+
+ if not 'director' in self and 'director' in series:
+ self['director'] = series['director']
+
if 'originalTitle' in self:
del self['originalTitle']
else:
From 71abfcc3078402ceeaac34fcbcadc73941163335 Mon Sep 17 00:00:00 2001
From: j <0x006A@0x2620.org>
Date: Tue, 18 Oct 2011 14:33:45 +0200
Subject: [PATCH 09/14] directors are creators
---
ox/web/imdb.py | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/ox/web/imdb.py b/ox/web/imdb.py
index ad97bf4..a9e9e2f 100644
--- a/ox/web/imdb.py
+++ b/ox/web/imdb.py
@@ -353,13 +353,13 @@ class Imdb(SiteParser):
self['episode%s'%key] = self[key.lower()]
series = Imdb(self['series'])
+ if not 'creator' in series and 'director' in series:
+ series['creator'] = series['director']
+
for key in ['creator', 'year', 'country']:
if key in series:
self[key] = series[key]
- if not 'director' in self and 'director' in series:
- self['director'] = series['director']
-
if 'originalTitle' in self:
del self['originalTitle']
else:
From 3e7b463ac8d2a0e9362655edd6f6eb75e312e375 Mon Sep 17 00:00:00 2001
From: j <0x006A@0x2620.org>
Date: Tue, 18 Oct 2011 14:45:00 +0200
Subject: [PATCH 10/14] International (Spanish
---
ox/web/imdb.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/ox/web/imdb.py b/ox/web/imdb.py
index a9e9e2f..3147e9c 100644
--- a/ox/web/imdb.py
+++ b/ox/web/imdb.py
@@ -279,7 +279,7 @@ class Imdb(SiteParser):
if 'working title' in t[1].lower(): return False
if 'complete title' in t[1].lower(): return False
if t[1].lower() == 'usa': return True
- if 'international' in t[1].lower(): return True
+ #if 'international' in t[1].lower(): return True
#fails if orignial is english... Japan (English title)
#if 'english title' in t[1].lower(): return True
return False
From 23ea669b7d200f7a51fab3841141b7a497b055bd Mon Sep 17 00:00:00 2001
From: j <0x006A@0x2620.org>
Date: Tue, 18 Oct 2011 14:57:31 +0200
Subject: [PATCH 11/14] cleanup summary
---
ox/web/imdb.py | 2 ++
1 file changed, 2 insertions(+)
diff --git a/ox/web/imdb.py b/ox/web/imdb.py
index 3147e9c..1e7fc49 100644
--- a/ox/web/imdb.py
+++ b/ox/web/imdb.py
@@ -377,6 +377,8 @@ class Imdb(SiteParser):
if 'releaseDate' in self:
if isinstance(self['releaseDate'], list):
self['releaseDate'] = min(self['releaseDate'])
+ if 'summary' in self:
+ self['summary'] = self['summary'].split('
Date: Tue, 18 Oct 2011 15:30:16 +0200
Subject: [PATCH 12/14] _
---
ox/web/imdb.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/ox/web/imdb.py b/ox/web/imdb.py
index 1e7fc49..1274a96 100644
--- a/ox/web/imdb.py
+++ b/ox/web/imdb.py
@@ -530,8 +530,8 @@ def getMoviePoster(imdbId):
'http://ia.media-imdb.com/images/M/MV5BMjA3NzMyMzU1MV5BMl5BanBnXkFtZTcwNjc1ODUwMg@@._V1._SX594_SY755_.jpg'
'''
info = ImdbCombined(imdbId)
- if 'poster_id' in info:
- url = "http://www.imdb.com/rg/action-box-title/primary-photo/media/rm%s/tt%s" % (info['poster_id'], imdbId)
+ if 'posterId' in info:
+ url = "http://www.imdb.com/rg/action-box-title/primary-photo/media/rm%s/tt%s" % (info['posterId'], imdbId)
data = readUrl(url)
poster = findRe(data, 'img id="primary-img".*?src="(.*?)"')
return poster
From 60d8c6bc05e6572af685e2ae3d8f2fd1a4a545bf Mon Sep 17 00:00:00 2001
From: j <0x006A@0x2620.org>
Date: Tue, 18 Oct 2011 15:50:16 +0200
Subject: [PATCH 13/14] not everybody can be
---
ox/web/imdb.py | 2 ++
1 file changed, 2 insertions(+)
diff --git a/ox/web/imdb.py b/ox/web/imdb.py
index 1274a96..bfb1cba 100644
--- a/ox/web/imdb.py
+++ b/ox/web/imdb.py
@@ -355,6 +355,8 @@ class Imdb(SiteParser):
if not 'creator' in series and 'director' in series:
series['creator'] = series['director']
+ if len(series['creator']) > 10:
+ series['creator'] = series['director'][:1]
for key in ['creator', 'year', 'country']:
if key in series:
From c8dc06d68265e265957a44bde0d85ce78480cb68 Mon Sep 17 00:00:00 2001
From: j <0x006A@0x2620.org>
Date: Tue, 18 Oct 2011 16:15:07 +0200
Subject: [PATCH 14/14] take International (English title)
---
ox/web/imdb.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/ox/web/imdb.py b/ox/web/imdb.py
index bfb1cba..c3c2338 100644
--- a/ox/web/imdb.py
+++ b/ox/web/imdb.py
@@ -279,7 +279,7 @@ class Imdb(SiteParser):
if 'working title' in t[1].lower(): return False
if 'complete title' in t[1].lower(): return False
if t[1].lower() == 'usa': return True
- #if 'international' in t[1].lower(): return True
+ if 'international (english title)' in t[1].lower(): return True
#fails if orignial is english... Japan (English title)
#if 'english title' in t[1].lower(): return True
return False