From fab1f86987ab3274c092f260ba13b3d958885526 Mon Sep 17 00:00:00 2001 From: j <0x006A@0x2620.org> Date: Sun, 18 Mar 2012 15:38:51 +0100 Subject: [PATCH] include summary in results --- ox/movie.py | 2 ++ ox/web/google.py | 9 ++++++--- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/ox/movie.py b/ox/movie.py index e70620a..12818df 100644 --- a/ox/movie.py +++ b/ox/movie.py @@ -43,6 +43,8 @@ def parse_movie_path(path): else: title = parts[0] title = title.replace('_ ', ': ') + if title.endswith('_'): + title = title[:-1] + '.' year = findRe(title, '(\(\d{4}\))') if not year: diff --git a/ox/web/google.py b/ox/web/google.py index 0d3ea3b..26e6bfd 100644 --- a/ox/web/google.py +++ b/ox/web/google.py @@ -4,7 +4,7 @@ import re import urllib import ox -from ox import stripTags +from ox import stripTags, decodeHtml DEFAULT_MAX_RESULTS = 10 DEFAULT_TIMEOUT = 24*60*60 @@ -30,8 +30,11 @@ def find(query, max_results=DEFAULT_MAX_RESULTS, timeout=DEFAULT_TIMEOUT): url = 'http://google.com/search?q=%s' % quote_plus(query) data = readUrlUnicode(url, timeout=timeout) results = [] - for a in re.compile('(.*?)').findall(data): - results.append((stripTags(a[1]), a[0], '')) + data = re.sub('(.*?)', '\\1', data) + for a in re.compile( + '(.*?).*?(.*?)<\/span>' + ).findall(data): + results.append((stripTags(decodeHtml(a[1])), a[0], stripTags(decodeHtml(a[2])))) if len(results) >= max_results: break return results