include summary in results

This commit is contained in:
j 2012-03-18 15:38:51 +01:00
parent bc655b4134
commit fab1f86987
2 changed files with 8 additions and 3 deletions

View file

@@ -43,6 +43,8 @@ def parse_movie_path(path):
else: else:
title = parts[0] title = parts[0]
title = title.replace('_ ', ': ') title = title.replace('_ ', ': ')
if title.endswith('_'):
title = title[:-1] + '.'
year = findRe(title, '(\(\d{4}\))') year = findRe(title, '(\(\d{4}\))')
if not year: if not year:

View file

@@ -4,7 +4,7 @@ import re
import urllib import urllib
import ox import ox
from ox import stripTags from ox import stripTags, decodeHtml
DEFAULT_MAX_RESULTS = 10 DEFAULT_MAX_RESULTS = 10
DEFAULT_TIMEOUT = 24*60*60 DEFAULT_TIMEOUT = 24*60*60
@@ -30,8 +30,11 @@ def find(query, max_results=DEFAULT_MAX_RESULTS, timeout=DEFAULT_TIMEOUT):
url = 'http://google.com/search?q=%s' % quote_plus(query) url = 'http://google.com/search?q=%s' % quote_plus(query)
data = readUrlUnicode(url, timeout=timeout) data = readUrlUnicode(url, timeout=timeout)
results = [] results = []
for a in re.compile('<a href="(\S+?)" class=l .*?>(.*?)</a>').findall(data): data = re.sub('<span class="f">(.*?)</span>', '\\1', data)
results.append((stripTags(a[1]), a[0], '')) for a in re.compile(
'<a href="(\S+?)" class=l .*?>(.*?)</a>.*?<span class="st">(.*?)<\/span>'
).findall(data):
results.append((stripTags(decodeHtml(a[1])), a[0], stripTags(decodeHtml(a[2]))))
if len(results) >= max_results: if len(results) >= max_results:
break break
return results return results