filter results, return more results from TPB, filter HD content in filter

This commit is contained in:
j 2007-07-09 13:11:56 +00:00
parent 472c99240b
commit ee5864a99d
3 changed files with 49 additions and 21 deletions

View file

@ -15,9 +15,14 @@ def torrentsWeLike(link):
if word in text: if word in text:
return False return False
#no dubbed versions #no dubbed versions
for word in ('italian', 'german', 'spanish', 'french'): for word in ('italian', 'german', 'spanish', 'french', 'nl sub'):
if word in text: if word in text:
return False return False
#no Blu-ray or HD DVD versions right now, and no DVDRs either
for word in ('chd', 'hd ', 'hd-', 'dvdr-', 'dvdr.', 'dvdr '):
if word in text:
return False
#only dvdrips or dvdscrs #only dvdrips or dvdscrs
for word in ('dvdrip', 'dvdscr', 'dvd screener'): for word in ('dvdrip', 'dvdscr', 'dvd screener'):
if word in text: if word in text:

View file

@ -12,7 +12,7 @@ from btutils import torrentsWeLike
socket.setdefaulttimeout(10.0) socket.setdefaulttimeout(10.0)
def search(query): def search(query, filterResult = False):
'''search for torrents on mininova '''search for torrents on mininova
''' '''
torrents = [] torrents = []
@ -21,9 +21,14 @@ def search(query):
soup = BeautifulSoup(page) soup = BeautifulSoup(page)
for row in soup('tr'): for row in soup('tr'):
links = row('a', {'href':re.compile('/tor')}) links = row('a', {'href':re.compile('/tor')})
if links and torrentsWeLike(links[0]): if links:
torrent_url = "http://www.mininova.org%s" % links[0].get('href').replace('/tor', '/get') torrent_url = "http://www.mininova.org%s" % links[0].get('href').replace('/tor', '/get')
torrents.append(torrent_url) if filterResult:
if torrentsWeLike(links[0]):
torrents.append(torrent_url)
else:
torrents.append(torrent_url)
return torrents return torrents
def searchByImdb(imdb): def searchByImdb(imdb):
@ -32,9 +37,13 @@ def searchByImdb(imdb):
torrents = [] torrents = []
page = read_url("http://www.mininova.org/imdb/?imdb=%s" % imdb) page = read_url("http://www.mininova.org/imdb/?imdb=%s" % imdb)
soup = BeautifulSoup(page) soup = BeautifulSoup(page)
for row in soup('tr'): for row in soup('tr'):
links = row('a', {'href':re.compile('/get')}) #filter private trackers
if links: private_tracker = row('a', {'href':re.compile('/faq/#pt')})
torrent_url = "http://www.mininova.org%s" % links[0].get('href') links = row('a', {'href':re.compile('/tor')})
torrents.append(torrent_url) if not private_tracker and links:
torrent = links[0]
if torrentsWeLike(unicode(torrent.contents[0])):
torrent_url = "http://www.mininova.org%s" % torrent.get('href').replace('/tor', '/get')
torrents.append(torrent_url)
return torrents return torrents

View file

@ -8,6 +8,7 @@ from urllib import quote
from BeautifulSoup import BeautifulSoup from BeautifulSoup import BeautifulSoup
from btutils import torrentsWeLike
from google import google from google import google
from utils import read_url, read_url_utf8 from utils import read_url, read_url_utf8
@ -85,19 +86,32 @@ def get_episodes(id):
episodes = re.compile('<nobr><a href="(.*?)">(.*?)</a></nobr>').findall(data) episodes = re.compile('<nobr><a href="(.*?)">(.*?)</a></nobr>').findall(data)
return episodes return episodes
def search(query): def search(query, filterResult = False):
torrents = [] torrents = []
url = "http://thepiratebay.org/search.php?video=on&q=%s" % quote(query) next = ["http://thepiratebay.org/search/%s/0/3/200" % quote(query), ]
page = read_url(url) page_count = 1
soup = BeautifulSoup(page) while next and page_count < 4:
for row in soup('tr'): page_count += 1
torrentType = row.findAll('td', {'class': 'vertTh'}) url = next[0]
if torrentType: if not url.startswith('http'):
torrentType = torrentType[0]('a')[0].get('href').split('/')[-1] if not url.startswith('/'):
# 201 = Movies , 202 = Movie DVDR url = "/" + url
if torrentType in ['201']: url = "http://thepiratebay.org" + url
torrent = row.findAll('a', {'href':re.compile('.torrent$')})[0].get('href') page = read_url(url)
torrents.append(torrent) soup = BeautifulSoup(page)
for row in soup('tr'):
torrentType = row.findAll('td', {'class': 'vertTh'})
if torrentType:
torrentType = torrentType[0]('a')[0].get('href').split('/')[-1]
# 201 = Movies , 202 = Movie DVDR
if torrentType in ['201']:
torrent = row.findAll('a', {'href':re.compile('.torrent$')})[0].get('href')
if filterResult:
if torrentsWeLike(torrent):
torrents.append(torrent)
else:
torrents.append(torrent)
next = re.compile('<a.*?href="(.*?)".*?>.*?next.gif.*?</a>').findall(page)
return torrents return torrents
def searchByImdb(imdb): def searchByImdb(imdb):