diff --git a/scrapeit/btutils.py b/scrapeit/btutils.py
index 474ea7c..a340b4f 100644
--- a/scrapeit/btutils.py
+++ b/scrapeit/btutils.py
@@ -15,9 +15,14 @@ def torrentsWeLike(link):
if word in text:
return False
#no dubbed versions
- for word in ('italian', 'german', 'spanish', 'french'):
+ for word in ('italian', 'german', 'spanish', 'french', 'nl sub'):
if word in text:
return False
+    # skip Blu-ray, HD DVD, and DVDR releases for now
+ for word in ('chd', 'hd ', 'hd-', 'dvdr-', 'dvdr.', 'dvdr '):
+ if word in text:
+ return False
+
#only dvdrips or dvdscrs
for word in ('dvdrip', 'dvdscr', 'dvd screener'):
if word in text:
diff --git a/scrapeit/mininova.py b/scrapeit/mininova.py
index 7cf0059..f5091a3 100644
--- a/scrapeit/mininova.py
+++ b/scrapeit/mininova.py
@@ -12,7 +12,7 @@ from btutils import torrentsWeLike
socket.setdefaulttimeout(10.0)
-def search(query):
+def search(query, filterResult = False):
'''search for torrents on mininova
'''
torrents = []
@@ -21,9 +21,14 @@ def search(query):
soup = BeautifulSoup(page)
for row in soup('tr'):
links = row('a', {'href':re.compile('/tor')})
- if links and torrentsWeLike(links[0]):
+ if links:
torrent_url = "http://www.mininova.org%s" % links[0].get('href').replace('/tor', '/get')
- torrents.append(torrent_url)
+ if filterResult:
+ if torrentsWeLike(links[0]):
+ torrents.append(torrent_url)
+ else:
+ torrents.append(torrent_url)
+
return torrents
def searchByImdb(imdb):
@@ -32,9 +37,13 @@ def searchByImdb(imdb):
torrents = []
page = read_url("http://www.mininova.org/imdb/?imdb=%s" % imdb)
soup = BeautifulSoup(page)
- for row in soup('tr'):
- links = row('a', {'href':re.compile('/get')})
- if links:
- torrent_url = "http://www.mininova.org%s" % links[0].get('href')
- torrents.append(torrent_url)
+ for row in soup('tr'):
+        # filter out torrents hosted on private trackers
+ private_tracker = row('a', {'href':re.compile('/faq/#pt')})
+ links = row('a', {'href':re.compile('/tor')})
+ if not private_tracker and links:
+ torrent = links[0]
+ if torrentsWeLike(unicode(torrent.contents[0])):
+ torrent_url = "http://www.mininova.org%s" % torrent.get('href').replace('/tor', '/get')
+ torrents.append(torrent_url)
return torrents
diff --git a/scrapeit/thepiratebay.py b/scrapeit/thepiratebay.py
index 1d42b51..fafa042 100644
--- a/scrapeit/thepiratebay.py
+++ b/scrapeit/thepiratebay.py
@@ -8,6 +8,7 @@ from urllib import quote
from BeautifulSoup import BeautifulSoup
+from btutils import torrentsWeLike
from google import google
from utils import read_url, read_url_utf8
@@ -85,19 +86,32 @@ def get_episodes(id):
episodes = re.compile('(.*?)').findall(data)
return episodes
-def search(query):
+def search(query, filterResult = False):
torrents = []
- url = "http://thepiratebay.org/search.php?video=on&q=%s" % quote(query)
- page = read_url(url)
- soup = BeautifulSoup(page)
- for row in soup('tr'):
- torrentType = row.findAll('td', {'class': 'vertTh'})
- if torrentType:
- torrentType = torrentType[0]('a')[0].get('href').split('/')[-1]
- # 201 = Movies , 202 = Movie DVDR
- if torrentType in ['201']:
- torrent = row.findAll('a', {'href':re.compile('.torrent$')})[0].get('href')
- torrents.append(torrent)
+ next = ["http://thepiratebay.org/search/%s/0/3/200" % quote(query), ]
+ page_count = 1
+ while next and page_count < 4:
+ page_count += 1
+ url = next[0]
+ if not url.startswith('http'):
+ if not url.startswith('/'):
+ url = "/" + url
+ url = "http://thepiratebay.org" + url
+ page = read_url(url)
+ soup = BeautifulSoup(page)
+ for row in soup('tr'):
+ torrentType = row.findAll('td', {'class': 'vertTh'})
+ if torrentType:
+ torrentType = torrentType[0]('a')[0].get('href').split('/')[-1]
+ # 201 = Movies , 202 = Movie DVDR
+ if torrentType in ['201']:
+ torrent = row.findAll('a', {'href':re.compile('.torrent$')})[0].get('href')
+ if filterResult:
+ if torrentsWeLike(torrent):
+ torrents.append(torrent)
+ else:
+ torrents.append(torrent)
+ next = re.compile('.*?next.gif.*?').findall(page)
return torrents
def searchByImdb(imdb):