scrapeit/scrapeit/mininova.py

# -*- Mode: Python; -*-
# -*- coding: utf-8 -*-
# vi:si:et:sw=2:sts=2:ts=2
import re
import socket
from urllib import quote

from BeautifulSoup import BeautifulSoup

from utils  import read_url, read_url_utf8
from btutils import torrentsWeLike

# Module-level side effect: importing this module sets the process-wide
# default timeout for newly created sockets to 10 seconds.
# NOTE(review): presumably meant to keep the read_url() fetches below from
# hanging on an unresponsive server -- confirm read_url honours this default.
socket.setdefaulttimeout(10.0)

def search(query, filterResult = False):
  '''search for torrents on mininova

  Fetches http://www.mininova.org/search/<query>/seeds (the '/seeds' suffix
  presumably requests ordering by seed count -- not verifiable from here)
  and returns one download url per table row that contains a link whose
  href matches '/tor'. The download url is that href with '/tor' replaced
  by '/get', prefixed with the site root.

  query        -- search terms; str or unicode. unicode is encoded as
                  UTF-8 before being percent-quoted.
  filterResult -- when true, only keep rows whose link text is accepted by
                  torrentsWeLike (same check searchByImdb applies).

  returns a list of download urls, in page order.
  '''
  if isinstance(query, unicode):
    # urllib.quote raises KeyError on non-ASCII unicode input; quote the
    # UTF-8 bytes instead.
    query = query.encode('utf-8')
  url = "http://www.mininova.org/search/%s/seeds" % quote(query)
  soup = BeautifulSoup(read_url(url))

  # compiled once, the pattern does not change between rows
  torrent_href = re.compile('/tor')
  torrents = []
  for row in soup('tr'):
    links = row('a', {'href': torrent_href})
    if not links:
      continue
    torrent = links[0]
    # hand torrentsWeLike the link text, not the Tag object, so that both
    # search functions in this module filter on the same kind of value
    if filterResult and not torrentsWeLike(unicode(torrent.contents[0])):
      continue
    torrents.append("http://www.mininova.org%s" % torrent.get('href').replace('/tor', '/get'))

  return torrents

def searchByImdb(imdb):
  '''look up torrents on mininova for an imdb id

  Fetches http://www.mininova.org/imdb/?imdb=<imdb> and walks its table
  rows. A row contributes a download url when it has no link to the
  '/faq/#pt' entry (private tracker marker), has a link whose href matches
  '/tor', and torrentsWeLike accepts the text of that link.

  imdb -- value interpolated into the imdb query parameter as is.

  returns a list of download urls (href with '/tor' replaced by '/get',
  prefixed with the site root), in page order.
  '''
  soup = BeautifulSoup(read_url("http://www.mininova.org/imdb/?imdb=%s" % imdb))
  private_href = re.compile('/faq/#pt')
  torrent_href = re.compile('/tor')

  found = []
  for tr in soup('tr'):
    # rows flagged as private tracker are never returned
    if tr('a', {'href': private_href}):
      continue
    anchors = tr('a', {'href': torrent_href})
    if not anchors:
      continue
    anchor = anchors[0]
    title = unicode(anchor.contents[0])
    if not torrentsWeLike(title):
      continue
    download_path = anchor.get('href').replace('/tor', '/get')
    found.append("http://www.mininova.org%s" % download_path)
  return found