From 1b93ae048d93b7f4a60ed79e5efafd8787f8272f Mon Sep 17 00:00:00 2001
From: j <j@0xdb.org>
Date: Mon, 5 May 2008 20:33:23 +0200
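Subject: [PATCH] ox gets some tests too: add doctests, handle missing data

Add doctests to dailymotion, mininova, thepiratebay and torrent, and
document in README how to run them (nosetests --with-doctest ox).

Supporting changes:
- dailymotion: drop the __main__ demo block; its two URLs are now the
  doctest examples for getVideoUrl().
- mininova: coerce the id to unicode in getId().
- mininova, thepiratebay: normalize newlines in the description, and
  return early from the constructor when getData() yields no data.
- thepiratebay: rename the local getUrl/getUrlUnicode wrappers to
  _getUrl/_getUrlUnicode so they no longer shadow the names imported
  from oxutils.cache.
- torrent: default data['torrent_info'] to an empty dict and read hash,
  size and announce with .get() defaults.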
---
 README             |  3 +++
 ox/dailymotion.py  | 10 +++++++---
 ox/mininova.py     | 15 ++++++++++++---
 ox/thepiratebay.py | 29 ++++++++++++++++++++---------
 ox/torrent.py      | 12 ++++++++----
 5 files changed, 50 insertions(+), 19 deletions(-)
diff --git a/README b/README
index c5a74e1..74076a9 100644
--- a/README
+++ b/README
@@ -6,3 +6,6 @@ Depends:
  python-beautifulsoup (http://www.crummy.com/software/BeautifulSoup/)
  python-feedparser (http://www.feedparser.org/)
 
+
+Test:
+ nosetests --with-doctest ox
diff --git a/ox/dailymotion.py b/ox/dailymotion.py
index cb5b576..1dafa75 100644
--- a/ox/dailymotion.py
+++ b/ox/dailymotion.py
@@ -3,6 +3,13 @@ from urllib import unquote
 from oxutils.cache import getUrl
 
 def getVideoUrl(url):
+  '''
+  >>> getVideoUrl('http://www.dailymotion.com/relevance/search/priere%2Bpour%2Brefuznik/video/x3opar_priere-pour-refuznik-1-jeanluc-goda_shortfilms')
+  'http://www.dailymotion.com/get/16/320x240/flv/6191379.flv?key=0a710ad6ffbfe980b1252569d16f957313399d0'
+
+  >>> getVideoUrl('http://www.dailymotion.com/relevance/search/priere%2Bpour%2Brefuznik/video/x3ou94_priere-pour-refuznik-2-jeanluc-goda_shortfilms')
+  'http://www.dailymotion.com/get/15/320x240/flv/6197800.flv?key=08a18365ca6962c5ff7526f69872c36813399d4'
+  '''
   data = getUrl(url)
   video = re.compile('''video", "(.*?)"''').findall(data)
   for v in video:
@@ -10,6 +17,3 @@ def getVideoUrl(url):
    return "http://www.dailymotion.com" + v
   return ''
 
-if __name__ == '__main__':
-  print getVideoUrl('http://www.dailymotion.com/relevance/search/priere%2Bpour%2Brefuznik/video/x3opar_priere-pour-refuznik-1-jeanluc-goda_shortfilms')
-  print getVideoUrl('http://www.dailymotion.com/relevance/search/priere%2Bpour%2Brefuznik/video/x3ou94_priere-pour-refuznik-2-jeanluc-goda_shortfilms')
diff --git a/ox/mininova.py b/ox/mininova.py
index fa64b6f..03ea512 100644
--- a/ox/mininova.py
+++ b/ox/mininova.py
@@ -6,9 +6,10 @@ from datetime import datetime
 import re
 import socket
 from urllib import quote
+import sha
 
 from oxutils.cache import getUrl, getUrlUnicode
-from oxutils import findRegexp, cache, stripTags, decodeHtml, getTorrentInfo, intValue
+from oxutils import findRegexp, cache, stripTags, decodeHtml, getTorrentInfo, intValue, normalizeNewlines
 from oxutils.normalize import normalizeImdbId
 
 from torrent import Torrent
@@ -45,6 +46,7 @@ def findMovieByImdb(imdbId):
   return _parseResultsPage(data)
 
 def getId(mininovaId):
+  mininovaId = unicode(mininovaId)
   d = findRegexp(mininovaId, "/(\d+)")
   if d:
     return d
@@ -80,15 +82,22 @@ def getData(mininovaId):
   torrent[u'imdbId'] = findRegexp(data, 'title/tt(\d{7})')
   torrent[u'description'] = findRegexp(data, '<div id="description">(.*?)</div>')
   if torrent['description']:
-    torrent['description'] = decodeHtml(stripTags(torrent['description'])).strip()
+    torrent['description'] = normalizeNewlines(decodeHtml(stripTags(torrent['description']))).strip()
   t = getUrl(torrent[u'torrent_link'])
   torrent[u'torrent_info'] = getTorrentInfo(t)
   return torrent
 
-
 class Mininova(Torrent):
+  '''
+  >>> Mininova('123')
+  {}
+  >>> sha.sha(unicode(Mininova('1072195'))).hexdigest()
+  'ec98268a0aeaef8292f7bcf3585d0bc3910b3fac'
+  '''
   def __init__(self, mininovaId):
     self.data = getData(mininovaId)
+    if not self.data:
+      return
     Torrent.__init__(self)
     ratio = self.data['share ratio'].split(',')
     self['seeder'] = int(intValue(ratio[0].replace(',','').strip()))
diff --git a/ox/thepiratebay.py b/ox/thepiratebay.py
index 1d85f73..897c4f8 100644
--- a/ox/thepiratebay.py
+++ b/ox/thepiratebay.py
@@ -7,9 +7,10 @@ import re
 import socket
 from urllib import quote, urlencode
 from urllib2 import URLError
+import sha
 
 from oxutils.cache import getUrl, getUrlUnicode
-from oxutils import findRegexp, cache, stripTags, decodeHtml, getTorrentInfo
+from oxutils import findRegexp, cache, stripTags, decodeHtml, getTorrentInfo, normalizeNewlines
 from oxutils.normalize import normalizeImdbId
 
 from torrent import Torrent
@@ -19,13 +20,13 @@ socket.setdefaulttimeout(10.0)
 season_episode = re.compile("S..E..", re.IGNORECASE)
 
 
-def getUrl(url, data=None, headers=cache.DEFAULT_HEADERS, timeout=cache.cache_timeout):
+def _getUrl(url, data=None, headers=cache.DEFAULT_HEADERS, timeout=cache.cache_timeout):
   headers = cache.DEFAULT_HEADERS
   headers['Cookie'] = 'language=en_EN'
   return cache.getUrl(url, data, headers, timeout)
 
-def getUrlUnicode(url):
- return cache.getUrlUnicode(url, _getUrl=getUrl)
+def _getUrlUnicode(url):
+ return cache.getUrlUnicode(url, _getUrl=_getUrl)
 
 def findMovies(query, max_results=10):
   results = []
@@ -38,7 +39,7 @@ def findMovies(query, max_results=10):
       if not url.startswith('/'):
         url = "/" + url
       url = "http://thepiratebay.org" + url
-    data = getUrlUnicode(url)
+    data = _getUrlUnicode(url)
     regexp = '''<tr.*?<td class="vertTh"><a href="/browse/(.*?)".*?<td><a href="(/tor/.*?)" class="detLink".*?>(.*?)</a>.*?</tr>'''
     for row in  re.compile(regexp, re.DOTALL).findall(data):
       torrentType = row[0]
@@ -79,7 +80,7 @@ def getData(piratebayId):
   torrent[u'domain'] = 'thepiratebay.org'
   torrent[u'comment_link'] = 'http://thepiratebay.org/tor/%s' % piratebayId
 
-  data = getUrlUnicode(torrent['comment_link'])
+  data = _getUrlUnicode(torrent['comment_link'])
   torrent[u'title'] = findRegexp(data, '<title>(.*?) \(download torrent\) - TPB</title>')
   if not torrent[u'title']:
     return None
@@ -91,15 +92,25 @@ def getData(piratebayId):
     key = _key_map.get(key, key)
     value = decodeHtml(stripTags(d[1].strip()))
     torrent[key] = value
-  torrent[u'description'] = decodeHtml(stripTags(findRegexp(data, '<div class="nfo">(.*?)</div>'))).strip()
-  t = getUrl(torrent[u'torrent_link'])
+  torrent[u'description'] = findRegexp(data, '<div class="nfo">(.*?)</div>')
+  if torrent[u'description']:
+    torrent['description'] = normalizeNewlines(decodeHtml(stripTags(torrent['description']))).strip()
+  t = _getUrl(torrent[u'torrent_link'])
   torrent[u'torrent_info'] = getTorrentInfo(t)
   return torrent
 
-
 class Thepiratebay(Torrent):
+  '''
+  >>> Thepiratebay('123')
+  {}
+
+  >>> sha.sha(unicode(Thepiratebay('3951349'))).hexdigest()
+  'ef64e438e3eef6e6a05cac4eea56b9f0289d3f22'
+  '''
   def __init__(self, piratebayId):
     self.data = getData(piratebayId)
+    if not self.data:
+      return
     Torrent.__init__(self)
     published =  self.data['uploaded']
     published = published.replace(' GMT', '').split(' +')[0]
diff --git a/ox/torrent.py b/ox/torrent.py
index 21e3df1..785f604 100644
--- a/ox/torrent.py
+++ b/ox/torrent.py
@@ -6,12 +6,16 @@ from oxutils import intValue
 
 
 class Torrent(dict):
+  '''
+  >>> Torrent()
+  {'files': 1, 'domain': u'', 'subtitle language': u'', 'seeder': -1, 'description': u'', 'language': u'', 'title': u'', 'imdbId': u'', 'downloaded': -1, 'leecher': -1, 'torrent_link': u'', 'torrent_info': {}, 'published': u'', 'announce': '', 'infohash': '', 'id': u'', 'comment_link': u'', 'size': -1}
+  '''
   _string_keys = ('id', 'title', 'description', 'infohash', 'torrent_link', 'comment_link', 
                  'imdbId', 'announce', 'domain', 'published', 'language', 'subtitle language')
   _int_keys = ('size', 'seeder', 'leecher', 'downloaded', 'files')
   _dict_keys = ('torrent_info', )
   _list_keys = ()
-  data = {}
+  data = {'torrent_info': {}}
 
   def __init__(self):
     for key in self._string_keys:
@@ -25,9 +29,9 @@ class Torrent(dict):
       if not isinstance(value, int):
         value = int(intValue(value))
       self[key] = value
-    self['infohash'] = self.data['torrent_info']['hash']
-    self['size'] = self.data['torrent_info']['size']
-    self['announce'] = self.data['torrent_info']['announce']
+    self['infohash'] = self.data['torrent_info'].get('hash', '')
+    self['size'] = self.data['torrent_info'].get('size', -1)
+    self['announce'] = self.data['torrent_info'].get('announce', '')
     if 'files' in self.data['torrent_info']:
       self['files'] = len(self.data['torrent_info']['files'])
     else: