ox gets some tests too

j 2008-05-05 20:33:23 +02:00
parent b3d3f44d20
commit 1b93ae048d
5 changed files with 50 additions and 19 deletions

README
View file

@@ -6,3 +6,6 @@ Depends:
 python-beautifulsoup (http://www.crummy.com/software/BeautifulSoup/)
 python-feedparser (http://www.feedparser.org/)
+
+Test:
+nosetests --with-doctest ox
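
The Test instructions above rely on nose collecting the >>> examples that the rest of this commit adds to docstrings and running them with the doctest module. A minimal sketch of the pattern, with an invented function (slugify is not part of ox); only the doctest workflow matches what the README sets up:

# Invented example; only the doctest mechanics match this commit.
def slugify(title):
    '''
    >>> slugify('Priere pour Refuznik')
    'priere-pour-refuznik'
    '''
    return title.lower().replace(' ', '-')

if __name__ == '__main__':
    import doctest
    doctest.testmod()  # nosetests --with-doctest does this for every module in ox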

View file

@@ -3,6 +3,13 @@ from urllib import unquote
 from oxutils.cache import getUrl
 
 def getVideoUrl(url):
+    '''
+    >>> getVideoUrl('http://www.dailymotion.com/relevance/search/priere%2Bpour%2Brefuznik/video/x3opar_priere-pour-refuznik-1-jeanluc-goda_shortfilms')
+    'http://www.dailymotion.com/get/16/320x240/flv/6191379.flv?key=0a710ad6ffbfe980b1252569d16f957313399d0'
+    >>> getVideoUrl('http://www.dailymotion.com/relevance/search/priere%2Bpour%2Brefuznik/video/x3ou94_priere-pour-refuznik-2-jeanluc-goda_shortfilms')
+    'http://www.dailymotion.com/get/15/320x240/flv/6197800.flv?key=08a18365ca6962c5ff7526f69872c36813399d4'
+    '''
     data = getUrl(url)
     video = re.compile('''video", "(.*?)"''').findall(data)
    for v in video:
@@ -10,6 +17,3 @@ def getVideoUrl(url):
         return "http://www.dailymotion.com" + v
     return ''
 
-if __name__ == '__main__':
-    print getVideoUrl('http://www.dailymotion.com/relevance/search/priere%2Bpour%2Brefuznik/video/x3opar_priere-pour-refuznik-1-jeanluc-goda_shortfilms')
-    print getVideoUrl('http://www.dailymotion.com/relevance/search/priere%2Bpour%2Brefuznik/video/x3ou94_priere-pour-refuznik-2-jeanluc-goda_shortfilms')
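
The two example URLs move from the ad-hoc __main__ block into getVideoUrl's docstring, so the same calls now serve as documentation and as tests. They fetch live pages through oxutils.cache.getUrl, so the expected .flv URLs (including the key= parameter) only match while the cached responses stay valid. A hedged way to exercise just this module, assuming it is importable as ox.dailymotion:

# Assumes the module path ox.dailymotion; adjust to wherever getVideoUrl lives.
import doctest
import ox.dailymotion
doctest.testmod(ox.dailymotion, verbose=True)  # runs the two >>> examples above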

View file

@@ -6,9 +6,10 @@ from datetime import datetime
 import re
 import socket
 from urllib import quote
+import sha
 
 from oxutils.cache import getUrl, getUrlUnicode
-from oxutils import findRegexp, cache, stripTags, decodeHtml, getTorrentInfo, intValue
+from oxutils import findRegexp, cache, stripTags, decodeHtml, getTorrentInfo, intValue, normalizeNewlines
 from oxutils.normalize import normalizeImdbId
 
 from torrent import Torrent
@@ -45,6 +46,7 @@ def findMovieByImdb(imdbId):
     return _parseResultsPage(data)
 
 def getId(mininovaId):
+    mininovaId = unicode(mininovaId)
     d = findRegexp(mininovaId, "/(\d+)")
     if d:
         return d
@@ -80,15 +82,22 @@ def getData(mininovaId):
     torrent[u'imdbId'] = findRegexp(data, 'title/tt(\d{7})')
     torrent[u'description'] = findRegexp(data, '<div id="description">(.*?)</div>')
     if torrent['description']:
-        torrent['description'] = decodeHtml(stripTags(torrent['description'])).strip()
+        torrent['description'] = normalizeNewlines(decodeHtml(stripTags(torrent['description']))).strip()
     t = getUrl(torrent[u'torrent_link'])
     torrent[u'torrent_info'] = getTorrentInfo(t)
     return torrent
 
 class Mininova(Torrent):
+    '''
+    >>> Mininova('123')
+    {}
+    >>> sha.sha(unicode(Mininova('1072195'))).hexdigest()
+    'ec98268a0aeaef8292f7bcf3585d0bc3910b3fac'
+    '''
     def __init__(self, mininovaId):
         self.data = getData(mininovaId)
+        if not self.data:
+            return
         Torrent.__init__(self)
         ratio = self.data['share ratio'].split(',')
         self['seeder'] = int(intValue(ratio[0].replace(',','').strip()))
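
The Mininova doctest pins the whole scraped dictionary with a single checksum: the torrent dict is rendered to unicode and hashed with sha, so any change in the parsed fields shows up as one failing line instead of a page-long expected dict. A rough sketch of the same pattern, using the old Python 2 sha module as the code above does; the sample dict is invented:

# Python 2 sketch of the checksum-style doctest; hashlib later replaced sha.
import sha

def fingerprint(data):
    # any object with a stable unicode() representation can be pinned this way
    return sha.sha(unicode(data)).hexdigest()

expected = fingerprint({'title': u'some movie', 'seeder': 12})
assert fingerprint({'title': u'some movie', 'seeder': 12}) == expected

This keeps the doctests short at the cost of opaque failures: the hash changes whenever any field, the dict ordering, or the formatting changes, so a mismatch only says that something in the scraped data differs.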

View file

@@ -7,9 +7,10 @@ import re
 import socket
 from urllib import quote, urlencode
 from urllib2 import URLError
+import sha
 
 from oxutils.cache import getUrl, getUrlUnicode
-from oxutils import findRegexp, cache, stripTags, decodeHtml, getTorrentInfo
+from oxutils import findRegexp, cache, stripTags, decodeHtml, getTorrentInfo, normalizeNewlines
 from oxutils.normalize import normalizeImdbId
 
 from torrent import Torrent
@@ -19,13 +20,13 @@ socket.setdefaulttimeout(10.0)
 season_episode = re.compile("S..E..", re.IGNORECASE)
 
-def getUrl(url, data=None, headers=cache.DEFAULT_HEADERS, timeout=cache.cache_timeout):
+def _getUrl(url, data=None, headers=cache.DEFAULT_HEADERS, timeout=cache.cache_timeout):
     headers = cache.DEFAULT_HEADERS
     headers['Cookie'] = 'language=en_EN'
     return cache.getUrl(url, data, headers, timeout)
 
-def getUrlUnicode(url):
-    return cache.getUrlUnicode(url, _getUrl=getUrl)
+def _getUrlUnicode(url):
+    return cache.getUrlUnicode(url, _getUrl=_getUrl)
 
 def findMovies(query, max_results=10):
     results = []
@@ -38,7 +39,7 @@ def findMovies(query, max_results=10):
             if not url.startswith('/'):
                 url = "/" + url
             url = "http://thepiratebay.org" + url
-        data = getUrlUnicode(url)
+        data = _getUrlUnicode(url)
         regexp = '''<tr.*?<td class="vertTh"><a href="/browse/(.*?)".*?<td><a href="(/tor/.*?)" class="detLink".*?>(.*?)</a>.*?</tr>'''
         for row in re.compile(regexp, re.DOTALL).findall(data):
             torrentType = row[0]
@@ -79,7 +80,7 @@ def getData(piratebayId):
     torrent[u'domain'] = 'thepiratebay.org'
     torrent[u'comment_link'] = 'http://thepiratebay.org/tor/%s' % piratebayId
-    data = getUrlUnicode(torrent['comment_link'])
+    data = _getUrlUnicode(torrent['comment_link'])
     torrent[u'title'] = findRegexp(data, '<title>(.*?) \(download torrent\) - TPB</title>')
     if not torrent[u'title']:
         return None
@@ -91,15 +92,25 @@ def getData(piratebayId):
         key = _key_map.get(key, key)
         value = decodeHtml(stripTags(d[1].strip()))
         torrent[key] = value
-    torrent[u'description'] = decodeHtml(stripTags(findRegexp(data, '<div class="nfo">(.*?)</div>'))).strip()
-    t = getUrl(torrent[u'torrent_link'])
+    torrent[u'description'] = findRegexp(data, '<div class="nfo">(.*?)</div>')
+    if torrent[u'description']:
+        torrent['description'] = normalizeNewlines(decodeHtml(stripTags(torrent['description']))).strip()
+    t = _getUrl(torrent[u'torrent_link'])
     torrent[u'torrent_info'] = getTorrentInfo(t)
     return torrent
 
 class Thepiratebay(Torrent):
+    '''
+    >>> Thepiratebay('123')
+    {}
+    >>> sha.sha(unicode(Thepiratebay('3951349'))).hexdigest()
+    'ef64e438e3eef6e6a05cac4eea56b9f0289d3f22'
+    '''
     def __init__(self, piratebayId):
         self.data = getData(piratebayId)
+        if not self.data:
+            return
         Torrent.__init__(self)
         published = self.data['uploaded']
         published = published.replace(' GMT', '').split(' +')[0]
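
The cookie-setting helpers are renamed from getUrl/getUrlUnicode to _getUrl/_getUrlUnicode, presumably because the module also does "from oxutils.cache import getUrl, getUrlUnicode": a wrapper defined under the same name shadows the import, and call sites silently get whichever definition came last. A toy illustration of that hazard, not the exact failure in this module; the bodies are invented:

# Invented stand-ins; only the name clash matters.
def getUrl(url):             # stands in for the getUrl imported from oxutils.cache
    return '<html>%s</html>' % url

def _getUrl(url):            # distinct name: the function above stays reachable
    return getUrl(url)       # the real wrapper calls cache.getUrl and adds a cookie

# Had the wrapper been named getUrl as before, its def would have replaced the
# import, and an unqualified getUrl(...) inside it would recurse into itself.

print _getUrl('http://thepiratebay.org/tor/3951349')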

View file

@@ -6,12 +6,16 @@ from oxutils import intValue
 
 class Torrent(dict):
+    '''
+    >>> Torrent()
+    {'files': 1, 'domain': u'', 'subtitle language': u'', 'seeder': -1, 'description': u'', 'language': u'', 'title': u'', 'imdbId': u'', 'downloaded': -1, 'leecher': -1, 'torrent_link': u'', 'torrent_info': {}, 'published': u'', 'announce': '', 'infohash': '', 'id': u'', 'comment_link': u'', 'size': -1}
+    '''
     _string_keys = ('id', 'title', 'description', 'infohash', 'torrent_link', 'comment_link',
         'imdbId', 'announce', 'domain', 'published', 'language', 'subtitle language')
     _int_keys = ('size', 'seeder', 'leecher', 'downloaded', 'files')
     _dict_keys = ('torrent_info', )
     _list_keys = ()
-    data = {}
+    data = {'torrent_info': {}}
 
     def __init__(self):
         for key in self._string_keys:
@@ -25,9 +29,9 @@ class Torrent(dict):
             if not isinstance(value, int):
                 value = int(intValue(value))
             self[key] = value
-        self['infohash'] = self.data['torrent_info']['hash']
-        self['size'] = self.data['torrent_info']['size']
-        self['announce'] = self.data['torrent_info']['announce']
+        self['infohash'] = self.data['torrent_info'].get('hash', '')
+        self['size'] = self.data['torrent_info'].get('size', -1)
+        self['announce'] = self.data['torrent_info'].get('announce', '')
         if 'files' in self.data['torrent_info']:
             self['files'] = len(self.data['torrent_info']['files'])
         else:
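
Torrent.__init__ previously indexed torrent_info directly for hash, size and announce; with the class-level default now {'torrent_info': {}}, the .get() fallbacks are what let the bare Torrent() doctest above build its dictionary of defaults without a real torrent file (the Mininova('123') and Thepiratebay('123') cases instead bail out earlier via the new "if not self.data: return" guard). A hedged sketch of the pattern; TorrentLike is invented and mirrors only the .get() defaults:

# Invented minimal class; mirrors only the .get() defaults introduced above.
class TorrentLike(dict):
    data = {'torrent_info': {}}
    def __init__(self):
        info = self.data['torrent_info']
        self['infohash'] = info.get('hash', '')   # info['hash'] would raise KeyError
        self['size'] = info.get('size', -1)
        self['announce'] = info.get('announce', '')

print TorrentLike()  # prints the defaults instead of raising KeyError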