From 8e8f8f389696658e68c1e35daf5f6ef5a9fc284d Mon Sep 17 00:00:00 2001
From: j <j@0xdb.org>
Date: Wed, 7 May 2008 11:45:00 +0200
Subject: [PATCH] findRegexp -> findRe, update setup.py

---
 ox/imdb.py         | 33 +++++++++++++++++----------------
 ox/mininova.py     | 10 +++++-----
 ox/thepiratebay.py | 10 +++++-----
 setup.py           | 17 ++++++++++-------
 4 files changed, 37 insertions(+), 33 deletions(-)
diff --git a/ox/imdb.py b/ox/imdb.py
index d31afde..926b2dc 100644
--- a/ox/imdb.py
+++ b/ox/imdb.py
@@ -12,7 +12,7 @@ import time
 from BeautifulSoup import BeautifulSoup
 import chardet
 import oxutils
-from oxutils import stripTags, htmldecode, findRegexp, findString
+from oxutils import stripTags, htmldecode, findRe, findString
 from oxutils.cache import getUrl, getUrlUnicode
 from oxutils.normalize import normalizeTitle, normalizeImdbId
 
@@ -57,7 +57,7 @@ def getMovieInfo(imdbId):
   data = getUrl(getUrlBase(imdbId))
   soup = BeautifulSoup(data)
   info = dict()
-  info['poster'] = findRegexp(data, 'name="poster".*?<img .*?src="(.*?)"')
+  info['poster'] = findRe(data, 'name="poster".*?<img .*?src="(.*?)"')
 
   for i in re.compile('<h5>(.*?):</h5>(.*?)<div class="info"', re.DOTALL).findall(data):
     title = stripTags(i[0]).strip().lower()
@@ -93,11 +93,12 @@ def getMovieInfo(imdbId):
     html_title = html_title.replace('<br />', ' ').replace('  ', ' ')
     title = htmldecode(html_title)
     title = stripTags(title)
-    year = findRegexp(title, '\((\d{4})\)')
+    year = findRe(title, '\((\d{4})\)')
     if not year:
-      year = findRegexp(title, '\((\d{4})')
-    title = re.sub('\(\d{4}\)', '', title)
-    title = re.sub('\(\d{4}/I*\)', '', title)
+      year = findRe(title, '\((\d{4})')
+    _y = findRe(title, r'(\([0-9\?]{4}(?:/[IVXLCDM]+)?\))')  # whole "(YYYY)" / "(YYYY/II)" marker, parens included
+    if _y:
+      title = title.replace(_y, '')
     for t in ('TV series', 'TV-Series', 'TV mini-series', '(mini)', '(VG)', '(V)', '(TV)'):
       title = title.replace(t, '')
   title = title.strip()
@@ -109,14 +110,14 @@ def getMovieInfo(imdbId):
   info['year'] = year
   '''
   #Rating
-  rating = findRegexp(data, '<b>(.*?)/10</b>')
+  rating = findRe(data, '<b>(.*?)/10</b>')
   if rating:
     info['rating'] = int(float(rating) * 1000)
   else:
     info['rating'] = -1
 
   #Votes
-  votes = findRegexp(data, '<small>\(<a href="ratings">(.*?) votes</a>\)</small>')
+  votes = findRe(data, '<small>\(<a href="ratings">(.*?) votes</a>\)</small>')
   if votes:
     info['votes'] = int(votes.replace(',', ''))
   else:
@@ -171,10 +172,10 @@ def getMovieTrailers(imdbId):
     for a in videos[0]('a'):
       title = stripTags(unicode(a)).strip()
       url = 'http://www.imdb.com' + a['href']
-      videoId = findRegexp(url, '/(vi\d*?)/')
+      videoId = findRe(url, '/(vi\d*?)/')
       iframeUrl = "http://www.imdb.com/video/trailer/%s/player" % videoId
       iframe = getUrlUnicode(iframeUrl)
-      videoUrl = unquote(findRegexp(iframe, 'addVariable\("file", "(.*?)"'))
+      videoUrl = unquote(findRe(iframe, 'addVariable\("file", "(.*?)"'))
       trailers.append({'title': title, 'url': url, 'iframe': iframeUrl, 'flv':videoUrl})
   return trailers
 
@@ -260,7 +261,7 @@ def getMovieConnections(imdbId):
     cs = BeautifulSoup(c)
     if connection:
       #relation -> list of imdb ids
-      connections[connection] = [findRegexp(a.get('href'), "\d{7}") for a in cs('a', {'href': re.compile('/title/tt')})]
+      connections[connection] = [findRe(a.get('href'), "\d{7}") for a in cs('a', {'href': re.compile('/title/tt')})]
   return connections
 
 def getMovieKeywords(imdbId):
@@ -315,11 +316,11 @@ class IMDb:
       value = unicode(value, 'utf-8')
       value = stripTags(value).strip()
     if key == 'runtime':
-      parsed_value = findRegexp(value, '(.*?) min')
-      parsed_value = findRegexp(parsed_value, '([0-9]+)')
+      parsed_value = findRe(value, '(.*?) min')
+      parsed_value = findRe(parsed_value, '([0-9]+)')
       if not parsed_value:
-        parsed_value = findRegexp(value, '(.*?) sec')
-        parsed_value = findRegexp(parsed_value, '([0-9]+)')
+        parsed_value = findRe(value, '(.*?) sec')
+        parsed_value = findRe(parsed_value, '([0-9]+)')
         if not parsed_value:
           parsed_value = 0
         else:
@@ -598,7 +599,7 @@ def guess(title, director=''):
   if return_url.startswith('http://www.imdb.com/title/tt'):
     return return_url[28:35]
   if data:
-    imdb_id = findRegexp(data.replace('\n', ' '), 'Popular Results.*?<ol><li>.*?<a href="/title/tt(.......)')
+    imdb_id = findRe(data.replace('\n', ' '), 'Popular Results.*?<ol><li>.*?<a href="/title/tt(.......)')
     if imdb_id:
       return imdb_id
 
diff --git a/ox/mininova.py b/ox/mininova.py
index 03ea512..176375d 100644
--- a/ox/mininova.py
+++ b/ox/mininova.py
@@ -9,7 +9,7 @@ from urllib import quote
 import sha
 
 from oxutils.cache import getUrl, getUrlUnicode
-from oxutils import findRegexp, cache, stripTags, decodeHtml, getTorrentInfo, intValue, normalizeNewlines
+from oxutils import findRe, cache, stripTags, decodeHtml, getTorrentInfo, intValue, normalizeNewlines
 from oxutils.normalize import normalizeImdbId
 
 from torrent import Torrent
@@ -47,7 +47,7 @@ def findMovieByImdb(imdbId):
 
 def getId(mininovaId):
   mininovaId = unicode(mininovaId)
-  d = findRegexp(mininovaId, "/(\d+)")
+  d = findRe(mininovaId, "/(\d+)")
   if d:
     return d
   mininovaId = mininovaId.split('/')
@@ -78,9 +78,9 @@ def getData(mininovaId):
     value = decodeHtml(stripTags(d[1].strip()))
     torrent[key] = value
 
-  torrent[u'title'] = findRegexp(data, '<title>(.*?):.*?</title>')
-  torrent[u'imdbId'] = findRegexp(data, 'title/tt(\d{7})')
-  torrent[u'description'] = findRegexp(data, '<div id="description">(.*?)</div>')
+  torrent[u'title'] = findRe(data, '<title>(.*?):.*?</title>')
+  torrent[u'imdbId'] = findRe(data, 'title/tt(\d{7})')
+  torrent[u'description'] = findRe(data, '<div id="description">(.*?)</div>')
   if torrent['description']:
     torrent['description'] = normalizeNewlines(decodeHtml(stripTags(torrent['description']))).strip()
   t = getUrl(torrent[u'torrent_link'])
diff --git a/ox/thepiratebay.py b/ox/thepiratebay.py
index 897c4f8..b5ea4c0 100644
--- a/ox/thepiratebay.py
+++ b/ox/thepiratebay.py
@@ -10,7 +10,7 @@ from urllib2 import URLError
 import sha
 
 from oxutils.cache import getUrl, getUrlUnicode
-from oxutils import findRegexp, cache, stripTags, decodeHtml, getTorrentInfo, normalizeNewlines
+from oxutils import findRe, cache, stripTags, decodeHtml, getTorrentInfo, normalizeNewlines
 from oxutils.normalize import normalizeImdbId
 
 from torrent import Torrent
@@ -61,7 +61,7 @@ def getId(piratebayId):
     piratebayId = piratebayId.split('org/')[1]
   if 'tor/' in piratebayId:
     piratebayId = piratebayId.split('tor/')[1]
-  d = findRegexp(piratebayId, "/(\d+)")
+  d = findRe(piratebayId, "/(\d+)")
   if d:
     piratebayId = d
   return piratebayId
@@ -81,18 +81,18 @@ def getData(piratebayId):
   torrent[u'comment_link'] = 'http://thepiratebay.org/tor/%s' % piratebayId
 
   data = _getUrlUnicode(torrent['comment_link'])
-  torrent[u'title'] = findRegexp(data, '<title>(.*?) \(download torrent\) - TPB</title>')
+  torrent[u'title'] = findRe(data, '<title>(.*?) \(download torrent\) - TPB</title>')
   if not torrent[u'title']:
     return None
   torrent[u'title'] = decodeHtml(torrent[u'title']).strip()
-  torrent[u'imdbId'] = findRegexp(data, 'title/tt(\d{7})')
+  torrent[u'imdbId'] = findRe(data, 'title/tt(\d{7})')
   torrent[u'torrent_link']="http://torrents.thepiratebay.org/%s/%s.torrent" % (piratebayId, quote(torrent['title']))
   for d in re.compile('dt>(.*?):</dt>.*?<dd.*?>(.*?)</dd>', re.DOTALL).findall(data):
     key = d[0].lower().strip()
     key = _key_map.get(key, key)
     value = decodeHtml(stripTags(d[1].strip()))
     torrent[key] = value
-  torrent[u'description'] = findRegexp(data, '<div class="nfo">(.*?)</div>')
+  torrent[u'description'] = findRe(data, '<div class="nfo">(.*?)</div>')
   if torrent[u'description']:
     torrent['description'] = normalizeNewlines(decodeHtml(stripTags(torrent['description']))).strip()
   t = _getUrl(torrent[u'torrent_link'])
diff --git a/setup.py b/setup.py
index 4877b36..4840537 100644
--- a/setup.py
+++ b/setup.py
@@ -8,16 +8,19 @@ import os
 setup(
   name="ox",
   version="0.1",
-
-  # uncomment the following lines if you fill them out in release.py
   description="collection of scrapers for various websites",
-  author="bot",
-  author_email="bot@0xdb.org",
-  url="http://ox.0xdb.org",
-  download_url="http://ox.0xdb.org/download",
-  license="GPL",
+  author="0x",
+  author_email="code@0xdb.org",
+  url="http://code.0xdb.org/ox",
+  download_url="http://code.0xdb.org/ox/download",
+  license="GPLv3",
   packages=find_packages(),
   zip_safe=False,
+  install_requires=[
+        'oxutils',
+        'feedparser',
+        'beautifulsoup',
+  ],
   keywords = [
   ],
   classifiers = [