From 16eeaf8b25acaa97e67ea3cf0a91ec1c5ef759c6 Mon Sep 17 00:00:00 2001
From: j <0x006A@0x2620.org>
Date: Mon, 12 Oct 2009 13:47:43 +0200
Subject: [PATCH] depend on ox, install as ox.web, migrate getUrl to readUrl

---
 README                           |    4 +-
 requirements.txt                 |    2 +-
 setup.py                         |    4 +-
 {oxweb => web}/__init__.py       |    2 +-
 {oxweb => web}/allmovie.py       |   12 +++---
 {oxweb => web}/auth.py           |    2 -
 {oxweb => web}/criterion.py      |   20 +++++-----
 {oxweb => web}/dailymotion.py    |    4 +-
 {oxweb => web}/epguides.py       |    6 +--
 {oxweb => web}/google.py         |   12 +++---
 {oxweb => web}/imdb.py           |   64 ++++++++++++++++----------------
 {oxweb => web}/impawards.py      |   18 ++++-----
 {oxweb => web}/itunes.py         |   16 ++++----
 {oxweb => web}/karagarga.py      |   20 +++++-----
 {oxweb => web}/lyricsfly.py      |   10 ++---
 {oxweb => web}/metacritic.py     |    8 ++--
 {oxweb => web}/mininova.py       |   18 ++++-----
 {oxweb => web}/movieposterdb.py  |    8 ++--
 {oxweb => web}/opensubtitles.py  |   16 ++++----
 {oxweb => web}/oxdb.py           |    2 +-
 {oxweb => web}/piratecinema.py   |    4 +-
 {oxweb => web}/rottentomatoes.py |   10 ++---
 {oxweb => web}/spiegel.py        |   22 +++++------
 {oxweb => web}/thepiratebay.py   |   24 ++++++------
 {oxweb => web}/torrent.py        |    2 +-
 {oxweb => web}/tv.py             |    6 +--
 {oxweb => web}/wikipedia.py      |   13 +++----
 {oxweb => web}/youtube.py        |   12 +++---
 28 files changed, 169 insertions(+), 172 deletions(-)
 rename {oxweb => web}/__init__.py (84%)
 rename {oxweb => web}/allmovie.py (88%)
 rename {oxweb => web}/auth.py (97%)
 rename {oxweb => web}/criterion.py (85%)
 rename {oxweb => web}/dailymotion.py (93%)
 rename {oxweb => web}/epguides.py (93%)
 rename {oxweb => web}/google.py (80%)
 rename {oxweb => web}/imdb.py (95%)
 rename {oxweb => web}/impawards.py (85%)
 rename {oxweb => web}/itunes.py (95%)
 rename {oxweb => web}/karagarga.py (89%)
 rename {oxweb => web}/lyricsfly.py (78%)
 rename {oxweb => web}/metacritic.py (89%)
 rename {oxweb => web}/mininova.py (88%)
 rename {oxweb => web}/movieposterdb.py (89%)
 rename {oxweb => web}/opensubtitles.py (75%)
 rename {oxweb => web}/oxdb.py (90%)
 rename {oxweb => web}/piratecinema.py (80%)
 rename {oxweb => web}/rottentomatoes.py (83%)
 rename {oxweb => web}/spiegel.py (96%)
 rename {oxweb => web}/thepiratebay.py (84%)
 rename {oxweb => web}/torrent.py (98%)
 rename {oxweb => web}/tv.py (91%)
 rename {oxweb => web}/wikipedia.py (92%)
 rename {oxweb => web}/youtube.py (93%)

diff --git a/README b/README
index bad1382..d44d4e1 100644
--- a/README
+++ b/README
@@ -2,7 +2,7 @@ python-oxweb
 the internet is a dict
 
 Depends:
  python2.5
- python-oxlib (bzr branch http://code.0xdb.org/python-oxlib)
+ python-ox (bzr branch http://code.0xdb.org/python-ox)
  python-beautifulsoup (http://www.crummy.com/software/BeautifulSoup/)
  python-feedparser (http://www.feedparser.org/) (there seam to be some issues if not using the one from ubuntu/debian)
@@ -17,4 +17,4 @@ Install:
 }
 
 Test:
- nosetests --with-doctest oxweb
+ nosetests --with-doctest web
diff --git a/requirements.txt b/requirements.txt
index 67f19ac..3fec7b5 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1 +1 @@
-oxlib
+ox
diff --git a/setup.py b/setup.py
index 75ef72e..7435af9 100644
--- a/setup.py
+++ b/setup.py
@@ -19,8 +19,8 @@ setup(
     url="http://code.0xdb.org/oxweb",
     download_url="http://code.0xdb.org/oxweb/download",
     license="GPLv3",
-    packages=['oxweb'],
-    zip_safe=False,
+    package_dir = {'ox.web': 'web'},
+    packages=['ox.web'],
     keywords = [
     ],
     classifiers = [
diff --git a/oxweb/__init__.py b/web/__init__.py
similarity index 84%
rename from oxweb/__init__.py
rename to web/__init__.py
index ac2b910..1b94046 100644
--- a/oxweb/__init__.py
+++ b/web/__init__.py
@@ -1,6 +1,6 @@
 # vi:si:et:sw=4:sts=4:ts=4
 # encoding: utf-8
-__version__ = '0.1.0'
+__version__ = '1.0.0'
 
 import imdb
 import wikipedia
diff --git a/oxweb/allmovie.py b/web/allmovie.py
similarity index 88%
rename from oxweb/allmovie.py
rename to web/allmovie.py
index 3b0295b..b189645 100644
--- a/oxweb/allmovie.py
+++ b/web/allmovie.py
@@ -3,8 +3,8 @@
 import re
 import time
 
-from oxlib import stripTags, findRe
-from oxlib.cache import getUrlUnicode
+from ox import stripTags, findRe
+from ox.cache import readUrlUnicode
 
 
 def getId(url):
@@ -24,7 +24,7 @@ def getData(id):
     data = {
         "url": getUrl(id)
     }
-    html = getUrlUnicode(data["url"])
+    html = readUrlUnicode(data["url"])
     data['aka'] = parseList(html, 'AKA')
     data['category'] = findRe(html, 'http://allmovie.com/explore/category/.*?">(.*?)')
     data['countries'] = parseList(html, 'Countries')
@@ -42,11 +42,11 @@ def getData(id):
     data['themes'] = parseList(html, 'Themes')
     data['types'] = parseList(html, 'Types')
     data['year'] = findRe(html, '"http://allmovie.com/explore/year/(.*?)"')
-    html = getUrlUnicode("http://allmovie.com/work/%s/cast" % id)
+    html = readUrlUnicode("http://allmovie.com/work/%s/cast" % id)
     data['cast'] = parseTable(html)
-    html = getUrlUnicode("http://allmovie.com/work/%s/credits" % id)
+    html = readUrlUnicode("http://allmovie.com/work/%s/credits" % id)
     data['credits'] = parseTable(html)
-    html = getUrlUnicode("http://allmovie.com/work/%s/review" % id)
+    html = readUrlUnicode("http://allmovie.com/work/%s/review" % id)
     data['review'] = parseText(html, 'Review')
     return data
diff --git a/oxweb/auth.py b/web/auth.py
similarity index 97%
rename from oxweb/auth.py
rename to web/auth.py
index 0f360df..fdb283d 100644
--- a/oxweb/auth.py
+++ b/web/auth.py
@@ -4,8 +4,6 @@
 import os
 import simplejson
 
-import oxlib
-
 
 def get(key):
     user_auth = os.environ.get('oxAUTH', os.path.expanduser('~/.ox/auth.json'))
diff --git a/oxweb/criterion.py b/web/criterion.py
similarity index 85%
rename from oxweb/criterion.py
rename to web/criterion.py
index a5074be..c204360 100644
--- a/oxweb/criterion.py
+++ b/web/criterion.py
@@ -2,10 +2,10 @@
 # vi:si:et:sw=4:sts=4:ts=4
 import re
 
-import oxlib.cache
-from oxlib.cache import getUrlUnicode
-from oxlib.html import stripTags
-from oxlib.text import findRe, removeSpecialCharacters
+import ox.cache
+from ox.cache import readUrlUnicode
+from ox.html import stripTags
+from ox.text import findRe, removeSpecialCharacters
 
 import imdb
 
@@ -30,9 +30,9 @@ def getData(id):
         "url": getUrl(id)
     }
     try:
-        html = getUrlUnicode(data["url"])
+        html = readUrlUnicode(data["url"])
     except:
-        html = oxlib.cache.getUrl(data["url"])
+        html = ox.cache.getUrl(data["url"])
     data["number"] = findRe(html, "(.*?)")
     data["title"] = findRe(html, "(.*?)")
") @@ -48,7 +48,7 @@ def getData(id): if not "/boxsets/" in result: data["posters"] = [result] else: - html_ = getUrlUnicode(result) + html_ = readUrlUnicode(result) result = findRe(html_, "(.*?)" % id) result = findRe(result, "src=\"(.*?)\"") data["posters"] = [result.replace("_w100", "")] @@ -64,7 +64,7 @@ def getData(id): def getIds(): ids = [] - html = getUrlUnicode("http://www.criterion.com/library/dvd") + html = readUrlUnicode("http://www.criterion.com/library/dvd") results = re.compile("page=(.*?)\"").findall(html) pages = int(results[len(results) - 2]) for page in range(pages, 0, -1): @@ -74,13 +74,13 @@ def getIds(): def getIdsByPage(page): ids = [] - html = getUrlUnicode("http://www.criterion.com/library/dvd?page=%s" % page) + html = readUrlUnicode("http://www.criterion.com/library/dvd?page=%s" % page) results = re.compile("films/(.*?)\"").findall(html) for result in results: ids.append(result) results = re.compile("boxsets/(.*?)\"").findall(html) for result in results: - html = getUrlUnicode("http://www.criterion.com/boxsets/" + result) + html = readUrlUnicode("http://www.criterion.com/boxsets/" + result) results = re.compile("films/(.*?)\"").findall(html) for result in results: ids.append(result) diff --git a/oxweb/dailymotion.py b/web/dailymotion.py similarity index 93% rename from oxweb/dailymotion.py rename to web/dailymotion.py index c3bc2e7..63e09dc 100644 --- a/oxweb/dailymotion.py +++ b/web/dailymotion.py @@ -2,7 +2,7 @@ # vi:si:et:sw=4:sts=4:ts=4 import re from urllib import unquote -from oxlib.cache import getUrl +from ox.cache import readUrl def getVideoUrl(url): @@ -13,7 +13,7 @@ def getVideoUrl(url): >>> getVideoUrl('http://www.dailymotion.com/relevance/search/priere%2Bpour%2Brefuznik/video/x3ou94_priere-pour-refuznik-2-jeanluc-goda_shortfilms').split('?key')[0] 'http://www.dailymotion.com/get/15/320x240/flv/6197800.flv' ''' - data = getUrl(url) + data = readUrl(url) video = re.compile('''video", "(.*?)"''').findall(data) for v in video: v = unquote(v).split('@@')[0] diff --git a/oxweb/epguides.py b/web/epguides.py similarity index 93% rename from oxweb/epguides.py rename to web/epguides.py index f4cda49..d4ad1aa 100644 --- a/oxweb/epguides.py +++ b/web/epguides.py @@ -3,8 +3,8 @@ import re import time -from oxlib import stripTags, findRe -from oxlib.cache import getUrlUnicode +from ox import stripTags, findRe +from ox.cache import readUrlUnicode import google @@ -21,7 +21,7 @@ def getShowUrl(title): return None def getShowData(url): - data = getUrlUnicode(url) + data = readUrlUnicode(url) r = {} r['title'] = stripTags(findRe(data, '

(.*?)

')) r['imdb'] = findRe(data, '

.*?

     r['imdb'] = findRe(data, '.*?')
diff --git a/oxweb/google.py b/web/google.py
similarity index 80%
rename from oxweb/google.py
rename to web/google.py
index 2362cba..9c61b19 100644
--- a/oxweb/google.py
+++ b/web/google.py
@@ -10,8 +10,8 @@ import Queue
 
 import simplejson
 
-import oxlib
-from oxlib import stripTags
+import ox
+from ox import stripTags
 
 
 '''
@@ -30,15 +30,15 @@ FIXME: how search depper than first page?
 DEFAULT_MAX_RESULTS = 10
 DEFAULT_TIMEOUT = 24*60*60
 
-def getUrl(url, data=None, headers=oxlib.net.DEFAULT_HEADERS, timeout=DEFAULT_TIMEOUT):
-    return oxlib.cache.getUrl(url, data, headers, timeout)
+def readUrl(url, data=None, headers=ox.net.DEFAULT_HEADERS, timeout=DEFAULT_TIMEOUT):
+    return ox.cache.readUrl(url, data, headers, timeout)
 
 def quote_plus(s):
     return urllib.quote_plus(s.encode('utf-8'))
 
 def find(query, max_results=DEFAULT_MAX_RESULTS, timeout=DEFAULT_TIMEOUT):
     url = "http://www.google.com/search?q=%s" % quote_plus(query)
-    data = getUrl(url, timeout=timeout)
+    data = readUrl(url, timeout=timeout)
     link_re = r'(?P.*?)' + \
               r'.*?(?:|)' + \
               r'(?P.*?)' + '(?:|(.*?)')
diff --git a/oxweb/imdb.py b/web/imdb.py
similarity index 95%
rename from oxweb/imdb.py
rename to web/imdb.py
     titles = re.compile("td>(.*?)\n\n(.*)").findall(titles)
     return titles
@@ -268,7 +268,7 @@ def creditList(data, section=None):
 def getMovieCredits(imdbId):
     credits = dict()
     url = "%sfullcredits" % getUrlBase(imdbId)
-    data = getUrlUnicode(url)
+    data = readUrlUnicode(url)
     groups = data.split('')
     for g in groups:
         section = re.compile('''name="(.*?)".*? href="/Glossary''').findall(g)
@@ -278,7 +278,7 @@ def getMovieCredits(imdbId):
 
 def getMovieTrailers(imdbId):
     url = "%strailers" % getUrlBase(imdbId)
-    data = getUrlUnicode(url)
+    data = readUrlUnicode(url)
     soup = BeautifulSoup(data)
     videos = soup('div', {'class':"video-gallery"})
     trailers = []
@@ -288,27 +288,27 @@ def getMovieTrailers(imdbId):
         url = 'http://www.imdb.com' + a['href']
         videoId = findRe(url, '/(vi\d*?)/')
         iframeUrl = "http://www.imdb.com/video/trailer/%s/player" % videoId
-        iframe = getUrlUnicode(iframeUrl)
+        iframe = readUrlUnicode(iframeUrl)
         videoUrl = unquote(findRe(iframe, 'addVariable\("file", "(.*?)"'))
         trailers.append({'title': title, 'url': url, 'iframe': iframeUrl, 'flv':videoUrl})
     return trailers
 
 def getMovieQuotes(imdbId):
     url = "%squotes" % getUrlBase(imdbId)
-    data = getUrlUnicode(url)
+    data = readUrlUnicode(url)
     quotes = re.compile('(.*?):(.*?)', re.DOTALL).findall(findString(data, '
     (.*?)').split('')[0]
     return plot.strip()
 
 def getMovieTechnical(imdbId):
     url = "%stechnical" % getUrlBase(imdbId)
-    data = getUrlUnicode(url)
+    data = readUrlUnicode(url)
     results = {}
     for t in re.compile('(.*?)(.*?)', re.DOTALL).findall(data):
         results[t[0].strip()] = t[1].strip()
@@ -316,7 +316,7 @@ def getMovieCompanyCredits(imdbId):
     url = "%scompanycredits" % getUrlBase(imdbId)
-    data = getUrlUnicode(url)
+    data = readUrlUnicode(url)
     results = {}
     for field, c in re.compile('(.*?)(.*?)').findall(data):
         results[field.strip()] = []
@@ -326,7 +326,7 @@ def getMovieLocations(imdbId):
     url = "%slocations" % getUrlBase(imdbId)
-    data = getUrlUnicode(url)
+    data = readUrlUnicode(url)
     locations = re.compile('(.*?)').findall(data)
     def clean(t):
         t = decodeHtml(t)
@@ -371,7 +371,7 @@ def getMovieTrivia(imdbId):
 
 def getMovieConnections(imdbId):
     url = "%smovieconnections" % getUrlBase(imdbId)
-    data = getUrlUnicode(url)
+    data = readUrlUnicode(url)
     connections={}
     for c in re.compile('''(.*?)(.*?)\n\n''', re.DOTALL).findall(data):
         connections[unicode(c[0])] = re.compile('''''').findall(c[1])
@@ -379,7 +379,7 @@ def getMovieKeywords(imdbId):
     url = "%skeywords" % getUrlBase(imdbId)
-    data = getUrlUnicode(url)
+    data = readUrlUnicode(url)
     keywords = []
     for keyword in re.compile('''(.*?)''').findall(data):
         keyword = decodeHtml(keyword)
@@ -389,7 +389,7 @@ def getMovieExternalReviews(imdbId):
     url = "%sexternalreviews" % getUrlBase(imdbId)
-    data = getUrlUnicode(url)
+    data = readUrlUnicode(url)
     _reviews = re.compile('  • (.*?)  • ').findall(data)
     reviews = {}
     for r in _reviews:
@@ -430,7 +430,7 @@ def _parseDate(d):
 
 def getMovieReleaseDates(imdbId):
     url = "%sreleaseinfo" % getUrlBase(imdbId)
-    data = getUrlUnicode(url)
+    data = readUrlUnicode(url)
     releasedates = []
     regexp = '''(.*?).*?(.*?).*?(.*?)'''
@@ -468,7 +468,7 @@ def getMovieFlimingDates(imdbId):
 
 def getMovieBusiness(imdbId):
     url = "%sbusiness" % getUrlBase(imdbId)
-    data = getUrlUnicode(url)
+    data = readUrlUnicode(url)
     business = {}
     for r in re.compile('''
    (.*?)
    (.*?)
    .
    ''', re.DOTALL).findall(data):
         key = stripTags(r[0]).strip().lower()
@@ -478,7 +478,7 @@ def getMovieEpisodes(imdbId):
     url = "%sepisodes" % getUrlBase(imdbId)
-    data = getUrlUnicode(url)
+    data = readUrlUnicode(url)
     episodes = {}
     regexp = r'''
    Season (.*?), Episode (.*?): (.*?)

    (.*?)
    (.*?)
    '''
     for r in re.compile(regexp, re.DOTALL).findall(data):
@@ -514,7 +514,7 @@ class IMDb:
         self.pageUrl = getUrlBase(imdbId)
 
     def getPage(self):
-        return getUrlUnicode(self.pageUrl)
+        return readUrlUnicode(self.pageUrl)
 
     def parse_raw_value(self, key, value):
         if key in ('runtime', 'language', 'genre', 'country', 'tagline', 'plot_outline'):
@@ -682,10 +682,10 @@ def guess(title, director=''):
     search = 'site:imdb.com "%s"' % title
     for (name, url, desc) in google.find(search, 2):
         if url.startswith('http://www.imdb.com/title/tt'):
-            return normalizeImdbId(int(oxlib.intValue(url)))
+            return normalizeImdbId(int(ox.intValue(url)))
 
     try:
-        req = urllib2.Request(imdb_url, None, oxlib.net.DEFAULT_HEADERS)
+        req = urllib2.Request(imdb_url, None, ox.net.DEFAULT_HEADERS)
         u = urllib2.urlopen(req)
         data = u.read()
         return_url = u.url
@@ -700,7 +700,7 @@ def guess(title, director=''):
         return imdb_id
 
     imdb_url = 'http://www.imdb.com/find?q=%s;s=tt;site=aka' % quote(title.encode('utf-8'))
-    req = urllib2.Request(imdb_url, None, oxlib.net.DEFAULT_HEADERS)
+    req = urllib2.Request(imdb_url, None, ox.net.DEFAULT_HEADERS)
     u = urllib2.urlopen(req)
     data = u.read()
     return_url = u.url
@@ -737,7 +737,7 @@ def getEpisodeData(title, episode, show_url = None):
 def getPersonData(imdbId):
     imdbId = normalizeImdbId(imdbId)
     url = u'http://www.imdb.com/name/nm%s/' % imdbId
-    data = getUrlUnicode(url)
+    data = readUrlUnicode(url)
     info = dict()
     info['name'] = findRe(data, u'(.*?)')
     filmo = data.split(u'Additional Details')[0]
diff --git a/oxweb/impawards.py b/web/impawards.py
similarity index 85%
rename from oxweb/impawards.py
rename to web/impawards.py
index d743223..7d027a4 100644
--- a/oxweb/impawards.py
+++ b/web/impawards.py
@@ -2,9 +2,9 @@
 # encoding: utf-8
 import re
 
-from oxlib.cache import getUrlUnicode
-from oxlib.html import stripTags
-from oxlib.text import findRe
+from ox.cache import readUrlUnicode
+from ox.html import stripTags
+from ox.text import findRe
 
 import imdb
 
@@ -22,7 +22,7 @@ def getData(id):
     data = {
         'url': getUrl(id)
     }
-    html = getUrlUnicode(data['url'])
+    html = readUrlUnicode(data['url'])
     data['imdbId'] = findRe(html, 'imdb.com/title/tt(.*?) ')
     data['title'] = stripTags(findRe(html, '(.*?) \('))
     data['year'] = findRe(html, '\((.*?)\)')
@@ -31,11 +31,11 @@ def getData(id):
     for result in results:
         result = result.replace('_xlg.html', '.html')
         url = 'http://www.impawards.com/%s/%s' % (data['year'], result)
-        html = getUrlUnicode(url)
+        html = readUrlUnicode(url)
         result = findRe(html, '
    ')) + 1
     for page in range(pages, 0, -1):
         for id in getIdsByPage(page):
@@ -65,7 +65,7 @@ def getIds():
 
 def getIdsByPage(page):
     ids = []
-    html = getUrlUnicode('http://www.impawards.com/archives/page%s.html' % page, timeout = -1)
+    html = readUrlUnicode('http://www.impawards.com/archives/page%s.html' % page, timeout = -1)
     results = re.compile('', re.DOTALL).findall(html)
     for result in results:
         url = 'http://impawards.com/%s' % result
@@ -74,7 +74,7 @@ def getUrl(id):
     url = "http://www.impawards.com/%s.html" % id
-    html = getUrlUnicode(url)
+    html = readUrlUnicode(url)
     if findRe(html, "No Movie Posters on This Page"):
         url = "http://www.impawards.com/%s_ver1.html" % id
     return url
diff --git a/oxweb/itunes.py b/web/itunes.py
similarity index 95%
rename from oxweb/itunes.py
rename to web/itunes.py
index ffabeff..5348e40 100644
--- a/oxweb/itunes.py
+++ b/web/itunes.py
@@ -3,10 +3,10 @@
 import re
 import urllib
 
-from oxlib.cache import getUrl
-from oxlib.html import decodeHtml, stripTags
-from oxlib.text import findRe
-from oxlib.text import findString
+from ox.cache import readUrl
+from ox.html import decodeHtml, stripTags
+from ox.text import findRe
+from ox.text import findString
 
 
 # to sniff itunes traffic, use something like
@@ -113,14 +113,14 @@ class ItunesAlbum:
     def getId(self):
         url = composeUrl('advancedSearch', {'media': 'music', 'title': self.title, 'artist': self.artist})
-        xml = getUrl(url, headers = ITUNES_HEADERS)
+        xml = readUrl(url, headers = ITUNES_HEADERS)
         id = findRe(xml, 'viewAlbum\?id=(.*?)&')
         return id
 
     def getData(self):
         data = {'id': self.id}
         url = composeUrl('viewAlbum', {'id': self.id})
-        xml = getUrl(url, None, ITUNES_HEADERS)
+        xml = readUrl(url, None, ITUNES_HEADERS)
         data['albumName'] = findRe(xml, '(.*?)')
         data['artistName'] = findRe(xml, '(.*?)')
url="(.*?)"') @@ -144,14 +144,14 @@ class ItunesMovie: def getId(self): url = composeUrl('advancedSearch', {'media': 'movie', 'title': self.title, 'director': self.director}) - xml = getUrl(url, headers = ITUNES_HEADERS) + xml = readUrl(url, headers = ITUNES_HEADERS) id = findRe(xml, 'viewMovie\?id=(.*?)&') return id def getData(self): data = {'id': self.id} url = composeUrl('viewMovie', {'id': self.id}) - xml = getUrl(url, None, ITUNES_HEADERS) + xml = readUrl(url, None, ITUNES_HEADERS) f = open('/Users/rolux/Desktop/iTunesData.xml', 'w') f.write(xml) f.close() diff --git a/oxweb/karagarga.py b/web/karagarga.py similarity index 89% rename from oxweb/karagarga.py rename to web/karagarga.py index acbbebd..dfb667b 100644 --- a/oxweb/karagarga.py +++ b/web/karagarga.py @@ -1,24 +1,24 @@ import re -from oxlib import cache -from oxlib.html import stripTags -from oxlib.text import findRe +from ox import cache +from ox.html import stripTags +from ox.text import findRe import auth -def _getUrl(url, data=None, headers=cache.DEFAULT_HEADERS, timeout=cache.cache_timeout, valid=None): +def readUrl(url, data=None, headers=cache.DEFAULT_HEADERS, timeout=cache.cache_timeout, valid=None): headers = headers.copy() headers["Cookie"] = auth.get("karagarga.cookie") - return cache.getUrl(url, data, headers, timeout) + return cache.readUrl(url, data, headers, timeout) -def getUrlUnicode(url, timeout=cache.cache_timeout): - return cache.getUrlUnicode(url, _getUrl=_getUrl, timeout=timeout) +def readUrlUnicode(url, timeout=cache.cache_timeout): + return cache.readUrlUnicode(url, _readUrl=readUrl, timeout=timeout) def getData(id): data = { "url": getUrl(id) } - html = getUrlUnicode("%s%s" % (data["url"], "&filelist=1")) + html = readUrlUnicode("%s%s" % (data["url"], "&filelist=1")) if 'No torrent with ID' in html: return False data['added'] = stripTags(parseTable(html, 'Added')) @@ -87,7 +87,7 @@ def getId(url): return url.split("=")[-1] def getTorrent(id): - return _getUrl(getData(id)['torrent']) + return readUrl(getData(id)['torrent']) def getIds(lastId = 20): lastId = '%s' % lastId @@ -105,7 +105,7 @@ def getIds(lastId = 20): def getIdsByPage(page): ids = [] url = 'http://karagarga.net/browse.php?page=%s&cat=1&sort=added&d=DESC' % page - html = getUrlUnicode(url, timeout = 23*60*60) #get new ids once per day + html = readUrlUnicode(url, timeout = 23*60*60) #get new ids once per day strings = html.split('') strings.pop(0) for string in strings: diff --git a/oxweb/lyricsfly.py b/web/lyricsfly.py similarity index 78% rename from oxweb/lyricsfly.py rename to web/lyricsfly.py index 46c9545..6a9b58a 100644 --- a/oxweb/lyricsfly.py +++ b/web/lyricsfly.py @@ -1,15 +1,15 @@ # -*- coding: utf-8 -*- # vi:si:et:sw=4:sts=4:ts=4 -from oxlib.cache import getUrl -from oxlib.html import decodeHtml -from oxlib.text import findRe +from ox.cache import readUrl +from ox.html import decodeHtml +from ox.text import findRe def getLyrics(title, artist): - html = getUrl('http://lyricsfly.com/api/') + html = readUrl('http://lyricsfly.com/api/') key = findRe(html, '(.*?)') url = 'http://lyricsfly.com/api/api.php?i=%s&a=%s&t=%s' % (key, artist, title) - xml = getUrl(url) + xml = readUrl(url) lyrics = findRe(xml, '(.*?)\[br\] Lyrics [a-z]* by lyricsfly.com') lyrics = lyrics.replace('\n', '').replace('\r', '') lyrics = lyrics.replace('[br]', '\n').strip() diff --git a/oxweb/metacritic.py b/web/metacritic.py similarity index 89% rename from oxweb/metacritic.py rename to web/metacritic.py index 220870c..34e20a3 100644 --- a/oxweb/metacritic.py 
+++ b/web/metacritic.py
@@ -3,14 +3,14 @@
 import re
 from urllib import quote
 
-from oxlib.cache import getUrl, getUrlUnicode
-from oxlib import findRe, decodeHtml, stripTags
+from ox.cache import readUrl, readUrlUnicode
+from ox import findRe, decodeHtml, stripTags
 
 
 def getMetacriticShowUrl(title):
     title = quote(title)
     url = "http://www.metacritic.com/search/process?ty=6&ts=%s&tfs=tvshow_title&x=0&y=0&sb=0&release_date_s=&release_date_e=&metascore_s=&metascore_e=" % title
-    data = getUrl(url)
+    data = readUrl(url)
     return findRe(data, '(http://www.metacritic.com/tv/shows/.*?)\?')
 
 def getData(title, url=None):
@@ -18,7 +18,7 @@
     if not url:
         url = getMetacriticShowUrl(title)
     if not url:
         return None
-    data = getUrlUnicode(url)
+    data = readUrlUnicode(url)
     score = findRe(data, 'ALT="Metascore: (.*?)"')
     if score:
         score = int(score)
diff --git a/oxweb/mininova.py b/web/mininova.py
similarity index 88%
rename from oxweb/mininova.py
rename to web/mininova.py
index 816904d..bfaa776 100644
--- a/oxweb/mininova.py
+++ b/web/mininova.py
@@ -5,10 +5,10 @@
 import re
 import socket
 from urllib import quote
 
-from oxlib.cache import getUrl, getUrlUnicode
-from oxlib import findRe, cache, stripTags, decodeHtml, getTorrentInfo, intValue, normalizeNewlines
-from oxlib.normalize import normalizeImdbId
-import oxlib
+from ox.cache import readUrl, readUrlUnicode
+from ox import findRe, cache, stripTags, decodeHtml, getTorrentInfo, intValue, normalizeNewlines
+from ox.normalize import normalizeImdbId
+import ox
 
 from torrent import Torrent
 
@@ -31,7 +31,7 @@ def findMovie(query, max_results=10):
     '''search for torrents on mininova
     '''
     url = "http://www.mininova.org/search/%s/seeds" % quote(query)
-    data = getUrlUnicode(url)
+    data = readUrlUnicode(url)
     return _parseResultsPage(data, max_results)
 
 def findMovieByImdb(imdbId):
@@ -39,7 +39,7 @@
     '''
     results = []
     imdbId = normalizeImdbId(imdbId)
-    data = getUrlUnicode("http://www.mininova.org/imdb/?imdb=%s" % imdbId)
+    data = readUrlUnicode("http://www.mininova.org/imdb/?imdb=%s" % imdbId)
     return _parseResultsPage(data)
 
 def getId(mininovaId):
@@ -55,7 +55,7 @@ def exists(mininovaId):
     mininovaId = getId(mininovaId)
-    data = oxlib.net.getUrl("http://www.mininova.org/tor/%s" % mininovaId)
+    data = ox.net.readUrl("http://www.mininova.org/tor/%s" % mininovaId)
     if not data or 'Torrent not found...' in data:
         return False
     if 'tracker of this torrent requires registration.' in data:
         return False
     return True
@@ -74,7 +74,7 @@ def getData(mininovaId):
     torrent[u'torrent_link'] = "http://www.mininova.org/get/%s" % mininovaId
     torrent[u'details_link'] = "http://www.mininova.org/det/%s" % mininovaId
 
-    data = getUrlUnicode(torrent['comment_link']) + getUrlUnicode(torrent['details_link'])
+    data = readUrlUnicode(torrent['comment_link']) + readUrlUnicode(torrent['details_link'])
     if 'Torrent not found...' in data:
         return None
@@ -89,7 +89,7 @@
     torrent[u'description'] = findRe(data, '(.*?)')
     if torrent['description']:
         torrent['description'] = normalizeNewlines(decodeHtml(stripTags(torrent['description']))).strip()
-    t = getUrl(torrent[u'torrent_link'])
+    t = readUrl(torrent[u'torrent_link'])
     torrent[u'torrent_info'] = getTorrentInfo(t)
     return torrent
diff --git a/oxweb/movieposterdb.py b/web/movieposterdb.py
similarity index 89%
rename from oxweb/movieposterdb.py
rename to web/movieposterdb.py
index 85ee172..0068123 100644
--- a/oxweb/movieposterdb.py
+++ b/web/movieposterdb.py
@@ -3,8 +3,8 @@
 
 import re
 
-from oxlib.cache import getUrlUnicode
-from oxlib import findRe
+from ox.cache import readUrlUnicode
+from ox import findRe
 
 def getData(id):
     '''
@@ -24,7 +24,7 @@ def getId(url):
 
 def getPostersByUrl(url, group=True):
     posters = []
-    html = getUrlUnicode(url)
+    html = readUrlUnicode(url)
     if url in html:
         if group:
             results = re.compile('', re.DOTALL).findall(html)
             for result in results:
                 posters += getPostersByUrl(result, False)
         results = re.compile('', re.DOTALL).findall(html)
         for result in results:
-            html = getUrlUnicode(result)
+            html = readUrlUnicode(result)
             posters.append(findRe(html, '"(http://www.movieposterdb.com/posters/.+?\.jpg)"'))
     return posters
diff --git a/oxweb/opensubtitles.py b/web/opensubtitles.py
similarity index 75%
rename from oxweb/opensubtitles.py
rename to web/opensubtitles.py
index e0c0b4d..3d872ba 100644
--- a/oxweb/opensubtitles.py
+++ b/web/opensubtitles.py
@@ -3,9 +3,9 @@
 import re
 
 import feedparser
 
-from oxlib.cache import getUrl, getUrlUnicode
-import oxlib
-from oxlib import langCode2To3, langTo3Code
+from ox.cache import readUrl, readUrlUnicode
+import ox
+from ox import langCode2To3, langTo3Code
 
 def findSubtitlesByImdb(imdb, parts = 1, language = "eng"):
     if len(language) == 2:
@@ -16,7 +16,7 @@
     if language:
         url += "sublanguageid-%s/" % language
     url += "subsumcd-%s/subformat-srt/imdbid-%s/rss_2_00" % (parts, imdb)
-    data = getUrl(url)
+    data = readUrl(url)
     if "title>opensubtitles.com - search results
    (.*?)')
     if '(' in r['title']:
diff --git a/oxweb/spiegel.py b/web/spiegel.py
similarity index 96%
rename from oxweb/spiegel.py
rename to web/spiegel.py
index b345c43..abfe189 100644
--- a/oxweb/spiegel.py
+++ b/web/spiegel.py
@@ -6,9 +6,9 @@
 import time
 
 from BeautifulSoup import BeautifulSoup
 
-import oxlib.cache
-from oxlib.html import decodeHtml, stripTags
-import oxlib.net
+import ox.cache
+from ox.html import decodeHtml, stripTags
+import ox.net
 
 
 def getNews(year, month, day):
@@ -23,9 +23,9 @@
     for section in sections:
         url = 'http://www.spiegel.de/%s/0,1518,archiv-%d-%03d,00.html' % (section, year, day)
         if date == time.strftime('%d.%m.%Y', time.localtime()):
-            html = oxlib.net.getUrl(url)
+            html = ox.net.readUrl(url)
         else:
-            html = oxlib.cache.getUrl(url)
+            html = ox.cache.readUrl(url)
         for item in re.compile('
        (.*?)', re.DOTALL).findall(item)[0]).strip()
         try:
@@ -102,11 +102,11 @@ def formatSubsection(string):
 
 def getIssue(year, week):
     coverUrl = 'http://www.spiegel.de/static/epaper/SP/%d/%d/ROSPANZ%d%03d0001-312.jpg' % (year, week, year, week)
-    if not oxlib.net.exists(coverUrl):
+    if not ox.net.exists(coverUrl):
         return None
     url = 'http://service.spiegel.de/digas/servlet/epaper?Q=SP&JG=%d&AG=%d&SE=1&AN=INHALT' % (year, week)
     contents = []
-    soup = BeautifulSoup(oxlib.cache.getUrl(url))
+    soup = BeautifulSoup(ox.cache.readUrl(url))
     for item in soup('a', {'href': re.compile('http://service.spiegel.de/digas/servlet/epaper\?Q=SP&JG=')}):
         item = str(item)
         page = int(re.compile('&SE=(.*?)"').findall(item)[0])
@@ -116,7 +116,7 @@
     pages = page + 2
     for page in range(1, pages + 10):
         url = 'http://www.spiegel.de/static/epaper/SP/%d/%d/ROSPANZ%d%03d%04d-205.jpg' % (year, week, year, week, page)
-        if oxlib.cache.exists(url):
+        if ox.cache.exists(url):
             pageUrl[page] = url
         else:
             pageUrl[page] = ''
@@ -164,7 +164,7 @@ def archiveIssues():
                 f.close()
             filename = '%s/Der Spiegel %d %02d.jpg' % (dirname, y, w)
             if not os.path.exists(filename):
-                data = oxlib.cache.getUrl(issue['coverUrl'])
+                data = ox.cache.readUrl(issue['coverUrl'])
                 f = open(filename, 'w')
                 f.write(data)
                 f.close()
@@ -173,7 +173,7 @@
                 if url:
                     filename = '%s/Der Spiegel %d %02d %03d.jpg' % (dirname, y, w, page)
                     if not os.path.exists(filename):
-                        data = oxlib.cache.getUrl(url)
+                        data = ox.cache.readUrl(url)
                         f = open(filename, 'w')
                         f.write(data)
                         f.close()
@@ -244,7 +244,7 @@ def archiveNews():
                 f.close()
             filename = dirname + '/' + new['imageUrl'].split('/')[-1]
             if not os.path.exists(filename):
-                data = oxlib.cache.getUrl(new['imageUrl'])
+                data = ox.cache.readUrl(new['imageUrl'])
                 f = open(filename, 'w')
                 f.write(data)
                 f.close()
diff --git a/oxweb/thepiratebay.py b/web/thepiratebay.py
similarity index 84%
rename from oxweb/thepiratebay.py
rename to web/thepiratebay.py
index d2e23b0..4202a4d 100644
--- a/oxweb/thepiratebay.py
+++ b/web/thepiratebay.py
@@ -6,10 +6,10 @@
 import socket
 from urllib import quote, urlencode
 from urllib2 import URLError
 
-from oxlib.cache import getUrl, getUrlUnicode
-from oxlib import findRe, cache, stripTags, decodeHtml, getTorrentInfo, normalizeNewlines
-from oxlib.normalize import normalizeImdbId
-import oxlib
+from ox.cache import readUrl, readUrlUnicode
+from ox import findRe, cache, stripTags, decodeHtml, getTorrentInfo, normalizeNewlines
+from ox.normalize import normalizeImdbId
+import ox
 
 from torrent import Torrent
 
@@ -18,13 +18,13 @@ cache_timeout = 24*60*60 # cache search only for 24 hours
 
 season_episode = re.compile("S..E..", re.IGNORECASE)
 
-def _getUrl(url, data=None, headers=cache.DEFAULT_HEADERS, timeout=cache.cache_timeout, valid=None):
+def _readUrl(url, data=None, headers=cache.DEFAULT_HEADERS, timeout=cache.cache_timeout, valid=None):
     headers = headers.copy()
     headers['Cookie'] = 'language=en_EN'
-    return cache.getUrl(url, data, headers, timeout)
+    return cache.readUrl(url, data, headers, timeout)
 
-def _getUrlUnicode(url, timeout=cache.cache_timeout):
-    return cache.getUrlUnicode(url, _getUrl=_getUrl, timeout=timeout)
+def _readUrlUnicode(url, timeout=cache.cache_timeout):
+    return cache.readUrlUnicode(url, _readUrl=_readUrl, timeout=timeout)
 
 def findMovies(query, max_results=10):
     results = []
         if not url.startswith('/'):
             url = "/" + url
         url = "http://thepiratebay.org" + url
-        data = _getUrlUnicode(url, timeout=cache_timeout)
+        data = _readUrlUnicode(url, timeout=cache_timeout)
        regexp = '''(.*?).*?'''
        for row in re.compile(regexp, re.DOTALL).findall(data):
            torrentType = row[0]
@@ -67,7 +67,7 @@ def getId(piratebayId):
 
 def exists(piratebayId):
     piratebayId = getId(piratebayId)
-    return oxlib.net.exists("http://thepiratebay.org/torrent/%s" % piratebayId)
+    return ox.net.exists("http://thepiratebay.org/torrent/%s" % piratebayId)
 
 def getData(piratebayId):
     _key_map = {
@@ -83,7 +83,7 @@ def getData(piratebayId):
     torrent[u'domain'] = 'thepiratebay.org'
     torrent[u'comment_link'] = 'http://thepiratebay.org/torrent/%s' % piratebayId
 
-    data = _getUrlUnicode(torrent['comment_link'])
+    data = _readUrlUnicode(torrent['comment_link'])
     torrent[u'title'] = findRe(data, '(.*?) \(download torrent\) - TPB')
     if not torrent[u'title']:
         return None
@@ -99,7 +99,7 @@
     torrent[u'description'] = findRe(data, '(.*?)')
     if torrent[u'description']:
         torrent['description'] = normalizeNewlines(decodeHtml(stripTags(torrent['description']))).strip()
-    t = _getUrl(torrent[u'torrent_link'])
+    t = _readUrl(torrent[u'torrent_link'])
     torrent[u'torrent_info'] = getTorrentInfo(t)
     return torrent
diff --git a/oxweb/torrent.py b/web/torrent.py
similarity index 98%
rename from oxweb/torrent.py
rename to web/torrent.py
index 9188716..68cd274 100644
--- a/oxweb/torrent.py
+++ b/web/torrent.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 # vi:si:et:sw=4:sts=4:ts=4
-from oxlib import intValue
+from ox import intValue
 
 
 class Torrent(dict):
diff --git a/oxweb/tv.py b/web/tv.py
similarity index 91%
rename from oxweb/tv.py
rename to web/tv.py
index d51069c..3808bbd 100644
--- a/oxweb/tv.py
+++ b/web/tv.py
@@ -3,8 +3,8 @@
 import re
 import time
 
-from oxlib import stripTags, findRe
-from oxlib.cache import getUrlUnicode
+from ox import stripTags, findRe
+from ox.cache import readUrlUnicode
 
 
 def getEpisodeData(url):
@@ -14,7 +14,7 @@ def getEpisodeData(url):
     example: getEpisodeData('http://www.tv.com/lost/do-no-harm/episode/399310/summary.html')
     '''
-    data = getUrlUnicode(url)
+    data = readUrlUnicode(url)
     r = {}
     r['description'] = stripTags(findRe(data, 'div id="main-col">.*?(.*?)(.*?)')
diff --git a/oxweb/wikipedia.py b/web/wikipedia.py
similarity index 92%
rename from oxweb/wikipedia.py
rename to web/wikipedia.py
index 2247440..3d99688 100644
--- a/oxweb/wikipedia.py
+++ b/web/wikipedia.py
@@ -3,8 +3,8 @@
 from urllib import urlencode
 
 import simplejson
 
-from oxlib.cache import getUrlUnicode
-from oxlib import findRe, decodeHtml
+from ox.cache import readUrl, readUrlUnicode
+from ox import findRe, decodeHtml
 
 
 def getId(url):
@@ -44,7 +44,7 @@ def getUrlByAllmovieId(allmovieId):
 
 def getWikiData(wikipediaUrl):
     url = wikipediaUrl.replace('wikipedia.org/wiki/', 'wikipedia.org/w/index.php?title=')
     url = "%s&action=raw" % url
-    data = getUrlUnicode(url)
+    data = readUrlUnicode(url)
     return data
 
 def getMovieData(wikipediaUrl):
@@ -83,7 +83,7 @@ def getMovieData(wikipediaUrl):
     return filmbox
 
 def getImageUrl(name):
-    data = getUrlUnicode('http://en.wikipedia.org/wiki/Image:' + name)
+    data = readUrlUnicode('http://en.wikipedia.org/wiki/Image:' + name)
     url = findRe(data, 'href="(http://upload.wikimedia.org/.*?)"')
     return url
 
@@ -103,13 +103,12 @@ def getAllmovieId(wikipediaUrl):
     return data.get('amg_id', '')
 
 def find(query, max_results=10):
-    from oxlib.cache import getUrl
     query = {'action': 'query', 'list':'search', 'format': 'json', 'srlimit': max_results, 'srwhat': 'text', 'srsearch': query.encode('utf-8')}
     url = "http://en.wikipedia.org/w/api.php?" + urlencode(query)
-    data = getUrl(url)
+    data = readUrl(url)
     if not data:
-        data = getUrl(url, timeout=0)
+        data = readUrl(url, timeout=0)
     result = simplejson.loads(data)
     results = []
     if result and 'query' in result:
diff --git a/oxweb/youtube.py b/web/youtube.py
similarity index 93%
rename from oxweb/youtube.py
rename to web/youtube.py
index d4b36db..cb9ca53 100644
--- a/oxweb/youtube.py
+++ b/web/youtube.py
@@ -6,12 +6,12 @@ import xml.etree.ElementTree as ET
 import re
 
 import feedparser
 
-from oxlib.cache import getUrl, getUrlUnicode
-from oxlib import findString, findRe
+from ox.cache import readUrl, readUrlUnicode
+from ox import findString, findRe
 
 
 def getVideoKey(youtubeId):
-    data = getUrl("http://www.youtube.com/get_video_info?&video_id=%s" % youtubeId)
+    data = readUrl("http://www.youtube.com/get_video_info?&video_id=%s" % youtubeId)
     match = re.compile("token=(.+)&thumbnail").findall(data)
     if match:
         return unquote(match[0])
@@ -31,7 +31,7 @@ def getVideoUrl(youtubeId, format='mp4'):
 
 def getMovieInfo(youtubeId, video_url_base=None):
     url = "http://gdata.youtube.com/feeds/api/videos/%s" % youtubeId
-    data = getUrl(url)
+    data = readUrl(url)
     fd = feedparser.parse(data)
     return getInfoFromAtom(fd.entries[0], video_url_base)
 
@@ -59,7 +59,7 @@ def getInfoFromAtom(entry, video_url_base=None):
 def find(query, max_results=10, offset=1, orderBy='relevance', video_url_base=None):
     query = quote(query)
     url = "http://gdata.youtube.com/feeds/api/videos?vq=%s&orderby=%s&start-index=%s&max-results=%s" % (query, orderBy, offset, max_results)
-    data = getUrlUnicode(url)
+    data = readUrlUnicode(url)
     fd = feedparser.parse(data)
     videos = []
     for entry in fd.entries:
@@ -72,7 +72,7 @@ def find(query, max_results=10, offset=1, orderBy='relevance', video_url_base=No
 '''
 def find(query, max_results=10, offset=1, orderBy='relevance', video_url_base=None):
     url = "http://youtube.com/results?search_query=%s&search=Search" % quote(query)
-    data = getUrlUnicode(url)
+    data = readUrlUnicode(url)
     regx = re.compile(' video')
     id_title = regx.findall(data)