depend on ox, install as ox.web, migrate getUrl to readUrl
parent d2849d44ef
commit 16eeaf8b25

28 changed files with 169 additions and 172 deletions
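Note: the change is one mechanical rename plus a packaging change. Every oxlib import becomes ox, and the cached fetchers are renamed from getUrl/getUrlUnicode to readUrl/readUrlUnicode; the per-site getUrl(id) helpers that merely build page URLs keep their names. As an illustrative summary (not part of the commit), the mapping is:

    # oxlib -> ox rename map; the right-hand side assumes ox.cache and
    # ox.net expose the same signatures oxlib.cache and oxlib.net had
    RENAMES = {
        'oxlib': 'ox',
        'oxlib.cache.getUrl': 'ox.cache.readUrl',
        'oxlib.cache.getUrlUnicode': 'ox.cache.readUrlUnicode',
        'oxlib.net.getUrl': 'ox.net.readUrl',
    }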
README | 4 ++--

@@ -2,7 +2,7 @@ python-oxweb the internet is a dict
 Depends:
 python2.5
-python-oxlib (bzr branch http://code.0xdb.org/python-oxlib)
+python-ox (bzr branch http://code.0xdb.org/python-ox)
 python-beautifulsoup (http://www.crummy.com/software/BeautifulSoup/)
 python-feedparser (http://www.feedparser.org/)
 (there seam to be some issues if not using the one from ubuntu/debian)
@@ -17,4 +17,4 @@ Install:
 }

 Test:
-nosetests --with-doctest oxweb
+nosetests --with-doctest web

@@ -1 +1 @@
-oxlib
+ox
setup.py | 4 ++--

@@ -19,8 +19,8 @@ setup(
     url="http://code.0xdb.org/oxweb",
     download_url="http://code.0xdb.org/oxweb/download",
     license="GPLv3",
-    packages=['oxweb'],
-    zip_safe=False,
+    package_dir = {'ox.web': 'web'},
+    packages=['ox.web'],
     keywords = [
     ],
     classifiers = [
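Note: the package_dir line is what makes "install as ox.web" work: the sources stay in web/, but setuptools installs them as the ox.web subpackage next to the ox package that python-ox provides. A minimal sketch of the same pattern, with hypothetical names:

    # setup.py sketch: map the web/ source directory onto a dotted
    # package name; `import mypkg.web` then works after installation,
    # provided another distribution installs the parent package mypkg
    # (here python-ox provides ox)
    from setuptools import setup

    setup(
        name='mypkg-web',
        version='1.0.0',
        package_dir={'mypkg.web': 'web'},  # directory web/ -> package mypkg.web
        packages=['mypkg.web'],
    )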
@@ -1,6 +1,6 @@
 # vi:si:et:sw=4:sts=4:ts=4
 # encoding: utf-8
-__version__ = '0.1.0'
+__version__ = '1.0.0'

 import imdb
 import wikipedia
@@ -3,8 +3,8 @@
 import re
 import time

-from oxlib import stripTags, findRe
-from oxlib.cache import getUrlUnicode
+from ox import stripTags, findRe
+from ox.cache import readUrlUnicode


 def getId(url):
@@ -24,7 +24,7 @@ def getData(id):
     data = {
         "url": getUrl(id)
     }
-    html = getUrlUnicode(data["url"])
+    html = readUrlUnicode(data["url"])
     data['aka'] = parseList(html, 'AKA')
     data['category'] = findRe(html, 'http://allmovie.com/explore/category/.*?">(.*?)</a>')
     data['countries'] = parseList(html, 'Countries')
@@ -42,11 +42,11 @@ def getData(id):
     data['themes'] = parseList(html, 'Themes')
     data['types'] = parseList(html, 'Types')
     data['year'] = findRe(html, '"http://allmovie.com/explore/year/(.*?)"')
-    html = getUrlUnicode("http://allmovie.com/work/%s/cast" % id)
+    html = readUrlUnicode("http://allmovie.com/work/%s/cast" % id)
     data['cast'] = parseTable(html)
-    html = getUrlUnicode("http://allmovie.com/work/%s/credits" % id)
+    html = readUrlUnicode("http://allmovie.com/work/%s/credits" % id)
     data['credits'] = parseTable(html)
-    html = getUrlUnicode("http://allmovie.com/work/%s/review" % id)
+    html = readUrlUnicode("http://allmovie.com/work/%s/review" % id)
     data['review'] = parseText(html, 'Review')
     return data
@@ -4,8 +4,6 @@
 import os
 import simplejson

-import oxlib
-

 def get(key):
     user_auth = os.environ.get('oxAUTH', os.path.expanduser('~/.ox/auth.json'))
@@ -2,10 +2,10 @@
 # vi:si:et:sw=4:sts=4:ts=4
 import re

-import oxlib.cache
-from oxlib.cache import getUrlUnicode
-from oxlib.html import stripTags
-from oxlib.text import findRe, removeSpecialCharacters
+import ox.cache
+from ox.cache import readUrlUnicode
+from ox.html import stripTags
+from ox.text import findRe, removeSpecialCharacters

 import imdb

@@ -30,9 +30,9 @@ def getData(id):
         "url": getUrl(id)
     }
     try:
-        html = getUrlUnicode(data["url"])
+        html = readUrlUnicode(data["url"])
     except:
-        html = oxlib.cache.getUrl(data["url"])
+        html = ox.cache.getUrl(data["url"])
     data["number"] = findRe(html, "<p class=\"spinenumber\">(.*?)</p>")
     data["title"] = findRe(html, "<h2 class=\"movietitle\">(.*?)</h2>")
     data["director"] = findRe(html, "<h2 class=\"director\">(.*?)</h2>")
@@ -48,7 +48,7 @@ def getData(id):
     if not "/boxsets/" in result:
         data["posters"] = [result]
     else:
-        html_ = getUrlUnicode(result)
+        html_ = readUrlUnicode(result)
         result = findRe(html_, "<a href=\"http://www.criterion.com/films/%s\">(.*?)</a>" % id)
         result = findRe(result, "src=\"(.*?)\"")
         data["posters"] = [result.replace("_w100", "")]
@@ -64,7 +64,7 @@ def getData(id):

 def getIds():
     ids = []
-    html = getUrlUnicode("http://www.criterion.com/library/dvd")
+    html = readUrlUnicode("http://www.criterion.com/library/dvd")
     results = re.compile("page=(.*?)\"").findall(html)
     pages = int(results[len(results) - 2])
     for page in range(pages, 0, -1):
@@ -74,13 +74,13 @@ def getIds():

 def getIdsByPage(page):
     ids = []
-    html = getUrlUnicode("http://www.criterion.com/library/dvd?page=%s" % page)
+    html = readUrlUnicode("http://www.criterion.com/library/dvd?page=%s" % page)
     results = re.compile("films/(.*?)\"").findall(html)
     for result in results:
         ids.append(result)
     results = re.compile("boxsets/(.*?)\"").findall(html)
     for result in results:
-        html = getUrlUnicode("http://www.criterion.com/boxsets/" + result)
+        html = readUrlUnicode("http://www.criterion.com/boxsets/" + result)
         results = re.compile("films/(.*?)\"").findall(html)
         for result in results:
             ids.append(result)
@@ -2,7 +2,7 @@
 # vi:si:et:sw=4:sts=4:ts=4
 import re
 from urllib import unquote
-from oxlib.cache import getUrl
+from ox.cache import readUrl


 def getVideoUrl(url):
@@ -13,7 +13,7 @@ def getVideoUrl(url):
     >>> getVideoUrl('http://www.dailymotion.com/relevance/search/priere%2Bpour%2Brefuznik/video/x3ou94_priere-pour-refuznik-2-jeanluc-goda_shortfilms').split('?key')[0]
     'http://www.dailymotion.com/get/15/320x240/flv/6197800.flv'
     '''
-    data = getUrl(url)
+    data = readUrl(url)
     video = re.compile('''video", "(.*?)"''').findall(data)
     for v in video:
         v = unquote(v).split('@@')[0]
@@ -3,8 +3,8 @@
 import re
 import time

-from oxlib import stripTags, findRe
-from oxlib.cache import getUrlUnicode
+from ox import stripTags, findRe
+from ox.cache import readUrlUnicode

 import google

@@ -21,7 +21,7 @@ def getShowUrl(title):
     return None

 def getShowData(url):
-    data = getUrlUnicode(url)
+    data = readUrlUnicode(url)
     r = {}
     r['title'] = stripTags(findRe(data, '<h1>(.*?)</h1>'))
     r['imdb'] = findRe(data, '<h1><a href=".*?/title/tt(\d.*?)">.*?</a></h1>')
@@ -10,8 +10,8 @@ import Queue
 import simplejson


-import oxlib
-from oxlib import stripTags
+import ox
+from ox import stripTags


 '''
@@ -30,15 +30,15 @@ FIXME: how search depper than first page?
 DEFAULT_MAX_RESULTS = 10
 DEFAULT_TIMEOUT = 24*60*60

-def getUrl(url, data=None, headers=oxlib.net.DEFAULT_HEADERS, timeout=DEFAULT_TIMEOUT):
-    return oxlib.cache.getUrl(url, data, headers, timeout)
+def readUrl(url, data=None, headers=ox.net.DEFAULT_HEADERS, timeout=DEFAULT_TIMEOUT):
+    return ox.cache.readUrl(url, data, headers, timeout)

 def quote_plus(s):
     return urllib.quote_plus(s.encode('utf-8'))

 def find(query, max_results=DEFAULT_MAX_RESULTS, timeout=DEFAULT_TIMEOUT):
     url = "http://www.google.com/search?q=%s" % quote_plus(query)
-    data = getUrl(url, timeout=timeout)
+    data = readUrl(url, timeout=timeout)
     link_re = r'<a href="(?P<url>[^"]*?)" class=l.*?>(?P<name>.*?)</a>' + \
               r'.*?(?:<br>|<table.*?>)' + \
               r'(?P<desc>.*?)' + '(?:<font color=#008000>|<a)'
@@ -52,6 +52,6 @@ def find(query, max_results=DEFAULT_MAX_RESULTS, timeout=DEFAULT_TIMEOUT):

 def _find(query):
     url = 'http://ajax.googleapis.com/ajax/services/search/web?v=1.0&q=%s' % quote_plus(query)
-    results = simplejson.loads(getUrlUnicode(url))['responseData']['results']
+    results = simplejson.loads(ox.cache.readUrlUnicode(url))['responseData']['results']
     return results

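Note: modules keep a thin local readUrl wrapper instead of importing ox.cache.readUrl directly, so every call site inherits the module's own cache policy. A sketch, assuming the (url, data, headers, timeout) signature shown above, with timeout semantics as used elsewhere in this commit (seconds of cache validity; 0 forces a re-fetch, -1 caches forever):

    import ox.cache
    import ox.net

    DEFAULT_TIMEOUT = 24 * 60 * 60  # search pages go stale; keep for one day

    def readUrl(url, data=None, headers=ox.net.DEFAULT_HEADERS, timeout=DEFAULT_TIMEOUT):
        # one place to adjust headers/timeout for every fetch in this module
        return ox.cache.readUrl(url, data, headers, timeout)

    html = readUrl('http://www.google.com/search?q=example')               # cached up to a day
    fresh = readUrl('http://www.google.com/search?q=example', timeout=0)   # force a re-fetch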
@@ -8,19 +8,19 @@ import time

 from BeautifulSoup import BeautifulSoup
 import chardet
-import oxlib
-from oxlib import stripTags, decodeHtml, findRe, findString
-import oxlib.cache
-from oxlib.normalize import normalizeTitle, normalizeImdbId
-from oxlib import *
+import ox
+from ox import stripTags, decodeHtml, findRe, findString
+import ox.cache
+from ox.normalize import normalizeTitle, normalizeImdbId
+from ox import *

 import google

 '''
 never timeout imdb data, to update cache remove data from cache folder
 '''
-def getUrlUnicode(url, data=None, headers=oxlib.cache.DEFAULT_HEADERS, timeout=-1):
-    return oxlib.cache.getUrlUnicode(url, data, headers, timeout)
+def readUrlUnicode(url, data=None, headers=ox.cache.DEFAULT_HEADERS, timeout=-1):
+    return ox.cache.readUrlUnicode(url, data, headers, timeout)

 '''
 check if result is valid while updating
@@ -28,8 +28,8 @@ def validate(result, header):
     return header['status'] == u'200'

 try:
-    d = oxlib.cache.getUrlUnicode(url, data, headers, timeout=0, valid=validate)
-except oxlib.cache.InvalidResult, e:
+    d = ox.cache.readUrlUnicode(url, data, headers, timeout=0, valid=validate)
+except ox.cache.InvalidResult, e:
     print e.headers

 '''
@@ -76,7 +76,7 @@ def getRawMovieData(imdbId):
     return data

 def getMovieInfo(imdbId):
-    data = getUrlUnicode(getUrlBase(imdbId))
+    data = readUrlUnicode(getUrlBase(imdbId))
     info = dict()
     info['poster'] = findRe(data, 'name="poster".*?<img .*?src="(.*?)"')
     if info['poster'] and '_V' in info['poster']:
@@ -246,7 +246,7 @@ def getMovieAKATitles(imdbId):
         (u'Women of the Night', u'(undefined)')]
     '''
     url = "%sreleaseinfo" % getUrlBase(imdbId)
-    data = getUrlUnicode(url)
+    data = readUrlUnicode(url)
     titles = findRe(data, 'name="akas".*?<table.*?>(.*?)</table>')
     titles = re.compile("td>(.*?)</td>\n\n<td>(.*)</td>").findall(titles)
     return titles
@@ -268,7 +268,7 @@ def creditList(data, section=None):
 def getMovieCredits(imdbId):
     credits = dict()
     url = "%sfullcredits" % getUrlBase(imdbId)
-    data = getUrlUnicode(url)
+    data = readUrlUnicode(url)
     groups = data.split('<h5>')
     for g in groups:
         section = re.compile('''name="(.*?)".*? href="/Glossary''').findall(g)
@@ -278,7 +278,7 @@ def getMovieCredits(imdbId):

 def getMovieTrailers(imdbId):
     url = "%strailers" % getUrlBase(imdbId)
-    data = getUrlUnicode(url)
+    data = readUrlUnicode(url)
     soup = BeautifulSoup(data)
     videos = soup('div', {'class':"video-gallery"})
     trailers = []
@@ -288,27 +288,27 @@ def getMovieTrailers(imdbId):
         url = 'http://www.imdb.com' + a['href']
         videoId = findRe(url, '/(vi\d*?)/')
         iframeUrl = "http://www.imdb.com/video/trailer/%s/player" % videoId
-        iframe = getUrlUnicode(iframeUrl)
+        iframe = readUrlUnicode(iframeUrl)
         videoUrl = unquote(findRe(iframe, 'addVariable\("file", "(.*?)"'))
         trailers.append({'title': title, 'url': url, 'iframe': iframeUrl, 'flv':videoUrl})
     return trailers

 def getMovieQuotes(imdbId):
     url = "%squotes" % getUrlBase(imdbId)
-    data = getUrlUnicode(url)
+    data = readUrlUnicode(url)
     quotes = re.compile('<b>(.*?)</b>:(.*?)<br>', re.DOTALL).findall(findString(data, '<a name="q'))
     quotes = [(q[0].strip(),q[1].strip()) for q in quotes]
     return quotes

 def getMoviePlot(imdbId):
     url = "%splotsummary" % getUrlBase(imdbId)
-    data = getUrlUnicode(url)
+    data = readUrlUnicode(url)
     plot = findRe(data, '<p class="plotpar">(.*?)<i>').split('</p>')[0]
     return plot.strip()

 def getMovieTechnical(imdbId):
     url = "%stechnical" % getUrlBase(imdbId)
-    data = getUrlUnicode(url)
+    data = readUrlUnicode(url)
     results = {}
     for t in re.compile('<h5>(.*?)</h5>(.*?)<br/>', re.DOTALL).findall(data):
         results[t[0].strip()] = t[1].strip()
@@ -316,7 +316,7 @@ def getMovieTechnical(imdbId):

 def getMovieCompanyCredits(imdbId):
     url = "%scompanycredits" % getUrlBase(imdbId)
-    data = getUrlUnicode(url)
+    data = readUrlUnicode(url)
     results = {}
     for field, c in re.compile('<h2>(.*?)</h2><ul>(.*?)</ul>').findall(data):
         results[field.strip()] = []
@@ -326,7 +326,7 @@ def getMovieCompanyCredits(imdbId):

 def getMovieLocations(imdbId):
     url = "%slocations" % getUrlBase(imdbId)
-    data = getUrlUnicode(url)
+    data = readUrlUnicode(url)
     locations = re.compile('<dt><a href="/List.*?>(.*?)</a></dt>').findall(data)
     return locations

@@ -334,7 +334,7 @@ def getMovieImages(imdbId, keys=('still_frame', 'poster', 'product')):
     photos = {}
     for key in keys:
         url = "%smediaindex?refine=%s" % (getUrlBase(imdbId), key)
-        data = getUrlUnicode(url)
+        data = readUrlUnicode(url)
         photos[key] = {}
         for s in re.compile('''<img alt="(.*?)".*?src="(http://ia.media-imdb.com/.*?.jpg)''').findall(data):
             img= "%s.jpg" % s[1].split('._V')[0]
@@ -358,7 +358,7 @@ def getMoviePosters(imdbId):

 def getMovieTrivia(imdbId):
     url = "%strivia" % getUrlBase(imdbId)
-    data = getUrlUnicode(url)
+    data = readUrlUnicode(url)
     trivia = re.compile('<li>(.*?)</li>').findall(data)
     def clean(t):
         t = decodeHtml(t)
@@ -371,7 +371,7 @@ def getMovieTrivia(imdbId):

 def getMovieConnections(imdbId):
     url = "%smovieconnections" % getUrlBase(imdbId)
-    data = getUrlUnicode(url)
+    data = readUrlUnicode(url)
     connections={}
     for c in re.compile('''<h5>(.*?)</h5>(.*?)\n\n''', re.DOTALL).findall(data):
         connections[unicode(c[0])] = re.compile('''<a href="/title/tt(\d{7})/">''').findall(c[1])
@@ -379,7 +379,7 @@ def getMovieConnections(imdbId):

 def getMovieKeywords(imdbId):
     url = "%skeywords" % getUrlBase(imdbId)
-    data = getUrlUnicode(url)
+    data = readUrlUnicode(url)
     keywords = []
     for keyword in re.compile('''<a.*?href="/keyword.*?>(.*?)</a>''').findall(data):
         keyword = decodeHtml(keyword)
@@ -389,7 +389,7 @@ def getMovieKeywords(imdbId):

 def getMovieExternalReviews(imdbId):
     url = "%sexternalreviews" % getUrlBase(imdbId)
-    data = getUrlUnicode(url)
+    data = readUrlUnicode(url)
     _reviews = re.compile('<li><a href="(.*?)">(.*?)</a></li>').findall(data)
     reviews = {}
     for r in _reviews:
@@ -430,7 +430,7 @@ def _parseDate(d):

 def getMovieReleaseDates(imdbId):
     url = "%sreleaseinfo" % getUrlBase(imdbId)
-    data = getUrlUnicode(url)
+    data = readUrlUnicode(url)
     releasedates = []
     regexp = '''<tr><td>(.*?)</td>.*?<td align="right">(.*?)</td>.*?<td>(.*?)</td></tr>'''

@@ -468,7 +468,7 @@ def getMovieFlimingDates(imdbId):

 def getMovieBusiness(imdbId):
     url = "%sbusiness" % getUrlBase(imdbId)
-    data = getUrlUnicode(url)
+    data = readUrlUnicode(url)
     business = {}
     for r in re.compile('''<h5>(.*?)</h5>(.*?)<br/>.<br/>''', re.DOTALL).findall(data):
         key = stripTags(r[0]).strip().lower()
@@ -478,7 +478,7 @@ def getMovieBusiness(imdbId):

 def getMovieEpisodes(imdbId):
     url = "%sepisodes" % getUrlBase(imdbId)
-    data = getUrlUnicode(url)
+    data = readUrlUnicode(url)
     episodes = {}
     regexp = r'''<h4>Season (.*?), Episode (.*?): <a href="/title/tt(.*?)/">(.*?)</a></h4>(.*?)</b><br>(.*?)<br/>'''
     for r in re.compile(regexp, re.DOTALL).findall(data):
@@ -514,7 +514,7 @@ class IMDb:
         self.pageUrl = getUrlBase(imdbId)

     def getPage(self):
-        return getUrlUnicode(self.pageUrl)
+        return readUrlUnicode(self.pageUrl)

     def parse_raw_value(self, key, value):
         if key in ('runtime', 'language', 'genre', 'country', 'tagline', 'plot_outline'):
@@ -682,10 +682,10 @@ def guess(title, director=''):
     search = 'site:imdb.com "%s"' % title
     for (name, url, desc) in google.find(search, 2):
         if url.startswith('http://www.imdb.com/title/tt'):
-            return normalizeImdbId(int(oxlib.intValue(url)))
+            return normalizeImdbId(int(ox.intValue(url)))

     try:
-        req = urllib2.Request(imdb_url, None, oxlib.net.DEFAULT_HEADERS)
+        req = urllib2.Request(imdb_url, None, ox.net.DEFAULT_HEADERS)
         u = urllib2.urlopen(req)
         data = u.read()
         return_url = u.url
@@ -700,7 +700,7 @@ def guess(title, director=''):
         return imdb_id

     imdb_url = 'http://www.imdb.com/find?q=%s;s=tt;site=aka' % quote(title.encode('utf-8'))
-    req = urllib2.Request(imdb_url, None, oxlib.net.DEFAULT_HEADERS)
+    req = urllib2.Request(imdb_url, None, ox.net.DEFAULT_HEADERS)
     u = urllib2.urlopen(req)
     data = u.read()
     return_url = u.url
@@ -737,7 +737,7 @@ def getEpisodeData(title, episode, show_url = None):
 def getPersonData(imdbId):
     imdbId = normalizeImdbId(imdbId)
     url = u'http://www.imdb.com/name/nm%s/' % imdbId
-    data = getUrlUnicode(url)
+    data = readUrlUnicode(url)
     info = dict()
     info['name'] = findRe(data, u'<title>(.*?)</title>')
     filmo = data.split(u'<h3>Additional Details</h3>')[0]
@@ -2,9 +2,9 @@
 # encoding: utf-8
 import re

-from oxlib.cache import getUrlUnicode
-from oxlib.html import stripTags
-from oxlib.text import findRe
+from ox.cache import readUrlUnicode
+from ox.html import stripTags
+from ox.text import findRe

 import imdb

@@ -22,7 +22,7 @@ def getData(id):
     data = {
         'url': getUrl(id)
     }
-    html = getUrlUnicode(data['url'])
+    html = readUrlUnicode(data['url'])
     data['imdbId'] = findRe(html, 'imdb.com/title/tt(.*?) ')
     data['title'] = stripTags(findRe(html, '<p class="name white">(.*?) \(<a href="alpha1.html">'))
     data['year'] = findRe(html, '\(<a href="alpha1.html">(.*?)</a>\)')
@@ -31,11 +31,11 @@ def getData(id):
     for result in results:
         result = result.replace('_xlg.html', '.html')
         url = 'http://www.impawards.com/%s/%s' % (data['year'], result)
-        html = getUrlUnicode(url)
+        html = readUrlUnicode(url)
         result = findRe(html, '<a href = (\w*?_xlg.html)')
         if result:
             url = 'http://www.impawards.com/%s/%s' % (data['year'], result)
-            html = getUrlUnicode(url)
+            html = readUrlUnicode(url)
             poster = 'http://www.impawards.com/%s/%s' % (data['year'], findRe(html, '<img SRC="(.*?)"'))
         else:
             poster = 'http://www.impawards.com/%s/%s' % (data['year'], findRe(html, '<img src="(posters.*?)" alt='))
@@ -55,7 +55,7 @@ def getId(url):

 def getIds():
     ids = []
-    html = getUrlUnicode('http://www.impawards.com/archives/latest.html', timeout = 60*60)
+    html = readUrlUnicode('http://www.impawards.com/archives/latest.html', timeout = 60*60)
     pages = int(findRe(html, '<a href= page(.*?).html>')) + 1
     for page in range(pages, 0, -1):
         for id in getIdsByPage(page):
@@ -65,7 +65,7 @@ def getIds():

 def getIdsByPage(page):
     ids = []
-    html = getUrlUnicode('http://www.impawards.com/archives/page%s.html' % page, timeout = -1)
+    html = readUrlUnicode('http://www.impawards.com/archives/page%s.html' % page, timeout = -1)
     results = re.compile('<a href = \.\./(.*?)>', re.DOTALL).findall(html)
     for result in results:
         url = 'http://impawards.com/%s' % result
@@ -74,7 +74,7 @@ def getIdsByPage(page):

 def getUrl(id):
     url = "http://www.impawards.com/%s.html" % id
-    html = getUrlUnicode(url)
+    html = readUrlUnicode(url)
     if findRe(html, "No Movie Posters on This Page"):
         url = "http://www.impawards.com/%s_ver1.html" % id
     return url
@@ -3,10 +3,10 @@
 import re
 import urllib

-from oxlib.cache import getUrl
-from oxlib.html import decodeHtml, stripTags
-from oxlib.text import findRe
-from oxlib.text import findString
+from ox.cache import readUrl
+from ox.html import decodeHtml, stripTags
+from ox.text import findRe
+from ox.text import findString


 # to sniff itunes traffic, use something like
@@ -113,14 +113,14 @@ class ItunesAlbum:

     def getId(self):
         url = composeUrl('advancedSearch', {'media': 'music', 'title': self.title, 'artist': self.artist})
-        xml = getUrl(url, headers = ITUNES_HEADERS)
+        xml = readUrl(url, headers = ITUNES_HEADERS)
         id = findRe(xml, 'viewAlbum\?id=(.*?)&')
         return id

     def getData(self):
         data = {'id': self.id}
         url = composeUrl('viewAlbum', {'id': self.id})
-        xml = getUrl(url, None, ITUNES_HEADERS)
+        xml = readUrl(url, None, ITUNES_HEADERS)
         data['albumName'] = findRe(xml, '<B>(.*?)</B>')
         data['artistName'] = findRe(xml, '<b>(.*?)</b>')
         data['coverUrl'] = findRe(xml, 'reflection="." url="(.*?)"')
@@ -144,14 +144,14 @@ class ItunesMovie:

     def getId(self):
         url = composeUrl('advancedSearch', {'media': 'movie', 'title': self.title, 'director': self.director})
-        xml = getUrl(url, headers = ITUNES_HEADERS)
+        xml = readUrl(url, headers = ITUNES_HEADERS)
         id = findRe(xml, 'viewMovie\?id=(.*?)&')
         return id

     def getData(self):
         data = {'id': self.id}
         url = composeUrl('viewMovie', {'id': self.id})
-        xml = getUrl(url, None, ITUNES_HEADERS)
+        xml = readUrl(url, None, ITUNES_HEADERS)
         f = open('/Users/rolux/Desktop/iTunesData.xml', 'w')
         f.write(xml)
         f.close()
@@ -1,24 +1,24 @@
 import re
-from oxlib import cache
-from oxlib.html import stripTags
-from oxlib.text import findRe
+from ox import cache
+from ox.html import stripTags
+from ox.text import findRe

 import auth


-def _getUrl(url, data=None, headers=cache.DEFAULT_HEADERS, timeout=cache.cache_timeout, valid=None):
+def readUrl(url, data=None, headers=cache.DEFAULT_HEADERS, timeout=cache.cache_timeout, valid=None):
     headers = headers.copy()
     headers["Cookie"] = auth.get("karagarga.cookie")
-    return cache.getUrl(url, data, headers, timeout)
+    return cache.readUrl(url, data, headers, timeout)

-def getUrlUnicode(url, timeout=cache.cache_timeout):
-    return cache.getUrlUnicode(url, _getUrl=_getUrl, timeout=timeout)
+def readUrlUnicode(url, timeout=cache.cache_timeout):
+    return cache.readUrlUnicode(url, _readUrl=readUrl, timeout=timeout)

 def getData(id):
     data = {
         "url": getUrl(id)
     }
-    html = getUrlUnicode("%s%s" % (data["url"], "&filelist=1"))
+    html = readUrlUnicode("%s%s" % (data["url"], "&filelist=1"))
     if 'No torrent with ID' in html:
         return False
     data['added'] = stripTags(parseTable(html, 'Added'))
@@ -87,7 +87,7 @@ def getId(url):
     return url.split("=")[-1]

 def getTorrent(id):
-    return _getUrl(getData(id)['torrent'])
+    return readUrl(getData(id)['torrent'])

 def getIds(lastId = 20):
     lastId = '%s' % lastId
@@ -105,7 +105,7 @@ def getIds(lastId = 20):
 def getIdsByPage(page):
     ids = []
     url = 'http://karagarga.net/browse.php?page=%s&cat=1&sort=added&d=DESC' % page
-    html = getUrlUnicode(url, timeout = 23*60*60) #get new ids once per day
+    html = readUrlUnicode(url, timeout = 23*60*60) #get new ids once per day
     strings = html.split('<td width="42" style="padding:0px;">')
     strings.pop(0)
     for string in strings:
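Note: sites that need authenticated fetches invert the dependency: the module defines its own readUrl that adds the cookie, then hands it back to the cache layer via the _readUrl keyword, so cache.readUrlUnicode keeps handling caching and decoding while the module controls the raw request. A sketch of the pattern as it appears above (the exact ox.cache internals are an assumption):

    from ox import cache
    import auth  # the module shown earlier; reads keys from ~/.ox/auth.json

    def readUrl(url, data=None, headers=cache.DEFAULT_HEADERS,
                timeout=cache.cache_timeout, valid=None):
        headers = headers.copy()  # don't mutate the shared default headers
        headers['Cookie'] = auth.get('karagarga.cookie')
        return cache.readUrl(url, data, headers, timeout)

    def readUrlUnicode(url, timeout=cache.cache_timeout):
        # caching and unicode decoding stay in ox.cache; only the raw
        # HTTP fetch is replaced by the cookie-carrying readUrl above
        return cache.readUrlUnicode(url, _readUrl=readUrl, timeout=timeout)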
@@ -1,15 +1,15 @@
 # -*- coding: utf-8 -*-
 # vi:si:et:sw=4:sts=4:ts=4
-from oxlib.cache import getUrl
-from oxlib.html import decodeHtml
-from oxlib.text import findRe
+from ox.cache import readUrl
+from ox.html import decodeHtml
+from ox.text import findRe


 def getLyrics(title, artist):
-    html = getUrl('http://lyricsfly.com/api/')
+    html = readUrl('http://lyricsfly.com/api/')
     key = findRe(html, '<font color=green><b>(.*?)</b></font>')
     url = 'http://lyricsfly.com/api/api.php?i=%s&a=%s&t=%s' % (key, artist, title)
-    xml = getUrl(url)
+    xml = readUrl(url)
     lyrics = findRe(xml, '<tx>(.*?)\[br\] Lyrics [a-z]* by lyricsfly.com')
     lyrics = lyrics.replace('\n', '').replace('\r', '')
     lyrics = lyrics.replace('[br]', '\n').strip()
@@ -3,14 +3,14 @@
 import re
 from urllib import quote

-from oxlib.cache import getUrl, getUrlUnicode
-from oxlib import findRe, decodeHtml, stripTags
+from ox.cache import readUrl, readUrlUnicode
+from ox import findRe, decodeHtml, stripTags


 def getMetacriticShowUrl(title):
     title = quote(title)
     url = "http://www.metacritic.com/search/process?ty=6&ts=%s&tfs=tvshow_title&x=0&y=0&sb=0&release_date_s=&release_date_e=&metascore_s=&metascore_e=" % title
-    data = getUrl(url)
+    data = readUrl(url)
     return findRe(data, '(http://www.metacritic.com/tv/shows/.*?)\?')

 def getData(title, url=None):
@@ -18,7 +18,7 @@ def getData(title, url=None):
         url = getMetacriticShowUrl(title)
     if not url:
         return None
-    data = getUrlUnicode(url)
+    data = readUrlUnicode(url)
     score = findRe(data, 'ALT="Metascore: (.*?)"')
     if score:
         score = int(score)
@@ -5,10 +5,10 @@ import re
 import socket
 from urllib import quote

-from oxlib.cache import getUrl, getUrlUnicode
-from oxlib import findRe, cache, stripTags, decodeHtml, getTorrentInfo, intValue, normalizeNewlines
-from oxlib.normalize import normalizeImdbId
-import oxlib
+from ox.cache import readUrl, readUrlUnicode
+from ox import findRe, cache, stripTags, decodeHtml, getTorrentInfo, intValue, normalizeNewlines
+from ox.normalize import normalizeImdbId
+import ox

 from torrent import Torrent

@@ -31,7 +31,7 @@ def findMovie(query, max_results=10):
     '''search for torrents on mininova
     '''
     url = "http://www.mininova.org/search/%s/seeds" % quote(query)
-    data = getUrlUnicode(url)
+    data = readUrlUnicode(url)
     return _parseResultsPage(data, max_results)

 def findMovieByImdb(imdbId):
@@ -39,7 +39,7 @@ def findMovieByImdb(imdbId):
     '''
     results = []
     imdbId = normalizeImdbId(imdbId)
-    data = getUrlUnicode("http://www.mininova.org/imdb/?imdb=%s" % imdbId)
+    data = readUrlUnicode("http://www.mininova.org/imdb/?imdb=%s" % imdbId)
     return _parseResultsPage(data)

 def getId(mininovaId):
@@ -55,7 +55,7 @@ def getId(mininovaId):

 def exists(mininovaId):
     mininovaId = getId(mininovaId)
-    data = oxlib.net.getUrl("http://www.mininova.org/tor/%s" % mininovaId)
+    data = ox.net.readUrl("http://www.mininova.org/tor/%s" % mininovaId)
     if not data or 'Torrent not found...' in data:
         return False
     if 'tracker</a> of this torrent requires registration.' in data:
@@ -74,7 +74,7 @@ def getData(mininovaId):
     torrent[u'torrent_link'] = "http://www.mininova.org/get/%s" % mininovaId
     torrent[u'details_link'] = "http://www.mininova.org/det/%s" % mininovaId

-    data = getUrlUnicode(torrent['comment_link']) + getUrlUnicode(torrent['details_link'])
+    data = readUrlUnicode(torrent['comment_link']) + readUrlUnicode(torrent['details_link'])
     if '<h1>Torrent not found...</h1>' in data:
         return None

@@ -89,7 +89,7 @@ def getData(mininovaId):
     torrent[u'description'] = findRe(data, '<div id="description">(.*?)</div>')
     if torrent['description']:
         torrent['description'] = normalizeNewlines(decodeHtml(stripTags(torrent['description']))).strip()
-    t = getUrl(torrent[u'torrent_link'])
+    t = readUrl(torrent[u'torrent_link'])
     torrent[u'torrent_info'] = getTorrentInfo(t)
     return torrent

@@ -3,8 +3,8 @@

 import re

-from oxlib.cache import getUrlUnicode
-from oxlib import findRe
+from ox.cache import readUrlUnicode
+from ox import findRe

 def getData(id):
     '''
@@ -24,7 +24,7 @@ def getId(url):

 def getPostersByUrl(url, group=True):
     posters = []
-    html = getUrlUnicode(url)
+    html = readUrlUnicode(url)
     if url in html:
         if group:
             results = re.compile('<a href="(http://www.movieposterdb.com/group/.+?)\??">', re.DOTALL).findall(html)
@@ -32,7 +32,7 @@ def getPostersByUrl(url, group=True):
             posters += getPostersByUrl(result, False)
         results = re.compile('<a href="(http://www.movieposterdb.com/poster/.+?)">', re.DOTALL).findall(html)
         for result in results:
-            html = getUrlUnicode(result)
+            html = readUrlUnicode(result)
             posters.append(findRe(html, '"(http://www.movieposterdb.com/posters/.+?\.jpg)"'))
         return posters

@@ -3,9 +3,9 @@
 import re

 import feedparser
-from oxlib.cache import getUrl, getUrlUnicode
-import oxlib
-from oxlib import langCode2To3, langTo3Code
+from ox.cache import readUrl, readUrlUnicode
+import ox
+from ox import langCode2To3, langTo3Code

 def findSubtitlesByImdb(imdb, parts = 1, language = "eng"):
     if len(language) == 2:
@@ -16,7 +16,7 @@ def findSubtitlesByImdb(imdb, parts = 1, language = "eng"):
     if language:
         url += "sublanguageid-%s/" % language
     url += "subsumcd-%s/subformat-srt/imdbid-%s/rss_2_00" % (parts, imdb)
-    data = getUrl(url)
+    data = readUrl(url)
     if "title>opensubtitles.com - search results</title" in data:
         fd = feedparser.parse(data)
         opensubtitleId = None
@@ -26,16 +26,16 @@ def findSubtitlesByImdb(imdb, parts = 1, language = "eng"):
         if opensubtitleId:
             opensubtitleId = opensubtitleId[0]
     else:
-        opensubtitleId = oxlib.findRe(data, '/en/subtitles/(.*?)/')
+        opensubtitleId = ox.findRe(data, '/en/subtitles/(.*?)/')
     return opensubtitleId

 def downloadSubtitleById(opensubtitle_id):
     srts = {}
-    data = getUrl('http://www.opensubtitles.org/en/subtitles/%s' % opensubtitle_id)
+    data = readUrl('http://www.opensubtitles.org/en/subtitles/%s' % opensubtitle_id)
     reg_exp = 'href="(/en/download/file/.*?)">(.*?)</a>'
     for f in re.compile(reg_exp, re.DOTALL).findall(data):
-        name = oxlib.stripTags(f[1]).split('\n')[0]
+        name = ox.stripTags(f[1]).split('\n')[0]
         url = "http://www.opensubtitles.com%s" % f[0]
-        srts[name] = getUrlUnicode(url)
+        srts[name] = readUrlUnicode(url)
     return srts

@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 # vi:si:et:sw=4:sts=4:ts=4
-import oxlib.cache
+import ox.cache

 def getPosterUrl(id):
     url = "http://0xdb.org/%s/poster.0xdb.jpg" % id
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # vi:si:et:sw=4:sts=4:ts=4
-import oxlib.cache
-from oxlib.cache import exists
+import ox.cache
+from ox.cache import exists


 def getPosterUrl(id):
@@ -2,11 +2,11 @@
 # vi:si:et:sw=4:sts=4:ts=4
 import re

-from oxlib.cache import getHeaders, getUrl, getUrlUnicode
-from oxlib import findRe, stripTags
+from ox.cache import getHeaders, readUrl, readUrlUnicode
+from ox import findRe, stripTags


-def getUrlByImdb(imdb):
+def readUrlByImdb(imdb):
     #this would also wor but does not cache:
     '''
     from urllib2 import urlopen
@@ -14,7 +14,7 @@ def getUrlByImdb(imdb):
     return u.url
     '''
     url = "http://www.rottentomatoes.com/alias?type=imdbid&s=%s" % imdb
-    data = getUrl(url)
+    data = readUrl(url)
     if "movie_title" in data:
         movies = re.compile('(/m/.*?/)').findall(data)
         if movies:
@@ -22,7 +22,7 @@ def getUrlByImdb(imdb):
         return None

 def getData(url):
-    data = getUrlUnicode(url)
+    data = readUrlUnicode(url)
     r = {}
     r['title'] = findRe(data, '<h1 class="movie_title">(.*?)</h1>')
     if '(' in r['title']:
@@ -6,9 +6,9 @@ import time

 from BeautifulSoup import BeautifulSoup

-import oxlib.cache
-from oxlib.html import decodeHtml, stripTags
-import oxlib.net
+import ox.cache
+from ox.html import decodeHtml, stripTags
+import ox.net


 def getNews(year, month, day):
@@ -23,9 +23,9 @@ def getNews(year, month, day):
     for section in sections:
         url = 'http://www.spiegel.de/%s/0,1518,archiv-%d-%03d,00.html' % (section, year, day)
         if date == time.strftime('%d.%m.%Y', time.localtime()):
-            html = oxlib.net.getUrl(url)
+            html = ox.net.readUrl(url)
         else:
-            html = oxlib.cache.getUrl(url)
+            html = ox.cache.readUrl(url)
         for item in re.compile('<div class="spTeaserCenterpage(.*?)</p>', re.DOTALL).findall(html):
             dateString = stripTags(re.compile('<div class="spDateTime">(.*?)</div>', re.DOTALL).findall(item)[0]).strip()
             try:
@@ -102,11 +102,11 @@ def formatSubsection(string):

 def getIssue(year, week):
     coverUrl = 'http://www.spiegel.de/static/epaper/SP/%d/%d/ROSPANZ%d%03d0001-312.jpg' % (year, week, year, week)
-    if not oxlib.net.exists(coverUrl):
+    if not ox.net.exists(coverUrl):
         return None
     url = 'http://service.spiegel.de/digas/servlet/epaper?Q=SP&JG=%d&AG=%d&SE=1&AN=INHALT' % (year, week)
     contents = []
-    soup = BeautifulSoup(oxlib.cache.getUrl(url))
+    soup = BeautifulSoup(ox.cache.readUrl(url))
     for item in soup('a', {'href': re.compile('http://service.spiegel.de/digas/servlet/epaper\?Q=SP&JG=')}):
         item = str(item)
         page = int(re.compile('&SE=(.*?)"').findall(item)[0])
@@ -116,7 +116,7 @@ def getIssue(year, week):
     pages = page + 2
     for page in range(1, pages + 10):
         url = 'http://www.spiegel.de/static/epaper/SP/%d/%d/ROSPANZ%d%03d%04d-205.jpg' % (year, week, year, week, page)
-        if oxlib.cache.exists(url):
+        if ox.cache.exists(url):
             pageUrl[page] = url
         else:
             pageUrl[page] = ''
@@ -164,7 +164,7 @@ def archiveIssues():
             f.close()
         filename = '%s/Der Spiegel %d %02d.jpg' % (dirname, y, w)
         if not os.path.exists(filename):
-            data = oxlib.cache.getUrl(issue['coverUrl'])
+            data = ox.cache.readUrl(issue['coverUrl'])
             f = open(filename, 'w')
             f.write(data)
             f.close()
@@ -173,7 +173,7 @@ def archiveIssues():
             if url:
                 filename = '%s/Der Spiegel %d %02d %03d.jpg' % (dirname, y, w, page)
                 if not os.path.exists(filename):
-                    data = oxlib.cache.getUrl(url)
+                    data = ox.cache.readUrl(url)
                     f = open(filename, 'w')
                     f.write(data)
                     f.close()
@@ -244,7 +244,7 @@ def archiveNews():
             f.close()
         filename = dirname + '/' + new['imageUrl'].split('/')[-1]
         if not os.path.exists(filename):
-            data = oxlib.cache.getUrl(new['imageUrl'])
+            data = ox.cache.readUrl(new['imageUrl'])
             f = open(filename, 'w')
             f.write(data)
             f.close()
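Note: the news module above also shows the intended split between ox.net and ox.cache: today's page is fetched live because it is still changing, archive pages go through the cache. A sketch of that decision, assuming ox.net.readUrl and ox.cache.readUrl share the basic signature used throughout this commit:

    import time
    import ox.cache
    import ox.net

    def fetch(url, date):
        # date is 'DD.MM.YYYY', as in getNews() above
        if date == time.strftime('%d.%m.%Y', time.localtime()):
            return ox.net.readUrl(url)   # uncached: today's page still changes
        return ox.cache.readUrl(url)     # archived page: safe to cache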
@@ -6,10 +6,10 @@ import socket
 from urllib import quote, urlencode
 from urllib2 import URLError

-from oxlib.cache import getUrl, getUrlUnicode
-from oxlib import findRe, cache, stripTags, decodeHtml, getTorrentInfo, normalizeNewlines
-from oxlib.normalize import normalizeImdbId
-import oxlib
+from ox.cache import readUrl, readUrlUnicode
+from ox import findRe, cache, stripTags, decodeHtml, getTorrentInfo, normalizeNewlines
+from ox.normalize import normalizeImdbId
+import ox

 from torrent import Torrent

@@ -18,13 +18,13 @@ cache_timeout = 24*60*60 # cache search only for 24 hours
 season_episode = re.compile("S..E..", re.IGNORECASE)


-def _getUrl(url, data=None, headers=cache.DEFAULT_HEADERS, timeout=cache.cache_timeout, valid=None):
+def _readUrl(url, data=None, headers=cache.DEFAULT_HEADERS, timeout=cache.cache_timeout, valid=None):
     headers = headers.copy()
     headers['Cookie'] = 'language=en_EN'
-    return cache.getUrl(url, data, headers, timeout)
+    return cache.readUrl(url, data, headers, timeout)

-def _getUrlUnicode(url, timeout=cache.cache_timeout):
-    return cache.getUrlUnicode(url, _getUrl=_getUrl, timeout=timeout)
+def _readUrlUnicode(url, timeout=cache.cache_timeout):
+    return cache.readUrlUnicode(url, _readUrl=_readUrl, timeout=timeout)

 def findMovies(query, max_results=10):
     results = []
@@ -37,7 +37,7 @@ def findMovies(query, max_results=10):
         if not url.startswith('/'):
             url = "/" + url
         url = "http://thepiratebay.org" + url
-        data = _getUrlUnicode(url, timeout=cache_timeout)
+        data = _readUrlUnicode(url, timeout=cache_timeout)
         regexp = '''<tr.*?<td class="vertTh"><a href="/browse/(.*?)".*?<td><a href="(/torrent/.*?)" class="detLink".*?>(.*?)</a>.*?</tr>'''
         for row in re.compile(regexp, re.DOTALL).findall(data):
             torrentType = row[0]
@@ -67,7 +67,7 @@ def getId(piratebayId):

 def exists(piratebayId):
     piratebayId = getId(piratebayId)
-    return oxlib.net.exists("http://thepiratebay.org/torrent/%s" % piratebayId)
+    return ox.net.exists("http://thepiratebay.org/torrent/%s" % piratebayId)

 def getData(piratebayId):
     _key_map = {
@@ -83,7 +83,7 @@ def getData(piratebayId):
     torrent[u'domain'] = 'thepiratebay.org'
     torrent[u'comment_link'] = 'http://thepiratebay.org/torrent/%s' % piratebayId

-    data = _getUrlUnicode(torrent['comment_link'])
+    data = _readUrlUnicode(torrent['comment_link'])
     torrent[u'title'] = findRe(data, '<title>(.*?) \(download torrent\) - TPB</title>')
     if not torrent[u'title']:
         return None
@@ -99,7 +99,7 @@ def getData(piratebayId):
     torrent[u'description'] = findRe(data, '<div class="nfo">(.*?)</div>')
     if torrent[u'description']:
         torrent['description'] = normalizeNewlines(decodeHtml(stripTags(torrent['description']))).strip()
-    t = _getUrl(torrent[u'torrent_link'])
+    t = _readUrl(torrent[u'torrent_link'])
     torrent[u'torrent_info'] = getTorrentInfo(t)
     return torrent

@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 # vi:si:et:sw=4:sts=4:ts=4
-from oxlib import intValue
+from ox import intValue


 class Torrent(dict):
@@ -3,8 +3,8 @@
 import re
 import time

-from oxlib import stripTags, findRe
-from oxlib.cache import getUrlUnicode
+from ox import stripTags, findRe
+from ox.cache import readUrlUnicode


 def getEpisodeData(url):
@@ -14,7 +14,7 @@ def getEpisodeData(url):
     example:
         getEpisodeData('http://www.tv.com/lost/do-no-harm/episode/399310/summary.html')
     '''
-    data = getUrlUnicode(url)
+    data = readUrlUnicode(url)
     r = {}
     r['description'] = stripTags(findRe(data, 'div id="main-col">.*?<div>(.*?)</div').split('\r')[0])
     r['show'] = findRe(data, '<h1>(.*?)</h1>')
@@ -3,8 +3,8 @@
 from urllib import urlencode

 import simplejson
-from oxlib.cache import getUrlUnicode
-from oxlib import findRe, decodeHtml
+from ox.cache import readUrl, readUrlUnicode
+from ox import findRe, decodeHtml


 def getId(url):
@@ -44,7 +44,7 @@ def getUrlByAllmovieId(allmovieId):
 def getWikiData(wikipediaUrl):
     url = wikipediaUrl.replace('wikipedia.org/wiki/', 'wikipedia.org/w/index.php?title=')
     url = "%s&action=raw" % url
-    data = getUrlUnicode(url)
+    data = readUrlUnicode(url)
     return data

 def getMovieData(wikipediaUrl):
@@ -83,7 +83,7 @@ def getMovieData(wikipediaUrl):
     return filmbox

 def getImageUrl(name):
-    data = getUrlUnicode('http://en.wikipedia.org/wiki/Image:' + name)
+    data = readUrlUnicode('http://en.wikipedia.org/wiki/Image:' + name)
     url = findRe(data, 'href="(http://upload.wikimedia.org/.*?)"')
     return url

@@ -103,13 +103,12 @@ def getAllmovieId(wikipediaUrl):
     return data.get('amg_id', '')

 def find(query, max_results=10):
-    from oxlib.cache import getUrl
     query = {'action': 'query', 'list':'search', 'format': 'json',
              'srlimit': max_results, 'srwhat': 'text', 'srsearch': query.encode('utf-8')}
     url = "http://en.wikipedia.org/w/api.php?" + urlencode(query)
-    data = getUrl(url)
+    data = readUrl(url)
     if not data:
-        data = getUrl(url, timeout=0)
+        data = readUrl(url, timeout=0)
     result = simplejson.loads(data)
     results = []
     if result and 'query' in result:
@@ -6,12 +6,12 @@ import xml.etree.ElementTree as ET
 import re

 import feedparser
-from oxlib.cache import getUrl, getUrlUnicode
-from oxlib import findString, findRe
+from ox.cache import readUrl, readUrlUnicode
+from ox import findString, findRe


 def getVideoKey(youtubeId):
-    data = getUrl("http://www.youtube.com/get_video_info?&video_id=%s" % youtubeId)
+    data = readUrl("http://www.youtube.com/get_video_info?&video_id=%s" % youtubeId)
     match = re.compile("token=(.+)&thumbnail").findall(data)
     if match:
         return unquote(match[0])
@@ -31,7 +31,7 @@ def getVideoUrl(youtubeId, format='mp4'):

 def getMovieInfo(youtubeId, video_url_base=None):
     url = "http://gdata.youtube.com/feeds/api/videos/%s" % youtubeId
-    data = getUrl(url)
+    data = readUrl(url)
     fd = feedparser.parse(data)
     return getInfoFromAtom(fd.entries[0], video_url_base)

@@ -59,7 +59,7 @@ def getInfoFromAtom(entry, video_url_base=None):
 def find(query, max_results=10, offset=1, orderBy='relevance', video_url_base=None):
     query = quote(query)
     url = "http://gdata.youtube.com/feeds/api/videos?vq=%s&orderby=%s&start-index=%s&max-results=%s" % (query, orderBy, offset, max_results)
-    data = getUrlUnicode(url)
+    data = readUrlUnicode(url)
     fd = feedparser.parse(data)
     videos = []
     for entry in fd.entries:
@@ -72,7 +72,7 @@ def find(query, max_results=10, offset=1, orderBy='relevance', video_url_base=None):
 '''
 def find(query, max_results=10, offset=1, orderBy='relevance', video_url_base=None):
     url = "http://youtube.com/results?search_query=%s&search=Search" % quote(query)
-    data = getUrlUnicode(url)
+    data = readUrlUnicode(url)
     regx = re.compile(' <a href="/watch.v=(.*?)" title="(.*?)" ')
     regx = re.compile('<a href="/watch\?v=(\w*?)" ><img src="(.*?)" class="vimg120" title="(.*?)" alt="video">')
     id_title = regx.findall(data)