depend on ox, install as ox.web, migrate getUrl to readUrl
parent d2849d44ef
commit 16eeaf8b25
28 changed files with 169 additions and 172 deletions
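The change is mechanical throughout: the oxlib package becomes ox, the code installs as the ox.web subpackage, and every getUrl* cache helper becomes readUrl*. A minimal before/after sketch of the rename (illustrative, not part of the diff):

    # before this commit
    # from oxlib.cache import getUrlUnicode
    # html = getUrlUnicode('http://example.com/')

    # after this commit: the same call through the renamed ox cache API
    from ox.cache import readUrlUnicode
    html = readUrlUnicode('http://example.com/')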
README | 4

@@ -2,7 +2,7 @@ python-oxweb the internet is a dict
 Depends:
  python2.5
- python-oxlib (bzr branch http://code.0xdb.org/python-oxlib)
+ python-ox (bzr branch http://code.0xdb.org/python-ox)
  python-beautifulsoup (http://www.crummy.com/software/BeautifulSoup/)
  python-feedparser (http://www.feedparser.org/)
  (there seam to be some issues if not using the one from ubuntu/debian)
@@ -17,4 +17,4 @@ Install:
 }

 Test:
- nosetests --with-doctest oxweb
+ nosetests --with-doctest web

@@ -1 +1 @@
-oxlib
+ox
setup.py | 4

@@ -19,8 +19,8 @@ setup(
     url="http://code.0xdb.org/oxweb",
     download_url="http://code.0xdb.org/oxweb/download",
     license="GPLv3",
-    packages=['oxweb'],
-    zip_safe=False,
+    package_dir = {'ox.web': 'web'},
+    packages=['ox.web'],
     keywords = [
     ],
     classifiers = [
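The package_dir mapping is what makes the sources in web/ install as ox.web inside the separately distributed ox package. A stripped-down sketch of the resulting setup.py (other arguments elided; assumes setuptools, which the original zip_safe flag implies, and the package name from the README):

    from setuptools import setup

    setup(
        name='python-oxweb',
        package_dir={'ox.web': 'web'},  # sources live in web/ ...
        packages=['ox.web'],            # ... and install as the ox.web package
    )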
@@ -1,6 +1,6 @@
 # vi:si:et:sw=4:sts=4:ts=4
 # encoding: utf-8
-__version__ = '0.1.0'
+__version__ = '1.0.0'

 import imdb
 import wikipedia
@@ -3,8 +3,8 @@
 import re
 import time

-from oxlib import stripTags, findRe
-from oxlib.cache import getUrlUnicode
+from ox import stripTags, findRe
+from ox.cache import readUrlUnicode


 def getId(url):
@@ -24,7 +24,7 @@ def getData(id):
     data = {
         "url": getUrl(id)
     }
-    html = getUrlUnicode(data["url"])
+    html = readUrlUnicode(data["url"])
     data['aka'] = parseList(html, 'AKA')
     data['category'] = findRe(html, 'http://allmovie.com/explore/category/.*?">(.*?)</a>')
     data['countries'] = parseList(html, 'Countries')
@@ -42,11 +42,11 @@ def getData(id):
     data['themes'] = parseList(html, 'Themes')
     data['types'] = parseList(html, 'Types')
     data['year'] = findRe(html, '"http://allmovie.com/explore/year/(.*?)"')
-    html = getUrlUnicode("http://allmovie.com/work/%s/cast" % id)
+    html = readUrlUnicode("http://allmovie.com/work/%s/cast" % id)
     data['cast'] = parseTable(html)
-    html = getUrlUnicode("http://allmovie.com/work/%s/credits" % id)
+    html = readUrlUnicode("http://allmovie.com/work/%s/credits" % id)
     data['credits'] = parseTable(html)
-    html = getUrlUnicode("http://allmovie.com/work/%s/review" % id)
+    html = readUrlUnicode("http://allmovie.com/work/%s/review" % id)
     data['review'] = parseText(html, 'Review')
     return data
@@ -4,8 +4,6 @@
 import os
 import simplejson

-import oxlib
-

 def get(key):
     user_auth = os.environ.get('oxAUTH', os.path.expanduser('~/.ox/auth.json'))
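get() reads a per-user JSON credentials file, with the path overridable through the oxAUTH environment variable. Only the first line of the function survives in this hunk; a plausible completion under that assumption (illustrative, not the actual module):

    import os
    import simplejson

    def get(key):
        # path to the credentials file, overridable via $oxAUTH
        user_auth = os.environ.get('oxAUTH', os.path.expanduser('~/.ox/auth.json'))
        auth = {}
        if os.path.exists(user_auth):
            f = open(user_auth)
            auth = simplejson.loads(f.read())
            f.close()
        return auth.get(key, '')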
@@ -2,10 +2,10 @@
 # vi:si:et:sw=4:sts=4:ts=4
 import re

-import oxlib.cache
-from oxlib.cache import getUrlUnicode
-from oxlib.html import stripTags
-from oxlib.text import findRe, removeSpecialCharacters
+import ox.cache
+from ox.cache import readUrlUnicode
+from ox.html import stripTags
+from ox.text import findRe, removeSpecialCharacters

 import imdb

@@ -30,9 +30,9 @@ def getData(id):
         "url": getUrl(id)
     }
     try:
-        html = getUrlUnicode(data["url"])
+        html = readUrlUnicode(data["url"])
     except:
-        html = oxlib.cache.getUrl(data["url"])
+        html = ox.cache.getUrl(data["url"])
     data["number"] = findRe(html, "<p class=\"spinenumber\">(.*?)</p>")
     data["title"] = findRe(html, "<h2 class=\"movietitle\">(.*?)</h2>")
     data["director"] = findRe(html, "<h2 class=\"director\">(.*?)</h2>")
@@ -48,7 +48,7 @@ def getData(id):
     if not "/boxsets/" in result:
         data["posters"] = [result]
     else:
-        html_ = getUrlUnicode(result)
+        html_ = readUrlUnicode(result)
         result = findRe(html_, "<a href=\"http://www.criterion.com/films/%s\">(.*?)</a>" % id)
         result = findRe(result, "src=\"(.*?)\"")
         data["posters"] = [result.replace("_w100", "")]
@@ -64,7 +64,7 @@ def getData(id):

 def getIds():
     ids = []
-    html = getUrlUnicode("http://www.criterion.com/library/dvd")
+    html = readUrlUnicode("http://www.criterion.com/library/dvd")
     results = re.compile("page=(.*?)\"").findall(html)
     pages = int(results[len(results) - 2])
     for page in range(pages, 0, -1):
@@ -74,13 +74,13 @@ def getIds():

 def getIdsByPage(page):
     ids = []
-    html = getUrlUnicode("http://www.criterion.com/library/dvd?page=%s" % page)
+    html = readUrlUnicode("http://www.criterion.com/library/dvd?page=%s" % page)
     results = re.compile("films/(.*?)\"").findall(html)
     for result in results:
         ids.append(result)
     results = re.compile("boxsets/(.*?)\"").findall(html)
     for result in results:
-        html = getUrlUnicode("http://www.criterion.com/boxsets/" + result)
+        html = readUrlUnicode("http://www.criterion.com/boxsets/" + result)
         results = re.compile("films/(.*?)\"").findall(html)
         for result in results:
             ids.append(result)
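criterion first tries the decoded unicode reader and falls back to the raw byte-level fetch when decoding fails; the same fallback as a standalone sketch (assuming ox.cache still exposes getUrl after this commit, as the hunk above suggests):

    import ox.cache
    from ox.cache import readUrlUnicode

    def read_page(url):
        # prefer the decoded unicode body; fall back to raw bytes on failure
        try:
            return readUrlUnicode(url)
        except:
            return ox.cache.getUrl(url)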
@@ -2,7 +2,7 @@
 # vi:si:et:sw=4:sts=4:ts=4
 import re
 from urllib import unquote
-from oxlib.cache import getUrl
+from ox.cache import readUrl


 def getVideoUrl(url):
@@ -13,7 +13,7 @@ def getVideoUrl(url):
     >>> getVideoUrl('http://www.dailymotion.com/relevance/search/priere%2Bpour%2Brefuznik/video/x3ou94_priere-pour-refuznik-2-jeanluc-goda_shortfilms').split('?key')[0]
     'http://www.dailymotion.com/get/15/320x240/flv/6197800.flv'
     '''
-    data = getUrl(url)
+    data = readUrl(url)
     video = re.compile('''video", "(.*?)"''').findall(data)
     for v in video:
         v = unquote(v).split('@@')[0]
@@ -3,8 +3,8 @@
 import re
 import time

-from oxlib import stripTags, findRe
-from oxlib.cache import getUrlUnicode
+from ox import stripTags, findRe
+from ox.cache import readUrlUnicode

 import google

@@ -21,7 +21,7 @@ def getShowUrl(title):
     return None

 def getShowData(url):
-    data = getUrlUnicode(url)
+    data = readUrlUnicode(url)
     r = {}
     r['title'] = stripTags(findRe(data, '<h1>(.*?)</h1>'))
     r['imdb'] = findRe(data, '<h1><a href=".*?/title/tt(\d.*?)">.*?</a></h1>')
@@ -10,8 +10,8 @@ import Queue
 import simplejson


-import oxlib
-from oxlib import stripTags
+import ox
+from ox import stripTags


 '''
@@ -30,15 +30,15 @@ FIXME: how search depper than first page?
 DEFAULT_MAX_RESULTS = 10
 DEFAULT_TIMEOUT = 24*60*60

-def getUrl(url, data=None, headers=oxlib.net.DEFAULT_HEADERS, timeout=DEFAULT_TIMEOUT):
-    return oxlib.cache.getUrl(url, data, headers, timeout)
+def readUrl(url, data=None, headers=ox.net.DEFAULT_HEADERS, timeout=DEFAULT_TIMEOUT):
+    return ox.cache.readUrl(url, data, headers, timeout)

 def quote_plus(s):
     return urllib.quote_plus(s.encode('utf-8'))

 def find(query, max_results=DEFAULT_MAX_RESULTS, timeout=DEFAULT_TIMEOUT):
     url = "http://www.google.com/search?q=%s" % quote_plus(query)
-    data = getUrl(url, timeout=timeout)
+    data = readUrl(url, timeout=timeout)
     link_re = r'<a href="(?P<url>[^"]*?)" class=l.*?>(?P<name>.*?)</a>' + \
               r'.*?(?:<br>|<table.*?>)' + \
               r'(?P<desc>.*?)' + '(?:<font color=#008000>|<a)'
@@ -52,6 +52,6 @@ def find(query, max_results=DEFAULT_MAX_RESULTS, timeout=DEFAULT_TIMEOUT):

 def _find(query):
     url = 'http://ajax.googleapis.com/ajax/services/search/web?v=1.0&q=%s' % quote_plus(query)
-    results = simplejson.loads(getUrlUnicode(url))['responseData']['results']
+    results = simplejson.loads(ox.cache.readUrlUnicode(url))['responseData']['results']
     return results
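The module keeps a thin readUrl wrapper so every Google query goes through the ox cache with a 24-hour default timeout. A hedged usage sketch of the scraping entry point, based on how imdb.guess below consumes it (network access and Google's markup of the time assumed):

    # illustrative only: find() yields (name, url, desc) triples
    for name, url, desc in find(u'site:imdb.com "The Searchers"', max_results=3):
        print url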
@@ -8,19 +8,19 @@ import time

 from BeautifulSoup import BeautifulSoup
 import chardet
-import oxlib
-from oxlib import stripTags, decodeHtml, findRe, findString
-import oxlib.cache
-from oxlib.normalize import normalizeTitle, normalizeImdbId
-from oxlib import *
+import ox
+from ox import stripTags, decodeHtml, findRe, findString
+import ox.cache
+from ox.normalize import normalizeTitle, normalizeImdbId
+from ox import *

 import google

 '''
 never timeout imdb data, to update cache remove data from cache folder
 '''
-def getUrlUnicode(url, data=None, headers=oxlib.cache.DEFAULT_HEADERS, timeout=-1):
-    return oxlib.cache.getUrlUnicode(url, data, headers, timeout)
+def readUrlUnicode(url, data=None, headers=ox.cache.DEFAULT_HEADERS, timeout=-1):
+    return ox.cache.readUrlUnicode(url, data, headers, timeout)

 '''
 check if result is valid while updating
@@ -28,8 +28,8 @@ def validate(result, header):
     return header['status'] == u'200'

 try:
-    d = oxlib.cache.getUrlUnicode(url, data, headers, timeout=0, valid=validate)
-except oxlib.cache.InvalidResult, e:
+    d = ox.cache.readUrlUnicode(url, data, headers, timeout=0, valid=validate)
+except ox.cache.InvalidResult, e:
     print e.headers

 '''
@@ -76,7 +76,7 @@ def getRawMovieData(imdbId):
     return data

 def getMovieInfo(imdbId):
-    data = getUrlUnicode(getUrlBase(imdbId))
+    data = readUrlUnicode(getUrlBase(imdbId))
     info = dict()
     info['poster'] = findRe(data, 'name="poster".*?<img .*?src="(.*?)"')
     if info['poster'] and '_V' in info['poster']:
@@ -246,7 +246,7 @@ def getMovieAKATitles(imdbId):
      (u'Women of the Night', u'(undefined)')]
     '''
     url = "%sreleaseinfo" % getUrlBase(imdbId)
-    data = getUrlUnicode(url)
+    data = readUrlUnicode(url)
     titles = findRe(data, 'name="akas".*?<table.*?>(.*?)</table>')
     titles = re.compile("td>(.*?)</td>\n\n<td>(.*)</td>").findall(titles)
     return titles
@@ -268,7 +268,7 @@ def creditList(data, section=None):
 def getMovieCredits(imdbId):
     credits = dict()
     url = "%sfullcredits" % getUrlBase(imdbId)
-    data = getUrlUnicode(url)
+    data = readUrlUnicode(url)
     groups = data.split('<h5>')
     for g in groups:
         section = re.compile('''name="(.*?)".*? href="/Glossary''').findall(g)
@@ -278,7 +278,7 @@ def getMovieCredits(imdbId):

 def getMovieTrailers(imdbId):
     url = "%strailers" % getUrlBase(imdbId)
-    data = getUrlUnicode(url)
+    data = readUrlUnicode(url)
     soup = BeautifulSoup(data)
     videos = soup('div', {'class':"video-gallery"})
     trailers = []
@@ -288,27 +288,27 @@ def getMovieTrailers(imdbId):
         url = 'http://www.imdb.com' + a['href']
         videoId = findRe(url, '/(vi\d*?)/')
         iframeUrl = "http://www.imdb.com/video/trailer/%s/player" % videoId
-        iframe = getUrlUnicode(iframeUrl)
+        iframe = readUrlUnicode(iframeUrl)
         videoUrl = unquote(findRe(iframe, 'addVariable\("file", "(.*?)"'))
         trailers.append({'title': title, 'url': url, 'iframe': iframeUrl, 'flv':videoUrl})
     return trailers

 def getMovieQuotes(imdbId):
     url = "%squotes" % getUrlBase(imdbId)
-    data = getUrlUnicode(url)
+    data = readUrlUnicode(url)
     quotes = re.compile('<b>(.*?)</b>:(.*?)<br>', re.DOTALL).findall(findString(data, '<a name="q'))
     quotes = [(q[0].strip(),q[1].strip()) for q in quotes]
     return quotes

 def getMoviePlot(imdbId):
     url = "%splotsummary" % getUrlBase(imdbId)
-    data = getUrlUnicode(url)
+    data = readUrlUnicode(url)
     plot = findRe(data, '<p class="plotpar">(.*?)<i>').split('</p>')[0]
     return plot.strip()

 def getMovieTechnical(imdbId):
     url = "%stechnical" % getUrlBase(imdbId)
-    data = getUrlUnicode(url)
+    data = readUrlUnicode(url)
     results = {}
     for t in re.compile('<h5>(.*?)</h5>(.*?)<br/>', re.DOTALL).findall(data):
         results[t[0].strip()] = t[1].strip()
@@ -316,7 +316,7 @@ def getMovieTechnical(imdbId):

 def getMovieCompanyCredits(imdbId):
     url = "%scompanycredits" % getUrlBase(imdbId)
-    data = getUrlUnicode(url)
+    data = readUrlUnicode(url)
     results = {}
     for field, c in re.compile('<h2>(.*?)</h2><ul>(.*?)</ul>').findall(data):
         results[field.strip()] = []
@@ -326,7 +326,7 @@ def getMovieCompanyCredits(imdbId):

 def getMovieLocations(imdbId):
     url = "%slocations" % getUrlBase(imdbId)
-    data = getUrlUnicode(url)
+    data = readUrlUnicode(url)
     locations = re.compile('<dt><a href="/List.*?>(.*?)</a></dt>').findall(data)
     return locations

@@ -334,7 +334,7 @@ def getMovieImages(imdbId, keys=('still_frame', 'poster', 'product')):
     photos = {}
     for key in keys:
         url = "%smediaindex?refine=%s" % (getUrlBase(imdbId), key)
-        data = getUrlUnicode(url)
+        data = readUrlUnicode(url)
         photos[key] = {}
         for s in re.compile('''<img alt="(.*?)".*?src="(http://ia.media-imdb.com/.*?.jpg)''').findall(data):
             img= "%s.jpg" % s[1].split('._V')[0]
@@ -358,7 +358,7 @@ def getMoviePosters(imdbId):

 def getMovieTrivia(imdbId):
     url = "%strivia" % getUrlBase(imdbId)
-    data = getUrlUnicode(url)
+    data = readUrlUnicode(url)
     trivia = re.compile('<li>(.*?)</li>').findall(data)
     def clean(t):
         t = decodeHtml(t)
@@ -371,7 +371,7 @@ def getMovieTrivia(imdbId):

 def getMovieConnections(imdbId):
     url = "%smovieconnections" % getUrlBase(imdbId)
-    data = getUrlUnicode(url)
+    data = readUrlUnicode(url)
     connections={}
     for c in re.compile('''<h5>(.*?)</h5>(.*?)\n\n''', re.DOTALL).findall(data):
         connections[unicode(c[0])] = re.compile('''<a href="/title/tt(\d{7})/">''').findall(c[1])
@@ -379,7 +379,7 @@ def getMovieConnections(imdbId):

 def getMovieKeywords(imdbId):
     url = "%skeywords" % getUrlBase(imdbId)
-    data = getUrlUnicode(url)
+    data = readUrlUnicode(url)
     keywords = []
     for keyword in re.compile('''<a.*?href="/keyword.*?>(.*?)</a>''').findall(data):
         keyword = decodeHtml(keyword)
@@ -389,7 +389,7 @@ def getMovieKeywords(imdbId):

 def getMovieExternalReviews(imdbId):
     url = "%sexternalreviews" % getUrlBase(imdbId)
-    data = getUrlUnicode(url)
+    data = readUrlUnicode(url)
     _reviews = re.compile('<li><a href="(.*?)">(.*?)</a></li>').findall(data)
     reviews = {}
     for r in _reviews:
@@ -430,7 +430,7 @@ def _parseDate(d):

 def getMovieReleaseDates(imdbId):
     url = "%sreleaseinfo" % getUrlBase(imdbId)
-    data = getUrlUnicode(url)
+    data = readUrlUnicode(url)
     releasedates = []
     regexp = '''<tr><td>(.*?)</td>.*?<td align="right">(.*?)</td>.*?<td>(.*?)</td></tr>'''

@@ -468,7 +468,7 @@ def getMovieFlimingDates(imdbId):

 def getMovieBusiness(imdbId):
     url = "%sbusiness" % getUrlBase(imdbId)
-    data = getUrlUnicode(url)
+    data = readUrlUnicode(url)
     business = {}
     for r in re.compile('''<h5>(.*?)</h5>(.*?)<br/>.<br/>''', re.DOTALL).findall(data):
         key = stripTags(r[0]).strip().lower()
@@ -478,7 +478,7 @@ def getMovieBusiness(imdbId):

 def getMovieEpisodes(imdbId):
     url = "%sepisodes" % getUrlBase(imdbId)
-    data = getUrlUnicode(url)
+    data = readUrlUnicode(url)
     episodes = {}
     regexp = r'''<h4>Season (.*?), Episode (.*?): <a href="/title/tt(.*?)/">(.*?)</a></h4>(.*?)</b><br>(.*?)<br/>'''
     for r in re.compile(regexp, re.DOTALL).findall(data):
@@ -514,7 +514,7 @@ class IMDb:
         self.pageUrl = getUrlBase(imdbId)

     def getPage(self):
-        return getUrlUnicode(self.pageUrl)
+        return readUrlUnicode(self.pageUrl)

     def parse_raw_value(self, key, value):
         if key in ('runtime', 'language', 'genre', 'country', 'tagline', 'plot_outline'):
@@ -682,10 +682,10 @@ def guess(title, director=''):
     search = 'site:imdb.com "%s"' % title
     for (name, url, desc) in google.find(search, 2):
         if url.startswith('http://www.imdb.com/title/tt'):
-            return normalizeImdbId(int(oxlib.intValue(url)))
+            return normalizeImdbId(int(ox.intValue(url)))

     try:
-        req = urllib2.Request(imdb_url, None, oxlib.net.DEFAULT_HEADERS)
+        req = urllib2.Request(imdb_url, None, ox.net.DEFAULT_HEADERS)
         u = urllib2.urlopen(req)
         data = u.read()
         return_url = u.url
@@ -700,7 +700,7 @@ def guess(title, director=''):
         return imdb_id

     imdb_url = 'http://www.imdb.com/find?q=%s;s=tt;site=aka' % quote(title.encode('utf-8'))
-    req = urllib2.Request(imdb_url, None, oxlib.net.DEFAULT_HEADERS)
+    req = urllib2.Request(imdb_url, None, ox.net.DEFAULT_HEADERS)
     u = urllib2.urlopen(req)
     data = u.read()
     return_url = u.url
@@ -737,7 +737,7 @@ def getEpisodeData(title, episode, show_url = None):
 def getPersonData(imdbId):
     imdbId = normalizeImdbId(imdbId)
     url = u'http://www.imdb.com/name/nm%s/' % imdbId
-    data = getUrlUnicode(url)
+    data = readUrlUnicode(url)
     info = dict()
     info['name'] = findRe(data, u'<title>(.*?)</title>')
     filmo = data.split(u'<h3>Additional Details</h3>')[0]
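Two cache conventions show up in this file: timeout=-1 pins IMDb pages in the cache forever (deleting the cache file is the only refresh), and timeout=0 together with a valid callback forces revalidation. A sketch of both, assuming ox.cache keeps the timeout semantics this module's own comments describe:

    import ox.cache

    # never expire: cached imdb pages are only refreshed by clearing the cache
    html = ox.cache.readUrlUnicode('http://www.imdb.com/title/tt0060304/', timeout=-1)

    # force revalidation: refetch now, but only accept a 200 response
    def validate(result, header):
        return header['status'] == u'200'
    html = ox.cache.readUrlUnicode('http://www.imdb.com/title/tt0060304/',
                                   timeout=0, valid=validate)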
@@ -2,9 +2,9 @@
 # encoding: utf-8
 import re

-from oxlib.cache import getUrlUnicode
-from oxlib.html import stripTags
-from oxlib.text import findRe
+from ox.cache import readUrlUnicode
+from ox.html import stripTags
+from ox.text import findRe

 import imdb

@@ -22,7 +22,7 @@ def getData(id):
     data = {
         'url': getUrl(id)
     }
-    html = getUrlUnicode(data['url'])
+    html = readUrlUnicode(data['url'])
     data['imdbId'] = findRe(html, 'imdb.com/title/tt(.*?) ')
     data['title'] = stripTags(findRe(html, '<p class="name white">(.*?) \(<a href="alpha1.html">'))
     data['year'] = findRe(html, '\(<a href="alpha1.html">(.*?)</a>\)')
@@ -31,11 +31,11 @@ def getData(id):
     for result in results:
         result = result.replace('_xlg.html', '.html')
         url = 'http://www.impawards.com/%s/%s' % (data['year'], result)
-        html = getUrlUnicode(url)
+        html = readUrlUnicode(url)
         result = findRe(html, '<a href = (\w*?_xlg.html)')
         if result:
             url = 'http://www.impawards.com/%s/%s' % (data['year'], result)
-            html = getUrlUnicode(url)
+            html = readUrlUnicode(url)
             poster = 'http://www.impawards.com/%s/%s' % (data['year'], findRe(html, '<img SRC="(.*?)"'))
         else:
             poster = 'http://www.impawards.com/%s/%s' % (data['year'], findRe(html, '<img src="(posters.*?)" alt='))
@@ -55,7 +55,7 @@ def getId(url):

 def getIds():
     ids = []
-    html = getUrlUnicode('http://www.impawards.com/archives/latest.html', timeout = 60*60)
+    html = readUrlUnicode('http://www.impawards.com/archives/latest.html', timeout = 60*60)
     pages = int(findRe(html, '<a href= page(.*?).html>')) + 1
     for page in range(pages, 0, -1):
         for id in getIdsByPage(page):
@@ -65,7 +65,7 @@ def getIds():

 def getIdsByPage(page):
     ids = []
-    html = getUrlUnicode('http://www.impawards.com/archives/page%s.html' % page, timeout = -1)
+    html = readUrlUnicode('http://www.impawards.com/archives/page%s.html' % page, timeout = -1)
     results = re.compile('<a href = \.\./(.*?)>', re.DOTALL).findall(html)
     for result in results:
         url = 'http://impawards.com/%s' % result
@@ -74,7 +74,7 @@ def getIdsByPage(page):

 def getUrl(id):
     url = "http://www.impawards.com/%s.html" % id
-    html = getUrlUnicode(url)
+    html = readUrlUnicode(url)
     if findRe(html, "No Movie Posters on This Page"):
         url = "http://www.impawards.com/%s_ver1.html" % id
     return url
@@ -3,10 +3,10 @@
 import re
 import urllib

-from oxlib.cache import getUrl
-from oxlib.html import decodeHtml, stripTags
-from oxlib.text import findRe
-from oxlib.text import findString
+from ox.cache import readUrl
+from ox.html import decodeHtml, stripTags
+from ox.text import findRe
+from ox.text import findString


 # to sniff itunes traffic, use something like
@@ -113,14 +113,14 @@ class ItunesAlbum:

     def getId(self):
         url = composeUrl('advancedSearch', {'media': 'music', 'title': self.title, 'artist': self.artist})
-        xml = getUrl(url, headers = ITUNES_HEADERS)
+        xml = readUrl(url, headers = ITUNES_HEADERS)
         id = findRe(xml, 'viewAlbum\?id=(.*?)&')
         return id

     def getData(self):
         data = {'id': self.id}
         url = composeUrl('viewAlbum', {'id': self.id})
-        xml = getUrl(url, None, ITUNES_HEADERS)
+        xml = readUrl(url, None, ITUNES_HEADERS)
         data['albumName'] = findRe(xml, '<B>(.*?)</B>')
         data['artistName'] = findRe(xml, '<b>(.*?)</b>')
         data['coverUrl'] = findRe(xml, 'reflection="." url="(.*?)"')
@@ -144,14 +144,14 @@ class ItunesMovie:

     def getId(self):
         url = composeUrl('advancedSearch', {'media': 'movie', 'title': self.title, 'director': self.director})
-        xml = getUrl(url, headers = ITUNES_HEADERS)
+        xml = readUrl(url, headers = ITUNES_HEADERS)
         id = findRe(xml, 'viewMovie\?id=(.*?)&')
         return id

     def getData(self):
         data = {'id': self.id}
         url = composeUrl('viewMovie', {'id': self.id})
-        xml = getUrl(url, None, ITUNES_HEADERS)
+        xml = readUrl(url, None, ITUNES_HEADERS)
         f = open('/Users/rolux/Desktop/iTunesData.xml', 'w')
         f.write(xml)
         f.close()
@@ -1,24 +1,24 @@
 import re
-from oxlib import cache
-from oxlib.html import stripTags
-from oxlib.text import findRe
+from ox import cache
+from ox.html import stripTags
+from ox.text import findRe

 import auth


-def _getUrl(url, data=None, headers=cache.DEFAULT_HEADERS, timeout=cache.cache_timeout, valid=None):
+def readUrl(url, data=None, headers=cache.DEFAULT_HEADERS, timeout=cache.cache_timeout, valid=None):
     headers = headers.copy()
     headers["Cookie"] = auth.get("karagarga.cookie")
-    return cache.getUrl(url, data, headers, timeout)
+    return cache.readUrl(url, data, headers, timeout)

-def getUrlUnicode(url, timeout=cache.cache_timeout):
-    return cache.getUrlUnicode(url, _getUrl=_getUrl, timeout=timeout)
+def readUrlUnicode(url, timeout=cache.cache_timeout):
+    return cache.readUrlUnicode(url, _readUrl=readUrl, timeout=timeout)

 def getData(id):
     data = {
         "url": getUrl(id)
     }
-    html = getUrlUnicode("%s%s" % (data["url"], "&filelist=1"))
+    html = readUrlUnicode("%s%s" % (data["url"], "&filelist=1"))
     if 'No torrent with ID' in html:
         return False
     data['added'] = stripTags(parseTable(html, 'Added'))
@@ -87,7 +87,7 @@ def getId(url):
     return url.split("=")[-1]

 def getTorrent(id):
-    return _getUrl(getData(id)['torrent'])
+    return readUrl(getData(id)['torrent'])

 def getIds(lastId = 20):
     lastId = '%s' % lastId
@@ -105,7 +105,7 @@ def getIds(lastId = 20):
 def getIdsByPage(page):
     ids = []
     url = 'http://karagarga.net/browse.php?page=%s&cat=1&sort=added&d=DESC' % page
-    html = getUrlUnicode(url, timeout = 23*60*60) #get new ids once per day
+    html = readUrlUnicode(url, timeout = 23*60*60) #get new ids once per day
     strings = html.split('<td width="42" style="padding:0px;">')
     strings.pop(0)
     for string in strings:
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
# vi:si:et:sw=4:sts=4:ts=4
|
# vi:si:et:sw=4:sts=4:ts=4
|
||||||
from oxlib.cache import getUrl
|
from ox.cache import readUrl
|
||||||
from oxlib.html import decodeHtml
|
from ox.html import decodeHtml
|
||||||
from oxlib.text import findRe
|
from ox.text import findRe
|
||||||
|
|
||||||
|
|
||||||
def getLyrics(title, artist):
|
def getLyrics(title, artist):
|
||||||
html = getUrl('http://lyricsfly.com/api/')
|
html = readUrl('http://lyricsfly.com/api/')
|
||||||
key = findRe(html, '<font color=green><b>(.*?)</b></font>')
|
key = findRe(html, '<font color=green><b>(.*?)</b></font>')
|
||||||
url = 'http://lyricsfly.com/api/api.php?i=%s&a=%s&t=%s' % (key, artist, title)
|
url = 'http://lyricsfly.com/api/api.php?i=%s&a=%s&t=%s' % (key, artist, title)
|
||||||
xml = getUrl(url)
|
xml = readUrl(url)
|
||||||
lyrics = findRe(xml, '<tx>(.*?)\[br\] Lyrics [a-z]* by lyricsfly.com')
|
lyrics = findRe(xml, '<tx>(.*?)\[br\] Lyrics [a-z]* by lyricsfly.com')
|
||||||
lyrics = lyrics.replace('\n', '').replace('\r', '')
|
lyrics = lyrics.replace('\n', '').replace('\r', '')
|
||||||
lyrics = lyrics.replace('[br]', '\n').strip()
|
lyrics = lyrics.replace('[br]', '\n').strip()
|
|
@ -3,14 +3,14 @@
|
||||||
import re
|
import re
|
||||||
from urllib import quote
|
from urllib import quote
|
||||||
|
|
||||||
from oxlib.cache import getUrl, getUrlUnicode
|
from ox.cache import readUrl, readUrlUnicode
|
||||||
from oxlib import findRe, decodeHtml, stripTags
|
from ox import findRe, decodeHtml, stripTags
|
||||||
|
|
||||||
|
|
||||||
def getMetacriticShowUrl(title):
|
def getMetacriticShowUrl(title):
|
||||||
title = quote(title)
|
title = quote(title)
|
||||||
url = "http://www.metacritic.com/search/process?ty=6&ts=%s&tfs=tvshow_title&x=0&y=0&sb=0&release_date_s=&release_date_e=&metascore_s=&metascore_e=" % title
|
url = "http://www.metacritic.com/search/process?ty=6&ts=%s&tfs=tvshow_title&x=0&y=0&sb=0&release_date_s=&release_date_e=&metascore_s=&metascore_e=" % title
|
||||||
data = getUrl(url)
|
data = readUrl(url)
|
||||||
return findRe(data, '(http://www.metacritic.com/tv/shows/.*?)\?')
|
return findRe(data, '(http://www.metacritic.com/tv/shows/.*?)\?')
|
||||||
|
|
||||||
def getData(title, url=None):
|
def getData(title, url=None):
|
||||||
|
@ -18,7 +18,7 @@ def getData(title, url=None):
|
||||||
url = getMetacriticShowUrl(title)
|
url = getMetacriticShowUrl(title)
|
||||||
if not url:
|
if not url:
|
||||||
return None
|
return None
|
||||||
data = getUrlUnicode(url)
|
data = readUrlUnicode(url)
|
||||||
score = findRe(data, 'ALT="Metascore: (.*?)"')
|
score = findRe(data, 'ALT="Metascore: (.*?)"')
|
||||||
if score:
|
if score:
|
||||||
score = int(score)
|
score = int(score)
|
|
@ -5,10 +5,10 @@ import re
|
||||||
import socket
|
import socket
|
||||||
from urllib import quote
|
from urllib import quote
|
||||||
|
|
||||||
from oxlib.cache import getUrl, getUrlUnicode
|
from ox.cache import readUrl, readUrlUnicode
|
||||||
from oxlib import findRe, cache, stripTags, decodeHtml, getTorrentInfo, intValue, normalizeNewlines
|
from ox import findRe, cache, stripTags, decodeHtml, getTorrentInfo, intValue, normalizeNewlines
|
||||||
from oxlib.normalize import normalizeImdbId
|
from ox.normalize import normalizeImdbId
|
||||||
import oxlib
|
import ox
|
||||||
|
|
||||||
from torrent import Torrent
|
from torrent import Torrent
|
||||||
|
|
||||||
|
@ -31,7 +31,7 @@ def findMovie(query, max_results=10):
|
||||||
'''search for torrents on mininova
|
'''search for torrents on mininova
|
||||||
'''
|
'''
|
||||||
url = "http://www.mininova.org/search/%s/seeds" % quote(query)
|
url = "http://www.mininova.org/search/%s/seeds" % quote(query)
|
||||||
data = getUrlUnicode(url)
|
data = readUrlUnicode(url)
|
||||||
return _parseResultsPage(data, max_results)
|
return _parseResultsPage(data, max_results)
|
||||||
|
|
||||||
def findMovieByImdb(imdbId):
|
def findMovieByImdb(imdbId):
|
||||||
|
@ -39,7 +39,7 @@ def findMovieByImdb(imdbId):
|
||||||
'''
|
'''
|
||||||
results = []
|
results = []
|
||||||
imdbId = normalizeImdbId(imdbId)
|
imdbId = normalizeImdbId(imdbId)
|
||||||
data = getUrlUnicode("http://www.mininova.org/imdb/?imdb=%s" % imdbId)
|
data = readUrlUnicode("http://www.mininova.org/imdb/?imdb=%s" % imdbId)
|
||||||
return _parseResultsPage(data)
|
return _parseResultsPage(data)
|
||||||
|
|
||||||
def getId(mininovaId):
|
def getId(mininovaId):
|
||||||
|
@ -55,7 +55,7 @@ def getId(mininovaId):
|
||||||
|
|
||||||
def exists(mininovaId):
|
def exists(mininovaId):
|
||||||
mininovaId = getId(mininovaId)
|
mininovaId = getId(mininovaId)
|
||||||
data = oxlib.net.getUrl("http://www.mininova.org/tor/%s" % mininovaId)
|
data = ox.net.readUrl("http://www.mininova.org/tor/%s" % mininovaId)
|
||||||
if not data or 'Torrent not found...' in data:
|
if not data or 'Torrent not found...' in data:
|
||||||
return False
|
return False
|
||||||
if 'tracker</a> of this torrent requires registration.' in data:
|
if 'tracker</a> of this torrent requires registration.' in data:
|
||||||
|
@ -74,7 +74,7 @@ def getData(mininovaId):
|
||||||
torrent[u'torrent_link'] = "http://www.mininova.org/get/%s" % mininovaId
|
torrent[u'torrent_link'] = "http://www.mininova.org/get/%s" % mininovaId
|
||||||
torrent[u'details_link'] = "http://www.mininova.org/det/%s" % mininovaId
|
torrent[u'details_link'] = "http://www.mininova.org/det/%s" % mininovaId
|
||||||
|
|
||||||
data = getUrlUnicode(torrent['comment_link']) + getUrlUnicode(torrent['details_link'])
|
data = readUrlUnicode(torrent['comment_link']) + readUrlUnicode(torrent['details_link'])
|
||||||
if '<h1>Torrent not found...</h1>' in data:
|
if '<h1>Torrent not found...</h1>' in data:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
@ -89,7 +89,7 @@ def getData(mininovaId):
|
||||||
torrent[u'description'] = findRe(data, '<div id="description">(.*?)</div>')
|
torrent[u'description'] = findRe(data, '<div id="description">(.*?)</div>')
|
||||||
if torrent['description']:
|
if torrent['description']:
|
||||||
torrent['description'] = normalizeNewlines(decodeHtml(stripTags(torrent['description']))).strip()
|
torrent['description'] = normalizeNewlines(decodeHtml(stripTags(torrent['description']))).strip()
|
||||||
t = getUrl(torrent[u'torrent_link'])
|
t = readUrl(torrent[u'torrent_link'])
|
||||||
torrent[u'torrent_info'] = getTorrentInfo(t)
|
torrent[u'torrent_info'] = getTorrentInfo(t)
|
||||||
return torrent
|
return torrent
|
||||||
|
|
|
@@ -3,8 +3,8 @@

 import re

-from oxlib.cache import getUrlUnicode
-from oxlib import findRe
+from ox.cache import readUrlUnicode
+from ox import findRe

 def getData(id):
     '''
@@ -24,7 +24,7 @@ def getId(url):

 def getPostersByUrl(url, group=True):
     posters = []
-    html = getUrlUnicode(url)
+    html = readUrlUnicode(url)
     if url in html:
         if group:
             results = re.compile('<a href="(http://www.movieposterdb.com/group/.+?)\??">', re.DOTALL).findall(html)
@@ -32,7 +32,7 @@ def getPostersByUrl(url, group=True):
                 posters += getPostersByUrl(result, False)
         results = re.compile('<a href="(http://www.movieposterdb.com/poster/.+?)">', re.DOTALL).findall(html)
         for result in results:
-            html = getUrlUnicode(result)
+            html = readUrlUnicode(result)
             posters.append(findRe(html, '"(http://www.movieposterdb.com/posters/.+?\.jpg)"'))
     return posters
|
||||||
import re
|
import re
|
||||||
|
|
||||||
import feedparser
|
import feedparser
|
||||||
from oxlib.cache import getUrl, getUrlUnicode
|
from ox.cache import readUrl, readUrlUnicode
|
||||||
import oxlib
|
import ox
|
||||||
from oxlib import langCode2To3, langTo3Code
|
from ox import langCode2To3, langTo3Code
|
||||||
|
|
||||||
def findSubtitlesByImdb(imdb, parts = 1, language = "eng"):
|
def findSubtitlesByImdb(imdb, parts = 1, language = "eng"):
|
||||||
if len(language) == 2:
|
if len(language) == 2:
|
||||||
|
@ -16,7 +16,7 @@ def findSubtitlesByImdb(imdb, parts = 1, language = "eng"):
|
||||||
if language:
|
if language:
|
||||||
url += "sublanguageid-%s/" % language
|
url += "sublanguageid-%s/" % language
|
||||||
url += "subsumcd-%s/subformat-srt/imdbid-%s/rss_2_00" % (parts, imdb)
|
url += "subsumcd-%s/subformat-srt/imdbid-%s/rss_2_00" % (parts, imdb)
|
||||||
data = getUrl(url)
|
data = readUrl(url)
|
||||||
if "title>opensubtitles.com - search results</title" in data:
|
if "title>opensubtitles.com - search results</title" in data:
|
||||||
fd = feedparser.parse(data)
|
fd = feedparser.parse(data)
|
||||||
opensubtitleId = None
|
opensubtitleId = None
|
||||||
|
@ -26,16 +26,16 @@ def findSubtitlesByImdb(imdb, parts = 1, language = "eng"):
|
||||||
if opensubtitleId:
|
if opensubtitleId:
|
||||||
opensubtitleId = opensubtitleId[0]
|
opensubtitleId = opensubtitleId[0]
|
||||||
else:
|
else:
|
||||||
opensubtitleId = oxlib.findRe(data, '/en/subtitles/(.*?)/')
|
opensubtitleId = ox.findRe(data, '/en/subtitles/(.*?)/')
|
||||||
return opensubtitleId
|
return opensubtitleId
|
||||||
|
|
||||||
def downloadSubtitleById(opensubtitle_id):
|
def downloadSubtitleById(opensubtitle_id):
|
||||||
srts = {}
|
srts = {}
|
||||||
data = getUrl('http://www.opensubtitles.org/en/subtitles/%s' % opensubtitle_id)
|
data = readUrl('http://www.opensubtitles.org/en/subtitles/%s' % opensubtitle_id)
|
||||||
reg_exp = 'href="(/en/download/file/.*?)">(.*?)</a>'
|
reg_exp = 'href="(/en/download/file/.*?)">(.*?)</a>'
|
||||||
for f in re.compile(reg_exp, re.DOTALL).findall(data):
|
for f in re.compile(reg_exp, re.DOTALL).findall(data):
|
||||||
name = oxlib.stripTags(f[1]).split('\n')[0]
|
name = ox.stripTags(f[1]).split('\n')[0]
|
||||||
url = "http://www.opensubtitles.com%s" % f[0]
|
url = "http://www.opensubtitles.com%s" % f[0]
|
||||||
srts[name] = getUrlUnicode(url)
|
srts[name] = readUrlUnicode(url)
|
||||||
return srts
|
return srts
|
||||||
|
|
|
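Taken together, the two functions resolve an IMDb id to an OpenSubtitles id and then fetch every .srt linked from its page. A hedged usage sketch (network access and the site's markup of the time assumed):

    # illustrative only
    opensubtitle_id = findSubtitlesByImdb('0060304', parts=1, language='eng')
    if opensubtitle_id:
        srts = downloadSubtitleById(opensubtitle_id)
        for name in srts:
            print name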
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 # vi:si:et:sw=4:sts=4:ts=4
-import oxlib.cache
+import ox.cache

 def getPosterUrl(id):
     url = "http://0xdb.org/%s/poster.0xdb.jpg" % id
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # vi:si:et:sw=4:sts=4:ts=4
-import oxlib.cache
-from oxlib.cache import exists
+import ox.cache
+from ox.cache import exists


 def getPosterUrl(id):
|
||||||
# vi:si:et:sw=4:sts=4:ts=4
|
# vi:si:et:sw=4:sts=4:ts=4
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from oxlib.cache import getHeaders, getUrl, getUrlUnicode
|
from ox.cache import getHeaders, readUrl, readUrlUnicode
|
||||||
from oxlib import findRe, stripTags
|
from ox import findRe, stripTags
|
||||||
|
|
||||||
|
|
||||||
def getUrlByImdb(imdb):
|
def readUrlByImdb(imdb):
|
||||||
#this would also wor but does not cache:
|
#this would also wor but does not cache:
|
||||||
'''
|
'''
|
||||||
from urllib2 import urlopen
|
from urllib2 import urlopen
|
||||||
|
@ -14,7 +14,7 @@ def getUrlByImdb(imdb):
|
||||||
return u.url
|
return u.url
|
||||||
'''
|
'''
|
||||||
url = "http://www.rottentomatoes.com/alias?type=imdbid&s=%s" % imdb
|
url = "http://www.rottentomatoes.com/alias?type=imdbid&s=%s" % imdb
|
||||||
data = getUrl(url)
|
data = readUrl(url)
|
||||||
if "movie_title" in data:
|
if "movie_title" in data:
|
||||||
movies = re.compile('(/m/.*?/)').findall(data)
|
movies = re.compile('(/m/.*?/)').findall(data)
|
||||||
if movies:
|
if movies:
|
||||||
|
@ -22,7 +22,7 @@ def getUrlByImdb(imdb):
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def getData(url):
|
def getData(url):
|
||||||
data = getUrlUnicode(url)
|
data = readUrlUnicode(url)
|
||||||
r = {}
|
r = {}
|
||||||
r['title'] = findRe(data, '<h1 class="movie_title">(.*?)</h1>')
|
r['title'] = findRe(data, '<h1 class="movie_title">(.*?)</h1>')
|
||||||
if '(' in r['title']:
|
if '(' in r['title']:
|
|
@ -6,9 +6,9 @@ import time
|
||||||
|
|
||||||
from BeautifulSoup import BeautifulSoup
|
from BeautifulSoup import BeautifulSoup
|
||||||
|
|
||||||
import oxlib.cache
|
import ox.cache
|
||||||
from oxlib.html import decodeHtml, stripTags
|
from ox.html import decodeHtml, stripTags
|
||||||
import oxlib.net
|
import ox.net
|
||||||
|
|
||||||
|
|
||||||
def getNews(year, month, day):
|
def getNews(year, month, day):
|
||||||
|
@ -23,9 +23,9 @@ def getNews(year, month, day):
|
||||||
for section in sections:
|
for section in sections:
|
||||||
url = 'http://www.spiegel.de/%s/0,1518,archiv-%d-%03d,00.html' % (section, year, day)
|
url = 'http://www.spiegel.de/%s/0,1518,archiv-%d-%03d,00.html' % (section, year, day)
|
||||||
if date == time.strftime('%d.%m.%Y', time.localtime()):
|
if date == time.strftime('%d.%m.%Y', time.localtime()):
|
||||||
html = oxlib.net.getUrl(url)
|
html = ox.net.readUrl(url)
|
||||||
else:
|
else:
|
||||||
html = oxlib.cache.getUrl(url)
|
html = ox.cache.readUrl(url)
|
||||||
for item in re.compile('<div class="spTeaserCenterpage(.*?)</p>', re.DOTALL).findall(html):
|
for item in re.compile('<div class="spTeaserCenterpage(.*?)</p>', re.DOTALL).findall(html):
|
||||||
dateString = stripTags(re.compile('<div class="spDateTime">(.*?)</div>', re.DOTALL).findall(item)[0]).strip()
|
dateString = stripTags(re.compile('<div class="spDateTime">(.*?)</div>', re.DOTALL).findall(item)[0]).strip()
|
||||||
try:
|
try:
|
||||||
|
@ -102,11 +102,11 @@ def formatSubsection(string):
|
||||||
|
|
||||||
def getIssue(year, week):
|
def getIssue(year, week):
|
||||||
coverUrl = 'http://www.spiegel.de/static/epaper/SP/%d/%d/ROSPANZ%d%03d0001-312.jpg' % (year, week, year, week)
|
coverUrl = 'http://www.spiegel.de/static/epaper/SP/%d/%d/ROSPANZ%d%03d0001-312.jpg' % (year, week, year, week)
|
||||||
if not oxlib.net.exists(coverUrl):
|
if not ox.net.exists(coverUrl):
|
||||||
return None
|
return None
|
||||||
url = 'http://service.spiegel.de/digas/servlet/epaper?Q=SP&JG=%d&AG=%d&SE=1&AN=INHALT' % (year, week)
|
url = 'http://service.spiegel.de/digas/servlet/epaper?Q=SP&JG=%d&AG=%d&SE=1&AN=INHALT' % (year, week)
|
||||||
contents = []
|
contents = []
|
||||||
soup = BeautifulSoup(oxlib.cache.getUrl(url))
|
soup = BeautifulSoup(ox.cache.readUrl(url))
|
||||||
for item in soup('a', {'href': re.compile('http://service.spiegel.de/digas/servlet/epaper\?Q=SP&JG=')}):
|
for item in soup('a', {'href': re.compile('http://service.spiegel.de/digas/servlet/epaper\?Q=SP&JG=')}):
|
||||||
item = str(item)
|
item = str(item)
|
||||||
page = int(re.compile('&SE=(.*?)"').findall(item)[0])
|
page = int(re.compile('&SE=(.*?)"').findall(item)[0])
|
||||||
|
@ -116,7 +116,7 @@ def getIssue(year, week):
|
||||||
pages = page + 2
|
pages = page + 2
|
||||||
for page in range(1, pages + 10):
|
for page in range(1, pages + 10):
|
||||||
url = 'http://www.spiegel.de/static/epaper/SP/%d/%d/ROSPANZ%d%03d%04d-205.jpg' % (year, week, year, week, page)
|
url = 'http://www.spiegel.de/static/epaper/SP/%d/%d/ROSPANZ%d%03d%04d-205.jpg' % (year, week, year, week, page)
|
||||||
if oxlib.cache.exists(url):
|
if ox.cache.exists(url):
|
||||||
pageUrl[page] = url
|
pageUrl[page] = url
|
||||||
else:
|
else:
|
||||||
pageUrl[page] = ''
|
pageUrl[page] = ''
|
||||||
|
@ -164,7 +164,7 @@ def archiveIssues():
|
||||||
f.close()
|
f.close()
|
||||||
filename = '%s/Der Spiegel %d %02d.jpg' % (dirname, y, w)
|
filename = '%s/Der Spiegel %d %02d.jpg' % (dirname, y, w)
|
||||||
if not os.path.exists(filename):
|
if not os.path.exists(filename):
|
||||||
data = oxlib.cache.getUrl(issue['coverUrl'])
|
data = ox.cache.readUrl(issue['coverUrl'])
|
||||||
f = open(filename, 'w')
|
f = open(filename, 'w')
|
||||||
f.write(data)
|
f.write(data)
|
||||||
f.close()
|
f.close()
|
||||||
|
@ -173,7 +173,7 @@ def archiveIssues():
|
||||||
if url:
|
if url:
|
||||||
filename = '%s/Der Spiegel %d %02d %03d.jpg' % (dirname, y, w, page)
|
filename = '%s/Der Spiegel %d %02d %03d.jpg' % (dirname, y, w, page)
|
||||||
if not os.path.exists(filename):
|
if not os.path.exists(filename):
|
||||||
data = oxlib.cache.getUrl(url)
|
data = ox.cache.readUrl(url)
|
||||||
f = open(filename, 'w')
|
f = open(filename, 'w')
|
||||||
f.write(data)
|
f.write(data)
|
||||||
f.close()
|
f.close()
|
||||||
|
@ -244,7 +244,7 @@ def archiveNews():
|
||||||
f.close()
|
f.close()
|
||||||
filename = dirname + '/' + new['imageUrl'].split('/')[-1]
|
filename = dirname + '/' + new['imageUrl'].split('/')[-1]
|
||||||
if not os.path.exists(filename):
|
if not os.path.exists(filename):
|
||||||
data = oxlib.cache.getUrl(new['imageUrl'])
|
data = ox.cache.readUrl(new['imageUrl'])
|
||||||
f = open(filename, 'w')
|
f = open(filename, 'w')
|
||||||
f.write(data)
|
f.write(data)
|
||||||
f.close()
|
f.close()
|
|
@ -6,10 +6,10 @@ import socket
|
||||||
from urllib import quote, urlencode
|
from urllib import quote, urlencode
|
||||||
from urllib2 import URLError
|
from urllib2 import URLError
|
||||||
|
|
||||||
from oxlib.cache import getUrl, getUrlUnicode
|
from ox.cache import readUrl, readUrlUnicode
|
||||||
from oxlib import findRe, cache, stripTags, decodeHtml, getTorrentInfo, normalizeNewlines
|
from ox import findRe, cache, stripTags, decodeHtml, getTorrentInfo, normalizeNewlines
|
||||||
from oxlib.normalize import normalizeImdbId
|
from ox.normalize import normalizeImdbId
|
||||||
import oxlib
|
import ox
|
||||||
|
|
||||||
from torrent import Torrent
|
from torrent import Torrent
|
||||||
|
|
||||||
|
@ -18,13 +18,13 @@ cache_timeout = 24*60*60 # cache search only for 24 hours
|
||||||
season_episode = re.compile("S..E..", re.IGNORECASE)
|
season_episode = re.compile("S..E..", re.IGNORECASE)
|
||||||
|
|
||||||
|
|
||||||
def _getUrl(url, data=None, headers=cache.DEFAULT_HEADERS, timeout=cache.cache_timeout, valid=None):
|
def _readUrl(url, data=None, headers=cache.DEFAULT_HEADERS, timeout=cache.cache_timeout, valid=None):
|
||||||
headers = headers.copy()
|
headers = headers.copy()
|
||||||
headers['Cookie'] = 'language=en_EN'
|
headers['Cookie'] = 'language=en_EN'
|
||||||
return cache.getUrl(url, data, headers, timeout)
|
return cache.readUrl(url, data, headers, timeout)
|
||||||
|
|
||||||
def _getUrlUnicode(url, timeout=cache.cache_timeout):
|
def _readUrlUnicode(url, timeout=cache.cache_timeout):
|
||||||
return cache.getUrlUnicode(url, _getUrl=_getUrl, timeout=timeout)
|
return cache.readUrlUnicode(url, _readUrl=_readUrl, timeout=timeout)
|
||||||
|
|
||||||
def findMovies(query, max_results=10):
|
def findMovies(query, max_results=10):
|
||||||
results = []
|
results = []
|
||||||
|
@ -37,7 +37,7 @@ def findMovies(query, max_results=10):
|
||||||
if not url.startswith('/'):
|
if not url.startswith('/'):
|
||||||
url = "/" + url
|
url = "/" + url
|
||||||
url = "http://thepiratebay.org" + url
|
url = "http://thepiratebay.org" + url
|
||||||
data = _getUrlUnicode(url, timeout=cache_timeout)
|
data = _readUrlUnicode(url, timeout=cache_timeout)
|
||||||
regexp = '''<tr.*?<td class="vertTh"><a href="/browse/(.*?)".*?<td><a href="(/torrent/.*?)" class="detLink".*?>(.*?)</a>.*?</tr>'''
|
regexp = '''<tr.*?<td class="vertTh"><a href="/browse/(.*?)".*?<td><a href="(/torrent/.*?)" class="detLink".*?>(.*?)</a>.*?</tr>'''
|
||||||
for row in re.compile(regexp, re.DOTALL).findall(data):
|
for row in re.compile(regexp, re.DOTALL).findall(data):
|
||||||
torrentType = row[0]
|
torrentType = row[0]
|
||||||
|
@@ -67,7 +67,7 @@ def getId(piratebayId):

 def exists(piratebayId):
     piratebayId = getId(piratebayId)
-    return oxlib.net.exists("http://thepiratebay.org/torrent/%s" % piratebayId)
+    return ox.net.exists("http://thepiratebay.org/torrent/%s" % piratebayId)

 def getData(piratebayId):
     _key_map = {
@@ -83,7 +83,7 @@ def getData(piratebayId):
     torrent[u'domain'] = 'thepiratebay.org'
     torrent[u'comment_link'] = 'http://thepiratebay.org/torrent/%s' % piratebayId

-    data = _getUrlUnicode(torrent['comment_link'])
+    data = _readUrlUnicode(torrent['comment_link'])
     torrent[u'title'] = findRe(data, '<title>(.*?) \(download torrent\) - TPB</title>')
     if not torrent[u'title']:
         return None
@@ -99,7 +99,7 @@ def getData(piratebayId):
     torrent[u'description'] = findRe(data, '<div class="nfo">(.*?)</div>')
     if torrent[u'description']:
         torrent['description'] = normalizeNewlines(decodeHtml(stripTags(torrent['description']))).strip()
-    t = _getUrl(torrent[u'torrent_link'])
+    t = _readUrl(torrent[u'torrent_link'])
     torrent[u'torrent_info'] = getTorrentInfo(t)
     return torrent
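Note: a hypothetical call to the renamed scraper (the id is made up); getData returns None when no title can be extracted:

    data = getData('http://thepiratebay.org/torrent/1234')
    if data is not None:
        print data['title']
        print data['torrent_info']   # metadata parsed from the .torrent by getTorrentInfo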
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 # vi:si:et:sw=4:sts=4:ts=4
-from oxlib import intValue
+from ox import intValue


 class Torrent(dict):
@@ -3,8 +3,8 @@
 import re
 import time

-from oxlib import stripTags, findRe
-from oxlib.cache import getUrlUnicode
+from ox import stripTags, findRe
+from ox.cache import readUrlUnicode


 def getEpisodeData(url):
@@ -14,7 +14,7 @@ def getEpisodeData(url):
     example:
         getEpisodeData('http://www.tv.com/lost/do-no-harm/episode/399310/summary.html')
     '''
-    data = getUrlUnicode(url)
+    data = readUrlUnicode(url)
     r = {}
     r['description'] = stripTags(findRe(data, 'div id="main-col">.*?<div>(.*?)</div').split('\r')[0])
     r['show'] = findRe(data, '<h1>(.*?)</h1>')
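Note: nearly every scraper in this commit leans on ox.findRe. For reference, an illustrative stand-in that matches how it is used here (first group of the first match, or an empty string); not necessarily ox's exact implementation:

    import re

    def findRe(string, regexp):
        result = re.compile(regexp, re.DOTALL).findall(string)
        if result:
            return result[0].strip()
        return ''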
@@ -3,8 +3,8 @@
 from urllib import urlencode

 import simplejson
-from oxlib.cache import getUrlUnicode
-from oxlib import findRe, decodeHtml
+from ox.cache import readUrl, readUrlUnicode
+from ox import findRe, decodeHtml


 def getId(url):
@@ -44,7 +44,7 @@ def getUrlByAllmovieId(allmovieId):
 def getWikiData(wikipediaUrl):
     url = wikipediaUrl.replace('wikipedia.org/wiki/', 'wikipedia.org/w/index.php?title=')
     url = "%s&action=raw" % url
-    data = getUrlUnicode(url)
+    data = readUrlUnicode(url)
     return data

 def getMovieData(wikipediaUrl):
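Note: getWikiData targets MediaWiki's raw-wikitext endpoint. The rewrite, traced on a hypothetical article URL:

    url = 'http://en.wikipedia.org/wiki/Brazil_(film)'
    url = url.replace('wikipedia.org/wiki/', 'wikipedia.org/w/index.php?title=')
    url = "%s&action=raw" % url
    # -> http://en.wikipedia.org/w/index.php?title=Brazil_(film)&action=raw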
@@ -83,7 +83,7 @@ def getMovieData(wikipediaUrl):
     return filmbox

 def getImageUrl(name):
-    data = getUrlUnicode('http://en.wikipedia.org/wiki/Image:' + name)
+    data = readUrlUnicode('http://en.wikipedia.org/wiki/Image:' + name)
     url = findRe(data, 'href="(http://upload.wikimedia.org/.*?)"')
     return url

@@ -103,13 +103,12 @@ def getAllmovieId(wikipediaUrl):
     return data.get('amg_id', '')

 def find(query, max_results=10):
-    from oxlib.cache import getUrl
     query = {'action': 'query', 'list':'search', 'format': 'json',
         'srlimit': max_results, 'srwhat': 'text', 'srsearch': query.encode('utf-8')}
     url = "http://en.wikipedia.org/w/api.php?" + urlencode(query)
-    data = getUrl(url)
+    data = readUrl(url)
     if not data:
-        data = getUrl(url, timeout=0)
+        data = readUrl(url, timeout=0)
     result = simplejson.loads(data)
     results = []
     if result and 'query' in result:
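Note: the dict above is the MediaWiki search API call in pieces; urlencode flattens it into the query string (key order may vary), and the retry with timeout=0 appears to force a fresh fetch when the cached response is empty. Traced with hypothetical values:

    from urllib import urlencode

    query = {'action': 'query', 'list': 'search', 'format': 'json',
             'srlimit': 10, 'srwhat': 'text', 'srsearch': 'Brazil film'}
    print "http://en.wikipedia.org/w/api.php?" + urlencode(query)
    # e.g. http://en.wikipedia.org/w/api.php?action=query&list=search&...&srsearch=Brazil+film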
@@ -6,12 +6,12 @@ import xml.etree.ElementTree as ET
 import re

 import feedparser
-from oxlib.cache import getUrl, getUrlUnicode
-from oxlib import findString, findRe
+from ox.cache import readUrl, readUrlUnicode
+from ox import findString, findRe


 def getVideoKey(youtubeId):
-    data = getUrl("http://www.youtube.com/get_video_info?&video_id=%s" % youtubeId)
+    data = readUrl("http://www.youtube.com/get_video_info?&video_id=%s" % youtubeId)
     match = re.compile("token=(.+)&thumbnail").findall(data)
     if match:
         return unquote(match[0])
@@ -31,7 +31,7 @@ def getVideoUrl(youtubeId, format='mp4'):

 def getMovieInfo(youtubeId, video_url_base=None):
     url = "http://gdata.youtube.com/feeds/api/videos/%s" % youtubeId
-    data = getUrl(url)
+    data = readUrl(url)
     fd = feedparser.parse(data)
     return getInfoFromAtom(fd.entries[0], video_url_base)

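Note: feedparser.parse accepts the raw feed string that readUrl returns and exposes Atom entries as attributes. A self-contained example on a fabricated feed:

    import feedparser

    atom = '''<feed xmlns="http://www.w3.org/2005/Atom">
      <entry><title>Example video</title></entry>
    </feed>'''
    fd = feedparser.parse(atom)
    print fd.entries[0].title   # prints: Example video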
@@ -59,7 +59,7 @@ def getInfoFromAtom(entry, video_url_base=None):
 def find(query, max_results=10, offset=1, orderBy='relevance', video_url_base=None):
     query = quote(query)
     url = "http://gdata.youtube.com/feeds/api/videos?vq=%s&orderby=%s&start-index=%s&max-results=%s" % (query, orderBy, offset, max_results)
-    data = getUrlUnicode(url)
+    data = readUrlUnicode(url)
     fd = feedparser.parse(data)
     videos = []
     for entry in fd.entries:
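Note: for reference, the URL the string formatting above produces for a hypothetical search:

    from urllib import quote

    query = quote('lost finale')   # -> 'lost%20finale'
    print ("http://gdata.youtube.com/feeds/api/videos"
           "?vq=%s&orderby=%s&start-index=%s&max-results=%s"
           % (query, 'relevance', 1, 10))
    # http://gdata.youtube.com/feeds/api/videos?vq=lost%20finale&orderby=relevance&start-index=1&max-results=10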
@@ -72,7 +72,7 @@ def find(query, max_results=10, offset=1, orderBy='relevance', video_url_base=None):
 '''
 def find(query, max_results=10, offset=1, orderBy='relevance', video_url_base=None):
     url = "http://youtube.com/results?search_query=%s&search=Search" % quote(query)
-    data = getUrlUnicode(url)
+    data = readUrlUnicode(url)
     regx = re.compile(' <a href="/watch.v=(.*?)" title="(.*?)" ')
     regx = re.compile('<a href="/watch\?v=(\w*?)" ><img src="(.*?)" class="vimg120" title="(.*?)" alt="video">')
     id_title = regx.findall(data)