ox.web under_score api rewrite

This commit is contained in:
j 2012-08-15 17:15:40 +02:00
parent bb35daa95c
commit a4fd3c930f
29 changed files with 268 additions and 285 deletions

View file

@ -307,6 +307,8 @@ def parse_movie_path(path):
title = title.replace('_ ', ': ')
if title.endswith('_'):
title = title[:-1] + '.'
if title.startswith('_'):
title = '.' + title[1:]
year = find_re(title, '(\(\d{4}\))')
if not year:
@ -344,8 +346,9 @@ def parse_movie_path(path):
else:
season = None
episode = find_re(parts[-1], '\.Episode (\d+)\.')
episode = find_re(parts[-1], '\.Episode[s]* ([\d+]+)\.')
if episode:
episode = episode.split('+')[0]
episode = int(episode)
else:
episode = None

View file

@ -7,7 +7,7 @@ from utils import json, ET
def get_embed_code(url, maxwidth=None, maxheight=None):
embed = {}
header = cache.getHeaders(url)
header = cache.get_headers(url)
if header.get('content-type', '').startswith('text/html'):
html = cache.readUrl(url)
json_oembed = filter(lambda l: 'json+oembed' in l, re.compile('<link.*?>').findall(html))

View file

@ -7,68 +7,68 @@ from ox import strip_tags, find_re
from ox.cache import read_url
def getId(url):
def get_id(url):
return url.split("/")[-1]
def getData(id):
def get_data(id):
'''
>>> getData('129689')['cast'][1][1]
>>> get_data('129689')['cast'][1][1]
u'Marianne'
>>> getData('129689')['credits'][0][0]
>>> get_data('129689')['credits'][0][0]
u'Jean-Luc Godard'
>>> getData('129689')['posters'][0]
>>> get_data('129689')['posters'][0]
u'http://image.allmusic.com/00/adg/cov200/dru800/u812/u81260bbffr.jpg'
>>> getData('129689')['rating']
>>> get_data('129689')['rating']
u'4.5'
'''
if id.startswith('http'):
id = getId(id)
id = get_id(id)
data = {
"url": getUrl(id)
"url": get_url(id)
}
html = read_url(data["url"], unicode=True)
data['aka'] = parseList(html, 'AKA')
data['aka'] = parse_list(html, 'AKA')
data['category'] = find_re(html, '<dt>category</dt>.*?<dd>(.*?)</dd>')
data['countries'] = parseList(html, 'countries')
data['director'] = parseEntry(html, 'directed by')
data['genres'] = parseList(html, 'genres')
data['keywords'] = parseList(html, 'keywords')
data['countries'] = parse_list(html, 'countries')
data['director'] = parse_entry(html, 'directed by')
data['genres'] = parse_list(html, 'genres')
data['keywords'] = parse_list(html, 'keywords')
data['posters'] = [find_re(html, '<img src="(http://cps-.*?)"')]
data['produced'] = parseList(html, 'produced by')
data['produced'] = parse_list(html, 'produced by')
data['rating'] = find_re(html, 'Stars" title="(.*?) Stars"')
data['released'] = parseEntry(html, 'released by')
data['releasedate'] = parseList(html, 'release date')
data['runtime'] = parseEntry(html, 'run time').replace('min.', '').strip()
data['set'] = parseEntry(html, 'set in')
data['released'] = parse_entry(html, 'released by')
data['releasedate'] = parse_list(html, 'release date')
data['runtime'] = parse_entry(html, 'run time').replace('min.', '').strip()
data['set'] = parse_entry(html, 'set in')
data['synopsis'] = strip_tags(find_re(html, '<div class="toggle-text" itemprop="description">(.*?)</div>')).strip()
data['themes'] = parseList(html, 'themes')
data['types'] = parseList(html, 'types')
data['themes'] = parse_list(html, 'themes')
data['types'] = parse_list(html, 'types')
data['year'] = find_re(html, '<span class="year">.*?(\d+)')
#data['stills'] = [re.sub('_derived.*?/', '', i) for i in re.compile('<a href="#" title="movie still".*?<img src="(.*?)"', re.DOTALL).findall(html)]
data['stills'] = re.compile('<a href="#" title="movie still".*?<img src="(.*?)"', re.DOTALL).findall(html)
#html = read_url("http://allmovie.com/work/%s/cast" % id, unicode=True)
#data['cast'] = parseTable(html)
#data['cast'] = parse_table(html)
#html = read_url("http://allmovie.com/work/%s/credits" % id, unicode=True)
#data['credits'] = parseTable(html)
#data['credits'] = parse_table(html)
html = read_url("http://allmovie.com/work/%s/review" % id, unicode=True)
data['review'] = strip_tags(find_re(html, '<div class="toggle-text" itemprop="description">(.*?)</div>')).strip()
return data
def getUrl(id):
def get_url(id):
return "http://allmovie.com/work/%s" % id
def parseEntry(html, title):
def parse_entry(html, title):
html = find_re(html, '<dt>%s</dt>.*?<dd>(.*?)</dd>' % title)
return strip_tags(html).strip()
def parseList(html, title):
def parse_list(html, title):
html = find_re(html, '<dt>%s</dt>.*?<dd>(.*?)</dd>' % title.lower())
r = map(lambda x: strip_tags(x), re.compile('<li>(.*?)</li>', re.DOTALL).findall(html))
if not r and html:
r = [strip_tags(html)]
return r
def parseTable(html):
def parse_table(html):
return map(
lambda x: map(
lambda x: strip_tags(x).strip().replace('&nbsp;', ''),
@ -77,10 +77,10 @@ def parseTable(html):
find_re(html, '<div id="results-table">(.*?)</table>').split('</tr>')[:-1]
)
def parseText(html, title):
def parse_text(html, title):
return strip_tags(find_re(html, '%s</td>.*?<td colspan="2"><p>(.*?)</td>' % title)).strip()
if __name__ == '__main__':
print getData('129689')
# print getData('177524')
print get_data('129689')
# print get_data('177524')

View file

@ -13,17 +13,17 @@ def findISBN(title, author):
data = read_url(url, unicode=True)
links = re.compile('href="(http://www.amazon.com/.*?/dp/.*?)"').findall(data)
id = find_re(re.compile('href="(http://www.amazon.com/.*?/dp/.*?)"').findall(data)[0], '/dp/(.*?)/')
data = getData(id)
data = get_data(id)
if author in data['authors']:
return data
return {}
def getData(id):
def get_data(id):
url = "http://www.amazon.com/title/dp/%s/" % id
data = read_url(url, unicode=True)
def findData(key):
def find_data(key):
return find_re(data, '<li><b>%s:</b>(.*?)</li>'% key).strip()
r = {}
@ -34,15 +34,15 @@ def getData(id):
t = re.compile('>(.*?)</a> \(Translator\)').findall(data)
if t:
r['translator'] = t
r['publisher'] = findData('Publisher')
r['language'] = findData('Language')
r['isbn-10'] = findData('ISBN-10')
r['isbn-13'] = findData('ISBN-13').replace('-', '')
r['publisher'] = find_data('Publisher')
r['language'] = find_data('Language')
r['isbn-10'] = find_data('ISBN-10')
r['isbn-13'] = find_data('ISBN-13').replace('-', '')
r['dimensions'] = find_re(data, '<li><b>.*?Product Dimensions:.*?</b>(.*?)</li>')
r['pages'] = findData('Paperback')
r['pages'] = find_data('Paperback')
if not r['pages']:
r['pages'] = findData('Hardcover')
r['pages'] = find_data('Hardcover')
r['review'] = strip_tags(find_re(data, '<h3 class="productDescriptionSource">Review</h3>.*?<div class="productDescriptionWrapper">(.*?)</div>').replace('<br />', '\n')).strip()

View file

@ -14,7 +14,7 @@ HEADERS = {
USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7) '
USER_AGENT += 'AppleWebKit/534.48.3 (KHTML, like Gecko) Version/5.1 Safari/534.48.3'
def getMovieData(title, director):
def get_movie_data(title, director):
if isinstance(title, unicode):
title = title.encode('utf-8')
if isinstance(director, unicode):
@ -60,8 +60,8 @@ def getMovieData(title, director):
return data
if __name__ == '__main__':
print getMovieData('Alphaville', 'Jean-Luc Godard')
print getMovieData('Sin City', 'Roberto Rodriguez')
print getMovieData('Breathless', 'Jean-Luc Godard')
print getMovieData('Capitalism: A Love Story', 'Michael Moore')
print getMovieData('Film Socialisme', 'Jean-Luc Godard')
print get_movie_data('Alphaville', 'Jean-Luc Godard')
print get_movie_data('Sin City', 'Roberto Rodriguez')
print get_movie_data('Breathless', 'Jean-Luc Godard')
print get_movie_data('Capitalism: A Love Story', 'Michael Moore')
print get_movie_data('Film Socialisme', 'Jean-Luc Godard')

View file

@ -3,15 +3,15 @@
from .. import cache
from ..utils import json
def getId(url):
def get_id(url):
return url.split("/")[-1]
def getUrl(id):
def get_url(id):
return "http://www.archive.org/details/%s" % id
def getData(id):
def get_data(id):
data = {}
url = getUrl(id)
url = get_url(id)
details = cache.read_url('%s?output=json' % url)
details = json.loads(details)
for key in ('title', 'description', 'runtime'):

View file

@ -9,25 +9,25 @@ from ox.text import find_re, remove_special_characters
import imdb
def getId(url):
def get_id(url):
return url.split("/")[-1]
def getUrl(id):
def get_url(id):
return "http://www.criterion.com/films/%s" % id
def getData(id, timeout=ox.cache.cache_timeout, get_imdb=False):
def get_data(id, timeout=ox.cache.cache_timeout, get_imdb=False):
'''
>>> getData('1333')['imdbId']
>>> get_data('1333')['imdbId']
u'0060304'
>>> getData('236')['posters'][0]
>>> get_data('236')['posters'][0]
u'http://criterion_production.s3.amazonaws.com/release_images/1586/ThirdManReplace.jpg'
>>> getData('786')['posters'][0]
>>> get_data('786')['posters'][0]
u'http://criterion_production.s3.amazonaws.com/product_images/185/343_box_348x490.jpg'
'''
data = {
"url": getUrl(id)
"url": get_url(id)
}
try:
html = read_url(data["url"], timeout=timeout, unicode=True)
@ -71,21 +71,21 @@ def getData(id, timeout=ox.cache.cache_timeout, get_imdb=False):
if timeout == ox.cache.cache_timeout:
timeout = -1
if get_imdb:
data['imdbId'] = imdb.getMovieId(data['title'],
data['imdbId'] = imdb.get_movie_id(data['title'],
data['director'], data['year'], timeout=timeout)
return data
def getIds():
def get_ids():
ids = []
html = read_url("http://www.criterion.com/library/expanded_view?m=dvd&p=1&pp=50&s=spine", unicode=True)
results = re.compile("\&amp;p=(\d+)\&").findall(html)
pages = max(map(int, results))
for page in range(1, pages):
for id in getIdsByPage(page):
for id in get_idsByPage(page):
ids.append(id)
return map(lambda id: str(id), sorted(map(lambda id: int(id), set(ids))))
def getIdsByPage(page):
def get_idsByPage(page):
ids = []
url = "http://www.criterion.com/library/expanded_view?m=dvd&p=%s&pp=50&s=spine" % page
html = read_url(url, unicode=True)
@ -101,4 +101,4 @@ def getIdsByPage(page):
return set(ids)
if __name__ == '__main__':
print getIds()
print get_ids()

View file

@ -5,7 +5,7 @@ from urllib import unquote
from ox.cache import read_url
def getVideoUrl(url):
def get_video_url(url):
'''
>>> getVideoUrl('http://www.dailymotion.com/relevance/search/priere%2Bpour%2Brefuznik/video/x3opar_priere-pour-refuznik-1-jeanluc-goda_shortfilms').split('?auth')[0]
'http://www.dailymotion.com/cdn/FLV-320x240/video/x3opar_priere-pour-refuznik-1-jean-luc-god_shortfilms.flv'

View file

@ -9,7 +9,7 @@ from ox.cache import read_url
import google
def getShowUrl(title):
def get_show_url(title):
'''
Search Epguide Url for Show via Show Title.
Use Google to search the url, this is also done on Epguide.
@ -20,7 +20,7 @@ def getShowUrl(title):
return url
return None
def getShowData(url):
def get_show_data(url):
data = read_url(url, unicode=True)
r = {}
r['title'] = strip_tags(find_re(data, '<h1>(.*?)</h1>'))

View file

@ -9,28 +9,28 @@ from ox import find_re, strip_tags
from ox.web.imdb import ImdbCombined
def getData(id, timeout=-1):
def get_data(id, timeout=-1):
'''
>>> getData('the-matrix')['poster']
>>> get_data('the-matrix')['poster']
'http://content7.flixster.com/movie/16/90/52/1690525_gal.jpg'
>>> getData('0133093')['poster']
>>> get_data('0133093')['poster']
'http://content7.flixster.com/movie/16/90/52/1690525_gal.jpg'
>>> getData('2-or-3-things-i-know-about-her')['poster']
>>> get_data('2-or-3-things-i-know-about-her')['poster']
'http://content6.flixster.com/movie/10/95/43/10954392_gal.jpg'
>>> getData('0078875')['rottentomatoes_id']
>>> get_data('0078875')['rottentomatoes_id']
'http://www.rottentomatoes.com/m/the-tin-drum/'
'''
if len(id) == 7:
try:
int(id)
id = getIdByImdb(id)
id = get_id(imdb=id)
except:
pass
data = {
"url": getUrl(id),
"url": get_url(id),
}
html = read_url(data['url'], timeout=timeout, timeout=True)
doc = document_fromstring(html)
@ -55,21 +55,20 @@ def getData(id, timeout=-1):
return None
return data
def getIdByImdb(imdbId):
def get_id(url=None, imdb=None):
'''
>>> getIdByImdb('0133093')
>>> get_id(imdb='0133093')
u'the-matrix'
#>>> getIdByImdb('0060304')
#>>> get_id(imdb='0060304')
#u'2-or-3-things-i-know-about-her'
'''
i = ImdbCombined(imdbId)
if imdb:
i = ImdbCombined(imdb)
title = i['title']
return title.replace(' ', '-').lower().replace("'", '')
def getId(url):
return url.split('/')[-1]
def getUrl(id):
def get_url(id):
return "http://www.flixster.com/movie/%s"%id

View file

@ -5,7 +5,7 @@ import json
from ox.cache import read_url
from ox import find_re
class Imdb(dict):
class Freebase(dict):
def __init__(self, id, timeout=-1):
url = "http://ids.freebaseapps.com/get_ids?id=/authority/imdb/title/tt%s" % id
'''

View file

@ -20,7 +20,7 @@ def read_url(url, data=None, headers=ox.cache.DEFAULT_HEADERS, timeout=ox.cache.
headers = headers.copy()
return ox.cache.read_url(url, data, headers, timeout, unicode=unicode)
def getUrl(id):
def get_url(id):
return "http://www.imdb.com/title/tt%s/" % id
class Imdb(SiteParser):
@ -420,7 +420,7 @@ class ImdbCombined(Imdb):
self.regex = _regex
super(ImdbCombined, self).__init__(id, timeout)
def getMovieIdByTitle(title, timeout=-1):
def get_movie_by_title(title, timeout=-1):
'''
This only works for exact title matches from the data dump
Usually in the format
@ -431,22 +431,22 @@ def getMovieIdByTitle(title, timeout=-1):
If there is more than one film with that title for the year
Title (Year/I)
>>> getMovieIdByTitle(u'"Father Knows Best" (1954) {(#5.34)}')
>>> get_movie_by_title(u'"Father Knows Best" (1954) {(#5.34)}')
u'1602860'
>>> getMovieIdByTitle(u'The Matrix (1999)')
>>> get_movie_by_title(u'The Matrix (1999)')
u'0133093'
>>> getMovieIdByTitle(u'Little Egypt (1951)')
>>> get_movie_by_title(u'Little Egypt (1951)')
u'0043748'
>>> getMovieIdByTitle(u'Little Egypt (1897/I)')
>>> get_movie_by_title(u'Little Egypt (1897/I)')
u'0214882'
>>> getMovieIdByTitle(u'Little Egypt')
>>> get_movie_by_title(u'Little Egypt')
None
>>> getMovieIdByTitle(u'"Dexter" (2006) {Father Knows Best (#1.9)}')
>>> get_movie_by_title(u'"Dexter" (2006) {Father Knows Best (#1.9)}')
u'0866567'
'''
params = {'s':'tt','q': title}
@ -465,21 +465,21 @@ def getMovieIdByTitle(title, timeout=-1):
return results[0]
return None
def getMovieId(title, director='', year='', timeout=-1):
def get_movie_id(title, director='', year='', timeout=-1):
'''
>>> getMovieId('The Matrix')
>>> get_movie_id('The Matrix')
u'0133093'
>>> getMovieId('2 or 3 Things I Know About Her', 'Jean-Luc Godard')
>>> get_movie_id('2 or 3 Things I Know About Her', 'Jean-Luc Godard')
u'0060304'
>>> getMovieId('2 or 3 Things I Know About Her', 'Jean-Luc Godard', '1967')
>>> get_movie_id('2 or 3 Things I Know About Her', 'Jean-Luc Godard', '1967')
u'0060304'
>>> getMovieId(u"Histoire(s) du cinema: Le controle de l'univers", 'Jean-Luc Godard')
>>> get_movie_id(u"Histoire(s) du cinema: Le controle de l'univers", 'Jean-Luc Godard')
u'0179214'
>>> getMovieId(u"Histoire(s) du cinéma: Le contrôle de l'univers", 'Jean-Luc Godard')
>>> get_movie_id(u"Histoire(s) du cinéma: Le contrôle de l'univers", 'Jean-Luc Godard')
u'0179214'
'''
imdbId = {
@ -555,12 +555,12 @@ def getMovieId(title, director='', year='', timeout=-1):
#or nothing
return ''
def getMoviePoster(imdbId):
def get_movie_poster(imdbId):
'''
>>> getMoviePoster('0133093')
>>> get_movie_poster('0133093')
'http://ia.media-imdb.com/images/M/MV5BMjEzNjg1NTg2NV5BMl5BanBnXkFtZTYwNjY3MzQ5._V1._SX338_SY475_.jpg'
>>> getMoviePoster('0994352')
>>> get_movie_poster('0994352')
'http://ia.media-imdb.com/images/M/MV5BMjA3NzMyMzU1MV5BMl5BanBnXkFtZTcwNjc1ODUwMg@@._V1._SX594_SY755_.jpg'
'''
info = ImdbCombined(imdbId)
@ -570,10 +570,10 @@ def getMoviePoster(imdbId):
poster = find_re(data, 'img id="primary-img".*?src="(.*?)"')
return poster
elif 'series' in info:
return getMoviePoster(info['series'])
return get_movie_poster(info['series'])
return ''
def maxVotes():
def max_votes():
url = 'http://www.imdb.com/search/title?num_votes=500000,&sort=num_votes,desc'
data = ox.cache.read_url(url)
votes = max([int(v.replace(',', ''))
@ -581,7 +581,7 @@ def maxVotes():
return votes
def guess(title, director='', timeout=-1):
return getMovieId(title, director, timeout=timeout)
return get_movie_id(title, director, timeout=timeout)
if __name__ == "__main__":
import json

View file

@ -7,19 +7,19 @@ from ox.html import strip_tags
from ox.text import find_re
def getData(id):
def get_data(id):
'''
>>> getData('1991/silence_of_the_lambs')['imdbId']
>>> get_data('1991/silence_of_the_lambs')['imdbId']
u'0102926'
>>> getData('1991/silence_of_the_lambs')['posters'][0]
>>> get_data('1991/silence_of_the_lambs')['posters'][0]
u'http://www.impawards.com/1991/posters/silence_of_the_lambs_ver1.jpg'
>>> getData('1991/silence_of_the_lambs')['url']
>>> get_data('1991/silence_of_the_lambs')['url']
u'http://www.impawards.com/1991/silence_of_the_lambs_ver1.html'
'''
data = {
'url': getUrl(id)
'url': get_url(id)
}
html = read_url(data['url'], unicode=True)
data['imdbId'] = find_re(html, 'imdb.com/title/tt(\d{7})')
@ -48,7 +48,7 @@ def getData(id):
return data
def getId(url):
def get_id(url):
split = url.split('/')
year = split[3]
split = split[4][:-5].split('_')
@ -59,26 +59,25 @@ def getId(url):
id = '%s/%s' % (year, '_'.join(split))
return id
def getIds():
ids = []
html = read_url('http://www.impawards.com/archives/latest.html', timeout = 60*60, unicode=True)
pages = int(find_re(html, '<a href= page(.*?).html>')) + 1
for page in range(pages, 0, -1):
for id in getIdsByPage(page):
if not id in ids:
ids.append(id)
return ids
def getIdsByPage(page):
def get_ids(page=None):
ids = []
if page:
html = read_url('http://www.impawards.com/archives/page%s.html' % page, timeout = -1, unicode=True)
results = re.compile('<a href = \.\./(.*?)>', re.DOTALL).findall(html)
for result in results:
url = 'http://impawards.com/%s' % result
ids.append(getId(url))
ids.append(get_id(url))
return set(ids)
#get all
html = read_url('http://www.impawards.com/archives/latest.html', timeout = 60*60, unicode=True)
pages = int(find_re(html, '<a href= page(.*?).html>')) + 1
for page in range(pages, 0, -1):
for id in get_ids(page):
if not id in ids:
ids.append(id)
return ids
def getUrl(id):
def get_url(id):
url = u"http://www.impawards.com/%s.html" % id
html = read_url(url, unicode=True)
if find_re(html, "No Movie Posters on This Page"):
@ -297,5 +296,5 @@ _id_map = {
}
if __name__ == '__main__':
ids = getIds()
ids = get_ids()
print sorted(ids), len(ids)

View file

@ -24,7 +24,7 @@ ITUNES_HEADERS = {
'Connection': 'close',
}
def composeUrl(request, parameters):
def compose_url(request, parameters):
if request == 'advancedSearch':
url = 'http://ax.phobos.apple.com.edgesuite.net/WebObjects/MZSearch.woa/wa/advancedSearch?'
if parameters['media'] == 'music':
@ -60,7 +60,7 @@ def composeUrl(request, parameters):
url = 'http://phobos.apple.com/WebObjects/MZStore.woa/wa/viewMovie?id=%s&prvw=1' % parameters['id']
return url
def parseXmlDict(xml):
def parse_xml_dict(xml):
values = {}
strings = xml.split('<key>')
for string in strings:
@ -78,7 +78,7 @@ def parseXmlDict(xml):
values[key] = value
return values
def parseCast(xml, title):
def parse_cast(xml, title):
list = []
try:
strings = find_re(xml, '<SetFontStyle normalStyle="textColor">%s(.*?)</VBoxView>' % title[:-1].upper()).split('</GotoURL>')
@ -89,7 +89,7 @@ def parseCast(xml, title):
except:
return list
def parseMovies(xml, title):
def parse_movies(xml, title):
list = []
try:
strings = find_re(xml, '<SetFontStyle normalStyle="outlineTitleFontStyle"><b>%s(.*?)</Test>' % title[:-1].upper()).split('</GotoURL>')
@ -109,17 +109,17 @@ class ItunesAlbum:
self.title = title
self.artist = artist
if not id:
self.id = self.getId()
self.id = self.get_id()
def getId(self):
url = composeUrl('advancedSearch', {'media': 'music', 'title': self.title, 'artist': self.artist})
def get_id(self):
url = compose_url('advancedSearch', {'media': 'music', 'title': self.title, 'artist': self.artist})
xml = read_url(url, headers = ITUNES_HEADERS)
id = find_re(xml, 'viewAlbum\?id=(.*?)&')
return id
def getData(self):
def get_data(self):
data = {'id': self.id}
url = composeUrl('viewAlbum', {'id': self.id})
url = compose_url('viewAlbum', {'id': self.id})
xml = read_url(url, None, ITUNES_HEADERS)
data['albumName'] = find_re(xml, '<B>(.*?)</B>')
data['artistName'] = find_re(xml, '<b>(.*?)</b>')
@ -130,7 +130,7 @@ class ItunesAlbum:
data['tracks'] = []
strings = find_re(xml, '<key>items</key>.*?<dict>(.*?)$').split('<dict>')
for string in strings:
data['tracks'].append(parseXmlDict(string))
data['tracks'].append(parse_xml_dict(string))
data['type'] = find_re(xml, '<key>listType</key><string>(.*?)<')
return data
@ -140,48 +140,48 @@ class ItunesMovie:
self.title = title
self.director = director
if not id:
self.id = self.getId()
self.id = self.get_id()
def getId(self):
url = composeUrl('advancedSearch', {'media': 'movie', 'title': self.title, 'director': self.director})
def get_id(self):
url = compose_url('advancedSearch', {'media': 'movie', 'title': self.title, 'director': self.director})
xml = read_url(url, headers = ITUNES_HEADERS)
id = find_re(xml, 'viewMovie\?id=(.*?)&')
return id
def getData(self):
def get_data(self):
data = {'id': self.id}
url = composeUrl('viewMovie', {'id': self.id})
url = compose_url('viewMovie', {'id': self.id})
xml = read_url(url, None, ITUNES_HEADERS)
f = open('/Users/rolux/Desktop/iTunesData.xml', 'w')
f.write(xml)
f.close()
data['actors'] = parseCast(xml, 'actors')
data['actors'] = parse_cast(xml, 'actors')
string = find_re(xml, 'Average Rating:(.*?)</HBoxView>')
data['averageRating'] = string.count('rating_star_000033.png') + string.count('&#189;') * 0.5
data['directors'] = parseCast(xml, 'directors')
data['directors'] = parse_cast(xml, 'directors')
data['format'] = find_re(xml, 'Format:(.*?)<')
data['genre'] = decode_html(find_re(xml, 'Genre:(.*?)<'))
data['plotSummary'] = decode_html(find_re(xml, 'PLOT SUMMARY</b>.*?<SetFontStyle normalStyle="textColor">(.*?)</SetFontStyle>'))
data['posterUrl'] = find_re(xml, 'reflection="." url="(.*?)"')
data['producers'] = parseCast(xml, 'producers')
data['producers'] = parse_cast(xml, 'producers')
data['rated'] = find_re(xml, 'Rated(.*?)<')
data['relatedMovies'] = parseMovies(xml, 'related movies')
data['relatedMovies'] = parse_movies(xml, 'related movies')
data['releaseDate'] = find_re(xml, 'Released(.*?)<')
data['runTime'] = find_re(xml, 'Run Time:(.*?)<')
data['screenwriters'] = parseCast(xml, 'screenwriters')
data['screenwriters'] = parse_cast(xml, 'screenwriters')
data['soundtrackId'] = find_re(xml, 'viewAlbum\?id=(.*?)&')
data['trailerUrl'] = find_re(xml, 'autoplay="." url="(.*?)"')
return data
if __name__ == '__main__':
from ox.utils import json
data = ItunesAlbum(title = 'So Red the Rose', artist = 'Arcadia').getData()
data = ItunesAlbum(title = 'So Red the Rose', artist = 'Arcadia').get_data()
print json.dumps(data, sort_keys = True, indent = 4)
data = ItunesMovie(title = 'The Matrix', director = 'Wachowski').getData()
data = ItunesMovie(title = 'The Matrix', director = 'Wachowski').get_data()
print json.dumps(data, sort_keys = True, indent = 4)
for v in data['relatedMovies']:
data = ItunesMovie(id = v['id']).getData()
data = ItunesMovie(id = v['id']).get_data()
print json.dumps(data, sort_keys = True, indent = 4)
data = ItunesMovie(id='272960052').getData()
data = ItunesMovie(id='272960052').get_data()
print json.dumps(data, sort_keys = True, indent = 4)

View file

@ -5,7 +5,7 @@ from ox.html import decode_html
from ox.text import find_re
def getLyrics(title, artist):
def get_lyrics(title, artist):
html = read_url('http://lyricsfly.com/api/')
key = find_re(html, '<font color=green><b>(.*?)</b></font>')
url = 'http://lyricsfly.com/api/api.php?i=%s&a=%s&t=%s' % (key, artist, title)

View file

@ -7,25 +7,24 @@ from lxml.html import document_fromstring
from ox.cache import read_url
from ox import find_re, strip_tags
def getUrl(id):
return 'http://www.metacritic.com/movie/%s' % id
def getId(url):
return url.split('/')[-1]
def getUrlByImdb(imdb):
def get_url(id=None, imdb=None):
if imdb:
url = "http://www.imdb.com/title/tt%s/criticreviews" % imdb
data = read_url(url)
metacritic_url = find_re(data, '"(http://www.metacritic.com/movie/.*?)"')
return metacritic_url or None
return 'http://www.metacritic.com/movie/%s' % id
def getMetacriticShowUrl(title):
def get_id(url):
return url.split('/')[-1]
def get_show_url(title):
title = quote(title)
url = "http://www.metacritic.com/search/process?ty=6&ts=%s&tfs=tvshow_title&x=0&y=0&sb=0&release_date_s=&release_date_e=&metascore_s=&metascore_e=" % title
data = read_url(url)
return find_re(data, '(http://www.metacritic.com/tv/shows/.*?)\?')
def getData(url):
def get_data(url):
data = read_url(url, unicode=True)
doc = document_fromstring(data)
score = filter(lambda s: s.attrib.get('property') == 'v:average',
@ -57,7 +56,7 @@ def getData(url):
return {
'critics': metacritics,
'id': getId(url),
'id': get_id(url),
'score': score,
'url': url,
}

View file

@ -13,7 +13,7 @@ import ox
from torrent import Torrent
def _parseResultsPage(data, max_results=10):
def _parse_results_page(data, max_results=10):
results=[]
regexp = '''<tr><td>(.*?)</td><td>(.*?)<a href="/tor/(.*?)">(.*?)</a>.*?</td>.*?</tr>'''
for row in re.compile(regexp, re.DOTALL).findall(data):
@ -27,22 +27,17 @@ def _parseResultsPage(data, max_results=10):
results.append((torrentTitle, torrentLink, ''))
return results
def findMovie(query, max_results=10):
def find_movie(query=None, imdb=None, max_results=10):
'''search for torrents on mininova
'''
if imdb:
url = "http://www.mininova.org/imdb/?imdb=%s" % normalize_imdbid(imdb)
else:
url = "http://www.mininova.org/search/%s/seeds" % quote(query)
data = read_url(url, unicode=True)
return _parseResultsPage(data, max_results)
return _parse_results_page(data, max_results)
def findMovieByImdb(imdbId):
'''find torrents on mininova for a given imdb id
'''
results = []
imdbId = normalize_imdbid(imdbId)
data = read_url("http://www.mininova.org/imdb/?imdb=%s" % imdbId, unicode=True)
return _parseResultsPage(data)
def getId(mininovaId):
def get_id(mininovaId):
mininovaId = unicode(mininovaId)
d = find_re(mininovaId, "/(\d+)")
if d:
@ -54,7 +49,7 @@ def getId(mininovaId):
return mininovaId[-1]
def exists(mininovaId):
mininovaId = getId(mininovaId)
mininovaId = get_id(mininovaId)
data = ox.net.read_url("http://www.mininova.org/tor/%s" % mininovaId)
if not data or 'Torrent not found...' in data:
return False
@ -62,11 +57,11 @@ def exists(mininovaId):
return False
return True
def getData(mininovaId):
def get_data(mininovaId):
_key_map = {
'by': u'uploader',
}
mininovaId = getId(mininovaId)
mininovaId = get_id(mininovaId)
torrent = dict()
torrent[u'id'] = mininovaId
torrent[u'domain'] = 'mininova.org'
@ -101,7 +96,7 @@ class Mininova(Torrent):
'72dfa59d2338e4a48c78cec9de25964cddb64104'
'''
def __init__(self, mininovaId):
self.data = getData(mininovaId)
self.data = get_data(mininovaId)
if not self.data:
return
Torrent.__init__(self)

View file

@ -6,39 +6,39 @@ import re
from ox.cache import read_url
from ox import find_re
def getData(id):
def get_data(id):
'''
>>> getData('0060304')['posters'][0]
>>> get_data('0060304')['posters'][0]
u'http://www.movieposterdb.com/posters/06_03/1967/0060304/l_99688_0060304_639fdd1e.jpg'
>>> getData('0123456')['posters']
>>> get_data('0123456')['posters']
[]
'''
data = {
"url": getUrl(id)
"url": get_url(id)
}
data["posters"] = getPostersByUrl(data["url"])
data["posters"] = get_posters(data["url"])
return data
def getId(url):
def get_id(url):
return url.split("/")[-2]
def getPostersByUrl(url, group=True, timeout=-1):
def get_posters(url, group=True, timeout=-1):
posters = []
html = read_url(url, timeout=timeout, unicode=True)
if url in html:
if group:
results = re.compile('<a href="(http://www.movieposterdb.com/group/.+?)\??">', re.DOTALL).findall(html)
for result in results:
posters += getPostersByUrl(result, False)
posters += get_posters(result, False)
results = re.compile('<a href="(http://www.movieposterdb.com/poster/.+?)">', re.DOTALL).findall(html)
for result in results:
html = read_url(result, timeout=timeout, unicode=True)
posters.append(find_re(html, '"(http://www.movieposterdb.com/posters/.+?\.jpg)"'))
return posters
def getUrl(id):
def get_url(id):
return "http://www.movieposterdb.com/movie/%s/" % id
if __name__ == '__main__':
print getData('0060304')
print getData('0133093')
print get_data('0060304')
print get_data('0133093')

View file

@ -7,7 +7,7 @@ from ox.cache import read_url
from ox import find_re, strip_tags
from ox import langCode2To3, langTo3Code
def findSubtitlesByImdb(imdb, parts = 1, language = "eng"):
def find_subtitles(imdb, parts = 1, language = "eng"):
if len(language) == 2:
language = langCode2To3(language)
elif len(language) != 3:
@ -29,7 +29,7 @@ def findSubtitlesByImdb(imdb, parts = 1, language = "eng"):
opensubtitleId = find_re(data, '/en/subtitles/(.*?)/')
return opensubtitleId
def downloadSubtitleById(opensubtitle_id):
def download_subtitle(opensubtitle_id):
srts = {}
data = read_url('http://www.opensubtitles.org/en/subtitles/%s' % opensubtitle_id)
reg_exp = 'href="(/en/download/file/.*?)">(.*?)</a>'

View file

@ -2,7 +2,7 @@
# vi:si:et:sw=4:sts=4:ts=4
import ox.cache
def getPosterUrl(id):
def get_poster_url(id):
url = "http://0xdb.org/%s/poster.0xdb.jpg" % id
if ox.cache.exists(url):
return url

View file

@ -3,7 +3,7 @@
import re
from ox.net import read_url
def getPosterUrl(id):
def get_poster_url(id):
url = 'http://piratecinema.org/posters/'
html = read_url(url, unicode=True)
results = re.compile('src="(.+)" title=".+\((\d{7})\)"').findall(html)
@ -13,5 +13,5 @@ def getPosterUrl(id):
return ''
if __name__ == '__main__':
print getPosterUrl('0749451')
print get_poster_url('0749451')

View file

@ -2,17 +2,18 @@
# vi:si:et:sw=4:sts=4:ts=4
import re
from ox.cache import getHeaders, read_url
from ox.cache import read_url
from ox import find_re, strip_tags
def getUrlByImdb(imdb):
def get_url(id=None, imdb=None):
#this would also work but does not cache:
'''
from urllib2 import urlopen
u = urlopen(url)
return u.url
'''
if imdb:
url = "http://www.rottentomatoes.com/alias?type=imdbid&s=%s" % imdb
data = read_url(url)
if "movie_title" in data:
@ -24,7 +25,7 @@ def getUrlByImdb(imdb):
def get_og(data, key):
return find_re(data, '<meta property="og:%s".*?content="(.*?)"' % key)
def getData(url):
def get_data(url):
data = read_url(url)
r = {}
r['title'] = find_re(data, '<h1 class="movie_title">(.*?)</h1>')

View file

@ -27,7 +27,7 @@ class SiteParser(dict):
baseUrl = ''
regex = {}
def getUrl(self, page):
def get_url(self, page):
return "%s%s" % (self.baseUrl, page)
def read_url(self, url, timeout):
@ -35,7 +35,7 @@ class SiteParser(dict):
def __init__(self, timeout=-1):
for key in self.regex:
url = self.getUrl(self.regex[key]['page'])
url = self.get_url(self.regex[key]['page'])
data = self.read_url(url, timeout)
if isinstance(self.regex[key]['re'], basestring):
data = re.compile(self.regex[key]['re'], re.DOTALL).findall(data)

View file

@ -9,7 +9,7 @@ from ox.html import decode_html, strip_tags
import ox.net
def getNews(year, month, day):
def get_news(year, month, day):
sections = [
'politik', 'wirtschaft', 'panorama', 'sport', 'kultur', 'netzwelt',
'wissenschaft', 'unispiegel', 'schulspiegel', 'reise', 'auto'
@ -27,7 +27,7 @@ def getNews(year, month, day):
for item in re.compile('<div class="spTeaserCenterpage(.*?)</p>', re.DOTALL).findall(html):
dateString = strip_tags(re.compile('<div class="spDateTime">(.*?)</div>', re.DOTALL).findall(item)[0]).strip()
try:
description = formatString(re.compile('<p>(.*?)<', re.DOTALL).findall(item)[0])
description = format_string(re.compile('<p>(.*?)<', re.DOTALL).findall(item)[0])
except:
description = ''
try:
@ -35,7 +35,7 @@ def getNews(year, month, day):
except:
imageUrl = ''
try:
title = formatString(re.compile('alt=[\'|"](.*?)[\'|"] title=', re.DOTALL).findall(item)[0]).replace(' : ', ': ').replace('::', ':')
title = format_string(re.compile('alt=[\'|"](.*?)[\'|"] title=', re.DOTALL).findall(item)[0]).replace(' : ', ': ').replace('::', ':')
except:
title = ''
if dateString[:10] == date and description and imageUrl and title.find(': ') != -1:
@ -45,12 +45,12 @@ def getNews(year, month, day):
else:
new['date'] = '%s-%s-%s %s:%s' % (dateString[6:10], dateString[3:5], dateString[:2], dateString[12:14], dateString[15:17])
# fix decode_html
# new['description'] = formatString(decode_html(description))
new['description'] = formatString(description)
# new['description'] = format_string(decode_html(description))
new['description'] = format_string(description)
new['imageUrl'] = imageUrl
new['section'] = formatSection(section)
new['title'] = formatString(title)
new['title1'] = new['title'].replace('\xdf', '\xdf\xdf')[:len(formatString(re.compile('<h4>(.*?)</h4>', re.DOTALL).findall(item)[0]))].replace('\xdf\xdf', '\xdf')
new['section'] = format_section(section)
new['title'] = format_string(title)
new['title1'] = new['title'].replace('\xdf', '\xdf\xdf')[:len(format_string(re.compile('<h4>(.*?)</h4>', re.DOTALL).findall(item)[0]))].replace('\xdf\xdf', '\xdf')
if new['title1'][-1:] == ':':
new['title1'] = new['title1'][0:-1]
new['title2'] = new['title'][len(new['title1']) + 2:]
@ -67,21 +67,21 @@ def getNews(year, month, day):
'''
return news
def splitTitle(title):
def split_title(title):
    """Split a 'headline: rest' style *title* into ``[title1, title2]``.

    title1 is the text before the first ': ', title2 is the text captured
    after a ': ' up to the end of the string.  Raises IndexError when either
    pattern finds no match (e.g. the title contains no ': ').
    """
    before_colon = re.findall('(.*?): ', title)
    after_colon = re.findall(': (.*?)$', title)
    return [before_colon[0], after_colon[0]]
def formatString(string):
def format_string(string):
    """Clean up a scraped text fragment and return it.

    Steps, in order:
    - drop the ``spOptiBreak`` span markup,
    - turn newlines into spaces and collapse runs of spaces into one,
    - strip leading/trailing whitespace,
    - decode the ``&amp;``, ``&apos;`` and ``&quot;`` entities.
    """
    string = string.replace('<span class="spOptiBreak"> </span>', '')
    string = string.replace('\n', ' ')
    # The former replace(' ', ' ') swapped a space for a space and so did
    # nothing; splitting on single spaces and dropping the empty pieces
    # actually collapses the runs left behind by the newline replacement.
    string = ' '.join(piece for piece in string.split(' ') if piece).strip()
    string = string.replace('&amp;', '&').replace('&apos;', '\'').replace('&quot;', '"')
    return string
def formatSection(string):
def format_section(string):
    """Upper-case the first character of *string* and write 'spiegel' as 'SPIEGEL' in the remainder."""
    head, tail = string[:1], string[1:]
    return head.upper() + tail.replace('spiegel', 'SPIEGEL')
def formatSubsection(string):
def format_subsection(string):
# SPIEGEL, SPIEGEL special
subsection = {
'abi': 'Abi - und dann?',
@ -98,7 +98,7 @@ def formatSubsection(string):
return subsection[string].replace(u'\xc3', 'ae')
return string[:1].upper() + string[1:]
def getIssue(year, week):
def get_issue(year, week):
coverUrl = 'http://www.spiegel.de/static/epaper/SP/%d/%d/ROSPANZ%d%03d0001-312.jpg' % (year, week, year, week)
if not ox.net.exists(coverUrl):
return None
@ -122,7 +122,7 @@ def getIssue(year, week):
return {'pages': pages, 'contents': contents, 'coverUrl': coverUrl, 'pageUrl': pageUrl}
def archiveIssues():
def archive_issues():
'''
this is just an example of an archiving application
'''
@ -140,8 +140,8 @@ def archiveIssues():
else:
wMax = 53
for w in range(wMax, 0, -1):
print 'getIssue(%d, %d)' % (y, w)
issue = getIssue(y, w)
print 'get_issue(%d, %d)' % (y, w)
issue = get_issue(y, w)
if issue:
dirname = '%s/%d/%02d' % (archivePath, y, w)
if not os.path.exists(dirname):
@ -188,7 +188,7 @@ def archiveIssues():
print p['min'], p['sum'] / p['num'], p['max']
def archiveNews():
def archive_news():
'''
this is just an example of an archiving application
'''
@ -235,7 +235,7 @@ def archiveNews():
f.close()
filename = filename[:-5] + '.txt'
if not os.path.exists(filename) or True:
data = splitTitle(new['title'])
data = split_title(new['title'])
data.append(new['description'])
data = '\n'.join(data)
f = open(filename, 'w')
@ -256,19 +256,14 @@ def archiveNews():
count[string] = {'count': 1, 'string': '%s %s http://www.spiegel.de/%s/0,1518,archiv-%d-%03d,00.html' % (new['date'], new['date'], new['section'].lower(), y, int(datetime(y, m, d).strftime('%j')))}
else:
count[string] = {'count': count[string]['count'] + 1, 'string': '%s %s' % (new['date'], count[string]['string'][17:])}
strings = splitTitle(new['title'])
strings = split_title(new['title'])
if strings[0] != new['title1'] or strings[1] != new['title2']:
colon.append('%s %s %s: %s' % (new['date'], new['title'], new['title1'], new['title2']))
for key in sortDictByKey(count):
for key in sorted(count):
print '%6d %-24s %s' % (count[key]['count'], key, count[key]['string'])
for value in colon:
print value
def sortDictByKey(d):
    """Return the keys of *d* as a new list in ascending order.

    Uses sorted() instead of ``d.keys()`` + ``list.sort()``: the result is the
    same on Python 2, and it also works where ``keys()`` returns a view that
    has no ``sort`` method.
    """
    return sorted(d)
if __name__ == '__main__':
# spiegel = Spiegel(2008, 8)
# print spiegel.getContents()
@ -281,12 +276,12 @@ if __name__ == '__main__':
news = getNews(2008, 2, d)
for new in news:
strings = new['url'].split('/')
string = formatSection(strings[3])
string = format_section(strings[3])
if len(strings) == 6:
string += '/' + formatSubsection(strings[4])
string += '/' + format_subsection(strings[4])
if not string in x:
x.append(string)
print x
'''
# archiveIssues()
archiveNews()
# archive_issues()
archive_news()

View file

@ -22,7 +22,9 @@ def read_url(url, data=None, headers=cache.DEFAULT_HEADERS, timeout=cache.cache_
headers['Cookie'] = 'language=en_EN'
return cache.read_url(url, data, headers, timeout, unicode=unicode)
def findMovies(query, max_results=10):
def find_movies(query=None, imdb=None, max_results=10):
if imdb:
query = "tt" + normalize_imdbid(imdb)
results = []
next = ["http://thepiratebay.org/search/%s/0/3/200" % quote(query), ]
page_count = 1
@ -47,10 +49,7 @@ def findMovies(query, max_results=10):
next = re.compile('<a.*?href="(.*?)".*?>.*?next.gif.*?</a>').findall(data)
return results
def findMovieByImdb(imdb):
return findMovies("tt" + normalize_imdbid(imdb))
def getId(piratebayId):
def get_id(piratebayId):
if piratebayId.startswith('http://torrents.thepiratebay.org/'):
piratebayId = piratebayId.split('org/')[1]
d = find_re(piratebayId, "tor/(\d+)")
@ -62,10 +61,10 @@ def getId(piratebayId):
return piratebayId
def exists(piratebayId):
piratebayId = getId(piratebayId)
piratebayId = get_id(piratebayId)
return ox.net.exists("http://thepiratebay.org/torrent/%s" % piratebayId)
def getData(piratebayId):
def get_data(piratebayId):
_key_map = {
'spoken language(s)': u'language',
'texted language(s)': u'subtitle language',
@ -73,7 +72,7 @@ def getData(piratebayId):
'leechers': 'leecher',
'seeders': 'seeder',
}
piratebayId = getId(piratebayId)
piratebayId = get_id(piratebayId)
torrent = dict()
torrent[u'id'] = piratebayId
torrent[u'domain'] = 'thepiratebay.org'
@ -108,7 +107,7 @@ class Thepiratebay(Torrent):
'4e84415d36ed7b54066160c05a0b0f061898d12b'
'''
def __init__(self, piratebayId):
self.data = getData(piratebayId)
self.data = get_data(piratebayId)
if not self.data:
return
Torrent.__init__(self)

View file

@ -7,12 +7,12 @@ from ox import strip_tags, find_re
from ox.cache import read_url
def getEpisodeData(url):
def get_episode_data(url):
'''
parses information on tv.com episode pages
returns dict with title, show, description, score
example:
getEpisodeData('http://www.tv.com/lost/do-no-harm/episode/399310/summary.html')
get_episode_data('http://www.tv.com/lost/do-no-harm/episode/399310/summary.html')
'''
data = read_url(url, unicode=True)
r = {}

View file

@ -8,7 +8,7 @@ from ox.cache import read_url
from ox import find_string, find_re
def getData(id):
def get_data(id):
url = 'http://www.vimeo.com/moogaloop/load/clip:%s' %id
xml = read_url(url)
tree = ET.parse(StringIO(xml))

View file

@ -8,52 +8,45 @@ from ox.cache import read_url
from ox import find_re, decode_html
def getId(url):
def get_id(url):
return url.split("/")[-1]
def getUrl(id):
def get_url(id=None, imdb=None, allmovie=None):
    """Return a Wikipedia URL for a page id, an IMDb id or an Allmovie id.

    - imdb: search for the quoted id with find(); the first hit's URL is
      returned only if get_movie_data() for that page contains 'imdb_id',
      otherwise "" is returned.
    - allmovie: search for '"amg_id = 1:<allmovie>"' and return the first
      hit's URL, or '' when there is no hit.
    - otherwise: build the URL directly from *id*.

    *imdb* takes precedence over *allmovie*, which takes precedence over *id*.
    """
    if imdb:
        # Fixed: this used the undefined name ``imdbId`` (a leftover from the
        # old getUrlByImdbId signature), which raised NameError on every call.
        query = '"%s"' % imdb
        result = find(query)
        if result:
            url = result[0][1]
            data = get_movie_data(url)
            if 'imdb_id' in data:
                return url
        return ""
    if allmovie:
        query = '"amg_id = 1:%s"' % allmovie
        result = find(query)
        if result:
            url = result[0][1]
            return url
        return ''
    return "http://en.wikipedia.org/wiki/%s" % id
def getMovieId(title, director='', year=''):
def get_movie_id(title, director='', year=''):
    """Search for '"<title>" film <director> <year>' and return item [1] of the first hit, or '' if there is none."""
    terms = (title, director, year)
    matches = find('"%s" film %s %s' % terms, 1)
    if not matches:
        return ''
    return matches[0][1]
def getUrlByImdbId(imdbId):
query = '"%s"'% imdbId
result = find(query)
if result:
url = result[0][1]
data = getMovieData(url)
if 'imdb_id' in data:
return url
return ""
def getUrlByImdb(imdbId):
# deprecated, use getUrlByImdbId()
return getUrlByImdbId(imdbId)
def getUrlByAllmovieId(allmovieId):
query = '"amg_id = 1:%s"'% allmovieId
result = find(query)
if result:
url = result[0][1]
return url
return ''
def getWikiData(wikipediaUrl):
url = wikipediaUrl.replace('wikipedia.org/wiki/', 'wikipedia.org/w/index.php?title=')
def get_wiki_data(wikipedia_url):
    """Fetch the ``action=raw`` version of the page at *wikipedia_url* and return it decoded from UTF-8."""
    index_url = wikipedia_url.replace('wikipedia.org/wiki/', 'wikipedia.org/w/index.php?title=')
    raw_url = "%s&action=raw" % index_url
    return read_url(raw_url).decode('utf-8')
def getMovieData(wikipediaUrl):
if not wikipediaUrl.startswith('http'):
wikipediaUrl = getUrl(wikipediaUrl)
data = getWikiData(wikipediaUrl)
def get_movie_data(wikipedia_url):
if not wikipedia_url.startswith('http'):
wikipedia_url = get_url(wikipedia_url)
data = get_wiki_data(wikipedia_url)
filmbox_data = find_re(data, '''\{\{[Ii]nfobox.[Ff]ilm(.*?)\n\}\}''')
filmbox = {}
_box = filmbox_data.strip().split('|')
@ -104,7 +97,7 @@ def getMovieData(wikipediaUrl):
filmbox['title_sort'] = find_re(data, '''\{\{DEFAULTSORT:(.*?)\}\}''')
return filmbox
def getImageUrl(name):
def get_image_url(name):
url = 'http://en.wikipedia.org/wiki/Image:' + name.replace(' ', '%20')
data = read_url(url, unicode=True)
url = find_re(data, 'href="(http://upload.wikimedia.org/.*?)"')
@ -114,19 +107,19 @@ def getImageUrl(name):
url = 'http:' + url
return url
def getPosterUrl(wikipediaUrl):
if not wikipediaUrl.startswith('http'): wikipediaUrl = getUrl(wikipediaUrl)
data = getMovieData(wikipediaUrl)
def get_poster_url(wikipedia_url):
if not wikipedia_url.startswith('http'): wikipedia_url = get_url(wikipedia_url)
data = get_movie_data(wikipedia_url)
if 'image' in data:
return getImageUrl(data['image'])
return get_image_url(data['image'])
return ''
def getMoviePoster(wikipediaUrl):
# deprecated, use getPosterUrl()
return getPosterUrl(wikipediaUrl)
def get_movie_poster(wikipedia_url):
    """Deprecated: delegates to get_poster_url(); use that function instead."""
    poster_url = get_poster_url(wikipedia_url)
    return poster_url
def getAllmovieId(wikipediaUrl):
data = getMovieData(wikipediaUrl)
def get_allmovie_id(wikipedia_url):
    """Return the 'amg_id' entry of get_movie_data(wikipedia_url), or '' when it is missing."""
    movie = get_movie_data(wikipedia_url)
    if 'amg_id' in movie:
        return movie['amg_id']
    return ''
def find(query, max_results=10):

View file

@ -8,7 +8,7 @@ import feedparser
from ox.cache import read_url, cache_timeout
def getVideoUrl(youtubeId, format='mp4', timeout=cache_timeout):
def video_url(youtubeId, format='mp4', timeout=cache_timeout):
"""
youtubeId - id of video
format - video format, options: webm, 1080p, 720p, mp4, high