Compare commits
No commits in common. "e480b8dcbf23aba730717e30dff5c458d5007967" and "5378970187a980a1c5197b96b4fc67730b507e74" have entirely different histories.
e480b8dcbf
...
5378970187
7 changed files with 195 additions and 129 deletions
|
@ -5,8 +5,7 @@ from __future__ import print_function
|
||||||
|
|
||||||
import itertools
|
import itertools
|
||||||
import mimetypes
|
import mimetypes
|
||||||
import os
|
import random
|
||||||
import hashlib
|
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
from six import PY2
|
from six import PY2
|
||||||
|
@ -21,7 +20,8 @@ _fmt = '%%0%dd' % _width
|
||||||
|
|
||||||
def _make_boundary():
|
def _make_boundary():
|
||||||
# Craft a random boundary.
|
# Craft a random boundary.
|
||||||
boundary = ('=' * 15) + hashlib.sha1(os.urandom(32)).hexdigest() + '=='
|
token = random.randrange(sys.maxsize)
|
||||||
|
boundary = ('=' * 15) + (_fmt % token) + '=='
|
||||||
return boundary
|
return boundary
|
||||||
|
|
||||||
class MultiPartForm(object):
|
class MultiPartForm(object):
|
||||||
|
|
309
ox/web/imdb.py
309
ox/web/imdb.py
|
@ -27,52 +27,6 @@ def read_url(url, data=None, headers=cache.DEFAULT_HEADERS, timeout=cache.cache_
|
||||||
def get_url(id):
|
def get_url(id):
|
||||||
return "http://www.imdb.com/title/tt%s/" % id
|
return "http://www.imdb.com/title/tt%s/" % id
|
||||||
|
|
||||||
|
|
||||||
def reference_section(id):
|
|
||||||
return {
|
|
||||||
'page': 'reference',
|
|
||||||
're': [
|
|
||||||
'<h4 name="{id}" id="{id}".*?<table(.*?)</table>'.format(id=id),
|
|
||||||
'<a href="/name/.*?>(.*?)</a>'
|
|
||||||
],
|
|
||||||
'type': 'list'
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
def zebra_list(label, more=None):
|
|
||||||
conditions = {
|
|
||||||
'page': 'reference',
|
|
||||||
're': [
|
|
||||||
label + '</td>.*?<ul(.*?)</ul>',
|
|
||||||
'<li.*?>(.*?)</li>'
|
|
||||||
],
|
|
||||||
'type': 'list',
|
|
||||||
}
|
|
||||||
if more:
|
|
||||||
conditions['re'] += more
|
|
||||||
return conditions
|
|
||||||
|
|
||||||
def zebra_table(label, more=None, type='string'):
|
|
||||||
conditions = {
|
|
||||||
'page': 'reference',
|
|
||||||
're': [
|
|
||||||
'_label">' + label + '</td>.*?<td>(.*?)</td>',
|
|
||||||
],
|
|
||||||
'type': type,
|
|
||||||
}
|
|
||||||
if more:
|
|
||||||
conditions['re'] += more
|
|
||||||
return conditions
|
|
||||||
|
|
||||||
|
|
||||||
'''
|
|
||||||
'posterIds': {
|
|
||||||
'page': 'posters',
|
|
||||||
're': '/unknown-thumbnail/media/rm(.*?)/tt',
|
|
||||||
'type': 'list'
|
|
||||||
},
|
|
||||||
'''
|
|
||||||
|
|
||||||
class Imdb(SiteParser):
|
class Imdb(SiteParser):
|
||||||
'''
|
'''
|
||||||
>>> Imdb('0068646')['title'] == text_type(u'The Godfather')
|
>>> Imdb('0068646')['title'] == text_type(u'The Godfather')
|
||||||
|
@ -91,29 +45,49 @@ class Imdb(SiteParser):
|
||||||
'type': 'list'
|
'type': 'list'
|
||||||
},
|
},
|
||||||
'aspectratio': {
|
'aspectratio': {
|
||||||
'page': 'reference',
|
'page': 'combined',
|
||||||
're': 'Aspect Ratio</td>.*?ipl-inline-list__item">\s+([\d\.]+)',
|
're': 'Aspect Ratio:</h5><div class="info-content">([\d\.]+)',
|
||||||
'type': 'float',
|
'type': 'float',
|
||||||
},
|
},
|
||||||
'budget': zebra_table('Budget', more=[
|
'budget': {
|
||||||
lambda data: find_re(decode_html(data).replace(',', ''), '\d+')
|
'page': 'business',
|
||||||
], type='int'),
|
|
||||||
'cast': {
|
|
||||||
'page': 'reference',
|
|
||||||
're': [
|
're': [
|
||||||
' <table class="cast_list">(.*?)</table>',
|
'<h5>Budget</h5>\s*?\$(.*?)<br',
|
||||||
'<td.*?itemprop="actor".*?>.*?>(.*?)</a>.*?<td class="character">(.*?)</td>',
|
lambda data: find_re(decode_html(data).replace(',', ''), '\d+')
|
||||||
|
],
|
||||||
|
'type': 'int'
|
||||||
|
},
|
||||||
|
'cast': {
|
||||||
|
'page': 'combined',
|
||||||
|
're': [
|
||||||
|
'<td class="nm">.*?>(.*?)</a>.*?<td class="char">(.*?)</td>',
|
||||||
lambda ll: [strip_tags(l) for l in ll]
|
lambda ll: [strip_tags(l) for l in ll]
|
||||||
|
],
|
||||||
|
'type': 'list'
|
||||||
|
},
|
||||||
|
'cinematographer': {
|
||||||
|
'page': 'combined',
|
||||||
|
're': [
|
||||||
|
lambda data: data.split('Series Crew')[0],
|
||||||
|
'Cinematography by</a>(.*?)</table>',
|
||||||
|
'<a href="/name/.*?/">(.*?)</a>'
|
||||||
],
|
],
|
||||||
'type': 'list'
|
'type': 'list'
|
||||||
},
|
},
|
||||||
'cinematographer': reference_section('cinematographers'),
|
|
||||||
'connections': {
|
'connections': {
|
||||||
'page': 'movieconnections',
|
'page': 'movieconnections',
|
||||||
're': '<h4 class="li_group">(.*?)</h4>(.*?)(<\/div>\n <a|<script)',
|
're': '<h4 class="li_group">(.*?)</h4>(.*?)(<\/div>\n <a|<script)',
|
||||||
'type': 'list'
|
'type': 'list'
|
||||||
},
|
},
|
||||||
'country': zebra_list('Country', more=['<a.*?>(.*?)</a>']),
|
'country': {
|
||||||
|
'page': 'combined',
|
||||||
|
're': [
|
||||||
|
'<div class="info"><h5>Country:</h5>.*?<div class="info">',
|
||||||
|
#'<a href="/country/.*?">(.*?)</a>', #links changed to work with existing caches, just take all links
|
||||||
|
'<a.*?>(.*?)</a>',
|
||||||
|
],
|
||||||
|
'type': 'list'
|
||||||
|
},
|
||||||
'creator': {
|
'creator': {
|
||||||
'page': '',
|
'page': '',
|
||||||
're': [
|
're': [
|
||||||
|
@ -123,12 +97,44 @@ class Imdb(SiteParser):
|
||||||
],
|
],
|
||||||
'type': 'list'
|
'type': 'list'
|
||||||
},
|
},
|
||||||
'director': reference_section('directors'),
|
'director': {
|
||||||
'editor': reference_section('editors'),
|
'page': 'combined',
|
||||||
'composer': reference_section('composers'),
|
're': [
|
||||||
|
lambda data: data.split('<b>Series Crew</b>')[0],
|
||||||
|
'Directed by</a>(.*?)</table>',
|
||||||
|
'<a href="/name/.*?>(.*?)</a>'
|
||||||
|
],
|
||||||
|
'type': 'list'
|
||||||
|
},
|
||||||
|
'_director': {
|
||||||
|
'page': 'combined',
|
||||||
|
're': [
|
||||||
|
'<h5>Director:</h5>.*?<div class="info-content">(.*?)</div>',
|
||||||
|
'<a href="/name/.*?>(.*?)</a>'
|
||||||
|
],
|
||||||
|
'type': 'list'
|
||||||
|
},
|
||||||
|
'editor': {
|
||||||
|
'page': 'combined',
|
||||||
|
're': [
|
||||||
|
lambda data: data.split('Series Crew')[0],
|
||||||
|
'Film Editing by</a>(.*?)</table>',
|
||||||
|
'<a href="/name/.*?>(.*?)</a>'
|
||||||
|
],
|
||||||
|
'type': 'list'
|
||||||
|
},
|
||||||
|
'composer': {
|
||||||
|
'page': 'combined',
|
||||||
|
're': [
|
||||||
|
lambda data: data.split('Series Crew')[0],
|
||||||
|
'Original Music by</a>(.*?)</table>',
|
||||||
|
'<a href="/name/.*?>(.*?)</a>'
|
||||||
|
],
|
||||||
|
'type': 'list'
|
||||||
|
},
|
||||||
'episodeTitle': {
|
'episodeTitle': {
|
||||||
'page': 'reference',
|
'page': 'combined',
|
||||||
're': '<h3 itemprop="name">(.*?)<',
|
're': '<div id="tn15title">.*?<em>(.*?)</em>',
|
||||||
'type': 'string'
|
'type': 'string'
|
||||||
},
|
},
|
||||||
'filmingLocations': {
|
'filmingLocations': {
|
||||||
|
@ -139,44 +145,77 @@ class Imdb(SiteParser):
|
||||||
],
|
],
|
||||||
'type': 'list'
|
'type': 'list'
|
||||||
},
|
},
|
||||||
'genre': zebra_list('Genres', more=['<a.*?>(.*?)</a>', lambda x: x[0]]),
|
'genre': {
|
||||||
'gross': zebra_table('Cumulative Worldwide Gross', more=[
|
'page': 'combined',
|
||||||
lambda data: find_re(decode_html(data).replace(',', ''), '\d+')
|
're': [
|
||||||
], type='int'),
|
'<h5>Genre:</h5>(.*?)<hr',
|
||||||
|
'<a href="/Sections/Genres/.*?/">(.*?)</a>'
|
||||||
|
],
|
||||||
|
'type': 'list'
|
||||||
|
},
|
||||||
|
'gross': {
|
||||||
|
'page': 'business',
|
||||||
|
're': [
|
||||||
|
'<h5>Gross</h5>\s*?\$(.*?)<br',
|
||||||
|
lambda data: find_re(data.replace(',', ''), '\d+')
|
||||||
|
],
|
||||||
|
'type': 'int'
|
||||||
|
},
|
||||||
'keyword': {
|
'keyword': {
|
||||||
'page': 'keywords',
|
'page': 'keywords',
|
||||||
're': '<a href="/keyword/.*?>(.*?)</a>',
|
're': '<a href="/keyword/.*?>(.*?)</a>',
|
||||||
'type': 'list'
|
'type': 'list'
|
||||||
},
|
},
|
||||||
'language': zebra_list('Language', more=['<a.*?>(.*?)</a>']),
|
'language': {
|
||||||
|
'page': 'combined',
|
||||||
|
're': [
|
||||||
|
#'<h5>Language:</h5>.*?<div class="info">',
|
||||||
|
'<h5>Language:</h5>.*?</div>',
|
||||||
|
#'<a href="/language/.*?">(.*?)</a>', #links changed to work with existing caches, just take all links
|
||||||
|
'<a.*?>(.*?)</a>',
|
||||||
|
],
|
||||||
|
'type': 'list'
|
||||||
|
},
|
||||||
'originalTitle': {
|
'originalTitle': {
|
||||||
'page': 'releaseinfo',
|
'page': 'releaseinfo',
|
||||||
're': '<td>\(original title\)</td>\s*<td>(.*?)</td>',
|
're': '<td>\(original title\)</td>\s*<td>(.*?)</td>',
|
||||||
'type': 'string'
|
'type': 'string'
|
||||||
},
|
},
|
||||||
'summary': zebra_table('Plot Summary', more=[
|
'summary': {
|
||||||
'<p>(.*?)<em'
|
'page': 'plotsummary',
|
||||||
]),
|
're': '<p class="plotSummary">(.*?)<\/p>',
|
||||||
'posterId': {
|
|
||||||
'page': 'reference',
|
|
||||||
're': '<img.*?class="titlereference-primary-image".*?src="(.*?)".*?>',
|
|
||||||
'type': 'string'
|
'type': 'string'
|
||||||
},
|
},
|
||||||
'producer': reference_section('producers'),
|
'posterId': {
|
||||||
'productionCompany': {
|
'page': 'combined',
|
||||||
'page': 'reference',
|
're': '<img.*?id="primary-poster".*?src="(.*?)".*?>',
|
||||||
|
'type': 'string'
|
||||||
|
},
|
||||||
|
'posterIds': {
|
||||||
|
'page': 'posters',
|
||||||
|
're': '/unknown-thumbnail/media/rm(.*?)/tt',
|
||||||
|
'type': 'list'
|
||||||
|
},
|
||||||
|
'producer': {
|
||||||
|
'page': 'combined',
|
||||||
're': [
|
're': [
|
||||||
'Production Companies.*?<ul(.*?)</ul>',
|
lambda data: data.split('Series Crew')[0],
|
||||||
|
'Produced by</a>(.*?)</table>',
|
||||||
|
'<a href="/name/.*?/">(.*?)</a>'
|
||||||
|
],
|
||||||
|
'type': 'list'
|
||||||
|
},
|
||||||
|
'productionCompany': {
|
||||||
|
'page': 'combined',
|
||||||
|
're': [
|
||||||
|
'Production Companies</b><ul>(.*?)</ul>',
|
||||||
'<a href="/company/.*?/">(.*?)</a>'
|
'<a href="/company/.*?/">(.*?)</a>'
|
||||||
],
|
],
|
||||||
'type': 'list'
|
'type': 'list'
|
||||||
},
|
},
|
||||||
'rating': {
|
'rating': {
|
||||||
'page': 'reference',
|
'page': 'combined',
|
||||||
're': [
|
're': '<div class="starbar-meta">.*?<b>([\d,.]+?)/10</b>',
|
||||||
'<div class="ipl-rating-star ">(.*?)</div>',
|
|
||||||
'ipl-rating-star__rating">([\d,.]+?)</span>',
|
|
||||||
],
|
|
||||||
'type': 'float'
|
'type': 'float'
|
||||||
},
|
},
|
||||||
'releasedate': {
|
'releasedate': {
|
||||||
|
@ -187,43 +226,59 @@ class Imdb(SiteParser):
|
||||||
],
|
],
|
||||||
'type': 'list'
|
'type': 'list'
|
||||||
},
|
},
|
||||||
#FIXME using some /offsite/ redirect now
|
'reviews': {
|
||||||
#'reviews': {
|
'page': 'externalreviews',
|
||||||
# 'page': 'externalreviews',
|
|
||||||
# 're': [
|
|
||||||
# '<ul class="simpleList">(.*?)</ul>',
|
|
||||||
# '<li>.*?<a href="(http.*?)".*?>(.*?)</a>.*?</li>'
|
|
||||||
# ],
|
|
||||||
# 'type': 'list'
|
|
||||||
#},
|
|
||||||
'runtime': zebra_list('Runtime'),
|
|
||||||
'color': zebra_list('Color', more=['<a.*?>(.*?)</a>']),
|
|
||||||
'sound': zebra_list('Sound Mix', more=['<a.*?>(.*?)</a>', lambda x: x[0]]),
|
|
||||||
|
|
||||||
'season': {
|
|
||||||
'page': 'reference',
|
|
||||||
're': [
|
're': [
|
||||||
'<ul class="ipl-inline-list titlereference-overview-season-episode-numbers">(.*?)</ul>',
|
'<ol>(.*?)</ol>',
|
||||||
'Season (\d+)',
|
'<li><a href="(http.*?)".*?>(.*?)</a></li>'
|
||||||
|
],
|
||||||
|
'type': 'list'
|
||||||
|
},
|
||||||
|
'runtime': {
|
||||||
|
'page': 'combined',
|
||||||
|
're': '<h5>Runtime:</h5><div class="info-content">.*?([0-9]+ sec|[0-9]+ min).*?</div>',
|
||||||
|
'type': 'string'
|
||||||
|
},
|
||||||
|
'color': {
|
||||||
|
'page': 'combined',
|
||||||
|
're': [
|
||||||
|
'<h5>Color:</h5><div class="info-content">(.*?)</div>',
|
||||||
|
'<a.*?>(.*?)</a>'
|
||||||
|
],
|
||||||
|
'type': 'list'
|
||||||
|
},
|
||||||
|
'sound': {
|
||||||
|
'page': 'combined',
|
||||||
|
're': [
|
||||||
|
'<h5>Sound Mix:</h5><div class="info-content">(.*?)</div>',
|
||||||
|
'<a.*?>(.*?)</a>'
|
||||||
|
],
|
||||||
|
'type': 'list'
|
||||||
|
},
|
||||||
|
'season': {
|
||||||
|
'page': 'combined',
|
||||||
|
're': [
|
||||||
|
'<h5>Original Air Date:</h5>.*?<div class="info-content">(.*?)</div>',
|
||||||
|
'\(Season (\d+), Episode \d+\)',
|
||||||
],
|
],
|
||||||
'type': 'int'
|
'type': 'int'
|
||||||
},
|
},
|
||||||
'episode': {
|
'episode': {
|
||||||
'page': 'reference',
|
'page': 'combined',
|
||||||
're': [
|
're': [
|
||||||
'<ul class="ipl-inline-list titlereference-overview-season-episode-numbers">(.*?)</ul>',
|
'<h5>Original Air Date:</h5>.*?<div class="info-content">(.*?)</div>',
|
||||||
'Episode (\d+)',
|
'\(Season \d+, Episode (\d+)\)',
|
||||||
],
|
],
|
||||||
'type': 'int'
|
'type': 'int'
|
||||||
},
|
},
|
||||||
'series': {
|
'series': {
|
||||||
'page': 'reference',
|
'page': 'combined',
|
||||||
're': '<h4 itemprop="name">.*?<a href="/title/tt(\d{7})',
|
're': '<h5>TV Series:</h5>.*?<a href="/title/tt(\d{7})',
|
||||||
'type': 'string'
|
'type': 'string'
|
||||||
},
|
},
|
||||||
'isSeries': {
|
'isSeries': {
|
||||||
'page': 'reference',
|
'page': 'combined',
|
||||||
're': 'property=\'og:title\'.*?content=".*?(TV series|TV mini-series).*?"',
|
're': '<span class="tv-extra">(TV series|TV mini-series) ',
|
||||||
'type': 'string'
|
'type': 'string'
|
||||||
},
|
},
|
||||||
'title': {
|
'title': {
|
||||||
|
@ -240,17 +295,22 @@ class Imdb(SiteParser):
|
||||||
'type': 'list',
|
'type': 'list',
|
||||||
},
|
},
|
||||||
'votes': {
|
'votes': {
|
||||||
'page': 'reference',
|
'page': 'combined',
|
||||||
're': [
|
're': '<a href="ratings" class="tn15more">([\d,]*?) votes</a>',
|
||||||
'class="ipl-rating-star__total-votes">\((.*?)\)',
|
|
||||||
lambda r: r.replace(',', '')
|
|
||||||
],
|
|
||||||
'type': 'string'
|
'type': 'string'
|
||||||
},
|
},
|
||||||
'writer': reference_section('writers'),
|
'writer': {
|
||||||
|
'page': 'combined',
|
||||||
|
're': [
|
||||||
|
lambda data: data.split('Series Crew')[0],
|
||||||
|
'Writing credits</a>(.*?)</table>',
|
||||||
|
'<a href="/name/.*?/">(.*?)</a>'
|
||||||
|
],
|
||||||
|
'type': 'list'
|
||||||
|
},
|
||||||
'year': {
|
'year': {
|
||||||
'page': 'reference',
|
'page': 'combined',
|
||||||
're': '=["\']og:title["\'] content="[^"]*?\((\d{4}).*?"',
|
're': '="og:title" content="[^"]*?\((\d{4}).*?"',
|
||||||
'type': 'int'
|
'type': 'int'
|
||||||
},
|
},
|
||||||
'credits': {
|
'credits': {
|
||||||
|
@ -275,7 +335,7 @@ class Imdb(SiteParser):
|
||||||
self.baseUrl = "http://www.imdb.com/title/tt%s/" % id
|
self.baseUrl = "http://www.imdb.com/title/tt%s/" % id
|
||||||
super(Imdb, self).__init__(timeout)
|
super(Imdb, self).__init__(timeout)
|
||||||
|
|
||||||
url = self.baseUrl + 'reference'
|
url = self.baseUrl + 'combined'
|
||||||
page = self.read_url(url, timeout=-1)
|
page = self.read_url(url, timeout=-1)
|
||||||
if '<title>IMDb: Page not found</title>' in page \
|
if '<title>IMDb: Page not found</title>' in page \
|
||||||
or 'The requested URL was not found on our server.' in page:
|
or 'The requested URL was not found on our server.' in page:
|
||||||
|
@ -293,6 +353,8 @@ class Imdb(SiteParser):
|
||||||
if 'country' in self:
|
if 'country' in self:
|
||||||
self['country'] = [normalize_country_name(c) or c for c in self['country']]
|
self['country'] = [normalize_country_name(c) or c for c in self['country']]
|
||||||
|
|
||||||
|
if 'sound' in self:
|
||||||
|
self['sound'] = list(set(self['sound']))
|
||||||
|
|
||||||
def cleanup_title(title):
|
def cleanup_title(title):
|
||||||
if title.startswith('"') and title.endswith('"'):
|
if title.startswith('"') and title.endswith('"'):
|
||||||
|
@ -327,8 +389,6 @@ class Imdb(SiteParser):
|
||||||
del self['alternativeTitles']
|
del self['alternativeTitles']
|
||||||
|
|
||||||
if 'runtime' in self and self['runtime']:
|
if 'runtime' in self and self['runtime']:
|
||||||
if isinstance(self['runtime'], list):
|
|
||||||
self['runtime'] = self['runtime'][0]
|
|
||||||
if 'min' in self['runtime']:
|
if 'min' in self['runtime']:
|
||||||
base = 60
|
base = 60
|
||||||
else:
|
else:
|
||||||
|
@ -336,9 +396,8 @@ class Imdb(SiteParser):
|
||||||
self['runtime'] = int(find_re(self['runtime'], '([0-9]+)')) * base
|
self['runtime'] = int(find_re(self['runtime'], '([0-9]+)')) * base
|
||||||
if 'runtime' in self and not self['runtime']:
|
if 'runtime' in self and not self['runtime']:
|
||||||
del self['runtime']
|
del self['runtime']
|
||||||
|
if 'votes' in self:
|
||||||
if 'sound' in self:
|
self['votes'] = self['votes'].replace(',', '')
|
||||||
self['sound'] = list(sorted(set(self['sound'])))
|
|
||||||
|
|
||||||
if 'cast' in self:
|
if 'cast' in self:
|
||||||
if isinstance(self['cast'][0], string_types):
|
if isinstance(self['cast'][0], string_types):
|
||||||
|
@ -346,7 +405,6 @@ class Imdb(SiteParser):
|
||||||
self['actor'] = [c[0] for c in self['cast']]
|
self['actor'] = [c[0] for c in self['cast']]
|
||||||
def cleanup_character(c):
|
def cleanup_character(c):
|
||||||
c = c.replace('(uncredited)', '').strip()
|
c = c.replace('(uncredited)', '').strip()
|
||||||
c = re.sub('\s+', ' ', c)
|
|
||||||
return c
|
return c
|
||||||
self['cast'] = [{'actor': x[0], 'character': cleanup_character(x[1])}
|
self['cast'] = [{'actor': x[0], 'character': cleanup_character(x[1])}
|
||||||
for x in self['cast']]
|
for x in self['cast']]
|
||||||
|
@ -370,11 +428,18 @@ class Imdb(SiteParser):
|
||||||
return r
|
return r
|
||||||
cc[rel] = list(map(get_conn, re.compile('<a href="/title/tt(\d{7})/?">(.*?)</a>(.*?)<\/div', re.DOTALL).findall(data)))
|
cc[rel] = list(map(get_conn, re.compile('<a href="/title/tt(\d{7})/?">(.*?)</a>(.*?)<\/div', re.DOTALL).findall(data)))
|
||||||
|
|
||||||
|
|
||||||
self['connections'] = cc
|
self['connections'] = cc
|
||||||
|
|
||||||
for key in ('country', 'genre'):
|
for key in ('country', 'genre'):
|
||||||
if key in self:
|
if key in self:
|
||||||
self[key] = list(filter(lambda x: x.lower() != 'home', self[key]))
|
self[key] = list(filter(lambda x: x.lower() != 'home', self[key]))
|
||||||
|
#0092999
|
||||||
|
if '_director' in self:
|
||||||
|
if 'series' in self or 'isSeries' in self:
|
||||||
|
self['creator'] = self.pop('_director')
|
||||||
|
else:
|
||||||
|
del self['_director']
|
||||||
if 'isSeries' in self:
|
if 'isSeries' in self:
|
||||||
del self['isSeries']
|
del self['isSeries']
|
||||||
self['isSeries'] = True
|
self['isSeries'] = True
|
||||||
|
@ -493,7 +558,7 @@ class ImdbCombined(Imdb):
|
||||||
def __init__(self, id, timeout=-1):
|
def __init__(self, id, timeout=-1):
|
||||||
_regex = {}
|
_regex = {}
|
||||||
for key in self.regex:
|
for key in self.regex:
|
||||||
if self.regex[key]['page'] in ('releaseinfo', 'reference'):
|
if self.regex[key]['page'] in ('combined', 'releaseinfo'):
|
||||||
_regex[key] = self.regex[key]
|
_regex[key] = self.regex[key]
|
||||||
self.regex = _regex
|
self.regex = _regex
|
||||||
super(ImdbCombined, self).__init__(id, timeout)
|
super(ImdbCombined, self).__init__(id, timeout)
|
||||||
|
|
|
@ -2,12 +2,12 @@
|
||||||
# vi:si:et:sw=4:sts=4:ts=4
|
# vi:si:et:sw=4:sts=4:ts=4
|
||||||
import re
|
import re
|
||||||
|
|
||||||
|
import feedparser
|
||||||
from ox.cache import read_url
|
from ox.cache import read_url
|
||||||
from ox import find_re, strip_tags
|
from ox import find_re, strip_tags
|
||||||
from ox.iso import langCode2To3, langTo3Code
|
from ox.iso import langCode2To3, langTo3Code
|
||||||
|
|
||||||
def find_subtitles(imdb, parts = 1, language = "eng"):
|
def find_subtitles(imdb, parts = 1, language = "eng"):
|
||||||
import feedparser
|
|
||||||
if len(language) == 2:
|
if len(language) == 2:
|
||||||
language = langCode2To3(language)
|
language = langCode2To3(language)
|
||||||
elif len(language) != 3:
|
elif len(language) != 3:
|
||||||
|
|
|
@ -33,7 +33,7 @@ class SiteParser(dict):
|
||||||
return "%s%s" % (self.baseUrl, page)
|
return "%s%s" % (self.baseUrl, page)
|
||||||
|
|
||||||
def read_url(self, url, timeout):
|
def read_url(self, url, timeout):
|
||||||
if url not in self._cache:
|
if not url in self._cache:
|
||||||
self._cache[url] = read_url(url, timeout=timeout, unicode=True)
|
self._cache[url] = read_url(url, timeout=timeout, unicode=True)
|
||||||
return self._cache[url]
|
return self._cache[url]
|
||||||
|
|
||||||
|
|
|
@ -7,6 +7,7 @@ import re
|
||||||
from xml.dom.minidom import parseString
|
from xml.dom.minidom import parseString
|
||||||
import json
|
import json
|
||||||
|
|
||||||
|
import feedparser
|
||||||
import ox
|
import ox
|
||||||
from ox.cache import read_url, cache_timeout
|
from ox.cache import read_url, cache_timeout
|
||||||
|
|
||||||
|
@ -60,7 +61,6 @@ def get_video_info(id):
|
||||||
return info
|
return info
|
||||||
|
|
||||||
def find(query, max_results=10, offset=1, orderBy='relevance'):
|
def find(query, max_results=10, offset=1, orderBy='relevance'):
|
||||||
import feedparser
|
|
||||||
query = quote(query)
|
query = quote(query)
|
||||||
url = "http://gdata.youtube.com/feeds/api/videos?vq=%s&orderby=%s&start-index=%s&max-results=%s" % (query, orderBy, offset, max_results)
|
url = "http://gdata.youtube.com/feeds/api/videos?vq=%s&orderby=%s&start-index=%s&max-results=%s" % (query, orderBy, offset, max_results)
|
||||||
data = read_url(url)
|
data = read_url(url)
|
||||||
|
|
|
@ -1,2 +1,3 @@
|
||||||
chardet
|
chardet
|
||||||
|
feedparser
|
||||||
six>=1.5.2
|
six>=1.5.2
|
||||||
|
|
2
setup.py
2
setup.py
|
@ -50,7 +50,7 @@ setup(
|
||||||
download_url="https://code.0x2620.org/python-ox/download",
|
download_url="https://code.0x2620.org/python-ox/download",
|
||||||
license="GPLv3",
|
license="GPLv3",
|
||||||
packages=['ox', 'ox.torrent', 'ox.web'],
|
packages=['ox', 'ox.torrent', 'ox.web'],
|
||||||
install_requires=['six>=1.5.2', 'chardet'],
|
install_requires=['six>=1.5.2', 'chardet', 'feedparser'],
|
||||||
keywords=[
|
keywords=[
|
||||||
],
|
],
|
||||||
classifiers=[
|
classifiers=[
|
||||||
|
|
Loading…
Reference in a new issue