cleanup pylint errors and py2/3 issues
This commit is contained in:
parent
4e7898ae57
commit
77f8876fca
20 changed files with 232 additions and 197 deletions
|
|
@ -2,6 +2,7 @@ from __future__ import print_function
|
|||
import json
|
||||
import re
|
||||
|
||||
from six import text_type
|
||||
from ox.cache import read_url
|
||||
|
||||
HEADERS = {
|
||||
|
|
@ -16,9 +17,9 @@ USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7) '
|
|||
USER_AGENT += 'AppleWebKit/534.48.3 (KHTML, like Gecko) Version/5.1 Safari/534.48.3'
|
||||
|
||||
def get_movie_data(title, director):
|
||||
if isinstance(title, unicode):
|
||||
if isinstance(title, text_type):
|
||||
title = title.encode('utf-8')
|
||||
if isinstance(director, unicode):
|
||||
if isinstance(director, text_type):
|
||||
director = director.encode('utf-8')
|
||||
data = {}
|
||||
# itunes section (preferred source for link)
|
||||
|
|
@ -45,7 +46,7 @@ def get_movie_data(title, director):
|
|||
results = js['results']
|
||||
if results:
|
||||
url = host + results[0]['location']
|
||||
if not 'link' in data:
|
||||
if 'link' not in data:
|
||||
data['link'] = url
|
||||
headers = {
|
||||
'User-Agent': USER_AGENT
|
||||
|
|
|
|||
|
|
@ -1,21 +1,21 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
# vi:si:et:sw=4:sts=4:ts=4
|
||||
import re
|
||||
from six.moves.urllib.parse import unquote
|
||||
from ox.cache import read_url
|
||||
|
||||
|
||||
def get_video_url(url):
|
||||
'''
|
||||
>>> get_video_url('http://www.dailymotion.com/relevance/search/priere%2Bpour%2Brefuznik/video/x3opar_priere-pour-refuznik-1-jeanluc-goda_shortfilms').split('?auth')[0]
|
||||
'http://www.dailymotion.com/cdn/FLV-320x240/video/x3opar_priere-pour-refuznik-1-jean-luc-god_shortfilms.flv'
|
||||
|
||||
>>> get_video_url('http://www.dailymotion.com/relevance/search/priere%2Bpour%2Brefuznik/video/x3ou94_priere-pour-refuznik-2-jeanluc-goda_shortfilms').split('?auth')[0]
|
||||
'http://www.dailymotion.com/cdn/FLV-320x240/video/x3ou94_priere-pour-refuznik-2-jean-luc-god_shortfilms.flv'
|
||||
'''
|
||||
data = read_url(url)
|
||||
video = re.compile('''video", "(.*?)"''').findall(data)
|
||||
for v in video:
|
||||
v = unquote(v).split('@@')[0]
|
||||
return v
|
||||
return ''
|
||||
# -*- coding: utf-8 -*-
|
||||
# vi:si:et:sw=4:sts=4:ts=4
|
||||
import re
|
||||
from six.moves.urllib.parse import unquote
|
||||
from ox.cache import read_url
|
||||
|
||||
|
||||
def get_video_url(url):
|
||||
'''
|
||||
>>> get_video_url('http://www.dailymotion.com/relevance/search/priere%2Bpour%2Brefuznik/video/x3opar_priere-pour-refuznik-1-jeanluc-goda_shortfilms').split('?auth')[0]
|
||||
'http://www.dailymotion.com/cdn/FLV-320x240/video/x3opar_priere-pour-refuznik-1-jean-luc-god_shortfilms.flv'
|
||||
|
||||
>>> get_video_url('http://www.dailymotion.com/relevance/search/priere%2Bpour%2Brefuznik/video/x3ou94_priere-pour-refuznik-2-jeanluc-goda_shortfilms').split('?auth')[0]
|
||||
'http://www.dailymotion.com/cdn/FLV-320x240/video/x3ou94_priere-pour-refuznik-2-jean-luc-god_shortfilms.flv'
|
||||
'''
|
||||
data = read_url(url)
|
||||
video = re.compile('''video", "(.*?)"''').findall(data)
|
||||
for v in video:
|
||||
v = unquote(v).split('@@')[0]
|
||||
return v
|
||||
return ''
|
||||
|
|
|
|||
|
|
@ -7,7 +7,7 @@ import time
|
|||
from ox import strip_tags, find_re
|
||||
from ox.cache import read_url
|
||||
|
||||
import google
|
||||
from . import google
|
||||
|
||||
|
||||
def get_show_url(title):
|
||||
|
|
|
|||
|
|
@ -28,22 +28,32 @@ def get_show_url(title):
|
|||
def get_data(url):
|
||||
data = read_url(url, unicode=True)
|
||||
doc = document_fromstring(data)
|
||||
score = filter(lambda s: s.attrib.get('property') == 'v:average',
|
||||
doc.xpath('//span[@class="score_value"]'))
|
||||
score = [s for s in doc.xpath('//span[@class="score_value"]')
|
||||
if s.attrib.get('property') == 'v:average']
|
||||
if score:
|
||||
score = int(score[0].text)
|
||||
else:
|
||||
score = -1
|
||||
authors = [a.text
|
||||
for a in doc.xpath('//div[@class="review_content"]//div[@class="author"]//a')]
|
||||
sources = [d.text
|
||||
for d in doc.xpath('//div[@class="review_content"]//div[@class="source"]/a')]
|
||||
reviews = [d.text
|
||||
for d in doc.xpath('//div[@class="review_content"]//div[@class="review_body"]')]
|
||||
scores = [int(d.text.strip())
|
||||
for d in doc.xpath('//div[@class="review_content"]//div[contains(@class, "critscore")]')]
|
||||
urls = [a.attrib['href']
|
||||
for a in doc.xpath('//div[@class="review_content"]//a[contains(@class, "external")]')]
|
||||
authors = [
|
||||
a.text
|
||||
for a in doc.xpath('//div[@class="review_content"]//div[@class="author"]//a')
|
||||
]
|
||||
sources = [
|
||||
d.text
|
||||
for d in doc.xpath('//div[@class="review_content"]//div[@class="source"]/a')
|
||||
]
|
||||
reviews = [
|
||||
d.text
|
||||
for d in doc.xpath('//div[@class="review_content"]//div[@class="review_body"]')
|
||||
]
|
||||
scores = [
|
||||
int(d.text.strip())
|
||||
for d in doc.xpath('//div[@class="review_content"]//div[contains(@class, "critscore")]')
|
||||
]
|
||||
urls = [
|
||||
a.attrib['href']
|
||||
for a in doc.xpath('//div[@class="review_content"]//a[contains(@class, "external")]')
|
||||
]
|
||||
|
||||
metacritics = []
|
||||
for i in range(len(authors)):
|
||||
|
|
@ -54,7 +64,7 @@ def get_data(url):
|
|||
'quote': strip_tags(reviews[i]).strip(),
|
||||
'score': scores[i],
|
||||
})
|
||||
|
||||
|
||||
return {
|
||||
'critics': metacritics,
|
||||
'id': get_id(url),
|
||||
|
|
|
|||
|
|
@ -32,7 +32,7 @@ def get_data(url):
|
|||
r['summary'] = get_og(data, 'description')
|
||||
|
||||
meter = re.compile('<span id="all-critics-meter" class="meter(.*?)">(.*?)</span>').findall(data)
|
||||
meter = filter(lambda m: m[1].isdigit(), meter)
|
||||
meter = [m for m in meter if m[1].isdigit()]
|
||||
if meter:
|
||||
r['tomatometer'] = meter[0][1]
|
||||
r['rating'] = find_re(data, 'Average Rating: <span>([\d.]+)/10</span>')
|
||||
|
|
|
|||
|
|
@ -95,7 +95,7 @@ def format_subsection(string):
|
|||
'ussports': 'US-Sports',
|
||||
'wunderbar': 'wunderBAR'
|
||||
}
|
||||
if subsection.has_key(string):
|
||||
if string in subsection:
|
||||
return subsection[string].replace(u'\xc3', 'ae')
|
||||
return string[:1].upper() + string[1:]
|
||||
|
||||
|
|
@ -219,8 +219,8 @@ def archive_news():
|
|||
else:
|
||||
dMax = days[m]
|
||||
for d in range(dMax, 0, -1):
|
||||
print('getNews(%d, %d, %d)' % (y, m, d))
|
||||
news = getNews(y, m ,d)
|
||||
print('get_news(%d, %d, %d)' % (y, m, d))
|
||||
news = get_news(y, m, d)
|
||||
for new in news:
|
||||
dirname = archivePath + '/' + new['date'][0:4] + '/' + new['date'][5:7] + new['date'][8:10] + '/' + new['date'][11:13] + new['date'][14:16]
|
||||
if not os.path.exists(dirname):
|
||||
|
|
@ -230,7 +230,7 @@ def archive_news():
|
|||
else:
|
||||
filename = dirname + '/' + new['url'] + '.json'
|
||||
if not os.path.exists(filename) or True:
|
||||
data = json.dumps(new, ensure_ascii = False)
|
||||
data = json.dumps(new, ensure_ascii=False)
|
||||
f = open(filename, 'w')
|
||||
f.write(data)
|
||||
f.close()
|
||||
|
|
@ -253,7 +253,7 @@ def archive_news():
|
|||
string = strings[3]
|
||||
if len(strings) == 6:
|
||||
string += '/' + strings[4]
|
||||
if not count.has_key(string):
|
||||
if string not in count:
|
||||
count[string] = {'count': 1, 'string': '%s %s http://www.spiegel.de/%s/0,1518,archiv-%d-%03d,00.html' % (new['date'], new['date'], new['section'].lower(), y, int(datetime(y, m, d).strftime('%j')))}
|
||||
else:
|
||||
count[string] = {'count': count[string]['count'] + 1, 'string': '%s %s' % (new['date'], count[string]['string'][17:])}
|
||||
|
|
@ -269,12 +269,12 @@ if __name__ == '__main__':
|
|||
# spiegel = Spiegel(2008, 8)
|
||||
# print(spiegel.getContents())
|
||||
# news = News(2001, 9, 10)
|
||||
# output(news.getNews())
|
||||
# output(news.get_news())
|
||||
'''
|
||||
x = []
|
||||
for d in range(10, 30):
|
||||
print('2/%d' % d)
|
||||
news = getNews(2008, 2, d)
|
||||
news = get_news(2008, 2, d)
|
||||
for new in news:
|
||||
strings = new['url'].split('/')
|
||||
string = format_section(strings[3])
|
||||
|
|
|
|||
|
|
@ -27,15 +27,15 @@ def video_url(youtubeId, format='mp4', timeout=cache_timeout):
|
|||
"""
|
||||
fmt = None
|
||||
if format == '4k':
|
||||
fmt=38
|
||||
fmt = 38
|
||||
elif format == '1080p':
|
||||
fmt=37
|
||||
fmt = 37
|
||||
elif format == '720p':
|
||||
fmt=22
|
||||
fmt = 22
|
||||
elif format == 'mp4':
|
||||
fmt=18
|
||||
fmt = 18
|
||||
elif format == 'high':
|
||||
fmt=35
|
||||
fmt = 35
|
||||
elif format == 'webm':
|
||||
streams = videos(youtubeId, 'webm')
|
||||
return streams[max(streams.keys())]['url']
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue