Clean up pylint errors and Python 2/3 compatibility issues

This commit is contained in:
j 2016-06-08 15:32:46 +02:00
commit 77f8876fca
20 changed files with 232 additions and 197 deletions

View file

@@ -2,6 +2,7 @@ from __future__ import print_function
import json
import re
from six import text_type
from ox.cache import read_url
HEADERS = {
@@ -16,9 +17,9 @@ USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7) '
USER_AGENT += 'AppleWebKit/534.48.3 (KHTML, like Gecko) Version/5.1 Safari/534.48.3'
def get_movie_data(title, director):
if isinstance(title, unicode):
if isinstance(title, text_type):
title = title.encode('utf-8')
if isinstance(director, unicode):
if isinstance(director, text_type):
director = director.encode('utf-8')
data = {}
# itunes section (preferred source for link)
@@ -45,7 +46,7 @@ def get_movie_data(title, director):
results = js['results']
if results:
url = host + results[0]['location']
if not 'link' in data:
if 'link' not in data:
data['link'] = url
headers = {
'User-Agent': USER_AGENT

View file

@@ -1,21 +1,21 @@
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
import re
from six.moves.urllib.parse import unquote
from ox.cache import read_url
def get_video_url(url):
    '''
    Fetch a Dailymotion page and return the first video URL embedded in it.

    The page is searched for ``video", "<value>"`` pairs; the first value
    found is URL-unquoted and everything from the first ``@@`` onwards is
    dropped. An empty string is returned when the page has no such pair.

    >>> get_video_url('http://www.dailymotion.com/relevance/search/priere%2Bpour%2Brefuznik/video/x3opar_priere-pour-refuznik-1-jeanluc-goda_shortfilms').split('?auth')[0]
    'http://www.dailymotion.com/cdn/FLV-320x240/video/x3opar_priere-pour-refuznik-1-jean-luc-god_shortfilms.flv'
    >>> get_video_url('http://www.dailymotion.com/relevance/search/priere%2Bpour%2Brefuznik/video/x3ou94_priere-pour-refuznik-2-jeanluc-goda_shortfilms').split('?auth')[0]
    'http://www.dailymotion.com/cdn/FLV-320x240/video/x3ou94_priere-pour-refuznik-2-jean-luc-god_shortfilms.flv'
    '''
    page = read_url(url)
    candidates = re.findall('''video", "(.*?)"''', page)
    if not candidates:
        # No embedded video entry on the page.
        return ''
    # Only the first entry is used; the part after '@@' is discarded.
    return unquote(candidates[0]).split('@@')[0]
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
import re
from six.moves.urllib.parse import unquote
from ox.cache import read_url
def get_video_url(url):
    '''
    Fetch a Dailymotion page and return the first video URL embedded in it.

    The page is searched for ``video", "<value>"`` pairs; the first value
    found is URL-unquoted and everything from the first ``@@`` onwards is
    dropped. An empty string is returned when the page has no such pair.

    >>> get_video_url('http://www.dailymotion.com/relevance/search/priere%2Bpour%2Brefuznik/video/x3opar_priere-pour-refuznik-1-jeanluc-goda_shortfilms').split('?auth')[0]
    'http://www.dailymotion.com/cdn/FLV-320x240/video/x3opar_priere-pour-refuznik-1-jean-luc-god_shortfilms.flv'
    >>> get_video_url('http://www.dailymotion.com/relevance/search/priere%2Bpour%2Brefuznik/video/x3ou94_priere-pour-refuznik-2-jeanluc-goda_shortfilms').split('?auth')[0]
    'http://www.dailymotion.com/cdn/FLV-320x240/video/x3ou94_priere-pour-refuznik-2-jean-luc-god_shortfilms.flv'
    '''
    page = read_url(url)
    candidates = re.findall('''video", "(.*?)"''', page)
    if not candidates:
        # No embedded video entry on the page.
        return ''
    # Only the first entry is used; the part after '@@' is discarded.
    return unquote(candidates[0]).split('@@')[0]

View file

@@ -7,7 +7,7 @@ import time
from ox import strip_tags, find_re
from ox.cache import read_url
import google
from . import google
def get_show_url(title):

View file

@@ -28,22 +28,32 @@ def get_show_url(title):
def get_data(url):
data = read_url(url, unicode=True)
doc = document_fromstring(data)
score = filter(lambda s: s.attrib.get('property') == 'v:average',
doc.xpath('//span[@class="score_value"]'))
score = [s for s in doc.xpath('//span[@class="score_value"]')
if s.attrib.get('property') == 'v:average']
if score:
score = int(score[0].text)
else:
score = -1
authors = [a.text
for a in doc.xpath('//div[@class="review_content"]//div[@class="author"]//a')]
sources = [d.text
for d in doc.xpath('//div[@class="review_content"]//div[@class="source"]/a')]
reviews = [d.text
for d in doc.xpath('//div[@class="review_content"]//div[@class="review_body"]')]
scores = [int(d.text.strip())
for d in doc.xpath('//div[@class="review_content"]//div[contains(@class, "critscore")]')]
urls = [a.attrib['href']
for a in doc.xpath('//div[@class="review_content"]//a[contains(@class, "external")]')]
authors = [
a.text
for a in doc.xpath('//div[@class="review_content"]//div[@class="author"]//a')
]
sources = [
d.text
for d in doc.xpath('//div[@class="review_content"]//div[@class="source"]/a')
]
reviews = [
d.text
for d in doc.xpath('//div[@class="review_content"]//div[@class="review_body"]')
]
scores = [
int(d.text.strip())
for d in doc.xpath('//div[@class="review_content"]//div[contains(@class, "critscore")]')
]
urls = [
a.attrib['href']
for a in doc.xpath('//div[@class="review_content"]//a[contains(@class, "external")]')
]
metacritics = []
for i in range(len(authors)):
@@ -54,7 +64,7 @@ def get_data(url):
'quote': strip_tags(reviews[i]).strip(),
'score': scores[i],
})
return {
'critics': metacritics,
'id': get_id(url),

View file

@@ -32,7 +32,7 @@ def get_data(url):
r['summary'] = get_og(data, 'description')
meter = re.compile('<span id="all-critics-meter" class="meter(.*?)">(.*?)</span>').findall(data)
meter = filter(lambda m: m[1].isdigit(), meter)
meter = [m for m in meter if m[1].isdigit()]
if meter:
r['tomatometer'] = meter[0][1]
r['rating'] = find_re(data, 'Average Rating: <span>([\d.]+)/10</span>')

View file

@@ -95,7 +95,7 @@ def format_subsection(string):
'ussports': 'US-Sports',
'wunderbar': 'wunderBAR'
}
if subsection.has_key(string):
if string in subsection:
return subsection[string].replace(u'\xc3', 'ae')
return string[:1].upper() + string[1:]
@@ -219,8 +219,8 @@ def archive_news():
else:
dMax = days[m]
for d in range(dMax, 0, -1):
print('getNews(%d, %d, %d)' % (y, m, d))
news = getNews(y, m ,d)
print('get_news(%d, %d, %d)' % (y, m, d))
news = get_news(y, m, d)
for new in news:
dirname = archivePath + '/' + new['date'][0:4] + '/' + new['date'][5:7] + new['date'][8:10] + '/' + new['date'][11:13] + new['date'][14:16]
if not os.path.exists(dirname):
@@ -230,7 +230,7 @@ def archive_news():
else:
filename = dirname + '/' + new['url'] + '.json'
if not os.path.exists(filename) or True:
data = json.dumps(new, ensure_ascii = False)
data = json.dumps(new, ensure_ascii=False)
f = open(filename, 'w')
f.write(data)
f.close()
@@ -253,7 +253,7 @@ def archive_news():
string = strings[3]
if len(strings) == 6:
string += '/' + strings[4]
if not count.has_key(string):
if string not in count:
count[string] = {'count': 1, 'string': '%s %s http://www.spiegel.de/%s/0,1518,archiv-%d-%03d,00.html' % (new['date'], new['date'], new['section'].lower(), y, int(datetime(y, m, d).strftime('%j')))}
else:
count[string] = {'count': count[string]['count'] + 1, 'string': '%s %s' % (new['date'], count[string]['string'][17:])}
@@ -269,12 +269,12 @@ if __name__ == '__main__':
# spiegel = Spiegel(2008, 8)
# print(spiegel.getContents())
# news = News(2001, 9, 10)
# output(news.getNews())
# output(news.get_news())
'''
x = []
for d in range(10, 30):
print('2/%d' % d)
news = getNews(2008, 2, d)
news = get_news(2008, 2, d)
for new in news:
strings = new['url'].split('/')
string = format_section(strings[3])

View file

@@ -27,15 +27,15 @@ def video_url(youtubeId, format='mp4', timeout=cache_timeout):
"""
fmt = None
if format == '4k':
fmt=38
fmt = 38
elif format == '1080p':
fmt=37
fmt = 37
elif format == '720p':
fmt=22
fmt = 22
elif format == 'mp4':
fmt=18
fmt = 18
elif format == 'high':
fmt=35
fmt = 35
elif format == 'webm':
streams = videos(youtubeId, 'webm')
return streams[max(streams.keys())]['url']