from __future__ import print_function

This commit is contained in:
j 2014-09-30 21:27:26 +02:00
parent a9002374b1
commit ec252440d9
14 changed files with 49 additions and 34 deletions

View file

@ -1,5 +1,6 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4 # vi:si:et:sw=4:sts=4:ts=4
from __future__ import print_function
import datetime import datetime
from django.utils import datetime_safe from django.utils import datetime_safe
from django.http import HttpResponse, Http404 from django.http import HttpResponse, Http404
@ -34,7 +35,7 @@ def render_to_json_response(dictionary, content_type="text/json", status=200):
content_type = "text/javascript" content_type = "text/javascript"
indent = 2 indent = 2
if getattr(settings, 'JSON_DEBUG', False): if getattr(settings, 'JSON_DEBUG', False):
print json.dumps(dictionary, indent=2, default=_to_json, ensure_ascii=False).encode('utf-8') print(json.dumps(dictionary, indent=2, default=_to_json, ensure_ascii=False).encode('utf-8'))
return HttpResponse(json.dumps(dictionary, indent=indent, default=_to_json, return HttpResponse(json.dumps(dictionary, indent=indent, default=_to_json,
ensure_ascii=False).encode('utf-8'), content_type=content_type, status=status) ensure_ascii=False).encode('utf-8'), content_type=content_type, status=status)

View file

@ -1,5 +1,6 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4 # vi:si:et:sw=4:sts=4:ts=4
from __future__ import print_function
import re import re
from ox import strip_tags, find_re from ox import strip_tags, find_re
@ -80,6 +81,6 @@ def parse_text(html, title):
return strip_tags(find_re(html, '%s</td>.*?<td colspan="2"><p>(.*?)</td>' % title)).strip() return strip_tags(find_re(html, '%s</td>.*?<td colspan="2"><p>(.*?)</td>' % title)).strip()
if __name__ == '__main__': if __name__ == '__main__':
print get_data('129689') print(get_data('129689'))
# print get_data('177524') # print(get_data('177524'))

View file

@ -1,5 +1,6 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4 # vi:si:et:sw=4:sts=4:ts=4
from __future__ import print_function
import re import re
from six.moves.urllib.parse import quote from six.moves.urllib.parse import quote
@ -34,7 +35,7 @@ def get_data(id):
r['authors'] = [] r['authors'] = []
doc = lxml.html.document_fromstring(data) doc = lxml.html.document_fromstring(data)
for e in doc.xpath("//span[contains(@class, 'author')]"): for e in doc.xpath("//span[contains(@class, 'author')]"):
print e print(e)
for secondary in e.xpath(".//span[contains(@class, 'a-color-secondary')]"): for secondary in e.xpath(".//span[contains(@class, 'a-color-secondary')]"):
if 'Author' in secondary.text: if 'Author' in secondary.text:
author = e.xpath(".//span[contains(@class, 'a-size-medium')]") author = e.xpath(".//span[contains(@class, 'a-size-medium')]")

View file

@ -1,3 +1,4 @@
from __future__ import print_function
import json import json
import re import re
@ -60,8 +61,8 @@ def get_movie_data(title, director):
return data return data
if __name__ == '__main__': if __name__ == '__main__':
print get_movie_data('Alphaville', 'Jean-Luc Godard') print(get_movie_data('Alphaville', 'Jean-Luc Godard'))
print get_movie_data('Sin City', 'Roberto Rodriguez') print(get_movie_data('Sin City', 'Roberto Rodriguez'))
print get_movie_data('Breathless', 'Jean-Luc Godard') print(get_movie_data('Breathless', 'Jean-Luc Godard'))
print get_movie_data('Capitalism: A Love Story', 'Michael Moore') print(get_movie_data('Capitalism: A Love Story', 'Michael Moore'))
print get_movie_data('Film Socialisme', 'Jean-Luc Godard') print(get_movie_data('Film Socialisme', 'Jean-Luc Godard'))

View file

@ -1,5 +1,6 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4 # vi:si:et:sw=4:sts=4:ts=4
from __future__ import print_function
import json import json
import os import os
import re import re
@ -60,11 +61,11 @@ def backup(filename):
if info: if info:
data[i] = info data[i] = info
if len(data) % 10 == 0: if len(data) % 10 == 0:
print 'save', filename, len(data) print('save', filename, len(data))
with open(filename, 'w') as f: with open(filename, 'w') as f:
json.dump(data, f) json.dump(data, f)
else: else:
print 'ignore', i print('ignore', i)
with open(filename, 'w') as f: with open(filename, 'w') as f:
json.dump(data, f) json.dump(data, f)
return data return data

View file

@ -1,6 +1,7 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4 # vi:si:et:sw=4:sts=4:ts=4
# GPL 2009 # GPL 2009
from __future__ import print_function
import os import os
from ox.utils import json from ox.utils import json
@ -15,7 +16,7 @@ def get(key):
auth = json.loads(data) auth = json.loads(data)
if key in auth: if key in auth:
return auth[key] return auth[key]
print "please add key %s to json file '%s'" % (key, user_auth) print("please add key %s to json file '%s'" % (key, user_auth))
raise Exception,"no key %s found" % key raise Exception,"no key %s found" % key
def update(key, value): def update(key, value):

View file

@ -1,5 +1,6 @@
# -*- coding: UTF-8 -*- # -*- coding: UTF-8 -*-
# vi:si:et:sw=4:sts=4:ts=4 # vi:si:et:sw=4:sts=4:ts=4
from __future__ import print_function
import re import re
import ox.cache import ox.cache
@ -97,4 +98,4 @@ def get_ids(page=None):
return sorted(set(ids), key=int) return sorted(set(ids), key=int)
if __name__ == '__main__': if __name__ == '__main__':
print get_ids() print(get_ids())

View file

@ -1,5 +1,6 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4 # vi:si:et:sw=4:sts=4:ts=4
from __future__ import print_function
import re import re
import time import time
@ -44,6 +45,6 @@ def get_show_data(url):
'title':episode[5], 'title':episode[5],
} }
except: except:
print "oxweb.epguides failed,", url print("oxweb.epguides failed,", url)
return r return r

View file

@ -1,5 +1,6 @@
# vi:si:et:sw=4:sts=4:ts=4 # vi:si:et:sw=4:sts=4:ts=4
# encoding: utf-8 # encoding: utf-8
from __future__ import print_function
import re import re
from ox.cache import read_url from ox.cache import read_url
@ -297,4 +298,4 @@ _id_map = {
if __name__ == '__main__': if __name__ == '__main__':
ids = get_ids() ids = get_ids()
print sorted(ids), len(ids) print(sorted(ids), len(ids))

View file

@ -1,5 +1,6 @@
# vi:si:et:sw=4:sts=4:ts=4 # vi:si:et:sw=4:sts=4:ts=4
# encoding: utf-8 # encoding: utf-8
from __future__ import print_function
import re import re
import urllib import urllib
@ -176,12 +177,12 @@ class ItunesMovie:
if __name__ == '__main__': if __name__ == '__main__':
from ox.utils import json from ox.utils import json
data = ItunesAlbum(title = 'So Red the Rose', artist = 'Arcadia').get_data() data = ItunesAlbum(title = 'So Red the Rose', artist = 'Arcadia').get_data()
print json.dumps(data, sort_keys = True, indent = 4) print(json.dumps(data, sort_keys = True, indent = 4))
data = ItunesMovie(title = 'The Matrix', director = 'Wachowski').get_data() data = ItunesMovie(title = 'The Matrix', director = 'Wachowski').get_data()
print json.dumps(data, sort_keys = True, indent = 4) print(json.dumps(data, sort_keys = True, indent = 4))
for v in data['relatedMovies']: for v in data['relatedMovies']:
data = ItunesMovie(id = v['id']).get_data() data = ItunesMovie(id = v['id']).get_data()
print json.dumps(data, sort_keys = True, indent = 4) print(json.dumps(data, sort_keys = True, indent = 4))
data = ItunesMovie(id='272960052').get_data() data = ItunesMovie(id='272960052').get_data()
print json.dumps(data, sort_keys = True, indent = 4) print(json.dumps(data, sort_keys = True, indent = 4))

View file

@ -1,5 +1,7 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4 # vi:si:et:sw=4:sts=4:ts=4
from __future__ import print_function
from ox.cache import read_url from ox.cache import read_url
from ox.html import decode_html from ox.html import decode_html
from ox.text import find_re from ox.text import find_re
@ -18,4 +20,4 @@ def get_lyrics(title, artist):
return lyrics return lyrics
if __name__ == '__main__': if __name__ == '__main__':
print getLyrics('Election Day', 'Arcadia') print(get_lyrics('Election Day', 'Arcadia'))

View file

@ -1,5 +1,6 @@
# -*- coding: UTF-8 -*- # -*- coding: UTF-8 -*-
# vi:si:et:sw=4:sts=4:ts=4 # vi:si:et:sw=4:sts=4:ts=4
from __future__ import print_function
import re import re
@ -40,5 +41,5 @@ def get_url(id):
return "http://www.movieposterdb.com/movie/%s/" % id return "http://www.movieposterdb.com/movie/%s/" % id
if __name__ == '__main__': if __name__ == '__main__':
print get_data('0060304') print(get_data('0060304'))
print get_data('0133093') print(get_data('0133093'))

View file

@ -1,5 +1,6 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4 # vi:si:et:sw=4:sts=4:ts=4
from __future__ import print_function
from datetime import datetime from datetime import datetime
import re import re
import time import time
@ -58,12 +59,12 @@ def get_news(year, month, day):
if new['url'][:1] == '/': if new['url'][:1] == '/':
new['url'] = 'http://www.spiegel.de' + new['url'] new['url'] = 'http://www.spiegel.de' + new['url']
news.append(new) news.append(new)
# print '%s, %s' % (new['section'], dateString) # print('%s, %s' % (new['section'], dateString))
''' '''
elif dateString[:10] == date and not description: elif dateString[:10] == date and not description:
print dateString + ' - no description' print(dateString + ' - no description')
elif dateString[:10] == date and not imageUrl: elif dateString[:10] == date and not imageUrl:
print dateString + ' - no image' print(dateString + ' - no image')
''' '''
return news return news
@ -140,7 +141,7 @@ def archive_issues():
else: else:
wMax = 53 wMax = 53
for w in range(wMax, 0, -1): for w in range(wMax, 0, -1):
print 'get_issue(%d, %d)' % (y, w) print('get_issue(%d, %d)' % (y, w))
issue = get_issue(y, w) issue = get_issue(y, w)
if issue: if issue:
dirname = '%s/%d/%02d' % (archivePath, y, w) dirname = '%s/%d/%02d' % (archivePath, y, w)
@ -185,7 +186,7 @@ def archive_issues():
p['min'] = issue['pages'] p['min'] = issue['pages']
if issue['pages'] > p['max']: if issue['pages'] > p['max']:
p['max'] = issue['pages'] p['max'] = issue['pages']
print p['min'], p['sum'] / p['num'], p['max'] print(p['min'], p['sum'] / p['num'], p['max'])
def archive_news(): def archive_news():
@ -218,7 +219,7 @@ def archive_news():
else: else:
dMax = days[m] dMax = days[m]
for d in range(dMax, 0, -1): for d in range(dMax, 0, -1):
print 'getNews(%d, %d, %d)' % (y, m, d) print('getNews(%d, %d, %d)' % (y, m, d))
news = getNews(y, m ,d) news = getNews(y, m ,d)
for new in news: for new in news:
dirname = archivePath + '/' + new['date'][0:4] + '/' + new['date'][5:7] + new['date'][8:10] + '/' + new['date'][11:13] + new['date'][14:16] dirname = archivePath + '/' + new['date'][0:4] + '/' + new['date'][5:7] + new['date'][8:10] + '/' + new['date'][11:13] + new['date'][14:16]
@ -260,19 +261,19 @@ def archive_news():
if strings[0] != new['title1'] or strings[1] != new['title2']: if strings[0] != new['title1'] or strings[1] != new['title2']:
colon.append('%s %s %s: %s' % (new['date'], new['title'], new['title1'], new['title2'])) colon.append('%s %s %s: %s' % (new['date'], new['title'], new['title1'], new['title2']))
for key in sorted(count): for key in sorted(count):
print '%6d %-24s %s' % (count[key]['count'], key, count[key]['string']) print('%6d %-24s %s' % (count[key]['count'], key, count[key]['string']))
for value in colon: for value in colon:
print value print(value)
if __name__ == '__main__': if __name__ == '__main__':
# spiegel = Spiegel(2008, 8) # spiegel = Spiegel(2008, 8)
# print spiegel.getContents() # print(spiegel.getContents())
# news = News(2001, 9, 10) # news = News(2001, 9, 10)
# output(news.getNews()) # output(news.getNews())
''' '''
x = [] x = []
for d in range(10, 30): for d in range(10, 30):
print '2/%d' % d print('2/%d' % d)
news = getNews(2008, 2, d) news = getNews(2008, 2, d)
for new in news: for new in news:
strings = new['url'].split('/') strings = new['url'].split('/')
@ -281,7 +282,7 @@ if __name__ == '__main__':
string += '/' + format_subsection(strings[4]) string += '/' + format_subsection(strings[4])
if not string in x: if not string in x:
x.append(string) x.append(string)
print x print(x)
''' '''
# archive_issues() # archive_issues()
archive_news() archive_news()

View file

@ -1,5 +1,6 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4 # vi:si:et:sw=4:sts=4:ts=4
from __future__ import print_function
import re import re
from ox import find_re, strip_tags, decode_html from ox import find_re, strip_tags, decode_html
@ -31,7 +32,7 @@ def get_data(url):
del m['video'] del m['video']
m['title'] = strip_tags(decode_html(title)).strip() m['title'] = strip_tags(decode_html(title)).strip()
if not 'url' in m: if not 'url' in m:
print url, 'missing' print(url, 'missing')
if 'title' in m: if 'title' in m:
m['title'] = re.sub('(.*?) \(\d{4}\)$', '\\1', m['title']) m['title'] = re.sub('(.*?) \(\d{4}\)$', '\\1', m['title'])