from __future__ import print_function
This commit is contained in:
parent
a9002374b1
commit
ec252440d9
14 changed files with 49 additions and 34 deletions
|
@ -1,5 +1,6 @@
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
# vi:si:et:sw=4:sts=4:ts=4
|
# vi:si:et:sw=4:sts=4:ts=4
|
||||||
|
from __future__ import print_function
|
||||||
import datetime
|
import datetime
|
||||||
from django.utils import datetime_safe
|
from django.utils import datetime_safe
|
||||||
from django.http import HttpResponse, Http404
|
from django.http import HttpResponse, Http404
|
||||||
|
@ -34,7 +35,7 @@ def render_to_json_response(dictionary, content_type="text/json", status=200):
|
||||||
content_type = "text/javascript"
|
content_type = "text/javascript"
|
||||||
indent = 2
|
indent = 2
|
||||||
if getattr(settings, 'JSON_DEBUG', False):
|
if getattr(settings, 'JSON_DEBUG', False):
|
||||||
print json.dumps(dictionary, indent=2, default=_to_json, ensure_ascii=False).encode('utf-8')
|
print(json.dumps(dictionary, indent=2, default=_to_json, ensure_ascii=False).encode('utf-8'))
|
||||||
|
|
||||||
return HttpResponse(json.dumps(dictionary, indent=indent, default=_to_json,
|
return HttpResponse(json.dumps(dictionary, indent=indent, default=_to_json,
|
||||||
ensure_ascii=False).encode('utf-8'), content_type=content_type, status=status)
|
ensure_ascii=False).encode('utf-8'), content_type=content_type, status=status)
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
# vi:si:et:sw=4:sts=4:ts=4
|
# vi:si:et:sw=4:sts=4:ts=4
|
||||||
|
from __future__ import print_function
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from ox import strip_tags, find_re
|
from ox import strip_tags, find_re
|
||||||
|
@ -80,6 +81,6 @@ def parse_text(html, title):
|
||||||
return strip_tags(find_re(html, '%s</td>.*?<td colspan="2"><p>(.*?)</td>' % title)).strip()
|
return strip_tags(find_re(html, '%s</td>.*?<td colspan="2"><p>(.*?)</td>' % title)).strip()
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
print get_data('129689')
|
print(get_data('129689'))
|
||||||
# print get_data('177524')
|
# print(get_data('177524'))
|
||||||
|
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
# vi:si:et:sw=4:sts=4:ts=4
|
# vi:si:et:sw=4:sts=4:ts=4
|
||||||
|
from __future__ import print_function
|
||||||
import re
|
import re
|
||||||
from six.moves.urllib.parse import quote
|
from six.moves.urllib.parse import quote
|
||||||
|
|
||||||
|
@ -34,7 +35,7 @@ def get_data(id):
|
||||||
r['authors'] = []
|
r['authors'] = []
|
||||||
doc = lxml.html.document_fromstring(data)
|
doc = lxml.html.document_fromstring(data)
|
||||||
for e in doc.xpath("//span[contains(@class, 'author')]"):
|
for e in doc.xpath("//span[contains(@class, 'author')]"):
|
||||||
print e
|
print(e)
|
||||||
for secondary in e.xpath(".//span[contains(@class, 'a-color-secondary')]"):
|
for secondary in e.xpath(".//span[contains(@class, 'a-color-secondary')]"):
|
||||||
if 'Author' in secondary.text:
|
if 'Author' in secondary.text:
|
||||||
author = e.xpath(".//span[contains(@class, 'a-size-medium')]")
|
author = e.xpath(".//span[contains(@class, 'a-size-medium')]")
|
||||||
|
|
|
@ -1,3 +1,4 @@
|
||||||
|
from __future__ import print_function
|
||||||
import json
|
import json
|
||||||
import re
|
import re
|
||||||
|
|
||||||
|
@ -60,8 +61,8 @@ def get_movie_data(title, director):
|
||||||
return data
|
return data
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
print get_movie_data('Alphaville', 'Jean-Luc Godard')
|
print(get_movie_data('Alphaville', 'Jean-Luc Godard'))
|
||||||
print get_movie_data('Sin City', 'Roberto Rodriguez')
|
print(get_movie_data('Sin City', 'Roberto Rodriguez'))
|
||||||
print get_movie_data('Breathless', 'Jean-Luc Godard')
|
print(get_movie_data('Breathless', 'Jean-Luc Godard'))
|
||||||
print get_movie_data('Capitalism: A Love Story', 'Michael Moore')
|
print(get_movie_data('Capitalism: A Love Story', 'Michael Moore'))
|
||||||
print get_movie_data('Film Socialisme', 'Jean-Luc Godard')
|
print(get_movie_data('Film Socialisme', 'Jean-Luc Godard'))
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
# vi:si:et:sw=4:sts=4:ts=4
|
# vi:si:et:sw=4:sts=4:ts=4
|
||||||
|
from __future__ import print_function
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
|
@ -60,11 +61,11 @@ def backup(filename):
|
||||||
if info:
|
if info:
|
||||||
data[i] = info
|
data[i] = info
|
||||||
if len(data) % 10 == 0:
|
if len(data) % 10 == 0:
|
||||||
print 'save', filename, len(data)
|
print('save', filename, len(data))
|
||||||
with open(filename, 'w') as f:
|
with open(filename, 'w') as f:
|
||||||
json.dump(data, f)
|
json.dump(data, f)
|
||||||
else:
|
else:
|
||||||
print 'ignore', i
|
print('ignore', i)
|
||||||
with open(filename, 'w') as f:
|
with open(filename, 'w') as f:
|
||||||
json.dump(data, f)
|
json.dump(data, f)
|
||||||
return data
|
return data
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
# vi:si:et:sw=4:sts=4:ts=4
|
# vi:si:et:sw=4:sts=4:ts=4
|
||||||
# GPL 2009
|
# GPL 2009
|
||||||
|
from __future__ import print_function
|
||||||
import os
|
import os
|
||||||
|
|
||||||
from ox.utils import json
|
from ox.utils import json
|
||||||
|
@ -15,7 +16,7 @@ def get(key):
|
||||||
auth = json.loads(data)
|
auth = json.loads(data)
|
||||||
if key in auth:
|
if key in auth:
|
||||||
return auth[key]
|
return auth[key]
|
||||||
print "please add key %s to json file '%s'" % (key, user_auth)
|
print("please add key %s to json file '%s'" % (key, user_auth))
|
||||||
raise Exception,"no key %s found" % key
|
raise Exception,"no key %s found" % key
|
||||||
|
|
||||||
def update(key, value):
|
def update(key, value):
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
# -*- coding: UTF-8 -*-
|
# -*- coding: UTF-8 -*-
|
||||||
# vi:si:et:sw=4:sts=4:ts=4
|
# vi:si:et:sw=4:sts=4:ts=4
|
||||||
|
from __future__ import print_function
|
||||||
import re
|
import re
|
||||||
|
|
||||||
import ox.cache
|
import ox.cache
|
||||||
|
@ -97,4 +98,4 @@ def get_ids(page=None):
|
||||||
return sorted(set(ids), key=int)
|
return sorted(set(ids), key=int)
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
print get_ids()
|
print(get_ids())
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
# vi:si:et:sw=4:sts=4:ts=4
|
# vi:si:et:sw=4:sts=4:ts=4
|
||||||
|
from __future__ import print_function
|
||||||
import re
|
import re
|
||||||
import time
|
import time
|
||||||
|
|
||||||
|
@ -44,6 +45,6 @@ def get_show_data(url):
|
||||||
'title':episode[5],
|
'title':episode[5],
|
||||||
}
|
}
|
||||||
except:
|
except:
|
||||||
print "oxweb.epguides failed,", url
|
print("oxweb.epguides failed,", url)
|
||||||
return r
|
return r
|
||||||
|
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
# vi:si:et:sw=4:sts=4:ts=4
|
# vi:si:et:sw=4:sts=4:ts=4
|
||||||
# encoding: utf-8
|
# encoding: utf-8
|
||||||
|
from __future__ import print_function
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from ox.cache import read_url
|
from ox.cache import read_url
|
||||||
|
@ -297,4 +298,4 @@ _id_map = {
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
ids = get_ids()
|
ids = get_ids()
|
||||||
print sorted(ids), len(ids)
|
print(sorted(ids), len(ids))
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
# vi:si:et:sw=4:sts=4:ts=4
|
# vi:si:et:sw=4:sts=4:ts=4
|
||||||
# encoding: utf-8
|
# encoding: utf-8
|
||||||
|
from __future__ import print_function
|
||||||
import re
|
import re
|
||||||
import urllib
|
import urllib
|
||||||
|
|
||||||
|
@ -176,12 +177,12 @@ class ItunesMovie:
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
from ox.utils import json
|
from ox.utils import json
|
||||||
data = ItunesAlbum(title = 'So Red the Rose', artist = 'Arcadia').get_data()
|
data = ItunesAlbum(title = 'So Red the Rose', artist = 'Arcadia').get_data()
|
||||||
print json.dumps(data, sort_keys = True, indent = 4)
|
print(json.dumps(data, sort_keys = True, indent = 4))
|
||||||
data = ItunesMovie(title = 'The Matrix', director = 'Wachowski').get_data()
|
data = ItunesMovie(title = 'The Matrix', director = 'Wachowski').get_data()
|
||||||
print json.dumps(data, sort_keys = True, indent = 4)
|
print(json.dumps(data, sort_keys = True, indent = 4))
|
||||||
for v in data['relatedMovies']:
|
for v in data['relatedMovies']:
|
||||||
data = ItunesMovie(id = v['id']).get_data()
|
data = ItunesMovie(id = v['id']).get_data()
|
||||||
print json.dumps(data, sort_keys = True, indent = 4)
|
print(json.dumps(data, sort_keys = True, indent = 4))
|
||||||
data = ItunesMovie(id='272960052').get_data()
|
data = ItunesMovie(id='272960052').get_data()
|
||||||
print json.dumps(data, sort_keys = True, indent = 4)
|
print(json.dumps(data, sort_keys = True, indent = 4))
|
||||||
|
|
||||||
|
|
|
@ -1,5 +1,7 @@
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
# vi:si:et:sw=4:sts=4:ts=4
|
# vi:si:et:sw=4:sts=4:ts=4
|
||||||
|
from __future__ import print_function
|
||||||
|
|
||||||
from ox.cache import read_url
|
from ox.cache import read_url
|
||||||
from ox.html import decode_html
|
from ox.html import decode_html
|
||||||
from ox.text import find_re
|
from ox.text import find_re
|
||||||
|
@ -18,4 +20,4 @@ def get_lyrics(title, artist):
|
||||||
return lyrics
|
return lyrics
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
print getLyrics('Election Day', 'Arcadia')
|
print(get_lyrics('Election Day', 'Arcadia'))
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
# -*- coding: UTF-8 -*-
|
# -*- coding: UTF-8 -*-
|
||||||
# vi:si:et:sw=4:sts=4:ts=4
|
# vi:si:et:sw=4:sts=4:ts=4
|
||||||
|
from __future__ import print_function
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
|
@ -40,5 +41,5 @@ def get_url(id):
|
||||||
return "http://www.movieposterdb.com/movie/%s/" % id
|
return "http://www.movieposterdb.com/movie/%s/" % id
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
print get_data('0060304')
|
print(get_data('0060304'))
|
||||||
print get_data('0133093')
|
print(get_data('0133093'))
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
# vi:si:et:sw=4:sts=4:ts=4
|
# vi:si:et:sw=4:sts=4:ts=4
|
||||||
|
from __future__ import print_function
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
import re
|
import re
|
||||||
import time
|
import time
|
||||||
|
@ -58,12 +59,12 @@ def get_news(year, month, day):
|
||||||
if new['url'][:1] == '/':
|
if new['url'][:1] == '/':
|
||||||
new['url'] = 'http://www.spiegel.de' + new['url']
|
new['url'] = 'http://www.spiegel.de' + new['url']
|
||||||
news.append(new)
|
news.append(new)
|
||||||
# print '%s, %s' % (new['section'], dateString)
|
# print('%s, %s' % (new['section'], dateString))
|
||||||
'''
|
'''
|
||||||
elif dateString[:10] == date and not description:
|
elif dateString[:10] == date and not description:
|
||||||
print dateString + ' - no description'
|
print(dateString + ' - no description')
|
||||||
elif dateString[:10] == date and not imageUrl:
|
elif dateString[:10] == date and not imageUrl:
|
||||||
print dateString + ' - no image'
|
print(dateString + ' - no image')
|
||||||
'''
|
'''
|
||||||
return news
|
return news
|
||||||
|
|
||||||
|
@ -140,7 +141,7 @@ def archive_issues():
|
||||||
else:
|
else:
|
||||||
wMax = 53
|
wMax = 53
|
||||||
for w in range(wMax, 0, -1):
|
for w in range(wMax, 0, -1):
|
||||||
print 'get_issue(%d, %d)' % (y, w)
|
print('get_issue(%d, %d)' % (y, w))
|
||||||
issue = get_issue(y, w)
|
issue = get_issue(y, w)
|
||||||
if issue:
|
if issue:
|
||||||
dirname = '%s/%d/%02d' % (archivePath, y, w)
|
dirname = '%s/%d/%02d' % (archivePath, y, w)
|
||||||
|
@ -185,7 +186,7 @@ def archive_issues():
|
||||||
p['min'] = issue['pages']
|
p['min'] = issue['pages']
|
||||||
if issue['pages'] > p['max']:
|
if issue['pages'] > p['max']:
|
||||||
p['max'] = issue['pages']
|
p['max'] = issue['pages']
|
||||||
print p['min'], p['sum'] / p['num'], p['max']
|
print(p['min'], p['sum'] / p['num'], p['max'])
|
||||||
|
|
||||||
|
|
||||||
def archive_news():
|
def archive_news():
|
||||||
|
@ -218,7 +219,7 @@ def archive_news():
|
||||||
else:
|
else:
|
||||||
dMax = days[m]
|
dMax = days[m]
|
||||||
for d in range(dMax, 0, -1):
|
for d in range(dMax, 0, -1):
|
||||||
print 'getNews(%d, %d, %d)' % (y, m, d)
|
print('getNews(%d, %d, %d)' % (y, m, d))
|
||||||
news = getNews(y, m ,d)
|
news = getNews(y, m ,d)
|
||||||
for new in news:
|
for new in news:
|
||||||
dirname = archivePath + '/' + new['date'][0:4] + '/' + new['date'][5:7] + new['date'][8:10] + '/' + new['date'][11:13] + new['date'][14:16]
|
dirname = archivePath + '/' + new['date'][0:4] + '/' + new['date'][5:7] + new['date'][8:10] + '/' + new['date'][11:13] + new['date'][14:16]
|
||||||
|
@ -260,19 +261,19 @@ def archive_news():
|
||||||
if strings[0] != new['title1'] or strings[1] != new['title2']:
|
if strings[0] != new['title1'] or strings[1] != new['title2']:
|
||||||
colon.append('%s %s %s: %s' % (new['date'], new['title'], new['title1'], new['title2']))
|
colon.append('%s %s %s: %s' % (new['date'], new['title'], new['title1'], new['title2']))
|
||||||
for key in sorted(count):
|
for key in sorted(count):
|
||||||
print '%6d %-24s %s' % (count[key]['count'], key, count[key]['string'])
|
print('%6d %-24s %s' % (count[key]['count'], key, count[key]['string']))
|
||||||
for value in colon:
|
for value in colon:
|
||||||
print value
|
print(value)
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
# spiegel = Spiegel(2008, 8)
|
# spiegel = Spiegel(2008, 8)
|
||||||
# print spiegel.getContents()
|
# print(spiegel.getContents())
|
||||||
# news = News(2001, 9, 10)
|
# news = News(2001, 9, 10)
|
||||||
# output(news.getNews())
|
# output(news.getNews())
|
||||||
'''
|
'''
|
||||||
x = []
|
x = []
|
||||||
for d in range(10, 30):
|
for d in range(10, 30):
|
||||||
print '2/%d' % d
|
print('2/%d' % d)
|
||||||
news = getNews(2008, 2, d)
|
news = getNews(2008, 2, d)
|
||||||
for new in news:
|
for new in news:
|
||||||
strings = new['url'].split('/')
|
strings = new['url'].split('/')
|
||||||
|
@ -281,7 +282,7 @@ if __name__ == '__main__':
|
||||||
string += '/' + format_subsection(strings[4])
|
string += '/' + format_subsection(strings[4])
|
||||||
if not string in x:
|
if not string in x:
|
||||||
x.append(string)
|
x.append(string)
|
||||||
print x
|
print(x)
|
||||||
'''
|
'''
|
||||||
# archive_issues()
|
# archive_issues()
|
||||||
archive_news()
|
archive_news()
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
# vi:si:et:sw=4:sts=4:ts=4
|
# vi:si:et:sw=4:sts=4:ts=4
|
||||||
|
from __future__ import print_function
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from ox import find_re, strip_tags, decode_html
|
from ox import find_re, strip_tags, decode_html
|
||||||
|
@ -31,7 +32,7 @@ def get_data(url):
|
||||||
del m['video']
|
del m['video']
|
||||||
m['title'] = strip_tags(decode_html(title)).strip()
|
m['title'] = strip_tags(decode_html(title)).strip()
|
||||||
if not 'url' in m:
|
if not 'url' in m:
|
||||||
print url, 'missing'
|
print(url, 'missing')
|
||||||
if 'title' in m:
|
if 'title' in m:
|
||||||
m['title'] = re.sub('(.*?) \(\d{4}\)$', '\\1', m['title'])
|
m['title'] = re.sub('(.*?) \(\d{4}\)$', '\\1', m['title'])
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue