openmedialibrary_platform/Shared/lib/python2.7/site-packages/ox/web/reddit.py

30 lines
926 B
Python
Raw Normal View History

2013-10-11 17:28:32 +00:00
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
import re
import ox
from ox.cache import read_url, cache_timeout
def subreddit(name, offset=0, n=0, timeout=cache_timeout):
    """Scrape link titles and URLs from a subreddit's HTML listing.

    Fetches ``http://www.reddit.com/r/<name>/`` and keeps following the
    page's "next" link until there is none, or until at least ``n`` links
    have been collected.

    name    -- subreddit name, interpolated into the listing URL
    offset  -- if non-zero, appended to the first URL as ``?count=<offset>``
    n       -- stop paging once this many links were collected; 0 means
               follow "next" links until the listing ends.  Whole pages
               are appended, so the result may hold more than ``n`` items.
    timeout -- passed to ``read_url`` for every page request
               (presumably the cache lifetime -- confirm in ox.cache)

    Returns a list of ``{'url': ..., 'title': ...}`` dicts in page order,
    with HTML entities decoded via ``ox.decode_html``.
    """
    # Compile once, outside the paging loop.  Raw strings keep the regex
    # escape ``\/`` from being read as a (invalid) string escape.
    link_re = re.compile(r'<a class="title " href="(.*?)".*?>(.*?)<\/a>')
    next_re = re.compile(
        r'<a href="(.*?)" rel="nofollow next" >next &rsaquo;<\/a>')

    url = 'http://www.reddit.com/r/%s/' % name
    if offset:
        url += '?count=%d' % offset

    links = []
    while True:
        # Use the caller's timeout for every page, not only the first one.
        data = read_url(url, unicode=True, timeout=timeout)
        links += [{
            'url': ox.decode_html(href),
            'title': ox.decode_html(title)
        } for href, title in link_re.findall(data)]

        next_links = next_re.findall(data)
        if not next_links or (n != 0 and len(links) >= n):
            break
        # The non-greedy group can still begin at an earlier '<a href="' on
        # the page; the text after the last double quote is the href that
        # belongs to the "next" anchor itself.
        url = ox.decode_html(next_links[0].split('"')[-1])
    return links