# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
import re

import ox
from ox.cache import read_url, cache_timeout

# Compiled once at import time instead of on every page of the paging loop.
# Raw strings keep the pattern text identical while avoiding the invalid
# "\/" escape sequence in a normal string literal.
#
# NOTE(review): this pattern has a single capture group, so findall() yields
# plain strings and the [0]/[1] indexing in subreddit() picks characters, not
# (url, title) pairs. It looks like the HTML markup of the pattern was lost
# (presumably an href group and a title group) -- confirm against upstream.
_LINK_RE = re.compile(r'(.*?)<\/a>')

# NOTE(review): no capture group and no quoted href here, although
# subreddit() splits the match on '"' to obtain the next page URL -- this
# pattern presumably lost its markup as well; confirm against upstream.
_NEXT_RE = re.compile(r'next ›<\/a>')


def subreddit(name, offset=0, n=0, timeout=cache_timeout):
    """Collect links from a subreddit listing, following "next" pagination.

    name    -- subreddit name, interpolated into
               'http://www.reddit.com/r/<name>/'.
    offset  -- if non-zero, appended to the first URL as '?count=<offset>'.
    n       -- paging stops once at least n links have been collected;
               0 means keep going until no "next" link is found. The result
               is not truncated, so it may hold more than n entries.
    timeout -- passed to read_url() for every page that is fetched.

    Returns a list of dicts with the keys 'url' and 'title', both passed
    through ox.decode_html().
    """
    url = 'http://www.reddit.com/r/%s/' % name
    if offset:
        url += '?count=%d' % offset
    data = read_url(url, unicode=True, timeout=timeout)

    links = []
    while True:
        links += [{
            'url': ox.decode_html(match[0]),
            'title': ox.decode_html(match[1])
        } for match in _LINK_RE.findall(data)]

        next_links = _NEXT_RE.findall(data)
        # Stop when there is no further page or enough links were gathered.
        if not next_links or (n != 0 and len(links) >= n):
            break

        url = ox.decode_html(next_links[0].split('"')[-1])
        # Use the caller's timeout for follow-up pages too; previously only
        # the first request honoured it.
        data = read_url(url, unicode=True, timeout=timeout)
    return links