30 lines
926 B
Python
30 lines
926 B
Python
|
# -*- coding: utf-8 -*-
|
||
|
# vi:si:et:sw=4:sts=4:ts=4
|
||
|
import re
|
||
|
|
||
|
import ox
|
||
|
from ox.cache import read_url, cache_timeout
|
||
|
|
||
|
|
||
|
def subreddit(name, offset=0, n=0, timeout=cache_timeout):
    """Scrape link titles and URLs from a subreddit's HTML listing pages.

    Fetches ``http://www.reddit.com/r/<name>/`` and keeps following the
    page's "next" link, collecting every title link found along the way.

    name    -- subreddit name, interpolated into the listing URL
    offset  -- if non-zero, appended to the first URL as ``?count=<offset>``
    n       -- stop following "next" links once at least ``n`` links have
               been collected; 0 means follow them until none is found.
               Whole pages are appended, so the result may hold more than
               ``n`` entries.
    timeout -- forwarded to ``read_url`` for every page request

    Returns a list of ``{'url': ..., 'title': ...}`` dicts in page order,
    with both values passed through ``ox.decode_html``.
    """
    # Raw strings keep the regex escapes out of Python's own string-escape
    # processing; the patterns are compiled once instead of once per page.
    link_re = re.compile(r'<a class="title " href="(.*?)".*?>(.*?)<\/a>')
    next_re = re.compile(r'<a href="(.*?)" rel="nofollow next" >next ›<\/a>')

    url = 'http://www.reddit.com/r/%s/' % name
    if offset:
        url += '?count=%d' % offset

    links = []
    while True:
        # Every page, not just the first, honours the caller's timeout.
        data = read_url(url, unicode=True, timeout=timeout)
        links += [{
            'url': ox.decode_html(href),
            'title': ox.decode_html(title)
        } for href, title in link_re.findall(data)]

        next_links = next_re.findall(data)
        if not next_links or (n != 0 and len(links) >= n):
            break
        # The lazy group can begin at an earlier '<a href="' and swallow the
        # markup in between; presumably the real target is whatever follows
        # the last double quote of the captured text.
        url = ox.decode_html(next_links[0].split('"')[-1])
    return links
|