Open Media Library Platform

This commit is contained in:
j 2013-10-11 19:28:32 +02:00
commit 411ad5b16f
5849 changed files with 1778641 additions and 0 deletions

View file

@ -0,0 +1,29 @@
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
import re
import ox
from ox.cache import read_url, cache_timeout
def subreddit(name, offset=0, n=0, timeout=cache_timeout):
    """Collect the title links of a subreddit, following "next" pages.

    Fetches ``http://www.reddit.com/r/<name>/`` and keeps following the
    page's "next" link, scraping every title anchor found on each page.

    name    -- subreddit name, interpolated into the URL path
    offset  -- if non-zero, appended to the first URL as ``?count=<offset>``
    n       -- stop threshold: once at least ``n`` links have been collected
               no further page is fetched; 0 means follow "next" links until
               a page has none. The result is NOT truncated to ``n``; every
               link from the last fetched page is kept.
    timeout -- forwarded to ``read_url`` for every page request

    Returns a list of ``{'url': ..., 'title': ...}`` dicts, both values passed
    through ``ox.decode_html``, in page order.
    """
    # Compiled once, outside the paging loop. Raw strings keep the pattern
    # text unchanged while avoiding the invalid '\/' escape in a plain literal.
    title_re = re.compile(r'<a class="title " href="(.*?)".*?>(.*?)<\/a>')
    next_re = re.compile(r'<a href="(.*?)" rel="nofollow next" >next &rsaquo;<\/a>')

    url = 'http://www.reddit.com/r/%s/' % name
    if offset:
        url += '?count=%d' % offset

    links = []
    while True:
        # Same timeout for the first and all follow-up pages.
        data = read_url(url, unicode=True, timeout=timeout)
        links += [{
            'url': ox.decode_html(href),
            'title': ox.decode_html(title)
        } for href, title in title_re.findall(data)]

        next_links = next_re.findall(data)
        if not next_links or (n != 0 and len(links) >= n):
            break
        # The lazy group starts at the first '<a href="' the search reaches,
        # so the capture can contain earlier markup; the text after the last
        # double quote is the href of the actual "next" anchor.
        url = ox.decode_html(next_links[0].split('"')[-1])
    return links