openmedialibrary_platform/Shared/lib/python2.7/site-packages/ox/web/reddit.py

# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
import re

import ox
from ox.cache import read_url, cache_timeout


def subreddit(name, offset=0, n=0, timeout=cache_timeout):
    """Scrape link URLs and titles from a subreddit's HTML listing pages.

    Fetches ``http://www.reddit.com/r/<name>/`` and keeps following the
    page's "next" link, collecting every title link found on each page.

    name    -- subreddit name, interpolated into the listing URL
    offset  -- if non-zero, appended to the first URL as ``?count=<offset>``
    n       -- if non-zero, stop following "next" links once at least ``n``
               links have been collected; the result is NOT truncated to
               ``n``, so it may contain more entries. ``0`` means follow
               "next" links until there are none left.
    timeout -- passed to ``read_url`` for every page that is fetched

    Returns a list of dicts with the keys ``'url'`` and ``'title'``, both
    run through ``ox.decode_html``.
    """
    # Raw strings keep the patterns byte-identical to the historic ones while
    # avoiding the invalid "\/" string escape; compiled once, not per page.
    link_re = re.compile(r'<a class="title " href="(.*?)".*?>(.*?)<\/a>')
    next_re = re.compile(
        r'<a href="(.*?)" rel="nofollow next" >next &rsaquo;<\/a>')

    url = 'http://www.reddit.com/r/%s/' % name
    if offset:
        url += '?count=%d' % offset

    links = []
    while True:
        # Use the caller's timeout for every page, not just the first one.
        data = read_url(url, unicode=True, timeout=timeout)
        links += [{
            'url': ox.decode_html(href),
            'title': ox.decode_html(title)
        } for href, title in link_re.findall(data)]

        next_links = next_re.findall(data)
        if not next_links or (n != 0 and len(links) >= n):
            break
        # The non-greedy group can start at an earlier '<a href="' on the
        # page and so may include preceding markup; the text after the last
        # double quote is the href belonging to the "next" anchor itself.
        url = ox.decode_html(next_links[0].split('"')[-1])
    return links
Open Media Library Platform 2013-10-11 17:28:32 +00:00			`# -*- coding: utf-8 -*-`
			`# vi:si:et:sw=4:sts=4:ts=4`
			`import re`

			`import ox`
			`from ox.cache import read_url, cache_timeout`


			`def subreddit(name, offset=0, n=0, timeout=cache_timeout):`
			`url = 'http://www.reddit.com/r/%s/' % name`
			`if offset:`
			`url += '?count=%d' % offset`
			`data = read_url(url, unicode=True, timeout=timeout)`
			`more = True`
			`links = []`
			`while more:`
			`l = re.compile('<a class="title " href="(.*?)".*?>(.*?)<\/a>').findall(data)`
			`if l:`
			`links += [{`
			`'url': ox.decode_html(a[0]),`
			`'title': ox.decode_html(a[1])`
			`} for a in l]`
			`more = re.compile('<a href="(.*?)" rel="nofollow next" >next &rsaquo;<\/a>').findall(data)`
			`if more and (n == 0 or len(links) < n):`
			`url = ox.decode_html(more[0].split('"')[-1])`
			`data = read_url(url, unicode=True)`
			`else:`
			`more = False`
			`return links`