# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
#
# Source: ox/web/twitter.py as introduced by commit
# bb78574bc09ee2ca961b235efea82da96fa9c366 ("add ox.web.twitter"),
# j <0x006A@0x2620.org>, Thu, 31 Jan 2013 19:48:07 +0530.
"""Scrape tweets from the Twitter web search result page."""
import re
from datetime import datetime
from urllib import quote

import lxml.html

# ``from ox.cache import read_url`` alone does not bind the name ``ox``;
# the package itself must be imported for ox.decode_html / ox.strip_tags.
import ox
from ox.cache import read_url


# Attribute patterns, compiled once at import time.  Raw strings keep ``\d``
# from being interpreted as an (invalid) string escape sequence.
_DATA_NAME = re.compile(r'data-name="(.*?)"')
_TWEET_ID = re.compile(r'data-tweet-id="(\d+)"')
_USER_ID = re.compile(r'data-user-id="(\d+)"')
_SCREEN_NAME = re.compile(r'data-screen-name="(.*?)"')
_TIME = re.compile(r'data-time="(\d+)"')


def _first(pattern, html):
    """Return the first captured group of *pattern* found in *html*.

    Raises IndexError when the pattern does not match at all.
    """
    return pattern.findall(html)[0]


def _plain_text(html):
    """Pass *html* through ox.strip_tags and ox.decode_html, then strip
    surrounding whitespace."""
    return ox.decode_html(ox.strip_tags(html)).strip()


def find(query):
    """Search Twitter for *query* and return the tweets on the result page.

    The page ``https://twitter.com/search/<url-quoted query>`` is fetched
    with ``ox.cache.read_url`` (timeout of 60) and parsed with lxml.

    Returns a list with one dict per ``div`` whose class contains
    ``original-tweet``.  Each dict has the keys:

    - ``id``: value of the ``data-tweet-id`` attribute (digit string)
    - ``user-id``: value of the ``data-user-id`` attribute (digit string)
    - ``name``: value of the ``data-screen-name`` attribute
    - ``time``: ``data-time`` read as a POSIX timestamp and converted with
      ``datetime.fromtimestamp`` (a naive datetime in local time)
    - ``user``: value of the ``data-name`` attribute, tags stripped and
      passed through ``ox.decode_html``
    - ``text``: the first ``p`` whose class contains ``js-tweet-text``,
      tags stripped and passed through ``ox.decode_html``

    Raises IndexError if a tweet element lacks the text paragraph or any of
    the data attributes listed above.
    """
    url = 'https://twitter.com/search/' + quote(query)
    data = read_url(url, timeout=60)
    doc = lxml.html.document_fromstring(data)
    tweets = []
    for e in doc.xpath("//div[contains(@class, 'original-tweet')]"):
        # The data-* attributes are pulled from the serialized markup of the
        # tweet container with regular expressions.
        t = lxml.html.tostring(e)
        text_node = e.xpath(".//p[contains(@class, 'js-tweet-text')]")[0]
        text = _plain_text(lxml.html.tostring(text_node))
        user = _plain_text(_first(_DATA_NAME, t))
        tweets.append({
            'id': _first(_TWEET_ID, t),
            'user-id': _first(_USER_ID, t),
            'name': _first(_SCREEN_NAME, t),
            'time': datetime.fromtimestamp(int(_first(_TIME, t))),
            'user': user,
            'text': text,
        })
    return tweets