From 32d2df094117ec7b0f64976fa7a7986d6bfee3fe Mon Sep 17 00:00:00 2001
From: j <0x006A@0x2620.org>
Date: Wed, 3 Nov 2010 19:09:13 +0100
Subject: [PATCH] oxwebkit

---
 oxwebkit/__init__.py | 71 ++++++++++++++++++++++++++++++++++++++++++++++
 setup.py             | 46 ++++++++++++++++++++++++++++++
 2 files changed, 117 insertions(+)
 create mode 100644 oxwebkit/__init__.py
 create mode 100644 setup.py

diff --git a/oxwebkit/__init__.py b/oxwebkit/__init__.py
new file mode 100644
--- /dev/null
+++ b/oxwebkit/__init__.py
@@ -0,0 +1,71 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+# vi:si:et:sw=4:sts=4:ts=4
+"""Helpers for loading a page in a WebKit view and reading back its HTML."""
+import sys, thread
+import gtk
+import webkit
+import warnings
+from time import sleep
+
+
+class WebView(webkit.WebView):
+    """webkit.WebView with a helper that returns the document's markup."""
+
+    def get_html(self):
+        """Return documentElement.innerHTML of the current document.
+
+        A script copies the markup into document.title, the value is read
+        back as the main frame's title, and the old title is then restored.
+        """
+        self.execute_script('oldtitle=document.title;document.title=document.documentElement.innerHTML;')
+        html = self.get_main_frame().get_title()
+        self.execute_script('document.title=oldtitle;')
+        return html
+
+
+class Browser(gtk.Window):
+    """gtk.Window that embeds a WebView and relays 'load-finished'."""
+
+    def __init__(self):
+        gtk.gdk.threads_init()
+        gtk.Window.__init__(self)
+        # Set defaults so _finished_loading() and crawl() never hit a
+        # missing attribute when they run before open().
+        self.cb = None
+        self._url = None
+        self.view = WebView()
+        self.view.connect('load-finished', self._finished_loading)
+        self.add(self.view)
+
+    def open(self, url, cb=None):
+        """Load url in the embedded view.
+
+        cb, if given, is called as cb(view, frame) on 'load-finished'.
+        """
+        # Record callback and URL before the load is started.
+        self.cb = cb
+        self._url = url
+        self.view.open(url)
+
+    def crawl(self):
+        """Load the URL last given to open() in a new view, run gtk.main().
+
+        Raises RuntimeError if open() has not been called yet.
+        """
+        if self._url is None:
+            raise RuntimeError('crawl() requires a prior call to open()')
+        view = WebView()
+        # Connect the handler before starting the load.
+        view.connect('load-finished', self._finished_loading)
+        view.open(self._url)
+        gtk.main()
+
+    def _finished_loading(self, view, frame):
+        """Signal handler: forward (view, frame) to the callback, if set."""
+        if self.cb:
+            self.cb(view, frame)
+
+    def quit(self):
+        """Leave the gtk main loop by calling gtk.main_quit()."""
+        gtk.main_quit()
diff --git a/setup.py b/setup.py
new file mode 100644
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,46 @@
+#!/usr/bin/env python
+# encoding: utf-8
+# vi:si:et:sw=4:sts=4:ts=4
+from distutils.core import setup
+
+
+def get_bzr_version():
+    """Return the bzr revision number as a string, or u'unknown'.
+
+    Runs `bzr revno` and falls back to u'unknown' when its output is not
+    a number (for example empty) or when the revision is 0.
+    """
+    import os
+    pipe = os.popen('bzr revno')
+    try:
+        output = pipe.read()
+    finally:
+        pipe.close()
+    try:
+        rev = int(output)
+    except ValueError:
+        # Empty or non-numeric output; int() would otherwise abort setup.py.
+        return u'unknown'
+    if rev:
+        return u'%s' % rev
+    return u'unknown'
+
+
+setup(
+    name="oxwebkit",
+    version="0.0.%s" % get_bzr_version(),
+    description="python-oxwebkit scrape with webkit",
+    author="0x2620",
+    author_email="0x2620@0x2620.org",
+    url="http://code.0x2620.org/python-ox",
+    download_url="http://code.0x2620.org/python-oxwebkit/download",
+    license="GPLv3",
+    packages=['oxwebkit'],
+    keywords = [
+    ],
+    classifiers = [
+        'Operating System :: OS Independent',
+        'Programming Language :: Python',
+        'Topic :: Software Development :: Libraries :: Python Modules',
+    ],
+)