oxwebkit
This commit is contained in:
commit
32d2df0941
2 changed files with 73 additions and 0 deletions
42
oxwebkit/__init__.py
Normal file
42
oxwebkit/__init__.py
Normal file
|
@ -0,0 +1,42 @@
|
|||
#!/usr/bin/python
|
||||
# -*- coding: utf-8 -*-
|
||||
# vi:si:et:sw=4:sts=4:ts=4
|
||||
import sys, thread
|
||||
import gtk
|
||||
import webkit
|
||||
import warnings
|
||||
from time import sleep
|
||||
|
||||
|
||||
class WebView(webkit.WebView):
    """WebKit view that can hand back the HTML of the loaded document."""

    def get_html(self):
        """Return the innerHTML of the document element.

        The markup is passed out of the page through the document title:
        a script saves the current title, overwrites it with the markup,
        the title is read back from the main frame, and a second script
        restores the saved title.
        """
        stash_markup_in_title = 'oldtitle=document.title;document.title=document.documentElement.innerHTML;'
        restore_title = 'document.title=oldtitle;'

        self.execute_script(stash_markup_in_title)
        markup = self.get_main_frame().get_title()
        self.execute_script(restore_title)
        return markup
|
||||
|
||||
class Browser(gtk.Window):
    """GTK window embedding a WebView, with a load-finished callback hook.

    Attributes:
        view: the WebView added to this window.
        cb: callable invoked as ``cb(view, frame)`` when a load finishes,
            or None when no callback is registered.
        _url: the URL most recently passed to open(), or None.
    """

    def __init__(self):
        gtk.gdk.threads_init()
        gtk.Window.__init__(self)
        # Create these up front so _finished_loading() and crawl() never
        # trip over a missing attribute when open() has not been called.
        self.cb = None
        self._url = None
        self.view = WebView()
        self.view.connect('load-finished', self._finished_loading)
        self.add(self.view)

    def open(self, url, cb=None):
        """Load *url* in the embedded view.

        Args:
            url: address to load.
            cb: optional callable invoked as ``cb(view, frame)`` once the
                load has finished.
        """
        # Store the callback and URL before starting the load so the
        # state is consistent by the time 'load-finished' is emitted.
        self.cb = cb
        self._url = url
        self.view.open(url)

    def crawl(self):
        """Load the remembered URL in a fresh WebView and run gtk.main().

        The new view is not added to the window. The call blocks until
        the GTK main loop is stopped (see quit()).

        Raises:
            ValueError: if no URL has been recorded yet.
        """
        if self._url is None:
            raise ValueError('no URL to crawl; call open() first')
        view = WebView()
        # Connect before opening so the signal handler is in place first.
        view.connect('load-finished', self._finished_loading)
        view.open(self._url)
        gtk.main()

    def _finished_loading(self, view, frame):
        """Signal handler for 'load-finished'; forwards to the callback."""
        if self.cb:
            self.cb(view, frame)

    def quit(self):
        """Stop the GTK main loop."""
        gtk.main_quit()
|
||||
|
31
setup.py
Normal file
31
setup.py
Normal file
|
@ -0,0 +1,31 @@
|
|||
#!/usr/bin/env python
|
||||
# vi:si:et:sw=4:sts=4:ts=4
|
||||
# encoding: utf-8
|
||||
from distutils.core import setup
|
||||
|
||||
def get_bzr_version():
    """Return the bzr revision number of the working tree as text.

    Runs ``bzr revno`` and parses its standard output as an integer.

    Returns:
        The revision number formatted as a unicode string, or
        ``u'unknown'`` when bzr cannot be executed, its output is not an
        integer (e.g. not inside a bzr branch), or the revision is 0.
    """
    import subprocess

    try:
        proc = subprocess.Popen(
            ['bzr', 'revno'],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
        # communicate() drains and closes both pipes and reaps the child.
        output = proc.communicate()[0]
        rev = int(output.strip())
    except (OSError, ValueError):
        # OSError: bzr is not installed; ValueError: empty/non-numeric output.
        return u'unknown'
    if rev:
        return u'%s' % rev
    return u'unknown'
|
||||
|
||||
# Package metadata handed to distutils. The last version component is
# whatever get_bzr_version() returns.
setup(
    name="oxwebkit",
    version="0.0.%s" % get_bzr_version(),
    description="python-oxwebkit scrape with webkit",
    author="0x2620",
    author_email="0x2620@0x2620.org",
    url="http://code.0x2620.org/python-ox",
    download_url="http://code.0x2620.org/python-oxwebkit/download",
    license="GPLv3",
    packages=['oxwebkit'],
    keywords=[],
    classifiers=[
        'Operating System :: OS Independent',
        'Programming Language :: Python',
        'Topic :: Software Development :: Libraries :: Python Modules',
    ],
)
|
||||
|
Loading…
Reference in a new issue