back to oxlib, package_dir does not work with pip/python setup.py develop

j 2009-10-12 17:18:59 +02:00
parent 16eeaf8b25
commit 1bd6615f16
27 changed files with 85 additions and 86 deletions
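Background on the setup.py change at the bottom of this diff: setuptools' develop mode (and pip's editable install) add the project root to sys.path via an egg-link instead of copying files, and the package_dir remapping is not applied on that path entry, so a mapping like {'ox.web': 'web'} only takes effect for regular installs. A minimal sketch of the remap-free setup() call this commit moves to — packages= is taken from the diff below, name and version are illustrative:

from setuptools import setup

setup(
    name='python-oxweb',  # illustrative, not shown in this diff
    version='1.0',        # illustrative
    # the package name matches the on-disk directory oxweb/, so
    # 'setup.py install' and 'setup.py develop' / 'pip install -e .'
    # resolve 'import oxweb' the same way, with no package_dir remapping
    packages=['oxweb'],
)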

View File

@@ -3,8 +3,8 @@
 import re
 import time
-from ox import stripTags, findRe
-from ox.cache import readUrlUnicode
+from oxlib import stripTags, findRe
+from oxlib.cache import readUrlUnicode
 def getId(url):

View File

@@ -2,10 +2,10 @@
 # vi:si:et:sw=4:sts=4:ts=4
 import re
-import ox.cache
-from ox.cache import readUrlUnicode
-from ox.html import stripTags
-from ox.text import findRe, removeSpecialCharacters
+import oxlib.cache
+from oxlib.cache import readUrlUnicode
+from oxlib.html import stripTags
+from oxlib.text import findRe, removeSpecialCharacters
 import imdb
@@ -32,7 +32,7 @@ def getData(id):
     try:
         html = readUrlUnicode(data["url"])
     except:
-        html = ox.cache.getUrl(data["url"])
+        html = oxlib.cache.getUrl(data["url"])
     data["number"] = findRe(html, "<p class=\"spinenumber\">(.*?)</p>")
     data["title"] = findRe(html, "<h2 class=\"movietitle\">(.*?)</h2>")
     data["director"] = findRe(html, "<h2 class=\"director\">(.*?)</h2>")

View File

@@ -2,7 +2,7 @@
 # vi:si:et:sw=4:sts=4:ts=4
 import re
 from urllib import unquote
-from ox.cache import readUrl
+from oxlib.cache import readUrl
 def getVideoUrl(url):

View File

@@ -3,8 +3,8 @@
 import re
 import time
-from ox import stripTags, findRe
-from ox.cache import readUrlUnicode
+from oxlib import stripTags, findRe
+from oxlib.cache import readUrlUnicode
 import google

View File

@@ -10,8 +10,8 @@ import Queue
 import simplejson
-import ox
-from ox import stripTags
+import oxlib
+from oxlib import stripTags
 '''
@@ -30,8 +30,8 @@ FIXME: how search depper than first page?
 DEFAULT_MAX_RESULTS = 10
 DEFAULT_TIMEOUT = 24*60*60
-def readUrl(url, data=None, headers=ox.net.DEFAULT_HEADERS, timeout=DEFAULT_TIMEOUT):
-    return ox.cache.readUrl(url, data, headers, timeout)
+def readUrl(url, data=None, headers=oxlib.net.DEFAULT_HEADERS, timeout=DEFAULT_TIMEOUT):
+    return oxlib.cache.readUrl(url, data, headers, timeout)
 def quote_plus(s):
     return urllib.quote_plus(s.encode('utf-8'))
@@ -52,6 +52,6 @@ def find(query, max_results=DEFAULT_MAX_RESULTS, timeout=DEFAULT_TIMEOUT):
     def _find(query):
         url = 'http://ajax.googleapis.com/ajax/services/search/web?v=1.0&q=%s' % quote_plus(query)
-        results = simplejson.loads(ox.cache.readUrlUnicode(url))['responseData']['results']
+        results = simplejson.loads(oxlib.cache.readUrlUnicode(url))['responseData']['results']
         return results

View File

@@ -8,19 +8,19 @@ import time
 from BeautifulSoup import BeautifulSoup
 import chardet
-import ox
-from ox import stripTags, decodeHtml, findRe, findString
-import ox.cache
-from ox.normalize import normalizeTitle, normalizeImdbId
-from ox import *
+import oxlib
+from oxlib import stripTags, decodeHtml, findRe, findString
+import oxlib.cache
+from oxlib.normalize import normalizeTitle, normalizeImdbId
+from oxlib import *
 import google
 '''
 never timeout imdb data, to update cache remove data from cache folder
 '''
-def readUrlUnicode(url, data=None, headers=ox.cache.DEFAULT_HEADERS, timeout=-1):
-    return ox.cache.readUrlUnicode(url, data, headers, timeout)
+def readUrlUnicode(url, data=None, headers=oxlib.cache.DEFAULT_HEADERS, timeout=-1):
+    return oxlib.cache.readUrlUnicode(url, data, headers, timeout)
 '''
 check if result is valid while updating
@@ -28,8 +28,8 @@ def validate(result, header):
     return header['status'] == u'200'
 try:
-    d = ox.cache.readUrlUnicode(url, data, headers, timeout=0, valid=validate)
-except ox.cache.InvalidResult, e:
+    d = oxlib.cache.readUrlUnicode(url, data, headers, timeout=0, valid=validate)
+except oxlib.cache.InvalidResult, e:
     print e.headers
 '''
@@ -682,10 +682,10 @@ def guess(title, director=''):
     search = 'site:imdb.com "%s"' % title
     for (name, url, desc) in google.find(search, 2):
         if url.startswith('http://www.imdb.com/title/tt'):
-            return normalizeImdbId(int(ox.intValue(url)))
+            return normalizeImdbId(int(oxlib.intValue(url)))
     try:
-        req = urllib2.Request(imdb_url, None, ox.net.DEFAULT_HEADERS)
+        req = urllib2.Request(imdb_url, None, oxlib.net.DEFAULT_HEADERS)
         u = urllib2.urlopen(req)
         data = u.read()
         return_url = u.url
@@ -700,7 +700,7 @@ def guess(title, director=''):
         return imdb_id
     imdb_url = 'http://www.imdb.com/find?q=%s;s=tt;site=aka' % quote(title.encode('utf-8'))
-    req = urllib2.Request(imdb_url, None, ox.net.DEFAULT_HEADERS)
+    req = urllib2.Request(imdb_url, None, oxlib.net.DEFAULT_HEADERS)
     u = urllib2.urlopen(req)
     data = u.read()
     return_url = u.url

View File

@@ -2,9 +2,9 @@
 # encoding: utf-8
 import re
-from ox.cache import readUrlUnicode
-from ox.html import stripTags
-from ox.text import findRe
+from oxlib.cache import readUrlUnicode
+from oxlib.html import stripTags
+from oxlib.text import findRe
 import imdb

View File

@@ -3,10 +3,10 @@
 import re
 import urllib
-from ox.cache import readUrl
-from ox.html import decodeHtml, stripTags
-from ox.text import findRe
-from ox.text import findString
+from oxlib.cache import readUrl
+from oxlib.html import decodeHtml, stripTags
+from oxlib.text import findRe
+from oxlib.text import findString
 # to sniff itunes traffic, use something like

View File

@@ -1,7 +1,7 @@
 import re
-from ox import cache
-from ox.html import stripTags
-from ox.text import findRe
+from oxlib import cache
+from oxlib.html import stripTags
+from oxlib.text import findRe
 import auth

View File

@@ -1,8 +1,8 @@
 # -*- coding: utf-8 -*-
 # vi:si:et:sw=4:sts=4:ts=4
-from ox.cache import readUrl
-from ox.html import decodeHtml
-from ox.text import findRe
+from oxlib.cache import readUrl
+from oxlib.html import decodeHtml
+from oxlib.text import findRe
 def getLyrics(title, artist):

View File

@@ -3,8 +3,8 @@
 import re
 from urllib import quote
-from ox.cache import readUrl, readUrlUnicode
-from ox import findRe, decodeHtml, stripTags
+from oxlib.cache import readUrl, readUrlUnicode
+from oxlib import findRe, decodeHtml, stripTags
 def getMetacriticShowUrl(title):

View File

@@ -5,10 +5,10 @@ import re
 import socket
 from urllib import quote
-from ox.cache import readUrl, readUrlUnicode
-from ox import findRe, cache, stripTags, decodeHtml, getTorrentInfo, intValue, normalizeNewlines
-from ox.normalize import normalizeImdbId
-import ox
+from oxlib.cache import readUrl, readUrlUnicode
+from oxlib import findRe, cache, stripTags, decodeHtml, getTorrentInfo, intValue, normalizeNewlines
+from oxlib.normalize import normalizeImdbId
+import oxlib
 from torrent import Torrent
@@ -55,7 +55,7 @@ def getId(mininovaId):
 def exists(mininovaId):
     mininovaId = getId(mininovaId)
-    data = ox.net.readUrl("http://www.mininova.org/tor/%s" % mininovaId)
+    data = oxlib.net.readUrl("http://www.mininova.org/tor/%s" % mininovaId)
     if not data or 'Torrent not found...' in data:
         return False
     if 'tracker</a> of this torrent requires registration.' in data:

View File

@@ -3,8 +3,8 @@
 import re
-from ox.cache import readUrlUnicode
-from ox import findRe
+from oxlib.cache import readUrlUnicode
+from oxlib import findRe
 def getData(id):
     '''

View File

@@ -3,9 +3,9 @@
 import re
 import feedparser
-from ox.cache import readUrl, readUrlUnicode
-import ox
-from ox import langCode2To3, langTo3Code
+from oxlib.cache import readUrl, readUrlUnicode
+from oxlib import findRe, stripTags
+from oxlib import langCode2To3, langTo3Code
 def findSubtitlesByImdb(imdb, parts = 1, language = "eng"):
     if len(language) == 2:
@@ -26,7 +26,7 @@ def findSubtitlesByImdb(imdb, parts = 1, language = "eng"):
     if opensubtitleId:
         opensubtitleId = opensubtitleId[0]
     else:
-        opensubtitleId = ox.findRe(data, '/en/subtitles/(.*?)/')
+        opensubtitleId = findRe(data, '/en/subtitles/(.*?)/')
     return opensubtitleId
 def downloadSubtitleById(opensubtitle_id):
@@ -34,7 +34,7 @@ def downloadSubtitleById(opensubtitle_id):
     data = readUrl('http://www.opensubtitles.org/en/subtitles/%s' % opensubtitle_id)
     reg_exp = 'href="(/en/download/file/.*?)">(.*?)</a>'
     for f in re.compile(reg_exp, re.DOTALL).findall(data):
-        name = ox.stripTags(f[1]).split('\n')[0]
+        name = stripTags(f[1]).split('\n')[0]
         url = "http://www.opensubtitles.com%s" % f[0]
         srts[name] = readUrlUnicode(url)
     return srts

View File

@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 # vi:si:et:sw=4:sts=4:ts=4
-import ox.cache
+import oxlib.cache
 def getPosterUrl(id):
     url = "http://0xdb.org/%s/poster.0xdb.jpg" % id

View File

@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # vi:si:et:sw=4:sts=4:ts=4
-import ox.cache
-from ox.cache import exists
+import oxlib.cache
+from oxlib.cache import exists
 def getPosterUrl(id):

View File

@@ -2,8 +2,8 @@
 # vi:si:et:sw=4:sts=4:ts=4
 import re
-from ox.cache import getHeaders, readUrl, readUrlUnicode
-from ox import findRe, stripTags
+from oxlib.cache import getHeaders, readUrl, readUrlUnicode
+from oxlib import findRe, stripTags
 def readUrlByImdb(imdb):

View File

@@ -6,9 +6,9 @@ import time
 from BeautifulSoup import BeautifulSoup
-import ox.cache
-from ox.html import decodeHtml, stripTags
-import ox.net
+import oxlib.cache
+from oxlib.html import decodeHtml, stripTags
+import oxlib.net
 def getNews(year, month, day):
@@ -23,9 +23,9 @@ def getNews(year, month, day):
     for section in sections:
         url = 'http://www.spiegel.de/%s/0,1518,archiv-%d-%03d,00.html' % (section, year, day)
         if date == time.strftime('%d.%m.%Y', time.localtime()):
-            html = ox.net.readUrl(url)
+            html = oxlib.net.readUrl(url)
         else:
-            html = ox.cache.readUrl(url)
+            html = oxlib.cache.readUrl(url)
         for item in re.compile('<div class="spTeaserCenterpage(.*?)</p>', re.DOTALL).findall(html):
             dateString = stripTags(re.compile('<div class="spDateTime">(.*?)</div>', re.DOTALL).findall(item)[0]).strip()
             try:
@@ -102,11 +102,11 @@ def formatSubsection(string):
 def getIssue(year, week):
     coverUrl = 'http://www.spiegel.de/static/epaper/SP/%d/%d/ROSPANZ%d%03d0001-312.jpg' % (year, week, year, week)
-    if not ox.net.exists(coverUrl):
+    if not oxlib.net.exists(coverUrl):
         return None
     url = 'http://service.spiegel.de/digas/servlet/epaper?Q=SP&JG=%d&AG=%d&SE=1&AN=INHALT' % (year, week)
     contents = []
-    soup = BeautifulSoup(ox.cache.readUrl(url))
+    soup = BeautifulSoup(oxlib.cache.readUrl(url))
     for item in soup('a', {'href': re.compile('http://service.spiegel.de/digas/servlet/epaper\?Q=SP&JG=')}):
         item = str(item)
         page = int(re.compile('&amp;SE=(.*?)"').findall(item)[0])
@@ -116,7 +116,7 @@ def getIssue(year, week):
     pages = page + 2
     for page in range(1, pages + 10):
         url = 'http://www.spiegel.de/static/epaper/SP/%d/%d/ROSPANZ%d%03d%04d-205.jpg' % (year, week, year, week, page)
-        if ox.cache.exists(url):
+        if oxlib.cache.exists(url):
            pageUrl[page] = url
        else:
            pageUrl[page] = ''
@@ -164,7 +164,7 @@ def archiveIssues():
            f.close()
        filename = '%s/Der Spiegel %d %02d.jpg' % (dirname, y, w)
        if not os.path.exists(filename):
-           data = ox.cache.readUrl(issue['coverUrl'])
+           data = oxlib.cache.readUrl(issue['coverUrl'])
            f = open(filename, 'w')
            f.write(data)
            f.close()
@@ -173,7 +173,7 @@ def archiveIssues():
        if url:
            filename = '%s/Der Spiegel %d %02d %03d.jpg' % (dirname, y, w, page)
            if not os.path.exists(filename):
-               data = ox.cache.readUrl(url)
+               data = oxlib.cache.readUrl(url)
                f = open(filename, 'w')
                f.write(data)
                f.close()
@@ -244,7 +244,7 @@ def archiveNews():
            f.close()
        filename = dirname + '/' + new['imageUrl'].split('/')[-1]
        if not os.path.exists(filename):
-           data = ox.cache.readUrl(new['imageUrl'])
+           data = oxlib.cache.readUrl(new['imageUrl'])
            f = open(filename, 'w')
            f.write(data)
            f.close()

View File

@@ -6,10 +6,10 @@ import socket
 from urllib import quote, urlencode
 from urllib2 import URLError
-from ox.cache import readUrl, readUrlUnicode
-from ox import findRe, cache, stripTags, decodeHtml, getTorrentInfo, normalizeNewlines
-from ox.normalize import normalizeImdbId
-import ox
+from oxlib.cache import readUrl, readUrlUnicode
+from oxlib import findRe, cache, stripTags, decodeHtml, getTorrentInfo, normalizeNewlines
+from oxlib.normalize import normalizeImdbId
+import oxlib
 from torrent import Torrent
@@ -67,7 +67,7 @@ def getId(piratebayId):
 def exists(piratebayId):
     piratebayId = getId(piratebayId)
-    return ox.net.exists("http://thepiratebay.org/torrent/%s" % piratebayId)
+    return oxlib.net.exists("http://thepiratebay.org/torrent/%s" % piratebayId)
 def getData(piratebayId):
     _key_map = {

View File

@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 # vi:si:et:sw=4:sts=4:ts=4
-from ox import intValue
+from oxlib import intValue
 class Torrent(dict):

View File

@@ -3,8 +3,8 @@
 import re
 import time
-from ox import stripTags, findRe
-from ox.cache import readUrlUnicode
+from oxlib import stripTags, findRe
+from oxlib.cache import readUrlUnicode
 def getEpisodeData(url):

View File

@@ -3,8 +3,8 @@
 from urllib import urlencode
 import simplejson
-from ox.cache import readUrl, readUrlUnicode
-from ox import findRe, decodeHtml
+from oxlib.cache import readUrl, readUrlUnicode
+from oxlib import findRe, decodeHtml
 def getId(url):

View File

@@ -6,8 +6,8 @@ import xml.etree.ElementTree as ET
 import re
 import feedparser
-from ox.cache import readUrl, readUrlUnicode
-from ox import findString, findRe
+from oxlib.cache import readUrl, readUrlUnicode
+from oxlib import findString, findRe
 def getVideoKey(youtubeId):

View File

@@ -1 +1 @@
-ox
+oxlib

View File

@@ -16,11 +16,10 @@ setup(
     description="collection of scrapers for the web",
     author="0x",
     author_email="code@0xdb.org",
-    url="http://code.0xdb.org/oxweb",
+    url="http://code.0xdb.org/python-oxweb",
     download_url="http://code.0xdb.org/oxweb/download",
     license="GPLv3",
-    package_dir = {'ox.web': 'web'},
-    packages=['ox.web'],
+    packages=['oxweb'],
     keywords = [
     ],
     classifiers = [
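
A quick sanity check for the editable install after this change (a sketch, assuming the renamed top-level oxweb/ package directory; run from the project root after 'python setup.py develop' or 'pip install -e .'):

import oxweb           # top-level package implied by packages=['oxweb'] above
print(oxweb.__file__)  # should point into the source tree, not site-packages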