back to oxlib: package_dir does not work with pip or with python setup.py develop

This commit is contained in:
j 2009-10-12 17:18:59 +02:00
parent 16eeaf8b25
commit 1bd6615f16
27 changed files with 85 additions and 86 deletions

View file

@ -3,8 +3,8 @@
import re
import time
from ox import stripTags, findRe
from ox.cache import readUrlUnicode
from oxlib import stripTags, findRe
from oxlib.cache import readUrlUnicode
def getId(url):

View file

@ -2,10 +2,10 @@
# vi:si:et:sw=4:sts=4:ts=4
import re
import ox.cache
from ox.cache import readUrlUnicode
from ox.html import stripTags
from ox.text import findRe, removeSpecialCharacters
import oxlib.cache
from oxlib.cache import readUrlUnicode
from oxlib.html import stripTags
from oxlib.text import findRe, removeSpecialCharacters
import imdb
@ -32,7 +32,7 @@ def getData(id):
try:
html = readUrlUnicode(data["url"])
except:
html = ox.cache.getUrl(data["url"])
html = oxlib.cache.getUrl(data["url"])
data["number"] = findRe(html, "<p class=\"spinenumber\">(.*?)</p>")
data["title"] = findRe(html, "<h2 class=\"movietitle\">(.*?)</h2>")
data["director"] = findRe(html, "<h2 class=\"director\">(.*?)</h2>")

View file

@ -2,7 +2,7 @@
# vi:si:et:sw=4:sts=4:ts=4
import re
from urllib import unquote
from ox.cache import readUrl
from oxlib.cache import readUrl
def getVideoUrl(url):

View file

@ -3,8 +3,8 @@
import re
import time
from ox import stripTags, findRe
from ox.cache import readUrlUnicode
from oxlib import stripTags, findRe
from oxlib.cache import readUrlUnicode
import google

View file

@ -10,8 +10,8 @@ import Queue
import simplejson
import ox
from ox import stripTags
import oxlib
from oxlib import stripTags
'''
@ -30,8 +30,8 @@ FIXME: how to search deeper than first page?
DEFAULT_MAX_RESULTS = 10
DEFAULT_TIMEOUT = 24*60*60
def readUrl(url, data=None, headers=ox.net.DEFAULT_HEADERS, timeout=DEFAULT_TIMEOUT):
return ox.cache.readUrl(url, data, headers, timeout)
def readUrl(url, data=None, headers=oxlib.net.DEFAULT_HEADERS, timeout=DEFAULT_TIMEOUT):
return oxlib.cache.readUrl(url, data, headers, timeout)
def quote_plus(s):
return urllib.quote_plus(s.encode('utf-8'))
@ -52,6 +52,6 @@ def find(query, max_results=DEFAULT_MAX_RESULTS, timeout=DEFAULT_TIMEOUT):
def _find(query):
url = 'http://ajax.googleapis.com/ajax/services/search/web?v=1.0&q=%s' % quote_plus(query)
results = simplejson.loads(ox.cache.readUrlUnicode(url))['responseData']['results']
results = simplejson.loads(oxlib.cache.readUrlUnicode(url))['responseData']['results']
return results

View file

@ -8,19 +8,19 @@ import time
from BeautifulSoup import BeautifulSoup
import chardet
import ox
from ox import stripTags, decodeHtml, findRe, findString
import ox.cache
from ox.normalize import normalizeTitle, normalizeImdbId
from ox import *
import oxlib
from oxlib import stripTags, decodeHtml, findRe, findString
import oxlib.cache
from oxlib.normalize import normalizeTitle, normalizeImdbId
from oxlib import *
import google
'''
never timeout imdb data, to update cache remove data from cache folder
'''
def readUrlUnicode(url, data=None, headers=ox.cache.DEFAULT_HEADERS, timeout=-1):
return ox.cache.readUrlUnicode(url, data, headers, timeout)
def readUrlUnicode(url, data=None, headers=oxlib.cache.DEFAULT_HEADERS, timeout=-1):
return oxlib.cache.readUrlUnicode(url, data, headers, timeout)
'''
check if result is valid while updating
@ -28,8 +28,8 @@ def validate(result, header):
return header['status'] == u'200'
try:
d = ox.cache.readUrlUnicode(url, data, headers, timeout=0, valid=validate)
except ox.cache.InvalidResult, e:
d = oxlib.cache.readUrlUnicode(url, data, headers, timeout=0, valid=validate)
except oxlib.cache.InvalidResult, e:
print e.headers
'''
@ -682,10 +682,10 @@ def guess(title, director=''):
search = 'site:imdb.com "%s"' % title
for (name, url, desc) in google.find(search, 2):
if url.startswith('http://www.imdb.com/title/tt'):
return normalizeImdbId(int(ox.intValue(url)))
return normalizeImdbId(int(oxlib.intValue(url)))
try:
req = urllib2.Request(imdb_url, None, ox.net.DEFAULT_HEADERS)
req = urllib2.Request(imdb_url, None, oxlib.net.DEFAULT_HEADERS)
u = urllib2.urlopen(req)
data = u.read()
return_url = u.url
@ -700,7 +700,7 @@ def guess(title, director=''):
return imdb_id
imdb_url = 'http://www.imdb.com/find?q=%s;s=tt;site=aka' % quote(title.encode('utf-8'))
req = urllib2.Request(imdb_url, None, ox.net.DEFAULT_HEADERS)
req = urllib2.Request(imdb_url, None, oxlib.net.DEFAULT_HEADERS)
u = urllib2.urlopen(req)
data = u.read()
return_url = u.url

View file

@ -2,9 +2,9 @@
# encoding: utf-8
import re
from ox.cache import readUrlUnicode
from ox.html import stripTags
from ox.text import findRe
from oxlib.cache import readUrlUnicode
from oxlib.html import stripTags
from oxlib.text import findRe
import imdb

View file

@ -3,10 +3,10 @@
import re
import urllib
from ox.cache import readUrl
from ox.html import decodeHtml, stripTags
from ox.text import findRe
from ox.text import findString
from oxlib.cache import readUrl
from oxlib.html import decodeHtml, stripTags
from oxlib.text import findRe
from oxlib.text import findString
# to sniff itunes traffic, use something like

View file

@ -1,7 +1,7 @@
import re
from ox import cache
from ox.html import stripTags
from ox.text import findRe
from oxlib import cache
from oxlib.html import stripTags
from oxlib.text import findRe
import auth

View file

@ -1,8 +1,8 @@
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
from ox.cache import readUrl
from ox.html import decodeHtml
from ox.text import findRe
from oxlib.cache import readUrl
from oxlib.html import decodeHtml
from oxlib.text import findRe
def getLyrics(title, artist):

View file

@ -3,8 +3,8 @@
import re
from urllib import quote
from ox.cache import readUrl, readUrlUnicode
from ox import findRe, decodeHtml, stripTags
from oxlib.cache import readUrl, readUrlUnicode
from oxlib import findRe, decodeHtml, stripTags
def getMetacriticShowUrl(title):

View file

@ -5,10 +5,10 @@ import re
import socket
from urllib import quote
from ox.cache import readUrl, readUrlUnicode
from ox import findRe, cache, stripTags, decodeHtml, getTorrentInfo, intValue, normalizeNewlines
from ox.normalize import normalizeImdbId
import ox
from oxlib.cache import readUrl, readUrlUnicode
from oxlib import findRe, cache, stripTags, decodeHtml, getTorrentInfo, intValue, normalizeNewlines
from oxlib.normalize import normalizeImdbId
import oxlib
from torrent import Torrent
@ -55,7 +55,7 @@ def getId(mininovaId):
def exists(mininovaId):
mininovaId = getId(mininovaId)
data = ox.net.readUrl("http://www.mininova.org/tor/%s" % mininovaId)
data = oxlib.net.readUrl("http://www.mininova.org/tor/%s" % mininovaId)
if not data or 'Torrent not found...' in data:
return False
if 'tracker</a> of this torrent requires registration.' in data:

View file

@ -3,8 +3,8 @@
import re
from ox.cache import readUrlUnicode
from ox import findRe
from oxlib.cache import readUrlUnicode
from oxlib import findRe
def getData(id):
'''

View file

@ -3,9 +3,9 @@
import re
import feedparser
from ox.cache import readUrl, readUrlUnicode
import ox
from ox import langCode2To3, langTo3Code
from oxlib.cache import readUrl, readUrlUnicode
from oxlib import findRe, stripTags
from oxlib import langCode2To3, langTo3Code
def findSubtitlesByImdb(imdb, parts = 1, language = "eng"):
if len(language) == 2:
@ -26,7 +26,7 @@ def findSubtitlesByImdb(imdb, parts = 1, language = "eng"):
if opensubtitleId:
opensubtitleId = opensubtitleId[0]
else:
opensubtitleId = ox.findRe(data, '/en/subtitles/(.*?)/')
opensubtitleId = findRe(data, '/en/subtitles/(.*?)/')
return opensubtitleId
def downloadSubtitleById(opensubtitle_id):
@ -34,7 +34,7 @@ def downloadSubtitleById(opensubtitle_id):
data = readUrl('http://www.opensubtitles.org/en/subtitles/%s' % opensubtitle_id)
reg_exp = 'href="(/en/download/file/.*?)">(.*?)</a>'
for f in re.compile(reg_exp, re.DOTALL).findall(data):
name = ox.stripTags(f[1]).split('\n')[0]
name = stripTags(f[1]).split('\n')[0]
url = "http://www.opensubtitles.com%s" % f[0]
srts[name] = readUrlUnicode(url)
return srts

View file

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
import ox.cache
import oxlib.cache
def getPosterUrl(id):
url = "http://0xdb.org/%s/poster.0xdb.jpg" % id

View file

@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
import ox.cache
from ox.cache import exists
import oxlib.cache
from oxlib.cache import exists
def getPosterUrl(id):

View file

@ -2,8 +2,8 @@
# vi:si:et:sw=4:sts=4:ts=4
import re
from ox.cache import getHeaders, readUrl, readUrlUnicode
from ox import findRe, stripTags
from oxlib.cache import getHeaders, readUrl, readUrlUnicode
from oxlib import findRe, stripTags
def readUrlByImdb(imdb):

View file

@ -6,9 +6,9 @@ import time
from BeautifulSoup import BeautifulSoup
import ox.cache
from ox.html import decodeHtml, stripTags
import ox.net
import oxlib.cache
from oxlib.html import decodeHtml, stripTags
import oxlib.net
def getNews(year, month, day):
@ -23,9 +23,9 @@ def getNews(year, month, day):
for section in sections:
url = 'http://www.spiegel.de/%s/0,1518,archiv-%d-%03d,00.html' % (section, year, day)
if date == time.strftime('%d.%m.%Y', time.localtime()):
html = ox.net.readUrl(url)
html = oxlib.net.readUrl(url)
else:
html = ox.cache.readUrl(url)
html = oxlib.cache.readUrl(url)
for item in re.compile('<div class="spTeaserCenterpage(.*?)</p>', re.DOTALL).findall(html):
dateString = stripTags(re.compile('<div class="spDateTime">(.*?)</div>', re.DOTALL).findall(item)[0]).strip()
try:
@ -102,11 +102,11 @@ def formatSubsection(string):
def getIssue(year, week):
coverUrl = 'http://www.spiegel.de/static/epaper/SP/%d/%d/ROSPANZ%d%03d0001-312.jpg' % (year, week, year, week)
if not ox.net.exists(coverUrl):
if not oxlib.net.exists(coverUrl):
return None
url = 'http://service.spiegel.de/digas/servlet/epaper?Q=SP&JG=%d&AG=%d&SE=1&AN=INHALT' % (year, week)
contents = []
soup = BeautifulSoup(ox.cache.readUrl(url))
soup = BeautifulSoup(oxlib.cache.readUrl(url))
for item in soup('a', {'href': re.compile('http://service.spiegel.de/digas/servlet/epaper\?Q=SP&JG=')}):
item = str(item)
page = int(re.compile('&amp;SE=(.*?)"').findall(item)[0])
@ -116,7 +116,7 @@ def getIssue(year, week):
pages = page + 2
for page in range(1, pages + 10):
url = 'http://www.spiegel.de/static/epaper/SP/%d/%d/ROSPANZ%d%03d%04d-205.jpg' % (year, week, year, week, page)
if ox.cache.exists(url):
if oxlib.cache.exists(url):
pageUrl[page] = url
else:
pageUrl[page] = ''
@ -164,7 +164,7 @@ def archiveIssues():
f.close()
filename = '%s/Der Spiegel %d %02d.jpg' % (dirname, y, w)
if not os.path.exists(filename):
data = ox.cache.readUrl(issue['coverUrl'])
data = oxlib.cache.readUrl(issue['coverUrl'])
f = open(filename, 'w')
f.write(data)
f.close()
@ -173,7 +173,7 @@ def archiveIssues():
if url:
filename = '%s/Der Spiegel %d %02d %03d.jpg' % (dirname, y, w, page)
if not os.path.exists(filename):
data = ox.cache.readUrl(url)
data = oxlib.cache.readUrl(url)
f = open(filename, 'w')
f.write(data)
f.close()
@ -244,7 +244,7 @@ def archiveNews():
f.close()
filename = dirname + '/' + new['imageUrl'].split('/')[-1]
if not os.path.exists(filename):
data = ox.cache.readUrl(new['imageUrl'])
data = oxlib.cache.readUrl(new['imageUrl'])
f = open(filename, 'w')
f.write(data)
f.close()

View file

@ -6,10 +6,10 @@ import socket
from urllib import quote, urlencode
from urllib2 import URLError
from ox.cache import readUrl, readUrlUnicode
from ox import findRe, cache, stripTags, decodeHtml, getTorrentInfo, normalizeNewlines
from ox.normalize import normalizeImdbId
import ox
from oxlib.cache import readUrl, readUrlUnicode
from oxlib import findRe, cache, stripTags, decodeHtml, getTorrentInfo, normalizeNewlines
from oxlib.normalize import normalizeImdbId
import oxlib
from torrent import Torrent
@ -67,7 +67,7 @@ def getId(piratebayId):
def exists(piratebayId):
piratebayId = getId(piratebayId)
return ox.net.exists("http://thepiratebay.org/torrent/%s" % piratebayId)
return oxlib.net.exists("http://thepiratebay.org/torrent/%s" % piratebayId)
def getData(piratebayId):
_key_map = {

View file

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
from ox import intValue
from oxlib import intValue
class Torrent(dict):

View file

@ -3,8 +3,8 @@
import re
import time
from ox import stripTags, findRe
from ox.cache import readUrlUnicode
from oxlib import stripTags, findRe
from oxlib.cache import readUrlUnicode
def getEpisodeData(url):

View file

@ -3,8 +3,8 @@
from urllib import urlencode
import simplejson
from ox.cache import readUrl, readUrlUnicode
from ox import findRe, decodeHtml
from oxlib.cache import readUrl, readUrlUnicode
from oxlib import findRe, decodeHtml
def getId(url):

View file

@ -6,8 +6,8 @@ import xml.etree.ElementTree as ET
import re
import feedparser
from ox.cache import readUrl, readUrlUnicode
from ox import findString, findRe
from oxlib.cache import readUrl, readUrlUnicode
from oxlib import findString, findRe
def getVideoKey(youtubeId):

View file

@ -1 +1 @@
ox
oxlib

View file

@ -16,11 +16,10 @@ setup(
description="collection of scrapers for the web",
author="0x",
author_email="code@0xdb.org",
url="http://code.0xdb.org/oxweb",
url="http://code.0xdb.org/python-oxweb",
download_url="http://code.0xdb.org/oxweb/download",
license="GPLv3",
package_dir = {'ox.web': 'web'},
packages=['ox.web'],
packages=['oxweb'],
keywords = [
],
classifiers = [