back to oxlib, package_dir does not work with pip/python setup.py develop
This commit is contained in:
parent
16eeaf8b25
commit
1bd6615f16
27 changed files with 85 additions and 86 deletions
|
@ -3,8 +3,8 @@
|
|||
import re
|
||||
import time
|
||||
|
||||
from ox import stripTags, findRe
|
||||
from ox.cache import readUrlUnicode
|
||||
from oxlib import stripTags, findRe
|
||||
from oxlib.cache import readUrlUnicode
|
||||
|
||||
|
||||
def getId(url):
|
|
@ -2,10 +2,10 @@
|
|||
# vi:si:et:sw=4:sts=4:ts=4
|
||||
import re
|
||||
|
||||
import ox.cache
|
||||
from ox.cache import readUrlUnicode
|
||||
from ox.html import stripTags
|
||||
from ox.text import findRe, removeSpecialCharacters
|
||||
import oxlib.cache
|
||||
from oxlib.cache import readUrlUnicode
|
||||
from oxlib.html import stripTags
|
||||
from oxlib.text import findRe, removeSpecialCharacters
|
||||
|
||||
import imdb
|
||||
|
||||
|
@ -32,7 +32,7 @@ def getData(id):
|
|||
try:
|
||||
html = readUrlUnicode(data["url"])
|
||||
except:
|
||||
html = ox.cache.getUrl(data["url"])
|
||||
html = oxlib.cache.getUrl(data["url"])
|
||||
data["number"] = findRe(html, "<p class=\"spinenumber\">(.*?)</p>")
|
||||
data["title"] = findRe(html, "<h2 class=\"movietitle\">(.*?)</h2>")
|
||||
data["director"] = findRe(html, "<h2 class=\"director\">(.*?)</h2>")
|
|
@ -2,7 +2,7 @@
|
|||
# vi:si:et:sw=4:sts=4:ts=4
|
||||
import re
|
||||
from urllib import unquote
|
||||
from ox.cache import readUrl
|
||||
from oxlib.cache import readUrl
|
||||
|
||||
|
||||
def getVideoUrl(url):
|
|
@ -3,8 +3,8 @@
|
|||
import re
|
||||
import time
|
||||
|
||||
from ox import stripTags, findRe
|
||||
from ox.cache import readUrlUnicode
|
||||
from oxlib import stripTags, findRe
|
||||
from oxlib.cache import readUrlUnicode
|
||||
|
||||
import google
|
||||
|
|
@ -10,8 +10,8 @@ import Queue
|
|||
import simplejson
|
||||
|
||||
|
||||
import ox
|
||||
from ox import stripTags
|
||||
import oxlib
|
||||
from oxlib import stripTags
|
||||
|
||||
|
||||
'''
|
||||
|
@ -30,8 +30,8 @@ FIXME: how search depper than first page?
|
|||
DEFAULT_MAX_RESULTS = 10
|
||||
DEFAULT_TIMEOUT = 24*60*60
|
||||
|
||||
def readUrl(url, data=None, headers=ox.net.DEFAULT_HEADERS, timeout=DEFAULT_TIMEOUT):
|
||||
return ox.cache.readUrl(url, data, headers, timeout)
|
||||
def readUrl(url, data=None, headers=oxlib.net.DEFAULT_HEADERS, timeout=DEFAULT_TIMEOUT):
|
||||
return oxlib.cache.readUrl(url, data, headers, timeout)
|
||||
|
||||
def quote_plus(s):
|
||||
return urllib.quote_plus(s.encode('utf-8'))
|
||||
|
@ -52,6 +52,6 @@ def find(query, max_results=DEFAULT_MAX_RESULTS, timeout=DEFAULT_TIMEOUT):
|
|||
|
||||
def _find(query):
|
||||
url = 'http://ajax.googleapis.com/ajax/services/search/web?v=1.0&q=%s' % quote_plus(query)
|
||||
results = simplejson.loads(ox.cache.readUrlUnicode(url))['responseData']['results']
|
||||
results = simplejson.loads(oxlib.cache.readUrlUnicode(url))['responseData']['results']
|
||||
return results
|
||||
|
|
@ -8,19 +8,19 @@ import time
|
|||
|
||||
from BeautifulSoup import BeautifulSoup
|
||||
import chardet
|
||||
import ox
|
||||
from ox import stripTags, decodeHtml, findRe, findString
|
||||
import ox.cache
|
||||
from ox.normalize import normalizeTitle, normalizeImdbId
|
||||
from ox import *
|
||||
import oxlib
|
||||
from oxlib import stripTags, decodeHtml, findRe, findString
|
||||
import oxlib.cache
|
||||
from oxlib.normalize import normalizeTitle, normalizeImdbId
|
||||
from oxlib import *
|
||||
|
||||
import google
|
||||
|
||||
'''
|
||||
never timeout imdb data, to update cache remove data from cache folder
|
||||
'''
|
||||
def readUrlUnicode(url, data=None, headers=ox.cache.DEFAULT_HEADERS, timeout=-1):
|
||||
return ox.cache.readUrlUnicode(url, data, headers, timeout)
|
||||
def readUrlUnicode(url, data=None, headers=oxlib.cache.DEFAULT_HEADERS, timeout=-1):
|
||||
return oxlib.cache.readUrlUnicode(url, data, headers, timeout)
|
||||
|
||||
'''
|
||||
check if result is valid while updating
|
||||
|
@ -28,8 +28,8 @@ def validate(result, header):
|
|||
return header['status'] == u'200'
|
||||
|
||||
try:
|
||||
d = ox.cache.readUrlUnicode(url, data, headers, timeout=0, valid=validate)
|
||||
except ox.cache.InvalidResult, e:
|
||||
d = oxlib.cache.readUrlUnicode(url, data, headers, timeout=0, valid=validate)
|
||||
except oxlib.cache.InvalidResult, e:
|
||||
print e.headers
|
||||
|
||||
'''
|
||||
|
@ -682,10 +682,10 @@ def guess(title, director=''):
|
|||
search = 'site:imdb.com "%s"' % title
|
||||
for (name, url, desc) in google.find(search, 2):
|
||||
if url.startswith('http://www.imdb.com/title/tt'):
|
||||
return normalizeImdbId(int(ox.intValue(url)))
|
||||
return normalizeImdbId(int(oxlib.intValue(url)))
|
||||
|
||||
try:
|
||||
req = urllib2.Request(imdb_url, None, ox.net.DEFAULT_HEADERS)
|
||||
req = urllib2.Request(imdb_url, None, oxlib.net.DEFAULT_HEADERS)
|
||||
u = urllib2.urlopen(req)
|
||||
data = u.read()
|
||||
return_url = u.url
|
||||
|
@ -700,7 +700,7 @@ def guess(title, director=''):
|
|||
return imdb_id
|
||||
|
||||
imdb_url = 'http://www.imdb.com/find?q=%s;s=tt;site=aka' % quote(title.encode('utf-8'))
|
||||
req = urllib2.Request(imdb_url, None, ox.net.DEFAULT_HEADERS)
|
||||
req = urllib2.Request(imdb_url, None, oxlib.net.DEFAULT_HEADERS)
|
||||
u = urllib2.urlopen(req)
|
||||
data = u.read()
|
||||
return_url = u.url
|
|
@ -2,9 +2,9 @@
|
|||
# encoding: utf-8
|
||||
import re
|
||||
|
||||
from ox.cache import readUrlUnicode
|
||||
from ox.html import stripTags
|
||||
from ox.text import findRe
|
||||
from oxlib.cache import readUrlUnicode
|
||||
from oxlib.html import stripTags
|
||||
from oxlib.text import findRe
|
||||
|
||||
import imdb
|
||||
|
|
@ -3,10 +3,10 @@
|
|||
import re
|
||||
import urllib
|
||||
|
||||
from ox.cache import readUrl
|
||||
from ox.html import decodeHtml, stripTags
|
||||
from ox.text import findRe
|
||||
from ox.text import findString
|
||||
from oxlib.cache import readUrl
|
||||
from oxlib.html import decodeHtml, stripTags
|
||||
from oxlib.text import findRe
|
||||
from oxlib.text import findString
|
||||
|
||||
|
||||
# to sniff itunes traffic, use something like
|
|
@ -1,7 +1,7 @@
|
|||
import re
|
||||
from ox import cache
|
||||
from ox.html import stripTags
|
||||
from ox.text import findRe
|
||||
from oxlib import cache
|
||||
from oxlib.html import stripTags
|
||||
from oxlib.text import findRe
|
||||
|
||||
import auth
|
||||
|
|
@ -1,8 +1,8 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
# vi:si:et:sw=4:sts=4:ts=4
|
||||
from ox.cache import readUrl
|
||||
from ox.html import decodeHtml
|
||||
from ox.text import findRe
|
||||
from oxlib.cache import readUrl
|
||||
from oxlib.html import decodeHtml
|
||||
from oxlib.text import findRe
|
||||
|
||||
|
||||
def getLyrics(title, artist):
|
|
@ -3,8 +3,8 @@
|
|||
import re
|
||||
from urllib import quote
|
||||
|
||||
from ox.cache import readUrl, readUrlUnicode
|
||||
from ox import findRe, decodeHtml, stripTags
|
||||
from oxlib.cache import readUrl, readUrlUnicode
|
||||
from oxlib import findRe, decodeHtml, stripTags
|
||||
|
||||
|
||||
def getMetacriticShowUrl(title):
|
|
@ -5,10 +5,10 @@ import re
|
|||
import socket
|
||||
from urllib import quote
|
||||
|
||||
from ox.cache import readUrl, readUrlUnicode
|
||||
from ox import findRe, cache, stripTags, decodeHtml, getTorrentInfo, intValue, normalizeNewlines
|
||||
from ox.normalize import normalizeImdbId
|
||||
import ox
|
||||
from oxlib.cache import readUrl, readUrlUnicode
|
||||
from oxlib import findRe, cache, stripTags, decodeHtml, getTorrentInfo, intValue, normalizeNewlines
|
||||
from oxlib.normalize import normalizeImdbId
|
||||
import oxlib
|
||||
|
||||
from torrent import Torrent
|
||||
|
||||
|
@ -55,7 +55,7 @@ def getId(mininovaId):
|
|||
|
||||
def exists(mininovaId):
|
||||
mininovaId = getId(mininovaId)
|
||||
data = ox.net.readUrl("http://www.mininova.org/tor/%s" % mininovaId)
|
||||
data = oxlib.net.readUrl("http://www.mininova.org/tor/%s" % mininovaId)
|
||||
if not data or 'Torrent not found...' in data:
|
||||
return False
|
||||
if 'tracker</a> of this torrent requires registration.' in data:
|
|
@ -3,8 +3,8 @@
|
|||
|
||||
import re
|
||||
|
||||
from ox.cache import readUrlUnicode
|
||||
from ox import findRe
|
||||
from oxlib.cache import readUrlUnicode
|
||||
from oxlib import findRe
|
||||
|
||||
def getData(id):
|
||||
'''
|
|
@ -3,9 +3,9 @@
|
|||
import re
|
||||
|
||||
import feedparser
|
||||
from ox.cache import readUrl, readUrlUnicode
|
||||
import ox
|
||||
from ox import langCode2To3, langTo3Code
|
||||
from oxlib.cache import readUrl, readUrlUnicode
|
||||
from oxlib import findRe, stripTags
|
||||
from oxlib import langCode2To3, langTo3Code
|
||||
|
||||
def findSubtitlesByImdb(imdb, parts = 1, language = "eng"):
|
||||
if len(language) == 2:
|
||||
|
@ -26,7 +26,7 @@ def findSubtitlesByImdb(imdb, parts = 1, language = "eng"):
|
|||
if opensubtitleId:
|
||||
opensubtitleId = opensubtitleId[0]
|
||||
else:
|
||||
opensubtitleId = ox.findRe(data, '/en/subtitles/(.*?)/')
|
||||
opensubtitleId = findRe(data, '/en/subtitles/(.*?)/')
|
||||
return opensubtitleId
|
||||
|
||||
def downloadSubtitleById(opensubtitle_id):
|
||||
|
@ -34,7 +34,7 @@ def downloadSubtitleById(opensubtitle_id):
|
|||
data = readUrl('http://www.opensubtitles.org/en/subtitles/%s' % opensubtitle_id)
|
||||
reg_exp = 'href="(/en/download/file/.*?)">(.*?)</a>'
|
||||
for f in re.compile(reg_exp, re.DOTALL).findall(data):
|
||||
name = ox.stripTags(f[1]).split('\n')[0]
|
||||
name = stripTags(f[1]).split('\n')[0]
|
||||
url = "http://www.opensubtitles.com%s" % f[0]
|
||||
srts[name] = readUrlUnicode(url)
|
||||
return srts
|
|
@ -1,6 +1,6 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
# vi:si:et:sw=4:sts=4:ts=4
|
||||
import ox.cache
|
||||
import oxlib.cache
|
||||
|
||||
def getPosterUrl(id):
|
||||
url = "http://0xdb.org/%s/poster.0xdb.jpg" % id
|
|
@ -1,7 +1,7 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
# vi:si:et:sw=4:sts=4:ts=4
|
||||
import ox.cache
|
||||
from ox.cache import exists
|
||||
import oxlib.cache
|
||||
from oxlib.cache import exists
|
||||
|
||||
|
||||
def getPosterUrl(id):
|
|
@ -2,8 +2,8 @@
|
|||
# vi:si:et:sw=4:sts=4:ts=4
|
||||
import re
|
||||
|
||||
from ox.cache import getHeaders, readUrl, readUrlUnicode
|
||||
from ox import findRe, stripTags
|
||||
from oxlib.cache import getHeaders, readUrl, readUrlUnicode
|
||||
from oxlib import findRe, stripTags
|
||||
|
||||
|
||||
def readUrlByImdb(imdb):
|
|
@ -6,9 +6,9 @@ import time
|
|||
|
||||
from BeautifulSoup import BeautifulSoup
|
||||
|
||||
import ox.cache
|
||||
from ox.html import decodeHtml, stripTags
|
||||
import ox.net
|
||||
import oxlib.cache
|
||||
from oxlib.html import decodeHtml, stripTags
|
||||
import oxlib.net
|
||||
|
||||
|
||||
def getNews(year, month, day):
|
||||
|
@ -23,9 +23,9 @@ def getNews(year, month, day):
|
|||
for section in sections:
|
||||
url = 'http://www.spiegel.de/%s/0,1518,archiv-%d-%03d,00.html' % (section, year, day)
|
||||
if date == time.strftime('%d.%m.%Y', time.localtime()):
|
||||
html = ox.net.readUrl(url)
|
||||
html = oxlib.net.readUrl(url)
|
||||
else:
|
||||
html = ox.cache.readUrl(url)
|
||||
html = oxlib.cache.readUrl(url)
|
||||
for item in re.compile('<div class="spTeaserCenterpage(.*?)</p>', re.DOTALL).findall(html):
|
||||
dateString = stripTags(re.compile('<div class="spDateTime">(.*?)</div>', re.DOTALL).findall(item)[0]).strip()
|
||||
try:
|
||||
|
@ -102,11 +102,11 @@ def formatSubsection(string):
|
|||
|
||||
def getIssue(year, week):
|
||||
coverUrl = 'http://www.spiegel.de/static/epaper/SP/%d/%d/ROSPANZ%d%03d0001-312.jpg' % (year, week, year, week)
|
||||
if not ox.net.exists(coverUrl):
|
||||
if not oxlib.net.exists(coverUrl):
|
||||
return None
|
||||
url = 'http://service.spiegel.de/digas/servlet/epaper?Q=SP&JG=%d&AG=%d&SE=1&AN=INHALT' % (year, week)
|
||||
contents = []
|
||||
soup = BeautifulSoup(ox.cache.readUrl(url))
|
||||
soup = BeautifulSoup(oxlib.cache.readUrl(url))
|
||||
for item in soup('a', {'href': re.compile('http://service.spiegel.de/digas/servlet/epaper\?Q=SP&JG=')}):
|
||||
item = str(item)
|
||||
page = int(re.compile('&SE=(.*?)"').findall(item)[0])
|
||||
|
@ -116,7 +116,7 @@ def getIssue(year, week):
|
|||
pages = page + 2
|
||||
for page in range(1, pages + 10):
|
||||
url = 'http://www.spiegel.de/static/epaper/SP/%d/%d/ROSPANZ%d%03d%04d-205.jpg' % (year, week, year, week, page)
|
||||
if ox.cache.exists(url):
|
||||
if oxlib.cache.exists(url):
|
||||
pageUrl[page] = url
|
||||
else:
|
||||
pageUrl[page] = ''
|
||||
|
@ -164,7 +164,7 @@ def archiveIssues():
|
|||
f.close()
|
||||
filename = '%s/Der Spiegel %d %02d.jpg' % (dirname, y, w)
|
||||
if not os.path.exists(filename):
|
||||
data = ox.cache.readUrl(issue['coverUrl'])
|
||||
data = oxlib.cache.readUrl(issue['coverUrl'])
|
||||
f = open(filename, 'w')
|
||||
f.write(data)
|
||||
f.close()
|
||||
|
@ -173,7 +173,7 @@ def archiveIssues():
|
|||
if url:
|
||||
filename = '%s/Der Spiegel %d %02d %03d.jpg' % (dirname, y, w, page)
|
||||
if not os.path.exists(filename):
|
||||
data = ox.cache.readUrl(url)
|
||||
data = oxlib.cache.readUrl(url)
|
||||
f = open(filename, 'w')
|
||||
f.write(data)
|
||||
f.close()
|
||||
|
@ -244,7 +244,7 @@ def archiveNews():
|
|||
f.close()
|
||||
filename = dirname + '/' + new['imageUrl'].split('/')[-1]
|
||||
if not os.path.exists(filename):
|
||||
data = ox.cache.readUrl(new['imageUrl'])
|
||||
data = oxlib.cache.readUrl(new['imageUrl'])
|
||||
f = open(filename, 'w')
|
||||
f.write(data)
|
||||
f.close()
|
|
@ -6,10 +6,10 @@ import socket
|
|||
from urllib import quote, urlencode
|
||||
from urllib2 import URLError
|
||||
|
||||
from ox.cache import readUrl, readUrlUnicode
|
||||
from ox import findRe, cache, stripTags, decodeHtml, getTorrentInfo, normalizeNewlines
|
||||
from ox.normalize import normalizeImdbId
|
||||
import ox
|
||||
from oxlib.cache import readUrl, readUrlUnicode
|
||||
from oxlib import findRe, cache, stripTags, decodeHtml, getTorrentInfo, normalizeNewlines
|
||||
from oxlib.normalize import normalizeImdbId
|
||||
import oxlib
|
||||
|
||||
from torrent import Torrent
|
||||
|
||||
|
@ -67,7 +67,7 @@ def getId(piratebayId):
|
|||
|
||||
def exists(piratebayId):
|
||||
piratebayId = getId(piratebayId)
|
||||
return ox.net.exists("http://thepiratebay.org/torrent/%s" % piratebayId)
|
||||
return oxlib.net.exists("http://thepiratebay.org/torrent/%s" % piratebayId)
|
||||
|
||||
def getData(piratebayId):
|
||||
_key_map = {
|
|
@ -1,6 +1,6 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
# vi:si:et:sw=4:sts=4:ts=4
|
||||
from ox import intValue
|
||||
from oxlib import intValue
|
||||
|
||||
|
||||
class Torrent(dict):
|
|
@ -3,8 +3,8 @@
|
|||
import re
|
||||
import time
|
||||
|
||||
from ox import stripTags, findRe
|
||||
from ox.cache import readUrlUnicode
|
||||
from oxlib import stripTags, findRe
|
||||
from oxlib.cache import readUrlUnicode
|
||||
|
||||
|
||||
def getEpisodeData(url):
|
|
@ -3,8 +3,8 @@
|
|||
from urllib import urlencode
|
||||
|
||||
import simplejson
|
||||
from ox.cache import readUrl, readUrlUnicode
|
||||
from ox import findRe, decodeHtml
|
||||
from oxlib.cache import readUrl, readUrlUnicode
|
||||
from oxlib import findRe, decodeHtml
|
||||
|
||||
|
||||
def getId(url):
|
|
@ -6,8 +6,8 @@ import xml.etree.ElementTree as ET
|
|||
import re
|
||||
|
||||
import feedparser
|
||||
from ox.cache import readUrl, readUrlUnicode
|
||||
from ox import findString, findRe
|
||||
from oxlib.cache import readUrl, readUrlUnicode
|
||||
from oxlib import findString, findRe
|
||||
|
||||
|
||||
def getVideoKey(youtubeId):
|
|
@ -1 +1 @@
|
|||
ox
|
||||
oxlib
|
||||
|
|
5
setup.py
5
setup.py
|
@ -16,11 +16,10 @@ setup(
|
|||
description="collection of scrapers for the web",
|
||||
author="0x",
|
||||
author_email="code@0xdb.org",
|
||||
url="http://code.0xdb.org/oxweb",
|
||||
url="http://code.0xdb.org/python-oxweb",
|
||||
download_url="http://code.0xdb.org/oxweb/download",
|
||||
license="GPLv3",
|
||||
package_dir = {'ox.web': 'web'},
|
||||
packages=['ox.web'],
|
||||
packages=['oxweb'],
|
||||
keywords = [
|
||||
],
|
||||
classifiers = [
|
||||
|
|
Loading…
Reference in a new issue