rename oxutils -> oxlib

This commit is contained in:
j 2008-07-03 11:24:49 +02:00
parent 6a16a0af30
commit 68f7621ac7
15 changed files with 59 additions and 59 deletions

View file

@ -2,9 +2,9 @@
# vi:si:et:sw=4:sts=4:ts=4
import re
from oxutils.cache import getUrlUnicode
from oxutils.html import stripTags
from oxutils.text import findRe, removeSpecialCharacters
from oxlib.cache import getUrlUnicode
from oxlib.html import stripTags
from oxlib.text import findRe, removeSpecialCharacters
import imdb

View file

@ -2,7 +2,7 @@
# vi:si:et:sw=4:sts=4:ts=4
import re
from urllib import unquote
from oxutils.cache import getUrl
from oxlib.cache import getUrl
def getVideoUrl(url):

View file

@ -8,8 +8,8 @@ import weakref
import threading
import Queue
import oxutils
from oxutils import stripTags
import oxlib
from oxlib import stripTags
'''
@ -27,9 +27,9 @@ FIXME: how to search deeper than first page?
'''
DEFAULT_MAX_RESULTS = 10
def getUrl(url, data=None, headers=oxutils.net.DEFAULT_HEADERS):
def getUrl(url, data=None, headers=oxlib.net.DEFAULT_HEADERS):
google_timeout=24*60*60
return oxutils.cache.getUrl(url, data, headers, google_timeout)
return oxlib.cache.getUrl(url, data, headers, google_timeout)
def quote_plus(s):
return urllib.quote_plus(s.encode('utf-8'))

View file

@ -8,11 +8,11 @@ import time
from BeautifulSoup import BeautifulSoup
import chardet
import oxutils
from oxutils import stripTags, decodeHtml, findRe, findString
from oxutils.cache import getUrl, getUrlUnicode
from oxutils.normalize import normalizeTitle, normalizeImdbId
from oxutils import *
import oxlib
from oxlib import stripTags, decodeHtml, findRe, findString
from oxlib.cache import getUrl, getUrlUnicode
from oxlib.normalize import normalizeTitle, normalizeImdbId
from oxlib import *
import google
@ -614,7 +614,7 @@ def guess(title, director=''):
return url[28:35]
try:
req = urllib2.Request(imdb_url, None, oxutils.net.DEFAULT_HEADERS)
req = urllib2.Request(imdb_url, None, oxlib.net.DEFAULT_HEADERS)
u = urllib2.urlopen(req)
data = u.read()
return_url = u.url
@ -629,7 +629,7 @@ def guess(title, director=''):
return imdb_id
imdb_url = 'http://www.imdb.com/find?q=%s;s=tt;site=aka' % quote(title.encode('utf-8'))
req = urllib2.Request(imdb_url, None, oxutils.net.DEFAULT_HEADERS)
req = urllib2.Request(imdb_url, None, oxlib.net.DEFAULT_HEADERS)
u = urllib2.urlopen(req)
data = u.read()
return_url = u.url

View file

@ -2,9 +2,9 @@
# encoding: utf-8
import re
from oxutils.cache import getUrlUnicode
from oxutils.html import stripTags
from oxutils.text import findRe
from oxlib.cache import getUrlUnicode
from oxlib.html import stripTags
from oxlib.text import findRe
import imdb
@ -61,7 +61,7 @@ def parsePosterPage(html, year):
def archivePosters():
import os
from oxutils.net import getUrl
from oxlib.net import getUrl
pathname = '/Volumes/Rolux Home/Desktop/Data/impawards.com'
html = getUrlUnicode('http://impawards.com/archives/latest.html', timeout = 0)
pages = int(findRe(html, '<a href = page(.*?).html>'))

View file

@ -3,10 +3,10 @@
import re
import urllib
from oxutils.cache import getUrl
from oxutils.html import decodeHtml, stripTags
from oxutils.text import findRe
from oxutils.text import findString
from oxlib.cache import getUrl
from oxlib.html import decodeHtml, stripTags
from oxlib.text import findRe
from oxlib.text import findString
# to sniff itunes traffic, use something like

View file

@ -1,8 +1,8 @@
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
from oxutils.cache import getUrl
from oxutils.html import decodeHtml
from oxutils.text import findRe
from oxlib.cache import getUrl
from oxlib.html import decodeHtml
from oxlib.text import findRe
def getLyrics(title, artist):

View file

@ -5,10 +5,10 @@ import re
import socket
from urllib import quote
from oxutils.cache import getUrl, getUrlUnicode
from oxutils import findRe, cache, stripTags, decodeHtml, getTorrentInfo, intValue, normalizeNewlines
from oxutils.normalize import normalizeImdbId
import oxutils
from oxlib.cache import getUrl, getUrlUnicode
from oxlib import findRe, cache, stripTags, decodeHtml, getTorrentInfo, intValue, normalizeNewlines
from oxlib.normalize import normalizeImdbId
import oxlib
from torrent import Torrent
@ -55,7 +55,7 @@ def getId(mininovaId):
def exists(mininovaId):
mininovaId = getId(mininovaId)
data = oxutils.net.getUrl("http://www.mininova.org/tor/%s" % mininovaId)
data = oxlib.net.getUrl("http://www.mininova.org/tor/%s" % mininovaId)
if not data or 'Torrent not found...' in data:
return False
if 'tracker</a> of this torrent requires registration.' in data:

View file

@ -3,9 +3,9 @@
import re
import feedparser
from oxutils.cache import getUrl, getUrlUnicode
import oxutils
from oxutils.lang import langCode2To3, langTo3Code
from oxlib.cache import getUrl, getUrlUnicode
import oxlib
from oxlib.lang import langCode2To3, langTo3Code
def findSubtitlesByImdb(imdb, parts = 1, language = "eng"):
if len(language) == 2:
@ -26,7 +26,7 @@ def findSubtitlesByImdb(imdb, parts = 1, language = "eng"):
if opensubtitleId:
opensubtitleId = opensubtitleId[0]
else:
opensubtitleId = oxutils.findRe(data, '/en/subtitles/(.*?)/')
opensubtitleId = oxlib.findRe(data, '/en/subtitles/(.*?)/')
return opensubtitleId
def downloadSubtitleById(opensubtitle_id):
@ -34,7 +34,7 @@ def downloadSubtitleById(opensubtitle_id):
data = getUrl('http://www.opensubtitles.org/en/subtitles/%s' % opensubtitle_id)
reg_exp = 'href="(/en/download/file/.*?)">(.*?)</a>'
for f in re.compile(reg_exp, re.DOTALL).findall(data):
name = oxutils.stripTags(f[1]).split('\n')[0]
name = oxlib.stripTags(f[1]).split('\n')[0]
url = "http://www.opensubtitles.com%s" % f[0]
srts[name] = getUrlUnicode(url)
return srts

View file

@ -6,9 +6,9 @@ import time
from BeautifulSoup import BeautifulSoup
import oxutils.cache
from oxutils.html import decodeHtml, stripTags
import oxutils.net
import oxlib.cache
from oxlib.html import decodeHtml, stripTags
import oxlib.net
def getNews(year, month, day):
@ -23,9 +23,9 @@ def getNews(year, month, day):
for section in sections:
url = 'http://www.spiegel.de/%s/0,1518,archiv-%d-%03d,00.html' % (section, year, day)
if date == time.strftime('%d.%m.%Y', time.localtime()):
html = oxutils.net.getUrl(url)
html = oxlib.net.getUrl(url)
else:
html = oxutils.cache.getUrl(url)
html = oxlib.cache.getUrl(url)
for item in re.compile('<div class="spTeaserCenterpage(.*?)</p>', re.DOTALL).findall(html):
dateString = stripTags(re.compile('<div class="spDateTime">(.*?)</div>', re.DOTALL).findall(item)[0]).strip()
try:
@ -102,11 +102,11 @@ def formatSubsection(string):
def getIssue(year, week):
coverUrl = 'http://www.spiegel.de/static/epaper/SP/%d/%d/ROSPANZ%d%03d0001-312.jpg' % (year, week, year, week)
if not oxutils.net.exists(coverUrl):
if not oxlib.net.exists(coverUrl):
return None
url = 'http://service.spiegel.de/digas/servlet/epaper?Q=SP&JG=%d&AG=%d&SE=1&AN=INHALT' % (year, week)
contents = []
soup = BeautifulSoup(oxutils.cache.getUrl(url))
soup = BeautifulSoup(oxlib.cache.getUrl(url))
for item in soup('a', {'href': re.compile('http://service.spiegel.de/digas/servlet/epaper\?Q=SP&JG=')}):
item = str(item)
page = int(re.compile('&amp;SE=(.*?)"').findall(item)[0])
@ -116,7 +116,7 @@ def getIssue(year, week):
pages = page + 2
for page in range(1, pages + 10):
url = 'http://www.spiegel.de/static/epaper/SP/%d/%d/ROSPANZ%d%03d%04d-205.jpg' % (year, week, year, week, page)
if oxutils.cache.exists(url):
if oxlib.cache.exists(url):
pageUrl[page] = url
else:
pageUrl[page] = ''
@ -164,7 +164,7 @@ def archiveIssues():
f.close()
filename = '%s/Der Spiegel %d %02d.jpg' % (dirname, y, w)
if not os.path.exists(filename):
data = oxutils.cache.getUrl(issue['coverUrl'])
data = oxlib.cache.getUrl(issue['coverUrl'])
f = open(filename, 'w')
f.write(data)
f.close()
@ -173,7 +173,7 @@ def archiveIssues():
if url:
filename = '%s/Der Spiegel %d %02d %03d.jpg' % (dirname, y, w, page)
if not os.path.exists(filename):
data = oxutils.cache.getUrl(url)
data = oxlib.cache.getUrl(url)
f = open(filename, 'w')
f.write(data)
f.close()
@ -244,7 +244,7 @@ def archiveNews():
f.close()
filename = dirname + '/' + new['imageUrl'].split('/')[-1]
if not os.path.exists(filename):
data = oxutils.cache.getUrl(new['imageUrl'])
data = oxlib.cache.getUrl(new['imageUrl'])
f = open(filename, 'w')
f.write(data)
f.close()

View file

@ -6,10 +6,10 @@ import socket
from urllib import quote, urlencode
from urllib2 import URLError
from oxutils.cache import getUrl, getUrlUnicode
from oxutils import findRe, cache, stripTags, decodeHtml, getTorrentInfo, normalizeNewlines
from oxutils.normalize import normalizeImdbId
import oxutils
from oxlib.cache import getUrl, getUrlUnicode
from oxlib import findRe, cache, stripTags, decodeHtml, getTorrentInfo, normalizeNewlines
from oxlib.normalize import normalizeImdbId
import oxlib
from torrent import Torrent
@ -63,7 +63,7 @@ def getId(piratebayId):
def exists(piratebayId):
piratebayId = getId(piratebayId)
return oxutils.net.exists("http://thepiratebay.org/tor/%s" % piratebayId)
return oxlib.net.exists("http://thepiratebay.org/tor/%s" % piratebayId)
def getData(piratebayId):
_key_map = {

View file

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
from oxutils import intValue
from oxlib import intValue
class Torrent(dict):

View file

@ -3,8 +3,8 @@
from urllib import urlencode
import simplejson
from oxutils.cache import getUrl, getUrlUnicode
from oxutils import findRe, decodeHtml
from oxlib.cache import getUrl, getUrlUnicode
from oxlib import findRe, decodeHtml
def getMovieId(title, director='', year=''):

View file

@ -4,8 +4,8 @@ from urllib import quote
import xml.etree.ElementTree as ET
import feedparser
from oxutils.cache import getUrl
from oxutils import findString
from oxlib.cache import getUrl
from oxlib import findString
def getVideoUrl(youtubeId, format='mp4'):

View file

@ -11,8 +11,8 @@ setup(
description="collection of scrapers for various websites",
author="0x",
author_email="code@0xdb.org",
url="http://code.0xdb.org/ox",
download_url="http://code.0xdb.org/ox/download",
url="http://code.0xdb.org/oxweb",
download_url="http://code.0xdb.org/oxweb/download",
license="GPLv3",
packages=find_packages(),
zip_safe=False,