rename oxutils -> oxlib

This commit is contained in:
j 2008-07-03 11:24:49 +02:00
parent 6a16a0af30
commit 68f7621ac7
15 changed files with 59 additions and 59 deletions

View file

@ -2,9 +2,9 @@
# vi:si:et:sw=4:sts=4:ts=4 # vi:si:et:sw=4:sts=4:ts=4
import re import re
from oxutils.cache import getUrlUnicode from oxlib.cache import getUrlUnicode
from oxutils.html import stripTags from oxlib.html import stripTags
from oxutils.text import findRe, removeSpecialCharacters from oxlib.text import findRe, removeSpecialCharacters
import imdb import imdb

View file

@ -2,7 +2,7 @@
# vi:si:et:sw=4:sts=4:ts=4 # vi:si:et:sw=4:sts=4:ts=4
import re import re
from urllib import unquote from urllib import unquote
from oxutils.cache import getUrl from oxlib.cache import getUrl
def getVideoUrl(url): def getVideoUrl(url):

View file

@ -8,8 +8,8 @@ import weakref
import threading import threading
import Queue import Queue
import oxutils import oxlib
from oxutils import stripTags from oxlib import stripTags
''' '''
@ -27,9 +27,9 @@ FIXME: how to search deeper than first page?
''' '''
DEFAULT_MAX_RESULTS = 10 DEFAULT_MAX_RESULTS = 10
def getUrl(url, data=None, headers=oxutils.net.DEFAULT_HEADERS): def getUrl(url, data=None, headers=oxlib.net.DEFAULT_HEADERS):
google_timeout=24*60*60 google_timeout=24*60*60
return oxutils.cache.getUrl(url, data, headers, google_timeout) return oxlib.cache.getUrl(url, data, headers, google_timeout)
def quote_plus(s): def quote_plus(s):
return urllib.quote_plus(s.encode('utf-8')) return urllib.quote_plus(s.encode('utf-8'))

View file

@ -8,11 +8,11 @@ import time
from BeautifulSoup import BeautifulSoup from BeautifulSoup import BeautifulSoup
import chardet import chardet
import oxutils import oxlib
from oxutils import stripTags, decodeHtml, findRe, findString from oxlib import stripTags, decodeHtml, findRe, findString
from oxutils.cache import getUrl, getUrlUnicode from oxlib.cache import getUrl, getUrlUnicode
from oxutils.normalize import normalizeTitle, normalizeImdbId from oxlib.normalize import normalizeTitle, normalizeImdbId
from oxutils import * from oxlib import *
import google import google
@ -614,7 +614,7 @@ def guess(title, director=''):
return url[28:35] return url[28:35]
try: try:
req = urllib2.Request(imdb_url, None, oxutils.net.DEFAULT_HEADERS) req = urllib2.Request(imdb_url, None, oxlib.net.DEFAULT_HEADERS)
u = urllib2.urlopen(req) u = urllib2.urlopen(req)
data = u.read() data = u.read()
return_url = u.url return_url = u.url
@ -629,7 +629,7 @@ def guess(title, director=''):
return imdb_id return imdb_id
imdb_url = 'http://www.imdb.com/find?q=%s;s=tt;site=aka' % quote(title.encode('utf-8')) imdb_url = 'http://www.imdb.com/find?q=%s;s=tt;site=aka' % quote(title.encode('utf-8'))
req = urllib2.Request(imdb_url, None, oxutils.net.DEFAULT_HEADERS) req = urllib2.Request(imdb_url, None, oxlib.net.DEFAULT_HEADERS)
u = urllib2.urlopen(req) u = urllib2.urlopen(req)
data = u.read() data = u.read()
return_url = u.url return_url = u.url

View file

@ -2,9 +2,9 @@
# encoding: utf-8 # encoding: utf-8
import re import re
from oxutils.cache import getUrlUnicode from oxlib.cache import getUrlUnicode
from oxutils.html import stripTags from oxlib.html import stripTags
from oxutils.text import findRe from oxlib.text import findRe
import imdb import imdb
@ -61,7 +61,7 @@ def parsePosterPage(html, year):
def archivePosters(): def archivePosters():
import os import os
from oxutils.net import getUrl from oxlib.net import getUrl
pathname = '/Volumes/Rolux Home/Desktop/Data/impawards.com' pathname = '/Volumes/Rolux Home/Desktop/Data/impawards.com'
html = getUrlUnicode('http://impawards.com/archives/latest.html', timeout = 0) html = getUrlUnicode('http://impawards.com/archives/latest.html', timeout = 0)
pages = int(findRe(html, '<a href = page(.*?).html>')) pages = int(findRe(html, '<a href = page(.*?).html>'))

View file

@ -3,10 +3,10 @@
import re import re
import urllib import urllib
from oxutils.cache import getUrl from oxlib.cache import getUrl
from oxutils.html import decodeHtml, stripTags from oxlib.html import decodeHtml, stripTags
from oxutils.text import findRe from oxlib.text import findRe
from oxutils.text import findString from oxlib.text import findString
# to sniff itunes traffic, use something like # to sniff itunes traffic, use something like

View file

@ -1,8 +1,8 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4 # vi:si:et:sw=4:sts=4:ts=4
from oxutils.cache import getUrl from oxlib.cache import getUrl
from oxutils.html import decodeHtml from oxlib.html import decodeHtml
from oxutils.text import findRe from oxlib.text import findRe
def getLyrics(title, artist): def getLyrics(title, artist):

View file

@ -5,10 +5,10 @@ import re
import socket import socket
from urllib import quote from urllib import quote
from oxutils.cache import getUrl, getUrlUnicode from oxlib.cache import getUrl, getUrlUnicode
from oxutils import findRe, cache, stripTags, decodeHtml, getTorrentInfo, intValue, normalizeNewlines from oxlib import findRe, cache, stripTags, decodeHtml, getTorrentInfo, intValue, normalizeNewlines
from oxutils.normalize import normalizeImdbId from oxlib.normalize import normalizeImdbId
import oxutils import oxlib
from torrent import Torrent from torrent import Torrent
@ -55,7 +55,7 @@ def getId(mininovaId):
def exists(mininovaId): def exists(mininovaId):
mininovaId = getId(mininovaId) mininovaId = getId(mininovaId)
data = oxutils.net.getUrl("http://www.mininova.org/tor/%s" % mininovaId) data = oxlib.net.getUrl("http://www.mininova.org/tor/%s" % mininovaId)
if not data or 'Torrent not found...' in data: if not data or 'Torrent not found...' in data:
return False return False
if 'tracker</a> of this torrent requires registration.' in data: if 'tracker</a> of this torrent requires registration.' in data:

View file

@ -3,9 +3,9 @@
import re import re
import feedparser import feedparser
from oxutils.cache import getUrl, getUrlUnicode from oxlib.cache import getUrl, getUrlUnicode
import oxutils import oxlib
from oxutils.lang import langCode2To3, langTo3Code from oxlib.lang import langCode2To3, langTo3Code
def findSubtitlesByImdb(imdb, parts = 1, language = "eng"): def findSubtitlesByImdb(imdb, parts = 1, language = "eng"):
if len(language) == 2: if len(language) == 2:
@ -26,7 +26,7 @@ def findSubtitlesByImdb(imdb, parts = 1, language = "eng"):
if opensubtitleId: if opensubtitleId:
opensubtitleId = opensubtitleId[0] opensubtitleId = opensubtitleId[0]
else: else:
opensubtitleId = oxutils.findRe(data, '/en/subtitles/(.*?)/') opensubtitleId = oxlib.findRe(data, '/en/subtitles/(.*?)/')
return opensubtitleId return opensubtitleId
def downloadSubtitleById(opensubtitle_id): def downloadSubtitleById(opensubtitle_id):
@ -34,7 +34,7 @@ def downloadSubtitleById(opensubtitle_id):
data = getUrl('http://www.opensubtitles.org/en/subtitles/%s' % opensubtitle_id) data = getUrl('http://www.opensubtitles.org/en/subtitles/%s' % opensubtitle_id)
reg_exp = 'href="(/en/download/file/.*?)">(.*?)</a>' reg_exp = 'href="(/en/download/file/.*?)">(.*?)</a>'
for f in re.compile(reg_exp, re.DOTALL).findall(data): for f in re.compile(reg_exp, re.DOTALL).findall(data):
name = oxutils.stripTags(f[1]).split('\n')[0] name = oxlib.stripTags(f[1]).split('\n')[0]
url = "http://www.opensubtitles.com%s" % f[0] url = "http://www.opensubtitles.com%s" % f[0]
srts[name] = getUrlUnicode(url) srts[name] = getUrlUnicode(url)
return srts return srts

View file

@ -6,9 +6,9 @@ import time
from BeautifulSoup import BeautifulSoup from BeautifulSoup import BeautifulSoup
import oxutils.cache import oxlib.cache
from oxutils.html import decodeHtml, stripTags from oxlib.html import decodeHtml, stripTags
import oxutils.net import oxlib.net
def getNews(year, month, day): def getNews(year, month, day):
@ -23,9 +23,9 @@ def getNews(year, month, day):
for section in sections: for section in sections:
url = 'http://www.spiegel.de/%s/0,1518,archiv-%d-%03d,00.html' % (section, year, day) url = 'http://www.spiegel.de/%s/0,1518,archiv-%d-%03d,00.html' % (section, year, day)
if date == time.strftime('%d.%m.%Y', time.localtime()): if date == time.strftime('%d.%m.%Y', time.localtime()):
html = oxutils.net.getUrl(url) html = oxlib.net.getUrl(url)
else: else:
html = oxutils.cache.getUrl(url) html = oxlib.cache.getUrl(url)
for item in re.compile('<div class="spTeaserCenterpage(.*?)</p>', re.DOTALL).findall(html): for item in re.compile('<div class="spTeaserCenterpage(.*?)</p>', re.DOTALL).findall(html):
dateString = stripTags(re.compile('<div class="spDateTime">(.*?)</div>', re.DOTALL).findall(item)[0]).strip() dateString = stripTags(re.compile('<div class="spDateTime">(.*?)</div>', re.DOTALL).findall(item)[0]).strip()
try: try:
@ -102,11 +102,11 @@ def formatSubsection(string):
def getIssue(year, week): def getIssue(year, week):
coverUrl = 'http://www.spiegel.de/static/epaper/SP/%d/%d/ROSPANZ%d%03d0001-312.jpg' % (year, week, year, week) coverUrl = 'http://www.spiegel.de/static/epaper/SP/%d/%d/ROSPANZ%d%03d0001-312.jpg' % (year, week, year, week)
if not oxutils.net.exists(coverUrl): if not oxlib.net.exists(coverUrl):
return None return None
url = 'http://service.spiegel.de/digas/servlet/epaper?Q=SP&JG=%d&AG=%d&SE=1&AN=INHALT' % (year, week) url = 'http://service.spiegel.de/digas/servlet/epaper?Q=SP&JG=%d&AG=%d&SE=1&AN=INHALT' % (year, week)
contents = [] contents = []
soup = BeautifulSoup(oxutils.cache.getUrl(url)) soup = BeautifulSoup(oxlib.cache.getUrl(url))
for item in soup('a', {'href': re.compile('http://service.spiegel.de/digas/servlet/epaper\?Q=SP&JG=')}): for item in soup('a', {'href': re.compile('http://service.spiegel.de/digas/servlet/epaper\?Q=SP&JG=')}):
item = str(item) item = str(item)
page = int(re.compile('&amp;SE=(.*?)"').findall(item)[0]) page = int(re.compile('&amp;SE=(.*?)"').findall(item)[0])
@ -116,7 +116,7 @@ def getIssue(year, week):
pages = page + 2 pages = page + 2
for page in range(1, pages + 10): for page in range(1, pages + 10):
url = 'http://www.spiegel.de/static/epaper/SP/%d/%d/ROSPANZ%d%03d%04d-205.jpg' % (year, week, year, week, page) url = 'http://www.spiegel.de/static/epaper/SP/%d/%d/ROSPANZ%d%03d%04d-205.jpg' % (year, week, year, week, page)
if oxutils.cache.exists(url): if oxlib.cache.exists(url):
pageUrl[page] = url pageUrl[page] = url
else: else:
pageUrl[page] = '' pageUrl[page] = ''
@ -164,7 +164,7 @@ def archiveIssues():
f.close() f.close()
filename = '%s/Der Spiegel %d %02d.jpg' % (dirname, y, w) filename = '%s/Der Spiegel %d %02d.jpg' % (dirname, y, w)
if not os.path.exists(filename): if not os.path.exists(filename):
data = oxutils.cache.getUrl(issue['coverUrl']) data = oxlib.cache.getUrl(issue['coverUrl'])
f = open(filename, 'w') f = open(filename, 'w')
f.write(data) f.write(data)
f.close() f.close()
@ -173,7 +173,7 @@ def archiveIssues():
if url: if url:
filename = '%s/Der Spiegel %d %02d %03d.jpg' % (dirname, y, w, page) filename = '%s/Der Spiegel %d %02d %03d.jpg' % (dirname, y, w, page)
if not os.path.exists(filename): if not os.path.exists(filename):
data = oxutils.cache.getUrl(url) data = oxlib.cache.getUrl(url)
f = open(filename, 'w') f = open(filename, 'w')
f.write(data) f.write(data)
f.close() f.close()
@ -244,7 +244,7 @@ def archiveNews():
f.close() f.close()
filename = dirname + '/' + new['imageUrl'].split('/')[-1] filename = dirname + '/' + new['imageUrl'].split('/')[-1]
if not os.path.exists(filename): if not os.path.exists(filename):
data = oxutils.cache.getUrl(new['imageUrl']) data = oxlib.cache.getUrl(new['imageUrl'])
f = open(filename, 'w') f = open(filename, 'w')
f.write(data) f.write(data)
f.close() f.close()

View file

@ -6,10 +6,10 @@ import socket
from urllib import quote, urlencode from urllib import quote, urlencode
from urllib2 import URLError from urllib2 import URLError
from oxutils.cache import getUrl, getUrlUnicode from oxlib.cache import getUrl, getUrlUnicode
from oxutils import findRe, cache, stripTags, decodeHtml, getTorrentInfo, normalizeNewlines from oxlib import findRe, cache, stripTags, decodeHtml, getTorrentInfo, normalizeNewlines
from oxutils.normalize import normalizeImdbId from oxlib.normalize import normalizeImdbId
import oxutils import oxlib
from torrent import Torrent from torrent import Torrent
@ -63,7 +63,7 @@ def getId(piratebayId):
def exists(piratebayId): def exists(piratebayId):
piratebayId = getId(piratebayId) piratebayId = getId(piratebayId)
return oxutils.net.exists("http://thepiratebay.org/tor/%s" % piratebayId) return oxlib.net.exists("http://thepiratebay.org/tor/%s" % piratebayId)
def getData(piratebayId): def getData(piratebayId):
_key_map = { _key_map = {

View file

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4 # vi:si:et:sw=4:sts=4:ts=4
from oxutils import intValue from oxlib import intValue
class Torrent(dict): class Torrent(dict):

View file

@ -3,8 +3,8 @@
from urllib import urlencode from urllib import urlencode
import simplejson import simplejson
from oxutils.cache import getUrl, getUrlUnicode from oxlib.cache import getUrl, getUrlUnicode
from oxutils import findRe, decodeHtml from oxlib import findRe, decodeHtml
def getMovieId(title, director='', year=''): def getMovieId(title, director='', year=''):

View file

@ -4,8 +4,8 @@ from urllib import quote
import xml.etree.ElementTree as ET import xml.etree.ElementTree as ET
import feedparser import feedparser
from oxutils.cache import getUrl from oxlib.cache import getUrl
from oxutils import findString from oxlib import findString
def getVideoUrl(youtubeId, format='mp4'): def getVideoUrl(youtubeId, format='mp4'):

View file

@ -11,8 +11,8 @@ setup(
description="collection of scrapers for various websites", description="collection of scrapers for various websites",
author="0x", author="0x",
author_email="code@0xdb.org", author_email="code@0xdb.org",
url="http://code.0xdb.org/ox", url="http://code.0xdb.org/oxweb",
download_url="http://code.0xdb.org/ox/download", download_url="http://code.0xdb.org/oxweb/download",
license="GPLv3", license="GPLv3",
packages=find_packages(), packages=find_packages(),
zip_safe=False, zip_safe=False,