rename oxutils -> oxlib
This commit is contained in:
parent
6a16a0af30
commit
68f7621ac7
15 changed files with 59 additions and 59 deletions
|
@ -2,9 +2,9 @@
|
||||||
# vi:si:et:sw=4:sts=4:ts=4
|
# vi:si:et:sw=4:sts=4:ts=4
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from oxutils.cache import getUrlUnicode
|
from oxlib.cache import getUrlUnicode
|
||||||
from oxutils.html import stripTags
|
from oxlib.html import stripTags
|
||||||
from oxutils.text import findRe, removeSpecialCharacters
|
from oxlib.text import findRe, removeSpecialCharacters
|
||||||
|
|
||||||
import imdb
|
import imdb
|
||||||
|
|
||||||
|
|
|
@ -2,7 +2,7 @@
|
||||||
# vi:si:et:sw=4:sts=4:ts=4
|
# vi:si:et:sw=4:sts=4:ts=4
|
||||||
import re
|
import re
|
||||||
from urllib import unquote
|
from urllib import unquote
|
||||||
from oxutils.cache import getUrl
|
from oxlib.cache import getUrl
|
||||||
|
|
||||||
|
|
||||||
def getVideoUrl(url):
|
def getVideoUrl(url):
|
||||||
|
|
|
@ -8,8 +8,8 @@ import weakref
|
||||||
import threading
|
import threading
|
||||||
import Queue
|
import Queue
|
||||||
|
|
||||||
import oxutils
|
import oxlib
|
||||||
from oxutils import stripTags
|
from oxlib import stripTags
|
||||||
|
|
||||||
|
|
||||||
'''
|
'''
|
||||||
|
@ -27,9 +27,9 @@ FIXME: how search depper than first page?
|
||||||
'''
|
'''
|
||||||
DEFAULT_MAX_RESULTS = 10
|
DEFAULT_MAX_RESULTS = 10
|
||||||
|
|
||||||
def getUrl(url, data=None, headers=oxutils.net.DEFAULT_HEADERS):
|
def getUrl(url, data=None, headers=oxlib.net.DEFAULT_HEADERS):
|
||||||
google_timeout=24*60*60
|
google_timeout=24*60*60
|
||||||
return oxutils.cache.getUrl(url, data, headers, google_timeout)
|
return oxlib.cache.getUrl(url, data, headers, google_timeout)
|
||||||
|
|
||||||
def quote_plus(s):
|
def quote_plus(s):
|
||||||
return urllib.quote_plus(s.encode('utf-8'))
|
return urllib.quote_plus(s.encode('utf-8'))
|
||||||
|
|
|
@ -8,11 +8,11 @@ import time
|
||||||
|
|
||||||
from BeautifulSoup import BeautifulSoup
|
from BeautifulSoup import BeautifulSoup
|
||||||
import chardet
|
import chardet
|
||||||
import oxutils
|
import oxlib
|
||||||
from oxutils import stripTags, decodeHtml, findRe, findString
|
from oxlib import stripTags, decodeHtml, findRe, findString
|
||||||
from oxutils.cache import getUrl, getUrlUnicode
|
from oxlib.cache import getUrl, getUrlUnicode
|
||||||
from oxutils.normalize import normalizeTitle, normalizeImdbId
|
from oxlib.normalize import normalizeTitle, normalizeImdbId
|
||||||
from oxutils import *
|
from oxlib import *
|
||||||
|
|
||||||
import google
|
import google
|
||||||
|
|
||||||
|
@ -614,7 +614,7 @@ def guess(title, director=''):
|
||||||
return url[28:35]
|
return url[28:35]
|
||||||
|
|
||||||
try:
|
try:
|
||||||
req = urllib2.Request(imdb_url, None, oxutils.net.DEFAULT_HEADERS)
|
req = urllib2.Request(imdb_url, None, oxlib.net.DEFAULT_HEADERS)
|
||||||
u = urllib2.urlopen(req)
|
u = urllib2.urlopen(req)
|
||||||
data = u.read()
|
data = u.read()
|
||||||
return_url = u.url
|
return_url = u.url
|
||||||
|
@ -629,7 +629,7 @@ def guess(title, director=''):
|
||||||
return imdb_id
|
return imdb_id
|
||||||
|
|
||||||
imdb_url = 'http://www.imdb.com/find?q=%s;s=tt;site=aka' % quote(title.encode('utf-8'))
|
imdb_url = 'http://www.imdb.com/find?q=%s;s=tt;site=aka' % quote(title.encode('utf-8'))
|
||||||
req = urllib2.Request(imdb_url, None, oxutils.net.DEFAULT_HEADERS)
|
req = urllib2.Request(imdb_url, None, oxlib.net.DEFAULT_HEADERS)
|
||||||
u = urllib2.urlopen(req)
|
u = urllib2.urlopen(req)
|
||||||
data = u.read()
|
data = u.read()
|
||||||
return_url = u.url
|
return_url = u.url
|
||||||
|
|
|
@ -2,9 +2,9 @@
|
||||||
# encoding: utf-8
|
# encoding: utf-8
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from oxutils.cache import getUrlUnicode
|
from oxlib.cache import getUrlUnicode
|
||||||
from oxutils.html import stripTags
|
from oxlib.html import stripTags
|
||||||
from oxutils.text import findRe
|
from oxlib.text import findRe
|
||||||
|
|
||||||
import imdb
|
import imdb
|
||||||
|
|
||||||
|
@ -61,7 +61,7 @@ def parsePosterPage(html, year):
|
||||||
|
|
||||||
def archivePosters():
|
def archivePosters():
|
||||||
import os
|
import os
|
||||||
from oxutils.net import getUrl
|
from oxlib.net import getUrl
|
||||||
pathname = '/Volumes/Rolux Home/Desktop/Data/impawards.com'
|
pathname = '/Volumes/Rolux Home/Desktop/Data/impawards.com'
|
||||||
html = getUrlUnicode('http://impawards.com/archives/latest.html', timeout = 0)
|
html = getUrlUnicode('http://impawards.com/archives/latest.html', timeout = 0)
|
||||||
pages = int(findRe(html, '<a href = page(.*?).html>'))
|
pages = int(findRe(html, '<a href = page(.*?).html>'))
|
||||||
|
|
|
@ -3,10 +3,10 @@
|
||||||
import re
|
import re
|
||||||
import urllib
|
import urllib
|
||||||
|
|
||||||
from oxutils.cache import getUrl
|
from oxlib.cache import getUrl
|
||||||
from oxutils.html import decodeHtml, stripTags
|
from oxlib.html import decodeHtml, stripTags
|
||||||
from oxutils.text import findRe
|
from oxlib.text import findRe
|
||||||
from oxutils.text import findString
|
from oxlib.text import findString
|
||||||
|
|
||||||
|
|
||||||
# to sniff itunes traffic, use something like
|
# to sniff itunes traffic, use something like
|
||||||
|
|
|
@ -1,8 +1,8 @@
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
# vi:si:et:sw=4:sts=4:ts=4
|
# vi:si:et:sw=4:sts=4:ts=4
|
||||||
from oxutils.cache import getUrl
|
from oxlib.cache import getUrl
|
||||||
from oxutils.html import decodeHtml
|
from oxlib.html import decodeHtml
|
||||||
from oxutils.text import findRe
|
from oxlib.text import findRe
|
||||||
|
|
||||||
|
|
||||||
def getLyrics(title, artist):
|
def getLyrics(title, artist):
|
||||||
|
|
|
@ -5,10 +5,10 @@ import re
|
||||||
import socket
|
import socket
|
||||||
from urllib import quote
|
from urllib import quote
|
||||||
|
|
||||||
from oxutils.cache import getUrl, getUrlUnicode
|
from oxlib.cache import getUrl, getUrlUnicode
|
||||||
from oxutils import findRe, cache, stripTags, decodeHtml, getTorrentInfo, intValue, normalizeNewlines
|
from oxlib import findRe, cache, stripTags, decodeHtml, getTorrentInfo, intValue, normalizeNewlines
|
||||||
from oxutils.normalize import normalizeImdbId
|
from oxlib.normalize import normalizeImdbId
|
||||||
import oxutils
|
import oxlib
|
||||||
|
|
||||||
from torrent import Torrent
|
from torrent import Torrent
|
||||||
|
|
||||||
|
@ -55,7 +55,7 @@ def getId(mininovaId):
|
||||||
|
|
||||||
def exists(mininovaId):
|
def exists(mininovaId):
|
||||||
mininovaId = getId(mininovaId)
|
mininovaId = getId(mininovaId)
|
||||||
data = oxutils.net.getUrl("http://www.mininova.org/tor/%s" % mininovaId)
|
data = oxlib.net.getUrl("http://www.mininova.org/tor/%s" % mininovaId)
|
||||||
if not data or 'Torrent not found...' in data:
|
if not data or 'Torrent not found...' in data:
|
||||||
return False
|
return False
|
||||||
if 'tracker</a> of this torrent requires registration.' in data:
|
if 'tracker</a> of this torrent requires registration.' in data:
|
||||||
|
|
|
@ -3,9 +3,9 @@
|
||||||
import re
|
import re
|
||||||
|
|
||||||
import feedparser
|
import feedparser
|
||||||
from oxutils.cache import getUrl, getUrlUnicode
|
from oxlib.cache import getUrl, getUrlUnicode
|
||||||
import oxutils
|
import oxlib
|
||||||
from oxutils.lang import langCode2To3, langTo3Code
|
from oxlib.lang import langCode2To3, langTo3Code
|
||||||
|
|
||||||
def findSubtitlesByImdb(imdb, parts = 1, language = "eng"):
|
def findSubtitlesByImdb(imdb, parts = 1, language = "eng"):
|
||||||
if len(language) == 2:
|
if len(language) == 2:
|
||||||
|
@ -26,7 +26,7 @@ def findSubtitlesByImdb(imdb, parts = 1, language = "eng"):
|
||||||
if opensubtitleId:
|
if opensubtitleId:
|
||||||
opensubtitleId = opensubtitleId[0]
|
opensubtitleId = opensubtitleId[0]
|
||||||
else:
|
else:
|
||||||
opensubtitleId = oxutils.findRe(data, '/en/subtitles/(.*?)/')
|
opensubtitleId = oxlib.findRe(data, '/en/subtitles/(.*?)/')
|
||||||
return opensubtitleId
|
return opensubtitleId
|
||||||
|
|
||||||
def downloadSubtitleById(opensubtitle_id):
|
def downloadSubtitleById(opensubtitle_id):
|
||||||
|
@ -34,7 +34,7 @@ def downloadSubtitleById(opensubtitle_id):
|
||||||
data = getUrl('http://www.opensubtitles.org/en/subtitles/%s' % opensubtitle_id)
|
data = getUrl('http://www.opensubtitles.org/en/subtitles/%s' % opensubtitle_id)
|
||||||
reg_exp = 'href="(/en/download/file/.*?)">(.*?)</a>'
|
reg_exp = 'href="(/en/download/file/.*?)">(.*?)</a>'
|
||||||
for f in re.compile(reg_exp, re.DOTALL).findall(data):
|
for f in re.compile(reg_exp, re.DOTALL).findall(data):
|
||||||
name = oxutils.stripTags(f[1]).split('\n')[0]
|
name = oxlib.stripTags(f[1]).split('\n')[0]
|
||||||
url = "http://www.opensubtitles.com%s" % f[0]
|
url = "http://www.opensubtitles.com%s" % f[0]
|
||||||
srts[name] = getUrlUnicode(url)
|
srts[name] = getUrlUnicode(url)
|
||||||
return srts
|
return srts
|
||||||
|
|
|
@ -6,9 +6,9 @@ import time
|
||||||
|
|
||||||
from BeautifulSoup import BeautifulSoup
|
from BeautifulSoup import BeautifulSoup
|
||||||
|
|
||||||
import oxutils.cache
|
import oxlib.cache
|
||||||
from oxutils.html import decodeHtml, stripTags
|
from oxlib.html import decodeHtml, stripTags
|
||||||
import oxutils.net
|
import oxlib.net
|
||||||
|
|
||||||
|
|
||||||
def getNews(year, month, day):
|
def getNews(year, month, day):
|
||||||
|
@ -23,9 +23,9 @@ def getNews(year, month, day):
|
||||||
for section in sections:
|
for section in sections:
|
||||||
url = 'http://www.spiegel.de/%s/0,1518,archiv-%d-%03d,00.html' % (section, year, day)
|
url = 'http://www.spiegel.de/%s/0,1518,archiv-%d-%03d,00.html' % (section, year, day)
|
||||||
if date == time.strftime('%d.%m.%Y', time.localtime()):
|
if date == time.strftime('%d.%m.%Y', time.localtime()):
|
||||||
html = oxutils.net.getUrl(url)
|
html = oxlib.net.getUrl(url)
|
||||||
else:
|
else:
|
||||||
html = oxutils.cache.getUrl(url)
|
html = oxlib.cache.getUrl(url)
|
||||||
for item in re.compile('<div class="spTeaserCenterpage(.*?)</p>', re.DOTALL).findall(html):
|
for item in re.compile('<div class="spTeaserCenterpage(.*?)</p>', re.DOTALL).findall(html):
|
||||||
dateString = stripTags(re.compile('<div class="spDateTime">(.*?)</div>', re.DOTALL).findall(item)[0]).strip()
|
dateString = stripTags(re.compile('<div class="spDateTime">(.*?)</div>', re.DOTALL).findall(item)[0]).strip()
|
||||||
try:
|
try:
|
||||||
|
@ -102,11 +102,11 @@ def formatSubsection(string):
|
||||||
|
|
||||||
def getIssue(year, week):
|
def getIssue(year, week):
|
||||||
coverUrl = 'http://www.spiegel.de/static/epaper/SP/%d/%d/ROSPANZ%d%03d0001-312.jpg' % (year, week, year, week)
|
coverUrl = 'http://www.spiegel.de/static/epaper/SP/%d/%d/ROSPANZ%d%03d0001-312.jpg' % (year, week, year, week)
|
||||||
if not oxutils.net.exists(coverUrl):
|
if not oxlib.net.exists(coverUrl):
|
||||||
return None
|
return None
|
||||||
url = 'http://service.spiegel.de/digas/servlet/epaper?Q=SP&JG=%d&AG=%d&SE=1&AN=INHALT' % (year, week)
|
url = 'http://service.spiegel.de/digas/servlet/epaper?Q=SP&JG=%d&AG=%d&SE=1&AN=INHALT' % (year, week)
|
||||||
contents = []
|
contents = []
|
||||||
soup = BeautifulSoup(oxutils.cache.getUrl(url))
|
soup = BeautifulSoup(oxlib.cache.getUrl(url))
|
||||||
for item in soup('a', {'href': re.compile('http://service.spiegel.de/digas/servlet/epaper\?Q=SP&JG=')}):
|
for item in soup('a', {'href': re.compile('http://service.spiegel.de/digas/servlet/epaper\?Q=SP&JG=')}):
|
||||||
item = str(item)
|
item = str(item)
|
||||||
page = int(re.compile('&SE=(.*?)"').findall(item)[0])
|
page = int(re.compile('&SE=(.*?)"').findall(item)[0])
|
||||||
|
@ -116,7 +116,7 @@ def getIssue(year, week):
|
||||||
pages = page + 2
|
pages = page + 2
|
||||||
for page in range(1, pages + 10):
|
for page in range(1, pages + 10):
|
||||||
url = 'http://www.spiegel.de/static/epaper/SP/%d/%d/ROSPANZ%d%03d%04d-205.jpg' % (year, week, year, week, page)
|
url = 'http://www.spiegel.de/static/epaper/SP/%d/%d/ROSPANZ%d%03d%04d-205.jpg' % (year, week, year, week, page)
|
||||||
if oxutils.cache.exists(url):
|
if oxlib.cache.exists(url):
|
||||||
pageUrl[page] = url
|
pageUrl[page] = url
|
||||||
else:
|
else:
|
||||||
pageUrl[page] = ''
|
pageUrl[page] = ''
|
||||||
|
@ -164,7 +164,7 @@ def archiveIssues():
|
||||||
f.close()
|
f.close()
|
||||||
filename = '%s/Der Spiegel %d %02d.jpg' % (dirname, y, w)
|
filename = '%s/Der Spiegel %d %02d.jpg' % (dirname, y, w)
|
||||||
if not os.path.exists(filename):
|
if not os.path.exists(filename):
|
||||||
data = oxutils.cache.getUrl(issue['coverUrl'])
|
data = oxlib.cache.getUrl(issue['coverUrl'])
|
||||||
f = open(filename, 'w')
|
f = open(filename, 'w')
|
||||||
f.write(data)
|
f.write(data)
|
||||||
f.close()
|
f.close()
|
||||||
|
@ -173,7 +173,7 @@ def archiveIssues():
|
||||||
if url:
|
if url:
|
||||||
filename = '%s/Der Spiegel %d %02d %03d.jpg' % (dirname, y, w, page)
|
filename = '%s/Der Spiegel %d %02d %03d.jpg' % (dirname, y, w, page)
|
||||||
if not os.path.exists(filename):
|
if not os.path.exists(filename):
|
||||||
data = oxutils.cache.getUrl(url)
|
data = oxlib.cache.getUrl(url)
|
||||||
f = open(filename, 'w')
|
f = open(filename, 'w')
|
||||||
f.write(data)
|
f.write(data)
|
||||||
f.close()
|
f.close()
|
||||||
|
@ -244,7 +244,7 @@ def archiveNews():
|
||||||
f.close()
|
f.close()
|
||||||
filename = dirname + '/' + new['imageUrl'].split('/')[-1]
|
filename = dirname + '/' + new['imageUrl'].split('/')[-1]
|
||||||
if not os.path.exists(filename):
|
if not os.path.exists(filename):
|
||||||
data = oxutils.cache.getUrl(new['imageUrl'])
|
data = oxlib.cache.getUrl(new['imageUrl'])
|
||||||
f = open(filename, 'w')
|
f = open(filename, 'w')
|
||||||
f.write(data)
|
f.write(data)
|
||||||
f.close()
|
f.close()
|
||||||
|
|
|
@ -6,10 +6,10 @@ import socket
|
||||||
from urllib import quote, urlencode
|
from urllib import quote, urlencode
|
||||||
from urllib2 import URLError
|
from urllib2 import URLError
|
||||||
|
|
||||||
from oxutils.cache import getUrl, getUrlUnicode
|
from oxlib.cache import getUrl, getUrlUnicode
|
||||||
from oxutils import findRe, cache, stripTags, decodeHtml, getTorrentInfo, normalizeNewlines
|
from oxlib import findRe, cache, stripTags, decodeHtml, getTorrentInfo, normalizeNewlines
|
||||||
from oxutils.normalize import normalizeImdbId
|
from oxlib.normalize import normalizeImdbId
|
||||||
import oxutils
|
import oxlib
|
||||||
|
|
||||||
from torrent import Torrent
|
from torrent import Torrent
|
||||||
|
|
||||||
|
@ -63,7 +63,7 @@ def getId(piratebayId):
|
||||||
|
|
||||||
def exists(piratebayId):
|
def exists(piratebayId):
|
||||||
piratebayId = getId(piratebayId)
|
piratebayId = getId(piratebayId)
|
||||||
return oxutils.net.exists("http://thepiratebay.org/tor/%s" % piratebayId)
|
return oxlib.net.exists("http://thepiratebay.org/tor/%s" % piratebayId)
|
||||||
|
|
||||||
def getData(piratebayId):
|
def getData(piratebayId):
|
||||||
_key_map = {
|
_key_map = {
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
# vi:si:et:sw=4:sts=4:ts=4
|
# vi:si:et:sw=4:sts=4:ts=4
|
||||||
from oxutils import intValue
|
from oxlib import intValue
|
||||||
|
|
||||||
|
|
||||||
class Torrent(dict):
|
class Torrent(dict):
|
||||||
|
|
|
@ -3,8 +3,8 @@
|
||||||
from urllib import urlencode
|
from urllib import urlencode
|
||||||
|
|
||||||
import simplejson
|
import simplejson
|
||||||
from oxutils.cache import getUrl, getUrlUnicode
|
from oxlib.cache import getUrl, getUrlUnicode
|
||||||
from oxutils import findRe, decodeHtml
|
from oxlib import findRe, decodeHtml
|
||||||
|
|
||||||
|
|
||||||
def getMovieId(title, director='', year=''):
|
def getMovieId(title, director='', year=''):
|
||||||
|
|
|
@ -4,8 +4,8 @@ from urllib import quote
|
||||||
import xml.etree.ElementTree as ET
|
import xml.etree.ElementTree as ET
|
||||||
|
|
||||||
import feedparser
|
import feedparser
|
||||||
from oxutils.cache import getUrl
|
from oxlib.cache import getUrl
|
||||||
from oxutils import findString
|
from oxlib import findString
|
||||||
|
|
||||||
|
|
||||||
def getVideoUrl(youtubeId, format='mp4'):
|
def getVideoUrl(youtubeId, format='mp4'):
|
||||||
|
|
4
setup.py
4
setup.py
|
@ -11,8 +11,8 @@ setup(
|
||||||
description="collection of scrapers for various websites",
|
description="collection of scrapers for various websites",
|
||||||
author="0x",
|
author="0x",
|
||||||
author_email="code@0xdb.org",
|
author_email="code@0xdb.org",
|
||||||
url="http://code.0xdb.org/ox",
|
url="http://code.0xdb.org/oxweb",
|
||||||
download_url="http://code.0xdb.org/ox/download",
|
download_url="http://code.0xdb.org/oxweb/download",
|
||||||
license="GPLv3",
|
license="GPLv3",
|
||||||
packages=find_packages(),
|
packages=find_packages(),
|
||||||
zip_safe=False,
|
zip_safe=False,
|
||||||
|
|
Loading…
Reference in a new issue