allow custom getUrl to be passed to getUrlUnicode, error pages can be gzipped too
This commit is contained in:
parent 5e567665c4
commit 49b47f7a46
1 changed file with 7 additions and 2 deletions
@@ -1,6 +1,8 @@
 # -*- coding: utf-8 -*-
 # vi:si:et:sw=2:sts=2:ts=2
 # 2008
+import gzip
+import StringIO
 import os
 import sha
 import time
@@ -43,13 +45,16 @@ def getUrl(url, data=None, headers=DEFAULT_HEADERS, timeout=cache_timeout):
   try:
     url_headers, result = net.getUrl(url, data, headers, returnHeaders=True)
   except urllib2.HTTPError, e:
     e.headers['Status'] = "%s" % e.code
     url_headers = dict(e.headers)
     result = e.read()
+    if url_headers.get('content-encoding', None) == 'gzip':
+      result = gzip.GzipFile(fileobj=StringIO.StringIO(result)).read()
   saveUrlCache(url_cache_file, result, url_headers)
   return result
 
-def getUrlUnicode(url):
-  data = getUrl(url)
+def getUrlUnicode(url, data=None, headers=DEFAULT_HEADERS, timeout=cache_timeout, _getUrl=getUrl):
+  data = _getUrl(url, data, headers, timeout)
   encoding = chardet.detect(data)['encoding']
   if not encoding:
     encoding = 'latin-1'
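For context, the lines added inside the except block unpack a gzip-encoded error body the standard Python 2 way, since urllib2 does not decompress responses itself. The sketch below is illustrative only: the URL, the Accept-Encoding header, and the fetch helper are placeholders, and only the GzipFile/StringIO unpacking mirrors the diff.

# Python 2 sketch: read an HTTP response, including an error page, and
# decompress it if the server sent it gzip-encoded, the same way the
# patched getUrl now does for urllib2.HTTPError bodies.
import gzip
import StringIO
import urllib2

def fetch(url):
  request = urllib2.Request(url, headers={'Accept-Encoding': 'gzip'})
  try:
    response = urllib2.urlopen(request)
    headers, result = response.headers, response.read()
  except urllib2.HTTPError, e:
    # error pages can be gzip-encoded too, so read and unpack them as well
    headers, result = e.headers, e.read()
  if headers.get('content-encoding', None) == 'gzip':
    result = gzip.GzipFile(fileobj=StringIO.StringIO(result)).read()
  return result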
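The new _getUrl keyword makes the fetcher used by getUrlUnicode swappable. A minimal usage sketch, assuming the diffed file is importable as a module named cache (the file name is not shown on this page) and using a hypothetical wrapper that just logs each request; only the _getUrl keyword and the DEFAULT_HEADERS / cache_timeout names come from the diff itself.

# Python 2 sketch: pass a custom fetcher to getUrlUnicode via _getUrl.
# The module name `cache` and logging_getUrl are assumptions, not part
# of this commit.
import cache

def logging_getUrl(url, data=None, headers=cache.DEFAULT_HEADERS, timeout=cache.cache_timeout):
  # any callable with getUrl's signature can be dropped in here
  print "GET %s" % url
  return cache.getUrl(url, data, headers, timeout)

html = cache.getUrlUnicode('http://example.com/', _getUrl=logging_getUrl)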