From 8b2d9c5a870f1cb074040672193c36f3a57f290a Mon Sep 17 00:00:00 2001 From: j <0x006A@0x2620.org> Date: Wed, 15 Jul 2009 15:53:40 +0200 Subject: [PATCH] detect iso-8859-1 in html header --- oxlib/net.py | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/oxlib/net.py b/oxlib/net.py index d509804..255b5c5 100644 --- a/oxlib/net.py +++ b/oxlib/net.py @@ -57,16 +57,6 @@ def getUrl(url, data=None, headers=DEFAULT_HEADERS, returnHeaders=False): return dict(f.headers), data return data -def saveUrl(url, filename, overwrite=False): - if not os.path.exists(filename) or overwrite: - dirname = os.path.dirname(filename) - if not os.path.exists(dirname): - os.makedirs(dirname) - data = getUrl(url) - f = open(filename, 'w') - f.write(data) - f.close() - def getUrlUnicode(url): data = getUrl(url) encoding = getEncoding(data) @@ -77,6 +67,8 @@ def getUrlUnicode(url): def getEncoding(data): if 'content="text/html; charset=utf-8"' in data: return 'utf-8' + elif 'content="text/html; charset=iso-8859-1"' in data: + return 'iso-8859-1' detector = UniversalDetector() for line in data.split('\n'): detector.feed(line) @@ -85,3 +77,13 @@ def getEncoding(data): detector.close() return detector.result['encoding'] +def saveUrl(url, filename, overwrite=False): + if not os.path.exists(filename) or overwrite: + dirname = os.path.dirname(filename) + if not os.path.exists(dirname): + os.makedirs(dirname) + data = getUrl(url) + f = open(filename, 'w') + f.write(data) + f.close() +