From 686bf450e4d924cb5f520f66cd92c99e2215af1c Mon Sep 17 00:00:00 2001
From: j <0x006A@0x2620.org>
Date: Mon, 15 Feb 2010 12:38:42 +0530
Subject: [PATCH] add aaarg

---
 oxweb/aaaarg.py    | 61 ++++++++++++++++++++++++++++++++++++++++++++++
 oxweb/karagarga.py |  2 ++
 2 files changed, 63 insertions(+)
 create mode 100644 oxweb/aaaarg.py
diff --git a/oxweb/aaaarg.py b/oxweb/aaaarg.py
new file mode 100644
index 0000000..cca6385
--- /dev/null
+++ b/oxweb/aaaarg.py
@@ -0,0 +1,61 @@
+# -*- coding: utf-8 -*-
+# vi:si:et:sw=4:sts=4:ts=4
+import re
+import os
+import string
+
+from oxlib import cache
+from oxlib.html import stripTags, decodeHtml
+from oxlib.text import findRe
+from oxlib.normalize import canonicalName
+import auth
+
+
+def readUrl(url, data=None, headers=cache.DEFAULT_HEADERS, timeout=cache.cache_timeout, valid=None):
+    """cache.readUrl with the stored aaaarg cookie added; valid is accepted but not forwarded."""
+    with_cookie = dict(headers, Cookie=auth.get("aaaarg.cookie"))
+    return cache.readUrl(url, data, with_cookie, timeout)
+
+def readUrlUnicode(url, timeout=cache.cache_timeout):  # unicode read through the cookie-adding readUrl
+    return cache.readUrlUnicode(url, timeout=timeout, _readUrl=readUrl)
+
+def downloadText(id, filename=None):
+    """Download text `id` (int or digit string) from a.aaaarg.org.
+    Returns the data, or writes it to filename (returning None) if one is given."""
+    #FIXME, what about the cache, this keeps all pdfs in oxcache...
+    url = 'http://a.aaaarg.org/node/%d/download' % int(id)
+    data = readUrl(url, timeout=-1)
+    if not filename:
+        return data
+    with open(filename, "wb") as f:  # binary mode: the payload is a PDF, not text
+        f.write(data)
+
+def getTextByLetter(letter):
+    """Scrape the library page for letter into a list of text dicts.
+
+    Each dict has 'author', 'title', 'id' (digits from the link) and 'filename'.
+    """
+    url = 'http://a.aaaarg.org/library/%s' % letter
+    data = readUrlUnicode(url)
+    txts = re.compile('<li class="author">(.*?)</li><li class="title"><a href="(.*?)">(.*?)</a></li>').findall(data)
+    texts = []
+    author = 'Unknown Author'
+    for r in txts:
+        if r[0] != '&nbsp;':  # '&nbsp;' rows keep the previous author
+            author = r[0]
+        id = findRe(r[1], r'/(\d+)')
+        title = decodeHtml(r[2])
+        author_folder = canonicalName(author)
+        # [:1] rather than [0]: an empty name must not raise IndexError
+        author_folder = os.path.join(author_folder[:1], author_folder)
+        # a '/' in the title would otherwise split the file name into directories
+        filename = os.path.join(author_folder, '%s (aaarg %s).pdf' % (title.replace('/', '_'), id))
+        texts.append({'author': author, 'title': title, 'id': id, 'filename': filename})
+    return texts
+
+def getTexts():
+    """Return the text dicts of all library letter pages, 'a' through 'z'."""
+    # ascii_lowercase: string.letters is locale-dependent and gone in Python 3
+    pages = [getTextByLetter(letter) for letter in string.ascii_lowercase]
+    return [text for page in pages for text in page]
+
diff --git a/oxweb/karagarga.py b/oxweb/karagarga.py
index 8f49656..e976d47 100644
--- a/oxweb/karagarga.py
+++ b/oxweb/karagarga.py
@@ -1,3 +1,5 @@
+# -*- coding: utf-8 -*-
+# vi:si:et:sw=4:sts=4:ts=4
 import re
 from oxlib import cache
 from oxlib.html import stripTags