From 025297a2316f613cc4f1086a01ed00f58783c81e Mon Sep 17 00:00:00 2001
From: Rolux <rolux@Rolux.local>
Date: Wed, 30 Apr 2008 14:24:33 +0200
Subject: [PATCH] adding spiegel.py

---
 ox/spiegel.py | 44 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 44 insertions(+)
 create mode 100644 ox/spiegel.py

diff --git a/ox/spiegel.py b/ox/spiegel.py
new file mode 100644
index 0000000..2394858
--- /dev/null
+++ b/ox/spiegel.py
@@ -0,0 +1,44 @@
+import re
+from time import gmtime, strftime
+
+from BeautifulSoup import BeautifulSoup
+
+from oxutils.cache import getUrl
+from oxutils.html import stripTags
+from oxutils.text import findRegexp
+
+class Spiegel:
+  """One issue of Der Spiegel (1994 onwards), identified by year and week."""
+  def __init__(self, year, week):
+    """Set year, week, coverUrl, contentsUrl; raise ValueError if out of range."""
+    # fixme: simply check if cover exists
+    now = gmtime()  # read the clock once so year and week stay consistent
+    thisYear, thisWeek = int(strftime('%Y', now)), int(strftime('%W', now))
+    if year == thisYear:
+      weeks = range(1, thisWeek + 2)  # up to one week ahead of the current one
+    elif year in [1998, 2004]:
+      weeks = range(1, 54)  # these two years are allowed a week 53
+    else:
+      weeks = range(1, 53)
+    if year not in range(1994, thisYear + 1) or week not in weeks:
+      raise ValueError('no Spiegel issue for year %r, week %r' % (year, week))
+    # end fixme
+    self.year, self.week = year, week
+    self.coverUrl = 'http://www.spiegel.de/static/epaper/SP/%d/%d/ROSPANZ%d0%02d0001-312.jpg' % (self.year, self.week, self.year, self.week)
+    self.contentsUrl = 'http://service.spiegel.de/digas/servlet/epaper?Q=SP&JG=%d&AG=%d&SE=1&AN=INHALT' % (self.year, self.week)
+
+  def getContents(self):
+    """Fetch the contents page; store and return a list of {'title', 'page'}."""
+    self.contents = []
+    soup = BeautifulSoup(getUrl(self.contentsUrl))
+    for item in soup('a', {'href': re.compile(r'http://service.spiegel.de/digas/servlet/epaper\?Q=SP&JG=')}):
+      item = str(item)
+      page = re.search('&amp;SE=(.*?)"', item)
+      if page:  # skip links without a page parameter (used to raise IndexError)
+        self.contents.append({'title': stripTags(item).strip(), 'page': page.group(1)})
+    return self.contents
+
+if __name__ == '__main__':
+  # Demo: fetch the table of contents of issue 8/2008 and dump it to stdout.
+  issue = Spiegel(2008, 8)
+  print issue.getContents()
\ No newline at end of file