From fdaed4a1690298d20e49034656fc3e65ca5412b4 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Tue, 23 Mar 2010 09:46:54 +0530
Subject: [PATCH] IEEE Spectrum by Franco Venturi

---
 resources/recipes/ieeespectrum.recipe | 67 +++++++++++++++++++++++++++
 1 file changed, 67 insertions(+)
 create mode 100644 resources/recipes/ieeespectrum.recipe
diff --git a/resources/recipes/ieeespectrum.recipe b/resources/recipes/ieeespectrum.recipe
new file mode 100644
index 0000000000..79a107cd9d
--- /dev/null
+++ b/resources/recipes/ieeespectrum.recipe
@@ -0,0 +1,67 @@
+
+__license__   = 'GPL v3'
+__copyright__ = '2010, Franco Venturi <fventuri at comcast.net>'
+'''
+spectrum.ieee.org
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+from string import capwords
+from urlparse import urljoin
+
+class IEEESpectrum(BasicNewsRecipe):
+    '''Recipe for IEEE Spectrum; sections and articles are scraped from `index`.'''
+    title                 = 'IEEE Spectrum'
+    __author__            = 'Franco Venturi'
+    description           = 'Electronics News from IEEE'
+    publisher             = 'IEEE'
+    category              = 'news, electronics, IT, computer science'
+    oldest_article        = 32
+    max_articles_per_feed = 100
+    no_stylesheets        = True
+    use_embedded_content  = False
+    language              = 'en'
+    index                 = 'http://spectrum.ieee.org/magazine/'
+    masthead_url          = 'http://spectrum.ieee.org/images/logo_hdr.png'
+
+    remove_javascript     = True
+    remove_tags           = [dict(name={'script':True, 'object':True})]
+    remove_attributes     = ['height','width','alt']
+    keep_only_tags        = [dict(attrs={'class': {'artSctn':True, 'artTitle':True, 'dekTitle': True}}), dict(attrs={'id':'artBody'})]
+
+#    def get_cover_url(self):
+#        cover_url = None
+#        soup = self.index_to_soup(self.index)
+#        cover_item = soup.find('img',attrs={'image':'cover.gif'})
+#        if cover_item:
+#            cover_url = urljoin(self.index, cover_item['src'])
+#        return cover_url
+
+    def parse_index(self):
+        '''Scrape `index`: set timefmt, return [(section, [article, ...]), ...].'''
+        soup = self.index_to_soup(self.index)
+        content = soup.find(id='gnrlContent')
+        title = content.find(attrs={'class':'style4'}).string.strip()
+        date = ' '.join(title.split()[0:2])  # first two words of the heading
+        self.timefmt = ' [' + date + ']'
+        contents = []
+        for tag in content.findAll(attrs={'class': {'style2':True, 'lstngTitle':True, 'lstngBody': True}}):
+            text = tag.renderContents().strip()
+            if tag['class'] == 'style2':  # section heading opens a new feed
+                contents.append((capwords(text), []))
+            elif not contents:
+                continue  # listing entry before any section heading: skip it
+            elif tag['class'] == 'lstngTitle':
+                url = urljoin(self.index, tag.findPrevious('a')['href']) + '/0'
+                contents[-1][1].append({'title': text, 'url': url, 'date': date,
+                                        'description': '', 'content': ''})
+            elif tag['class'] == 'lstngBody' and contents[-1][1]:
+                contents[-1][1][-1]['description'] = text  # blurb of latest article
+        return contents
+
+    def preprocess_html(self, soup):
+        '''Rewrite relative links in the article soup to absolute URLs.'''
+        for a in soup.findAll('a', href=True):  # no href would raise KeyError
+            if not a['href'].lower().startswith('http'):
+                a['href'] = urljoin(self.index, a['href'])
+        return soup