IEEE Spectrum by Franco Venturi

2025-08-30 23:00:21 -04:00 · 2010-03-23 09:46:54 +05:30 · 2010-03-23 09:46:54 +05:30 · fdaed4a169
commit fdaed4a169
parent c39954ba99
1 changed files with 67 additions and 0 deletions
--- a/resources/recipes/ieeespectrum.recipe
+++ b/resources/recipes/ieeespectrum.recipe
@@ -0,0 +1,67 @@
 __license__   = 'GPL v3'
 __copyright__ = '2010, Franco Venturi <fventuri at comcast.net>'
 '''
 spectrum.ieee.org
 '''
 from calibre.web.feeds.news import BasicNewsRecipe
 from string import capwords
 from urlparse import urljoin
 class IEEESpectrum(BasicNewsRecipe):
    '''
    calibre news recipe for IEEE Spectrum (spectrum.ieee.org).

    The issue is assembled from the magazine index page (``index``):
    ``parse_index`` scrapes the section headings and article listings from
    that page, and ``preprocess_html`` rewrites the links of every downloaded
    page so that they are absolute.
    '''
    title                 = 'IEEE Spectrum'
    __author__            = 'Franco Venturi'
    description           = 'Electronics News from IEEE'
    publisher             = 'IEEE'
    category              = 'news, electronics, IT, computer science'
    oldest_article        = 32
    max_articles_per_feed = 100
    no_stylesheets        = True
    use_embedded_content  = False
    language              = 'en'
    # Page listing the current issue; also used as the base for relative URLs.
    index                 = 'http://spectrum.ieee.org/magazine/'
    masthead_url          = 'http://spectrum.ieee.org/images/logo_hdr.png'
    remove_javascript     = True
    remove_tags           = [dict(name={'script':True, 'object':True})]
    remove_attributes     = ['height','width','alt']
    # The dicts below are used as sets of accepted class names.
    keep_only_tags        = [dict(attrs={'class': {'artSctn':True, 'artTitle':True, 'dekTitle': True}}), dict(attrs={'id':'artBody'})]
    # NOTE(review): the cover lookup below is disabled in the original; the
    # reason is not recorded in this file.
 #    def get_cover_url(self):
 #        cover_url = None
 #        soup = self.index_to_soup(self.index)
 #        cover_item = soup.find('img',attrs={'image':'cover.gif'})
 #        if cover_item:
 #            cover_url = urljoin(self.index, cover_item['src'])
 #        return cover_url
    def parse_index(self):
        '''
        Build the list of sections and articles from the magazine index page.

        Returns a list of ``(section title, [article dict, ...])`` tuples.
        Each article dict has the keys ``title``, ``url``, ``date``,
        ``description`` and ``content``.

        Raises ValueError when the index page has no ``gnrlContent`` element,
        since nothing can be scraped in that case.
        '''
        soup = self.index_to_soup(self.index)
        content = soup.find(id='gnrlContent')
        if content is None:
            raise ValueError('Could not find the issue contents (id="gnrlContent") in ' + self.index)

        # The issue heading is expected to start with the issue date; its
        # first two words are used as the date shown in the title.
        date = ''
        heading = content.find(attrs={'class':'style4'})
        if heading is not None and heading.string:
            date = ' '.join(heading.string.strip().split()[0:2])
            self.timefmt = ' [' + date + ']'

        contents = []
        # Article that a following 'lstngBody' description belongs to; reset
        # whenever a new section starts or a title has to be skipped.
        article = None
        for tag in content.findAll(attrs={'class': {'style2':True, 'lstngTitle':True, 'lstngBody': True}}):
            kind = tag['class']
            text = tag.renderContents().strip()
            if kind == 'style2':
                # Section heading: start a new, still empty, section.
                contents.append((capwords(text), []))
                article = None
            elif kind == 'lstngTitle':
                link = tag.findPrevious('a')
                if link is None or not link.get('href'):
                    # No link to follow, so the article cannot be fetched.
                    article = None
                    continue
                if not contents:
                    # Listing found before any section heading.
                    contents.append(('Articles', []))
                # NOTE(review): the '/0' suffix presumably selects a specific
                # view of the article - confirm against the site.
                url = urljoin(self.index, link['href']) + '/0'
                article = {'title': text,
                           'url': url,
                           'date': date,
                           'description': '',
                           'content': ''
                          }
                contents[-1][1].append(article)
            elif kind == 'lstngBody':
                # Teaser text for the most recent article, if there is one.
                if article is not None:
                    article['description'] = text
        return contents
    def preprocess_html(self, soup):
        '''
        Make every link in a downloaded page absolute.

        Links whose ``href`` does not start with ``http`` are resolved
        against ``index``. Returns the same (modified) soup.
        '''
        # href=True skips anchors without an href attribute (for example
        # named anchors), which would raise KeyError on a['href'].
        for a in soup.findAll('a', href=True):
            if not a['href'].lower().startswith('http'):
                a['href'] = urljoin(self.index, a['href'])
        return soup