#!/usr/bin/env python

__license__ = 'GPL v3'

from calibre.web.feeds.news import BasicNewsRecipe


class kerrang(BasicNewsRecipe):
    """Calibre news recipe for the Kerrang! blog RSS feed."""

    # --- Publication metadata ---
    title = u'Kerrang!'
    __author__ = 'Artur Stachecki '
    language = 'en_GB'
    description = u'UK-based magazine devoted to rock music published by Bauer Media Group'
    masthead_url = 'http://images.kerrang.com/design/kerrang/kerrangsite/logo.gif'

    # --- Download settings ---
    oldest_article = 7
    max_articles_per_feed = 100
    simultaneous_downloads = 5
    recursions = 0
    use_embedded_content = False

    # --- Page clean-up settings ---
    remove_javascript = True
    no_stylesheets = True
    # Tag filter matching elements whose class is 'headz' or 'blktxt'.
    keep_only_tags = [dict(attrs={'class': ['headz', 'blktxt']})]
    extra_css = ''' img { display: block; margin-right: auto;} h1 {text-align: left; font-size: 22px;}'''

    feeds = [(u'News', u'http://www.kerrang.com/blog/rss.xml')]

    def preprocess_html(self, soup):
        """Replace each text-only anchor in *soup* with its text.

        Anchors whose ``string`` is ``None`` are left untouched. The same
        soup object is returned after the in-place modification.
        """
        for anchor in soup.findAll('a'):
            link_text = anchor.string
            if link_text is None:
                continue
            anchor.replaceWith(link_text)
        return soup