__license__ = 'GPL v3'
__copyright__ = '2009-2010, Darko Miletic '
''' www.businessworld.in '''

from calibre.web.feeds.news import BasicNewsRecipe


class BusinessWorldMagazine(BasicNewsRecipe):
    """Recipe that pulls articles from the businessworld.in RSS feed."""

    # --- Metadata describing the publication ---
    title = 'Business World Magazine'
    __author__ = 'Kovid Goyal'
    description = 'News from India'
    category = 'news, politics, finances, India, Asia'
    language = 'en_IN'

    # --- Download / parsing settings ---
    encoding = 'utf-8'
    no_stylesheets = True
    oldest_article = 2

    # Single feed given as a bare URL (no explicit feed title).
    feeds = ['http://www.businessworld.in/rss/all-article.xml']

    # --- Page clean-up rules ---
    # Only the element(s) carrying the 'main-article' class are kept.
    keep_only_tags = [
        {'attrs': {'class': ['main-article']}},
    ]
    # Elements stripped from the kept content: one by id, the rest by class.
    remove_tags = [
        {'id': 'video_n_ad_div'},
        {'attrs': {'class': ['meta-tools', 'social-article']}},
    ]
    remove_tags_after = {'attrs': {'class': 'social-article'}}

    def preprocess_html(self, soup):
        """Fix up image sources and drop ad containers, then return *soup*.

        Every ``<img>`` that has a ``data-original`` attribute gets that
        value copied into ``src`` (presumably the page defers image loading
        via that attribute -- confirm against the live markup).  Afterwards
        the parent of each element with class ``adsbygoogle`` is removed
        from the tree.
        """
        deferred_images = soup.findAll('img', attrs={'data-original': True})
        for image in deferred_images:
            image['src'] = image['data-original']

        ad_slots = soup.findAll(attrs={'class': 'adsbygoogle'})
        for slot in ad_slots:
            # Remove the wrapping parent, not just the ad element itself.
            slot.parent.extract()

        return soup