from calibre.ptempfile import PersistentTemporaryFile
from calibre.web.feeds.news import BasicNewsRecipe


class AdvancedUserRecipe1276934715(BasicNewsRecipe):
    """Calibre news recipe for the India edition of Forbes.

    Articles are listed from a single RSS feed.  Because
    ``articles_are_obfuscated`` is set, each article URL is passed through
    ``get_obfuscated_article``, which follows the page's ``/printcontent/<id>``
    link and saves that response to a local temporary file.
    """

    title = u'Forbes India'
    __author__ = 'rty'
    description = 'India Edition Forbes'
    publisher = 'Forbes India'
    category = 'Business News, Economy, India'
    oldest_article = 7
    max_articles_per_feed = 100
    remove_javascript = True
    use_embedded_content = False
    no_stylesheets = True
    language = 'en_IN'
    # Holds every temporary file created by get_obfuscated_article so the
    # objects stay referenced after the method returns.
    temp_files = []
    articles_are_obfuscated = True

    conversion_options = {'linearize_tables': True}

    feeds = [
        (u'Contents', u'http://business.in.com/rssfeed/rss_all.xml'),
    ]

    extra_css = '''
        .t-10-gy-l{font-style: italic; font-size: small}
        .t-30-b-d{font-weight: bold; font-size: xx-large}
        .t-16-gy-l{font-weight: bold; font-size: x-large; font-style: italic}
        .storycontent{font-size: 4px;font-family: Times New Roman;}
    '''

    remove_tags_before = dict(name='div', attrs={'class': 'pdl10 pdr15'})

    def get_obfuscated_article(self, url):
        """Save the print version of the article at *url* to a temp file.

        Opens *url*, follows the first link on that page whose URL matches
        ``/printcontent/[0-9]+`` and writes the response body to a
        ``PersistentTemporaryFile``.

        :param url: Address of the article page.
        :return: Filesystem path of the temporary file holding the HTML.
        """
        br = self.get_browser()
        br.open(url)
        response = br.follow_link(url_regex=r'/printcontent/[0-9]+', nr=0)
        html = response.read()

        tmp = PersistentTemporaryFile('_fa.html')
        self.temp_files.append(tmp)
        try:
            tmp.write(html)
        finally:
            # Close the handle even when the write fails so it is not leaked.
            tmp.close()
        return tmp.name

    def get_cover_url(self):
        """Return the cover image URL taken from the magazine index page.

        The cover is the ``href`` of the first ``<a>`` element whose class is
        ``lbOn a-9-b-d``.

        :return: The URL, or ``None`` when no such anchor (or no ``href`` on
            it) is found.
        """
        index = 'http://business.in.com/magazine/'
        soup = self.index_to_soup(index)
        image = soup.find('a', {"class": "lbOn a-9-b-d"})
        if image is None:
            return None
        return image.get('href')

    def preprocess_html(self, soup):
        """Remove inline ``style`` and ``width`` attributes from every tag.

        :param soup: Parsed article document; modified in place.
        :return: The same *soup* object.
        """
        for item in soup.findAll(style=True):
            del item['style']
        for item in soup.findAll(width=True):
            del item['width']
        return soup