diff --git a/recipes/hindu.recipe b/recipes/hindu.recipe
index cc5305eb77..eb84fc4031 100644
--- a/recipes/hindu.recipe
+++ b/recipes/hindu.recipe
@@ -16,10 +16,14 @@ class TheHindu(BasicNewsRecipe):
 
     keep_only_tags = [dict(id='content')]
     remove_tags = [dict(attrs={'class':['article-links', 'breadcr']}),
-            dict(id=['email-section', 'right-column', 'printfooter'])]
+            dict(id=['email-section', 'right-column', 'printfooter', 'topover',
+                'slidebox', 'th_footer'])]
 
     extra_css = '.photo-caption { font-size: smaller }'
 
+    def preprocess_raw_html(self, raw, url):
+        return raw.replace('<body><p>', '<p>').replace('</p></body>', '</p>')
+
     def postprocess_html(self, soup, first_fetch):
         for t in soup.findAll(['table', 'tr', 'td','center']):
             t.name = 'div'