From 67cc77eb8a8b91b50fc6775ed13f9636820da145 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 23 Dec 2012 11:29:33 +0530 Subject: [PATCH] Update The Hindu --- recipes/hindu.recipe | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/recipes/hindu.recipe b/recipes/hindu.recipe index cc5305eb77..eb84fc4031 100644 --- a/recipes/hindu.recipe +++ b/recipes/hindu.recipe @@ -16,10 +16,14 @@ class TheHindu(BasicNewsRecipe): keep_only_tags = [dict(id='content')] remove_tags = [dict(attrs={'class':['article-links', 'breadcr']}), - dict(id=['email-section', 'right-column', 'printfooter'])] + dict(id=['email-section', 'right-column', 'printfooter', 'topover', + 'slidebox', 'th_footer'])] extra_css = '.photo-caption { font-size: smaller }' + def preprocess_raw_html(self, raw, url): + return raw.replace('

', '

').replace('

', '

') + def postprocess_html(self, soup, first_fetch): for t in soup.findAll(['table', 'tr', 'td','center']): t.name = 'div'