diff --git a/resources/recipes/novosti.recipe b/resources/recipes/novosti.recipe
index eecae620e4..0465b59e17 100644
--- a/resources/recipes/novosti.recipe
+++ b/resources/recipes/novosti.recipe
@@ -37,6 +37,24 @@ class Novosti(BasicNewsRecipe):
     preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
 
     keep_only_tags = [dict(attrs={'class':['articleTitle','author','articleLead','articleBody']})]
-    remove_tags = [dict(name=['embed','object','iframe','base'])]
-
+    remove_tags = [dict(name=['embed','object','iframe','base','link','meta'])]
     feeds = [(u'Vesti', u'http://www.novosti.rs/rss/rss-vesti')]
+
+    def preprocess_html(self, soup):
+        """Clean up the parsed article markup and return it.
+
+        Drops every inline ``style`` attribute, renames
+        ``<span class="author">`` elements to ``<p>``, and sets
+        ``alt='image'`` on each ``<img>`` that has no ``alt`` attribute.
+        The soup is modified in place and the same object is returned.
+        """
+        for item in soup.findAll(style=True):
+            del item['style']
+        for item in soup.findAll('span', attrs={'class': 'author'}):
+            item.name = 'p'
+        for item in soup.findAll('img'):
+            # Tag.get() is used instead of has_key(), which BeautifulSoup 4
+            # deprecates; get() returns None for a missing attribute.
+            if item.get('alt') is None:
+                item['alt'] = 'image'
+        return soup