Fix #6845 (Updated recipe for Novosti)

This commit is contained in:
Kovid Goyal 2010-09-17 12:46:03 -06:00
parent 8f5842edd3
commit 48faa74301

View File

@@ -37,6 +37,16 @@ class Novosti(BasicNewsRecipe):
# Regex substitutions as (compiled pattern, replacement callable) pairs.
# The single rule here rewrites U+0110 (LATIN CAPITAL LETTER D WITH STROKE)
# as U+00D0 (LATIN CAPITAL LETTER ETH).
# presumably applied by BasicNewsRecipe to the raw page text -- confirm
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
# Tag filter matching elements whose class is one of the four listed
# article parts (title, author, lead, body).
# presumably everything outside the matching tags is discarded -- confirm
keep_only_tags = [dict(attrs={'class':['articleTitle','author','articleLead','articleBody']})]
# NOTE(review): remove_tags is assigned twice in a row, so only the second
# value (which additionally lists 'link' and 'meta') is in effect. This looks
# like the old/new pair of a diff hunk -- confirm the first line is stale.
remove_tags = [dict(name=['embed','object','iframe','base'])]
remove_tags = [dict(name=['embed','object','iframe','base','link','meta'])]
# Feed list as (title, URL) tuples; one RSS feed is configured.
feeds = [(u'Vesti', u'http://www.novosti.rs/rss/rss-vesti')]
def preprocess_html(self, soup):
    """Tidy up the parsed article markup and hand it back.

    Three passes are made over ``soup``, in this order:

    1. every element carrying a ``style`` attribute has that attribute
       deleted;
    2. every ``<span class="author">`` element is renamed to ``<p>``;
    3. every ``<img>`` element without an ``alt`` attribute gets
       ``alt="image"``.

    The tree is modified in place and the same ``soup`` object is returned.
    """
    # Pass 1: drop inline styling.
    for styled in soup.findAll(style=True):
        del styled['style']

    # Pass 2: turn author spans into paragraphs.
    for byline in soup.findAll('span', attrs={'class':'author'}):
        byline.name = 'p'

    # Pass 3: make sure each image has an alt text.
    for image in soup.findAll('img'):
        if image.has_key('alt'):
            continue
        image['alt'] = 'image'

    return soup