mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-31 14:33:54 -04:00
Fix #6845 (Updated recipe for Novosti)
This commit is contained in:
parent
8f5842edd3
commit
48faa74301
@ -37,6 +37,16 @@ class Novosti(BasicNewsRecipe):
|
|||||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||||
|
|
||||||
keep_only_tags = [dict(attrs={'class':['articleTitle','author','articleLead','articleBody']})]
|
keep_only_tags = [dict(attrs={'class':['articleTitle','author','articleLead','articleBody']})]
|
||||||
remove_tags = [dict(name=['embed','object','iframe','base'])]
|
remove_tags = [dict(name=['embed','object','iframe','base','link','meta'])]
|
||||||
|
|
||||||
feeds = [(u'Vesti', u'http://www.novosti.rs/rss/rss-vesti')]
|
feeds = [(u'Vesti', u'http://www.novosti.rs/rss/rss-vesti')]
|
||||||
|
|
||||||
|
def preprocess_html(self, soup):
    """Normalise a parsed article tree and hand it back.

    Three in-place clean-ups are applied to ``soup``:

    * every element carrying a ``style`` attribute loses that attribute;
    * every ``<span class="author">`` is renamed to a ``<p>`` element;
    * every ``<img>`` without an ``alt`` attribute gets ``alt="image"``.

    :param soup: parsed document exposing ``findAll`` -- presumably the
        Beautiful Soup tree supplied by the recipe framework (confirm
        against the caller).
    :return: the same ``soup`` object, modified in place.
    """
    # Strip inline styling from any element that declares it.
    for styled in soup.findAll(style=True):
        del styled['style']

    # Turn the author span into a paragraph element.
    for byline in soup.findAll('span', attrs={'class': 'author'}):
        byline.name = 'p'

    # Give every image an alt text. ``Tag.get`` is used rather than
    # ``has_key`` because ``has_key`` is a Beautiful Soup 3-only spelling
    # (renamed ``has_attr`` in BS4), while ``get`` exists in both.
    for image in soup.findAll('img'):
        if image.get('alt') is None:
            image['alt'] = 'image'

    return soup
|
||||||
|
|
Loading…
x
Reference in New Issue
Block a user