From 2ff339bfa35a7f81c59e11c2da8ea559ee0a547a Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Mon, 3 Oct 2016 23:06:58 +0530
Subject: [PATCH] Update San Jose Mercury News

Fixes #1629944 [San Jose Mercury recipe not working](https://bugs.launchpad.net/calibre/+bug/1629944)
---
 recipes/sanjosemercurynews.recipe | 21 +++++++++------------
 1 file changed, 9 insertions(+), 12 deletions(-)

diff --git a/recipes/sanjosemercurynews.recipe b/recipes/sanjosemercurynews.recipe
index c937503837..006c3e81e2 100644
--- a/recipes/sanjosemercurynews.recipe
+++ b/recipes/sanjosemercurynews.recipe
@@ -28,19 +28,16 @@ class MercuryNews(BasicNewsRecipe):
     }
 
     keep_only_tags = [
-        dict(name='h1', attrs={'id': 'articleTitle'}), dict(
-            name='div', attrs={'id': 'articleBody'})
-    ]
-    remove_tags = [
-        dict(name='div', attrs={'class': 'articleEmbeddedAdBox'}), dict(name=[
-            'link', 'iframe', 'object']), dict(name='div', attrs={'id': 'articleViewerGroup'})
+        dict(name='h1'),
+        dict(attrs={'class':['byline', 'time', 'article-body']}),
+        dict(attrs={'class':lambda x: x and 'header-features' in x.split()}),
     ]
 
     feeds = [
-
-        (u'News', u'http://feeds.mercurynews.com/mngi/rss/CustomRssServlet/568/200735.xml'),
-        (u'Politics', u'http://feeds.mercurynews.com/mngi/rss/CustomRssServlet/568/200740.xml'),
-        (u'Local News', u'http://feeds.mercurynews.com/mngi/rss/CustomRssServlet/568/200748.xml'),
-        (u'Editorials', u'http://feeds.mercurynews.com/mngi/rss/CustomRssServlet/568/200766.xml'),
-        (u'Opinion', u'http://feeds.mercurynews.com/mngi/rss/CustomRssServlet/568/200224.xml')
+        ('News', 'http://www.mercurynews.com/feed/')
     ]
+
+    def preprocess_html(self, soup, *a):
+        for img in soup.findAll(name='img', attrs={'data-src':True}):
+            img['src'] = img['data-src']
+        return soup