diff --git a/recipes/scmp.recipe b/recipes/scmp.recipe index 8f1c7af8c2..46c5d24aad 100644 --- a/recipes/scmp.recipe +++ b/recipes/scmp.recipe @@ -3,6 +3,7 @@ scmp.com ''' from calibre.web.feeds.news import BasicNewsRecipe +from calibre.ebooks.BeautifulSoup import Tag def classes(classes): @@ -29,8 +30,7 @@ class SCMP(BasicNewsRecipe): keep_only_tags = [ dict(name='h1'), - classes('field-name-field-subheading scmp-gallery-swiper pane-node-body field-name-field-authors'), - dict(itemprop='dateCreated dateModified'.split()), + classes('info__subHeadline article-author main__right'), ] remove_tags = [ dict(name='button') @@ -50,16 +50,29 @@ class SCMP(BasicNewsRecipe): return br feeds = [ - (u'Business', u'http://www.scmp.com/rss/business.xml'), - (u'Hong Kong', u'http://www.scmp.com/rss/hong_kong.xml'), - (u'China', u'http://www.scmp.com/rss/china.xml'), - (u'Asia & World', u'http://www.scmp.com/rss/news_asia_world.xml'), - (u'Opinion', u'http://www.scmp.com/rss/opinion.xml'), - (u'LifeSTYLE', u'http://www.scmp.com/rss/lifestyle.xml'), - (u'Sport', u'http://www.scmp.com/rss/sport.xml') + ('Hong Kong', 'https://www.scmp.com/rss/2/feed'), + ('China', 'https://www.scmp.com/rss/4/feed'), + ('Asia', 'https://www.scmp.com/rss/3/feed'), + ('World', 'https://www.scmp.com/rss/5/feed'), + ('Business', 'https://www.scmp.com/rss/92/feed'), + ('Tech', 'https://www.scmp.com/rss/36/feed'), + ('Life', 'https://www.scmp.com/rss/94/feed'), + ('Culture', 'https://www.scmp.com/rss/322296/feed'), + ('Sport', 'https://www.scmp.com/rss/95/feed'), + ('Post Mag', 'https://www.scmp.com/rss/71/feed'), + ('Style', 'https://www.scmp.com/rss/72/feed'), ] def preprocess_html(self, soup): for img in soup.findAll("img", attrs={'data-original':True}): img['src'] = img['data-original'] + meta = soup.find('meta', attrs={'name':'twitter:image:src'}, content=True) + if meta is not None: + wrapper = soup.find(**classes('image-wrapper__placeholder')) + if wrapper is not None: + p = wrapper.parent + img = Tag(soup, 'img') + img['src'] = meta['content'] + p.append(img) + wrapper.extract() return soup