Update South China Morning Post

Fixes #1816307 [recipe broken South China Morning News](https://bugs.launchpad.net/calibre/+bug/1816307)
This commit is contained in:
Kovid Goyal 2019-02-18 11:28:31 +05:30
parent 3abd63304f
commit 5595655b69
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C

View File

@ -3,6 +3,7 @@ scmp.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag
def classes(classes):
@ -29,8 +30,7 @@ class SCMP(BasicNewsRecipe):
keep_only_tags = [
dict(name='h1'),
classes('field-name-field-subheading scmp-gallery-swiper pane-node-body field-name-field-authors'),
dict(itemprop='dateCreated dateModified'.split()),
classes('info__subHeadline article-author main__right'),
]
remove_tags = [
dict(name='button')
@ -50,16 +50,29 @@ class SCMP(BasicNewsRecipe):
return br
feeds = [
(u'Business', u'http://www.scmp.com/rss/business.xml'),
(u'Hong Kong', u'http://www.scmp.com/rss/hong_kong.xml'),
(u'China', u'http://www.scmp.com/rss/china.xml'),
(u'Asia & World', u'http://www.scmp.com/rss/news_asia_world.xml'),
(u'Opinion', u'http://www.scmp.com/rss/opinion.xml'),
(u'LifeSTYLE', u'http://www.scmp.com/rss/lifestyle.xml'),
(u'Sport', u'http://www.scmp.com/rss/sport.xml')
('Hong Kong', 'https://www.scmp.com/rss/2/feed'),
('China', 'https://www.scmp.com/rss/4/feed'),
('Asia', 'https://www.scmp.com/rss/3/feed'),
('World', 'https://www.scmp.com/rss/5/feed'),
('Business', 'https://www.scmp.com/rss/92/feed'),
('Tech', 'https://www.scmp.com/rss/36/feed'),
('Life', 'https://www.scmp.com/rss/94/feed'),
('Culture', 'https://www.scmp.com/rss/322296/feed'),
('Sport', 'https://www.scmp.com/rss/95/feed'),
('Post Mag', 'https://www.scmp.com/rss/71/feed'),
('Style', 'https://www.scmp.com/rss/72/feed'),
]
def preprocess_html(self, soup):
    """Fix up image references in a parsed article page and return it.

    Two adjustments are made to *soup* in place:

    1. Every ``<img>`` that carries a ``data-original`` attribute gets
       that value copied into its ``src`` attribute.
    2. If the page has a ``<meta name="twitter:image:src">`` tag with a
       ``content`` attribute, and an element matching
       ``classes('image-wrapper__placeholder')`` is found, a new
       ``<img>`` whose ``src`` is the meta tag's ``content`` is appended
       to the placeholder's parent and the placeholder is removed.

    The same *soup* object is always returned.
    """
    # Images with a data-original attribute: use it as the real source.
    for lazy_img in soup.findAll("img", attrs={'data-original':True}):
        lazy_img['src'] = lazy_img['data-original']

    # Without the twitter image meta tag there is nothing to substitute.
    meta = soup.find('meta', attrs={'name':'twitter:image:src'}, content=True)
    if meta is None:
        return soup

    # Only the first matching placeholder is replaced.
    wrapper = soup.find(**classes('image-wrapper__placeholder'))
    if wrapper is None:
        return soup

    container = wrapper.parent
    lead_img = Tag(soup, 'img')
    lead_img['src'] = meta['content']
    # Add the real image next to the placeholder, then drop the placeholder.
    container.append(lead_img)
    wrapper.extract()
    return soup