mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-08-07 09:01:38 -04:00
Update South China Morning Post
Fixes #1816307 [recipe broken South China Morning News](https://bugs.launchpad.net/calibre/+bug/1816307)
This commit is contained in:
parent
3abd63304f
commit
5595655b69
@ -3,6 +3,7 @@ scmp.com
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import Tag
|
||||
|
||||
|
||||
def classes(classes):
|
||||
@ -29,8 +30,7 @@ class SCMP(BasicNewsRecipe):
|
||||
|
||||
keep_only_tags = [
|
||||
dict(name='h1'),
|
||||
classes('field-name-field-subheading scmp-gallery-swiper pane-node-body field-name-field-authors'),
|
||||
dict(itemprop='dateCreated dateModified'.split()),
|
||||
classes('info__subHeadline article-author main__right'),
|
||||
]
|
||||
remove_tags = [
|
||||
dict(name='button')
|
||||
@ -50,16 +50,29 @@ class SCMP(BasicNewsRecipe):
|
||||
return br
|
||||
|
||||
feeds = [
|
||||
(u'Business', u'http://www.scmp.com/rss/business.xml'),
|
||||
(u'Hong Kong', u'http://www.scmp.com/rss/hong_kong.xml'),
|
||||
(u'China', u'http://www.scmp.com/rss/china.xml'),
|
||||
(u'Asia & World', u'http://www.scmp.com/rss/news_asia_world.xml'),
|
||||
(u'Opinion', u'http://www.scmp.com/rss/opinion.xml'),
|
||||
(u'LifeSTYLE', u'http://www.scmp.com/rss/lifestyle.xml'),
|
||||
(u'Sport', u'http://www.scmp.com/rss/sport.xml')
|
||||
('Hong Kong', 'https://www.scmp.com/rss/2/feed'),
|
||||
('China', 'https://www.scmp.com/rss/4/feed'),
|
||||
('Asia', 'https://www.scmp.com/rss/3/feed'),
|
||||
('World', 'https://www.scmp.com/rss/5/feed'),
|
||||
('Business', 'https://www.scmp.com/rss/92/feed'),
|
||||
('Tech', 'https://www.scmp.com/rss/36/feed'),
|
||||
('Life', 'https://www.scmp.com/rss/94/feed'),
|
||||
('Culture', 'https://www.scmp.com/rss/322296/feed'),
|
||||
('Sport', 'https://www.scmp.com/rss/95/feed'),
|
||||
('Post Mag', 'https://www.scmp.com/rss/71/feed'),
|
||||
('Style', 'https://www.scmp.com/rss/72/feed'),
|
||||
]
|
||||
|
||||
def preprocess_html(self, soup):
    """Adjust image tags in *soup* and return the same soup object.

    Two passes are made:

    1. Every ``<img>`` carrying a ``data-original`` attribute gets that
       value copied into its ``src`` attribute.
    2. If the page has a ``<meta name="twitter:image:src">`` tag with a
       ``content`` attribute and an element matching the
       ``image-wrapper__placeholder`` class, a new ``<img>`` whose ``src``
       is the meta content is appended to the placeholder's parent and
       the placeholder itself is removed from the tree.
    """
    # NOTE(review): data-original presumably holds the real URL of a
    # lazy-loaded image -- confirm against the site's markup.
    for lazy_img in soup.findAll("img", attrs={'data-original': True}):
        lazy_img['src'] = lazy_img['data-original']

    meta = soup.find('meta', attrs={'name': 'twitter:image:src'}, content=True)
    if meta is None:
        return soup

    placeholder = soup.find(**classes('image-wrapper__placeholder'))
    if placeholder is None:
        return soup

    # Put a real <img> next to the placeholder, then drop the placeholder.
    container = placeholder.parent
    lead_img = Tag(soup, 'img')
    lead_img['src'] = meta['content']
    container.append(lead_img)
    placeholder.extract()
    return soup
|
||||
|
Loading…
x
Reference in New Issue
Block a user