mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-08-11 09:13:57 -04:00
Update South China Morning Post
Fixes #1816307 [recipe broken South China Morning News](https://bugs.launchpad.net/calibre/+bug/1816307)
This commit is contained in:
parent
3abd63304f
commit
5595655b69
@@ -3,6 +3,7 @@ scmp.com
|
|||||||
'''
|
'''
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
from calibre.ebooks.BeautifulSoup import Tag
|
||||||
|
|
||||||
|
|
||||||
def classes(classes):
|
def classes(classes):
|
||||||
@@ -29,8 +30,7 @@ class SCMP(BasicNewsRecipe):
|
|||||||
|
|
||||||
keep_only_tags = [
|
keep_only_tags = [
|
||||||
dict(name='h1'),
|
dict(name='h1'),
|
||||||
classes('field-name-field-subheading scmp-gallery-swiper pane-node-body field-name-field-authors'),
|
classes('info__subHeadline article-author main__right'),
|
||||||
dict(itemprop='dateCreated dateModified'.split()),
|
|
||||||
]
|
]
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(name='button')
|
dict(name='button')
|
||||||
@@ -50,16 +50,29 @@ class SCMP(BasicNewsRecipe):
|
|||||||
return br
|
return br
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
(u'Business', u'http://www.scmp.com/rss/business.xml'),
|
('Hong Kong', 'https://www.scmp.com/rss/2/feed'),
|
||||||
(u'Hong Kong', u'http://www.scmp.com/rss/hong_kong.xml'),
|
('China', 'https://www.scmp.com/rss/4/feed'),
|
||||||
(u'China', u'http://www.scmp.com/rss/china.xml'),
|
('Asia', 'https://www.scmp.com/rss/3/feed'),
|
||||||
(u'Asia & World', u'http://www.scmp.com/rss/news_asia_world.xml'),
|
('World', 'https://www.scmp.com/rss/5/feed'),
|
||||||
(u'Opinion', u'http://www.scmp.com/rss/opinion.xml'),
|
('Business', 'https://www.scmp.com/rss/92/feed'),
|
||||||
(u'LifeSTYLE', u'http://www.scmp.com/rss/lifestyle.xml'),
|
('Tech', 'https://www.scmp.com/rss/36/feed'),
|
||||||
(u'Sport', u'http://www.scmp.com/rss/sport.xml')
|
('Life', 'https://www.scmp.com/rss/94/feed'),
|
||||||
|
('Culture', 'https://www.scmp.com/rss/322296/feed'),
|
||||||
|
('Sport', 'https://www.scmp.com/rss/95/feed'),
|
||||||
|
('Post Mag', 'https://www.scmp.com/rss/71/feed'),
|
||||||
|
('Style', 'https://www.scmp.com/rss/72/feed'),
|
||||||
]
|
]
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
for img in soup.findAll("img", attrs={'data-original':True}):
|
for img in soup.findAll("img", attrs={'data-original':True}):
|
||||||
img['src'] = img['data-original']
|
img['src'] = img['data-original']
|
||||||
|
meta = soup.find('meta', attrs={'name':'twitter:image:src'}, content=True)
|
||||||
|
if meta is not None:
|
||||||
|
wrapper = soup.find(**classes('image-wrapper__placeholder'))
|
||||||
|
if wrapper is not None:
|
||||||
|
p = wrapper.parent
|
||||||
|
img = Tag(soup, 'img')
|
||||||
|
img['src'] = meta['content']
|
||||||
|
p.append(img)
|
||||||
|
wrapper.extract()
|
||||||
return soup
|
return soup
|
||||||
|
Loading…
x
Reference in New Issue
Block a user