mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Update Bangkok Post
Fixes #1661925 [No news content when fetching news from Bangkok Post & The Nation](https://bugs.launchpad.net/calibre/+bug/1661925)
This commit is contained in:
parent
fcc7de203a
commit
be6a1e9921
@ -1,6 +1,12 @@
|
|||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
|
||||||
|
def classes(classes):
    """Build a tag-matching spec that selects elements by CSS class name.

    ``classes`` is a whitespace-separated string of class names. The result
    is a dict of the form ``{'attrs': {'class': predicate}}``; the recipe
    places it in ``keep_only_tags``.

    The predicate receives an element's ``class`` attribute value and
    returns a falsy value when that value is empty/``None`` or shares no
    class name with ``classes``; otherwise it returns the (non-empty)
    frozenset of class names the two have in common.
    """
    # Split on any run of whitespace so tabs, newlines and repeated spaces
    # are handled the same way the predicate tokenises the tag's own
    # class attribute.
    wanted = frozenset(classes.split())
    return dict(attrs={
        'class': lambda x: x and frozenset(x.split()).intersection(wanted)})
|
||||||
|
|
||||||
|
|
||||||
class BangkokPostRecipe(BasicNewsRecipe):
|
class BangkokPostRecipe(BasicNewsRecipe):
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__author__ = 'kwetal'
|
__author__ = 'kwetal'
|
||||||
@ -40,15 +46,12 @@ class BangkokPostRecipe(BasicNewsRecipe):
|
|||||||
feeds.append((u'Life', u'http://www.bangkokpost.com/rss/data/life.xml'))
|
feeds.append((u'Life', u'http://www.bangkokpost.com/rss/data/life.xml'))
|
||||||
feeds.append((u'Tech', u'http://www.bangkokpost.com/rss/data/tect.xml'))
|
feeds.append((u'Tech', u'http://www.bangkokpost.com/rss/data/tect.xml'))
|
||||||
|
|
||||||
keep_only_tags = []
|
keep_only_tags = [
|
||||||
keep_only_tags.append(dict(name='div', attrs={'class': 'entry'}))
|
dict(itemprop='headline'),
|
||||||
|
classes('articleContents'),
|
||||||
|
]
|
||||||
|
|
||||||
remove_tags = []
|
def print_version(self, url):
|
||||||
remove_tags.append(dict(name='div', attrs={'class': 'article-features'}))
|
if '.com/vdo/' in url:
|
||||||
remove_tags.append(dict(name='div', attrs={'class': 'socialBookmark'}))
|
url = None
|
||||||
remove_tags.append(dict(name='div', attrs={'id': 'main-sns'}))
|
return url
|
||||||
# Their YouTube movies are displayed in an iframe; if you want those, you will have to parse the articles by hand.
|
|
||||||
# Setting self.recursion to 1, which might resolve this, makes calibre
|
|
||||||
# download a lot of PDF files, which will cause a very, very long
|
|
||||||
# download time
|
|
||||||
remove_tags.append(dict(name='iframe'))
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user