Update stuff.co.nz

Fixes #1795053 [News downloads no longer working](https://bugs.launchpad.net/calibre/+bug/1795053)
This commit is contained in:
Kovid Goyal 2018-09-29 10:45:07 +05:30
parent 6b69b78ead
commit 7b4d7f968c
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C

View File

@ -1,5 +1,10 @@
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup
def classes(classes):
    """Build a matcher for elements that carry any of the given CSS classes.

    ``classes`` is a whitespace-separated string of class names.  The result
    is a dict of the form ``{'attrs': {'class': predicate}}``; this recipe
    places such dicts in its ``keep_only_tags`` and ``remove_tags`` lists.

    ``predicate`` takes an element's ``class`` attribute value.  A falsy value
    (``None`` or ``''``) is returned unchanged; otherwise the frozenset of
    requested class names present on the element is returned, which is empty
    (falsy) when none match.
    """
    # split() with no argument separates on any run of whitespace, so class
    # lists containing tabs, newlines or repeated spaces still yield clean
    # names (split(' ') would leave them glued together or produce '').
    wanted = frozenset(classes.split())
    return dict(attrs={
        'class': lambda x: x and frozenset(x.split()).intersection(wanted)})
class stuffconz(BasicNewsRecipe): class stuffconz(BasicNewsRecipe):
@ -9,17 +14,15 @@ class stuffconz(BasicNewsRecipe):
oldest_article = 1 # days oldest_article = 1 # days
max_articles_per_feed = 25 max_articles_per_feed = 25
remove_stylesheets = True keep_only_tags = [
remove_tags_after = dict(name='div', attrs={'id': 'related_box'}) classes('sics-component__headline sics-component__byline sics-component__story')
]
remove_tags = [ remove_tags = [
dict(name='iframe'), dict(name=['meta', 'link', 'style']),
dict(name='div', attrs={'class': ['story_feature_title']}), classes('sics-component__sharebar'),
dict(name='div', attrs={
'id': ['toolbox', 'related_box', 'adSTORYBODY']}),
dict(name='span', attrs={
'class': ['related_link', 'slideshowcontrols']}),
] ]
remove_stylesheets = True
feeds = [ feeds = [
('Dominion Post', ('Dominion Post',
'http://www.stuff.co.nz/rss/dominion-post'), 'http://www.stuff.co.nz/rss/dominion-post'),
@ -39,11 +42,3 @@ class stuffconz(BasicNewsRecipe):
'http://www.stuff.co.nz/rss/life-style'), 'http://www.stuff.co.nz/rss/life-style'),
] ]
def preprocess_html(self, soup):
    """Reduce the page to its ``<div id="left_col">`` story container.

    Looks up the ``div`` whose ``id`` is ``left_col`` in ``soup``, builds a
    fresh minimal document (placeholder title, empty body) and moves that
    div into the new body.  Returns the new document, or ``soup`` itself
    unchanged when the page has no such div.
    """
    story = soup.find(name='div', attrs={'id': 'left_col'})
    if story is None:
        # find() found no story container, so there is nothing to move into
        # a new document; hand the page back as-is instead of inserting None.
        return soup
    shell = BeautifulSoup(
        '<html><head><title>t</title></head><body></body></html>')
    body = shell.find(name='body')
    body.insert(0, story)
    return shell