Update Popular Science

This commit is contained in:
Kovid Goyal 2017-12-28 19:13:21 +05:30
parent 9453d87b38
commit 10254b86f1
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C

View File

@@ -1,4 +1,5 @@
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
import re
class AdvancedUserRecipe1282101454(BasicNewsRecipe): class AdvancedUserRecipe1282101454(BasicNewsRecipe):
@@ -12,19 +13,30 @@ class AdvancedUserRecipe1282101454(BasicNewsRecipe):
no_stylesheets = True no_stylesheets = True
remove_javascript = True remove_javascript = True
use_embedded_content = False use_embedded_content = False
remove_empty_feeds = True
ignore_duplicate_articles = {'url'}
feeds = [ feeds = [
('Gadgets', 'http://www.popsci.com/full-feed/gadgets'), ('Gadgets', 'http://www.popsci.com/full-feed/gadgets'),
('Cars', 'http://www.popsci.com/full-feed/cars'), ('Cars', 'http://www.popsci.com/full-feed/cars'),
('Science', 'http://www.popsci.com/full-feed/science'), ('Science', 'http://www.popsci.com/full-feed/science'),
('Technology', 'http://www.popsci.com/full-feed/technology'), ('Technology', 'http://www.popsci.com/full-feed/technology'),
('DIY', 'http://www.popsci.com/full-feed/diy'), ('DIY', 'http://www.popsci.com/full-feed/diy'),
('Animals', 'https://www.popsci.com/rss-animals.xml'),
('Space', 'https://www.popsci.com/rss-space.xml'),
('Environment', 'https://www.popsci.com/rss-environment.xml'),
('Eastern Arsenal', 'https://www.popsci.com/rss-eastern-arsenal.xml'),
] ]
pane_node_body = re.compile('pane-node-(?:\w+-){0,9}body')
keep_only_tags = [ keep_only_tags = [
dict(attrs={'class': lambda x: x and { dict(attrs={'class': lambda x: x and frozenset('pane-node-header'.split()).issubset(frozenset(x.split()))}),
'pane-node-header', 'pane-node-body'} & set(x.split())}), dict(attrs={'class': pane_node_body}),
]
remove_tags = [
dict(attrs={'class': lambda x: x and frozenset('ads seperator'.split()).issubset(frozenset(x.split()))}),
] ]
def preprocess_html(self, soup): def preprocess_html(self, soup):