Update New Statesman

Fixes #1747712 [New recipe for New Statesman magazine](https://bugs.launchpad.net/calibre/+bug/1747712)
This commit is contained in:
Kovid Goyal 2018-02-06 22:35:36 +05:30
parent 5697f8f282
commit d20cffe1b2
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
2 changed files with 30 additions and 68 deletions

Binary file not shown.

After

Width:  |  Height:  |  Size: 889 B

View File

@@ -1,81 +1,43 @@
# -*- mode: python -*-
# -*- coding: utf-8 -*-
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2018, Darko Miletic <darko.miletic at gmail.com>'
''' '''
newstatesman.com www.newstatesman.com
''' '''
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class NewStatesman(BasicNewsRecipe): class NewStatesman(BasicNewsRecipe):
title = 'New Statesman' title = 'New Statesman'
language = 'en_GB' __author__ = 'Darko Miletic'
__author__ = "NotTaken" description = "Current affairs, world politics, the arts and more from Britain's award-winning magazine"
description = "Britain's Current Affairs & Politics Magazine (bi-weekly)" publisher = 'New Statesman Media'
oldest_article = 4.0 category = 'New Statesman magazine, International Politics, British Politics, Britain, UK, Tony Blair, Gordon Brown, Geoffrey Robinson, David Cameron, Sir Menzies Campbell, Labour, Conservative, Liberal Democrat, UK Politics, House of Commons, House of Lords, Legislation, politics magazine' # noqa
oldest_article = 30
no_stylesheets = True no_stylesheets = True
use_embedded_content = False encoding = 'utf8'
use_embedded_content = True
language = 'en_GB'
remove_empty_feeds = True remove_empty_feeds = True
publication_type = 'magazine'
auto_cleanup = True
resolve_internal_links = True
ignore_duplicate_articles = {'url'}
masthead_url = 'https://www.newstatesman.com/sites/all/themes/creative-responsive-theme/images/newstatesman_logo@2x.png'
keep_only_tags = [dict(attrs={'class': 'node'})] extra_css = """
body{font-family: serif}
img{margin-top:1em; margin-bottom: 1em; display:block}
"""
remove_tags_after = [ conversion_options = {
dict(attrs={'class': lambda x: x and 'content123' in x}) 'comment': description,
] 'tags': category,
'publisher': publisher,
'language': language
}
remove_tags = [ feeds = [(u'Articles', u'https://www.newstatesman.com/feeds/site_feed.rss')]
dict(attrs={'class': lambda x: x and 'links_bookmark' in x})
]
# Stylesheet text for this recipe: sizes for the main title, h1/h2 headings,
# the node-image caption (.field-field-nodeimage-title) and .link_col.
# NOTE(review): presumably injected into downloaded article HTML by the
# BasicNewsRecipe framework -- confirm against the calibre recipe API docs.
extra_css = '''
.title-main {font-size: x-large;}
h2 { font-size: small; }
h1 { font-size: medium; }
.field-field-nodeimage-title {
font-size: small;
color: #3C3C3C;
}
.link_col {
font-size: x-small;
}
'''
# URLs already handed out by get_article_url(); that method consults and
# appends to this list to skip articles appearing in more than one feed.
# NOTE(review): this looks like a class-level mutable list (indentation is
# lost in this view), which would be shared by every instance -- confirm.
processed_urls = []
def populate_article_metadata(self, article, soup, first):
    """Use the first image of the page as the article's TOC thumbnail.

    Nothing happens unless ``first`` is truthy and this object exposes an
    ``add_toc_thumbnail`` attribute.

    :param article: article object, passed through unchanged to
        ``add_toc_thumbnail``.
    :param soup: parsed page; only ``soup.find('img')`` is called on it.
    :param first: truthy flag gating the lookup (presumably marks the first
        page of the article -- confirm against the base class).
    :return: None.
    """
    if not (first and hasattr(self, 'add_toc_thumbnail')):
        return
    pic = soup.find('img')
    if pic is None:
        return
    # An <img> with no src attribute (e.g. lazy-loaded via data-src) made
    # pic['src'] raise KeyError; look it up defensively and skip the
    # thumbnail when there is no usable URL.
    src = pic.get('src')
    if src:
        self.add_toc_thumbnail(article, src)
def get_article_url(self, article):
    """Return the article URL, or None if that URL was already returned.

    The URL comes from ``BasicNewsRecipe.get_article_url``. Every URL seen
    for the first time is recorded in ``self.processed_urls``; a repeat is
    logged and suppressed by returning None.

    :param article: feed entry; ``article.title`` is read for the log line.
    :return: the URL on first sight, otherwise None.
    """
    link = BasicNewsRecipe.get_article_url(self, article)
    already_seen = link in self.processed_urls
    if not already_seen:
        # First occurrence: remember it so later feeds cannot repeat it.
        self.processed_urls.append(link)
        return link
    self.log('skipping duplicate article: %s' % article.title)
    return None
# Feed list: each entry is a (section title, feed URL) pair.
# All URLs are plain http:// on www.newstatesman.com. The final 'Others'
# entry points at a site-wide feed, so its articles can repeat ones from
# the topical feeds above.
feeds = [
(u'Politics',
u'http://www.newstatesman.com/politics.rss'),
(u'Business',
u'http://www.newstatesman.com/business.rss'),
(u'Economics',
u'http://www.newstatesman.com/economics.rss'),
(u'Culture',
u'http://www.newstatesman.com/culture.rss'),
(u'Media',
u'http://www.newstatesman.com/media.rss'),
(u'Books',
u'http://www.newstatesman.com/taxonomy/term/feed/27'),
(u'Life & Society',
u'http://www.newstatesman.com/taxonomyfeed/11'),
(u'World Affairs',
u'http://www.newstatesman.com/world-affairs.rss'),
(u'Sci-Tech',
u'http://www.newstatesman.com/feeds/topics/sci-tech.rss'),
(u'Others',
u'http://www.newstatesman.com/feeds_allsite/site_feed.php'),
]