From 5afd59f9f13bff8e129ddf158f1e23ff2eee4e9c Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 27 Jan 2016 21:08:30 +0530 Subject: [PATCH] Update New Scientist Fixes #1538604 [Updated recipe for new scientist](https://bugs.launchpad.net/calibre/+bug/1538604) --- recipes/new_scientist.recipe | 32 +++++++++++++++++++++----------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/recipes/new_scientist.recipe b/recipes/new_scientist.recipe index f961f2259e..f40b17b9ad 100644 --- a/recipes/new_scientist.recipe +++ b/recipes/new_scientist.recipe @@ -3,21 +3,22 @@ ## Contact: AprilHare, Darko Miletic ## ## License: GNU General Public License v3 - http://www.gnu.org/copyleft/gpl.html -## Copyright: 2008-2010, AprilHare, Darko Miletic +## Copyright: 2008-2016, AprilHare, Darko Miletic ## ## Written: 2008 -## Last Edited: Dec 2015 +## Last Edited: Jan 2016 ## ''' 01-19-2012: Added GrayScale Image conversion and Duplicant article removals 12-31-2015: Major rewrite due to massive changes in site structure +01-27-2016: Added support for series index and minor cleanup ''' -__license__ = 'GNU General Public License v3 - http://www.gnu.org/copyleft/gpl.html' -__copyright__ = '2008-2015, AprilHare, Darko Miletic ' -__version__ = 'v0.6.0' -__date__ = '2015-12-31' +__license__ = 'GNU General Public License v3 - http://www.gnu.org/copyleft/gpl.html' +__copyright__ = '2008-2016, AprilHare, Darko Miletic ' +__version__ = 'v0.6.1' +__date__ = '2016-01-27' __author__ = 'Darko Miletic' ''' @@ -25,6 +26,7 @@ newscientist.com ''' import re +import urllib from calibre.utils.magick import Image from calibre.web.feeds.news import BasicNewsRecipe @@ -43,7 +45,7 @@ class NewScientist(BasicNewsRecipe): needs_subscription = 'optional' remove_empty_feeds = True ignore_duplicate_articles = {'url'} - compress_news_images = True + compress_news_images = False scale_news_images = True resolve_internal_links = True extra_css = """ @@ -58,10 +60,10 @@ class NewScientist(BasicNewsRecipe): """ conversion_options = { - 'comment' : description - , 'tags' : category - , 'publisher' : publisher - , 'language' : language + 'comment' : description + , 'tags' : category + , 'publisher' : publisher + , 'language' : language } preprocess_regexps = [(re.compile(r'.*?', re.DOTALL|re.IGNORECASE),lambda match: '')] @@ -120,6 +122,14 @@ class NewScientist(BasicNewsRecipe): cover_item = soup.find('img', attrs={'class':'issue-new-magazine-cover'}) if cover_item: cover_url = self.image_url_processor(None, cover_item['src']) + #Configure series and issue number + issue_nr = soup.find('div', attrs={'class':'magnavissue'}) + if issue_nr: + if issue_nr.string is not None: + non_decimal = re.compile(r'[^\d.]+') + nr = non_decimal.sub('', issue_nr.string) + self.conversion_options.update({'series':'New Scientist'}) + self.conversion_options.update({'series_index':nr}) return cover_url # Converts images to Gray Scale