diff --git a/recipes/al_jazeera.recipe b/recipes/al_jazeera.recipe
index 0474af1064..ed7957dccf 100644
--- a/recipes/al_jazeera.recipe
+++ b/recipes/al_jazeera.recipe
@@ -1,3 +1,5 @@
+#!/usr/bin/env python
+# vim:fileencoding=utf-8
 __license__ = 'GPL v3'
 __copyright__ = '2009-2010, Darko Miletic '
 
@@ -35,6 +37,20 @@ class AlJazeera(BasicNewsRecipe):
                    'meta', 'base', 'iframe', 'embed']),
     ]
 
+    recipe_specific_options = {
+        'days': {
+            'short': 'Oldest article to download from this news source. In days ',
+            'long': 'For example, 0.5, gives you articles from the past 12 hours',
+            'default': str(oldest_article)
+        }
+    }
+
+    def __init__(self, *args, **kwargs):
+        BasicNewsRecipe.__init__(self, *args, **kwargs)
+        d = self.recipe_specific_options.get('days')
+        if d and isinstance(d, str):
+            self.oldest_article = float(d)
+
     feeds = [(u'Al Jazeera English', u'http://www.aljazeera.com/xml/rss/all.xml')]
diff --git a/recipes/independent.recipe b/recipes/independent.recipe
index 91989467cd..6076d506dd 100644
--- a/recipes/independent.recipe
+++ b/recipes/independent.recipe
@@ -40,6 +40,20 @@ class TheIndependentNew(BasicNewsRecipe):
     encoding = 'utf-8'
     compress_news_images = True
 
+    recipe_specific_options = {
+        'days': {
+            'short': 'Oldest article to download from this news source. In days ',
+            'long': 'For example, 0.5, gives you articles from the past 12 hours',
+            'default': str(oldest_article)
+        }
+    }
+
+    def __init__(self, *args, **kwargs):
+        BasicNewsRecipe.__init__(self, *args, **kwargs)
+        d = self.recipe_specific_options.get('days')
+        if d and isinstance(d, str):
+            self.oldest_article = float(d)
+
     keep_only_tags = [
         dict(id=['articleHeader', 'main']),
         classes('headline sub-headline breadcrumb author publish-date hero-image body-content'),
diff --git a/recipes/livemint.recipe b/recipes/livemint.recipe
index 2a77beb54a..10e285c302 100644
--- a/recipes/livemint.recipe
+++ b/recipes/livemint.recipe
@@ -143,9 +143,7 @@ class LiveMint(BasicNewsRecipe):
         # remove empty p tags
         raw = re.sub(
             r'(<p>\s*)(<[^(\/|a|i|b|em|strong)])', '\g<2>', re.sub(
-                r'(<p>\s*&nbsp;\s*<\/p>)|(<p>\s*<\/p>)|(&nbsp;\s*<\/p>)', '', re.sub(
-                    r'(?=<p>\s*Also\s*Read).*?(?<=</p>)', '', raw
-                )
+                r'(<p>\s*&nbsp;\s*<\/p>)|(<p>\s*<\/p>)|(&nbsp;\s*<\/p>)', '', raw
             )
         )
         if '<script>var wsjFlag=true;</script>' in raw:
@@ -186,10 +184,11 @@ class LiveMint(BasicNewsRecipe):
         for span in soup.findAll('span', attrs={'class':'exclusive'}):
             span.extract()
         for al in soup.findAll('a', attrs={'class':'manualbacklink'}):
-            pa = al.findParent('p')
+            pa = al.findParent(['p', 'h2', 'h3', 'h4'])
             if pa:
                 pa.extract()
-        if wa := soup.find(**classes('autobacklink-topic')):
+        wa = soup.find(**classes('autobacklink-topic'))
+        if wa:
             p = wa.findParent('p')
             if p:
                 p.extract()
diff --git a/recipes/new_scientist_mag.recipe b/recipes/new_scientist_mag.recipe
index 87d15cb44a..7984b24769 100644
--- a/recipes/new_scientist_mag.recipe
+++ b/recipes/new_scientist_mag.recipe
@@ -1,3 +1,5 @@
+#!/usr/bin/env python
+# vim:fileencoding=utf-8
 '''
 newscientist.com
 '''
@@ -70,8 +72,20 @@ class NewScientist(BasicNewsRecipe):
         classes('ArticleHeader__SocialWrapper AdvertWrapper ReadMoreWithImage ArticleTopics')
     ]
 
+    recipe_specific_options = {
+        'issue': {
+            'short': 'Enter the Issue Number you want to download ',
+            'long': 'For example, 3498'
+        }
+    }
+
     def parse_index(self):
-        soup = self.index_to_soup('https://www.newscientist.com/issues/current/')
+        issue_url = 'https://www.newscientist.com/issues/current/'
+        d = self.recipe_specific_options.get('issue')
+        if d and isinstance(d, str):
+            issue_url = 'https://www.newscientist.com/issue/' + d
+
+        soup = self.index_to_soup(issue_url)
         div = soup.find('div', attrs={'class':'ThisWeeksMagazineHero__CoverInfo'})
         tme = div.find(**classes('ThisWeeksMagazineHero__MagInfoHeading'))
         self.log('Downloading issue:', self.tag_to_string(tme))
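
The 'days' handling added to al_jazeera.recipe and independent.recipe above is the same boilerplate in both files. As a minimal standalone sketch of that pattern (the class name, title and feed URL below are placeholders, not part of the diff), a recipe exposing the option looks like:

    from calibre.web.feeds.news import BasicNewsRecipe


    class ExampleNews(BasicNewsRecipe):  # placeholder recipe, for illustration only
        title = 'Example News'
        oldest_article = 7  # class default, in days

        # shown to the user by calibre as a per-recipe download option
        recipe_specific_options = {
            'days': {
                'short': 'Oldest article to download from this news source. In days ',
                'long': 'For example, 0.5, gives you articles from the past 12 hours',
                'default': str(oldest_article)
            }
        }

        def __init__(self, *args, **kwargs):
            BasicNewsRecipe.__init__(self, *args, **kwargs)
            # the option value, if set, arrives as a string; override the class default
            d = self.recipe_specific_options.get('days')
            if d and isinstance(d, str):
                self.oldest_article = float(d)

        feeds = [('Example feed', 'https://example.com/rss.xml')]  # placeholder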