diff --git a/resources/recipes/economist_free.recipe b/resources/recipes/economist_free.recipe index 7c27764b8d..79c17c5ed8 100644 --- a/resources/recipes/economist_free.recipe +++ b/resources/recipes/economist_free.recipe @@ -13,7 +13,7 @@ class Economist(BasicNewsRecipe): description = ('Global news and current affairs from a European perspective.' ' Much slower than the subscription based version.') - oldest_article = 6.5 + oldest_article = 7.0 cover_url = 'http://www.economist.com/images/covers/currentcovereu_large.jpg' remove_tags = [dict(name=['script', 'noscript', 'title', 'iframe', 'cf_floatingcontent']), dict(attrs={'class':['dblClkTrk']})] @@ -29,8 +29,15 @@ class Economist(BasicNewsRecipe): self.feed_dict = {} requests = [] for i, item in enumerate(entries): - published = time.gmtime(item.get('timestamp', time.time())) title = item.get('title', _('Untitled article')) + published = item.date_parsed + if not published: + published = time.gmtime() + utctime = datetime(*published[:6]) + delta = datetime.utcnow() - utctime + if delta.days*24*3600 + delta.seconds > 24*3600*self.oldest_article: + self.log.debug('Skipping article %s as it is too old.'%title) + continue link = item.get('link', None) description = item.get('description', '') author = item.get('author', '') @@ -64,11 +71,6 @@ class Economist(BasicNewsRecipe): self.log('Found print version for article:', title) a = Article(i, title, link, author, description, published, '') - delta = datetime.utcnow() - a.utctime - if delta.days*24*3600 + delta.seconds > 24*3600*self.oldest_article: - self.log.debug('Skipping article %s (%s) from feed %s as it is too old.'%(title, a.localtime.strftime('%a, %d %b, %Y %H:%M'), title)) - return - article = dict(title=a.title, description=a.text_summary, date=time.strftime(self.timefmt, a.date), author=a.author, url=a.url) diff --git a/resources/recipes/independent.recipe b/resources/recipes/independent.recipe index e9e15e2ba9..de33d64d93 100644 --- a/resources/recipes/independent.recipe +++ b/resources/recipes/independent.recipe @@ -9,7 +9,7 @@ class TheIndependent(BasicNewsRecipe): max_articles_per_feed = 25 encoding = 'latin1' - remove_stylesheets = True + no_stylesheets = True #remove_tags_before = dict(name='h1', attrs={'class':'heading'}) #remove_tags_after = dict(name='td', attrs={'class':'newptool1'}) remove_tags = [ @@ -17,33 +17,33 @@ class TheIndependent(BasicNewsRecipe): dict(name='div', attrs={'class':'related-articles'}), dict(name='div', attrs={'id':['qrformdiv', 'inSection', 'alpha-inner']}), dict(name='ul', attrs={'class':'article-tools'}), - dict(name='ul', attrs={'class':'articleTools'}), + dict(name='ul', attrs={'class':'articleTools'}), ] feeds = [ -('UK', - 'http://www.independent.co.uk/news/uk/rss'), -('World', - 'http://www.independent.co.uk/news/world/rss'), -('Sport', - 'http://www.independent.co.uk/sport/rss'), -('Arts and Entertainment', - 'http://www.independent.co.uk/arts-entertainment/rss'), -('Business', - 'http://www.independent.co.uk/news/business/rss'), -('Life and Style', - 'http://www.independent.co.uk/life-style/gadgets-and-tech/news/rss'), -('Science', - 'http://www.independent.co.uk/news/science/rss'), -('People', - 'http://www.independent.co.uk/news/people/rss'), -('Media', - 'http://www.independent.co.uk/news/media/rss'), -('Health and Families', - 'http://www.independent.co.uk/life-style/health-and-families/rss'), -('Obituaries', - 'http://www.independent.co.uk/news/obituaries/rss'), -] + ('UK', + 'http://www.independent.co.uk/news/uk/rss'), + ('World', + 'http://www.independent.co.uk/news/world/rss'), + ('Sport', + 'http://www.independent.co.uk/sport/rss'), + ('Arts and Entertainment', + 'http://www.independent.co.uk/arts-entertainment/rss'), + ('Business', + 'http://www.independent.co.uk/news/business/rss'), + ('Life and Style', + 'http://www.independent.co.uk/life-style/gadgets-and-tech/news/rss'), + ('Science', + 'http://www.independent.co.uk/news/science/rss'), + ('People', + 'http://www.independent.co.uk/news/people/rss'), + ('Media', + 'http://www.independent.co.uk/news/media/rss'), + ('Health and Families', + 'http://www.independent.co.uk/life-style/health-and-families/rss'), + ('Obituaries', + 'http://www.independent.co.uk/news/obituaries/rss'), + ] def preprocess_html(self, soup): story = soup.find(name='div', attrs={'id':'mainColumn'})