From 1af4092851cde94998ea58ed79b8f90000612180 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Tue, 13 Feb 2018 08:08:08 +0530
Subject: [PATCH] Add a configurable parameter to the NYT web edition recipe to
 skip articles older than a specified number of days

---
 recipes/nytimes.recipe     | 15 +++++++++++++--
 recipes/nytimes_sub.recipe | 15 +++++++++++++--
 2 files changed, 26 insertions(+), 4 deletions(-)

diff --git a/recipes/nytimes.recipe b/recipes/nytimes.recipe
index e500d10c51..2743d2ca1d 100644
--- a/recipes/nytimes.recipe
+++ b/recipes/nytimes.recipe
@@ -11,6 +11,8 @@ from calibre.utils.date import strptime
 from calibre.web.feeds.news import BasicNewsRecipe
 
 is_web_edition = True
+oldest_web_edition_article = 7  # days
+
 # The sections to download when downloading the web edition, comment out
 # the section you are not interested in
 web_sections = [
@@ -155,12 +157,21 @@ class NewYorkTimes(BasicNewsRecipe):
                     p = article.find(**classes('summary'))
                     if p is not None:
                         desc = self.tag_to_string(p)
-                    yield {'title': title, 'url': url, 'description': desc}
+                    date = ''
+                    d = date_from_url(url)
+                    if d is not None:
+                        date = format_date(d)
+                        today = datetime.date.today()
+                        delta = today - d
+                        if delta.days > oldest_web_edition_article:
+                            self.log.debug('\tSkipping article', title, 'as it is too old')
+                            continue
+                    yield {'title': title, 'url': url, 'description': desc, 'date': date}
 
     def parse_web_section(self, soup, slug):
 
         def log(article):
-            self.log('\t', article['title'], ':', article['url'])
+            self.log('\t', article['title'] + article['date'], ':', article['url'])
             if article.get('description'):
                 self.log('\t\t', article['description'])
 
diff --git a/recipes/nytimes_sub.recipe b/recipes/nytimes_sub.recipe
index 73ef9674ba..01de3e0e14 100644
--- a/recipes/nytimes_sub.recipe
+++ b/recipes/nytimes_sub.recipe
@@ -11,6 +11,8 @@ from calibre.utils.date import strptime
 from calibre.web.feeds.news import BasicNewsRecipe
 
 is_web_edition = False
+oldest_web_edition_article = 7  # days
+
 # The sections to download when downloading the web edition, comment out
 # the section you are not interested in
 web_sections = [
@@ -155,12 +157,21 @@ class NewYorkTimes(BasicNewsRecipe):
                     p = article.find(**classes('summary'))
                     if p is not None:
                         desc = self.tag_to_string(p)
-                    yield {'title': title, 'url': url, 'description': desc}
+                    date = ''
+                    d = date_from_url(url)
+                    if d is not None:
+                        date = format_date(d)
+                        today = datetime.date.today()
+                        delta = today - d
+                        if delta.days > oldest_web_edition_article:
+                            self.log.debug('\tSkipping article', title, 'as it is too old')
+                            continue
+                    yield {'title': title, 'url': url, 'description': desc, 'date': date}
 
     def parse_web_section(self, soup, slug):
 
         def log(article):
-            self.log('\t', article['title'], ':', article['url'])
+            self.log('\t', article['title'] + article['date'], ':', article['url'])
             if article.get('description'):
                 self.log('\t\t', article['description'])