Add a configurable parameter to the NYT web edition recipe to skip articles older than a specified number of days

2025-07-09 03:04:10 -04:00 · 2018-02-13 08:08:08 +05:30 · 2018-02-13 08:08:08 +05:30 · 1af4092851
commit 1af4092851
parent 4e730dc862
2 changed files with 26 additions and 4 deletions
--- a/recipes/nytimes.recipe
+++ b/recipes/nytimes.recipe
@ -11,6 +11,8 @@ from calibre.utils.date import strptime
 from calibre.web.feeds.news import BasicNewsRecipe

 is_web_edition = True
+oldest_web_edition_article = 7  # days
+
 # The sections to download when downloading the web edition, comment out
 # the section you are not interested in
 web_sections = [
@ -155,12 +157,21 @@ class NewYorkTimes(BasicNewsRecipe):
                    p = article.find(**classes('summary'))
                    if p is not None:
                        desc = self.tag_to_string(p)
-                    yield {'title': title, 'url': url, 'description': desc}
+                    date = ''
+                    d = date_from_url(url)
+                    if d is not None:
+                        date = format_date(d)
+                        today = datetime.date.today()
+                        delta = today - d
+                        if delta.days > oldest_web_edition_article:
+                            self.log.debug('\tSkipping article', title, 'as it is too old')
+                            continue
+                    yield {'title': title, 'url': url, 'description': desc, 'date': date}

    def parse_web_section(self, soup, slug):

        def log(article):
-            self.log('\t', article['title'], ':', article['url'])
+            self.log('\t', article['title'] + article['date'], ':', article['url'])
            if article.get('description'):
                self.log('\t\t', article['description'])

--- a/recipes/nytimes_sub.recipe
+++ b/recipes/nytimes_sub.recipe
@ -11,6 +11,8 @@ from calibre.utils.date import strptime
 from calibre.web.feeds.news import BasicNewsRecipe

 is_web_edition = False
+oldest_web_edition_article = 7  # days
+
 # The sections to download when downloading the web edition, comment out
 # the section you are not interested in
 web_sections = [
@ -155,12 +157,21 @@ class NewYorkTimes(BasicNewsRecipe):
                    p = article.find(**classes('summary'))
                    if p is not None:
                        desc = self.tag_to_string(p)
-                    yield {'title': title, 'url': url, 'description': desc}
+                    date = ''
+                    d = date_from_url(url)
+                    if d is not None:
+                        date = format_date(d)
+                        today = datetime.date.today()
+                        delta = today - d
+                        if delta.days > oldest_web_edition_article:
+                            self.log.debug('\tSkipping article', title, 'as it is too old')
+                            continue
+                    yield {'title': title, 'url': url, 'description': desc, 'date': date}

    def parse_web_section(self, soup, slug):

        def log(article):
-            self.log('\t', article['title'], ':', article['url'])
+            self.log('\t', article['title'] + article['date'], ':', article['url'])
            if article.get('description'):
                self.log('\t\t', article['description'])