Add a configurable parameter to the NYT web edition recipe to skip articles older than a specified number of days

This commit is contained in:
Kovid Goyal 2018-02-13 08:08:08 +05:30
parent 4e730dc862
commit 1af4092851
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
2 changed files with 26 additions and 4 deletions

View File

@ -11,6 +11,8 @@ from calibre.utils.date import strptime
from calibre.web.feeds.news import BasicNewsRecipe
is_web_edition = True
oldest_web_edition_article = 7 # days
# The sections to download when downloading the web edition, comment out
# the section you are not interested in
web_sections = [
@ -155,12 +157,21 @@ class NewYorkTimes(BasicNewsRecipe):
p = article.find(**classes('summary'))
if p is not None:
desc = self.tag_to_string(p)
yield {'title': title, 'url': url, 'description': desc}
date = ''
d = date_from_url(url)
if d is not None:
date = format_date(d)
today = datetime.date.today()
delta = today - d
if delta.days > oldest_web_edition_article:
self.log.debug('\tSkipping article', title, 'as it is too old')
continue
yield {'title': title, 'url': url, 'description': desc, 'date': date}
def parse_web_section(self, soup, slug):
def log(article):
self.log('\t', article['title'], ':', article['url'])
self.log('\t', article['title'] + article['date'], ':', article['url'])
if article.get('description'):
self.log('\t\t', article['description'])

View File

@ -11,6 +11,8 @@ from calibre.utils.date import strptime
from calibre.web.feeds.news import BasicNewsRecipe
is_web_edition = False
oldest_web_edition_article = 7 # days
# The sections to download when downloading the web edition, comment out
# the section you are not interested in
web_sections = [
@ -155,12 +157,21 @@ class NewYorkTimes(BasicNewsRecipe):
p = article.find(**classes('summary'))
if p is not None:
desc = self.tag_to_string(p)
yield {'title': title, 'url': url, 'description': desc}
date = ''
d = date_from_url(url)
if d is not None:
date = format_date(d)
today = datetime.date.today()
delta = today - d
if delta.days > oldest_web_edition_article:
self.log.debug('\tSkipping article', title, 'as it is too old')
continue
yield {'title': title, 'url': url, 'description': desc, 'date': date}
def parse_web_section(self, soup, slug):
def log(article):
self.log('\t', article['title'], ':', article['url'])
self.log('\t', article['title'] + article['date'], ':', article['url'])
if article.get('description'):
self.log('\t\t', article['description'])