mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Add a configurable parameter to the NYT web edition recipe to skip articles older than a specified number of days
This commit is contained in:
parent
4e730dc862
commit
1af4092851
@ -11,6 +11,8 @@ from calibre.utils.date import strptime
|
|||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
is_web_edition = True
|
is_web_edition = True
|
||||||
|
oldest_web_edition_article = 7 # days
|
||||||
|
|
||||||
# The sections to download when downloading the web edition, comment out
|
# The sections to download when downloading the web edition, comment out
|
||||||
# the section you are not interested in
|
# the section you are not interested in
|
||||||
web_sections = [
|
web_sections = [
|
||||||
@ -155,12 +157,21 @@ class NewYorkTimes(BasicNewsRecipe):
|
|||||||
p = article.find(**classes('summary'))
|
p = article.find(**classes('summary'))
|
||||||
if p is not None:
|
if p is not None:
|
||||||
desc = self.tag_to_string(p)
|
desc = self.tag_to_string(p)
|
||||||
yield {'title': title, 'url': url, 'description': desc}
|
date = ''
|
||||||
|
d = date_from_url(url)
|
||||||
|
if d is not None:
|
||||||
|
date = format_date(d)
|
||||||
|
today = datetime.date.today()
|
||||||
|
delta = today - d
|
||||||
|
if delta.days > oldest_web_edition_article:
|
||||||
|
self.log.debug('\tSkipping article', title, 'as it is too old')
|
||||||
|
continue
|
||||||
|
yield {'title': title, 'url': url, 'description': desc, 'date': date}
|
||||||
|
|
||||||
def parse_web_section(self, soup, slug):
|
def parse_web_section(self, soup, slug):
|
||||||
|
|
||||||
def log(article):
|
def log(article):
|
||||||
self.log('\t', article['title'], ':', article['url'])
|
self.log('\t', article['title'] + article['date'], ':', article['url'])
|
||||||
if article.get('description'):
|
if article.get('description'):
|
||||||
self.log('\t\t', article['description'])
|
self.log('\t\t', article['description'])
|
||||||
|
|
||||||
|
@ -11,6 +11,8 @@ from calibre.utils.date import strptime
|
|||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
is_web_edition = False
|
is_web_edition = False
|
||||||
|
oldest_web_edition_article = 7 # days
|
||||||
|
|
||||||
# The sections to download when downloading the web edition, comment out
|
# The sections to download when downloading the web edition, comment out
|
||||||
# the section you are not interested in
|
# the section you are not interested in
|
||||||
web_sections = [
|
web_sections = [
|
||||||
@ -155,12 +157,21 @@ class NewYorkTimes(BasicNewsRecipe):
|
|||||||
p = article.find(**classes('summary'))
|
p = article.find(**classes('summary'))
|
||||||
if p is not None:
|
if p is not None:
|
||||||
desc = self.tag_to_string(p)
|
desc = self.tag_to_string(p)
|
||||||
yield {'title': title, 'url': url, 'description': desc}
|
date = ''
|
||||||
|
d = date_from_url(url)
|
||||||
|
if d is not None:
|
||||||
|
date = format_date(d)
|
||||||
|
today = datetime.date.today()
|
||||||
|
delta = today - d
|
||||||
|
if delta.days > oldest_web_edition_article:
|
||||||
|
self.log.debug('\tSkipping article', title, 'as it is too old')
|
||||||
|
continue
|
||||||
|
yield {'title': title, 'url': url, 'description': desc, 'date': date}
|
||||||
|
|
||||||
def parse_web_section(self, soup, slug):
|
def parse_web_section(self, soup, slug):
|
||||||
|
|
||||||
def log(article):
|
def log(article):
|
||||||
self.log('\t', article['title'], ':', article['url'])
|
self.log('\t', article['title'] + article['date'], ':', article['url'])
|
||||||
if article.get('description'):
|
if article.get('description'):
|
||||||
self.log('\t\t', article['description'])
|
self.log('\t\t', article['description'])
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user