diff --git a/recipes/boston.com.recipe b/recipes/boston.com.recipe index 3908bd9bd7..47f730a967 100644 --- a/recipes/boston.com.recipe +++ b/recipes/boston.com.recipe @@ -5,10 +5,16 @@ from __future__ import absolute_import, division, print_function, unicode_literals import json import pprint +from datetime import timedelta +from calibre.utils.date import utcnow +from calibre.utils.iso8601 import parse_iso8601 from calibre.web.feeds.recipes import BasicNewsRecipe +oldest_article = 1 # days, includes articles that were published no more than the specified number of days ago + + def classes(classes): q = frozenset(classes.split(' ')) return dict(attrs={ @@ -80,6 +86,8 @@ def absolutize_url(url): def parse_section(raw_html): data = extract_json(raw_html)['content-feed'] + now = utcnow() + cutoff_date = now - timedelta(days=oldest_article) def text(e): if not e: @@ -88,10 +96,13 @@ def parse_section(raw_html): for group in data.values(): for elem in group['data']['content_elements']: + date = parse_iso8601(elem['publish_date']) + if date < cutoff_date: + continue title = text(elem['headlines']) description = text(elem.get('description')) url = absolutize_url(elem['canonical_url']) - yield {'title': title, 'url': url, 'description': description} + yield {'title': title, 'url': url, 'description': description, 'date': ' ' + str(date.date())} def main():