Update Boston Globe

This commit is contained in:
Kovid Goyal 2021-10-31 11:56:59 +05:30
parent 5832bb74b1
commit 70feb562be
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C

View File

@@ -5,10 +5,16 @@
from __future__ import absolute_import, division, print_function, unicode_literals
import json
import pprint
from datetime import timedelta
from calibre.utils.date import utcnow
from calibre.utils.iso8601 import parse_iso8601
from calibre.web.feeds.recipes import BasicNewsRecipe
oldest_article = 1  # Maximum article age in days: parse_section() skips items published more than this many days before the current UTC time
def classes(classes):
q = frozenset(classes.split(' '))
return dict(attrs={
@@ -80,6 +86,8 @@ def absolutize_url(url):
def parse_section(raw_html):
data = extract_json(raw_html)['content-feed']
now = utcnow()
cutoff_date = now - timedelta(days=oldest_article)
def text(e):
if not e:
@@ -88,10 +96,13 @@ def parse_section(raw_html):
for group in data.values():
for elem in group['data']['content_elements']:
date = parse_iso8601(elem['publish_date'])
if date < cutoff_date:
continue
title = text(elem['headlines'])
description = text(elem.get('description'))
url = absolutize_url(elem['canonical_url'])
yield {'title': title, 'url': url, 'description': description}
yield {'title': title, 'url': url, 'description': description, 'date': ' ' + str(date.date())}
def main():