mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Update Boston Globe
This commit is contained in:
parent
5832bb74b1
commit
70feb562be
@@ -5,10 +5,16 @@
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
import json
|
||||
import pprint
|
||||
from datetime import timedelta
|
||||
from calibre.utils.date import utcnow
|
||||
from calibre.utils.iso8601 import parse_iso8601
|
||||
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
|
||||
|
||||
oldest_article = 1 # days, includes articles that were published no more than the specified number of days ago
|
||||
|
||||
|
||||
def classes(classes):
|
||||
q = frozenset(classes.split(' '))
|
||||
return dict(attrs={
|
||||
@@ -80,6 +86,8 @@ def absolutize_url(url):
|
||||
|
||||
def parse_section(raw_html):
|
||||
data = extract_json(raw_html)['content-feed']
|
||||
now = utcnow()
|
||||
cutoff_date = now - timedelta(days=oldest_article)
|
||||
|
||||
def text(e):
|
||||
if not e:
|
||||
@@ -88,10 +96,13 @@ def parse_section(raw_html):
|
||||
|
||||
for group in data.values():
|
||||
for elem in group['data']['content_elements']:
|
||||
date = parse_iso8601(elem['publish_date'])
|
||||
if date < cutoff_date:
|
||||
continue
|
||||
title = text(elem['headlines'])
|
||||
description = text(elem.get('description'))
|
||||
url = absolutize_url(elem['canonical_url'])
|
||||
yield {'title': title, 'url': url, 'description': description}
|
||||
yield {'title': title, 'url': url, 'description': description, 'date': ' ' + str(date.date())}
|
||||
|
||||
|
||||
def main():
|
||||
|
Loading…
x
Reference in New Issue
Block a user