Add oldest_article to AP recipe

This commit is contained in:
Kovid Goyal 2022-07-08 08:16:06 +05:30
parent 204568a6bb
commit f0bba63c60
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C

View File

@@ -7,6 +7,7 @@ import json
import re import re
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
from calibre.utils.date import utcnow, parse_date
def extract_article(raw): def extract_article(raw):
@@ -41,6 +42,7 @@ class AssociatedPress(BasicNewsRecipe):
no_stylesheets = True no_stylesheets = True
ignore_duplicate_articles = {'title', 'url'} ignore_duplicate_articles = {'title', 'url'}
remove_empty_feeds = False remove_empty_feeds = False
oldest_article = 1.5
def parse_index(self): def parse_index(self):
feeds = [] feeds = []
@@ -74,6 +76,13 @@ class AssociatedPress(BasicNewsRecipe):
if not title: if not title:
continue continue
title = title.split('\u2014')[-1] title = title.split('\u2014')[-1]
updated = article.get('updated')
if updated:
updated = parse_date(updated, assume_utc=True)
delta = utcnow() - updated
if (delta.days*24*3600 + delta.seconds) > 24*3600*self.oldest_article:
self.log('Skipping', title, 'as it is too old')
continue
self.log('\tFound article:', title, 'at', url) self.log('\tFound article:', title, 'at', url)
articles.append({'title': title, 'url': url}) articles.append({'title': title, 'url': url})
self.log('') self.log('')