From 57e91dbd3c402a0c4e874018a7839e9853716472 Mon Sep 17 00:00:00 2001
From: Pieter Smith
Date: Sun, 29 Dec 2024 13:27:39 +0100
Subject: [PATCH] use the LWN weekly publication date

Bulk conversion (unfortunately) is a frequent thing when catching up
after a holiday or crontab breakage. It is really annoying figuring out
the LWN weekly edition reading order if they all have the same title
based on the conversion date.

Falls back to the current date if the date cannot be parsed.

Uses dateutil.parser.parse() for thread-safety.
---
 recipes/lwn_weekly.recipe | 32 ++++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)

diff --git a/recipes/lwn_weekly.recipe b/recipes/lwn_weekly.recipe
index 1f73d07d24..6e489a8901 100644
--- a/recipes/lwn_weekly.recipe
+++ b/recipes/lwn_weekly.recipe
@@ -12,6 +12,7 @@ import re
 import sys
 
 from calibre.web.feeds.news import BasicNewsRecipe
+from calibre.utils.date import now as nowf
 
 
 class WeeklyLWN(BasicNewsRecipe):
@@ -82,6 +83,36 @@ class WeeklyLWN(BasicNewsRecipe):
 
         return url
 
+    def publication_date(self):
+        '''
+        Return the publication date of the edition being converted.
+
+        Falls back to the current time when parse_publication_date() has
+        not stored a date yet.
+        '''
+        pub_date = getattr(self, 'pub_date', None)
+        return nowf() if pub_date is None else pub_date
+
+    def parse_publication_date(self, soup):
+        '''
+        Set self.pub_date from the edition date in the index page title.
+
+        The date is the title text that follows " for ", up to the next
+        "[". If the title is missing or the date cannot be parsed, a
+        warning is logged and the current time is used instead.
+        '''
+        # ParserError subclasses ValueError, which is caught below
+        from dateutil.parser import parse
+        title = None
+        try:
+            title = self.tag_to_string(soup.head.title.string)
+            date_match = re.match(r'.* +for +([^\[]*)', title)
+            # dateutil.parser.parse() is considered thread-safe
+            self.pub_date = parse(date_match[1])
+        except (AttributeError, TypeError, ValueError, OverflowError):
+            self.log.warning('Failed to parse publication date from title: %r, using current time' % title)
+            self.pub_date = nowf()
+
     def parse_index(self):
         past_edition = self.recipe_specific_options.get('issue')
         if past_edition and isinstance(past_edition, str):
@@ -91,6 +122,7 @@ class WeeklyLWN(BasicNewsRecipe):
         else:
             index_url = self.print_version('/free/bigpage')
         soup = self.index_to_soup(index_url)
+        self.parse_publication_date(soup)
         curr = soup.body
 
         articles = {}