use the LWN weekly publication date

Bulk conversion is (unfortunately) a frequent occurrence when catching up after a holiday or a crontab breakage. Working out the reading order of the LWN weekly editions is really annoying when they all carry the same title, because that title is based on the conversion date. This change takes the publication date from the title of the edition's page instead, and falls back to the current date if that date cannot be parsed. It uses dateutil.parser.parse() for thread-safety.
2025-07-09 03:04:10 -04:00 · 2024-12-29 13:27:39 +01:00 · 2024-12-29 13:27:39 +01:00 · 57e91dbd3c
commit 57e91dbd3c
parent bfbd83bff6
1 changed files with 15 additions and 0 deletions
--- a/recipes/lwn_weekly.recipe
+++ b/recipes/lwn_weekly.recipe
@ -12,6 +12,7 @@ import re
 import sys

 from calibre.web.feeds.news import BasicNewsRecipe
+from calibre.utils.date import now as nowf


 class WeeklyLWN(BasicNewsRecipe):
@ -82,6 +83,19 @@ class WeeklyLWN(BasicNewsRecipe):

        return url

+    def publication_date(self):
+        return self.pub_date
+    
+    def parse_publication_date(self, soup):
+        from dateutil.parser import parse, ParserError
+        try:
+            date_match = re.match(r'.* +for +([^\[]*)', self.tag_to_string(soup.head.title.string))
+            # dateutil.parser.parse() is considered thread-safe
+            self.pub_date = parse(date_match[1])
+        except (TypeError, ParserError):
+            self.log.warning('Failed to parse publication date from title: %r, using current time' % soup.head.title.string)
+            self.pub_date = nowf()
+
    def parse_index(self):
        past_edition = self.recipe_specific_options.get('issue')
        if past_edition and isinstance(past_edition, str):
@ -91,6 +105,7 @@ class WeeklyLWN(BasicNewsRecipe):
        else:
            index_url = self.print_version('/free/bigpage')
        soup = self.index_to_soup(index_url)
+        self.parse_publication_date(soup)
        curr = soup.body

        articles = {}