use the LWN weekly publication date

Bulk conversion is (unfortunately) a frequent occurrence when catching up
after a holiday or a crontab breakage. It is really annoying to figure out
the reading order of the LWN weekly editions when they all share the same
title, which is derived from the conversion date.

Falls back to the current date if the date cannot be parsed.

Uses dateutil.parser.parse() to parse the date, because it is considered thread-safe.
This commit is contained in:
Pieter Smith 2024-12-29 13:27:39 +01:00
parent bfbd83bff6
commit 57e91dbd3c

View File

@ -12,6 +12,7 @@ import re
import sys
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.utils.date import now as nowf
class WeeklyLWN(BasicNewsRecipe):
@ -82,6 +83,19 @@ class WeeklyLWN(BasicNewsRecipe):
return url
def publication_date(self):
    """Return the publication date stored on this recipe as ``self.pub_date``."""
    issue_date = self.pub_date
    return issue_date
def parse_publication_date(self, soup):
    """Set ``self.pub_date`` from the date embedded in the page title.

    The title is expected to contain ``... for <date>``, optionally followed
    by a ``[``-prefixed suffix. Everything after ``for`` up to the first
    ``[`` is handed to ``dateutil.parser.parse()``.

    If the page has no usable title, the title does not match the expected
    pattern, or the date text cannot be parsed, a warning is logged and
    ``self.pub_date`` falls back to the current time.

    :param soup: parsed index page whose ``<head><title>`` is inspected.
    """
    from dateutil.parser import parse, ParserError

    # Walk head -> title -> string defensively: a page without <head> or
    # <title> must trigger the fallback instead of raising AttributeError.
    head = getattr(soup, 'head', None)
    title_tag = getattr(head, 'title', None)
    raw_title = getattr(title_tag, 'string', None)

    pub_date = None
    if raw_title is not None:
        try:
            date_match = re.match(r'.* +for +([^\[]*)', self.tag_to_string(raw_title))
            if date_match is not None:
                # dateutil.parser.parse() is considered thread-safe
                # NOTE(review): parse() yields a naive datetime when the title
                # carries no timezone; confirm downstream consumers accept that.
                pub_date = parse(date_match[1])
        except (TypeError, ParserError, OverflowError):
            # ParserError: text is not a recognisable date.
            # OverflowError: documented by dateutil for out-of-range dates.
            pub_date = None

    if pub_date is None:
        self.log.warning('Failed to parse publication date from title: %r, using current time' % raw_title)
        pub_date = nowf()
    self.pub_date = pub_date
def parse_index(self):
past_edition = self.recipe_specific_options.get('issue')
if past_edition and isinstance(past_edition, str):
@ -91,6 +105,7 @@ class WeeklyLWN(BasicNewsRecipe):
else:
index_url = self.print_version('/free/bigpage')
soup = self.index_to_soup(index_url)
self.parse_publication_date(soup)
curr = soup.body
articles = {}