mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
use the LWN weekly publication date
Bulk conversion (unfortunately) is a frequent thing when catching up after a holiday or crontab breakage. It is really annoying figuring out the LWN weekly edition reading order if they all have the same title based on the conversion date. Falls back to the current date if the date cannot be parsed. Uses dateutil.parser.parse() for thread-safety.
This commit is contained in:
parent
bfbd83bff6
commit
57e91dbd3c
@ -12,6 +12,7 @@ import re
|
||||
import sys
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.utils.date import now as nowf
|
||||
|
||||
|
||||
class WeeklyLWN(BasicNewsRecipe):
|
||||
@ -82,6 +83,19 @@ class WeeklyLWN(BasicNewsRecipe):
|
||||
|
||||
return url
|
||||
|
||||
def publication_date(self):
    '''
    Return the publication date of the edition being downloaded.

    The value is the one stored in ``self.pub_date`` by
    parse_publication_date(). If no date has been stored yet, the current
    time is returned instead of raising AttributeError.
    '''
    pub_date = getattr(self, 'pub_date', None)
    if pub_date is None:
        # Same fallback parse_publication_date() uses when the title
        # carries no usable date.
        pub_date = nowf()
    return pub_date
|
||||
|
||||
def parse_publication_date(self, soup):
    '''
    Extract the edition date from the index page title and store it in
    ``self.pub_date``.

    The text following the last " for " in the title, up to the first
    ``[``, is handed to dateutil for parsing. If the page has no usable
    title, the title does not match, or the date cannot be parsed, a
    warning is logged and the current time is stored instead.

    :param soup: parsed index page; its ``head.title.string`` is inspected.
    '''
    from dateutil.parser import parse

    # A page without <head> or <title> must not abort the download, so walk
    # down to the title text without assuming any level exists.
    head = getattr(soup, 'head', None)
    title_tag = getattr(head, 'title', None)
    title = getattr(title_tag, 'string', None)

    pub_date = None
    date_match = re.match(r'.* +for +([^\[]*)', self.tag_to_string(title) or '')
    if date_match is not None:
        try:
            # dateutil.parser.parse() is considered thread-safe
            pub_date = parse(date_match.group(1))
        except (ValueError, OverflowError):
            # ParserError is a ValueError subclass; catching the base class
            # avoids importing a name that older dateutil releases lack.
            # OverflowError is the other documented failure of parse().
            pub_date = None

    if pub_date is None:
        self.log.warning('Failed to parse publication date from title: %r, using current time' % title)
        pub_date = nowf()
    self.pub_date = pub_date
|
||||
|
||||
def parse_index(self):
|
||||
past_edition = self.recipe_specific_options.get('issue')
|
||||
if past_edition and isinstance(past_edition, str):
|
||||
@ -91,6 +105,7 @@ class WeeklyLWN(BasicNewsRecipe):
|
||||
else:
|
||||
index_url = self.print_version('/free/bigpage')
|
||||
soup = self.index_to_soup(index_url)
|
||||
self.parse_publication_date(soup)
|
||||
curr = soup.body
|
||||
|
||||
articles = {}
|
||||
|
Loading…
x
Reference in New Issue
Block a user