From f58dcea94efafa1fd71e980a2ecdc126838665b9 Mon Sep 17 00:00:00 2001 From: Sophist Date: Sun, 7 May 2017 15:09:00 +0100 Subject: [PATCH] Various metadata tweaks... 1. Change this to Private Eye Online to distinguish it from e.g. subscribed versions 2. Set Series and Series Index to issue number 3. Set author and publisher 4. Put publication date in title. --- recipes/private_eye.recipe | 40 ++++++++++++++++++++++++-------------- 1 file changed, 25 insertions(+), 15 deletions(-) diff --git a/recipes/private_eye.recipe b/recipes/private_eye.recipe index 0266826923..b3dca5846f 100644 --- a/recipes/private_eye.recipe +++ b/recipes/private_eye.recipe @@ -13,9 +13,12 @@ from calibre.web.feeds.news import BasicNewsRecipe class PrivateEyeRecipe(BasicNewsRecipe): - title = 'Private Eye' + title = 'Private Eye Online' + title_author = 'Private Eye' __author__ = 'Sophist at sodalis.co.uk' - description = 'Private Eye is a fortnightly British satirical news and current affairs magazine, edited by Ian Hislop, offering a unique blend of humour, social and political observations and investigative journalism.' # noqa + description = '''Private Eye is a fortnightly British satirical news and current affairs magazine, edited by Ian Hislop, offering a unique blend of humour, social and political observations and investigative journalism. + +This e-book is a download of the online-edition. 
The full edition is available only on subscription.''' publication_type = 'magazine' language = 'en' encoding = 'utf-8' @@ -23,15 +26,20 @@ class PrivateEyeRecipe(BasicNewsRecipe): INDEX = DOMAIN + 'current-issue' oldest_article = 13 max_articles_per_feed = 100 - # remove_empty_feeds = True remove_javascript = True - # no_stylesheets = True ignore_duplicate_articles = {'url'} + conversion_options = { + 'authors': title_author, + 'author_sort': title_author, + 'smarten_punctuation': True, + 'series': title, + 'publisher': title_author, + } remove_tags_before = [ { 'id': 'story', - 'class': 'article' + 'class': 'article', }, { 'id': 'page' @@ -39,7 +47,7 @@ class PrivateEyeRecipe(BasicNewsRecipe): ] remove_tags_after = [ { - 'class': 'section' + 'class': 'section', }, ] remove_tags = [ @@ -49,10 +57,9 @@ class PrivateEyeRecipe(BasicNewsRecipe): 'class': 'text'}), dict(name='span', attrs={'class': 'section'}), ] - preprocess_regexps = [ ( - re.compile(r'../grfx', re.DOTALL | re.IGNORECASE), + re.compile(r'\.\./grfx', re.DOTALL | re.IGNORECASE), lambda match: 'http://www.private-eye.co.uk/grfx' ), ] @@ -116,6 +123,7 @@ class PrivateEyeRecipe(BasicNewsRecipe): self.cover_url = self.DOMAIN + img['src'] filename = img['src'].split('/')[-1] self.issue_no = filename.replace('_big.jpg', '') + self.conversion_options['series_index'] = self.issue_no self.log.debug('Cover image found. 
Issue: %s' % self.issue_no) break else: @@ -129,12 +137,16 @@ class PrivateEyeRecipe(BasicNewsRecipe): day, month, year = tag_contents[2].split() day = ''.join(c for c in day if c.isdigit()) date = datetime.strptime( - " ".join((day, month, year)), "%d %B %Y" + " ".join((day, month, year)), + "%d %B %Y" ) date = date - timedelta(14) - self.publication_date = datetime.strftime(date, "%d %B %Y" - ).lstrip("0") + self.publication_date = datetime.strftime( + date, + "%d %B %Y" + ).lstrip("0") self.log.debug("Publication date: %s" % self.publication_date) + self.title += " " + datetime.strftime(date, "%Y-%m-%d") break except: self.log.warning( @@ -211,10 +223,8 @@ class PrivateEyeRecipe(BasicNewsRecipe): self.DOMAIN + "about", """Private Eye is the UK's number one best-selling news and current affairs magazine, edited by Ian Hislop. -It offers a unique blend of humour, social and political observations and investigative journalism. -Published fortnightly, the magazine is read by over 700,000 readers and costs just £1.80 an issue.""", - date="" - ) +It offers a unique blend of humour, social and political observations and investigative journalism. Published fortnightly, the magazine is read by over 700,000 readers and costs just £1.80 an issue.""", + date="") self.page_index_append("About Private Eye") return self.page_index