Various metadata tweaks...

1. Change the recipe title to 'Private Eye Online' to distinguish it from e.g. subscribed versions
2. Set Series to the recipe title and Series Index to the issue number
3. Set author and publisher to 'Private Eye'
4. Put publication date in title.
This commit is contained in:
Sophist 2017-05-07 15:09:00 +01:00 committed by GitHub
parent 762d35b054
commit f58dcea94e

View File

@ -13,9 +13,12 @@ from calibre.web.feeds.news import BasicNewsRecipe
class PrivateEyeRecipe(BasicNewsRecipe): class PrivateEyeRecipe(BasicNewsRecipe):
title = 'Private Eye' title = 'Private Eye Online'
title_author = 'Private Eye'
__author__ = 'Sophist at sodalis.co.uk' __author__ = 'Sophist at sodalis.co.uk'
description = 'Private Eye is a fortnightly British satirical news and current affairs magazine, edited by Ian Hislop, offering a unique blend of humour, social and political observations and investigative journalism.' # noqa description = '''Private Eye is a fortnightly British satirical news and current affairs magazine, edited by Ian Hislop, offering a unique blend of humour, social and political observations and investigative journalism.
This e-book is a download of the online-edition. The full edition is available only on subscription.'''
publication_type = 'magazine' publication_type = 'magazine'
language = 'en' language = 'en'
encoding = 'utf-8' encoding = 'utf-8'
@ -23,15 +26,20 @@ class PrivateEyeRecipe(BasicNewsRecipe):
INDEX = DOMAIN + 'current-issue' INDEX = DOMAIN + 'current-issue'
oldest_article = 13 oldest_article = 13
max_articles_per_feed = 100 max_articles_per_feed = 100
# remove_empty_feeds = True
remove_javascript = True remove_javascript = True
# no_stylesheets = True
ignore_duplicate_articles = {'url'} ignore_duplicate_articles = {'url'}
conversion_options = {
'authors': title_author,
'author_sort': title_author,
'smarten_punctuation': True,
'series': title,
'publisher': title_author,
}
remove_tags_before = [ remove_tags_before = [
{ {
'id': 'story', 'id': 'story',
'class': 'article' 'class': 'article',
}, },
{ {
'id': 'page' 'id': 'page'
@ -39,7 +47,7 @@ class PrivateEyeRecipe(BasicNewsRecipe):
] ]
remove_tags_after = [ remove_tags_after = [
{ {
'class': 'section' 'class': 'section',
}, },
] ]
remove_tags = [ remove_tags = [
@ -49,10 +57,9 @@ class PrivateEyeRecipe(BasicNewsRecipe):
'class': 'text'}), 'class': 'text'}),
dict(name='span', attrs={'class': 'section'}), dict(name='span', attrs={'class': 'section'}),
] ]
preprocess_regexps = [ preprocess_regexps = [
( (
re.compile(r'../grfx', re.DOTALL | re.IGNORECASE), re.compile(r'\.\./grfx', re.DOTALL | re.IGNORECASE),
lambda match: 'http://www.private-eye.co.uk/grfx' lambda match: 'http://www.private-eye.co.uk/grfx'
), ),
] ]
@ -116,6 +123,7 @@ class PrivateEyeRecipe(BasicNewsRecipe):
self.cover_url = self.DOMAIN + img['src'] self.cover_url = self.DOMAIN + img['src']
filename = img['src'].split('/')[-1] filename = img['src'].split('/')[-1]
self.issue_no = filename.replace('_big.jpg', '') self.issue_no = filename.replace('_big.jpg', '')
self.conversion_options['series_index'] = self.issue_no
self.log.debug('Cover image found. Issue: %s' % self.issue_no) self.log.debug('Cover image found. Issue: %s' % self.issue_no)
break break
else: else:
@ -129,12 +137,16 @@ class PrivateEyeRecipe(BasicNewsRecipe):
day, month, year = tag_contents[2].split() day, month, year = tag_contents[2].split()
day = ''.join(c for c in day if c.isdigit()) day = ''.join(c for c in day if c.isdigit())
date = datetime.strptime( date = datetime.strptime(
" ".join((day, month, year)), "%d %B %Y" " ".join((day, month, year)),
"%d %B %Y"
) )
date = date - timedelta(14) date = date - timedelta(14)
self.publication_date = datetime.strftime(date, "%d %B %Y" self.publication_date = datetime.strftime(
).lstrip("0") date,
"%d %B %Y"
).lstrip("0")
self.log.debug("Publication date: %s" % self.publication_date) self.log.debug("Publication date: %s" % self.publication_date)
self.title += " " + datetime.strftime(date, "%Y-%m-%d")
break break
except: except:
self.log.warning( self.log.warning(
@ -211,10 +223,8 @@ class PrivateEyeRecipe(BasicNewsRecipe):
self.DOMAIN + "about", self.DOMAIN + "about",
"""Private Eye is the UK's number one best-selling news and current affairs magazine, edited by Ian Hislop. """Private Eye is the UK's number one best-selling news and current affairs magazine, edited by Ian Hislop.
It offers a unique blend of humour, social and political observations and investigative journalism. It offers a unique blend of humour, social and political observations and investigative journalism. Published fortnightly, the magazine is read by over 700,000 readers and costs just £1.80 an issue.""",
Published fortnightly, the magazine is read by over 700,000 readers and costs just £1.80 an issue.""", date="")
date=""
)
self.page_index_append("About Private Eye") self.page_index_append("About Private Eye")
return self.page_index return self.page_index