diff --git a/recipes/theoldie.recipe b/recipes/theoldie.recipe
new file mode 100644
index 0000000000..dfcfde7caa
--- /dev/null
+++ b/recipes/theoldie.recipe
@@ -0,0 +1,247 @@
+'''
+Fetch The Oldie (Online Edition)
+'''
+
+import re
+from calibre.web.feeds.news import BasicNewsRecipe
+from datetime import datetime, timedelta
+from collections import OrderedDict
+
+class PrivateEyeRecipe(BasicNewsRecipe):
+ ##
+ # Last Edited: 2023-08-07
+ #
+ # Remark: Version 1.0 2023-08-07
+ # Initial version
+
+ title = u'The Oldie (Online Edition)'
+ description = u'The Oldie has been dubbed ‘Private Eye for grown-ups’ and is read by intelligent people who are fed up with the formulaic nature of the celebrity-obsessed national press. The Oldie was cooked up in 1992 by Richard Ingrams (who previously co-founded Private Eye in 1961) as a free-thinking, funny magazine, a light-hearted alternative to a press obsessed with youth and celebrity. The editors claim that the Oldie is ageless and timeless, free of retirement advice, crammed with rejuvenating wit, intelligence and delight.'
+ publication_type = 'magazine'
+ language = 'en_GB'
+ encoding = 'utf-8'
+ oldest_article = 31
+ max_articles_per_feed = 100
+ remove_javascript = True
+ ignore_duplicate_articles = {'url'}
+
+ __author__ = u'Sophist-UK'
+ __copyright__ = '2023, Sophist-UK '
+
+ web_root = 'https://www.theoldie.co.uk'
+ current_issue = web_root + '/magazine'
+ about_pages = {
+ 'About Us': web_root + '/about-us',
+ 'Our History': web_root + '/about-us/history',
+ }
+ masthead_url = web_root + '/assets/images/theoldie_logo_22.png'
+ name = 'Oldie Online'
+ series = 'The ' + name
+ now = datetime.now().strftime(' %Y-%m')
+ title = series + now
+ title_sort = name + now + ', The'
+ conversion_options = {
+ 'authors': 'The Oldie',
+ 'author_sort': 'Oldie, The',
+ 'series': series,
+ 'series_index': 0,
+ 'title': title,
+ 'title_sort': title_sort,
+ }
+ cover_suburl = '-front-cover-'
+
+ # Convert relative URLS to absolute ones i.e. /cover to https://theoldie.co.uk/cover
+ def abs_url(self, url):
+ return self.web_root + url if url.startswith('/') else url
+
+ # Create a correctly formated DICT entry for Calibre parse_index return
+ def article_entry(self, title, url, author=None):
+ article = {
+ 'title': title,
+ 'url': url,
+ }
+ if author:
+ article['author'] = author
+ return article
+
+ edition_re = re.compile('(?:-front-cover-)(\d+)-')
+
+ # Identify the cover image and extract the edition# from the url
+ def get_cover_url(self):
+ soup = self.index_to_soup(self.current_issue)
+
+ for img in soup.findAll('img'):
+ src = self.abs_url(img['src'])
+ editions = self.edition_re.findall(src)
+ if editions:
+ try:
+ self.conversion_options.update({'series_index': int(editions[0])})
+ self.log('series-index:', self.conversion_options['series_index'])
+ except (TypeError, ValueError):
+ continue
+ self.log('cover_url:', src)
+ return src
+ return None
+
+ # oldie links/headings often contain the author (in one of various formats
+ # 1. Title. By author
+ #.2. Title by author: subtitle
+ # 3. Title: author: subtitle
+ title_author_re = re.compile('^(.*?)(?:(?: by )|(?:: ))(.*?): (.*?)$')
+
+ # Separate author from title (where it is specified)
+ def title_author(self, head):
+ if '. By ' in head:
+ return head.rsplit('. By ', 1)
+ matches = self.title_author_re.findall(head)
+ if matches and len(matches[0]) == 3:
+ (title_1, author, title_2) = matches[0]
+ title = ': '.join((title_1, title_2))
+ return (title, author)
+ return (head, None)
+
+ # Return the list of articles from blocks in the content of an index/listing page
+ def parse_content(self, soup):
+ content_articles = []
+
+ content = soup.find('div', class_='content-wrapper')
+
+ if not content:
+ return content_articles
+
+ for article in content.findAll('div', class_='listing-block'):
+ for a in article.findAll('a', href=True):
+ for h in a.findAll('h3'):
+ (title, author) = self.title_author(h.getText())
+ content_articles.append(self.article_entry(
+ title = title,
+ url = self.abs_url(a.get('href')),
+ author = author,
+ ))
+ break
+ else:
+ continue
+ break
+
+ return content_articles
+
+ def parse_index(self):
+ # The set of pages to be used in the online edition are:
+ # 1. The list of articles in the body of the magazine index page
+ # 2. The contents / pages linked to by each of the links in the #categories menu
+ # 3. The div.only-in-the-magazine contents in the magazine index page
+ # 4. The about pages
+ # Obviously repeated content is de-duplicated by Calibre
+
+ self.log('masthead_url:', self.masthead_url)
+ soup = self.index_to_soup(self.current_issue)
+
+ # 1. The list of articles in the body of the magazine index page
+ articles = self.parse_content(soup)
+
+ # 2. The contents / pages linked to by each of the links in the #categories menu
+ categories = soup.find('nav', class_='categories')
+ for li in categories.findAll('li'):
+ a = li.find('a', href=True)
+ href = self.abs_url(a.get('href'))
+ self.log('Checking page for sub-index:', href)
+ content = self.parse_content(self.index_to_soup(href))
+ if content:
+ self.log('Subpages found:', href, len(content))
+ articles.extend(content)
+ else:
+ (title, author) = self.title_author(a.getText())
+ articles.append(self.article_entry(
+ title = title,
+ url = self.abs_url(a.get('href')),
+ author = author,
+ ))
+
+ if not articles:
+ raise ValueError('The Oldie Online index of pages not found')
+
+ # 3. The div.only-in-the-magazine contents in the magazine index page
+ articles.append({
+ 'title': 'In the full issue…',
+ 'url': self.current_issue,
+ })
+
+ pages = [('In this issue…', articles)]
+ self.log('n this issue…', articles)
+
+ # 4. The about pages
+ abouts = []
+ for (title, url) in self.about_pages.items():
+ abouts.append({
+ 'title': title,
+ 'url': url,
+ })
+
+ if abouts:
+ pages.append(('About The Oldie', abouts))
+ self.log('About The Oldie', abouts)
+
+ return pages
+
+ def preprocess_html(self, soup):
+ for h in soup.findAll('h1'):
+ (title, author) = self.title_author(h.getText())
+ self.log('Replacing h3 "', h.getText(), '" with "', title, '"')
+ h.string = title
+
+ return soup
+
+
+ # Remove features not wanted and tweak HTML
+ preprocess_regexps = [
+ # Remove big blank spaces
+ (
+ re.compile(
+ r'\s*
\s*
',
+ re.DOTALL | re.IGNORECASE
+ ),
+ lambda match: ''
+ ),
+ # Local fix for paragraph HTML issues join paragraphs that do not end in a full-stop.
+ (
+ re.compile(
+ r'(?<=[^\.\s])\s*
\s*',
+ re.DOTALL | re.IGNORECASE
+ ),
+ lambda match: ' ' # space
+ ),
+ ]
+
+ # We remove vast swathes of HTML which is not part of the articles.
+ remove_tags_before = [
+ {'name': 'div', 'class': "container"},
+ {'name': 'div', 'class': "content-wrapper"},
+ {'name': 'div', 'class': "only-in-the-magazine"},
+ ]
+ remove_tags_after = [
+ {'name': 'div', 'class': "container"},
+ {'name': 'div', 'class': "content-wrapper"},
+ {'name': 'h2', 'string': "Find out more about The Oldie"},
+ ]
+ # Remove non-sibling content
+ remove_tags = [
+ {'name': 'nav', 'class': "categories"},
+ {'name': 'div', 'class': "internal-placeholders"},
+ {'name': 'div', 'class': "leaderboard"},
+ {'name': 'div', 'class': "share"},
+ {'name': 'div', 'class': "most-popular"},
+ {'name': 'div', 'class': "article-convert"},
+ # {'name': 'p', 'class': "article-convert"},
+ # {'name': 'p', 'class': "meta"},
+ {'name': 'hr'},
+ {'name': 'a', 'class': "view-full-screen"},
+ {'name': 'div', 'class': "image-counter"},
+ {'name': 'h2', 'string': "Find out more about The Oldie"},
+ {'name': 'a', 'href': re.compile("^https?:\/\/issuu.com\/")},
+ {'name': 'img', 'src': re.compile("\/assets\/images\/icons\/icon-")},
+ ]
+
+ # The following extra css is to tweak the formatting of various elements of various article pages.
+ extra_css = ' \n '.join([
+ 'div.image-captions div.caption {text-align: center; font-weight: bold; width:750px;}',
+ 'p.article-convert {text-align: center;}',
+ ])