mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Merge branch 'patch-21' of https://github.com/Sophist-UK/calibre
This commit is contained in:
commit
a378ed13c4
@ -10,9 +10,11 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
|||||||
|
|
||||||
class PrivateEyeRecipe(BasicNewsRecipe):
|
class PrivateEyeRecipe(BasicNewsRecipe):
|
||||||
##
|
##
|
||||||
# Last Edited: 2023-07-14
|
# Last Edited: 2025-04-02
|
||||||
#
|
#
|
||||||
# Remark: Version 3.1 2023-07-14
|
# Remark: Version 3.2 2025-04-02
|
||||||
|
# Fix recipe after web-site changes
|
||||||
|
# Version 3.1 2023-07-14
|
||||||
# Show crossword on right so clues are continuous down left
|
# Show crossword on right so clues are continuous down left
|
||||||
# Link to crossword image removed
|
# Link to crossword image removed
|
||||||
# Improve many image layouts
|
# Improve many image layouts
|
||||||
@ -34,9 +36,10 @@ class PrivateEyeRecipe(BasicNewsRecipe):
|
|||||||
ignore_duplicate_articles = {'url'}
|
ignore_duplicate_articles = {'url'}
|
||||||
|
|
||||||
__author__ = u'Martyn Pritchard & Sophist-UK'
|
__author__ = u'Martyn Pritchard & Sophist-UK'
|
||||||
__copyright__ = '2020, Martyn Pritchard <MPritchard2k9@gmail.com> & Sophist-UK <sophist-uk@sodalis.co.uk>'
|
__copyright__ = '2020-2025, Martyn Pritchard <MPritchard2k9@gmail.com> & Sophist-UK <sophist-uk@sodalis.co.uk>'
|
||||||
|
|
||||||
current_issue = 'https://www.private-eye.co.uk/current-issue'
|
base_url = 'https://www.private-eye.co.uk/'
|
||||||
|
current_issue = 'https://www.private-eye.co.uk/'
|
||||||
about_page = 'https://www.private-eye.co.uk/about'
|
about_page = 'https://www.private-eye.co.uk/about'
|
||||||
masthead_url = 'https://www.private-eye.co.uk/grfx/logos/logo-new.png'
|
masthead_url = 'https://www.private-eye.co.uk/grfx/logos/logo-new.png'
|
||||||
author = 'Private Eye'
|
author = 'Private Eye'
|
||||||
@ -50,10 +53,34 @@ class PrivateEyeRecipe(BasicNewsRecipe):
|
|||||||
'title_sort': title,
|
'title_sort': title,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
index_attrs_to_include = [
|
||||||
|
{'class': 'footer-block'},
|
||||||
|
{'id': 'top-stories'},
|
||||||
|
{'id': 'home-content'},
|
||||||
|
{'id': 'home-color-content'},
|
||||||
|
]
|
||||||
|
|
||||||
|
titles_to_skip = [
|
||||||
|
'Home',
|
||||||
|
'more',
|
||||||
|
'In This Issue',
|
||||||
|
]
|
||||||
|
|
||||||
|
url_to_section_name = {
|
||||||
|
'hp-sauce': 'HP Sauce',
|
||||||
|
'in-the-back': 'In the Back',
|
||||||
|
'street-of-shame': 'Street of Shame',
|
||||||
|
'cartoons': 'Strips and Cartoons',
|
||||||
|
'lookalikes': 'Lookalike',
|
||||||
|
'number-crunching': 'Number Crunching',
|
||||||
|
'mediaballs': 'Dumb Britain',
|
||||||
|
'crossword': 'Eye Crossword',
|
||||||
|
}
|
||||||
|
|
||||||
def get_cover_url(self):
|
def get_cover_url(self):
|
||||||
soup = self.index_to_soup(self.current_issue)
|
soup = self.index_to_soup(self.current_issue)
|
||||||
|
|
||||||
for img in soup.findAll('img'):
|
for img in soup.findAll('img', {'class': 'issue-cover'}):
|
||||||
src = img['src']
|
src = img['src']
|
||||||
if src.endswith('_big.jpg'):
|
if src.endswith('_big.jpg'):
|
||||||
file_name = src.rsplit('/',1)[1]
|
file_name = src.rsplit('/',1)[1]
|
||||||
@ -71,11 +98,13 @@ class PrivateEyeRecipe(BasicNewsRecipe):
|
|||||||
def parse_index(self):
|
def parse_index(self):
|
||||||
soup = self.index_to_soup(self.current_issue)
|
soup = self.index_to_soup(self.current_issue)
|
||||||
|
|
||||||
# Get publication date
|
# Get publication date - Next issue on sale date - 12 days
|
||||||
sidebar = soup.find('div', attrs={'id': 'current-issue-sidebar'})
|
issue_box = soup.find('div', attrs={'id': 'issue-box'})
|
||||||
next_issue_text = sidebar.find('b').nextSibling.strip()
|
next_issue_text = issue_box.find(text=re.compile('NEXT\s+ISSUE')).parent.contents[-1].strip()
|
||||||
|
self.log("next_issue_text:", next_issue_text)
|
||||||
try:
|
try:
|
||||||
day, month, year = next_issue_text.split(' ')
|
day, month, year = next_issue_text.split(' ')
|
||||||
|
# remove day suffixes e.g. 2nd
|
||||||
day = ''.join(c for c in day if c.isdigit())
|
day = ''.join(c for c in day if c.isdigit())
|
||||||
pub_date = datetime.strptime(' '.join((day, month, year)), '%d %B %Y') - timedelta(12)
|
pub_date = datetime.strptime(' '.join((day, month, year)), '%d %B %Y') - timedelta(12)
|
||||||
self.log('pub-date:', pub_date)
|
self.log('pub-date:', pub_date)
|
||||||
@ -87,25 +116,36 @@ class PrivateEyeRecipe(BasicNewsRecipe):
|
|||||||
# Bad date
|
# Bad date
|
||||||
self.log('Cannot parse next issue date from:', next_issue_text)
|
self.log('Cannot parse next issue date from:', next_issue_text)
|
||||||
|
|
||||||
# Get pages first from the sub-menu, and then from the contents panel.
|
# Get pages from the various contents panels.
|
||||||
# Duplicates will be eliminated automatically.
|
# Duplicates will be eliminated automatically.
|
||||||
articles = []
|
articles = []
|
||||||
for menu_attrs in (
|
urls = []
|
||||||
{'class': 'sub-nav-bar', 'id':'sub-nav-box'},
|
for section_attrs in self.index_attrs_to_include:
|
||||||
{'class': 'article', 'id': 'block-left'},
|
section = soup.find('div', attrs=section_attrs)
|
||||||
):
|
|
||||||
menu = soup.find('div', attrs=menu_attrs)
|
|
||||||
|
|
||||||
if not menu:
|
if not section:
|
||||||
|
self.log("section not found:", section_id)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
for a in menu.findAll('a', href=True):
|
for a in section.findAll('a', href=True):
|
||||||
|
url = a.get('href')
|
||||||
title = a.getText().rstrip(' »\n')
|
title = a.getText().rstrip(' »\n')
|
||||||
if not title:
|
if not title:
|
||||||
continue
|
continue
|
||||||
|
if title in self.titles_to_skip:
|
||||||
|
continue
|
||||||
|
known_url = url.rsplit('/',1)[-1]
|
||||||
|
if known_url and known_url in self.url_to_section_name:
|
||||||
|
title = self.url_to_section_name[known_url]
|
||||||
|
if not url.startswith('http'):
|
||||||
|
url = self.base_url + url
|
||||||
|
if url in urls:
|
||||||
|
continue
|
||||||
|
self.log("title:", title, ", url:", url)
|
||||||
|
urls.append(url)
|
||||||
articles.append({
|
articles.append({
|
||||||
'title': title,
|
'title': title,
|
||||||
'url': a.get('href'),
|
'url': url,
|
||||||
})
|
})
|
||||||
|
|
||||||
if not articles:
|
if not articles:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user