mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Add Prospect Magazine UK (Free) recipe
This commit is contained in:
parent
93f60a674d
commit
41e23a8b0c
128
recipes/prospectmaguk_free.recipe
Normal file
128
recipes/prospectmaguk_free.recipe
Normal file
@ -0,0 +1,128 @@
|
|||||||
|
# Copyright (c) 2023 https://github.com/ping/
|
||||||
|
#
|
||||||
|
# This software is released under the GNU General Public License v3.0
|
||||||
|
# https://opensource.org/licenses/GPL-3.0
|
||||||
|
|
||||||
|
from collections import OrderedDict
|
||||||
|
from urllib.parse import urljoin
|
||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe, prefixed_classes
|
||||||
|
|
||||||
|
# Optional override for the issue to download. When non-empty, parse_index()
# fetches this URL as the issue page; when empty, parse_index() uses the first
# "pros-collection-landing__item" link found on the recipe's INDEX page.
_issue_url = ""
|
||||||
|
|
||||||
|
|
||||||
|
class ProspectMagazineUKFree(BasicNewsRecipe):
    """calibre news recipe for Prospect Magazine (UK).

    The issue to download is either the module-level ``_issue_url`` override
    or, when that is empty, the first issue linked from ``INDEX``. Articles
    are grouped by the sections listed on the issue page.
    """

    title = "Prospect Magazine (Free)"
    __author__ = "ping"
    description = (
        "Prospect is Britain’s leading current affairs monthly magazine. "
        "It is an independent and eclectic forum for writing and thinking—in "
        "print and online. Published every month with two double issues in "
        "the summer and winter, it spans politics, science, foreign affairs, "
        "economics, the environment, philosophy and the arts."
    )
    language = "en_GB"
    category = "news, UK"
    publication_type = "magazine"
    masthead_url = "https://media.prospectmagazine.co.uk/prod/images/gm_grid_thumbnail/358ffc17208c-f4c3cddcdeda-prospect-masthead.png"
    encoding = "utf-8"
    remove_javascript = True
    no_stylesheets = True
    ignore_duplicate_articles = {"url"}
    # Page listing the magazine issues; also the base for resolving relative links.
    INDEX = "https://www.prospectmagazine.co.uk/issues"

    # Article container to keep, and page furniture to strip from it.
    keep_only_tags = [dict(class_="prop-book-article-panel_main")]
    remove_tags = [
        dict(
            class_=[
                "prop-book-review-header-wrapper_magazine",
                "prop-mobile-social-share_header",
                "prop-magazine-link-block",
                "pros-article-body__img-credit",
                "pros-article-topics__wrapper",
                "pros-article-author__image-wrapper",
                "prop-book-review-promo_details-buy-mobile",
            ]
        ),
        dict(id=["disqus_thread", "newsletter_wrapper"]),
        prefixed_classes("dfp-slot-"),
    ]

    extra_css = """
    h1 { font-size: 1.8rem; margin-bottom: 0.4rem; }
    .prop-book-review-header-wrapper_standfirst { font-size: 1.2rem; font-style: italic; font-weight: normal; margin-bottom: 0.5rem; }
    .prop-book-review-header-wrapper_details { margin-top: 1rem; margin-bottom: 1rem; }
    .prop-book-review-header-wrapper_details-byline {
        display: inline-block; font-weight: bold; color: #444; margin-right: 0.5rem; }
    .prop-book-review-header-wrapper_details-date { display: inline-block; }
    .gd-picture img { display: block; max-width: 100%; height: auto; }
    .pros-article-body__img-caption {
        font-size: 0.8rem; display: block; margin-top: 0.2rem;
    }
    .pullquote, blockquote { text-align: center; margin-left: 0; margin-bottom: 0.4rem; font-size: 1.25rem; }
    .prop-book-review-article_author { margin: 1.5rem 0; font-style: italic; }
    .prop-book-review-promo { margin-bottom: 1rem; }
    """

    def preprocess_html(self, soup):
        """Tidy an article page before conversion and return the same soup.

        Moves the lede image below the byline/date block, promotes lazy-load
        ``data-src`` image URLs to ``src``, and unwraps author links so only
        their contents remain.
        """
        # re-position lede image
        lede_img = soup.find("img", class_="prop-book-review-header-wrapper_image")
        meta = soup.find("div", class_="prop-book-review-header-wrapper_details")
        if lede_img and meta:
            lede_img = lede_img.extract()
            meta.insert_after(lede_img)

        # The real image URL is carried in data-src; copy it into src.
        for img in soup.find_all("img", attrs={"data-src": True}):
            img["src"] = img["data-src"]
            del img["data-src"]

        # Replace author/byline anchors with their contents.
        for byline_link in soup.find_all("a", attrs={"data-author-name": True}):
            byline_link.unwrap()
        for author_link in soup.find_all("a", class_="pros-article-author"):
            author_link.unwrap()

        return soup

    def parse_index(self):
        """Build the list of sections and articles for one issue.

        Sets ``self.timefmt`` to the issue name and ``self.cover_url`` to the
        large cover image when those are present on the issue page.

        Returns a list of ``(section_name, [{"url": ..., "title": ...}, ...])``
        tuples in page order.
        """
        if _issue_url:
            curr_issue_url = _issue_url
        else:
            issues_soup = self.index_to_soup(self.INDEX)
            # Require an href so a decorative anchor cannot be picked by mistake.
            curr_issue_a_ele = issues_soup.find(
                "a", class_="pros-collection-landing__item", href=True
            )
            if curr_issue_a_ele is None:
                # Fail with a readable message instead of a TypeError on None.
                self.abort_recipe_processing(
                    f"Unable to find the current issue link on {self.INDEX}"
                )
            curr_issue_url = urljoin(self.INDEX, curr_issue_a_ele["href"])

        soup = self.index_to_soup(curr_issue_url)

        issue_name = (
            self.tag_to_string(soup.find(class_="magazine-lhc__issue-name"))
            .replace(" issue", "")
            .strip()
        )
        # Leave the default timefmt in place rather than emitting " []".
        if issue_name:
            self.timefmt = f" [{issue_name}]"

        # The cover is optional: a missing image must not abort the download.
        cover_img = soup.find("img", class_="magazine-lhc__cover-image")
        cover_src = cover_img.get("data-src") if cover_img is not None else None
        if cover_src:
            self.cover_url = cover_src.replace(
                "portrait_small_fit", "portrait_large_fit"
            )
        else:
            self.log.warn("No cover image found on", curr_issue_url)

        articles = OrderedDict()
        for section in soup.find_all("div", class_="pro-magazine-section"):
            section_name = self.tag_to_string(
                section.find(class_="pro-magazine-section__name")
            )
            for sect_article in section.find_all(
                class_="pro-magazine-section__article"
            ):
                link = sect_article.find("a", href=True)
                if link is None:
                    # Skip teasers without a link instead of crashing the run.
                    continue
                articles.setdefault(section_name, []).append(
                    {
                        "url": urljoin(self.INDEX, link["href"]),
                        "title": self.tag_to_string(
                            sect_article.find(
                                class_="pro-magazine-section__article-headline"
                            )
                        ),
                    }
                )

        # Return a concrete list rather than a dict view.
        return list(articles.items())
|
Loading…
x
Reference in New Issue
Block a user