Add Prospect Magazine UK (Free) recipe

2025-08-30 23:00:21 -04:00 · 2023-06-04 12:05:34 +08:00 · 2023-06-04 12:05:34 +08:00 · 41e23a8b0c
commit 41e23a8b0c
parent 93f60a674d
1 changed files with 128 additions and 0 deletions
--- a/recipes/prospectmaguk_free.recipe
+++ b/recipes/prospectmaguk_free.recipe
@ -0,0 +1,128 @@
+# Copyright (c) 2023 https://github.com/ping/
+#
+# This software is released under the GNU General Public License v3.0
+# https://opensource.org/licenses/GPL-3.0
+
+from collections import OrderedDict
+from urllib.parse import urljoin
+
+from calibre.web.feeds.news import BasicNewsRecipe, prefixed_classes
+
+# Optional override for the issue to download. When this is a non-empty
+# URL, parse_index() fetches that issue page directly; when left empty,
+# parse_index() follows the first issue link found on the recipe's INDEX page.
+_issue_url = ""
+
+
+class ProspectMagazineUKFree(BasicNewsRecipe):
+    """Recipe that builds one issue of Prospect Magazine from its website.
+
+    The issue is the module-level ``_issue_url`` when that is set, otherwise
+    the first issue linked from ``INDEX``.
+    """
+
+    title = "Prospect Magazine (Free)"
+    __author__ = "ping"
+    description = (
+        "Prospect is Britain’s leading current affairs monthly magazine. "
+        "It is an independent and eclectic forum for writing and thinking—in "
+        "print and online. Published every month with two double issues in "
+        "the summer and winter, it spans politics, science, foreign affairs, "
+        "economics, the environment, philosophy and the arts."
+    )
+    language = "en_GB"
+    category = "news, UK"
+    publication_type = "magazine"
+    masthead_url = "https://media.prospectmagazine.co.uk/prod/images/gm_grid_thumbnail/358ffc17208c-f4c3cddcdeda-prospect-masthead.png"
+    encoding = "utf-8"
+    remove_javascript = True
+    no_stylesheets = True
+    ignore_duplicate_articles = {"url"}
+    # Page listing the magazine's issues; also the base for relative links.
+    INDEX = "https://www.prospectmagazine.co.uk/issues"
+
+    # Keep only the main article panel, then strip these elements from it.
+    keep_only_tags = [dict(class_="prop-book-article-panel_main")]
+    remove_tags = [
+        dict(
+            class_=[
+                "prop-book-review-header-wrapper_magazine",
+                "prop-mobile-social-share_header",
+                "prop-magazine-link-block",
+                "pros-article-body__img-credit",
+                "pros-article-topics__wrapper",
+                "pros-article-author__image-wrapper",
+                "prop-book-review-promo_details-buy-mobile",
+            ]
+        ),
+        dict(id=["disqus_thread", "newsletter_wrapper"]),
+        prefixed_classes("dfp-slot-"),
+    ]
+
+    extra_css = """
+    h1 { font-size: 1.8rem; margin-bottom: 0.4rem; }
+    .prop-book-review-header-wrapper_standfirst { font-size: 1.2rem; font-style: italic; font-weight: normal; margin-bottom: 0.5rem; }
+    .prop-book-review-header-wrapper_details {  margin-top: 1rem; margin-bottom: 1rem; }
+    .prop-book-review-header-wrapper_details-byline {
+        display: inline-block; font-weight: bold; color: #444; margin-right: 0.5rem; }
+    .prop-book-review-header-wrapper_details-date { display: inline-block; }
+    .gd-picture img { display: block; max-width: 100%; height: auto; }
+    .pros-article-body__img-caption {
+        font-size: 0.8rem; display: block; margin-top: 0.2rem;
+    }
+    .pullquote, blockquote { text-align: center; margin-left: 0; margin-bottom: 0.4rem; font-size: 1.25rem; }
+    .prop-book-review-article_author { margin: 1.5rem 0; font-style: italic; }
+    .prop-book-review-promo { margin-bottom: 1rem; }
+    """
+
+    def preprocess_html(self, soup):
+        """Tidy an article's markup before conversion and return the soup.
+
+        Moves the lede image to just after the header details block, copies
+        each image's ``data-src`` into ``src``, and unwraps byline and author
+        links so only their contents remain.
+        """
+        # re-position lede image
+        lede_img = soup.find("img", class_="prop-book-review-header-wrapper_image")
+        meta = soup.find("div", class_="prop-book-review-header-wrapper_details")
+        if lede_img and meta:
+            lede_img = lede_img.extract()
+            meta.insert_after(lede_img)
+
+        # The image URL is kept in data-src; promote it to src so it is used.
+        for img in soup.find_all("img", attrs={"data-src": True}):
+            img["src"] = img["data-src"]
+            del img["data-src"]
+
+        for byline_link in soup.find_all("a", attrs={"data-author-name": True}):
+            byline_link.unwrap()
+        for author_link in soup.find_all("a", class_="pros-article-author"):
+            author_link.unwrap()
+
+        return soup
+
+    def parse_index(self):
+        """Build the list of sections and articles for one magazine issue.
+
+        Uses ``_issue_url`` when it is non-empty; otherwise loads ``INDEX``
+        and follows the first ``pros-collection-landing__item`` link. As side
+        effects, sets ``self.timefmt`` from the issue name and
+        ``self.cover_url`` from the issue's cover image when they are found.
+
+        Returns:
+            A list of ``(section_name, articles)`` tuples in page order, where
+            each article is a dict with ``url`` and ``title`` keys.
+
+        Raises:
+            ValueError: if no issue link can be found on the index page.
+        """
+        if _issue_url:
+            curr_issue_url = _issue_url
+        else:
+            issues_soup = self.index_to_soup(self.INDEX)
+            curr_issue_a_ele = issues_soup.find(
+                "a", class_="pros-collection-landing__item"
+            )
+            issue_href = None
+            if curr_issue_a_ele is not None:
+                issue_href = curr_issue_a_ele.get("href")
+            if not issue_href:
+                # Fail with a clear message rather than an opaque TypeError
+                # or KeyError when the index page markup changes.
+                raise ValueError(f"Unable to find an issue link on {self.INDEX}")
+            curr_issue_url = urljoin(self.INDEX, issue_href)
+
+        soup = self.index_to_soup(curr_issue_url)
+        issue_name = (
+            self.tag_to_string(soup.find(class_="magazine-lhc__issue-name"))
+            .replace(" issue", "")
+            .strip()
+        )
+        if issue_name:
+            # Only override timefmt when a name was found, to avoid " []".
+            self.timefmt = f" [{issue_name}]"
+
+        cover_img = soup.find("img", class_="magazine-lhc__cover-image")
+        cover_src = None
+        if cover_img is not None:
+            # The URL is read from data-src, falling back to src if absent.
+            cover_src = cover_img.get("data-src") or cover_img.get("src")
+        if cover_src:
+            # Swap the small rendition name for the large one in the URL.
+            self.cover_url = cover_src.replace(
+                "portrait_small_fit", "portrait_large_fit"
+            )
+        else:
+            # A missing cover should not abort the whole download.
+            self.log.warning(f"No cover image found on {curr_issue_url}")
+
+        articles = OrderedDict()
+        for section in soup.find_all("div", class_="pro-magazine-section"):
+            section_name = self.tag_to_string(
+                section.find(class_="pro-magazine-section__name")
+            )
+            for sect_article in section.find_all(
+                class_="pro-magazine-section__article"
+            ):
+                link = sect_article.find("a", href=True)
+                if link is None:
+                    # Nothing can be fetched for an entry without a link.
+                    continue
+                articles.setdefault(section_name, []).append(
+                    {
+                        "url": urljoin(self.INDEX, link["href"]),
+                        "title": self.tag_to_string(
+                            sect_article.find(
+                                class_="pro-magazine-section__article-headline"
+                            )
+                        ),
+                    }
+                )
+
+        # parse_index is documented to return a list, not a dict view.
+        return list(articles.items())