Add Prospect Magazine UK (Free) recipe

2025-07-09 03:04:10 -04:00 · 2023-06-04 12:05:34 +08:00 · 2023-06-04 12:05:34 +08:00 · 41e23a8b0c
commit 41e23a8b0c
parent 93f60a674d
1 changed files with 128 additions and 0 deletions
--- a/recipes/prospectmaguk_free.recipe
+++ b/recipes/prospectmaguk_free.recipe
@ -0,0 +1,128 @@
 # Copyright (c) 2023 https://github.com/ping/
 #
 # This software is released under the GNU General Public License v3.0
 # https://opensource.org/licenses/GPL-3.0
 from collections import OrderedDict
 from urllib.parse import urljoin
 from calibre.web.feeds.news import BasicNewsRecipe, prefixed_classes
 # Optional override: set this to the URL of a specific issue to fetch that
 # issue. When left empty, parse_index() uses the first issue link it finds
 # on the recipe's INDEX page instead.
 _issue_url = ""
 class ProspectMagazineUKFree(BasicNewsRecipe):
    """calibre news recipe for Prospect Magazine (UK).

    The article list is scraped from an issue page: either the module-level
    ``_issue_url`` override or, when that is empty, the first issue linked
    from ``INDEX``.
    """

    title = "Prospect Magazine (Free)"
    __author__ = "ping"
    description = (
        "Prospect is Britain’s leading current affairs monthly magazine. "
        "It is an independent and eclectic forum for writing and thinking—in "
        "print and online. Published every month with two double issues in "
        "the summer and winter, it spans politics, science, foreign affairs, "
        "economics, the environment, philosophy and the arts."
    )
    language = "en_GB"
    category = "news, UK"
    publication_type = "magazine"
    masthead_url = "https://media.prospectmagazine.co.uk/prod/images/gm_grid_thumbnail/358ffc17208c-f4c3cddcdeda-prospect-masthead.png"
    encoding = "utf-8"
    remove_javascript = True
    no_stylesheets = True
    ignore_duplicate_articles = {"url"}
    # Listing page from which the current issue is looked up.
    INDEX = "https://www.prospectmagazine.co.uk/issues"

    # Only the main article panel is kept from each article page ...
    keep_only_tags = [dict(class_="prop-book-article-panel_main")]
    # ... and these elements are then stripped from it.
    remove_tags = [
        dict(
            class_=[
                "prop-book-review-header-wrapper_magazine",
                "prop-mobile-social-share_header",
                "prop-magazine-link-block",
                "pros-article-body__img-credit",
                "pros-article-topics__wrapper",
                "pros-article-author__image-wrapper",
                "prop-book-review-promo_details-buy-mobile",
            ]
        ),
        dict(id=["disqus_thread", "newsletter_wrapper"]),
        prefixed_classes("dfp-slot-"),
    ]
    extra_css = """
    h1 { font-size: 1.8rem; margin-bottom: 0.4rem; }
    .prop-book-review-header-wrapper_standfirst { font-size: 1.2rem; font-style: italic; font-weight: normal; margin-bottom: 0.5rem; }
    .prop-book-review-header-wrapper_details {  margin-top: 1rem; margin-bottom: 1rem; }
    .prop-book-review-header-wrapper_details-byline {
        display: inline-block; font-weight: bold; color: #444; margin-right: 0.5rem; }
    .prop-book-review-header-wrapper_details-date { display: inline-block; }
    .gd-picture img { display: block; max-width: 100%; height: auto; }
    .pros-article-body__img-caption {
        font-size: 0.8rem; display: block; margin-top: 0.2rem;
    }
    .pullquote, blockquote { text-align: center; margin-left: 0; margin-bottom: 0.4rem; font-size: 1.25rem; }
    .prop-book-review-article_author { margin: 1.5rem 0; font-style: italic; }
    .prop-book-review-promo { margin-bottom: 1rem; }
    """

    def preprocess_html(self, soup):
        """Tidy an article page before conversion and return the same soup.

        Moves the lede image below the header details block, copies each
        image's ``data-src`` into ``src``, and unwraps byline/author links so
        only their contents remain.
        """
        # re-position lede image
        lede_img = soup.find("img", class_="prop-book-review-header-wrapper_image")
        meta = soup.find("div", class_="prop-book-review-header-wrapper_details")
        if lede_img and meta:
            lede_img = lede_img.extract()
            meta.insert_after(lede_img)

        # The real image URL is carried in data-src; promote it to src.
        for img in soup.find_all("img", attrs={"data-src": True}):
            img["src"] = img["data-src"]
            del img["data-src"]

        for byline_link in soup.find_all("a", attrs={"data-author-name": True}):
            byline_link.unwrap()
        for author_link in soup.find_all("a", class_="pros-article-author"):
            author_link.unwrap()
        return soup

    def parse_index(self):
        """Build the section/article index for the selected issue.

        The issue page is ``_issue_url`` when that module-level override is
        set; otherwise it is the first issue link found on ``INDEX``. As side
        effects, ``timefmt`` is set from the issue name (when one is found)
        and ``cover_url`` from the issue's cover image (when one is found).

        Returns:
            A list of ``(section name, [article, ...])`` tuples in page order;
            each article is a dict with ``url`` and ``title`` keys.

        Raises:
            ValueError: if no issue link can be found on the ``INDEX`` page.
        """
        if _issue_url:
            curr_issue_url = _issue_url
        else:
            issues_soup = self.index_to_soup(self.INDEX)
            curr_issue_a_ele = issues_soup.find(
                "a", class_="pros-collection-landing__item", attrs={"href": True}
            )
            if curr_issue_a_ele is None:
                # Fail with a readable message rather than an opaque
                # "'NoneType' object is not subscriptable".
                raise ValueError(f"No current issue link found at {self.INDEX}")
            curr_issue_url = urljoin(self.INDEX, curr_issue_a_ele["href"])

        soup = self.index_to_soup(curr_issue_url)

        issue_name = (
            self.tag_to_string(soup.find(class_="magazine-lhc__issue-name"))
            .replace(" issue", "")
            .strip()
        )
        if issue_name:
            # Leave the default timefmt alone when the page has no issue
            # name, instead of producing an empty " []" suffix.
            self.timefmt = f" [{issue_name}]"

        # A missing cover should not abort the whole download.
        cover_img = soup.find(
            "img", class_="magazine-lhc__cover-image", attrs={"data-src": True}
        )
        if cover_img is not None:
            self.cover_url = cover_img["data-src"].replace(
                "portrait_small_fit", "portrait_large_fit"
            )
        else:
            self.log.warn("No cover image found at", curr_issue_url)

        articles = OrderedDict()
        for section in soup.find_all("div", class_="pro-magazine-section"):
            section_name = self.tag_to_string(
                section.find(class_="pro-magazine-section__name")
            )
            for sect_article in section.find_all(
                class_="pro-magazine-section__article"
            ):
                link = sect_article.find("a", attrs={"href": True})
                if link is None:
                    # Teaser without a usable link: skip it, keep the rest.
                    continue
                articles.setdefault(section_name, []).append(
                    {
                        "url": urljoin(self.INDEX, link["href"]),
                        "title": self.tag_to_string(
                            sect_article.find(
                                class_="pro-magazine-section__article-headline"
                            )
                        ),
                    }
                )
        return list(articles.items())