mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Add Prospect Magazine UK (Free) recipe
This commit is contained in:
parent
93f60a674d
commit
41e23a8b0c
128
recipes/prospectmaguk_free.recipe
Normal file
128
recipes/prospectmaguk_free.recipe
Normal file
@ -0,0 +1,128 @@
|
||||
# Copyright (c) 2023 https://github.com/ping/
|
||||
#
|
||||
# This software is released under the GNU General Public License v3.0
|
||||
# https://opensource.org/licenses/GPL-3.0
|
||||
|
||||
from collections import OrderedDict
|
||||
from urllib.parse import urljoin
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe, prefixed_classes
|
||||
|
||||
# Optional override: set this to the URL of a specific issue page to download
# that issue. When left empty, parse_index() looks up the first issue linked
# from the recipe's INDEX page instead.
_issue_url = ""
|
||||
|
||||
|
||||
class ProspectMagazineUKFree(BasicNewsRecipe):
    """Calibre recipe that downloads one issue of Prospect Magazine (UK).

    The issue is either the one named by the module-level ``_issue_url``
    override or, when that is empty, the first issue linked from ``INDEX``.
    Articles are grouped by the magazine sections listed on the issue page.
    """

    title = "Prospect Magazine (Free)"
    __author__ = "ping"
    description = (
        "Prospect is Britain’s leading current affairs monthly magazine. "
        "It is an independent and eclectic forum for writing and thinking—in "
        "print and online. Published every month with two double issues in "
        "the summer and winter, it spans politics, science, foreign affairs, "
        "economics, the environment, philosophy and the arts."
    )
    language = "en_GB"
    category = "news, UK"
    publication_type = "magazine"
    masthead_url = "https://media.prospectmagazine.co.uk/prod/images/gm_grid_thumbnail/358ffc17208c-f4c3cddcdeda-prospect-masthead.png"
    encoding = "utf-8"
    remove_javascript = True
    no_stylesheets = True
    ignore_duplicate_articles = {"url"}
    # Page listing the magazine issues; also the base for resolving relative links.
    INDEX = "https://www.prospectmagazine.co.uk/issues"

    # Only the main article panel is kept from each article page.
    keep_only_tags = [dict(class_="prop-book-article-panel_main")]
    # Elements stripped from the kept panel (matched by class, id or class prefix).
    remove_tags = [
        dict(
            class_=[
                "prop-book-review-header-wrapper_magazine",
                "prop-mobile-social-share_header",
                "prop-magazine-link-block",
                "pros-article-body__img-credit",
                "pros-article-topics__wrapper",
                "pros-article-author__image-wrapper",
                "prop-book-review-promo_details-buy-mobile",
            ]
        ),
        dict(id=["disqus_thread", "newsletter_wrapper"]),
        prefixed_classes("dfp-slot-"),
    ]

    extra_css = """
    h1 { font-size: 1.8rem; margin-bottom: 0.4rem; }
    .prop-book-review-header-wrapper_standfirst { font-size: 1.2rem; font-style: italic; font-weight: normal; margin-bottom: 0.5rem; }
    .prop-book-review-header-wrapper_details { margin-top: 1rem; margin-bottom: 1rem; }
    .prop-book-review-header-wrapper_details-byline {
        display: inline-block; font-weight: bold; color: #444; margin-right: 0.5rem; }
    .prop-book-review-header-wrapper_details-date { display: inline-block; }
    .gd-picture img { display: block; max-width: 100%; height: auto; }
    .pros-article-body__img-caption {
        font-size: 0.8rem; display: block; margin-top: 0.2rem;
    }
    .pullquote, blockquote { text-align: center; margin-left: 0; margin-bottom: 0.4rem; font-size: 1.25rem; }
    .prop-book-review-article_author { margin: 1.5rem 0; font-style: italic; }
    .prop-book-review-promo { margin-bottom: 1rem; }
    """

    def preprocess_html(self, soup):
        """Tidy an article page before conversion and return the same soup.

        Moves the lede image to just after the byline/date block, promotes
        ``data-src`` image URLs to ``src``, and unwraps author links so that
        only their contents remain.
        """
        # re-position lede image
        lede_img = soup.find("img", class_="prop-book-review-header-wrapper_image")
        meta = soup.find("div", class_="prop-book-review-header-wrapper_details")
        if lede_img and meta:
            lede_img = lede_img.extract()
            meta.insert_after(lede_img)

        # Copy the URL held in data-src into src and drop the data attribute.
        for img in soup.find_all("img", attrs={"data-src": True}):
            img["src"] = img["data-src"]
            del img["data-src"]

        for byline_link in soup.find_all("a", attrs={"data-author-name": True}):
            byline_link.unwrap()
        for author_link in soup.find_all("a", class_="pros-article-author"):
            author_link.unwrap()

        return soup

    def _current_issue_url(self):
        """Return the URL of the issue page to download.

        Uses the module-level ``_issue_url`` when it is set; otherwise takes
        the first issue link on the ``INDEX`` page. Aborts the recipe with an
        explanatory message when no such link can be found.
        """
        if _issue_url:
            return _issue_url

        issues_soup = self.index_to_soup(self.INDEX)
        issue_link = issues_soup.find(
            "a", class_="pros-collection-landing__item", href=True
        )
        if issue_link is None:
            # Fail with a readable message instead of a TypeError on None.
            self.abort_recipe_processing(
                f"Unable to find the current issue link on {self.INDEX}"
            )
        return urljoin(self.INDEX, issue_link["href"])

    def parse_index(self):
        """Build the list of sections and articles for the chosen issue.

        Side effects: sets ``self.timefmt`` from the issue name and
        ``self.cover_url`` from the issue cover image when they are present
        on the issue page.

        Returns:
            A list of ``(section_name, articles)`` tuples in page order, where
            ``articles`` is a list of dicts with ``"url"`` and ``"title"`` keys.
        """
        soup = self.index_to_soup(self._current_issue_url())

        name_tag = soup.find(class_="magazine-lhc__issue-name")
        if name_tag is not None:
            issue_name = self.tag_to_string(name_tag).replace(" issue", "").strip()
            if issue_name:
                self.timefmt = f" [{issue_name}]"

        # The cover is optional: a missing image must not abort the download.
        cover_img = soup.find("img", class_="magazine-lhc__cover-image")
        if cover_img is not None:
            cover_src = cover_img.get("data-src") or cover_img.get("src")
            if cover_src:
                # Request the larger rendition of the same cover image.
                self.cover_url = cover_src.replace(
                    "portrait_small_fit", "portrait_large_fit"
                )

        articles = OrderedDict()
        for section in soup.find_all("div", class_="pro-magazine-section"):
            section_name = self.tag_to_string(
                section.find(class_="pro-magazine-section__name")
            )
            for sect_article in section.find_all(
                class_="pro-magazine-section__article"
            ):
                link = sect_article.find("a", href=True)
                if link is None:
                    # Entry without a usable link; nothing to download.
                    continue
                articles.setdefault(section_name, []).append(
                    {
                        "url": urljoin(self.INDEX, link["href"]),
                        "title": self.tag_to_string(
                            sect_article.find(
                                class_="pro-magazine-section__article-headline"
                            )
                        ),
                    }
                )

        return list(articles.items())
|
Loading…
x
Reference in New Issue
Block a user