calibre/recipes/kirkusreviews.recipe
Kovid Goyal 61937c430d
pep8
2023-10-24 11:24:57 +05:30

132 lines
4.8 KiB
Python

from urllib.parse import urljoin
from calibre.web.feeds.news import BasicNewsRecipe
class KirkusReviews(BasicNewsRecipe):
    """Recipe for the current issue of Kirkus Reviews.

    The issue page is scraped in ``parse_index`` for featured books,
    additional posts and starred reviews; each downloaded article is then
    tidied up in ``preprocess_html``.
    """

    title = "Kirkus Reviews"
    description = ("Kirkus Reviews is an American book review magazine founded in 1933 by Virginia Kirkus."
                   " The magazine is headquartered in New York City. Released twice monthly on the 1st/15th.")
    language = "en"
    __author__ = "ping"
    publication_type = "magazine"
    masthead_url = (
        "https://d1fd687oe6a92y.cloudfront.net/img/kir_images/logo/kirkus-nav-logo.svg"
    )
    encoding = "utf-8"
    remove_javascript = True
    no_stylesheets = True
    auto_cleanup = False
    ignore_duplicate_articles = {"url"}
    compress_news_images = True
    compress_news_images_auto_size = 6

    max_articles_per_feed = 99

    # Only these containers are kept from each article page.
    keep_only_tags = [
        dict(
            class_=[
                "article-author",
                "article-author-img-start",
                "article-author-description-start",
                "single-review",
            ]
        )
    ]
    # Sharing widgets, comment forms, shop links and similar page chrome.
    remove_tags = [
        dict(
            class_=[
                "sidebar-content",
                "article-social-share-desktop-first",
                "article-social-share-desktop-pagination",
                "article-social-share-mobile",
                "share-review-text",
                "like-dislike-article",
                "rate-this-book-text",
                "input-group",
                "user-comments",
                "show-all-response-text",
                "button-row",
                "hide-on-mobile",
                "related-article",
                "breadcrumb-row",
                "shop-now-dropdown",
            ]
        )
    ]
    remove_tags_after = [dict(class_="single-review")]

    # The stylesheet text is left flush-left so the string itself is unchanged.
    extra_css = """
.image-container img { max-width: 100%; height: auto; margin-bottom: 0.2rem; }
.photo-caption { font-size: 0.8rem; margin-bottom: 0.5rem; display: block; }
.book-review-img .image-container { text-align: center; }
.book-rating-module .description-title { font-size: 1.25rem; margin-left: 0; text-align: center; }
"""

    def preprocess_html(self, soup):
        """Restructure the book-cover list of a review page.

        When a ``ul.book-review-img`` element is present, it and its ``li``
        children are renamed to ``div``, and the ``article-title`` element
        (if any) is moved so that it sits directly before the cover block.

        :param soup: parsed article page.
        :return: the same ``soup`` object, modified in place.
        """
        h1 = soup.find(class_="article-title")
        book_cover = soup.find("ul", class_="book-review-img")
        if book_cover:
            for li in book_cover.find_all("li"):
                li.name = "div"
            book_cover.name = "div"
            if h1:
                book_cover.insert_before(h1.extract())
        return soup

    def parse_index(self):
        """Build the section/article index from the current-issue page.

        Also sets ``self.cover_url`` from the issue cover image and
        ``self.timefmt`` from the issue heading when those are present.

        :return: the ``items()`` view of a dict mapping an upper-cased
            section name to a list of article dicts (``url``, ``title`` and,
            for starred reviews, ``description``).
        :raises ValueError: if the issue container is missing from the page.
        """
        issue_url = "https://www.kirkusreviews.com/magazine/current/"
        soup = self.index_to_soup(issue_url)
        issue = soup.find(name="article", class_="issue-container")
        if issue is None:
            # Fail with a readable message instead of an AttributeError on None.
            raise ValueError(
                f"No issue container found at {issue_url}; the page layout may have changed"
            )

        cover_img = issue.select(".issue-header .cover-image img")
        if cover_img:
            cover_src = cover_img[0].get("src")
            if cover_src:
                self.cover_url = cover_src

        h1 = issue.find("h1")
        if h1:
            self.timefmt = f" [{self.tag_to_string(h1)}]"  # edition

        articles = {}

        # Featured books: searched on the whole page, as in the original code.
        for book_ele in soup.find_all(name="div", class_="issue-featured-book"):
            link = book_ele.find("a")
            if not link:
                continue
            section = self.tag_to_string(book_ele.find("h3")).upper()
            # Prefer the title attribute; fall back to the anchor text when it
            # is absent or empty rather than raising KeyError.
            title = link.get("title") or self.tag_to_string(link)
            articles.setdefault(section, []).append(
                {"url": urljoin(issue_url, link["href"]), "title": title}
            )

        # Additional posts listed inside the issue container.
        for post_ele in issue.select("div.issue-more-posts ul li div.lead-text"):
            link = post_ele.find("a")
            if not link:
                continue
            section = self.tag_to_string(post_ele.find(class_="lead-text-type")).upper()
            articles.setdefault(section, []).append(
                {
                    "url": urljoin(issue_url, link["href"]),
                    "title": self.tag_to_string(link),
                }
            )

        # Starred reviews, grouped by the heading of their review section.
        for section_ele in issue.select("section.reviews-section"):
            section_articles = []
            for review in section_ele.select("ul li.starred"):
                link = review.select("h4 a")
                if not link:
                    continue
                description = review.find("p")
                section_articles.append(
                    {
                        "url": urljoin(issue_url, link[0]["href"]),
                        "title": self.tag_to_string(link[0]),
                        "description": ""
                        if not description
                        else self.tag_to_string(description),
                    }
                )
            if not section_articles:
                continue
            section = self.tag_to_string(section_ele.find("h3")).upper()
            # setdefault already creates the list when the section is new.
            articles.setdefault(section, []).extend(section_articles)

        return articles.items()