# Mirror of https://github.com/kovidgoyal/calibre.git
# Synced 2025-09-29 15:31:08 -04:00
# 132 lines, 4.8 KiB, Python
from urllib.parse import urljoin
|
|
|
|
from calibre.web.feeds.news import BasicNewsRecipe
|
|
|
|
|
|
class KirkusReviews(BasicNewsRecipe):
    """Recipe for the current issue of Kirkus Reviews magazine.

    The issue index at ``/magazine/current/`` is scraped by ``parse_index``
    for featured books, additional posts and starred reviews; each article
    page is then trimmed with the ``keep_only_tags`` / ``remove_tags`` rules
    below and lightly restructured by ``preprocess_html``.
    """

    title = "Kirkus Reviews"
    description = ("Kirkus Reviews is an American book review magazine founded in 1933 by Virginia Kirkus."
                   " The magazine is headquartered in New York City. Released twice monthly on the 1st/15th.")
    language = "en"
    __author__ = "ping"
    publication_type = "magazine"
    masthead_url = (
        "https://d1fd687oe6a92y.cloudfront.net/img/kir_images/logo/kirkus-nav-logo.svg"
    )
    encoding = "utf-8"
    remove_javascript = True
    no_stylesheets = True
    auto_cleanup = False
    ignore_duplicate_articles = {"url"}
    compress_news_images = True
    compress_news_images_auto_size = 6
    max_articles_per_feed = 99

    # Elements of an article page that carry the actual content.
    keep_only_tags = [
        dict(
            class_=[
                "article-author",
                "article-author-img-start",
                "article-author-description-start",
                "single-review",
            ]
        )
    ]
    # Sharing widgets, comments, navigation and shop links to strip out.
    remove_tags = [
        dict(
            class_=[
                "sidebar-content",
                "article-social-share-desktop-first",
                "article-social-share-desktop-pagination",
                "article-social-share-mobile",
                "share-review-text",
                "like-dislike-article",
                "rate-this-book-text",
                "input-group",
                "user-comments",
                "show-all-response-text",
                "button-row",
                "hide-on-mobile",
                "related-article",
                "breadcrumb-row",
                "shop-now-dropdown",
            ]
        )
    ]
    remove_tags_after = [dict(class_="single-review")]

    extra_css = """
    .image-container img { max-width: 100%; height: auto; margin-bottom: 0.2rem; }
    .photo-caption { font-size: 0.8rem; margin-bottom: 0.5rem; display: block; }
    .book-review-img .image-container { text-align: center; }
    .book-rating-module .description-title { font-size: 1.25rem; margin-left: 0; text-align: center; }
    """

    def preprocess_html(self, soup):
        """Flatten the book-cover list and move the article title above it.

        The ``ul.book-review-img`` element and its ``li`` children are renamed
        to ``div`` and, when an ``article-title`` element exists, it is moved
        to sit directly before the cover block. Pages without a cover block
        are returned untouched.

        :param soup: parsed article page.
        :return: the same ``soup`` object, modified in place.
        """
        h1 = soup.find(class_="article-title")
        book_cover = soup.find("ul", class_="book-review-img")
        if book_cover:
            for li in book_cover.find_all("li"):
                li.name = "div"
            book_cover.name = "div"
            # Only relocate the title when there is a cover to anchor it to.
            if h1:
                book_cover.insert_before(h1.extract())
        return soup

    def parse_index(self):
        """Build the list of sections and articles for the current issue.

        Side effects: sets ``self.cover_url`` from the issue header image and
        ``self.timefmt`` from the issue's ``h1`` when those elements exist.

        :return: a list of ``(section title, [article dict, ...])`` tuples.
            Every article dict has ``url`` and ``title``; starred reviews also
            carry ``description``.
        """
        issue_url = "https://www.kirkusreviews.com/magazine/current/"
        soup = self.index_to_soup(issue_url)
        issue = soup.find(name="article", class_="issue-container")
        if issue is None:
            # Without the container every lookup below would fail with an
            # opaque AttributeError on None; report the real problem instead.
            self.abort_recipe_processing(
                f"Could not find the current issue on {issue_url}"
            )

        cover_img = issue.select(".issue-header .cover-image img")
        if cover_img:
            cover_src = cover_img[0].get("src")
            if cover_src:
                # Resolve against the page URL, as is done for article links.
                self.cover_url = urljoin(issue_url, cover_src)

        h1 = issue.find("h1")
        if h1:
            self.timefmt = f" [{self.tag_to_string(h1)}]"  # edition

        # Section title -> list of article dicts; insertion order is kept.
        articles = {}

        # Featured books are looked up on the whole page, not just `issue`.
        for book_ele in soup.find_all(name="div", class_="issue-featured-book"):
            link = book_ele.find("a")
            if not link:
                continue
            section = self.tag_to_string(book_ele.find("h3")).upper()
            articles.setdefault(section, []).append(
                {
                    "url": urljoin(issue_url, link["href"]),
                    # Fall back to the link text if the title attribute is absent.
                    "title": link.get("title") or self.tag_to_string(link),
                }
            )

        for post_ele in issue.select("div.issue-more-posts ul li div.lead-text"):
            link = post_ele.find("a")
            if not link:
                continue
            section = self.tag_to_string(post_ele.find(class_="lead-text-type")).upper()
            articles.setdefault(section, []).append(
                {
                    "url": urljoin(issue_url, link["href"]),
                    "title": self.tag_to_string(link),
                }
            )

        for section_ele in issue.select("section.reviews-section"):
            section_articles = []
            for review in section_ele.select("ul li.starred"):
                link = review.select("h4 a")
                if not link:
                    continue
                description = review.find("p")
                section_articles.append(
                    {
                        "url": urljoin(issue_url, link[0]["href"]),
                        "title": self.tag_to_string(link[0]),
                        "description": self.tag_to_string(description) if description else "",
                    }
                )
            # Skip review sections that contain no starred entries.
            if not section_articles:
                continue
            section = self.tag_to_string(section_ele.find("h3")).upper()
            articles.setdefault(section, []).extend(section_articles)

        return list(articles.items())
|