mirror of
https://github.com/kovidgoyal/calibre.git
synced 2026-01-03 10:40:21 -05:00
79 lines
2.9 KiB
Python
79 lines
2.9 KiB
Python
from urllib.parse import urljoin
|
|
|
|
from calibre.web.feeds.news import BasicNewsRecipe
|
|
|
|
# URL of a specific issue's table-of-contents page. When left empty,
# parse_index() falls back to https://www.bookforum.com/print.
_issue_url = ''
|
|
|
|
|
|
class BookforumMagazine(BasicNewsRecipe):
    """Recipe that builds an e-book from one issue of Bookforum magazine.

    The issue's table of contents is read from the module-level ``_issue_url``
    when it is set, otherwise from https://www.bookforum.com/print.
    """

    title = 'Bookforum'
    description = (
        'Bookforum is an American book review magazine devoted to books and '
        'the discussion of literature. https://www.bookforum.com/print'
    )
    language = 'en'
    __author__ = 'ping'
    publication_type = 'magazine'
    encoding = 'utf-8'
    remove_javascript = True
    no_stylesheets = True
    auto_cleanup = False
    compress_news_images = True
    compress_news_images_auto_size = 8

    # Keep just the article container and drop the share/related widgets.
    keep_only_tags = [dict(class_='blog-article')]
    remove_tags = [dict(name=['af-share-toggle', 'af-related-articles'])]

    extra_css = '''
    .blog-article__header { font-size: 1.8rem; margin-bottom: 0.4rem; }
    .blog-article__subtitle { font-size: 1.2rem; font-style: italic; margin-bottom: 1rem; }
    .blog-article__writer { font-size: 1rem; font-weight: bold; color: #444; }
    .blog-article__book-info { margin: 1rem 0; }
    .article-image-container img, .blog-article__publication-media img {
        display: block; max-width: 100%; height: auto;
    }
    .blog-article__caption { font-size: 0.8rem; display: block; margin-top: 0.2rem; }
    '''

    def preprocess_html(self, soup):
        """Unwrap links inside the article header, keeping their contents.

        Args:
            soup: Parsed article page.

        Returns:
            The same ``soup`` object, modified in place.
        """
        # Strip away links that are not needed.
        for ele in soup.select('.blog-article__header a'):
            ele.unwrap()
        return soup

    def parse_index(self):
        """Collect the sections and articles of the issue.

        Fetches the table-of-contents page, sets ``self.timefmt`` from the
        page's ``og:title`` meta tag and ``self.cover_url`` from the cover
        image when those are present, then gathers the articles listed under
        each section. Entries without a link are skipped with a warning
        instead of aborting the whole download.

        Returns:
            The ``items()`` view of a dict mapping each section name to a list
            of dicts with ``title``, ``url`` and ``description`` keys.
        """
        base_url = 'https://www.bookforum.com'
        soup = self.index_to_soup(_issue_url or 'https://www.bookforum.com/print')

        meta_ele = soup.find('meta', property='og:title')
        issue_title = meta_ele.get('content') if meta_ele is not None else None
        if issue_title:
            self.timefmt = f' [{issue_title}]'

        cover_ele = soup.find('img', class_='toc-issue__cover')
        cover_src = cover_ele.get('src') if cover_ele is not None else None
        if cover_src:
            # Reuse the element found above rather than searching again.
            self.cover_url = urljoin(base_url, cover_src)

        articles = {}
        for sect_ele in soup.find_all('div', class_='toc-articles__section'):
            section_name = self.tag_to_string(
                sect_ele.find('a', class_='toc__anchor-links__link')
            )
            for article_ele in sect_ele.find_all('article'):
                title = self.tag_to_string(article_ele.find('h1'))

                link_ele = article_ele.find('a', class_='toc-article__link')
                href = link_ele.get('href') if link_ele is not None else None
                if not href:
                    # Without a link there is nothing to download.
                    self.log.warn('Skipping article without a link:', title)
                    continue

                sub_title_ele = article_ele.find(class_='toc-article__subtitle')
                description = (
                    self.tag_to_string(sub_title_ele) if sub_title_ele else ''
                )
                articles.setdefault(section_name, []).append(
                    {
                        'title': title,
                        # Absolute hrefs pass through urljoin unchanged;
                        # relative ones are resolved against the site root.
                        'url': urljoin(base_url, href),
                        'description': description,
                    }
                )
        return articles.items()
|