# Mirror of https://github.com/kovidgoyal/calibre.git
# Synced 2025-06-23 15:30:45 -04:00
# 118 lines, 4.0 KiB, plaintext
from calibre.web.feeds.recipes import BasicNewsRecipe
# Recipe metadata: license and copyright holder of this recipe file.
__license__ = 'GPL v3'
__copyright__ = 'Ralf Hein - ralfhein at GMX dot DE'

# Short description of what this recipe downloads. It is kept as a bare
# string statement after the assignments, exactly as in the original.
'''
Heise Select Magazine - iX
'''


class heise_select(BasicNewsRecipe):
    """Recipe for the iX magazine as published on Heise Select.

    By default the current issue is looked up on the Heise Select overview
    page. Set ``issue`` to a path such as ``'/select/ix/2020/3'`` to download
    a specific (e.g. previous) issue instead.

    ``needs_subscription`` is set, and ``get_browser`` logs in through the
    Heise SSO form when both a username and a password are configured.
    """

    issue = None
    # overwrite this for easy download of previous issues
    # issue = '/select/ix/2020/3'

    title = 'iX'
    timefmt = ''
    __author__ = 'Ralf Hein'
    needs_subscription = True
    description = 'Das iX Magazin als ePaper. Benötigt Heise Plus Digitalabo (siehe https://www.heise.de/plus/)'
    publisher = 'Heise Verlag'
    authors = 'Heise Verlag'
    category = 'it'
    tags = 'Magazin, IT, computer, ix'
    publication_type = 'magazine'
    no_stylesheets = True
    use_embedded_content = False
    compress_news_images = True
    encoding = 'utf-8'
    language = 'de'

    conversion_options = {
        'base_font_size': 10,
        'no_inline_navbars': True,
        'language': language,
        'publisher': publisher,
        'authors': publisher
    }

    # some code cleanup
    remove_tags = [
        dict(name='meta'),
        dict(name='link', attrs={'rel': 'icon'}),
        dict(name='link', attrs={'rel': 'dns-prefetch'}),
        dict(name='link', attrs={'rel': 'preconnect'}),
        dict(name='div', attrs={'class': 'meta__group--issue'}),
        dict(name='p', attrs={'class': 'comment'}),
        dict(name='div', attrs={'class': 'pswp'}),
        dict(name='div', attrs={'class': 'bottom-links'}),
    ]

    # content is neatly within <main> element
    remove_tags_before = [dict(name='main')]
    remove_tags_after = [dict(name='main')]

    def parse_index(self):
        """Build the table of contents for the selected issue.

        When ``self.issue`` is ``None`` the issue path is first read from the
        overview page at ``/select``. As a side effect the issue number is
        appended to ``self.title`` and ``self.cover_url`` is set.

        Returns:
            A list of ``(section_title, articles)`` tuples, where ``articles``
            is a list of dicts with the keys ``'title'`` and ``'url'``.

        Raises:
            ValueError: if the link to the current issue cannot be found on
                the overview page.
        """
        baseref = 'https://www.heise.de'
        # find current issue if not defined
        if self.issue is None:
            soup = self.index_to_soup(baseref + '/select')
            sec = soup.find('section', attrs={'class': 'magazine--ix'})
            issue_link = None
            if sec is not None:
                issue_link = sec.find(
                    'a', attrs={'class': 'magazine__link--issue'}, href=True
                )
            if issue_link is None:
                # find() returns None on a miss; fail with a clear message
                # instead of an opaque TypeError/AttributeError.
                raise ValueError(
                    'Could not find the current iX issue on ' + baseref + '/select'
                )
            self.issue = issue_link['href']

        issue_num = self.issue.replace('/select/ix/', '')
        # fix title with issue number to keep them neatly organised
        self.title += ' ' + issue_num.replace('/', '-')
        self.cover_url = 'https://www.heise.de/select/thumbnail/ix/' + issue_num

        soup = self.index_to_soup(baseref + self.issue)
        toc = []

        for h3 in soup.findAll('h3', attrs={'class': 'xp__inhalt__title'}):
            ul = h3.find_next('ul')
            if ul is None:
                # section heading without a following article list
                continue

            articles = []
            for li in ul.findAll('li', attrs={'class': 'xp__toc__item'}):
                link = li.find('a', attrs={'class': 'xp__link'}, href=True)
                if link is None:
                    # nothing to download for an entry without a link
                    continue
                subtitle = li.find(
                    'span', attrs={'class': 'xp__toc__item-subtitle'}
                )
                # fall back to the link text when the subtitle span is missing
                title_source = subtitle if subtitle is not None else link
                articles.append(
                    {'title': title_source.text, 'url': baseref + link['href']}
                )
            toc.append((h3.text, articles))

        return toc

    def get_browser(self):
        """Return a browser, logged in via Heise SSO when credentials are set.

        The login form is only submitted when both ``self.username`` and
        ``self.password`` are not ``None``; otherwise the browser from
        ``BasicNewsRecipe.get_browser`` is returned unchanged.
        """
        br = BasicNewsRecipe.get_browser(self)
        if self.username is not None and self.password is not None:
            loginURL = 'https://www.heise.de/sso/login?forward=%2Fselect'
            br.open(loginURL)
            br.select_form(action='/sso/login/login')
            br['username'] = self.username
            br['password'] = self.password
            br.submit()

        return br

    def preprocess_html(self, soup):
        """Replace JS-driven image anchors with ordinary ``<img>`` tags.

        Each ``<a class="js-pswp-image">`` that has both an ``href`` and a
        ``data-pswp-bu`` attribute is replaced by an ``<img>`` using them as
        ``src`` and ``alt``. Anchors missing either attribute are left as-is.

        Returns:
            The same ``soup`` object, modified in place.
        """
        # images are dynamically sized via js + a-img tag, epub can not work with this
        # construct ordinary img from it
        for aimg in soup.findAll('a', attrs={'class': 'js-pswp-image'}):
            # .get() returns None for a missing attribute, whereas
            # subscripting would raise KeyError before any None check ran.
            src = aimg.get('href')
            caption = aimg.get('data-pswp-bu')
            if src is None or caption is None:
                continue
            img = soup.new_tag(
                'img',
                src=src,
                alt=caption,
                style="display: block;"
            )
            aimg.replaceWith(img)

        return soup