mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-31 14:33:54 -04:00
Heise ct and iX by Ralf Hein
This commit is contained in:
parent
a00cde1120
commit
be261dcd71
115
recipes/heise_ct.recipe
Normal file
115
recipes/heise_ct.recipe
Normal file
@ -0,0 +1,115 @@
|
|||||||
|
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = 'Ralf Hein - ralfhein at GMX dot DE'
|
||||||
|
'''
|
||||||
|
Heise Select Magazine - ct
|
||||||
|
'''
|
||||||
|
|
||||||
|
|
||||||
|
class heise_select(BasicNewsRecipe):
    '''
    Recipe for one issue of the ct magazine from Heise Select.

    The issue's table of contents is read from https://www.heise.de and every
    listed article is fetched. A login is attempted when a username and a
    password are configured (``needs_subscription`` is set).
    '''

    # Site-relative path of the issue to download. When it is None,
    # parse_index() looks up the current issue on the /select overview page.
    issue = None
    # overwrite this for easy download of previous issues
    # issue = '/select/ct/2020/8'

    title = 'Heise ct'
    timefmt = ''
    __author__ = 'Ralf Hein'
    needs_subscription = True
    description = 'Das ct Magazin als ePaper. Benötigt Heise Plus Digitalabo (siehe https://www.heise.de/plus/)'
    publisher = 'Heise Verlag'
    authors = 'Heise Verlag'
    category = 'it'
    tags = 'Magazin, IT, computer, ct'
    publication_type = 'magazine'
    no_stylesheets = True
    use_embedded_content = False
    compress_news_images = True
    encoding = 'utf-8'
    language = 'de'

    conversion_options = {
        'base_font_size': 10,
        'no_inline_navbars': True,
        'language': language,
        'publisher': publisher,
        'authors': publisher
    }

    # some code cleanup
    remove_tags = [
        dict(name='meta'),
        dict(name='link', attrs={'rel': 'icon'}),
        dict(name='link', attrs={'rel': 'dns-prefetch'}),
        dict(name='link', attrs={'rel': 'preconnect'}),
        dict(name='div', attrs={'class': 'meta__group--issue'}),
        dict(name='p', attrs={'class': 'comment'}),
        dict(name='div', attrs={'class': 'pswp'}),
        dict(name='div', attrs={'class': 'bottom-links'}),
    ]

    # keep only the <main> element of each article page
    remove_tags_before = [dict(name='main')]
    remove_tags_after = [dict(name='main')]

    def parse_index(self):
        '''
        Build the table of contents of the selected issue.

        Sets ``self.issue`` (when it was None), appends the issue number to
        ``self.title`` and sets ``self.cover_url``.

        :return: list of ``(section title, articles)`` tuples, where
                 ``articles`` is a list of dicts with the keys ``'title'``
                 and ``'url'``.
        :raises ValueError: if no issue is configured and the link to the
                            current issue cannot be found.
        '''
        baseref = 'https://www.heise.de'

        # find current issue if not defined
        if self.issue is None:
            overview = self.index_to_soup(baseref + '/select')
            sec = overview.find('section', attrs={'class': 'magazine--ct'})
            issue_link = None
            if sec is not None:
                issue_link = sec.find(
                    'a', attrs={'class': 'magazine__link--issue'}, href=True
                )
            if issue_link is None:
                # fail with a readable message instead of a None dereference
                raise ValueError(
                    'Could not find the current ct issue on ' + baseref + '/select'
                )
            self.issue = issue_link['href']

        issue_num = self.issue.replace('/select/ct/', '')
        # fix title with issue number to keep them neatly organised
        self.title += ' ' + issue_num.replace('/', '-')
        self.cover_url = 'https://www.heise.de/select/thumbnail/ct/' + issue_num

        soup = self.index_to_soup(baseref + self.issue)
        toc = []

        for h3 in soup.findAll('h3', attrs={'class': 'xp__inhalt__title'}):
            ul = h3.find_next('ul')
            if ul is None:
                # section heading that is not followed by an article list
                continue

            articles = []
            for li in ul.findAll('li', attrs={'class': 'xp__toc__item'}):
                link = li.find('a', attrs={'class': 'xp__link'}, href=True)
                if link is None:
                    # an entry without a target cannot be downloaded
                    continue
                subtitle = li.find(
                    'span', attrs={'class': 'xp__toc__item-subtitle'}
                )
                # fall back to the link text when the subtitle span is missing
                title_tag = link if subtitle is None else subtitle
                articles.append({
                    'title': title_tag.text.strip(),
                    'url': baseref + link['href']
                })

            # do not emit sections that ended up without any article
            if articles:
                toc.append((h3.text.strip(), articles))

        return toc

    def get_browser(self):
        '''
        Return the browser used for downloads.

        When both ``self.username`` and ``self.password`` are set, the Heise
        SSO login form is filled in and submitted first.
        '''
        br = BasicNewsRecipe.get_browser(self)
        if self.username is not None and self.password is not None:
            loginURL = 'https://www.heise.de/sso/login?forward=%2Fselect'
            br.open(loginURL)
            br.select_form(action='/sso/login/login')
            br['username'] = self.username
            br['password'] = self.password
            br.submit()

        return br

    def preprocess_html(self, soup):
        '''
        Replace the ``<a class="js-pswp-image">`` image links by ``<img>`` tags.

        :param soup: parsed article page
        :return: the same soup object, modified in place
        '''
        # images are dynamically sized via js + a-img tag, epub can not work with this
        # construct ordinary img from it
        for aimg in soup.findAll('a', attrs={'class': 'js-pswp-image'}):
            # .get() instead of [] so that a missing attribute does not raise
            src = aimg.get('href')
            if not src:
                # nothing to show; leave the original tag untouched
                continue
            img = soup.new_tag(
                'img',
                src=src,
                # the caption is optional, an image without one is still kept
                alt=aimg.get('data-pswp-bu') or '',
                style="display: block;"
            )
            aimg.replaceWith(img)

        return soup
|
117
recipes/heise_ix.recipe
Normal file
117
recipes/heise_ix.recipe
Normal file
@ -0,0 +1,117 @@
|
|||||||
|
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = 'Ralf Hein - ralfhein at GMX dot DE'
|
||||||
|
'''
|
||||||
|
Heise Select Magazine - iX
|
||||||
|
'''
|
||||||
|
|
||||||
|
|
||||||
|
class heise_select(BasicNewsRecipe):
    '''
    Recipe for one issue of the iX magazine from Heise Select.

    The issue's table of contents is read from https://www.heise.de and every
    listed article is fetched. A login is attempted when a username and a
    password are configured (``needs_subscription`` is set).
    '''

    # Site-relative path of the issue to download. When it is None,
    # parse_index() looks up the current issue on the /select overview page.
    issue = None
    # overwrite this for easy download of previous issues
    # issue = '/select/ix/2020/3'

    title = 'iX'
    timefmt = ''
    __author__ = 'Ralf Hein'
    needs_subscription = True
    description = 'Das iX Magazin als ePaper. Benötigt Heise Plus Digitalabo (siehe https://www.heise.de/plus/)'
    publisher = 'Heise Verlag'
    authors = 'Heise Verlag'
    category = 'it'
    tags = 'Magazin, IT, computer, ix'
    publication_type = 'magazine'
    no_stylesheets = True
    use_embedded_content = False
    compress_news_images = True
    encoding = 'utf-8'
    language = 'de'

    conversion_options = {
        'base_font_size': 10,
        'no_inline_navbars': True,
        'language': language,
        'publisher': publisher,
        'authors': publisher
    }

    # some code cleanup
    remove_tags = [
        dict(name='meta'),
        dict(name='link', attrs={'rel': 'icon'}),
        dict(name='link', attrs={'rel': 'dns-prefetch'}),
        dict(name='link', attrs={'rel': 'preconnect'}),
        dict(name='div', attrs={'class': 'meta__group--issue'}),
        dict(name='p', attrs={'class': 'comment'}),
        dict(name='div', attrs={'class': 'pswp'}),
        dict(name='div', attrs={'class': 'bottom-links'}),
    ]

    # content is neatly within <main> element
    remove_tags_before = [dict(name='main')]
    remove_tags_after = [dict(name='main')]

    def parse_index(self):
        '''
        Build the table of contents of the selected issue.

        Sets ``self.issue`` (when it was None), appends the issue number to
        ``self.title`` and sets ``self.cover_url``.

        :return: list of ``(section title, articles)`` tuples, where
                 ``articles`` is a list of dicts with the keys ``'title'``
                 and ``'url'``.
        :raises ValueError: if no issue is configured and the link to the
                            current issue cannot be found.
        '''
        baseref = 'https://www.heise.de'

        # find current issue if not defined
        if self.issue is None:
            overview = self.index_to_soup(baseref + '/select')
            sec = overview.find('section', attrs={'class': 'magazine--ix'})
            issue_link = None
            if sec is not None:
                issue_link = sec.find(
                    'a', attrs={'class': 'magazine__link--issue'}, href=True
                )
            if issue_link is None:
                # fail with a readable message instead of a None dereference
                raise ValueError(
                    'Could not find the current iX issue on ' + baseref + '/select'
                )
            self.issue = issue_link['href']

        issue_num = self.issue.replace('/select/ix/', '')
        # fix title with issue number to keep them neatly organised
        self.title += ' ' + issue_num.replace('/', '-')
        self.cover_url = 'https://www.heise.de/select/thumbnail/ix/' + issue_num

        soup = self.index_to_soup(baseref + self.issue)
        toc = []

        for h3 in soup.findAll('h3', attrs={'class': 'xp__inhalt__title'}):
            ul = h3.find_next('ul')
            if ul is None:
                # section heading that is not followed by an article list
                continue

            articles = []
            for li in ul.findAll('li', attrs={'class': 'xp__toc__item'}):
                link = li.find('a', attrs={'class': 'xp__link'}, href=True)
                if link is None:
                    # an entry without a target cannot be downloaded
                    continue
                subtitle = li.find(
                    'span', attrs={'class': 'xp__toc__item-subtitle'}
                )
                # fall back to the link text when the subtitle span is missing
                title_tag = link if subtitle is None else subtitle
                articles.append({
                    'title': title_tag.text.strip(),
                    'url': baseref + link['href']
                })

            # do not emit sections that ended up without any article
            if articles:
                toc.append((h3.text.strip(), articles))

        return toc

    def get_browser(self):
        '''
        Return the browser used for downloads.

        When both ``self.username`` and ``self.password`` are set, the Heise
        SSO login form is filled in and submitted first.
        '''
        br = BasicNewsRecipe.get_browser(self)
        if self.username is not None and self.password is not None:
            loginURL = 'https://www.heise.de/sso/login?forward=%2Fselect'
            br.open(loginURL)
            br.select_form(action='/sso/login/login')
            br['username'] = self.username
            br['password'] = self.password
            br.submit()

        return br

    def preprocess_html(self, soup):
        '''
        Replace the ``<a class="js-pswp-image">`` image links by ``<img>`` tags.

        :param soup: parsed article page
        :return: the same soup object, modified in place
        '''
        # images are dynamically sized via js + a-img tag, epub can not work with this
        # construct ordinary img from it
        for aimg in soup.findAll('a', attrs={'class': 'js-pswp-image'}):
            # .get() instead of [] so that a missing attribute does not raise
            src = aimg.get('href')
            if not src:
                # nothing to show; leave the original tag untouched
                continue
            img = soup.new_tag(
                'img',
                src=src,
                # the caption is optional, an image without one is still kept
                alt=aimg.get('data-pswp-bu') or '',
                style="display: block;"
            )
            aimg.replaceWith(img)

        return soup
|
Loading…
x
Reference in New Issue
Block a user