mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Update Brand Eins
This commit is contained in:
commit
4f307ddae1
@ -14,7 +14,7 @@ from collections import OrderedDict
|
|||||||
class BrandEins(BasicNewsRecipe):
|
class BrandEins(BasicNewsRecipe):
|
||||||
|
|
||||||
title = u'brand eins'
|
title = u'brand eins'
|
||||||
__author__ = 'Nikolas Mangold-Takao'
|
__author__ = 'Nikolas Mangold-Takao, Thomas Schlenkhoff'
|
||||||
language = 'de'
|
language = 'de'
|
||||||
description = u'brand eins beschreibt den momentanen Wandel in Wirtschaft und Gesellschaft.'
|
description = u'brand eins beschreibt den momentanen Wandel in Wirtschaft und Gesellschaft.'
|
||||||
publisher = u'brand eins Verlag GmbH & Co. oHG'
|
publisher = u'brand eins Verlag GmbH & Co. oHG'
|
||||||
@ -37,9 +37,11 @@ class BrandEins(BasicNewsRecipe):
|
|||||||
keep_only_tags = dict(name='div', attrs={'id':'content'})
|
keep_only_tags = dict(name='div', attrs={'id':'content'})
|
||||||
|
|
||||||
# remove share image from articles
|
# remove share image from articles
|
||||||
remove_tags = [dict(name='div', attrs={'class':'advertisement rectangle desktop'}),
|
remove_tags = [dict(name='div', attrs={'id':'oms_gpt_billboard'}),
|
||||||
|
dict(name='div', attrs={'id':'oms_gpt_rectangle'}),
|
||||||
dict(name='h3', attrs={'class':'sharing-headline'}),
|
dict(name='h3', attrs={'class':'sharing-headline'}),
|
||||||
dict(name='div', attrs={'class':'sharing-links'})]
|
dict(name='div', attrs={'class':'sharing-links'}),
|
||||||
|
dict(name='aside', attrs={'class':'articleAside'})]
|
||||||
|
|
||||||
remove_tags_before = dict(name='div', attrs={'class':'innerContent typeArticle'})
|
remove_tags_before = dict(name='div', attrs={'class':'innerContent typeArticle'})
|
||||||
remove_tags_after = dict(name='div', attrs={'id':'socialshareprivacy'})
|
remove_tags_after = dict(name='div', attrs={'id':'socialshareprivacy'})
|
||||||
@ -72,9 +74,10 @@ class BrandEins(BasicNewsRecipe):
|
|||||||
self.log('- ', year, month, title, link.get('href'))
|
self.log('- ', year, month, title, link.get('href'))
|
||||||
|
|
||||||
# Issue 1 (most recent) has only few articles online,
|
# Issue 1 (most recent) has only few articles online,
|
||||||
# Issue 2 (2nd recent) is not completely online.
|
# Issues 2 and 3 (2nd and 3rd recent) are not completely online.
|
||||||
# Issue 3 (3rd recent) is completely online, hence i == 2
|
# Issue 4 (4th recent) is completely online, hence i == 3
|
||||||
if issue == "" and i == 2:
|
|
||||||
|
if issue == "" and i == 3:
|
||||||
issue = yyyymm
|
issue = yyyymm
|
||||||
i+=1
|
i+=1
|
||||||
|
|
||||||
@ -107,15 +110,12 @@ class BrandEins(BasicNewsRecipe):
|
|||||||
|
|
||||||
def get_cover_url(self):
|
def get_cover_url(self):
|
||||||
# the index does not contain a usable cover, but the 'Welt in Zahlen'-article contains it
|
# the index does not contain a usable cover, but the 'Welt in Zahlen'-article contains it
|
||||||
cover_article = "{}/{}".format(self.issue_url, 'die-welt-in-zahlen.html')
|
cover_article = "{}{}".format(self.issue_url, 'die-welt-in-zahlen.html')
|
||||||
self.log('Cover article URL: %s' % cover_article)
|
self.log('Cover article URL: %s' % cover_article)
|
||||||
soup = self.index_to_soup(cover_article)
|
soup = self.index_to_soup(cover_article)
|
||||||
cover_meta = soup.find('meta', attrs={'property':'og:image'})
|
img = soup.find('section', 'asideSection no-content').find('img')
|
||||||
if cover_meta:
|
self.log('Found cover image url: %s' % img['src'])
|
||||||
return cover_meta['content']
|
return (self.PREFIX + img['src'])
|
||||||
else:
|
|
||||||
self.log('ERROR: Could not return cover url')
|
|
||||||
|
|
||||||
def preprocess_raw_html(self, raw_html, url):
|
def preprocess_raw_html(self, raw_html, url):
|
||||||
return raw_html.replace('<p>• ', '<p>')
|
return raw_html.replace('<p>• ', '<p>')
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user