Update Frankfurter Rundschau

Fixes #1690340 [Download of 'Frankfurter Rundschau' has stopped working](https://bugs.launchpad.net/calibre/+bug/1690340)
This commit is contained in:
Kovid Goyal 2017-05-21 09:14:25 +05:30
parent a24f489822
commit fa7d11772f
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C

View File

@ -1,8 +1,5 @@
#!/usr/bin/env python2 #!/usr/bin/env python2
__license__ = 'GPL v3'
__copyright__ = '2010-2011, Christian Schmitt'
''' '''
fr-online.de fr-online.de
''' '''
@ -10,14 +7,16 @@ fr-online.de
from calibre.web.feeds.recipes import BasicNewsRecipe from calibre.web.feeds.recipes import BasicNewsRecipe
class FROnlineRecipe(BasicNewsRecipe): def classes(classes):
q = frozenset(classes.split(' '))
return dict(attrs={
'class': lambda x: x and frozenset(x.split()).intersection(q)})
class FR(BasicNewsRecipe):
title = 'Frankfurter Rundschau' title = 'Frankfurter Rundschau'
__author__ = 'maccs' __author__ = 'Kovid Goyal'
description = 'Nachrichten aus D und aller Welt' description = 'Nachrichten aus D und aller Welt'
encoding = 'utf-8'
masthead_url = 'http://www.fr-online.de/image/view/-/1474018/data/823552/-/logo.png'
publisher = 'Druck- und Verlagshaus Frankfurt am Main GmbH'
category = 'news, germany, world'
language = 'de' language = 'de'
publication_type = 'newspaper' publication_type = 'newspaper'
use_embedded_content = False use_embedded_content = False
@ -25,52 +24,36 @@ class FROnlineRecipe(BasicNewsRecipe):
no_stylesheets = True no_stylesheets = True
oldest_article = 1 # Increase this number if you're interested in older articles oldest_article = 1 # Increase this number if you're interested in older articles
max_articles_per_feed = 50 # Seems a reasonable number to me max_articles_per_feed = 50 # Seems a reasonable number to me
extra_css = ''' encoding = 'cp1252'
body { font-family: "arial", "verdana", "geneva", sans-serif; font-size: 12px; margin: 0px; background-color: #ffffff;}
.imgSubline{background-color: #f4f4f4; font-size: 0.8em;}
.p--heading-1 {font-weight: bold;}
.calibre_navbar {font-size: 0.8em; font-family: "arial", "verdana", "geneva", sans-serif;}
'''
keep_only_tags = [{'class': 'ArticleHeadlineH1'},
{'class': 'article_text'}]
cover_url = 'http://www.fr-online.de/image/view/-/1474018/data/823552/-/logo.png'
cover_margins = (100, 150, '#ffffff')
feeds = [] keep_only_tags = [
feeds.append( dict(id='fcms_page_main'),
('Startseite', u'http://www.fr-online.de/home/-/1472778/1472778/-/view/asFeed/-/index.xml')) ]
feeds.append( remove_tags = [
('Politik', u'http://www.fr-online.de/politik/-/1472596/1472596/-/view/asFeed/-/index.xml')) dict(name='footer'),
feeds.append( dict(id='comments'),
('Meinung', u'http://www.fr-online.de/politik/meinung/-/1472602/1472602/-/view/asFeed/-/index.xml')) ]
feeds.append(
('Wirtschaft', u'http://www.fr-online.de/wirtschaft/-/1472780/1472780/-/view/asFeed/-/index.xml'))
feeds.append(
('Sport', u'http://www.fr-online.de/sport/-/1472784/1472784/-/view/asFeed/-/index.xml'))
feeds.append(('Eintracht Frankfurt',
u'http://www.fr-online.de/sport/eintracht-frankfurt/-/1473446/1473446/-/view/asFeed/-/index.xml'))
feeds.append(('Kultur und Medien',
u'http://www.fr-online.de/kultur/-/1472786/1472786/-/view/asFeed/-/index.xml'))
feeds.append(
('Panorama', u'http://www.fr-online.de/panorama/-/1472782/1472782/-/view/asFeed/-/index.xml'))
feeds.append(
('Frankfurt', u'http://www.fr-online.de/frankfurt/-/1472798/1472798/-/view/asFeed/-/index.xml'))
feeds.append(
('Rhein-Main', u'http://www.fr-online.de/rhein-main/-/1472796/1472796/-/view/asFeed/-/index.xml'))
feeds.append(
('Hanau', u'http://www.fr-online.de/rhein-main/hanau/-/1472866/1472866/-/view/asFeed/-/index.xml'))
feeds.append(
('Darmstadt', u'http://www.fr-online.de/rhein-main/darmstadt/-/1472858/1472858/-/view/asFeed/-/index.xml'))
feeds.append(
('Wiesbaden', u'http://www.fr-online.de/rhein-main/wiesbaden/-/1472860/1472860/-/view/asFeed/-/index.xml'))
feeds.append(
('Offenbach', u'http://www.fr-online.de/rhein-main/offenbach/-/1472856/1472856/-/view/asFeed/-/index.xml'))
feeds.append(
('Bad Homburg', u'http://www.fr-online.de/rhein-main/bad-homburg/-/1472864/1472864/-/view/asFeed/-/index.xml'))
feeds.append(
('Digital', u'http://www.fr-online.de/digital/-/1472406/1472406/-/view/asFeed/-/index.xml'))
feeds.append(
('Wissenschaft', u'http://www.fr-online.de/wissenschaft/-/1472788/1472788/-/view/asFeed/-/index.xml'))
def print_version(self, url): feeds = [
return url.replace('index.html', 'view/printVersion/-/index.html') ('Startseite', u'http://www.fr.de/?_XML=rss'),
('Frankfurt', u'https://www.fr.de/frankfurt/?_XML=rss'),
('Rhein-Main', 'https://www.fr.de/rhein-main/?_XML=rss'),
('Politik', 'https://www.fr.de/politik/?_XML=rss'),
('Wirtschaft', 'https://www.fr.de/wirtschaft/?_XML=rss'),
('Sport', 'https://www.fr.de/sport/?_XML=rss'),
('Eintracht Frankfurt', 'https://www.fr.de/sport/eintracht/?_XML=rss'),
('Kultur', 'https://www.fr.de/kultur/?_XML=rss'),
('Wissen', 'https://www.fr.de/wissen/?_XML=rss'),
('Leben', 'https://www.fr.de/leben/?_XML=rss'),
('Panorama', 'https://www.fr.de/panorama/?_XML=rss'),
]
def preprocess_html(self, soup):
    """Resolve lazy-loaded images and unwrap the main content container.

    Every ``<img>`` carrying a ``data-src`` attribute gets that value copied
    into ``src``. The element with id ``fcms_page_main`` is then replaced by
    its first child that has a tag name, so text nodes preceding that child
    are skipped.

    :param soup: parsed article document; it must provide ``findAll`` and
        ``find`` (presumably a BeautifulSoup tree -- confirm against the
        recipe framework).
    :return: the same ``soup`` object, modified in place.
    """
    for img in soup.findAll('img', attrs={'data-src': True}):
        img['src'] = img['data-src']

    main = soup.find(id='fcms_page_main')
    if main is None:
        # No main container on this page: nothing to unwrap. Without this
        # guard, iterating over None below would raise TypeError.
        return soup

    # Snapshot the children before touching the tree, because replaceWith()
    # detaches `main` from the document while we are still looping.
    for child in tuple(main):
        if getattr(child, 'name', None):
            main.replaceWith(child)
            break
    return soup