From fa7d11772f2a7831b5d2f4e66ad43634bcfdca7b Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Sun, 21 May 2017 09:14:25 +0530
Subject: [PATCH] Update Frankfurter Rundschau

Fixes #1690340 [Download of 'Frankfurter Rundschau' has stopped working](https://bugs.launchpad.net/calibre/+bug/1690340)
---
Review notes (placed after the three-dash marker, so they are not part of
the commit message):
 * Line breaks and leading whitespace in this patch were reconstructed from
   a copy in which they had been collapsed. Line counts agree with the hunk
   headers and the diffstat, but exact indentation and the spacing before
   inline comments are a best guess; run "git apply --check" (or apply with
   "--ignore-whitespace") before relying on it.
 * preprocess_html() hands the result of soup.find(id='fcms_page_main')
   straight to enumerate(). If find() yields None for a page without that
   element, enumerate() raises TypeError; a follow-up guard may be
   worthwhile.
 * classes() is added by this patch but is not referenced anywhere in it.

 recipes/frankfurter_rundschau.recipe | 95 ++++++++++++----------------
 1 file changed, 39 insertions(+), 56 deletions(-)

diff --git a/recipes/frankfurter_rundschau.recipe b/recipes/frankfurter_rundschau.recipe
index 86d806b7fe..e3b9f68200 100644
--- a/recipes/frankfurter_rundschau.recipe
+++ b/recipes/frankfurter_rundschau.recipe
@@ -1,8 +1,5 @@
 #!/usr/bin/env python2
 
-__license__ = 'GPL v3'
-__copyright__ = '2010-2011, Christian Schmitt'
-
 '''
 fr-online.de
 '''
@@ -10,14 +7,16 @@ fr-online.de
 from calibre.web.feeds.recipes import BasicNewsRecipe
 
 
-class FROnlineRecipe(BasicNewsRecipe):
+def classes(classes):
+    q = frozenset(classes.split(' '))
+    return dict(attrs={
+        'class': lambda x: x and frozenset(x.split()).intersection(q)})
+
+
+class FR(BasicNewsRecipe):
     title = 'Frankfurter Rundschau'
-    __author__ = 'maccs'
+    __author__ = 'Kovid Goyal'
     description = 'Nachrichten aus D und aller Welt'
-    encoding = 'utf-8'
-    masthead_url = 'http://www.fr-online.de/image/view/-/1474018/data/823552/-/logo.png'
-    publisher = 'Druck- und Verlagshaus Frankfurt am Main GmbH'
-    category = 'news, germany, world'
     language = 'de'
     publication_type = 'newspaper'
     use_embedded_content = False
@@ -25,52 +24,36 @@ class FROnlineRecipe(BasicNewsRecipe):
     no_stylesheets = True
     oldest_article = 1  # Increase this number if you're interested in older articles
     max_articles_per_feed = 50  # Seems a reasonable number to me
-    extra_css = '''
-        body { font-family: "arial", "verdana", "geneva", sans-serif; font-size: 12px; margin: 0px; background-color: #ffffff;}
-        .imgSubline{background-color: #f4f4f4; font-size: 0.8em;}
-        .p--heading-1 {font-weight: bold;}
-        .calibre_navbar {font-size: 0.8em; font-family: "arial", "verdana", "geneva", sans-serif;}
-    '''
-    keep_only_tags = [{'class': 'ArticleHeadlineH1'},
-                      {'class': 'article_text'}]
-    cover_url = 'http://www.fr-online.de/image/view/-/1474018/data/823552/-/logo.png'
-    cover_margins = (100, 150, '#ffffff')
+    encoding = 'cp1252'
 
-    feeds = []
-    feeds.append(
-        ('Startseite', u'http://www.fr-online.de/home/-/1472778/1472778/-/view/asFeed/-/index.xml'))
-    feeds.append(
-        ('Politik', u'http://www.fr-online.de/politik/-/1472596/1472596/-/view/asFeed/-/index.xml'))
-    feeds.append(
-        ('Meinung', u'http://www.fr-online.de/politik/meinung/-/1472602/1472602/-/view/asFeed/-/index.xml'))
-    feeds.append(
-        ('Wirtschaft', u'http://www.fr-online.de/wirtschaft/-/1472780/1472780/-/view/asFeed/-/index.xml'))
-    feeds.append(
-        ('Sport', u'http://www.fr-online.de/sport/-/1472784/1472784/-/view/asFeed/-/index.xml'))
-    feeds.append(('Eintracht Frankfurt',
-                  u'http://www.fr-online.de/sport/eintracht-frankfurt/-/1473446/1473446/-/view/asFeed/-/index.xml'))
-    feeds.append(('Kultur und Medien',
-                  u'http://www.fr-online.de/kultur/-/1472786/1472786/-/view/asFeed/-/index.xml'))
-    feeds.append(
-        ('Panorama', u'http://www.fr-online.de/panorama/-/1472782/1472782/-/view/asFeed/-/index.xml'))
-    feeds.append(
-        ('Frankfurt', u'http://www.fr-online.de/frankfurt/-/1472798/1472798/-/view/asFeed/-/index.xml'))
-    feeds.append(
-        ('Rhein-Main', u'http://www.fr-online.de/rhein-main/-/1472796/1472796/-/view/asFeed/-/index.xml'))
-    feeds.append(
-        ('Hanau', u'http://www.fr-online.de/rhein-main/hanau/-/1472866/1472866/-/view/asFeed/-/index.xml'))
-    feeds.append(
-        ('Darmstadt', u'http://www.fr-online.de/rhein-main/darmstadt/-/1472858/1472858/-/view/asFeed/-/index.xml'))
-    feeds.append(
-        ('Wiesbaden', u'http://www.fr-online.de/rhein-main/wiesbaden/-/1472860/1472860/-/view/asFeed/-/index.xml'))
-    feeds.append(
-        ('Offenbach', u'http://www.fr-online.de/rhein-main/offenbach/-/1472856/1472856/-/view/asFeed/-/index.xml'))
-    feeds.append(
-        ('Bad Homburg', u'http://www.fr-online.de/rhein-main/bad-homburg/-/1472864/1472864/-/view/asFeed/-/index.xml'))
-    feeds.append(
-        ('Digital', u'http://www.fr-online.de/digital/-/1472406/1472406/-/view/asFeed/-/index.xml'))
-    feeds.append(
-        ('Wissenschaft', u'http://www.fr-online.de/wissenschaft/-/1472788/1472788/-/view/asFeed/-/index.xml'))
+    keep_only_tags = [
+        dict(id='fcms_page_main'),
+    ]
+    remove_tags = [
+        dict(name='footer'),
+        dict(id='comments'),
+    ]
 
-    def print_version(self, url):
-        return url.replace('index.html', 'view/printVersion/-/index.html')
+    feeds = [
+        ('Startseite', u'http://www.fr.de/?_XML=rss'),
+        ('Frankfurt', u'https://www.fr.de/frankfurt/?_XML=rss'),
+        ('Rhein-Main', 'https://www.fr.de/rhein-main/?_XML=rss'),
+        ('Politik', 'https://www.fr.de/politik/?_XML=rss'),
+        ('Wirtschaft', 'https://www.fr.de/wirtschaft/?_XML=rss'),
+        ('Sport', 'https://www.fr.de/sport/?_XML=rss'),
+        ('Eintracht Frankfurt', 'https://www.fr.de/sport/eintracht/?_XML=rss'),
+        ('Kultur', 'https://www.fr.de/kultur/?_XML=rss'),
+        ('Wissen', 'https://www.fr.de/wissen/?_XML=rss'),
+        ('Leben', 'https://www.fr.de/leben/?_XML=rss'),
+        ('Panorama', 'https://www.fr.de/panorama/?_XML=rss'),
+    ]
+
+    def preprocess_html(self, soup):
+        for img in soup.findAll('img', attrs={'data-src': True}):
+            img['src'] = img['data-src']
+        main = soup.find(id='fcms_page_main')
+        for i, tag in tuple(enumerate(main)):
+            if getattr(tag, 'name', None):
+                main.replaceWith(tag)
+                break
+        return soup