From 5317f8bb9c0acd80576ca577ffc02d33fb138c1e Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Wed, 18 May 2011 14:16:41 -0600
Subject: [PATCH] Various German news sources by schuster

---
Review notes (ignored when the patch is applied):
- "remove_tags_bevor" is renamed to "remove_tags_before" in all three recipes.
- capital_de: INDEX is now defined and append_page() tolerates missing tags.
- The commit id above and the blob ids on the "index" lines are those of the
  original commit; hunk sizes and the diffstat reflect the revised content.

 recipes/borse_online.recipe | 41 +++++++++++++++++
 recipes/capital_de.recipe   | 88 +++++++++++++++++++++++++++++++++++++
 recipes/impulse_de.recipe   | 42 ++++++++++++++++++
 3 files changed, 171 insertions(+)
 create mode 100644 recipes/borse_online.recipe
 create mode 100644 recipes/capital_de.recipe
 create mode 100644 recipes/impulse_de.recipe

diff --git a/recipes/borse_online.recipe b/recipes/borse_online.recipe
new file mode 100644
index 0000000000..c192ce2b8d
--- /dev/null
+++ b/recipes/borse_online.recipe
@@ -0,0 +1,41 @@
+from calibre.web.feeds.recipes import BasicNewsRecipe
+
+
+class AdvancedUserRecipe1303841067(BasicNewsRecipe):
+    """Recipe for Boerse-online: reads the RSS feed and fetches print views."""
+
+    title = u'Börse-online'
+    __author__ = 'schuster'
+    oldest_article = 1
+    max_articles_per_feed = 100
+    no_stylesheets = True
+    use_embedded_content = False
+    language = 'de'
+    remove_javascript = True
+    cover_url = 'http://www.dpv.de/images/1995/source.gif'
+    masthead_url = 'http://www.zeitschriften-cover.de/cover/boerse-online-cover-januar-2010-x1387.jpg'
+    extra_css = '''
+        h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
+        h4{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
+        img {min-width:300px; max-width:600px; min-height:300px; max-height:800px}
+        p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
+        body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
+    '''
+
+    # The recipe option is spelled remove_tags_before; the previous spelling
+    # "remove_tags_bevor" only created an unused class attribute.
+    remove_tags_before = [dict(name='h3')]
+    remove_tags_after = [dict(name='div', attrs={'class': 'artikelfuss'})]
+    remove_tags = [
+        dict(attrs={'class': ['moduleTopNav', 'moduleHeaderNav', 'text', 'blau', 'poll1150']}),
+        dict(id=['newsletterlayer', 'newsletterlayerClose', 'newsletterlayer_body', 'newsletterarray_error', 'newsletterlayer_emailadress', 'newsletterlayer_submit', 'kommentar']),
+        # NOTE(review): 'Gesamtranking' and '' are not HTML tag names, so
+        # presumably only 'h2' and 'h3' ever match here -- confirm intent.
+        dict(name=['h2', 'Gesamtranking', 'h3', '']),
+    ]
+
+    feeds = [(u'Börsennachrichten', u'http://www.boerse-online.de/rss/')]
+
+    def print_version(self, url):
+        """Return the print-view URL for an article URL from the feed."""
+        return url.replace('.html#nv=rss', '.html?mode=print')
diff --git a/recipes/capital_de.recipe b/recipes/capital_de.recipe
new file mode 100644
index 0000000000..6826049bc9
--- /dev/null
+++ b/recipes/capital_de.recipe
@@ -0,0 +1,88 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+
+class AdvancedUserRecipe1305470859(BasicNewsRecipe):
+    """Recipe for Capital.de: reads three RSS feeds and fetches print views."""
+
+    title = u'Capital.de'
+    language = 'de'
+    __author__ = 'schuster'
+    oldest_article = 7
+    max_articles_per_feed = 35
+    no_stylesheets = True
+    remove_javascript = True
+    use_embedded_content = False
+    masthead_url = 'http://www.wirtschaftsmedien-shop.de/media/stores/wirtschaftsmedien/capital/teaser_large_abo.jpg'
+    cover_url = 'http://d1kb9jvg6ylufe.cloudfront.net/WebsiteCMS/de/unternehmen/linktipps/mainColumn/08/image/DE_Capital_bis20mm_SW.jpg'
+    # Prefix for the pager links followed by append_page(), which used
+    # self.INDEX without it being defined anywhere in this recipe.
+    # NOTE(review): assumes the pager hrefs are site-relative -- confirm.
+    INDEX = 'http://www.capital.de'
+
+    extra_css = '''
+        h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
+        h4{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
+        img {min-width:300px; max-width:600px; min-height:300px; max-height:800px}
+        p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
+        body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
+    '''
+
+    # The recipe option is spelled remove_tags_before; the previous spelling
+    # "remove_tags_bevor" only created an unused class attribute.
+    remove_tags_before = [dict(name='td', attrs={'class': 'textcell'})]
+    remove_tags_after = [dict(name='div', attrs={'class': 'artikelsplit'})]
+    remove_tags = [
+        dict(attrs={'class': ['navSeitenAlle', 'kommentieren', 'teaserheader', 'teasercontent', 'info', 'zwischenhead', 'artikelsplit']}),
+        dict(id=['topNav', 'mainNav', 'subNav', 'socialmedia', 'footerRahmen', 'gatrixx_marktinformationen', 'pager', 'weitere']),
+        # NOTE(review): this selects on an HTML attribute called "span";
+        # presumably <span> tags with these classes were meant -- confirm.
+        dict(span=['ratingtext', 'Gesamtranking', 'h3', '']),
+        dict(rel=['canonical']),
+    ]
+
+    feeds = [
+        (u'Wirtschaftsmagazin', u'http://www.capital.de/rss/'),
+        (u'Unternehmen', u'http://www.capital.de/rss/unternehmen'),
+        (u'Finanz & Geldanlage', u'http://www.capital.de/rss/finanzen/geldanlage'),
+    ]
+
+    def print_version(self, url):
+        """Return the print-view URL for an article URL from the feeds."""
+        return url.replace('nv=rss#utm_source=rss2&utm_medium=rss_feed&utm_campaign=/', 'mode=print')
+
+    def append_page(self, soup, appendtag, position):
+        """Insert the text of the follow-up page into appendtag at position.
+
+        Looks up the 'artikelsplit' pager div in soup, downloads the page its
+        first link points to and takes that page's 'printable' div. It then
+        recurses so later pages land inside that div before it is inserted.
+        Returns without changes if the pager, link or 'printable' div is absent.
+        """
+        pager = soup.find('div', attrs={'class': 'artikelsplit'})
+        link = pager.a if pager is not None else None
+        if link is None or not link.get('href'):
+            return
+        soup2 = self.index_to_soup(self.INDEX + link['href'])
+        texttag = soup2.find('div', attrs={'class': 'printable'})
+        if texttag is None:
+            # Previously this fell through to texttag.findAll() on None.
+            return
+        for it in texttag.findAll(style=True):
+            del it['style']
+        self.append_page(soup2, texttag, len(texttag.contents))
+        texttag.extract()
+        appendtag.insert(position, texttag)
+
+    def preprocess_html(self, soup):
+        """Strip inline styles and pager divs; return self.adeify_images(soup)."""
+        for item in soup.findAll(style=True):
+            del item['style']
+        for item in soup.findAll('div', attrs={'class': 'artikelsplit'}):
+            item.extract()
+        # NOTE(review): every pager div was just extracted, so this call finds
+        # no pager in soup and adds nothing -- confirm whether that is intended.
+        self.append_page(soup, soup.body, 3)
+        pager = soup.find('div', attrs={'class': 'artikelsplit'})
+        if pager:
+            pager.extract()
+        return self.adeify_images(soup)
diff --git a/recipes/impulse_de.recipe b/recipes/impulse_de.recipe
new file mode 100644
index 0000000000..d38c0aa6a6
--- /dev/null
+++ b/recipes/impulse_de.recipe
@@ -0,0 +1,42 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+
+class AdvancedUserRecipe1305470859(BasicNewsRecipe):
+    """Recipe for Impulse.de: reads the RSS feed and fetches print views."""
+
+    title = u'Impulse.de'
+    language = 'de'
+    __author__ = 'schuster'
+    oldest_article = 14
+    max_articles_per_feed = 100
+    no_stylesheets = True
+    remove_javascript = True
+    use_embedded_content = False
+    cover_url = 'http://www.bvk.de/files/image/bilder/Logo%20Impulse.jpg'
+
+    extra_css = '''
+        h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
+        h4{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
+        img {min-width:300px; max-width:600px; min-height:300px; max-height:800px}
+        p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
+        body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
+    '''
+
+    # The recipe option is spelled remove_tags_before; the previous spelling
+    # "remove_tags_bevor" only created an unused class attribute.
+    remove_tags_before = [dict(name='h1', attrs={'class': 'h2'})]
+    remove_tags_after = [dict(name='div', attrs={'class': 'artikelfuss'})]
+    remove_tags = [
+        dict(attrs={'class': ['navSeitenAlle', 'kommentieren', 'teaserheader', 'teasercontent', 'info', 'zwischenhead', 'kasten_artikel']}),
+        dict(id=['metaNav', 'impKopf', 'impTopNav', 'impSubNav', 'footerRahmen', 'gatrixx_marktinformationen', 'pager', 'weitere', 'socialmedia', 'rating_open']),
+        # NOTE(review): this selects on an HTML attribute called "span";
+        # presumably <span> tags with these classes were meant -- confirm.
+        dict(span=['ratingtext', 'Gesamtranking', 'h3', '']),
+        dict(rel=['canonical']),
+    ]
+
+    feeds = [(u'impulstest', u'http://www.impulse.de/rss/')]
+
+    def print_version(self, url):
+        """Return the print-view URL for an article URL from the feed."""
+        return url.replace('#utm_source=rss2&utm_medium=rss_feed&utm_campaign=/', '?mode=print')