diff --git a/recipes/bild_de.recipe b/recipes/bild_de.recipe index ad16b94405..7434b1d47b 100644 --- a/recipes/bild_de.recipe +++ b/recipes/bild_de.recipe @@ -1,10 +1,11 @@ # -*- coding: utf-8 -*- from calibre.web.feeds.recipes import BasicNewsRecipe class AdvancedUserRecipe1303841067(BasicNewsRecipe): + title = u'Bild.de' __author__ = 'schuster' oldest_article = 1 - max_articles_per_feed = 50 + max_articles_per_feed = 100 no_stylesheets = True use_embedded_content = False language = 'de' @@ -12,11 +13,25 @@ class AdvancedUserRecipe1303841067(BasicNewsRecipe): # get cover from myspace cover_url = 'http://a3.l3-images.myspacecdn.com/images02/56/0232f842170b4d349779f8379c27e073/l.jpg' + masthead_url = 'http://a3.l3-images.myspacecdn.com/images02/56/0232f842170b4d349779f8379c27e073/l.jpg' # set what to fetch on the site remove_tags_before = dict(name = 'h2', attrs={'id':'cover'}) remove_tags_after = dict(name ='div', attrs={'class':'back'}) + +# remove things on the site that we don't want + remove_tags = [dict(name='div', attrs={'class':'credit'}), + dict(name='div', attrs={'class':'index'}), + dict(name='div', attrs={'id':'zstart31'}), + dict(name='div', attrs={'class':'hentry'}), + dict(name='div', attrs={'class':'back'}), + dict(name='div', attrs={'class':'pagination'}), + dict(name='div', attrs={'class':'header'}), + dict(name='div', attrs={'class':'element floatL'}), + dict(name='div', attrs={'class':'stWrap'}) +] + # thanx to kiklop74 for code (see sticky thread -> Recipes - Re-usable code) # this one removes a lot of direct-link's def preprocess_html(self, soup): @@ -42,5 +57,18 @@ class AdvancedUserRecipe1303841067(BasicNewsRecipe): (u'Unterhaltung', u'http://rss.bild.de/bild-unterhaltung.xml'), (u'Sport', u'http://rss.bild.de/bild-sport.xml'), (u'Lifestyle', u'http://rss.bild.de/bild-lifestyle.xml'), - (u'Ratgeber', u'http://rss.bild.de/bild-ratgeber.xml') + (u'Ratgeber', u'http://rss.bild.de/bild-ratgeber.xml'), + (u'Reg. - Berlin', u'http://rss.bild.de/bild-berlin.xml'), + (u'Reg. - Bremen', u'http://rss.bild.de/bild-bremen.xml'), + (u'Reg. - Dresden', u'http://rss.bild.de/bild-dresden.xml'), + (u'Reg. - Düsseldorf', u'http://rss.bild.de/bild-duesseldorf.xml'), + (u'Reg. - Frankfurt-Main', u'http://rss.bild.de/bild-frankfurt-main.xml'), + (u'Reg. - Hamburg', u'http://rss.bild.de/bild-hamburg.xml'), + (u'Reg. - Hannover', u'http://rss.bild.de/bild-hannover.xml'), + (u'Reg. - Köln', u'http://rss.bild.de/bild-koeln.xml'), + (u'Reg. - Leipzig', u'http://rss.bild.de/bild-leipzig.xml'), + (u'Reg. - München', u'http://rss.bild.de/bild-muenchen.xml'), + (u'Reg. - Ruhrgebiet', u'http://rss.bild.de/bild-ruhrgebiet.xml'), + (u'Reg. - Stuttgart', u'http://rss.bild.de/bild-stuttgart.xml') ] +