diff --git a/resources/recipes/cinebel_be.recipe b/resources/recipes/cinebel_be.recipe index ec76bfc894..024050eb67 100644 --- a/resources/recipes/cinebel_be.recipe +++ b/resources/recipes/cinebel_be.recipe @@ -1,7 +1,7 @@ #!/usr/bin/env python __license__ = 'GPL v3' -__copyright__ = '2008, Lionel Bergeret ' +__copyright__ = '2008-2011, Lionel Bergeret ' ''' cinebel.be ''' @@ -14,14 +14,14 @@ class Cinebel(BasicNewsRecipe): description = u'Cinema news from Belgium in French' publisher = u'cinebel.be' category = 'news, cinema, movie, Belgium' - oldest_article = 3 - encoding = 'utf8' - language = 'fr_BE' + oldest_article = 15 + language = 'fr' max_articles_per_feed = 20 no_stylesheets = True use_embedded_content = False timefmt = ' [%d %b %Y]' + filterDuplicates = True keep_only_tags = [ dict(name = 'span', attrs = {'class': 'movieMainTitle'}) @@ -35,6 +35,13 @@ class Cinebel(BasicNewsRecipe): ,(u'Top 10' , u'http://www.cinebel.be/Servlets/RssServlet?languageCode=fr&rssType=2' ) ] + def preprocess_html(self, soup): + for alink in soup.findAll('a'): + if alink.has_key('href'): + tstr = "Site officiel: " + alink['href'] + alink.replaceWith(tstr) + return soup + def get_cover_url(self): cover_url = 'http://www.cinebel.be/portal/resources/common/logo_index.gif' return cover_url diff --git a/resources/recipes/dhnet_be.recipe b/resources/recipes/dhnet_be.recipe index ef4d1736e3..d55470a765 100644 --- a/resources/recipes/dhnet_be.recipe +++ b/resources/recipes/dhnet_be.recipe @@ -1,7 +1,7 @@ #!/usr/bin/env python __license__ = 'GPL v3' -__copyright__ = '2008, Lionel Bergeret ' +__copyright__ = '2008-2011, Lionel Bergeret ' ''' dhnet.be ''' @@ -16,7 +16,8 @@ class DHNetBe(BasicNewsRecipe): publisher = u'dhnet.be' category = 'news, Belgium' oldest_article = 3 - language = 'fr_BE' + language = 'fr' + masthead_url = 'http://www.dhnet.be/images/homepage_logo_dh.gif' max_articles_per_feed = 20 no_stylesheets = True @@ -34,6 +35,13 @@ class DHNetBe(BasicNewsRecipe): ,(u'La Une 
Info' , u'http://www.dhnet.be/rss/dhinfos/' ) ] + def preprocess_html(self, soup): + for alink in soup.findAll('a'): + if alink.string is not None: + tstr = alink.string + alink.replaceWith(tstr) + return soup + def get_cover_url(self): cover_url = strftime('http://pdf-online.dhnet.be/pdfonline/image/%Y%m%d/dh_%Y%m%d_nam_infoge_001.pdf.L.jpg') return cover_url diff --git a/resources/recipes/lalibre_be.recipe b/resources/recipes/lalibre_be.recipe index 53e346bf12..a6356be828 100644 --- a/resources/recipes/lalibre_be.recipe +++ b/resources/recipes/lalibre_be.recipe @@ -1,7 +1,7 @@ #!/usr/bin/env python __license__ = 'GPL v3' -__copyright__ = '2008, Lionel Bergeret ' +__copyright__ = '2008-2011, Lionel Bergeret ' ''' lalibre.be ''' @@ -16,18 +16,18 @@ class LaLibre(BasicNewsRecipe): publisher = u'lalibre.be' category = 'news, Belgium' oldest_article = 3 - language = 'fr_BE' + language = 'fr' + masthead_url = 'http://www.lalibre.be/img/logoLaLibre.gif' max_articles_per_feed = 20 no_stylesheets = True use_embedded_content = False timefmt = ' [%d %b %Y]' - keep_only_tags = [ - dict(name = 'div', attrs = {'id': 'articleHat'}) - ,dict(name = 'p', attrs = {'id': 'publicationDate'}) - ,dict(name = 'div', attrs = {'id': 'articleText'}) - ] + remove_tags_before = dict(name = 'div', attrs = {'class': 'extraMainContent'}) + remove_tags_after = dict(name = 'div', attrs = {'id': 'articleText'}) + + remove_tags = [dict(name = 'div', attrs = {'id': 'strongArticleLinks'})] feeds = [ (u'L\'actu' , u'http://www.lalibre.be/rss/?section=10' ) @@ -38,6 +38,13 @@ class LaLibre(BasicNewsRecipe): ,(u'Societe' , u'http://www.lalibre.be/rss/?section=12' ) ] + def preprocess_html(self, soup): + for alink in soup.findAll('a'): + if alink.string is not None: + tstr = alink.string + alink.replaceWith(tstr) + return soup + def get_cover_url(self): cover_url = strftime('http://pdf-online.lalibre.be/pdfonline/image/%Y%m%d/llb_%Y%m%d_nam_libre_001.pdf.L.jpg') return cover_url diff --git 
a/resources/recipes/lameuse_be.recipe b/resources/recipes/lameuse_be.recipe index 03b7f84a5f..7166d01103 100644 --- a/resources/recipes/lameuse_be.recipe +++ b/resources/recipes/lameuse_be.recipe @@ -1,7 +1,7 @@ #!/usr/bin/env python __license__ = 'GPL v3' -__copyright__ = '2008, Lionel Bergeret ' +__copyright__ = '2008-2011, Lionel Bergeret ' ''' lameuse.be ''' @@ -16,8 +16,8 @@ class LaMeuse(BasicNewsRecipe): publisher = u'lameuse.be' category = 'news, Belgium' oldest_article = 3 - encoding = 'utf8' - language = 'fr_BE' + language = 'fr' + masthead_url = 'http://www.lameuse.be/images/SPV3/logo_header_LM.gif' max_articles_per_feed = 20 no_stylesheets = True @@ -32,6 +32,11 @@ class LaMeuse(BasicNewsRecipe): dict(name = 'div', attrs = {'class': 'sb-group'}) ,dict(name = 'div', attrs = {'id': 'share'}) ,dict(name = 'div', attrs = {'id': 'commentaires'}) + ,dict(name = 'ul', attrs = {'class': 'right liensutiles'}) + ,dict(name = 'ul', attrs = {'class': 'bas liensutiles'}) + ,dict(name = 'p', attrs = {'class': 'ariane'}) + ,dict(name = 'div', attrs = {'class': 'inner-bloc'}) + ,dict(name = 'div', attrs = {'class': 'block-01'}) ] feeds = [ diff --git a/resources/recipes/lavenir_be.recipe b/resources/recipes/lavenir_be.recipe index 68be449ae5..4c2c8a00a2 100644 --- a/resources/recipes/lavenir_be.recipe +++ b/resources/recipes/lavenir_be.recipe @@ -1,7 +1,7 @@ #!/usr/bin/env python __license__ = 'GPL v3' -__copyright__ = '2008, Lionel Bergeret ' +__copyright__ = '2008-2011, Lionel Bergeret ' ''' lavenir.net ''' @@ -15,8 +15,7 @@ class LAvenir(BasicNewsRecipe): publisher = u'lavenir.net' category = 'news, Belgium' oldest_article = 3 - encoding = 'utf8' - language = 'fr_BE' + language = 'fr' max_articles_per_feed = 20 no_stylesheets = True @@ -35,6 +34,13 @@ class LAvenir(BasicNewsRecipe): ,(u'Societe' , u'http://www.lavenir.net/rss.aspx?foto=1&intro=1&section=info&info=12e1a2f4-7e03-4cf1-afec-016869072317' ) ] + def preprocess_html(self, soup): + for alink in 
soup.findAll('a'): + if alink.string is not None: + tstr = alink.string + alink.replaceWith(tstr) + return soup + def get_cover_url(self): cover_url = 'http://www.lavenir.net/extra/Static/journal/Pdf/1/UNE_Nationale.PDF' return cover_url diff --git a/resources/recipes/lesoir_be.recipe b/resources/recipes/lesoir_be.recipe index 6b6891c3b8..64fd2fa65c 100644 --- a/resources/recipes/lesoir_be.recipe +++ b/resources/recipes/lesoir_be.recipe @@ -1,7 +1,7 @@ #!/usr/bin/env python __license__ = 'GPL v3' -__copyright__ = '2008, Lionel Bergeret ' +__copyright__ = '2008-2011, Lionel Bergeret ' ''' lesoir.be ''' @@ -16,7 +16,8 @@ class LeSoirBe(BasicNewsRecipe): publisher = u'lesoir.be' category = 'news, Belgium' oldest_article = 3 - language = 'fr_BE' + language = 'fr' + masthead_url = 'http://pdf.lesoir.be/pdf/images/SOIR//logo.gif' max_articles_per_feed = 20 no_stylesheets = True