Update various French Belgian recipes

This commit is contained in:
Kovid Goyal 2011-02-08 09:07:30 -07:00
parent 2cfc6b1baa
commit c30e5bcaee
6 changed files with 55 additions and 21 deletions

View File

@ -1,7 +1,7 @@
#!/usr/bin/env python #!/usr/bin/env python
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2008, Lionel Bergeret <lbergeret at gmail.com>' __copyright__ = '2008-2011, Lionel Bergeret <lbergeret at gmail.com>'
''' '''
cinebel.be cinebel.be
''' '''
@ -14,14 +14,14 @@ class Cinebel(BasicNewsRecipe):
description = u'Cinema news from Belgium in French' description = u'Cinema news from Belgium in French'
publisher = u'cinebel.be' publisher = u'cinebel.be'
category = 'news, cinema, movie, Belgium' category = 'news, cinema, movie, Belgium'
oldest_article = 3 oldest_article = 15
encoding = 'utf8' language = 'fr'
language = 'fr_BE'
max_articles_per_feed = 20 max_articles_per_feed = 20
no_stylesheets = True no_stylesheets = True
use_embedded_content = False use_embedded_content = False
timefmt = ' [%d %b %Y]' timefmt = ' [%d %b %Y]'
filterDuplicates = True
keep_only_tags = [ keep_only_tags = [
dict(name = 'span', attrs = {'class': 'movieMainTitle'}) dict(name = 'span', attrs = {'class': 'movieMainTitle'})
@ -35,6 +35,13 @@ class Cinebel(BasicNewsRecipe):
,(u'Top 10' , u'http://www.cinebel.be/Servlets/RssServlet?languageCode=fr&rssType=2' ) ,(u'Top 10' , u'http://www.cinebel.be/Servlets/RssServlet?languageCode=fr&rssType=2' )
] ]
def preprocess_html(self, soup):
for alink in soup.findAll('a'):
if alink.has_key('href'):
tstr = "Site officiel: " + alink['href']
alink.replaceWith(tstr)
return soup
def get_cover_url(self): def get_cover_url(self):
cover_url = 'http://www.cinebel.be/portal/resources/common/logo_index.gif' cover_url = 'http://www.cinebel.be/portal/resources/common/logo_index.gif'
return cover_url return cover_url

View File

@ -1,7 +1,7 @@
#!/usr/bin/env python #!/usr/bin/env python
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2008, Lionel Bergeret <lbergeret at gmail.com>' __copyright__ = '2008-2011, Lionel Bergeret <lbergeret at gmail.com>'
''' '''
dhnet.be dhnet.be
''' '''
@ -16,7 +16,8 @@ class DHNetBe(BasicNewsRecipe):
publisher = u'dhnet.be' publisher = u'dhnet.be'
category = 'news, Belgium' category = 'news, Belgium'
oldest_article = 3 oldest_article = 3
language = 'fr_BE' language = 'fr'
masthead_url = 'http://www.dhnet.be/images/homepage_logo_dh.gif'
max_articles_per_feed = 20 max_articles_per_feed = 20
no_stylesheets = True no_stylesheets = True
@ -34,6 +35,13 @@ class DHNetBe(BasicNewsRecipe):
,(u'La Une Info' , u'http://www.dhnet.be/rss/dhinfos/' ) ,(u'La Une Info' , u'http://www.dhnet.be/rss/dhinfos/' )
] ]
def preprocess_html(self, soup):
for alink in soup.findAll('a'):
if alink.string is not None:
tstr = alink.string
alink.replaceWith(tstr)
return soup
def get_cover_url(self): def get_cover_url(self):
cover_url = strftime('http://pdf-online.dhnet.be/pdfonline/image/%Y%m%d/dh_%Y%m%d_nam_infoge_001.pdf.L.jpg') cover_url = strftime('http://pdf-online.dhnet.be/pdfonline/image/%Y%m%d/dh_%Y%m%d_nam_infoge_001.pdf.L.jpg')
return cover_url return cover_url

View File

@ -1,7 +1,7 @@
#!/usr/bin/env python #!/usr/bin/env python
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2008, Lionel Bergeret <lbergeret at gmail.com>' __copyright__ = '2008-2011, Lionel Bergeret <lbergeret at gmail.com>'
''' '''
lalibre.be lalibre.be
''' '''
@ -16,18 +16,18 @@ class LaLibre(BasicNewsRecipe):
publisher = u'lalibre.be' publisher = u'lalibre.be'
category = 'news, Belgium' category = 'news, Belgium'
oldest_article = 3 oldest_article = 3
language = 'fr_BE' language = 'fr'
masthead_url = 'http://www.lalibre.be/img/logoLaLibre.gif'
max_articles_per_feed = 20 max_articles_per_feed = 20
no_stylesheets = True no_stylesheets = True
use_embedded_content = False use_embedded_content = False
timefmt = ' [%d %b %Y]' timefmt = ' [%d %b %Y]'
keep_only_tags = [ remove_tags_before = dict(name = 'div', attrs = {'class': 'extraMainContent'})
dict(name = 'div', attrs = {'id': 'articleHat'}) remove_tags_after = dict(name = 'div', attrs = {'id': 'articleText'})
,dict(name = 'p', attrs = {'id': 'publicationDate'})
,dict(name = 'div', attrs = {'id': 'articleText'}) remove_tags = [dict(name = 'div', attrs = {'id': 'strongArticleLinks'})]
]
feeds = [ feeds = [
(u'L\'actu' , u'http://www.lalibre.be/rss/?section=10' ) (u'L\'actu' , u'http://www.lalibre.be/rss/?section=10' )
@ -38,6 +38,13 @@ class LaLibre(BasicNewsRecipe):
,(u'Societe' , u'http://www.lalibre.be/rss/?section=12' ) ,(u'Societe' , u'http://www.lalibre.be/rss/?section=12' )
] ]
def preprocess_html(self, soup):
for alink in soup.findAll('a'):
if alink.string is not None:
tstr = alink.string
alink.replaceWith(tstr)
return soup
def get_cover_url(self): def get_cover_url(self):
cover_url = strftime('http://pdf-online.lalibre.be/pdfonline/image/%Y%m%d/llb_%Y%m%d_nam_libre_001.pdf.L.jpg') cover_url = strftime('http://pdf-online.lalibre.be/pdfonline/image/%Y%m%d/llb_%Y%m%d_nam_libre_001.pdf.L.jpg')
return cover_url return cover_url

View File

@ -1,7 +1,7 @@
#!/usr/bin/env python #!/usr/bin/env python
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2008, Lionel Bergeret <lbergeret at gmail.com>' __copyright__ = '2008-2011, Lionel Bergeret <lbergeret at gmail.com>'
''' '''
lameuse.be lameuse.be
''' '''
@ -16,8 +16,8 @@ class LaMeuse(BasicNewsRecipe):
publisher = u'lameuse.be' publisher = u'lameuse.be'
category = 'news, Belgium' category = 'news, Belgium'
oldest_article = 3 oldest_article = 3
encoding = 'utf8' language = 'fr'
language = 'fr_BE' masthead_url = 'http://www.lameuse.be/images/SPV3/logo_header_LM.gif'
max_articles_per_feed = 20 max_articles_per_feed = 20
no_stylesheets = True no_stylesheets = True
@ -32,6 +32,11 @@ class LaMeuse(BasicNewsRecipe):
dict(name = 'div', attrs = {'class': 'sb-group'}) dict(name = 'div', attrs = {'class': 'sb-group'})
,dict(name = 'div', attrs = {'id': 'share'}) ,dict(name = 'div', attrs = {'id': 'share'})
,dict(name = 'div', attrs = {'id': 'commentaires'}) ,dict(name = 'div', attrs = {'id': 'commentaires'})
,dict(name = 'ul', attrs = {'class': 'right liensutiles'})
,dict(name = 'ul', attrs = {'class': 'bas liensutiles'})
,dict(name = 'p', attrs = {'class': 'ariane'})
,dict(name = 'div', attrs = {'class': 'inner-bloc'})
,dict(name = 'div', attrs = {'class': 'block-01'})
] ]
feeds = [ feeds = [

View File

@ -1,7 +1,7 @@
#!/usr/bin/env python #!/usr/bin/env python
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2008, Lionel Bergeret <lbergeret at gmail.com>' __copyright__ = '2008-2011, Lionel Bergeret <lbergeret at gmail.com>'
''' '''
lavenir.net lavenir.net
''' '''
@ -15,8 +15,7 @@ class LAvenir(BasicNewsRecipe):
publisher = u'lavenir.net' publisher = u'lavenir.net'
category = 'news, Belgium' category = 'news, Belgium'
oldest_article = 3 oldest_article = 3
encoding = 'utf8' language = 'fr'
language = 'fr_BE'
max_articles_per_feed = 20 max_articles_per_feed = 20
no_stylesheets = True no_stylesheets = True
@ -35,6 +34,13 @@ class LAvenir(BasicNewsRecipe):
,(u'Societe' , u'http://www.lavenir.net/rss.aspx?foto=1&intro=1&section=info&info=12e1a2f4-7e03-4cf1-afec-016869072317' ) ,(u'Societe' , u'http://www.lavenir.net/rss.aspx?foto=1&intro=1&section=info&info=12e1a2f4-7e03-4cf1-afec-016869072317' )
] ]
def preprocess_html(self, soup):
for alink in soup.findAll('a'):
if alink.string is not None:
tstr = alink.string
alink.replaceWith(tstr)
return soup
def get_cover_url(self): def get_cover_url(self):
cover_url = 'http://www.lavenir.net/extra/Static/journal/Pdf/1/UNE_Nationale.PDF' cover_url = 'http://www.lavenir.net/extra/Static/journal/Pdf/1/UNE_Nationale.PDF'
return cover_url return cover_url

View File

@ -1,7 +1,7 @@
#!/usr/bin/env python #!/usr/bin/env python
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2008, Lionel Bergeret <lbergeret at gmail.com>' __copyright__ = '2008-2011, Lionel Bergeret <lbergeret at gmail.com>'
''' '''
lesoir.be lesoir.be
''' '''
@ -16,7 +16,8 @@ class LeSoirBe(BasicNewsRecipe):
publisher = u'lesoir.be' publisher = u'lesoir.be'
category = 'news, Belgium' category = 'news, Belgium'
oldest_article = 3 oldest_article = 3
language = 'fr_BE' language = 'fr'
masthead_url = 'http://pdf.lesoir.be/pdf/images/SOIR//logo.gif'
max_articles_per_feed = 20 max_articles_per_feed = 20
no_stylesheets = True no_stylesheets = True