# calibre/recipes/lalibre_be.recipe

#!/usr/bin/env  python

__license__   = 'GPL v3'
__copyright__ = '2008-2011, Lionel Bergeret <lbergeret at gmail.com>'
'''
lalibre.be
'''

from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe

class LaLibre(BasicNewsRecipe):
    """Recipe that downloads the La Libre Belgique (lalibre.be) RSS sections."""

    # --- Publication metadata -------------------------------------------
    title = u'La Libre Belgique'
    __author__ = u'Lionel Bergeret'
    description = u'News from Belgium in French'
    publisher = u'lalibre.be'
    category = 'news, Belgium'
    language = 'fr'
    masthead_url = 'http://www.lalibre.be/img/logoLaLibre.gif'

    # --- Download limits and rendering options --------------------------
    # Per calibre's BasicNewsRecipe API: oldest_article is expressed in days.
    oldest_article = 3
    max_articles_per_feed = 20
    no_stylesheets = True
    # Feed entries are not used as article bodies; linked pages are fetched.
    use_embedded_content = False
    timefmt = ' [%d %b %Y]'

    # --- Page clean-up ---------------------------------------------------
    # Keep only the region between the 'extraMainContent' div and the
    # 'articleText' div, then drop the 'strongArticleLinks' div inside it.
    remove_tags_before = dict(name='div', attrs={'class': 'extraMainContent'})
    remove_tags_after = dict(name='div', attrs={'id': 'articleText'})
    remove_tags = [
        dict(name='div', attrs={'id': 'strongArticleLinks'}),
    ]

    # --- RSS sections as (title, url) pairs ------------------------------
    feeds = [
        (u'L\'actu', u'http://www.lalibre.be/rss/?section=10'),
        (u'Culture', u'http://www.lalibre.be/rss/?section=5'),
        (u'Economie', u'http://www.lalibre.be/rss/?section=3'),
        (u'Libre Entreprise', u'http://www.lalibre.be/rss/?section=904'),
        (u'Sports', u'http://www.lalibre.be/rss/?section=2'),
        (u'Societe', u'http://www.lalibre.be/rss/?section=12'),
    ]

    def preprocess_html(self, soup):
        """Replace each ``<a>`` tag that has a ``.string`` with that string.

        Anchors whose ``.string`` is None are left in place (presumably
        those wrapping nested markup -- confirm against the BeautifulSoup
        version calibre bundles). The same ``soup`` object is returned
        after being modified in place.
        """
        for anchor in soup.findAll('a'):
            link_text = anchor.string
            if link_text is None:
                continue
            # Swap the whole anchor for its text so no hyperlink remains.
            anchor.replaceWith(link_text)
        return soup

    def get_cover_url(self):
        """Return the cover image URL produced from the date-stamped template.

        The ``%Y%m%d`` fields are expanded by calibre's ``strftime``
        (presumably with the current date, since no time is passed).
        """
        return strftime('http://pdf-online.lalibre.be/pdfonline/image/%Y%m%d/llb_%Y%m%d_nam_libre_001.pdf.L.jpg')