# calibre/recipes/brecha.recipe

from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3'
__copyright__ = '2012, Darko Miletic <darko.miletic at gmail.com>'
'''
www.brecha.com.uy
'''

try:
    from urllib.parse import urlencode, quote
except ImportError:
    from urllib import urlencode, quote
from calibre.web.feeds.news import BasicNewsRecipe


class Brecha(BasicNewsRecipe):
    """Calibre news recipe for the weekly Brecha (www.brecha.com.uy).

    Articles come from the per-section RSS feeds listed in ``feeds`` and
    are fetched through the print-layout URL built by ``print_version``.
    The cover is taken from the site's front page. Logging in is optional:
    credentials are only submitted when both a username and a password
    have been supplied.
    """

    # --- Descriptive metadata -------------------------------------------
    title = 'Brecha Digital'
    __author__ = 'Darko Miletic'
    description = 'Brecha , Cultura ,Sociales , Separatas, Lupas, Vueltas de Montevideo y toda la infomacion que caracteriza a este semanario'
    publisher = 'Brecha'
    category = 'brecha, digital, prensa, uruguay, semanario, sociedad, politica, cultura'
    language = 'es_UY'
    publication_type = 'magazine'
    masthead_url = 'http://www.brecha.com.uy/templates/ja_nex/themes/orange/images/logo.png'

    # --- Download / clean-up behaviour ----------------------------------
    oldest_article = 7
    max_articles_per_feed = 200
    no_stylesheets = True
    encoding = 'utf8'
    use_embedded_content = False
    remove_empty_feeds = True
    auto_cleanup = True
    # 'optional': get_browser() below only logs in when credentials exist.
    needs_subscription = 'optional'
    extra_css = """
                               body{font-family: Arial,Helvetica,sans-serif }
                               img{margin-bottom: 0.4em; display:block}
                            """

    # Metadata options mirrored from the attributes defined above.
    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }

    def get_browser(self):
        """Return the browser used for downloads, logged in when possible.

        The members page is always requested first. When both
        ``self.username`` and ``self.password`` are set, the login fields
        are then sent to the registration controller.
        """
        br = BasicNewsRecipe.get_browser(self)
        # NOTE(review): this request is made even without credentials;
        # presumably it establishes a session cookie -- confirm before
        # moving it under the credentials check.
        br.open('http://www.brecha.com.uy/index.php/acceder-miembros')
        if self.username is not None and self.password is not None:
            data = urlencode({
                'task': 'login',
                'view': 'register',
                'username': self.username,
                'password': self.password,
            })
            # NOTE(review): the path contains 'index.php' twice; kept as in
            # the original recipe -- verify against the live site.
            br.open(
                'http://www.brecha.com.uy/index.php/index.php?option=com_osemsc&controller=register', data)
        return br

    # Page furniture stripped from every downloaded article.
    remove_tags = [
        dict(name=['meta', 'link']),
        dict(name='div', attrs={'id': 'js_ja'}),
        dict(name='ul', attrs={'class': 'actions'})
    ]
    remove_attributes = ['lang', 'border']

    # (section title, RSS feed URL) pairs.
    feeds = [
        (u'Politica', u'http://www.brecha.com.uy/index.php/politica-uruguaya?format=feed&type=rss'),
        (u'Mundo', u'http://www.brecha.com.uy/index.php/mundo?format=feed&type=rss'),
        (u'Mapamundi', u'http://www.brecha.com.uy/index.php/mundo/mapamundi?format=feed&type=rss'),
        (u'Cultura', u'http://www.brecha.com.uy/index.php/cultura?format=feed&type=rss'),
        (u'Vueltas de Montevideo',
         u'http://www.brecha.com.uy/index.php/cultura/vueltas-de-montevideo?format=feed&type=rss'),
        (u'Secos y Mojados', u'http://www.brecha.com.uy/index.php/cultura/secos-y-mojados?format=feed&type=rss'),
        (u'Literarias', u'http://www.brecha.com.uy/index.php/cultura/literarias?format=feed&type=rss'),
        (u'Sociedad', u'http://www.brecha.com.uy/index.php/sociedad?format=feed&type=rss'),
        (u'Especiales', u'http://www.brecha.com.uy/index.php/especiales?format=feed&type=rss'),
        (u'Contratapa', u'http://www.brecha.com.uy/index.php/contratapa?format=feed&type=rss')
    ]

    def print_version(self, url):
        """Return the print-layout URL for the article at ``url``.

        The print parameters are appended with ``?`` when ``url`` has no
        query string and with ``&`` when it already has one. Any
        ``#fragment`` is dropped first, because parameters placed after a
        fragment would never reach the server.
        """
        address = url.partition('#')[0]
        separator = '&' if '?' in address else '?'
        return address + separator + 'tmpl=component&print=1&layout=default&page='

    def get_cover_url(self):
        """Return the URL of the current cover image, or ``None``.

        The front page is scanned for the first ``<img>`` whose ``alt``
        text starts with ``'Tapa '`` and which has a non-empty ``src``.
        An absolute ``src`` is returned unchanged; a relative one is
        percent-quoted and resolved against the site root.
        """
        soup = self.index_to_soup('http://www.brecha.com.uy/index.php')
        for image in soup.findAll('img', alt=True):
            if not image['alt'].startswith('Tapa '):
                continue
            src = image.get('src')
            if not src:
                # A cover candidate without a usable source; keep looking.
                continue
            if src.startswith(('http://', 'https://')):
                # Already absolute: prefixing the host would corrupt it.
                return src
            if not src.startswith('/'):
                # The page lives at the site root, so a document-relative
                # path resolves directly under '/'.
                src = '/' + src
            return 'http://www.brecha.com.uy' + quote(src)
        return None