from __future__ import absolute_import, division, print_function, unicode_literals __license__ = 'GPL v3' __copyright__ = '2012, Darko Miletic ' ''' www.brecha.com.uy ''' try: from urllib.parse import urlencode, quote except ImportError: from urllib import urlencode, quote from calibre.web.feeds.news import BasicNewsRecipe class Brecha(BasicNewsRecipe): title = 'Brecha Digital' __author__ = 'Darko Miletic' description = 'Brecha , Cultura ,Sociales , Separatas, Lupas, Vueltas de Montevideo y toda la infomacion que caracteriza a este semanario' publisher = 'Brecha' category = 'brecha, digital, prensa, uruguay, semanario, sociedad, politica, cultura' oldest_article = 7 max_articles_per_feed = 200 no_stylesheets = True encoding = 'utf8' use_embedded_content = False language = 'es_UY' remove_empty_feeds = True publication_type = 'magazine' auto_cleanup = True needs_subscription = 'optional' masthead_url = 'http://www.brecha.com.uy/templates/ja_nex/themes/orange/images/logo.png' extra_css = """ body{font-family: Arial,Helvetica,sans-serif } img{margin-bottom: 0.4em; display:block} """ conversion_options = { 'comment': description, 'tags': category, 'publisher': publisher, 'language': language } def get_browser(self): br = BasicNewsRecipe.get_browser(self) br.open('http://www.brecha.com.uy/index.php/acceder-miembros') if self.username is not None and self.password is not None: data = urlencode({'task': 'login', 'view': 'register', 'username': self.username, 'password': self.password }) br.open( 'http://www.brecha.com.uy/index.php/index.php?option=com_osemsc&controller=register', data) return br remove_tags = [ dict(name=['meta', 'link']), dict(name='div', attrs={'id': 'js_ja'}), dict(name='ul', attrs={'class': 'actions'}) ] remove_attributes = ['lang', 'border'] feeds = [ (u'Politica', u'http://www.brecha.com.uy/index.php/politica-uruguaya?format=feed&type=rss'), (u'Mundo', u'http://www.brecha.com.uy/index.php/mundo?format=feed&type=rss'), (u'Mapamundi', u'http://www.brecha.com.uy/index.php/mundo/mapamundi?format=feed&type=rss'), (u'Cultura', u'http://www.brecha.com.uy/index.php/cultura?format=feed&type=rss'), (u'Vueltas de Montevideo', u'http://www.brecha.com.uy/index.php/cultura/vueltas-de-montevideo?format=feed&type=rss'), (u'Secos y Mojados', u'http://www.brecha.com.uy/index.php/cultura/secos-y-mojados?format=feed&type=rss'), (u'Literarias', u'http://www.brecha.com.uy/index.php/cultura/literarias?format=feed&type=rss'), (u'Sociedad', u'http://www.brecha.com.uy/index.php/sociedad?format=feed&type=rss'), (u'Especiales', u'http://www.brecha.com.uy/index.php/especiales?format=feed&type=rss'), (u'Contratapa', u'http://www.brecha.com.uy/index.php/contratapa?format=feed&type=rss') ] def print_version(self, url): return url + '?tmpl=component&print=1&layout=default&page=' def get_cover_url(self): soup = self.index_to_soup('http://www.brecha.com.uy/index.php') for image in soup.findAll('img', alt=True): if image['alt'].startswith('Tapa '): return 'http://www.brecha.com.uy' + quote(image['src']) return None