__license__ = 'GPL v3'
__copyright__ = '2009-2010, Darko Miletic '
''' cubadebate.cu '''

from calibre.web.feeds.news import BasicNewsRecipe


class CubaDebate(BasicNewsRecipe):
    """Calibre news recipe for the CubaDebate site (www.cubadebate.cu).

    Articles are discovered through the single RSS feed listed in ``feeds``
    and each one is fetched through the URL returned by ``print_version``.
    """

    # --- Publication metadata -------------------------------------------
    title = 'CubaDebate'
    __author__ = 'Darko Miletic'
    description = 'Contra el Terorismo Mediatico'
    publisher = 'Cubadebate'
    category = 'news, politics, Cuba'
    language = 'es_CU'
    publication_type = 'newsportal'
    masthead_url = 'http://www.cubadebate.cu/wp-content/themes/cubadebate/images/logo.gif'

    # --- Fetch settings ---------------------------------------------------
    oldest_article = 15
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf-8'

    # Replacement styling for the downloaded pages. The value is kept as a
    # single space-separated string, exactly as in the original definition.
    extra_css = (
        ' #BlogTitle{font-size: xx-large; font-weight: bold}'
        ' body{font-family: Verdana, Arial, Tahoma, sans-serif} '
    )

    # Reuses the class-level metadata above so the two cannot drift apart.
    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
    }

    # --- Page clean-up rules ----------------------------------------------
    keep_only_tags = [dict(name='div', attrs={'id': 'Outline'})]
    remove_tags_after = dict(name='div', attrs={'id': 'BlogContent'})
    remove_tags = [
        dict(name=['link', 'base', 'embed', 'object', 'meta', 'iframe']),
        dict(attrs={'id': 'addthis_container'}),
    ]
    remove_attributes = ['width', 'height', 'lang']

    feeds = [(u'Articulos', u'http://www.cubadebate.cu/feed/')]

    def print_version(self, url):
        """Return the print-view URL for the article at *url*.

        The print view is addressed by appending a ``print/`` path segment.
        A separating slash is inserted when *url* does not already end with
        one, so ``.../article`` maps to ``.../article/print/`` rather than
        the malformed ``.../articleprint/``.
        """
        if not url.endswith('/'):
            url += '/'
        return url + 'print/'

    def preprocess_html(self, soup):
        """Strip inline styles and give images alt text, then return *soup*.

        The soup is modified in place and the same object is returned.
        """
        # Drop every inline ``style`` attribute.
        for item in soup.findAll(style=True):
            del item['style']
        # ``alt=False`` presumably selects <img> tags lacking an alt
        # attribute (verify against the bundled BeautifulSoup); each match
        # gets a placeholder value.
        for item in soup.findAll('img', alt=False):
            item['alt'] = 'image'
        return soup