__license__ = 'GPL v3'
__copyright__ = '2009-2010, Darko Miletic '
'''
cubadebate.cu
'''

from calibre.web.feeds.news import BasicNewsRecipe


class CubaDebate(BasicNewsRecipe):
    """Calibre news recipe that downloads articles from cubadebate.cu.

    Articles are discovered through the site's RSS feed, fetched via the
    URL returned by :meth:`print_version`, and cleaned up by
    :meth:`preprocess_html` before conversion.
    """

    # --- Publication metadata -------------------------------------------
    title = 'CubaDebate'
    __author__ = 'Darko Miletic'
    description = 'Contra el Terorismo Mediatico'
    publisher = 'Cubadebate'
    category = 'news, politics, Cuba'
    language = 'es_CU'
    publication_type = 'newsportal'
    masthead_url = 'http://www.cubadebate.cu/wp-content/themes/cubadebate/images/logo.gif'

    # --- Download behaviour ---------------------------------------------
    oldest_article = 15
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf-8'

    # CSS applied by the recipe in place of the site's own stylesheets
    # (which are disabled through ``no_stylesheets`` above).
    extra_css = """
        #BlogTitle{font-size: xx-large; font-weight: bold}
        body{font-family: Verdana, Arial, Tahoma, sans-serif}
    """

    # Reuse the metadata declared above so the two cannot drift apart.
    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
    }

    # --- HTML clean-up rules --------------------------------------------
    keep_only_tags = [dict(name='div', attrs={'id': 'Outline'})]
    remove_tags_after = dict(name='div', attrs={'id': 'BlogContent'})
    remove_tags = [
        dict(name=['link', 'base', 'embed', 'object', 'meta', 'iframe']),
        dict(attrs={'id': 'addthis_container'}),
    ]
    remove_attributes = ['width', 'height', 'lang']

    feeds = [(u'Articulos', u'http://www.cubadebate.cu/feed/')]

    def print_version(self, url):
        """Return the URL to download for the article at *url*.

        The result is *url* with ``'print/'`` appended.
        NOTE(review): this presumes *url* already ends with a slash --
        confirm against the URLs delivered by the feed.
        """
        return url + 'print/'

    def preprocess_html(self, soup):
        """Normalise the parsed article and return the same *soup* object.

        * Drops the inline ``style`` attribute from every tag that has one.
        * Gives every ``<img>`` that lacks an ``alt`` attribute the
          placeholder text ``'image'``.
        """
        for item in soup.findAll(style=True):
            del item['style']
        for item in soup.findAll('img'):
            # ``Tag.has_key`` is the BeautifulSoup 3 spelling and is
            # deprecated in BeautifulSoup 4 (replaced by ``has_attr``).
            # ``Tag.get`` exists in both and returns None only when the
            # attribute is absent, so this check is equivalent and portable.
            if item.get('alt') is None:
                item['alt'] = 'image'
        return soup