__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic '
''' dw-world.de '''

from calibre.web.feeds.news import BasicNewsRecipe


class DeutscheWelle_es(BasicNewsRecipe):
    """Calibre recipe that builds an e-book from the Deutsche Welle RSS feed.

    Articles are fetched through the site's print-friendly popup pages (see
    ``print_version``) and hyperlinks are flattened in ``preprocess_html``.
    """

    title = 'Deutsche Welle'
    __author__ = 'Darko Miletic'
    description = 'Noticias desde Alemania y mundo'
    publisher = 'Deutsche Welle'
    category = 'news, politics, Germany'
    oldest_article = 1
    max_articles_per_feed = 100
    use_embedded_content = False
    no_stylesheets = True
    # NOTE(review): the description, the feed title ('Noticias') and the class
    # suffix (_es) all point at Spanish content, yet the language is declared
    # as 'de'. Value left untouched here; confirm whether it should be 'es'.
    language = 'de'
    publication_type = 'newsportal'
    remove_empty_feeds = True
    masthead_url = 'http://www.dw-world.de/skins/std/channel1/pics/dw_logo1024.gif'
    # Extra stylesheet rules for the generated book (no_stylesheets is set
    # above).
    extra_css = (
        ' body{font-family: Arial,sans-serif} '
        'img{margin-top: 0.5em; margin-bottom: 0.2em; display: block} '
        '.caption{font-size: x-small; display: block; margin-bottom: 0.4em} '
    )

    # Metadata handed to the conversion step; reuses the attributes above so
    # the values cannot drift apart.
    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }

    # Elements to strip: embedded/interactive tags plus the 'actionFooter'
    # block.
    remove_tags = [
        dict(name=['iframe', 'embed', 'object', 'form', 'base', 'meta', 'link']),
        dict(attrs={'class': 'actionFooter'}),
    ]
    # Only the element carrying this class is kept.
    keep_only_tags = [dict(attrs={'class': 'ArticleDetail detail'})]
    remove_attributes = ['height', 'width', 'onclick', 'border', 'lang']

    feeds = [(u'Noticias', u'http://rss.dw-world.de/rdf/rss-sp-all')]

    def print_version(self, url):
        """Return the print-popup URL for the article at *url*.

        The last path segment of *url* (everything after the final ``/``) is
        appended to the site's ``popup_printcontent`` endpoint.
        """
        article_id = url.rpartition('/')[2]
        return 'http://www.dw-world.de/popups/popup_printcontent/' + article_id

    def preprocess_html(self, soup):
        """Flatten every ``<a>`` element in *soup* and return the same soup.

        * An anchor whose ``.string`` is set is replaced by that string.
        * Otherwise, an anchor containing an ``<img>`` is turned into a
          ``<div>`` with its ``href`` and ``target`` attributes removed, so
          the image stays but is no longer a link.
        * Any other anchor is replaced by its text, as extracted by
          ``self.tag_to_string``.
        """
        for anchor in soup.findAll('a'):
            if anchor.string is not None:
                anchor.replaceWith(anchor.string)
            elif anchor.find('img'):
                anchor.name = 'div'
                del anchor['href']
                # Removed unconditionally, the same way 'href' is, instead of
                # guarding with the legacy has_key() call.
                # NOTE(review): relies on BeautifulSoup treating deletion of
                # an absent attribute as a no-op -- confirm for the bundled
                # version.
                del anchor['target']
            else:
                anchor.replaceWith(self.tag_to_string(anchor))
        return soup