__license__ = 'GPL v3'
__copyright__ = '2009-2011, Darko Miletic '
'''
exiledonline.com
'''

from calibre.web.feeds.news import BasicNewsRecipe


class Exiled(BasicNewsRecipe):
    """Calibre news recipe for the Exiled Online feed (exiledonline.com)."""

    title = 'Exiled Online'
    __author__ = 'Darko Miletic'
    description = "Mankind's only alternative since 1997 - Formerly known as The eXile"
    publisher = 'Exiled Online'
    category = 'news, politics, international'
    oldest_article = 15
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf8'
    remove_javascript = True
    language = 'en'
    publication_type = 'newsblog'
    masthead_url = 'http://exiledonline.com/wp-content/themes/exiledonline_theme/images/header-sm.gif'
    # Adjacent literals are concatenated into the same stylesheet text the
    # recipe has always shipped; only the source layout is split for length.
    extra_css = (
        ' body{font-family: Arial,Helvetica,sans-serif}'
        ' #topslug{font-size: xx-large; font-weight: bold; color: red} '
    )

    # Metadata reuses the class attributes defined above so the values
    # cannot drift apart.
    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }

    # Keep only the <div id="main"> container, then drop embedded objects,
    # <link> elements, the "info" div and the comments/navigation divs.
    keep_only_tags = [dict(name='div', attrs={'id': 'main'})]
    remove_tags = [
        dict(name=['object', 'link']),
        dict(name='div', attrs={'class': 'info'}),
        dict(name='div', attrs={'id': ['comments', 'navig']}),
    ]

    feeds = [(u'Articles', u'http://exiledonline.com/feed/')]

    def preprocess_html(self, soup):
        """Strip inline styles and unlink simple anchors.

        Every element carrying a ``style`` attribute has that attribute
        removed.  Every ``<a>`` element whose ``.string`` is not ``None`` is
        replaced by that string; anchors for which ``.string`` is ``None``
        are left untouched.

        :param soup: parsed document for one article page.
        :return: the same ``soup`` object, modified in place.
        """
        for item in soup.findAll(style=True):
            del item['style']
        for alink in soup.findAll('a'):
            if alink.string is not None:
                tstr = alink.string
                alink.replaceWith(tstr)
        return soup

    def get_article_url(self, article):
        """Return the URL to download for a feed entry.

        The entry's ``link`` gets ``all/1/`` appended.
        NOTE(review): presumably this selects the site's single-page view of
        a multi-page article - confirm against the live site.

        :param article: feed entry exposing a dict-style ``get``.
        :return: the link with ``all/1/`` appended, or ``None`` when the
            entry has no usable link.
        """
        link = article.get('link', None)
        if not link:
            # Previously a missing link raised TypeError (None + str) and an
            # empty one produced the bogus relative URL 'all/1/'.
            return None
        if not link.endswith('/'):
            # Without the separator the suffix would fuse with the last path
            # segment (".../postall/1/").
            link += '/'
        return link + 'all/1/'