__license__ = 'GPL v3'
__copyright__ = '2009-2011, Darko Miletic '
'''
exiledonline.com
'''

from calibre.web.feeds.news import BasicNewsRecipe


class Exiled(BasicNewsRecipe):
    """Recipe that builds an e-book from the exiledonline.com article feed.

    The class attributes configure the download (feed list, encoding,
    which tags to keep or drop); the two methods adjust the article URL
    before fetching and clean the fetched HTML.
    """

    # --- Metadata -------------------------------------------------------
    title = 'Exiled Online'
    __author__ = 'Darko Miletic'
    description = "Mankind's only alternative since 1997 - Formerly known as The eXile"
    publisher = 'Exiled Online'
    category = 'news, politics, international'
    language = 'en'
    publication_type = 'newsblog'
    masthead_url = 'http://exiledonline.com/wp-content/themes/exiledonline_theme/images/header-sm.gif'

    # --- Fetch settings -------------------------------------------------
    # NOTE(review): units/semantics of these values are defined by
    # BasicNewsRecipe, not by this file - confirm against the calibre docs.
    oldest_article = 15
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf8'
    remove_javascript = True

    # Extra CSS for the generated book: body font and the '#topslug' element.
    extra_css = (
        ' body{font-family: Arial,Helvetica,sans-serif}'
        ' #topslug{font-size: xx-large; font-weight: bold; color: red} '
    )

    # Reuses the metadata declared above so the values stay in sync.
    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }

    # --- Page clean-up selectors ----------------------------------------
    keep_only_tags = [dict(name='div', attrs={'id': 'main'})]
    remove_tags = [
        dict(name=['object', 'link']),
        dict(name='div', attrs={'class': 'info'}),
        dict(name='div', attrs={'id': ['comments', 'navig']}),
    ]

    feeds = [(u'Articles', u'http://exiledonline.com/feed/')]

    def preprocess_html(self, soup):
        """Strip inline styles and unlink text-only anchors.

        Every tag that carries a ``style`` attribute has that attribute
        removed. Every ``<a>`` whose ``.string`` is not ``None`` is replaced
        by that string; anchors whose ``.string`` is ``None`` are left
        untouched.

        :param soup: parsed document to clean (modified in place).
        :return: the same ``soup`` object.
        """
        for item in soup.findAll(style=True):
            del item['style']
        for alink in soup.findAll('a'):
            if alink.string is not None:
                tstr = alink.string
                alink.replaceWith(tstr)
        return soup

    def get_article_url(self, article):
        """Return the URL to fetch for a feed entry.

        The entry's ``link`` gets ``'all/1/'`` appended (presumably this
        selects the site's single-page view of an article - confirm against
        the site).

        :param article: mapping-like feed entry; only ``'link'`` is read.
        :return: the adjusted URL, or ``None`` when the entry has no link.
        """
        raw = article.get('link', None)
        if not raw:
            # Missing/empty link: concatenating would raise TypeError (None)
            # or yield the bare suffix (''), so report "no URL" instead.
            return None
        if not raw.endswith('/'):
            # Without the separator the suffix would fuse with the last
            # path segment ('.../postall/1/').
            raw += '/'
        return raw + 'all/1/'