__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic '
'''
balkaninsight.com
'''

import re

from calibre.web.feeds.news import BasicNewsRecipe


class BalkanInsight(BasicNewsRecipe):
    """calibre news recipe for Balkan Insight (balkaninsight.com)."""

    # --- Publication metadata -------------------------------------------
    title = 'Balkan Insight'
    __author__ = 'Darko Miletic'
    description = (
        'Get exclusive news and in depth information on business, politics, '
        'events and lifestyle in the Balkans. '
        'Free and exclusive premium content.'
    )
    publisher = 'BalkanInsight.com'
    category = 'news, politics, Balcans'
    language = 'en'
    publication_type = 'newsportal'
    masthead_url = 'http://www.balkaninsight.com/templates/balkaninsight/images/aindex_02.jpg'

    # --- Download settings ----------------------------------------------
    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = False
    use_embedded_content = False
    encoding = 'utf-8'
    remove_empty_feeds = True

    # --- Styling ----------------------------------------------------------
    # Adjacent literals are concatenated by the parser, so the stylesheet
    # value is one string with a single space between the rules.
    extra_css = (
        ' @font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)}'
        ' @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}'
        ' .article_description,body{font-family: Arial,Verdana,Helvetica,sans1,sans-serif}'
        ' img{margin-bottom: 0.8em}'
        ' h1,h2,h3,h4{font-family: Times,Georgia,serif1,serif; color: #24569E}'
        ' .article-deck {color:#777777; font-size: small;}'
        ' .main_news_img{font-size: small} '
    )

    # Conversion metadata is derived from the attributes declared above.
    conversion_options = dict(
        comment=description,
        tags=category,
        publisher=publisher,
        language=language,
    )

    # --- Content clean-up -----------------------------------------------
    # Every U+0110 (capital D with stroke) is replaced by U+00D0 (capital Eth).
    preprocess_regexps = [
        (re.compile(u'\u0110'), lambda match: u'\u00D0'),
    ]

    # Only the <div id="article"> element is kept ...
    keep_only_tags = [
        dict(name='div', attrs={'id': 'article'}),
    ]
    # ... and these tags are removed.
    remove_tags = [
        dict(name=['object', 'link', 'iframe']),
    ]

    # --- Feeds: one per country, as (title, url) pairs ------------------
    feeds = [
        (u'Albania', u'http://www.balkaninsight.com/?tpl=653&tpid=144'),
        (u'Bosnia', u'http://www.balkaninsight.com/?tpl=653&tpid=145'),
        (u'Bulgaria', u'http://www.balkaninsight.com/?tpl=653&tpid=146'),
        (u'Croatia', u'http://www.balkaninsight.com/?tpl=653&tpid=147'),
        (u'Kosovo', u'http://www.balkaninsight.com/?tpl=653&tpid=148'),
        (u'Macedonia', u'http://www.balkaninsight.com/?tpl=653&tpid=149'),
        (u'Montenegro', u'http://www.balkaninsight.com/?tpl=653&tpid=150'),
        (u'Romania', u'http://www.balkaninsight.com/?tpl=653&tpid=151'),
        (u'Serbia', u'http://www.balkaninsight.com/?tpl=653&tpid=152'),
    ]

    def preprocess_html(self, soup):
        """Drop inline ``style`` attributes, then pass the soup on.

        Every element that carries a ``style`` attribute has it deleted;
        the soup is then handed to ``self.adeify_images`` and that call's
        result is returned.
        """
        for styled_tag in soup.findAll(style=True):
            del styled_tag['style']
        return self.adeify_images(soup)